diff options
Diffstat (limited to 'kernel/cgroup')
-rw-r--r-- | kernel/cgroup/cgroup-internal.h | 69 | ||||
-rw-r--r-- | kernel/cgroup/cgroup-v1.c | 10 | ||||
-rw-r--r-- | kernel/cgroup/cgroup.c | 212 |
3 files changed, 158 insertions, 133 deletions
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 589b0e7013ec..9203bfb05603 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -26,6 +26,64 @@ struct cgrp_cset_link { struct list_head cgrp_link; }; +/* used to track tasks and csets during migration */ +struct cgroup_taskset { + /* the src and dst cset list running through cset->mg_node */ + struct list_head src_csets; + struct list_head dst_csets; + + /* the subsys currently being processed */ + int ssid; + + /* + * Fields for cgroup_taskset_*() iteration. + * + * Before migration is committed, the target migration tasks are on + * ->mg_tasks of the csets on ->src_csets. After, on ->mg_tasks of + * the csets on ->dst_csets. ->csets point to either ->src_csets + * or ->dst_csets depending on whether migration is committed. + * + * ->cur_csets and ->cur_task point to the current task position + * during iteration. + */ + struct list_head *csets; + struct css_set *cur_cset; + struct task_struct *cur_task; +}; + +/* migration context also tracks preloading */ +struct cgroup_mgctx { + /* + * Preloaded source and destination csets. Used to guarantee + * atomic success or failure on actual migration. + */ + struct list_head preloaded_src_csets; + struct list_head preloaded_dst_csets; + + /* tasks and csets to migrate */ + struct cgroup_taskset tset; + + /* subsystems affected by migration */ + u16 ss_mask; +}; + +#define CGROUP_TASKSET_INIT(tset) \ +{ \ + .src_csets = LIST_HEAD_INIT(tset.src_csets), \ + .dst_csets = LIST_HEAD_INIT(tset.dst_csets), \ + .csets = &tset.src_csets, \ +} + +#define CGROUP_MGCTX_INIT(name) \ +{ \ + LIST_HEAD_INIT(name.preloaded_src_csets), \ + LIST_HEAD_INIT(name.preloaded_dst_csets), \ + CGROUP_TASKSET_INIT(name.tset), \ +} + +#define DEFINE_CGROUP_MGCTX(name) \ + struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name) + struct cgroup_sb_opts { u16 subsys_mask; unsigned int flags; @@ -112,13 +170,12 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, struct cgroup_namespace *ns); bool cgroup_may_migrate_to(struct cgroup *dst_cgrp); -void cgroup_migrate_finish(struct list_head *preloaded_csets); -void cgroup_migrate_add_src(struct css_set *src_cset, - struct cgroup *dst_cgrp, - struct list_head *preloaded_csets); -int cgroup_migrate_prepare_dst(struct list_head *preloaded_csets); +void cgroup_migrate_finish(struct cgroup_mgctx *mgctx); +void cgroup_migrate_add_src(struct css_set *src_cset, struct cgroup *dst_cgrp, + struct cgroup_mgctx *mgctx); +int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx); int cgroup_migrate(struct task_struct *leader, bool threadgroup, - struct cgroup_root *root); + struct cgroup_mgctx *mgctx); int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, bool threadgroup); diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 465b10111eaf..fc34bcf2329f 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -87,7 +87,7 @@ EXPORT_SYMBOL_GPL(cgroup_attach_task_all); */ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) { - LIST_HEAD(preloaded_csets); + DEFINE_CGROUP_MGCTX(mgctx); struct cgrp_cset_link *link; struct css_task_iter it; struct task_struct *task; @@ -106,10 +106,10 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) /* all tasks in @from are being moved, all csets are source */ spin_lock_irq(&css_set_lock); list_for_each_entry(link, &from->cset_links, cset_link) - cgroup_migrate_add_src(link->cset, to, &preloaded_csets); + cgroup_migrate_add_src(link->cset, to, &mgctx); spin_unlock_irq(&css_set_lock); - ret = cgroup_migrate_prepare_dst(&preloaded_csets); + ret = cgroup_migrate_prepare_dst(&mgctx); if (ret) goto out_err; @@ -125,14 +125,14 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) css_task_iter_end(&it); if (task) { - ret = cgroup_migrate(task, false, to->root); + ret = cgroup_migrate(task, false, &mgctx); if (!ret) trace_cgroup_transfer_tasks(to, task, false); put_task_struct(task); } } while (task && !ret); out_err: - cgroup_migrate_finish(&preloaded_csets); + cgroup_migrate_finish(&mgctx); percpu_up_write(&cgroup_threadgroup_rwsem); mutex_unlock(&cgroup_mutex); return ret; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index d9d82e96d67f..fe374f803b20 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -159,7 +159,7 @@ static bool cgrp_dfl_visible; static u16 cgrp_dfl_inhibit_ss_mask; /* some controllers are implicitly enabled on the default hierarchy */ -static unsigned long cgrp_dfl_implicit_ss_mask; +static u16 cgrp_dfl_implicit_ss_mask; /* The list of hierarchy roots */ LIST_HEAD(cgroup_roots); @@ -178,13 +178,13 @@ static DEFINE_IDR(cgroup_hierarchy_idr); static u64 css_serial_nr_next = 1; /* - * These bitmask flags indicate whether tasks in the fork and exit paths have - * fork/exit handlers to call. This avoids us having to do extra work in the - * fork/exit path to check which subsystems have fork/exit callbacks. + * These bitmasks identify subsystems with specific features to avoid + * having to do iterative checks repeatedly. */ static u16 have_fork_callback __read_mostly; static u16 have_exit_callback __read_mostly; static u16 have_free_callback __read_mostly; +static u16 have_canfork_callback __read_mostly; /* cgroup namespace for init task */ struct cgroup_namespace init_cgroup_ns = { @@ -195,9 +195,6 @@ struct cgroup_namespace init_cgroup_ns = { .root_cset = &init_css_set, }; -/* Ditto for the can_fork callback. */ -static u16 have_canfork_callback __read_mostly; - static struct file_system_type cgroup2_fs_type; static struct cftype cgroup_base_files[]; @@ -1916,49 +1913,18 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) } EXPORT_SYMBOL_GPL(task_cgroup_path); -/* used to track tasks and other necessary states during migration */ -struct cgroup_taskset { - /* the src and dst cset list running through cset->mg_node */ - struct list_head src_csets; - struct list_head dst_csets; - - /* the subsys currently being processed */ - int ssid; - - /* - * Fields for cgroup_taskset_*() iteration. - * - * Before migration is committed, the target migration tasks are on - * ->mg_tasks of the csets on ->src_csets. After, on ->mg_tasks of - * the csets on ->dst_csets. ->csets point to either ->src_csets - * or ->dst_csets depending on whether migration is committed. - * - * ->cur_csets and ->cur_task point to the current task position - * during iteration. - */ - struct list_head *csets; - struct css_set *cur_cset; - struct task_struct *cur_task; -}; - -#define CGROUP_TASKSET_INIT(tset) (struct cgroup_taskset){ \ - .src_csets = LIST_HEAD_INIT(tset.src_csets), \ - .dst_csets = LIST_HEAD_INIT(tset.dst_csets), \ - .csets = &tset.src_csets, \ -} - /** - * cgroup_taskset_add - try to add a migration target task to a taskset + * cgroup_migrate_add_task - add a migration target task to a migration context * @task: target task - * @tset: target taskset + * @mgctx: target migration context * - * Add @task, which is a migration target, to @tset. This function becomes - * noop if @task doesn't need to be migrated. @task's css_set should have - * been added as a migration source and @task->cg_list will be moved from - * the css_set's tasks list to mg_tasks one. + * Add @task, which is a migration target, to @mgctx->tset. This function + * becomes noop if @task doesn't need to be migrated. @task's css_set + * should have been added as a migration source and @task->cg_list will be + * moved from the css_set's tasks list to mg_tasks one. */ -static void cgroup_taskset_add(struct task_struct *task, - struct cgroup_taskset *tset) +static void cgroup_migrate_add_task(struct task_struct *task, + struct cgroup_mgctx *mgctx) { struct css_set *cset; @@ -1978,10 +1944,11 @@ static void cgroup_taskset_add(struct task_struct *task, list_move_tail(&task->cg_list, &cset->mg_tasks); if (list_empty(&cset->mg_node)) - list_add_tail(&cset->mg_node, &tset->src_csets); + list_add_tail(&cset->mg_node, + &mgctx->tset.src_csets); if (list_empty(&cset->mg_dst_cset->mg_node)) - list_move_tail(&cset->mg_dst_cset->mg_node, - &tset->dst_csets); + list_add_tail(&cset->mg_dst_cset->mg_node, + &mgctx->tset.dst_csets); } /** @@ -2048,17 +2015,16 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset, /** * cgroup_taskset_migrate - migrate a taskset - * @tset: taget taskset - * @root: cgroup root the migration is taking place on + * @mgctx: migration context * - * Migrate tasks in @tset as setup by migration preparation functions. + * Migrate tasks in @mgctx as setup by migration preparation functions. * This function fails iff one of the ->can_attach callbacks fails and - * guarantees that either all or none of the tasks in @tset are migrated. - * @tset is consumed regardless of success. + * guarantees that either all or none of the tasks in @mgctx are migrated. + * @mgctx is consumed regardless of success. */ -static int cgroup_taskset_migrate(struct cgroup_taskset *tset, - struct cgroup_root *root) +static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx) { + struct cgroup_taskset *tset = &mgctx->tset; struct cgroup_subsys *ss; struct task_struct *task, *tmp_task; struct css_set *cset, *tmp_cset; @@ -2069,7 +2035,7 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset, return 0; /* check that we can legitimately attach to the cgroup */ - do_each_subsys_mask(ss, ssid, root->subsys_mask) { + do_each_subsys_mask(ss, ssid, mgctx->ss_mask) { if (ss->can_attach) { tset->ssid = ssid; ret = ss->can_attach(tset); @@ -2105,7 +2071,7 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset, */ tset->csets = &tset->dst_csets; - do_each_subsys_mask(ss, ssid, root->subsys_mask) { + do_each_subsys_mask(ss, ssid, mgctx->ss_mask) { if (ss->attach) { tset->ssid = ssid; ss->attach(tset); @@ -2116,7 +2082,7 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset, goto out_release_tset; out_cancel_attach: - do_each_subsys_mask(ss, ssid, root->subsys_mask) { + do_each_subsys_mask(ss, ssid, mgctx->ss_mask) { if (ssid == failed_ssid) break; if (ss->cancel_attach) { @@ -2151,25 +2117,31 @@ bool cgroup_may_migrate_to(struct cgroup *dst_cgrp) /** * cgroup_migrate_finish - cleanup after attach - * @preloaded_csets: list of preloaded css_sets + * @mgctx: migration context * * Undo cgroup_migrate_add_src() and cgroup_migrate_prepare_dst(). See * those functions for details. */ -void cgroup_migrate_finish(struct list_head *preloaded_csets) +void cgroup_migrate_finish(struct cgroup_mgctx *mgctx) { + LIST_HEAD(preloaded); struct css_set *cset, *tmp_cset; lockdep_assert_held(&cgroup_mutex); spin_lock_irq(&css_set_lock); - list_for_each_entry_safe(cset, tmp_cset, preloaded_csets, mg_preload_node) { + + list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded); + list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded); + + list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) { cset->mg_src_cgrp = NULL; cset->mg_dst_cgrp = NULL; cset->mg_dst_cset = NULL; list_del_init(&cset->mg_preload_node); put_css_set_locked(cset); } + spin_unlock_irq(&css_set_lock); } @@ -2177,10 +2149,10 @@ void cgroup_migrate_finish(struct list_head *preloaded_csets) * cgroup_migrate_add_src - add a migration source css_set * @src_cset: the source css_set to add * @dst_cgrp: the destination cgroup - * @preloaded_csets: list of preloaded css_sets + * @mgctx: migration context * * Tasks belonging to @src_cset are about to be migrated to @dst_cgrp. Pin - * @src_cset and add it to @preloaded_csets, which should later be cleaned + * @src_cset and add it to @mgctx->src_csets, which should later be cleaned * up by cgroup_migrate_finish(). * * This function may be called without holding cgroup_threadgroup_rwsem @@ -2191,7 +2163,7 @@ void cgroup_migrate_finish(struct list_head *preloaded_csets) */ void cgroup_migrate_add_src(struct css_set *src_cset, struct cgroup *dst_cgrp, - struct list_head *preloaded_csets) + struct cgroup_mgctx *mgctx) { struct cgroup *src_cgrp; @@ -2219,33 +2191,35 @@ void cgroup_migrate_add_src(struct css_set *src_cset, src_cset->mg_src_cgrp = src_cgrp; src_cset->mg_dst_cgrp = dst_cgrp; get_css_set(src_cset); - list_add(&src_cset->mg_preload_node, preloaded_csets); + list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets); } /** * cgroup_migrate_prepare_dst - prepare destination css_sets for migration - * @preloaded_csets: list of preloaded source css_sets + * @mgctx: migration context * * Tasks are about to be moved and all the source css_sets have been - * preloaded to @preloaded_csets. This function looks up and pins all - * destination css_sets, links each to its source, and append them to - * @preloaded_csets. + * preloaded to @mgctx->preloaded_src_csets. This function looks up and + * pins all destination css_sets, links each to its source, and append them + * to @mgctx->preloaded_dst_csets. * * This function must be called after cgroup_migrate_add_src() has been * called on each migration source css_set. After migration is performed * using cgroup_migrate(), cgroup_migrate_finish() must be called on - * @preloaded_csets. + * @mgctx. */ -int cgroup_migrate_prepare_dst(struct list_head *preloaded_csets) +int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) { - LIST_HEAD(csets); struct css_set *src_cset, *tmp_cset; lockdep_assert_held(&cgroup_mutex); /* look up the dst cset for each src cset and link it to src */ - list_for_each_entry_safe(src_cset, tmp_cset, preloaded_csets, mg_preload_node) { + list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets, + mg_preload_node) { struct css_set *dst_cset; + struct cgroup_subsys *ss; + int ssid; dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp); if (!dst_cset) @@ -2270,15 +2244,19 @@ int cgroup_migrate_prepare_dst(struct list_head *preloaded_csets) src_cset->mg_dst_cset = dst_cset; if (list_empty(&dst_cset->mg_preload_node)) - list_add(&dst_cset->mg_preload_node, &csets); + list_add_tail(&dst_cset->mg_preload_node, + &mgctx->preloaded_dst_csets); else put_css_set(dst_cset); + + for_each_subsys(ss, ssid) + if (src_cset->subsys[ssid] != dst_cset->subsys[ssid]) + mgctx->ss_mask |= 1 << ssid; } - list_splice_tail(&csets, preloaded_csets); return 0; err: - cgroup_migrate_finish(&csets); + cgroup_migrate_finish(mgctx); return -ENOMEM; } @@ -2286,7 +2264,7 @@ err: * cgroup_migrate - migrate a process or task to a cgroup * @leader: the leader of the process or the task to migrate * @threadgroup: whether @leader points to the whole process or a single task - * @root: cgroup root migration is taking place on + * @mgctx: migration context * * Migrate a process or task denoted by @leader. If migrating a process, * the caller must be holding cgroup_threadgroup_rwsem. The caller is also @@ -2301,9 +2279,8 @@ err: * actually starting migrating. */ int cgroup_migrate(struct task_struct *leader, bool threadgroup, - struct cgroup_root *root) + struct cgroup_mgctx *mgctx) { - struct cgroup_taskset tset = CGROUP_TASKSET_INIT(tset); struct task_struct *task; /* @@ -2315,14 +2292,14 @@ int cgroup_migrate(struct task_struct *leader, bool threadgroup, rcu_read_lock(); task = leader; do { - cgroup_taskset_add(task, &tset); + cgroup_migrate_add_task(task, mgctx); if (!threadgroup) break; } while_each_thread(leader, task); rcu_read_unlock(); spin_unlock_irq(&css_set_lock); - return cgroup_taskset_migrate(&tset, root); + return cgroup_migrate_execute(mgctx); } /** @@ -2336,7 +2313,7 @@ int cgroup_migrate(struct task_struct *leader, bool threadgroup, int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, bool threadgroup) { - LIST_HEAD(preloaded_csets); + DEFINE_CGROUP_MGCTX(mgctx); struct task_struct *task; int ret; @@ -2348,8 +2325,7 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, rcu_read_lock(); task = leader; do { - cgroup_migrate_add_src(task_css_set(task), dst_cgrp, - &preloaded_csets); + cgroup_migrate_add_src(task_css_set(task), dst_cgrp, &mgctx); if (!threadgroup) break; } while_each_thread(leader, task); @@ -2357,11 +2333,11 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, spin_unlock_irq(&css_set_lock); /* prepare dst csets and commit */ - ret = cgroup_migrate_prepare_dst(&preloaded_csets); + ret = cgroup_migrate_prepare_dst(&mgctx); if (!ret) - ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root); + ret = cgroup_migrate(leader, threadgroup, &mgctx); - cgroup_migrate_finish(&preloaded_csets); + cgroup_migrate_finish(&mgctx); if (!ret) trace_cgroup_attach_task(dst_cgrp, leader, threadgroup); @@ -2373,20 +2349,9 @@ static int cgroup_procs_write_permission(struct task_struct *task, struct cgroup *dst_cgrp, struct kernfs_open_file *of) { - const struct cred *cred = current_cred(); - const struct cred *tcred = get_task_cred(task); int ret = 0; - /* - * even if we're attaching all tasks in the thread group, we only - * need to check permissions on one of them. - */ - if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && - !uid_eq(cred->euid, tcred->uid) && - !uid_eq(cred->euid, tcred->suid)) - ret = -EACCES; - - if (!ret && cgroup_on_dfl(dst_cgrp)) { + if (cgroup_on_dfl(dst_cgrp)) { struct super_block *sb = of->file->f_path.dentry->d_sb; struct cgroup *cgrp; struct inode *inode; @@ -2404,9 +2369,21 @@ static int cgroup_procs_write_permission(struct task_struct *task, ret = inode_permission(inode, MAY_WRITE); iput(inode); } + } else { + const struct cred *cred = current_cred(); + const struct cred *tcred = get_task_cred(task); + + /* + * even if we're attaching all tasks in the thread group, + * we only need to check permissions on one of them. + */ + if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && + !uid_eq(cred->euid, tcred->uid) && + !uid_eq(cred->euid, tcred->suid)) + ret = -EACCES; + put_cred(tcred); } - put_cred(tcred); return ret; } @@ -2528,8 +2505,7 @@ static int cgroup_subtree_control_show(struct seq_file *seq, void *v) */ static int cgroup_update_dfl_csses(struct cgroup *cgrp) { - LIST_HEAD(preloaded_csets); - struct cgroup_taskset tset = CGROUP_TASKSET_INIT(tset); + DEFINE_CGROUP_MGCTX(mgctx); struct cgroup_subsys_state *d_css; struct cgroup *dsct; struct css_set *src_cset; @@ -2545,33 +2521,28 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) struct cgrp_cset_link *link; list_for_each_entry(link, &dsct->cset_links, cset_link) - cgroup_migrate_add_src(link->cset, dsct, - &preloaded_csets); + cgroup_migrate_add_src(link->cset, dsct, &mgctx); } spin_unlock_irq(&css_set_lock); /* NULL dst indicates self on default hierarchy */ - ret = cgroup_migrate_prepare_dst(&preloaded_csets); + ret = cgroup_migrate_prepare_dst(&mgctx); if (ret) goto out_finish; spin_lock_irq(&css_set_lock); - list_for_each_entry(src_cset, &preloaded_csets, mg_preload_node) { + list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) { struct task_struct *task, *ntask; - /* src_csets precede dst_csets, break on the first dst_cset */ - if (!src_cset->mg_src_cgrp) - break; - /* all tasks in src_csets need to be migrated */ list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list) - cgroup_taskset_add(task, &tset); + cgroup_migrate_add_task(task, &mgctx); } spin_unlock_irq(&css_set_lock); - ret = cgroup_taskset_migrate(&tset, cgrp->root); + ret = cgroup_migrate_execute(&mgctx); out_finish: - cgroup_migrate_finish(&preloaded_csets); + cgroup_migrate_finish(&mgctx); percpu_up_write(&cgroup_threadgroup_rwsem); return ret; } @@ -4098,6 +4069,11 @@ err_free_css: return ERR_PTR(err); } +/* + * The returned cgroup is fully initialized including its control mask, but + * it isn't associated with its kernfs_node and doesn't have the control + * mask applied. + */ static struct cgroup *cgroup_create(struct cgroup *parent) { struct cgroup_root *root = parent->root; @@ -4165,11 +4141,6 @@ static struct cgroup *cgroup_create(struct cgroup *parent) cgroup_propagate_control(cgrp); - /* @cgrp doesn't have dir yet so the following will only create csses */ - ret = cgroup_apply_control_enable(cgrp); - if (ret) - goto out_destroy; - return cgrp; out_cancel_ref: @@ -4177,9 +4148,6 @@ out_cancel_ref: out_free_cgrp: kfree(cgrp); return ERR_PTR(ret); -out_destroy: - cgroup_destroy_locked(cgrp); - return ERR_PTR(ret); } int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode) |