diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cgroup.c | 821 |
1 files changed, 473 insertions, 348 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a7c9e6ddb979..ebbfc043153f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -189,9 +189,12 @@ struct cgroup_event { static LIST_HEAD(roots); static int root_count; -static DEFINE_IDA(hierarchy_ida); -static int next_hierarchy_id; -static DEFINE_SPINLOCK(hierarchy_id_lock); +/* + * Hierarchy ID allocation and mapping. It follows the same exclusion + * rules as other root ops - both cgroup_mutex and cgroup_root_mutex for + * writes, either for reads. + */ +static DEFINE_IDR(cgroup_hierarchy_idr); /* dummytop is a shorthand for the dummy hierarchy's top cgroup */ #define dummytop (&rootnode.top_cgroup) @@ -205,6 +208,7 @@ static struct cgroup_name root_cgroup_name = { .name = "/" }; */ static int need_forkexit_callback __read_mostly; +static void cgroup_offline_fn(struct work_struct *work); static int cgroup_destroy_locked(struct cgroup *cgrp); static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, struct cftype cfts[], bool is_add); @@ -223,9 +227,9 @@ static int css_refcnt(struct cgroup_subsys_state *css) } /* convenient tests for these bits */ -inline int cgroup_is_removed(const struct cgroup *cgrp) +static inline bool cgroup_is_dead(const struct cgroup *cgrp) { - return test_bit(CGRP_REMOVED, &cgrp->flags); + return test_bit(CGRP_DEAD, &cgrp->flags); } /** @@ -297,7 +301,7 @@ static inline struct cftype *__d_cft(struct dentry *dentry) static bool cgroup_lock_live_group(struct cgroup *cgrp) { mutex_lock(&cgroup_mutex); - if (cgroup_is_removed(cgrp)) { + if (cgroup_is_dead(cgrp)) { mutex_unlock(&cgroup_mutex); return false; } @@ -312,20 +316,24 @@ static void cgroup_release_agent(struct work_struct *work); static DECLARE_WORK(release_agent_work, cgroup_release_agent); static void check_for_release(struct cgroup *cgrp); -/* Link structure for associating css_set objects with cgroups */ -struct cg_cgroup_link { - /* - * List running through cg_cgroup_links associated with a - * cgroup, anchored on cgroup->css_sets - */ - struct list_head cgrp_link_list; - struct cgroup *cgrp; - /* - * List running through cg_cgroup_links pointing at a - * single css_set object, anchored on css_set->cg_links - */ - struct list_head cg_link_list; - struct css_set *cg; +/* + * A cgroup can be associated with multiple css_sets as different tasks may + * belong to different cgroups on different hierarchies. In the other + * direction, a css_set is naturally associated with multiple cgroups. + * This M:N relationship is represented by the following link structure + * which exists for each association and allows traversing the associations + * from both sides. + */ +struct cgrp_cset_link { + /* the cgroup and css_set this link associates */ + struct cgroup *cgrp; + struct css_set *cset; + + /* list of cgrp_cset_links anchored at cgrp->cset_links */ + struct list_head cset_link; + + /* list of cgrp_cset_links anchored at css_set->cgrp_links */ + struct list_head cgrp_link; }; /* The default css_set - used by init and its children prior to any @@ -336,7 +344,7 @@ struct cg_cgroup_link { */ static struct css_set init_css_set; -static struct cg_cgroup_link init_css_set_link; +static struct cgrp_cset_link init_cgrp_cset_link; static int cgroup_init_idr(struct cgroup_subsys *ss, struct cgroup_subsys_state *css); @@ -373,90 +381,83 @@ static unsigned long css_set_hash(struct cgroup_subsys_state *css[]) * compiled into their kernel but not actually in use */ static int use_task_css_set_links __read_mostly; -static void __put_css_set(struct css_set *cg, int taskexit) +static void __put_css_set(struct css_set *cset, int taskexit) { - struct cg_cgroup_link *link; - struct cg_cgroup_link *saved_link; + struct cgrp_cset_link *link, *tmp_link; + /* * Ensure that the refcount doesn't hit zero while any readers * can see it. Similar to atomic_dec_and_lock(), but for an * rwlock */ - if (atomic_add_unless(&cg->refcount, -1, 1)) + if (atomic_add_unless(&cset->refcount, -1, 1)) return; write_lock(&css_set_lock); - if (!atomic_dec_and_test(&cg->refcount)) { + if (!atomic_dec_and_test(&cset->refcount)) { write_unlock(&css_set_lock); return; } /* This css_set is dead. unlink it and release cgroup refcounts */ - hash_del(&cg->hlist); + hash_del(&cset->hlist); css_set_count--; - list_for_each_entry_safe(link, saved_link, &cg->cg_links, - cg_link_list) { + list_for_each_entry_safe(link, tmp_link, &cset->cgrp_links, cgrp_link) { struct cgroup *cgrp = link->cgrp; - list_del(&link->cg_link_list); - list_del(&link->cgrp_link_list); - /* - * We may not be holding cgroup_mutex, and if cgrp->count is - * dropped to 0 the cgroup can be destroyed at any time, hence - * rcu_read_lock is used to keep it alive. - */ - rcu_read_lock(); - if (atomic_dec_and_test(&cgrp->count) && - notify_on_release(cgrp)) { + list_del(&link->cset_link); + list_del(&link->cgrp_link); + + /* @cgrp can't go away while we're holding css_set_lock */ + if (list_empty(&cgrp->cset_links) && notify_on_release(cgrp)) { if (taskexit) set_bit(CGRP_RELEASABLE, &cgrp->flags); check_for_release(cgrp); } - rcu_read_unlock(); kfree(link); } write_unlock(&css_set_lock); - kfree_rcu(cg, rcu_head); + kfree_rcu(cset, rcu_head); } /* * refcounted get/put for css_set objects */ -static inline void get_css_set(struct css_set *cg) +static inline void get_css_set(struct css_set *cset) { - atomic_inc(&cg->refcount); + atomic_inc(&cset->refcount); } -static inline void put_css_set(struct css_set *cg) +static inline void put_css_set(struct css_set *cset) { - __put_css_set(cg, 0); + __put_css_set(cset, 0); } -static inline void put_css_set_taskexit(struct css_set *cg) +static inline void put_css_set_taskexit(struct css_set *cset) { - __put_css_set(cg, 1); + __put_css_set(cset, 1); } /* * compare_css_sets - helper function for find_existing_css_set(). - * @cg: candidate css_set being tested - * @old_cg: existing css_set for a task + * @cset: candidate css_set being tested + * @old_cset: existing css_set for a task * @new_cgrp: cgroup that's being entered by the task * @template: desired set of css pointers in css_set (pre-calculated) * * Returns true if "cg" matches "old_cg" except for the hierarchy * which "new_cgrp" belongs to, for which it should match "new_cgrp". */ -static bool compare_css_sets(struct css_set *cg, - struct css_set *old_cg, +static bool compare_css_sets(struct css_set *cset, + struct css_set *old_cset, struct cgroup *new_cgrp, struct cgroup_subsys_state *template[]) { struct list_head *l1, *l2; - if (memcmp(template, cg->subsys, sizeof(cg->subsys))) { + if (memcmp(template, cset->subsys, sizeof(cset->subsys))) { /* Not all subsystems matched */ return false; } @@ -470,28 +471,28 @@ static bool compare_css_sets(struct css_set *cg, * candidates. */ - l1 = &cg->cg_links; - l2 = &old_cg->cg_links; + l1 = &cset->cgrp_links; + l2 = &old_cset->cgrp_links; while (1) { - struct cg_cgroup_link *cgl1, *cgl2; - struct cgroup *cg1, *cg2; + struct cgrp_cset_link *link1, *link2; + struct cgroup *cgrp1, *cgrp2; l1 = l1->next; l2 = l2->next; /* See if we reached the end - both lists are equal length. */ - if (l1 == &cg->cg_links) { - BUG_ON(l2 != &old_cg->cg_links); + if (l1 == &cset->cgrp_links) { + BUG_ON(l2 != &old_cset->cgrp_links); break; } else { - BUG_ON(l2 == &old_cg->cg_links); + BUG_ON(l2 == &old_cset->cgrp_links); } /* Locate the cgroups associated with these links. */ - cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list); - cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list); - cg1 = cgl1->cgrp; - cg2 = cgl2->cgrp; + link1 = list_entry(l1, struct cgrp_cset_link, cgrp_link); + link2 = list_entry(l2, struct cgrp_cset_link, cgrp_link); + cgrp1 = link1->cgrp; + cgrp2 = link2->cgrp; /* Hierarchies should be linked in the same order. */ - BUG_ON(cg1->root != cg2->root); + BUG_ON(cgrp1->root != cgrp2->root); /* * If this hierarchy is the hierarchy of the cgroup @@ -500,11 +501,11 @@ static bool compare_css_sets(struct css_set *cg, * hierarchy, then this css_set should point to the * same cgroup as the old css_set. */ - if (cg1->root == new_cgrp->root) { - if (cg1 != new_cgrp) + if (cgrp1->root == new_cgrp->root) { + if (cgrp1 != new_cgrp) return false; } else { - if (cg1 != cg2) + if (cgrp1 != cgrp2) return false; } } @@ -524,14 +525,13 @@ static bool compare_css_sets(struct css_set *cg, * template: location in which to build the desired set of subsystem * state objects for the new cgroup group */ -static struct css_set *find_existing_css_set( - struct css_set *oldcg, - struct cgroup *cgrp, - struct cgroup_subsys_state *template[]) +static struct css_set *find_existing_css_set(struct css_set *old_cset, + struct cgroup *cgrp, + struct cgroup_subsys_state *template[]) { int i; struct cgroupfs_root *root = cgrp->root; - struct css_set *cg; + struct css_set *cset; unsigned long key; /* @@ -548,78 +548,80 @@ static struct css_set *find_existing_css_set( } else { /* Subsystem is not in this hierarchy, so we * don't want to change the subsystem state */ - template[i] = oldcg->subsys[i]; + template[i] = old_cset->subsys[i]; } } key = css_set_hash(template); - hash_for_each_possible(css_set_table, cg, hlist, key) { - if (!compare_css_sets(cg, oldcg, cgrp, template)) + hash_for_each_possible(css_set_table, cset, hlist, key) { + if (!compare_css_sets(cset, old_cset, cgrp, template)) continue; /* This css_set matches what we need */ - return cg; + return cset; } /* No existing cgroup group matched */ return NULL; } -static void free_cg_links(struct list_head *tmp) +static void free_cgrp_cset_links(struct list_head *links_to_free) { - struct cg_cgroup_link *link; - struct cg_cgroup_link *saved_link; + struct cgrp_cset_link *link, *tmp_link; - list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) { - list_del(&link->cgrp_link_list); + list_for_each_entry_safe(link, tmp_link, links_to_free, cset_link) { + list_del(&link->cset_link); kfree(link); } } -/* - * allocate_cg_links() allocates "count" cg_cgroup_link structures - * and chains them on tmp through their cgrp_link_list fields. Returns 0 on - * success or a negative error +/** + * allocate_cgrp_cset_links - allocate cgrp_cset_links + * @count: the number of links to allocate + * @tmp_links: list_head the allocated links are put on + * + * Allocate @count cgrp_cset_link structures and chain them on @tmp_links + * through ->cset_link. Returns 0 on success or -errno. */ -static int allocate_cg_links(int count, struct list_head *tmp) +static int allocate_cgrp_cset_links(int count, struct list_head *tmp_links) { - struct cg_cgroup_link *link; + struct cgrp_cset_link *link; int i; - INIT_LIST_HEAD(tmp); + + INIT_LIST_HEAD(tmp_links); + for (i = 0; i < count; i++) { - link = kmalloc(sizeof(*link), GFP_KERNEL); + link = kzalloc(sizeof(*link), GFP_KERNEL); if (!link) { - free_cg_links(tmp); + free_cgrp_cset_links(tmp_links); return -ENOMEM; } - list_add(&link->cgrp_link_list, tmp); + list_add(&link->cset_link, tmp_links); } return 0; } /** * link_css_set - a helper function to link a css_set to a cgroup - * @tmp_cg_links: cg_cgroup_link objects allocated by allocate_cg_links() - * @cg: the css_set to be linked + * @tmp_links: cgrp_cset_link objects allocated by allocate_cgrp_cset_links() + * @cset: the css_set to be linked * @cgrp: the destination cgroup */ -static void link_css_set(struct list_head *tmp_cg_links, - struct css_set *cg, struct cgroup *cgrp) +static void link_css_set(struct list_head *tmp_links, struct css_set *cset, + struct cgroup *cgrp) { - struct cg_cgroup_link *link; + struct cgrp_cset_link *link; - BUG_ON(list_empty(tmp_cg_links)); - link = list_first_entry(tmp_cg_links, struct cg_cgroup_link, - cgrp_link_list); - link->cg = cg; + BUG_ON(list_empty(tmp_links)); + link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link); + link->cset = cset; link->cgrp = cgrp; - atomic_inc(&cgrp->count); - list_move(&link->cgrp_link_list, &cgrp->css_sets); + list_move(&link->cset_link, &cgrp->cset_links); /* * Always add links to the tail of the list so that the list * is sorted by order of hierarchy creation */ - list_add_tail(&link->cg_link_list, &cg->cg_links); + list_add_tail(&link->cgrp_link, &cset->cgrp_links); } /* @@ -629,67 +631,66 @@ static void link_css_set(struct list_head *tmp_cg_links, * substituted into the appropriate hierarchy. Must be called with * cgroup_mutex held */ -static struct css_set *find_css_set( - struct css_set *oldcg, struct cgroup *cgrp) +static struct css_set *find_css_set(struct css_set *old_cset, + struct cgroup *cgrp) { - struct css_set *res; + struct css_set *cset; struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT]; - - struct list_head tmp_cg_links; - - struct cg_cgroup_link *link; + struct list_head tmp_links; + struct cgrp_cset_link *link; unsigned long key; /* First see if we already have a cgroup group that matches * the desired set */ read_lock(&css_set_lock); - res = find_existing_css_set(oldcg, cgrp, template); - if (res) - get_css_set(res); + cset = find_existing_css_set(old_cset, cgrp, template); + if (cset) + get_css_set(cset); read_unlock(&css_set_lock); - if (res) - return res; + if (cset) + return cset; - res = kmalloc(sizeof(*res), GFP_KERNEL); - if (!res) + cset = kzalloc(sizeof(*cset), GFP_KERNEL); + if (!cset) return NULL; - /* Allocate all the cg_cgroup_link objects that we'll need */ - if (allocate_cg_links(root_count, &tmp_cg_links) < 0) { - kfree(res); + /* Allocate all the cgrp_cset_link objects that we'll need */ + if (allocate_cgrp_cset_links(root_count, &tmp_links) < 0) { + kfree(cset); return NULL; } - atomic_set(&res->refcount, 1); - INIT_LIST_HEAD(&res->cg_links); - INIT_LIST_HEAD(&res->tasks); - INIT_HLIST_NODE(&res->hlist); + atomic_set(&cset->refcount, 1); + INIT_LIST_HEAD(&cset->cgrp_links); + INIT_LIST_HEAD(&cset->tasks); + INIT_HLIST_NODE(&cset->hlist); /* Copy the set of subsystem state objects generated in * find_existing_css_set() */ - memcpy(res->subsys, template, sizeof(res->subsys)); + memcpy(cset->subsys, template, sizeof(cset->subsys)); write_lock(&css_set_lock); /* Add reference counts and links from the new css_set. */ - list_for_each_entry(link, &oldcg->cg_links, cg_link_list) { + list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) { struct cgroup *c = link->cgrp; + if (c->root == cgrp->root) c = cgrp; - link_css_set(&tmp_cg_links, res, c); + link_css_set(&tmp_links, cset, c); } - BUG_ON(!list_empty(&tmp_cg_links)); + BUG_ON(!list_empty(&tmp_links)); css_set_count++; /* Add this cgroup group to the hash table */ - key = css_set_hash(res->subsys); - hash_add(css_set_table, &res->hlist, key); + key = css_set_hash(cset->subsys); + hash_add(css_set_table, &cset->hlist, key); write_unlock(&css_set_lock); - return res; + return cset; } /* @@ -699,7 +700,7 @@ static struct css_set *find_css_set( static struct cgroup *task_cgroup_from_root(struct task_struct *task, struct cgroupfs_root *root) { - struct css_set *css; + struct css_set *cset; struct cgroup *res = NULL; BUG_ON(!mutex_is_locked(&cgroup_mutex)); @@ -709,13 +710,15 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task, * task can't change groups, so the only thing that can happen * is that it exits and its css is set back to init_css_set. */ - css = task->cgroups; - if (css == &init_css_set) { + cset = task->cgroups; + if (cset == &init_css_set) { res = &root->top_cgroup; } else { - struct cg_cgroup_link *link; - list_for_each_entry(link, &css->cg_links, cg_link_list) { + struct cgrp_cset_link *link; + + list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { struct cgroup *c = link->cgrp; + if (c->root == root) { res = c; break; @@ -828,7 +831,7 @@ static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry) static void cgroup_free_fn(struct work_struct *work) { - struct cgroup *cgrp = container_of(work, struct cgroup, free_work); + struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work); struct cgroup_subsys *ss; mutex_lock(&cgroup_mutex); @@ -873,7 +876,8 @@ static void cgroup_free_rcu(struct rcu_head *head) { struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head); - schedule_work(&cgrp->free_work); + INIT_WORK(&cgrp->destroy_work, cgroup_free_fn); + schedule_work(&cgrp->destroy_work); } static void cgroup_diput(struct dentry *dentry, struct inode *inode) @@ -882,7 +886,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) if (S_ISDIR(inode->i_mode)) { struct cgroup *cgrp = dentry->d_fsdata; - BUG_ON(!(cgroup_is_removed(cgrp))); + BUG_ON(!(cgroup_is_dead(cgrp))); call_rcu(&cgrp->rcu_head, cgroup_free_rcu); } else { struct cfent *cfe = __d_cfe(dentry); @@ -1401,11 +1405,10 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) INIT_LIST_HEAD(&cgrp->sibling); INIT_LIST_HEAD(&cgrp->children); INIT_LIST_HEAD(&cgrp->files); - INIT_LIST_HEAD(&cgrp->css_sets); + INIT_LIST_HEAD(&cgrp->cset_links); INIT_LIST_HEAD(&cgrp->allcg_node); INIT_LIST_HEAD(&cgrp->release_list); INIT_LIST_HEAD(&cgrp->pidlists); - INIT_WORK(&cgrp->free_work, cgroup_free_fn); mutex_init(&cgrp->pidlist_mutex); INIT_LIST_HEAD(&cgrp->event_list); spin_lock_init(&cgrp->event_list_lock); @@ -1426,29 +1429,30 @@ static void init_cgroup_root(struct cgroupfs_root *root) list_add_tail(&cgrp->allcg_node, &root->allcg_list); } -static bool init_root_id(struct cgroupfs_root *root) +static int cgroup_init_root_id(struct cgroupfs_root *root) { - int ret = 0; + int id; - do { - if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL)) - return false; - spin_lock(&hierarchy_id_lock); - /* Try to allocate the next unused ID */ - ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id, - &root->hierarchy_id); - if (ret == -ENOSPC) - /* Try again starting from 0 */ - ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id); - if (!ret) { - next_hierarchy_id = root->hierarchy_id + 1; - } else if (ret != -EAGAIN) { - /* Can only get here if the 31-bit IDR is full ... */ - BUG_ON(ret); - } - spin_unlock(&hierarchy_id_lock); - } while (ret); - return true; + lockdep_assert_held(&cgroup_mutex); + lockdep_assert_held(&cgroup_root_mutex); + + id = idr_alloc_cyclic(&cgroup_hierarchy_idr, root, 2, 0, GFP_KERNEL); + if (id < 0) + return id; + + root->hierarchy_id = id; + return 0; +} + +static void cgroup_exit_root_id(struct cgroupfs_root *root) +{ + lockdep_assert_held(&cgroup_mutex); + lockdep_assert_held(&cgroup_root_mutex); + + if (root->hierarchy_id) { + idr_remove(&cgroup_hierarchy_idr, root->hierarchy_id); + root->hierarchy_id = 0; + } } static int cgroup_test_super(struct super_block *sb, void *data) @@ -1482,10 +1486,6 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) if (!root) return ERR_PTR(-ENOMEM); - if (!init_root_id(root)) { - kfree(root); - return ERR_PTR(-ENOMEM); - } init_cgroup_root(root); root->subsys_mask = opts->subsys_mask; @@ -1500,17 +1500,15 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) return root; } -static void cgroup_drop_root(struct cgroupfs_root *root) +static void cgroup_free_root(struct cgroupfs_root *root) { - if (!root) - return; + if (root) { + /* hierarhcy ID shoulid already have been released */ + WARN_ON_ONCE(root->hierarchy_id); - BUG_ON(!root->hierarchy_id); - spin_lock(&hierarchy_id_lock); - ida_remove(&hierarchy_ida, root->hierarchy_id); - spin_unlock(&hierarchy_id_lock); - ida_destroy(&root->cgroup_ida); - kfree(root); + ida_destroy(&root->cgroup_ida); + kfree(root); + } } static int cgroup_set_super(struct super_block *sb, void *data) @@ -1597,7 +1595,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, sb = sget(fs_type, cgroup_test_super, cgroup_set_super, 0, &opts); if (IS_ERR(sb)) { ret = PTR_ERR(sb); - cgroup_drop_root(opts.new_root); + cgroup_free_root(opts.new_root); goto drop_modules; } @@ -1605,12 +1603,12 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, BUG_ON(!root); if (root == opts.new_root) { /* We used the new root structure, so this is a new hierarchy */ - struct list_head tmp_cg_links; + struct list_head tmp_links; struct cgroup *root_cgrp = &root->top_cgroup; struct cgroupfs_root *existing_root; const struct cred *cred; int i; - struct css_set *cg; + struct css_set *cset; BUG_ON(sb->s_root != NULL); @@ -1637,13 +1635,17 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, * that's us. The worst that can happen is that we * have some link structures left over */ - ret = allocate_cg_links(css_set_count, &tmp_cg_links); + ret = allocate_cgrp_cset_links(css_set_count, &tmp_links); + if (ret) + goto unlock_drop; + + ret = cgroup_init_root_id(root); if (ret) goto unlock_drop; ret = rebind_subsystems(root, root->subsys_mask); if (ret == -EBUSY) { - free_cg_links(&tmp_cg_links); + free_cgrp_cset_links(&tmp_links); goto unlock_drop; } /* @@ -1664,11 +1666,11 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, /* Link the top cgroup in this hierarchy into all * the css_set objects */ write_lock(&css_set_lock); - hash_for_each(css_set_table, i, cg, hlist) - link_css_set(&tmp_cg_links, cg, root_cgrp); + hash_for_each(css_set_table, i, cset, hlist) + link_css_set(&tmp_links, cset, root_cgrp); write_unlock(&css_set_lock); - free_cg_links(&tmp_cg_links); + free_cgrp_cset_links(&tmp_links); BUG_ON(!list_empty(&root_cgrp->children)); BUG_ON(root->number_of_cgroups != 1); @@ -1684,7 +1686,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, * We re-used an existing hierarchy - the new root (if * any) is not needed */ - cgroup_drop_root(opts.new_root); + cgroup_free_root(opts.new_root); if (root->flags != opts.flags) { if ((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) { @@ -1705,6 +1707,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, return dget(sb->s_root); unlock_drop: + cgroup_exit_root_id(root); mutex_unlock(&cgroup_root_mutex); mutex_unlock(&cgroup_mutex); mutex_unlock(&inode->i_mutex); @@ -1721,9 +1724,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, static void cgroup_kill_sb(struct super_block *sb) { struct cgroupfs_root *root = sb->s_fs_info; struct cgroup *cgrp = &root->top_cgroup; + struct cgrp_cset_link *link, *tmp_link; int ret; - struct cg_cgroup_link *link; - struct cg_cgroup_link *saved_link; BUG_ON(!root); @@ -1739,15 +1741,14 @@ static void cgroup_kill_sb(struct super_block *sb) { BUG_ON(ret); /* - * Release all the links from css_sets to this hierarchy's + * Release all the links from cset_links to this hierarchy's * root cgroup */ write_lock(&css_set_lock); - list_for_each_entry_safe(link, saved_link, &cgrp->css_sets, - cgrp_link_list) { - list_del(&link->cg_link_list); - list_del(&link->cgrp_link_list); + list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) { + list_del(&link->cset_link); + list_del(&link->cgrp_link); kfree(link); } write_unlock(&css_set_lock); @@ -1757,13 +1758,15 @@ static void cgroup_kill_sb(struct super_block *sb) { root_count--; } + cgroup_exit_root_id(root); + mutex_unlock(&cgroup_root_mutex); mutex_unlock(&cgroup_mutex); simple_xattrs_free(&cgrp->xattrs); kill_litter_super(sb); - cgroup_drop_root(root); + cgroup_free_root(root); } static struct file_system_type cgroup_fs_type = { @@ -1825,6 +1828,38 @@ out: } EXPORT_SYMBOL_GPL(cgroup_path); +/** + * task_cgroup_path_from_hierarchy - cgroup path of a task on a hierarchy + * @task: target task + * @hierarchy_id: the hierarchy to look up @task's cgroup from + * @buf: the buffer to write the path into + * @buflen: the length of the buffer + * + * Determine @task's cgroup on the hierarchy specified by @hierarchy_id and + * copy its path into @buf. This function grabs cgroup_mutex and shouldn't + * be used inside locks used by cgroup controller callbacks. + */ +int task_cgroup_path_from_hierarchy(struct task_struct *task, int hierarchy_id, + char *buf, size_t buflen) +{ + struct cgroupfs_root *root; + struct cgroup *cgrp = NULL; + int ret = -ENOENT; + + mutex_lock(&cgroup_mutex); + + root = idr_find(&cgroup_hierarchy_idr, hierarchy_id); + if (root) { + cgrp = task_cgroup_from_root(task, root); + ret = cgroup_path(cgrp, buf, buflen); + } + + mutex_unlock(&cgroup_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(task_cgroup_path_from_hierarchy); + /* * Control Group taskset */ @@ -1910,10 +1945,11 @@ EXPORT_SYMBOL_GPL(cgroup_taskset_size); * * Must be called with cgroup_mutex and threadgroup locked. */ -static void cgroup_task_migrate(struct cgroup *oldcgrp, - struct task_struct *tsk, struct css_set *newcg) +static void cgroup_task_migrate(struct cgroup *old_cgrp, + struct task_struct *tsk, + struct css_set *new_cset) { - struct css_set *oldcg; + struct css_set *old_cset; /* * We are synchronized through threadgroup_lock() against PF_EXITING @@ -1921,25 +1957,25 @@ static void cgroup_task_migrate(struct cgroup *oldcgrp, * css_set to init_css_set and dropping the old one. */ WARN_ON_ONCE(tsk->flags & PF_EXITING); - oldcg = tsk->cgroups; + old_cset = tsk->cgroups; task_lock(tsk); - rcu_assign_pointer(tsk->cgroups, newcg); + rcu_assign_pointer(tsk->cgroups, new_cset); task_unlock(tsk); /* Update the css_set linked lists if we're using them */ write_lock(&css_set_lock); if (!list_empty(&tsk->cg_list)) - list_move(&tsk->cg_list, &newcg->tasks); + list_move(&tsk->cg_list, &new_cset->tasks); write_unlock(&css_set_lock); /* - * We just gained a reference on oldcg by taking it from the task. As - * trading it for newcg is protected by cgroup_mutex, we're safe to drop - * it here; it will be freed under RCU. + * We just gained a reference on old_cset by taking it from the + * task. As trading it for new_cset is protected by cgroup_mutex, + * we're safe to drop it here; it will be freed under RCU. */ - set_bit(CGRP_RELEASABLE, &oldcgrp->flags); - put_css_set(oldcg); + set_bit(CGRP_RELEASABLE, &old_cgrp->flags); + put_css_set(old_cset); } /** @@ -2323,7 +2359,7 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf, struct cftype *cft = __d_cft(file->f_dentry); struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); - if (cgroup_is_removed(cgrp)) + if (cgroup_is_dead(cgrp)) return -ENODEV; if (cft->write) return cft->write(cgrp, cft, file, buf, nbytes, ppos); @@ -2368,7 +2404,7 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf, struct cftype *cft = __d_cft(file->f_dentry); struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); - if (cgroup_is_removed(cgrp)) + if (cgroup_is_dead(cgrp)) return -ENODEV; if (cft->read) @@ -2435,10 +2471,12 @@ static int cgroup_file_open(struct inode *inode, struct file *file) cft = __d_cft(file->f_dentry); if (cft->read_map || cft->read_seq_string) { - struct cgroup_seqfile_state *state = - kzalloc(sizeof(*state), GFP_USER); + struct cgroup_seqfile_state *state; + + state = kzalloc(sizeof(*state), GFP_USER); if (!state) return -ENOMEM; + state->cft = cft; state->cgroup = __d_cgrp(file->f_dentry->d_parent); file->f_op = &cgroup_seqfile_operations; @@ -2789,7 +2827,7 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss, mutex_lock(&inode->i_mutex); mutex_lock(&cgroup_mutex); - if (!cgroup_is_removed(cgrp)) + if (!cgroup_is_dead(cgrp)) cgroup_addrm_files(cgrp, ss, cfts, is_add); mutex_unlock(&cgroup_mutex); mutex_unlock(&inode->i_mutex); @@ -2872,12 +2910,11 @@ int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) int cgroup_task_count(const struct cgroup *cgrp) { int count = 0; - struct cg_cgroup_link *link; + struct cgrp_cset_link *link; read_lock(&css_set_lock); - list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) { - count += atomic_read(&link->cg->refcount); - } + list_for_each_entry(link, &cgrp->cset_links, cset_link) + count += atomic_read(&link->cset->refcount); read_unlock(&css_set_lock); return count; } @@ -2886,25 +2923,24 @@ int cgroup_task_count(const struct cgroup *cgrp) * Advance a list_head iterator. The iterator should be positioned at * the start of a css_set */ -static void cgroup_advance_iter(struct cgroup *cgrp, - struct cgroup_iter *it) +static void cgroup_advance_iter(struct cgroup *cgrp, struct cgroup_iter *it) { - struct list_head *l = it->cg_link; - struct cg_cgroup_link *link; - struct css_set *cg; + struct list_head *l = it->cset_link; + struct cgrp_cset_link *link; + struct css_set *cset; /* Advance to the next non-empty css_set */ do { l = l->next; - if (l == &cgrp->css_sets) { - it->cg_link = NULL; + if (l == &cgrp->cset_links) { + it->cset_link = NULL; return; } - link = list_entry(l, struct cg_cgroup_link, cgrp_link_list); - cg = link->cg; - } while (list_empty(&cg->tasks)); - it->cg_link = l; - it->task = cg->tasks.next; + link = list_entry(l, struct cgrp_cset_link, cset_link); + cset = link->cset; + } while (list_empty(&cset->tasks)); + it->cset_link = l; + it->task = cset->tasks.next; } /* @@ -2942,12 +2978,67 @@ static void cgroup_enable_task_cg_lists(void) } /** + * cgroup_next_sibling - find the next sibling of a given cgroup + * @pos: the current cgroup + * + * This function returns the next sibling of @pos and should be called + * under RCU read lock. The only requirement is that @pos is accessible. + * The next sibling is guaranteed to be returned regardless of @pos's + * state. + */ +struct cgroup *cgroup_next_sibling(struct cgroup *pos) +{ + struct cgroup *next; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + /* + * @pos could already have been removed. Once a cgroup is removed, + * its ->sibling.next is no longer updated when its next sibling + * changes. As CGRP_DEAD assertion is serialized and happens + * before the cgroup is taken off the ->sibling list, if we see it + * unasserted, it's guaranteed that the next sibling hasn't + * finished its grace period even if it's already removed, and thus + * safe to dereference from this RCU critical section. If + * ->sibling.next is inaccessible, cgroup_is_dead() is guaranteed + * to be visible as %true here. + */ + if (likely(!cgroup_is_dead(pos))) { + next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling); + if (&next->sibling != &pos->parent->children) + return next; + return NULL; + } + + /* + * Can't dereference the next pointer. Each cgroup is given a + * monotonically increasing unique serial number and always + * appended to the sibling list, so the next one can be found by + * walking the parent's children until we see a cgroup with higher + * serial number than @pos's. + * + * While this path can be slow, it's taken only when either the + * current cgroup is removed or iteration and removal race. + */ + list_for_each_entry_rcu(next, &pos->parent->children, sibling) + if (next->serial_nr > pos->serial_nr) + return next; + return NULL; +} +EXPORT_SYMBOL_GPL(cgroup_next_sibling); + +/** * cgroup_next_descendant_pre - find the next descendant for pre-order walk * @pos: the current position (%NULL to initiate traversal) * @cgroup: cgroup whose descendants to walk * * To be used by cgroup_for_each_descendant_pre(). Find the next * descendant to visit for pre-order traversal of @cgroup's descendants. + * + * While this function requires RCU read locking, it doesn't require the + * whole traversal to be contained in a single RCU critical section. This + * function will return the correct next descendant as long as both @pos + * and @cgroup are accessible and @pos is a descendant of @cgroup. */ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, struct cgroup *cgroup) @@ -2967,11 +3058,9 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, /* no child, visit my or the closest ancestor's next sibling */ while (pos != cgroup) { - next = list_entry_rcu(pos->sibling.next, struct cgroup, - sibling); - if (&next->sibling != &pos->parent->children) + next = cgroup_next_sibling(pos); + if (next) return next; - pos = pos->parent; } @@ -2986,6 +3075,11 @@ EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre); * Return the rightmost descendant of @pos. If there's no descendant, * @pos is returned. This can be used during pre-order traversal to skip * subtree of @pos. + * + * While this function requires RCU read locking, it doesn't require the + * whole traversal to be contained in a single RCU critical section. This + * function will return the correct rightmost descendant as long as @pos is + * accessible. */ struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos) { @@ -3025,6 +3119,11 @@ static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos) * * To be used by cgroup_for_each_descendant_post(). Find the next * descendant to visit for post-order traversal of @cgroup's descendants. + * + * While this function requires RCU read locking, it doesn't require the + * whole traversal to be contained in a single RCU critical section. This + * function will return the correct next descendant as long as both @pos + * and @cgroup are accessible and @pos is a descendant of @cgroup. */ struct cgroup *cgroup_next_descendant_post(struct cgroup *pos, struct cgroup *cgroup) @@ -3040,8 +3139,8 @@ struct cgroup *cgroup_next_descendant_post(struct cgroup *pos, } /* if there's an unvisited sibling, visit its leftmost descendant */ - next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling); - if (&next->sibling != &pos->parent->children) + next = cgroup_next_sibling(pos); + if (next) return cgroup_leftmost_descendant(next); /* no sibling left, visit parent */ @@ -3062,7 +3161,7 @@ void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it) cgroup_enable_task_cg_lists(); read_lock(&css_set_lock); - it->cg_link = &cgrp->css_sets; + it->cset_link = &cgrp->cset_links; cgroup_advance_iter(cgrp, it); } @@ -3071,16 +3170,16 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, { struct task_struct *res; struct list_head *l = it->task; - struct cg_cgroup_link *link; + struct cgrp_cset_link *link; /* If the iterator cg is NULL, we have no tasks */ - if (!it->cg_link) + if (!it->cset_link) return NULL; res = list_entry(l, struct task_struct, cg_list); /* Advance iterator to find next entry */ l = l->next; - link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list); - if (l == &link->cg->tasks) { + link = list_entry(it->cset_link, struct cgrp_cset_link, cset_link); + if (l == &link->cset->tasks) { /* We reached the end of this task list - move on to * the next cg_cgroup_link */ cgroup_advance_iter(cgrp, it); @@ -3411,7 +3510,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, } } /* entry not found; create a new one */ - l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL); + l = kzalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL); if (!l) { mutex_unlock(&cgrp->pidlist_mutex); return l; @@ -3420,8 +3519,6 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, down_write(&l->mutex); l->key.type = type; l->key.ns = get_pid_ns(ns); - l->use_count = 0; /* don't increment here */ - l->list = NULL; l->owner = cgrp; list_add(&l->links, &cgrp->pidlists); mutex_unlock(&cgrp->pidlist_mutex); @@ -3933,33 +4030,16 @@ static int cgroup_clone_children_write(struct cgroup *cgrp, return 0; } -/* - * for the common functions, 'private' gives the type of file - */ -/* for hysterical raisins, we can't put this on the older files */ -#define CGROUP_FILE_GENERIC_PREFIX "cgroup." -static struct cftype files[] = { - { - .name = "tasks", - .open = cgroup_tasks_open, - .write_u64 = cgroup_tasks_write, - .release = cgroup_pidlist_release, - .mode = S_IRUGO | S_IWUSR, - }, +static struct cftype cgroup_base_files[] = { { - .name = CGROUP_FILE_GENERIC_PREFIX "procs", + .name = "cgroup.procs", .open = cgroup_procs_open, .write_u64 = cgroup_procs_write, .release = cgroup_pidlist_release, .mode = S_IRUGO | S_IWUSR, }, { - .name = "notify_on_release", - .read_u64 = cgroup_read_notify_on_release, - .write_u64 = cgroup_write_notify_on_release, - }, - { - .name = CGROUP_FILE_GENERIC_PREFIX "event_control", + .name = "cgroup.event_control", .write_string = cgroup_write_event_control, .mode = S_IWUGO, }, @@ -3974,9 +4054,29 @@ static struct cftype files[] = { .flags = CFTYPE_ONLY_ON_ROOT, .read_seq_string = cgroup_sane_behavior_show, }, + + /* + * Historical crazy stuff. These don't have "cgroup." prefix and + * don't exist if sane_behavior. If you're depending on these, be + * prepared to be burned. + */ + { + .name = "tasks", + .flags = CFTYPE_INSANE, /* use "procs" instead */ + .open = cgroup_tasks_open, + .write_u64 = cgroup_tasks_write, + .release = cgroup_pidlist_release, + .mode = S_IRUGO | S_IWUSR, + }, + { + .name = "notify_on_release", + .flags = CFTYPE_INSANE, + .read_u64 = cgroup_read_notify_on_release, + .write_u64 = cgroup_write_notify_on_release, + }, { .name = "release_agent", - .flags = CFTYPE_ONLY_ON_ROOT, + .flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT, .read_seq_string = cgroup_release_agent_show, .write_string = cgroup_release_agent_write, .max_write_len = PATH_MAX, @@ -3997,7 +4097,7 @@ static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, struct cgroup_subsys *ss; if (base_files) { - err = cgroup_addrm_files(cgrp, NULL, files, true); + err = cgroup_addrm_files(cgrp, NULL, cgroup_base_files, true); if (err < 0) return err; } @@ -4103,6 +4203,7 @@ static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp) static long cgroup_create(struct cgroup *parent, struct dentry *dentry, umode_t mode) { + static atomic64_t serial_nr_cursor = ATOMIC64_INIT(0); struct cgroup *cgrp; struct cgroup_name *name; struct cgroupfs_root *root = parent->root; @@ -4183,6 +4284,14 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, goto err_free_all; lockdep_assert_held(&dentry->d_inode->i_mutex); + /* + * Assign a monotonically increasing serial number. With the list + * appending below, it guarantees that sibling cgroups are always + * sorted in the ascending serial number order on the parent's + * ->children. + */ + cgrp->serial_nr = atomic64_inc_return(&serial_nr_cursor); + /* allocation complete, commit to creation */ list_add_tail(&cgrp->allcg_node, &root->allcg_list); list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children); @@ -4255,21 +4364,27 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) __releases(&cgroup_mutex) __acquires(&cgroup_mutex) { struct dentry *d = cgrp->dentry; - struct cgroup *parent = cgrp->parent; struct cgroup_event *event, *tmp; struct cgroup_subsys *ss; + bool empty; lockdep_assert_held(&d->d_inode->i_mutex); lockdep_assert_held(&cgroup_mutex); - if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) + /* + * css_set_lock synchronizes access to ->cset_links and prevents + * @cgrp from being removed while __put_css_set() is in progress. + */ + read_lock(&css_set_lock); + empty = list_empty(&cgrp->cset_links) && list_empty(&cgrp->children); + read_unlock(&css_set_lock); + if (!empty) return -EBUSY; /* * Block new css_tryget() by deactivating refcnt and mark @cgrp - * removed. This makes future css_tryget() and child creation - * attempts fail thus maintaining the removal conditions verified - * above. + * removed. This makes future css_tryget() attempts fail which we + * guarantee to ->css_offline() callbacks. */ for_each_subsys(cgrp->root, ss) { struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; @@ -4277,37 +4392,28 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) WARN_ON(atomic_read(&css->refcnt) < 0); atomic_add(CSS_DEACT_BIAS, &css->refcnt); } - set_bit(CGRP_REMOVED, &cgrp->flags); - - /* tell subsystems to initate destruction */ - for_each_subsys(cgrp->root, ss) - offline_css(ss, cgrp); /* - * Put all the base refs. Each css holds an extra reference to the - * cgroup's dentry and cgroup removal proceeds regardless of css - * refs. On the last put of each css, whenever that may be, the - * extra dentry ref is put so that dentry destruction happens only - * after all css's are released. + * Mark @cgrp dead. This prevents further task migration and child + * creation by disabling cgroup_lock_live_group(). Note that + * CGRP_DEAD assertion is depended upon by cgroup_next_sibling() to + * resume iteration after dropping RCU read lock. See + * cgroup_next_sibling() for details. */ - for_each_subsys(cgrp->root, ss) - css_put(cgrp->subsys[ss->subsys_id]); + set_bit(CGRP_DEAD, &cgrp->flags); + /* CGRP_DEAD is set, remove from ->release_list for the last time */ raw_spin_lock(&release_list_lock); if (!list_empty(&cgrp->release_list)) list_del_init(&cgrp->release_list); raw_spin_unlock(&release_list_lock); - /* delete this cgroup from parent->children */ - list_del_rcu(&cgrp->sibling); - list_del_init(&cgrp->allcg_node); - + /* + * Remove @cgrp directory. The removal puts the base ref but we + * aren't quite done with @cgrp yet, so hold onto it. + */ dget(d); cgroup_d_remove_dir(d); - dput(d); - - set_bit(CGRP_RELEASABLE, &parent->flags); - check_for_release(parent); /* * Unregister events and notify userspace. @@ -4321,7 +4427,45 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) } spin_unlock(&cgrp->event_list_lock); + INIT_WORK(&cgrp->destroy_work, cgroup_offline_fn); + schedule_work(&cgrp->destroy_work); + return 0; +}; + +static void cgroup_offline_fn(struct work_struct *work) +{ + struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work); + struct cgroup *parent = cgrp->parent; + struct dentry *d = cgrp->dentry; + struct cgroup_subsys *ss; + + mutex_lock(&cgroup_mutex); + + /* tell subsystems to initate destruction */ + for_each_subsys(cgrp->root, ss) + offline_css(ss, cgrp); + + /* + * Put all the base refs. Each css holds an extra reference to the + * cgroup's dentry and cgroup removal proceeds regardless of css + * refs. On the last put of each css, whenever that may be, the + * extra dentry ref is put so that dentry destruction happens only + * after all css's are released. + */ + for_each_subsys(cgrp->root, ss) + css_put(cgrp->subsys[ss->subsys_id]); + + /* delete this cgroup from parent->children */ + list_del_rcu(&cgrp->sibling); + list_del_init(&cgrp->allcg_node); + + dput(d); + + set_bit(CGRP_RELEASABLE, &parent->flags); + check_for_release(parent); + + mutex_unlock(&cgroup_mutex); } static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) @@ -4404,7 +4548,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) struct cgroup_subsys_state *css; int i, ret; struct hlist_node *tmp; - struct css_set *cg; + struct css_set *cset; unsigned long key; /* check name and function validity */ @@ -4471,17 +4615,17 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) * this is all done under the css_set_lock. */ write_lock(&css_set_lock); - hash_for_each_safe(css_set_table, i, tmp, cg, hlist) { + hash_for_each_safe(css_set_table, i, tmp, cset, hlist) { /* skip entries that we already rehashed */ - if (cg->subsys[ss->subsys_id]) + if (cset->subsys[ss->subsys_id]) continue; /* remove existing entry */ - hash_del(&cg->hlist); + hash_del(&cset->hlist); /* set new value */ - cg->subsys[ss->subsys_id] = css; + cset->subsys[ss->subsys_id] = css; /* recompute hash and restore entry */ - key = css_set_hash(cg->subsys); - hash_add(css_set_table, &cg->hlist, key); + key = css_set_hash(cset->subsys); + hash_add(css_set_table, &cset->hlist, key); } write_unlock(&css_set_lock); @@ -4511,7 +4655,7 @@ EXPORT_SYMBOL_GPL(cgroup_load_subsys); */ void cgroup_unload_subsys(struct cgroup_subsys *ss) { - struct cg_cgroup_link *link; + struct cgrp_cset_link *link; BUG_ON(ss->module == NULL); @@ -4540,14 +4684,14 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) * in loading, we need to pay our respects to the hashtable gods. */ write_lock(&css_set_lock); - list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) { - struct css_set *cg = link->cg; + list_for_each_entry(link, &dummytop->cset_links, cset_link) { + struct css_set *cset = link->cset; unsigned long key; - hash_del(&cg->hlist); - cg->subsys[ss->subsys_id] = NULL; - key = css_set_hash(cg->subsys); - hash_add(css_set_table, &cg->hlist, key); + hash_del(&cset->hlist); + cset->subsys[ss->subsys_id] = NULL; + key = css_set_hash(cset->subsys); + hash_add(css_set_table, &cset->hlist, key); } write_unlock(&css_set_lock); @@ -4574,7 +4718,7 @@ int __init cgroup_init_early(void) { int i; atomic_set(&init_css_set.refcount, 1); - INIT_LIST_HEAD(&init_css_set.cg_links); + INIT_LIST_HEAD(&init_css_set.cgrp_links); INIT_LIST_HEAD(&init_css_set.tasks); INIT_HLIST_NODE(&init_css_set.hlist); css_set_count = 1; @@ -4582,12 +4726,10 @@ int __init cgroup_init_early(void) root_count = 1; init_task.cgroups = &init_css_set; - init_css_set_link.cg = &init_css_set; - init_css_set_link.cgrp = dummytop; - list_add(&init_css_set_link.cgrp_link_list, - &rootnode.top_cgroup.css_sets); - list_add(&init_css_set_link.cg_link_list, - &init_css_set.cg_links); + init_cgrp_cset_link.cset = &init_css_set; + init_cgrp_cset_link.cgrp = dummytop; + list_add(&init_cgrp_cset_link.cset_link, &rootnode.top_cgroup.cset_links); + list_add(&init_cgrp_cset_link.cgrp_link, &init_css_set.cgrp_links); for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; @@ -4643,7 +4785,15 @@ int __init cgroup_init(void) /* Add init_css_set to the hash table */ key = css_set_hash(init_css_set.subsys); hash_add(css_set_table, &init_css_set.hlist, key); - BUG_ON(!init_root_id(&rootnode)); + + /* allocate id for the dummy hierarchy */ + mutex_lock(&cgroup_mutex); + mutex_lock(&cgroup_root_mutex); + + BUG_ON(cgroup_init_root_id(&rootnode)); + + mutex_unlock(&cgroup_root_mutex); + mutex_unlock(&cgroup_mutex); cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj); if (!cgroup_kobj) { @@ -4886,7 +5036,7 @@ void cgroup_post_fork(struct task_struct *child) */ void cgroup_exit(struct task_struct *tsk, int run_callbacks) { - struct css_set *cg; + struct css_set *cset; int i; /* @@ -4903,7 +5053,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) /* Reassign the task to the init_css_set. */ task_lock(tsk); - cg = tsk->cgroups; + cset = tsk->cgroups; tsk->cgroups = &init_css_set; if (run_callbacks && need_forkexit_callback) { @@ -4916,7 +5066,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) if (ss->exit) { struct cgroup *old_cgrp = - rcu_dereference_raw(cg->subsys[i])->cgroup; + rcu_dereference_raw(cset->subsys[i])->cgroup; struct cgroup *cgrp = task_cgroup(tsk, i); ss->exit(cgrp, old_cgrp, tsk); } @@ -4924,15 +5074,13 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) } task_unlock(tsk); - put_css_set_taskexit(cg); + put_css_set_taskexit(cset); } static void check_for_release(struct cgroup *cgrp) { - /* All of these checks rely on RCU to keep the cgroup - * structure alive */ if (cgroup_is_releasable(cgrp) && - !atomic_read(&cgrp->count) && list_empty(&cgrp->children)) { + list_empty(&cgrp->cset_links) && list_empty(&cgrp->children)) { /* * Control Group is currently removeable. If it's not * already queued for a userspace notification, queue @@ -4941,7 +5089,7 @@ static void check_for_release(struct cgroup *cgrp) int need_schedule_work = 0; raw_spin_lock(&release_list_lock); - if (!cgroup_is_removed(cgrp) && + if (!cgroup_is_dead(cgrp) && list_empty(&cgrp->release_list)) { list_add(&cgrp->release_list, &release_list); need_schedule_work = 1; @@ -5087,9 +5235,7 @@ __setup("cgroup_disable=", cgroup_disable); * Functons for CSS ID. */ -/* - *To get ID other than 0, this should be called when !cgroup_is_removed(). - */ +/* to get ID other than 0, this should be called when !cgroup_is_dead() */ unsigned short css_id(struct cgroup_subsys_state *css) { struct css_id *cssid; @@ -5107,18 +5253,6 @@ unsigned short css_id(struct cgroup_subsys_state *css) } EXPORT_SYMBOL_GPL(css_id); -unsigned short css_depth(struct cgroup_subsys_state *css) -{ - struct css_id *cssid; - - cssid = rcu_dereference_check(css->id, css_refcnt(css)); - - if (cssid) - return cssid->depth; - return 0; -} -EXPORT_SYMBOL_GPL(css_depth); - /** * css_is_ancestor - test "root" css is an ancestor of "child" * @child: the css to be tested. @@ -5314,11 +5448,6 @@ static void debug_css_free(struct cgroup *cont) kfree(cont->subsys[debug_subsys_id]); } -static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft) -{ - return atomic_read(&cont->count); -} - static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft) { return cgroup_task_count(cont); @@ -5344,13 +5473,13 @@ static int current_css_set_cg_links_read(struct cgroup *cont, struct cftype *cft, struct seq_file *seq) { - struct cg_cgroup_link *link; - struct css_set *cg; + struct cgrp_cset_link *link; + struct css_set *cset; read_lock(&css_set_lock); rcu_read_lock(); - cg = rcu_dereference(current->cgroups); - list_for_each_entry(link, &cg->cg_links, cg_link_list) { + cset = rcu_dereference(current->cgroups); + list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { struct cgroup *c = link->cgrp; const char *name; @@ -5371,15 +5500,15 @@ static int cgroup_css_links_read(struct cgroup *cont, struct cftype *cft, struct seq_file *seq) { - struct cg_cgroup_link *link; + struct cgrp_cset_link *link; read_lock(&css_set_lock); - list_for_each_entry(link, &cont->css_sets, cgrp_link_list) { - struct css_set *cg = link->cg; + list_for_each_entry(link, &cont->cset_links, cset_link) { + struct css_set *cset = link->cset; struct task_struct *task; int count = 0; - seq_printf(seq, "css_set %p\n", cg); - list_for_each_entry(task, &cg->tasks, cg_list) { + seq_printf(seq, "css_set %p\n", cset); + list_for_each_entry(task, &cset->tasks, cg_list) { if (count++ > MAX_TASKS_SHOWN_PER_CSS) { seq_puts(seq, " ...\n"); break; @@ -5400,10 +5529,6 @@ static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft) static struct cftype debug_files[] = { { - .name = "cgroup_refcount", - .read_u64 = cgroup_refcount_read, - }, - { .name = "taskcount", .read_u64 = debug_taskcount_read, }, |