diff options
Diffstat (limited to 'include/linux/cgroup.h')
-rw-r--r-- | include/linux/cgroup.h | 275 |
1 files changed, 132 insertions, 143 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 9450f025fe0c..c2515851c1aa 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -14,18 +14,17 @@ #include <linux/rcupdate.h> #include <linux/rculist.h> #include <linux/cgroupstats.h> -#include <linux/prio_heap.h> #include <linux/rwsem.h> #include <linux/idr.h> #include <linux/workqueue.h> -#include <linux/xattr.h> #include <linux/fs.h> #include <linux/percpu-refcount.h> #include <linux/seq_file.h> +#include <linux/kernfs.h> #ifdef CONFIG_CGROUPS -struct cgroupfs_root; +struct cgroup_root; struct cgroup_subsys; struct inode; struct cgroup; @@ -34,31 +33,16 @@ extern int cgroup_init_early(void); extern int cgroup_init(void); extern void cgroup_fork(struct task_struct *p); extern void cgroup_post_fork(struct task_struct *p); -extern void cgroup_exit(struct task_struct *p, int run_callbacks); +extern void cgroup_exit(struct task_struct *p); extern int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry); -extern int cgroup_load_subsys(struct cgroup_subsys *ss); -extern void cgroup_unload_subsys(struct cgroup_subsys *ss); extern int proc_cgroup_show(struct seq_file *, void *); -/* - * Define the enumeration of all cgroup subsystems. - * - * We define ids for builtin subsystems and then modular ones. - */ -#define SUBSYS(_x) _x ## _subsys_id, +/* define the enumeration of all cgroup subsystems */ +#define SUBSYS(_x) _x ## _cgrp_id, enum cgroup_subsys_id { -#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option) -#include <linux/cgroup_subsys.h> -#undef IS_SUBSYS_ENABLED - CGROUP_BUILTIN_SUBSYS_COUNT, - - __CGROUP_SUBSYS_TEMP_PLACEHOLDER = CGROUP_BUILTIN_SUBSYS_COUNT - 1, - -#define IS_SUBSYS_ENABLED(option) IS_MODULE(option) #include <linux/cgroup_subsys.h> -#undef IS_SUBSYS_ENABLED CGROUP_SUBSYS_COUNT, }; #undef SUBSYS @@ -153,11 +137,6 @@ enum { CGRP_SANE_BEHAVIOR, }; -struct cgroup_name { - struct rcu_head rcu_head; - char name[]; -}; - struct cgroup { unsigned long flags; /* "unsigned long" so bitops work */ @@ -174,16 +153,17 @@ struct cgroup { /* the number of attached css's */ int nr_css; + atomic_t refcnt; + /* * We link our 'sibling' struct into our parent's 'children'. * Our children link their 'sibling' into our 'children'. */ struct list_head sibling; /* my parent's children */ struct list_head children; /* my children */ - struct list_head files; /* my files */ struct cgroup *parent; /* my parent */ - struct dentry *dentry; /* cgroup fs entry, RCU protected */ + struct kernfs_node *kn; /* cgroup kernfs entry */ /* * Monotonically increasing unique serial number which defines a @@ -193,23 +173,13 @@ struct cgroup { */ u64 serial_nr; - /* - * This is a copy of dentry->d_name, and it's needed because - * we can't use dentry->d_name in cgroup_path(). - * - * You must acquire rcu_read_lock() to access cgrp->name, and - * the only place that can change it is rename(), which is - * protected by parent dir's i_mutex. - * - * Normally you should use cgroup_name() wrapper rather than - * access it directly. - */ - struct cgroup_name __rcu *name; + /* The bitmask of subsystems attached to this cgroup */ + unsigned long subsys_mask; /* Private pointers for each registered subsystem */ struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; - struct cgroupfs_root *root; + struct cgroup_root *root; /* * List of cgrp_cset_links pointing at css_sets with tasks in this @@ -237,14 +207,11 @@ struct cgroup { /* For css percpu_ref killing and RCU-protected deletion */ struct rcu_head rcu_head; struct work_struct destroy_work; - - /* directory xattrs */ - struct simple_xattrs xattrs; }; #define MAX_CGROUP_ROOT_NAMELEN 64 -/* cgroupfs_root->flags */ +/* cgroup_root->flags */ enum { /* * Unfortunately, cgroup core and various controllers are riddled @@ -262,8 +229,8 @@ enum { * * The followings are the behaviors currently affected this flag. * - * - Mount options "noprefix" and "clone_children" are disallowed. - * Also, cgroupfs file cgroup.clone_children is not created. + * - Mount options "noprefix", "xattr", "clone_children", + * "release_agent" and "name" are disallowed. * * - When mounting an existing superblock, mount options should * match. @@ -281,6 +248,11 @@ enum { * - "release_agent" and "notify_on_release" are removed. * Replacement notification mechanism will be implemented. * + * - "cgroup.clone_children" is removed. + * + * - If mount is requested with sane_behavior but without any + * subsystem, the default unified hierarchy is mounted. + * * - cpuset: tasks will be kept in empty cpusets when hotplug happens * and take masks of ancestors with non-empty cpus/mems, instead of * being moved to an ancestor. @@ -300,29 +272,24 @@ enum { /* mount options live below bit 16 */ CGRP_ROOT_OPTION_MASK = (1 << 16) - 1, - - CGRP_ROOT_SUBSYS_BOUND = (1 << 16), /* subsystems finished binding */ }; /* - * A cgroupfs_root represents the root of a cgroup hierarchy, and may be - * associated with a superblock to form an active hierarchy. This is + * A cgroup_root represents the root of a cgroup hierarchy, and may be + * associated with a kernfs_root to form an active hierarchy. This is * internal to cgroup core. Don't access directly from controllers. */ -struct cgroupfs_root { - struct super_block *sb; - - /* The bitmask of subsystems attached to this hierarchy */ - unsigned long subsys_mask; +struct cgroup_root { + struct kernfs_root *kf_root; /* Unique id for this hierarchy. */ int hierarchy_id; - /* The root cgroup for this hierarchy */ - struct cgroup top_cgroup; + /* The root cgroup. Root is destroyed on its release. */ + struct cgroup cgrp; - /* Tracks how many cgroups are currently defined in hierarchy.*/ - int number_of_cgroups; + /* Number of cgroups in the hierarchy, used only for /proc/cgroups */ + atomic_t nr_cgrps; /* A list running through the active hierarchies */ struct list_head root_list; @@ -360,10 +327,14 @@ struct css_set { struct hlist_node hlist; /* - * List running through all tasks using this cgroup - * group. Protected by css_set_lock + * Lists running through all tasks using this cgroup group. + * mg_tasks lists tasks which belong to this cset but are in the + * process of being migrated out or in. Protected by + * css_set_rwsem, but, during migration, once tasks are moved to + * mg_tasks, it can be read safely while holding cgroup_mutex. */ struct list_head tasks; + struct list_head mg_tasks; /* * List of cgrp_cset_links pointing at cgroups referenced from this @@ -372,13 +343,29 @@ struct css_set { struct list_head cgrp_links; /* - * Set of subsystem states, one for each subsystem. This array - * is immutable after creation apart from the init_css_set - * during subsystem registration (at boot time) and modular subsystem - * loading/unloading. + * Set of subsystem states, one for each subsystem. This array is + * immutable after creation apart from the init_css_set during + * subsystem registration (at boot time). */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; + /* + * List of csets participating in the on-going migration either as + * source or destination. Protected by cgroup_mutex. + */ + struct list_head mg_preload_node; + struct list_head mg_node; + + /* + * If this cset is acting as the source of migration the following + * two fields are set. mg_src_cgrp is the source cgroup of the + * on-going migration and mg_dst_cset is the destination cset the + * target tasks on this cset should be migrated to. Protected by + * cgroup_mutex. + */ + struct cgroup *mg_src_cgrp; + struct css_set *mg_dst_cset; + /* For RCU-protected deletion */ struct rcu_head rcu_head; }; @@ -397,6 +384,7 @@ enum { CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ CFTYPE_INSANE = (1 << 2), /* don't create if sane_behavior */ CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ + CFTYPE_ONLY_ON_DFL = (1 << 4), /* only on default hierarchy */ }; #define MAX_CFTYPE_NAME 64 @@ -416,8 +404,9 @@ struct cftype { umode_t mode; /* - * If non-zero, defines the maximum length of string that can - * be passed to write_string; defaults to 64 + * The maximum length of string, excluding trailing nul, that can + * be passed to write_string. If < PAGE_SIZE-1, PAGE_SIZE-1 is + * assumed. */ size_t max_write_len; @@ -425,10 +414,12 @@ struct cftype { unsigned int flags; /* - * The subsys this file belongs to. Initialized automatically - * during registration. NULL for cgroup core files. + * Fields used for internal bookkeeping. Initialized automatically + * during registration. */ - struct cgroup_subsys *ss; + struct cgroup_subsys *ss; /* NULL for cgroup core files */ + struct list_head node; /* anchored at ss->cfts */ + struct kernfs_ops *kf_ops; /* * read_u64() is a shortcut for the common case of returning a @@ -467,7 +458,7 @@ struct cftype { * Returns 0 or -ve error code. */ int (*write_string)(struct cgroup_subsys_state *css, struct cftype *cft, - const char *buffer); + char *buffer); /* * trigger() callback can be used to get some kick from the * userspace, when the actual string written is not important @@ -475,37 +466,18 @@ struct cftype { * kick type for multiplexing. */ int (*trigger)(struct cgroup_subsys_state *css, unsigned int event); -}; -/* - * cftype_sets describe cftypes belonging to a subsystem and are chained at - * cgroup_subsys->cftsets. Each cftset points to an array of cftypes - * terminated by zero length name. - */ -struct cftype_set { - struct list_head node; /* chained at subsys->cftsets */ - struct cftype *cfts; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lock_class_key lockdep_key; +#endif }; -/* - * cgroupfs file entry, pointed to from leaf dentry->d_fsdata. Don't - * access directly. - */ -struct cfent { - struct list_head node; - struct dentry *dentry; - struct cftype *type; - struct cgroup_subsys_state *css; - - /* file xattrs */ - struct simple_xattrs xattrs; -}; +extern struct cgroup_root cgrp_dfl_root; -/* seq_file->private points to the following, only ->priv is public */ -struct cgroup_open_file { - struct cfent *cfe; - void *priv; -}; +static inline bool cgroup_on_dfl(const struct cgroup *cgrp) +{ + return cgrp->root == &cgrp_dfl_root; +} /* * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This @@ -516,34 +488,63 @@ static inline bool cgroup_sane_behavior(const struct cgroup *cgrp) return cgrp->root->flags & CGRP_ROOT_SANE_BEHAVIOR; } -/* Caller should hold rcu_read_lock() */ -static inline const char *cgroup_name(const struct cgroup *cgrp) +/* no synchronization, the result can only be used as a hint */ +static inline bool cgroup_has_tasks(struct cgroup *cgrp) { - return rcu_dereference(cgrp->name)->name; + return !list_empty(&cgrp->cset_links); } -static inline struct cgroup_subsys_state *seq_css(struct seq_file *seq) +/* returns ino associated with a cgroup, 0 indicates unmounted root */ +static inline ino_t cgroup_ino(struct cgroup *cgrp) { - struct cgroup_open_file *of = seq->private; - return of->cfe->css; + if (cgrp->kn) + return cgrp->kn->ino; + else + return 0; } static inline struct cftype *seq_cft(struct seq_file *seq) { - struct cgroup_open_file *of = seq->private; - return of->cfe->type; + struct kernfs_open_file *of = seq->private; + + return of->kn->priv; +} + +struct cgroup_subsys_state *seq_css(struct seq_file *seq); + +/* + * Name / path handling functions. All are thin wrappers around the kernfs + * counterparts and can be called under any context. + */ + +static inline int cgroup_name(struct cgroup *cgrp, char *buf, size_t buflen) +{ + return kernfs_name(cgrp->kn, buf, buflen); } +static inline char * __must_check cgroup_path(struct cgroup *cgrp, char *buf, + size_t buflen) +{ + return kernfs_path(cgrp->kn, buf, buflen); +} + +static inline void pr_cont_cgroup_name(struct cgroup *cgrp) +{ + pr_cont_kernfs_name(cgrp->kn); +} + +static inline void pr_cont_cgroup_path(struct cgroup *cgrp) +{ + pr_cont_kernfs_path(cgrp->kn); +} + +char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); + int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_rm_cftypes(struct cftype *cfts); bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); -int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen); -int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen); - -int cgroup_task_count(const struct cgroup *cgrp); - /* * Control Group taskset, used to pass around set of tasks to cgroup_subsys * methods. @@ -551,22 +552,15 @@ int cgroup_task_count(const struct cgroup *cgrp); struct cgroup_taskset; struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset); struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset); -struct cgroup_subsys_state *cgroup_taskset_cur_css(struct cgroup_taskset *tset, - int subsys_id); -int cgroup_taskset_size(struct cgroup_taskset *tset); /** * cgroup_taskset_for_each - iterate cgroup_taskset * @task: the loop cursor - * @skip_css: skip if task's css matches this, %NULL to iterate through all * @tset: taskset to iterate */ -#define cgroup_taskset_for_each(task, skip_css, tset) \ +#define cgroup_taskset_for_each(task, tset) \ for ((task) = cgroup_taskset_first((tset)); (task); \ - (task) = cgroup_taskset_next((tset))) \ - if (!(skip_css) || \ - cgroup_taskset_cur_css((tset), \ - (skip_css)->ss->subsys_id) != (skip_css)) + (task) = cgroup_taskset_next((tset))) /* * Control Group subsystem type. @@ -591,7 +585,6 @@ struct cgroup_subsys { struct task_struct *task); void (*bind)(struct cgroup_subsys_state *root_css); - int subsys_id; int disabled; int early_init; @@ -610,27 +603,26 @@ struct cgroup_subsys { bool broken_hierarchy; bool warned_broken_hierarchy; + /* the following two fields are initialized automtically during boot */ + int id; #define MAX_CGROUP_TYPE_NAMELEN 32 const char *name; /* link to parent, protected by cgroup_lock() */ - struct cgroupfs_root *root; + struct cgroup_root *root; - /* list of cftype_sets */ - struct list_head cftsets; + /* + * List of cftypes. Each entry is the first entry of an array + * terminated by zero length name. + */ + struct list_head cfts; - /* base cftypes, automatically [de]registered with subsys itself */ + /* base cftypes, automatically registered with subsys itself */ struct cftype *base_cftypes; - struct cftype_set base_cftset; - - /* should be defined only by modular subsystems */ - struct module *module; }; -#define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys; -#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option) +#define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys; #include <linux/cgroup_subsys.h> -#undef IS_SUBSYS_ENABLED #undef SUBSYS /** @@ -661,10 +653,12 @@ struct cgroup_subsys_state *css_parent(struct cgroup_subsys_state *css) */ #ifdef CONFIG_PROVE_RCU extern struct mutex cgroup_mutex; +extern struct rw_semaphore css_set_rwsem; #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ - lockdep_is_held(&(task)->alloc_lock) || \ - lockdep_is_held(&cgroup_mutex) || (__c)) + lockdep_is_held(&cgroup_mutex) || \ + lockdep_is_held(&css_set_rwsem) || \ + ((task)->flags & PF_EXITING) || (__c)) #else #define task_css_set_check(task, __c) \ rcu_dereference((task)->cgroups) @@ -837,16 +831,11 @@ void css_task_iter_start(struct cgroup_subsys_state *css, struct task_struct *css_task_iter_next(struct css_task_iter *it); void css_task_iter_end(struct css_task_iter *it); -int css_scan_tasks(struct cgroup_subsys_state *css, - bool (*test)(struct task_struct *, void *), - void (*process)(struct task_struct *, void *), - void *data, struct ptr_heap *heap); - int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); -struct cgroup_subsys_state *css_from_dir(struct dentry *dentry, - struct cgroup_subsys *ss); +struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry, + struct cgroup_subsys *ss); #else /* !CONFIG_CGROUPS */ @@ -854,7 +843,7 @@ static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } static inline void cgroup_fork(struct task_struct *p) {} static inline void cgroup_post_fork(struct task_struct *p) {} -static inline void cgroup_exit(struct task_struct *p, int callbacks) {} +static inline void cgroup_exit(struct task_struct *p) {} static inline int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) |