summaryrefslogtreecommitdiffstats
path: root/include/linux/cgroup.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/cgroup.h')
-rw-r--r--include/linux/cgroup.h275
1 files changed, 132 insertions, 143 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 9450f025fe0c..c2515851c1aa 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -14,18 +14,17 @@
#include <linux/rcupdate.h>
#include <linux/rculist.h>
#include <linux/cgroupstats.h>
-#include <linux/prio_heap.h>
#include <linux/rwsem.h>
#include <linux/idr.h>
#include <linux/workqueue.h>
-#include <linux/xattr.h>
#include <linux/fs.h>
#include <linux/percpu-refcount.h>
#include <linux/seq_file.h>
+#include <linux/kernfs.h>
#ifdef CONFIG_CGROUPS
-struct cgroupfs_root;
+struct cgroup_root;
struct cgroup_subsys;
struct inode;
struct cgroup;
@@ -34,31 +33,16 @@ extern int cgroup_init_early(void);
extern int cgroup_init(void);
extern void cgroup_fork(struct task_struct *p);
extern void cgroup_post_fork(struct task_struct *p);
-extern void cgroup_exit(struct task_struct *p, int run_callbacks);
+extern void cgroup_exit(struct task_struct *p);
extern int cgroupstats_build(struct cgroupstats *stats,
struct dentry *dentry);
-extern int cgroup_load_subsys(struct cgroup_subsys *ss);
-extern void cgroup_unload_subsys(struct cgroup_subsys *ss);
extern int proc_cgroup_show(struct seq_file *, void *);
-/*
- * Define the enumeration of all cgroup subsystems.
- *
- * We define ids for builtin subsystems and then modular ones.
- */
-#define SUBSYS(_x) _x ## _subsys_id,
+/* define the enumeration of all cgroup subsystems */
+#define SUBSYS(_x) _x ## _cgrp_id,
enum cgroup_subsys_id {
-#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option)
-#include <linux/cgroup_subsys.h>
-#undef IS_SUBSYS_ENABLED
- CGROUP_BUILTIN_SUBSYS_COUNT,
-
- __CGROUP_SUBSYS_TEMP_PLACEHOLDER = CGROUP_BUILTIN_SUBSYS_COUNT - 1,
-
-#define IS_SUBSYS_ENABLED(option) IS_MODULE(option)
#include <linux/cgroup_subsys.h>
-#undef IS_SUBSYS_ENABLED
CGROUP_SUBSYS_COUNT,
};
#undef SUBSYS
@@ -153,11 +137,6 @@ enum {
CGRP_SANE_BEHAVIOR,
};
-struct cgroup_name {
- struct rcu_head rcu_head;
- char name[];
-};
-
struct cgroup {
unsigned long flags; /* "unsigned long" so bitops work */
@@ -174,16 +153,17 @@ struct cgroup {
/* the number of attached css's */
int nr_css;
+ atomic_t refcnt;
+
/*
* We link our 'sibling' struct into our parent's 'children'.
* Our children link their 'sibling' into our 'children'.
*/
struct list_head sibling; /* my parent's children */
struct list_head children; /* my children */
- struct list_head files; /* my files */
struct cgroup *parent; /* my parent */
- struct dentry *dentry; /* cgroup fs entry, RCU protected */
+ struct kernfs_node *kn; /* cgroup kernfs entry */
/*
* Monotonically increasing unique serial number which defines a
@@ -193,23 +173,13 @@ struct cgroup {
*/
u64 serial_nr;
- /*
- * This is a copy of dentry->d_name, and it's needed because
- * we can't use dentry->d_name in cgroup_path().
- *
- * You must acquire rcu_read_lock() to access cgrp->name, and
- * the only place that can change it is rename(), which is
- * protected by parent dir's i_mutex.
- *
- * Normally you should use cgroup_name() wrapper rather than
- * access it directly.
- */
- struct cgroup_name __rcu *name;
+ /* The bitmask of subsystems attached to this cgroup */
+ unsigned long subsys_mask;
/* Private pointers for each registered subsystem */
struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];
- struct cgroupfs_root *root;
+ struct cgroup_root *root;
/*
* List of cgrp_cset_links pointing at css_sets with tasks in this
@@ -237,14 +207,11 @@ struct cgroup {
/* For css percpu_ref killing and RCU-protected deletion */
struct rcu_head rcu_head;
struct work_struct destroy_work;
-
- /* directory xattrs */
- struct simple_xattrs xattrs;
};
#define MAX_CGROUP_ROOT_NAMELEN 64
-/* cgroupfs_root->flags */
+/* cgroup_root->flags */
enum {
/*
* Unfortunately, cgroup core and various controllers are riddled
@@ -262,8 +229,8 @@ enum {
*
* The followings are the behaviors currently affected this flag.
*
- * - Mount options "noprefix" and "clone_children" are disallowed.
- * Also, cgroupfs file cgroup.clone_children is not created.
+ * - Mount options "noprefix", "xattr", "clone_children",
+ * "release_agent" and "name" are disallowed.
*
* - When mounting an existing superblock, mount options should
* match.
@@ -281,6 +248,11 @@ enum {
* - "release_agent" and "notify_on_release" are removed.
* Replacement notification mechanism will be implemented.
*
+ * - "cgroup.clone_children" is removed.
+ *
+ * - If mount is requested with sane_behavior but without any
+ * subsystem, the default unified hierarchy is mounted.
+ *
* - cpuset: tasks will be kept in empty cpusets when hotplug happens
* and take masks of ancestors with non-empty cpus/mems, instead of
* being moved to an ancestor.
@@ -300,29 +272,24 @@ enum {
/* mount options live below bit 16 */
CGRP_ROOT_OPTION_MASK = (1 << 16) - 1,
-
- CGRP_ROOT_SUBSYS_BOUND = (1 << 16), /* subsystems finished binding */
};
/*
- * A cgroupfs_root represents the root of a cgroup hierarchy, and may be
- * associated with a superblock to form an active hierarchy. This is
+ * A cgroup_root represents the root of a cgroup hierarchy, and may be
+ * associated with a kernfs_root to form an active hierarchy. This is
* internal to cgroup core. Don't access directly from controllers.
*/
-struct cgroupfs_root {
- struct super_block *sb;
-
- /* The bitmask of subsystems attached to this hierarchy */
- unsigned long subsys_mask;
+struct cgroup_root {
+ struct kernfs_root *kf_root;
/* Unique id for this hierarchy. */
int hierarchy_id;
- /* The root cgroup for this hierarchy */
- struct cgroup top_cgroup;
+ /* The root cgroup. Root is destroyed on its release. */
+ struct cgroup cgrp;
- /* Tracks how many cgroups are currently defined in hierarchy.*/
- int number_of_cgroups;
+ /* Number of cgroups in the hierarchy, used only for /proc/cgroups */
+ atomic_t nr_cgrps;
/* A list running through the active hierarchies */
struct list_head root_list;
@@ -360,10 +327,14 @@ struct css_set {
struct hlist_node hlist;
/*
- * List running through all tasks using this cgroup
- * group. Protected by css_set_lock
+ * Lists running through all tasks using this cgroup group.
+ * mg_tasks lists tasks which belong to this cset but are in the
+ * process of being migrated out or in. Protected by
+ * css_set_rwsem, but, during migration, once tasks are moved to
+ * mg_tasks, it can be read safely while holding cgroup_mutex.
*/
struct list_head tasks;
+ struct list_head mg_tasks;
/*
* List of cgrp_cset_links pointing at cgroups referenced from this
@@ -372,13 +343,29 @@ struct css_set {
struct list_head cgrp_links;
/*
- * Set of subsystem states, one for each subsystem. This array
- * is immutable after creation apart from the init_css_set
- * during subsystem registration (at boot time) and modular subsystem
- * loading/unloading.
+ * Set of subsystem states, one for each subsystem. This array is
+ * immutable after creation apart from the init_css_set during
+ * subsystem registration (at boot time).
*/
struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
+ /*
+ * List of csets participating in the on-going migration either as
+ * source or destination. Protected by cgroup_mutex.
+ */
+ struct list_head mg_preload_node;
+ struct list_head mg_node;
+
+ /*
+ * If this cset is acting as the source of migration the following
+ * two fields are set. mg_src_cgrp is the source cgroup of the
+ * on-going migration and mg_dst_cset is the destination cset the
+ * target tasks on this cset should be migrated to. Protected by
+ * cgroup_mutex.
+ */
+ struct cgroup *mg_src_cgrp;
+ struct css_set *mg_dst_cset;
+
/* For RCU-protected deletion */
struct rcu_head rcu_head;
};
@@ -397,6 +384,7 @@ enum {
CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */
CFTYPE_INSANE = (1 << 2), /* don't create if sane_behavior */
CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */
+ CFTYPE_ONLY_ON_DFL = (1 << 4), /* only on default hierarchy */
};
#define MAX_CFTYPE_NAME 64
@@ -416,8 +404,9 @@ struct cftype {
umode_t mode;
/*
- * If non-zero, defines the maximum length of string that can
- * be passed to write_string; defaults to 64
+ * The maximum length of string, excluding trailing nul, that can
+ * be passed to write_string. If < PAGE_SIZE-1, PAGE_SIZE-1 is
+ * assumed.
*/
size_t max_write_len;
@@ -425,10 +414,12 @@ struct cftype {
unsigned int flags;
/*
- * The subsys this file belongs to. Initialized automatically
- * during registration. NULL for cgroup core files.
+ * Fields used for internal bookkeeping. Initialized automatically
+ * during registration.
*/
- struct cgroup_subsys *ss;
+ struct cgroup_subsys *ss; /* NULL for cgroup core files */
+ struct list_head node; /* anchored at ss->cfts */
+ struct kernfs_ops *kf_ops;
/*
* read_u64() is a shortcut for the common case of returning a
@@ -467,7 +458,7 @@ struct cftype {
* Returns 0 or -ve error code.
*/
int (*write_string)(struct cgroup_subsys_state *css, struct cftype *cft,
- const char *buffer);
+ char *buffer);
/*
* trigger() callback can be used to get some kick from the
* userspace, when the actual string written is not important
@@ -475,37 +466,18 @@ struct cftype {
* kick type for multiplexing.
*/
int (*trigger)(struct cgroup_subsys_state *css, unsigned int event);
-};
-/*
- * cftype_sets describe cftypes belonging to a subsystem and are chained at
- * cgroup_subsys->cftsets. Each cftset points to an array of cftypes
- * terminated by zero length name.
- */
-struct cftype_set {
- struct list_head node; /* chained at subsys->cftsets */
- struct cftype *cfts;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lock_class_key lockdep_key;
+#endif
};
-/*
- * cgroupfs file entry, pointed to from leaf dentry->d_fsdata. Don't
- * access directly.
- */
-struct cfent {
- struct list_head node;
- struct dentry *dentry;
- struct cftype *type;
- struct cgroup_subsys_state *css;
-
- /* file xattrs */
- struct simple_xattrs xattrs;
-};
+extern struct cgroup_root cgrp_dfl_root;
-/* seq_file->private points to the following, only ->priv is public */
-struct cgroup_open_file {
- struct cfent *cfe;
- void *priv;
-};
+static inline bool cgroup_on_dfl(const struct cgroup *cgrp)
+{
+ return cgrp->root == &cgrp_dfl_root;
+}
/*
* See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This
@@ -516,34 +488,63 @@ static inline bool cgroup_sane_behavior(const struct cgroup *cgrp)
return cgrp->root->flags & CGRP_ROOT_SANE_BEHAVIOR;
}
-/* Caller should hold rcu_read_lock() */
-static inline const char *cgroup_name(const struct cgroup *cgrp)
+/* no synchronization, the result can only be used as a hint */
+static inline bool cgroup_has_tasks(struct cgroup *cgrp)
{
- return rcu_dereference(cgrp->name)->name;
+ return !list_empty(&cgrp->cset_links);
}
-static inline struct cgroup_subsys_state *seq_css(struct seq_file *seq)
+/* returns ino associated with a cgroup, 0 indicates unmounted root */
+static inline ino_t cgroup_ino(struct cgroup *cgrp)
{
- struct cgroup_open_file *of = seq->private;
- return of->cfe->css;
+ if (cgrp->kn)
+ return cgrp->kn->ino;
+ else
+ return 0;
}
static inline struct cftype *seq_cft(struct seq_file *seq)
{
- struct cgroup_open_file *of = seq->private;
- return of->cfe->type;
+ struct kernfs_open_file *of = seq->private;
+
+ return of->kn->priv;
+}
+
+struct cgroup_subsys_state *seq_css(struct seq_file *seq);
+
+/*
+ * Name / path handling functions. All are thin wrappers around the kernfs
+ * counterparts and can be called under any context.
+ */
+
+static inline int cgroup_name(struct cgroup *cgrp, char *buf, size_t buflen)
+{
+ return kernfs_name(cgrp->kn, buf, buflen);
}
+static inline char * __must_check cgroup_path(struct cgroup *cgrp, char *buf,
+ size_t buflen)
+{
+ return kernfs_path(cgrp->kn, buf, buflen);
+}
+
+static inline void pr_cont_cgroup_name(struct cgroup *cgrp)
+{
+ pr_cont_kernfs_name(cgrp->kn);
+}
+
+static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
+{
+ pr_cont_kernfs_path(cgrp->kn);
+}
+
+char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
+
int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_rm_cftypes(struct cftype *cfts);
bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
-int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
-int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
-
-int cgroup_task_count(const struct cgroup *cgrp);
-
/*
* Control Group taskset, used to pass around set of tasks to cgroup_subsys
* methods.
@@ -551,22 +552,15 @@ int cgroup_task_count(const struct cgroup *cgrp);
struct cgroup_taskset;
struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);
-struct cgroup_subsys_state *cgroup_taskset_cur_css(struct cgroup_taskset *tset,
- int subsys_id);
-int cgroup_taskset_size(struct cgroup_taskset *tset);
/**
* cgroup_taskset_for_each - iterate cgroup_taskset
* @task: the loop cursor
- * @skip_css: skip if task's css matches this, %NULL to iterate through all
* @tset: taskset to iterate
*/
-#define cgroup_taskset_for_each(task, skip_css, tset) \
+#define cgroup_taskset_for_each(task, tset) \
for ((task) = cgroup_taskset_first((tset)); (task); \
- (task) = cgroup_taskset_next((tset))) \
- if (!(skip_css) || \
- cgroup_taskset_cur_css((tset), \
- (skip_css)->ss->subsys_id) != (skip_css))
+ (task) = cgroup_taskset_next((tset)))
/*
* Control Group subsystem type.
@@ -591,7 +585,6 @@ struct cgroup_subsys {
struct task_struct *task);
void (*bind)(struct cgroup_subsys_state *root_css);
- int subsys_id;
int disabled;
int early_init;
@@ -610,27 +603,26 @@ struct cgroup_subsys {
bool broken_hierarchy;
bool warned_broken_hierarchy;
+ /* the following two fields are initialized automtically during boot */
+ int id;
#define MAX_CGROUP_TYPE_NAMELEN 32
const char *name;
/* link to parent, protected by cgroup_lock() */
- struct cgroupfs_root *root;
+ struct cgroup_root *root;
- /* list of cftype_sets */
- struct list_head cftsets;
+ /*
+ * List of cftypes. Each entry is the first entry of an array
+ * terminated by zero length name.
+ */
+ struct list_head cfts;
- /* base cftypes, automatically [de]registered with subsys itself */
+ /* base cftypes, automatically registered with subsys itself */
struct cftype *base_cftypes;
- struct cftype_set base_cftset;
-
- /* should be defined only by modular subsystems */
- struct module *module;
};
-#define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys;
-#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option)
+#define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys;
#include <linux/cgroup_subsys.h>
-#undef IS_SUBSYS_ENABLED
#undef SUBSYS
/**
@@ -661,10 +653,12 @@ struct cgroup_subsys_state *css_parent(struct cgroup_subsys_state *css)
*/
#ifdef CONFIG_PROVE_RCU
extern struct mutex cgroup_mutex;
+extern struct rw_semaphore css_set_rwsem;
#define task_css_set_check(task, __c) \
rcu_dereference_check((task)->cgroups, \
- lockdep_is_held(&(task)->alloc_lock) || \
- lockdep_is_held(&cgroup_mutex) || (__c))
+ lockdep_is_held(&cgroup_mutex) || \
+ lockdep_is_held(&css_set_rwsem) || \
+ ((task)->flags & PF_EXITING) || (__c))
#else
#define task_css_set_check(task, __c) \
rcu_dereference((task)->cgroups)
@@ -837,16 +831,11 @@ void css_task_iter_start(struct cgroup_subsys_state *css,
struct task_struct *css_task_iter_next(struct css_task_iter *it);
void css_task_iter_end(struct css_task_iter *it);
-int css_scan_tasks(struct cgroup_subsys_state *css,
- bool (*test)(struct task_struct *, void *),
- void (*process)(struct task_struct *, void *),
- void *data, struct ptr_heap *heap);
-
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
-struct cgroup_subsys_state *css_from_dir(struct dentry *dentry,
- struct cgroup_subsys *ss);
+struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry,
+ struct cgroup_subsys *ss);
#else /* !CONFIG_CGROUPS */
@@ -854,7 +843,7 @@ static inline int cgroup_init_early(void) { return 0; }
static inline int cgroup_init(void) { return 0; }
static inline void cgroup_fork(struct task_struct *p) {}
static inline void cgroup_post_fork(struct task_struct *p) {}
-static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
+static inline void cgroup_exit(struct task_struct *p) {}
static inline int cgroupstats_build(struct cgroupstats *stats,
struct dentry *dentry)