summaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/backing-dev.h5
-rw-r--r--include/linux/cgroup-defs.h76
-rw-r--r--include/linux/cgroup.h129
-rw-r--r--include/linux/hugetlb_cgroup.h4
-rw-r--r--include/linux/init_task.h8
-rw-r--r--include/linux/jump_label.h18
-rw-r--r--include/linux/memcontrol.h8
-rw-r--r--include/linux/sched.h12
8 files changed, 144 insertions, 116 deletions
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index c85f74946a8b..c82794f20110 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -13,7 +13,6 @@
#include <linux/sched.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>
-#include <linux/memcontrol.h>
#include <linux/blk-cgroup.h>
#include <linux/backing-dev-defs.h>
#include <linux/slab.h>
@@ -267,8 +266,8 @@ static inline bool inode_cgwb_enabled(struct inode *inode)
{
struct backing_dev_info *bdi = inode_to_bdi(inode);
- return cgroup_on_dfl(mem_cgroup_root_css->cgroup) &&
- cgroup_on_dfl(blkcg_root_css->cgroup) &&
+ return cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
+ cgroup_subsys_on_dfl(io_cgrp_subsys) &&
bdi_cap_account_dirty(bdi) &&
(bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) &&
(inode->i_sb->s_iflags & SB_I_CGROUPWB);
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 8492721b39be..60d44b26276d 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -76,6 +76,7 @@ enum {
CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */
CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */
CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */
+ CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */
/* internal flags, do not use outside cgroup core proper */
__CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */
@@ -83,6 +84,17 @@ enum {
};
/*
+ * cgroup_file is the handle for a file instance created in a cgroup which
+ * is used, for example, to generate file changed notifications. This can
+ * be obtained by setting cftype->file_offset.
+ */
+struct cgroup_file {
+ /* do not access any fields from outside cgroup core */
+ struct list_head node; /* anchored at css->files */
+ struct kernfs_node *kn;
+};
+
+/*
* Per-subsystem/per-cgroup state maintained by the system. This is the
* fundamental structural building block that controllers deal with.
*
@@ -122,6 +134,9 @@ struct cgroup_subsys_state {
*/
u64 serial_nr;
+ /* all cgroup_files associated with this css */
+ struct list_head files;
+
/* percpu_ref killing and RCU release */
struct rcu_head rcu_head;
struct work_struct destroy_work;
@@ -196,6 +211,9 @@ struct css_set {
*/
struct list_head e_cset_node[CGROUP_SUBSYS_COUNT];
+ /* all css_task_iters currently walking this cset */
+ struct list_head task_iters;
+
/* For RCU-protected deletion */
struct rcu_head rcu_head;
};
@@ -217,16 +235,16 @@ struct cgroup {
int id;
/*
- * If this cgroup contains any tasks, it contributes one to
- * populated_cnt. All children with non-zero popuplated_cnt of
- * their own contribute one. The count is zero iff there's no task
- * in this cgroup or its subtree.
+ * Each non-empty css_set associated with this cgroup contributes
+ * one to populated_cnt. All children with non-zero popuplated_cnt
+ * of their own contribute one. The count is zero iff there's no
+ * task in this cgroup or its subtree.
*/
int populated_cnt;
struct kernfs_node *kn; /* cgroup kernfs entry */
- struct kernfs_node *procs_kn; /* kn for "cgroup.procs" */
- struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */
+ struct cgroup_file procs_file; /* handle for "cgroup.procs" */
+ struct cgroup_file events_file; /* handle for "cgroup.events" */
/*
* The bitmask of subsystems enabled on the child cgroups.
@@ -324,11 +342,6 @@ struct cftype {
*/
char name[MAX_CFTYPE_NAME];
unsigned long private;
- /*
- * If not 0, file mode is set to this value, otherwise it will
- * be figured out automatically
- */
- umode_t mode;
/*
* The maximum length of string, excluding trailing nul, that can
@@ -340,6 +353,14 @@ struct cftype {
unsigned int flags;
/*
+ * If non-zero, should contain the offset from the start of css to
+ * a struct cgroup_file field. cgroup will record the handle of
+ * the created file into it. The recorded handle can be used as
+ * long as the containing css remains accessible.
+ */
+ unsigned int file_offset;
+
+ /*
* Fields used for internal bookkeeping. Initialized automatically
* during registration.
*/
@@ -414,12 +435,10 @@ struct cgroup_subsys {
int (*can_fork)(struct task_struct *task, void **priv_p);
void (*cancel_fork)(struct task_struct *task, void *priv);
void (*fork)(struct task_struct *task, void *priv);
- void (*exit)(struct cgroup_subsys_state *css,
- struct cgroup_subsys_state *old_css,
- struct task_struct *task);
+ void (*exit)(struct task_struct *task);
+ void (*free)(struct task_struct *task);
void (*bind)(struct cgroup_subsys_state *root_css);
- int disabled;
int early_init;
/*
@@ -473,8 +492,31 @@ struct cgroup_subsys {
unsigned int depends_on;
};
-void cgroup_threadgroup_change_begin(struct task_struct *tsk);
-void cgroup_threadgroup_change_end(struct task_struct *tsk);
+extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
+
+/**
+ * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups
+ * @tsk: target task
+ *
+ * Called from threadgroup_change_begin() and allows cgroup operations to
+ * synchronize against threadgroup changes using a percpu_rw_semaphore.
+ */
+static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk)
+{
+ percpu_down_read(&cgroup_threadgroup_rwsem);
+}
+
+/**
+ * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups
+ * @tsk: target task
+ *
+ * Called from threadgroup_change_end(). Counterpart of
+ * cgroup_threadcgroup_change_begin().
+ */
+static inline void cgroup_threadgroup_change_end(struct task_struct *tsk)
+{
+ percpu_up_read(&cgroup_threadgroup_rwsem);
+}
#else /* CONFIG_CGROUPS */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index eb7ca55f72ef..22e3754f89c5 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -13,10 +13,10 @@
#include <linux/nodemask.h>
#include <linux/rculist.h>
#include <linux/cgroupstats.h>
-#include <linux/rwsem.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/kernfs.h>
+#include <linux/jump_label.h>
#include <linux/cgroup-defs.h>
@@ -41,6 +41,10 @@ struct css_task_iter {
struct list_head *task_pos;
struct list_head *tasks_head;
struct list_head *mg_tasks_head;
+
+ struct css_set *cur_cset;
+ struct task_struct *cur_task;
+ struct list_head iters_node; /* css_set->task_iters */
};
extern struct cgroup_root cgrp_dfl_root;
@@ -50,6 +54,26 @@ extern struct css_set init_css_set;
#include <linux/cgroup_subsys.h>
#undef SUBSYS
+#define SUBSYS(_x) \
+ extern struct static_key_true _x ## _cgrp_subsys_enabled_key; \
+ extern struct static_key_true _x ## _cgrp_subsys_on_dfl_key;
+#include <linux/cgroup_subsys.h>
+#undef SUBSYS
+
+/**
+ * cgroup_subsys_enabled - fast test on whether a subsys is enabled
+ * @ss: subsystem in question
+ */
+#define cgroup_subsys_enabled(ss) \
+ static_branch_likely(&ss ## _enabled_key)
+
+/**
+ * cgroup_subsys_on_dfl - fast test on whether a subsys is on default hierarchy
+ * @ss: subsystem in question
+ */
+#define cgroup_subsys_on_dfl(ss) \
+ static_branch_likely(&ss ## _on_dfl_key)
+
bool css_has_online_children(struct cgroup_subsys_state *css);
struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
@@ -78,6 +102,7 @@ extern void cgroup_cancel_fork(struct task_struct *p,
extern void cgroup_post_fork(struct task_struct *p,
void *old_ss_priv[CGROUP_CANFORK_COUNT]);
void cgroup_exit(struct task_struct *p);
+void cgroup_free(struct task_struct *p);
int cgroup_init_early(void);
int cgroup_init(void);
@@ -211,11 +236,33 @@ void css_task_iter_end(struct css_task_iter *it);
* cgroup_taskset_for_each - iterate cgroup_taskset
* @task: the loop cursor
* @tset: taskset to iterate
+ *
+ * @tset may contain multiple tasks and they may belong to multiple
+ * processes. When there are multiple tasks in @tset, if a task of a
+ * process is in @tset, all tasks of the process are in @tset. Also, all
+ * are guaranteed to share the same source and destination csses.
+ *
+ * Iteration is not in any specific order.
*/
#define cgroup_taskset_for_each(task, tset) \
for ((task) = cgroup_taskset_first((tset)); (task); \
(task) = cgroup_taskset_next((tset)))
+/**
+ * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset
+ * @leader: the loop cursor
+ * @tset: takset to iterate
+ *
+ * Iterate threadgroup leaders of @tset. For single-task migrations, @tset
+ * may not contain any.
+ */
+#define cgroup_taskset_for_each_leader(leader, tset) \
+ for ((leader) = cgroup_taskset_first((tset)); (leader); \
+ (leader) = cgroup_taskset_next((tset))) \
+ if ((leader) != (leader)->group_leader) \
+ ; \
+ else
+
/*
* Inline functions.
*/
@@ -320,11 +367,11 @@ static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
*/
#ifdef CONFIG_PROVE_RCU
extern struct mutex cgroup_mutex;
-extern struct rw_semaphore css_set_rwsem;
+extern spinlock_t css_set_lock;
#define task_css_set_check(task, __c) \
rcu_dereference_check((task)->cgroups, \
lockdep_is_held(&cgroup_mutex) || \
- lockdep_is_held(&css_set_rwsem) || \
+ lockdep_is_held(&css_set_lock) || \
((task)->flags & PF_EXITING) || (__c))
#else
#define task_css_set_check(task, __c) \
@@ -412,68 +459,10 @@ static inline struct cgroup *task_cgroup(struct task_struct *task,
return task_css(task, subsys_id)->cgroup;
}
-/**
- * cgroup_on_dfl - test whether a cgroup is on the default hierarchy
- * @cgrp: the cgroup of interest
- *
- * The default hierarchy is the v2 interface of cgroup and this function
- * can be used to test whether a cgroup is on the default hierarchy for
- * cases where a subsystem should behave differnetly depending on the
- * interface version.
- *
- * The set of behaviors which change on the default hierarchy are still
- * being determined and the mount option is prefixed with __DEVEL__.
- *
- * List of changed behaviors:
- *
- * - Mount options "noprefix", "xattr", "clone_children", "release_agent"
- * and "name" are disallowed.
- *
- * - When mounting an existing superblock, mount options should match.
- *
- * - Remount is disallowed.
- *
- * - rename(2) is disallowed.
- *
- * - "tasks" is removed. Everything should be at process granularity. Use
- * "cgroup.procs" instead.
- *
- * - "cgroup.procs" is not sorted. pids will be unique unless they got
- * recycled inbetween reads.
- *
- * - "release_agent" and "notify_on_release" are removed. Replacement
- * notification mechanism will be implemented.
- *
- * - "cgroup.clone_children" is removed.
- *
- * - "cgroup.subtree_populated" is available. Its value is 0 if the cgroup
- * and its descendants contain no task; otherwise, 1. The file also
- * generates kernfs notification which can be monitored through poll and
- * [di]notify when the value of the file changes.
- *
- * - cpuset: tasks will be kept in empty cpusets when hotplug happens and
- * take masks of ancestors with non-empty cpus/mems, instead of being
- * moved to an ancestor.
- *
- * - cpuset: a task can be moved into an empty cpuset, and again it takes
- * masks of ancestors.
- *
- * - memcg: use_hierarchy is on by default and the cgroup file for the flag
- * is not created.
- *
- * - blkcg: blk-throttle becomes properly hierarchical.
- *
- * - debug: disallowed on the default hierarchy.
- */
-static inline bool cgroup_on_dfl(const struct cgroup *cgrp)
-{
- return cgrp->root == &cgrp_dfl_root;
-}
-
/* no synchronization, the result can only be used as a hint */
-static inline bool cgroup_has_tasks(struct cgroup *cgrp)
+static inline bool cgroup_is_populated(struct cgroup *cgrp)
{
- return !list_empty(&cgrp->cset_links);
+ return cgrp->populated_cnt;
}
/* returns ino associated with a cgroup */
@@ -527,6 +516,19 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
pr_cont_kernfs_path(cgrp->kn);
}
+/**
+ * cgroup_file_notify - generate a file modified event for a cgroup_file
+ * @cfile: target cgroup_file
+ *
+ * @cfile must have been obtained by setting cftype->file_offset.
+ */
+static inline void cgroup_file_notify(struct cgroup_file *cfile)
+{
+ /* might not have been created due to one of the CFTYPE selector flags */
+ if (cfile->kn)
+ kernfs_notify(cfile->kn);
+}
+
#else /* !CONFIG_CGROUPS */
struct cgroup_subsys_state;
@@ -546,6 +548,7 @@ static inline void cgroup_cancel_fork(struct task_struct *p,
static inline void cgroup_post_fork(struct task_struct *p,
void *ss_priv[CGROUP_CANFORK_COUNT]) {}
static inline void cgroup_exit(struct task_struct *p) {}
+static inline void cgroup_free(struct task_struct *p) {}
static inline int cgroup_init_early(void) { return 0; }
static inline int cgroup_init(void) { return 0; }
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index bcc853eccc85..7edd30515298 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -48,9 +48,7 @@ int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg)
static inline bool hugetlb_cgroup_disabled(void)
{
- if (hugetlb_cgrp_subsys.disabled)
- return true;
- return false;
+ return !cgroup_subsys_enabled(hugetlb_cgrp_subsys);
}
extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 810a34f60424..1c1ff7e4faa4 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -25,13 +25,6 @@
extern struct files_struct init_files;
extern struct fs_struct init_fs;
-#ifdef CONFIG_CGROUPS
-#define INIT_GROUP_RWSEM(sig) \
- .group_rwsem = __RWSEM_INITIALIZER(sig.group_rwsem),
-#else
-#define INIT_GROUP_RWSEM(sig)
-#endif
-
#ifdef CONFIG_CPUSETS
#define INIT_CPUSET_SEQ(tsk) \
.mems_allowed_seq = SEQCNT_ZERO(tsk.mems_allowed_seq),
@@ -65,7 +58,6 @@ extern struct fs_struct init_fs;
INIT_PREV_CPUTIME(sig) \
.cred_guard_mutex = \
__MUTEX_INITIALIZER(sig.cred_guard_mutex), \
- INIT_GROUP_RWSEM(sig) \
}
extern struct nsproxy init_nsproxy;
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index f1094238ab2a..8dde55974f18 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -214,11 +214,6 @@ static inline int jump_label_apply_nops(struct module *mod)
#define STATIC_KEY_INIT STATIC_KEY_INIT_FALSE
#define jump_label_enabled static_key_enabled
-static inline bool static_key_enabled(struct static_key *key)
-{
- return static_key_count(key) > 0;
-}
-
static inline void static_key_enable(struct static_key *key)
{
int count = static_key_count(key);
@@ -265,6 +260,17 @@ struct static_key_false {
#define DEFINE_STATIC_KEY_FALSE(name) \
struct static_key_false name = STATIC_KEY_FALSE_INIT
+extern bool ____wrong_branch_error(void);
+
+#define static_key_enabled(x) \
+({ \
+ if (!__builtin_types_compatible_p(typeof(*x), struct static_key) && \
+ !__builtin_types_compatible_p(typeof(*x), struct static_key_true) &&\
+ !__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \
+ ____wrong_branch_error(); \
+ static_key_count((struct static_key *)x) > 0; \
+})
+
#ifdef HAVE_JUMP_LABEL
/*
@@ -323,8 +329,6 @@ struct static_key_false {
* See jump_label_type() / jump_label_init_type().
*/
-extern bool ____wrong_branch_error(void);
-
#define static_branch_likely(x) \
({ \
bool branch; \
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 3e3318ddfc0e..27251ed428f7 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -213,6 +213,9 @@ struct mem_cgroup {
/* OOM-Killer disable */
int oom_kill_disable;
+ /* handle for "memory.events" */
+ struct cgroup_file events_file;
+
/* protect arrays of thresholds */
struct mutex thresholds_lock;
@@ -285,6 +288,7 @@ static inline void mem_cgroup_events(struct mem_cgroup *memcg,
unsigned int nr)
{
this_cpu_add(memcg->stat->events[idx], nr);
+ cgroup_file_notify(&memcg->events_file);
}
bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg);
@@ -346,9 +350,7 @@ ino_t page_cgroup_ino(struct page *page);
static inline bool mem_cgroup_disabled(void)
{
- if (memory_cgrp_subsys.disabled)
- return true;
- return false;
+ return !cgroup_subsys_enabled(memory_cgrp_subsys);
}
/*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c115d617739d..4effb1025fbb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -771,18 +771,6 @@ struct signal_struct {
unsigned audit_tty_log_passwd;
struct tty_audit_buf *tty_audit_buf;
#endif
-#ifdef CONFIG_CGROUPS
- /*
- * group_rwsem prevents new tasks from entering the threadgroup and
- * member tasks from exiting,a more specifically, setting of
- * PF_EXITING. fork and exit paths are protected with this rwsem
- * using threadgroup_change_begin/end(). Users which require
- * threadgroup to remain stable should use threadgroup_[un]lock()
- * which also takes care of exec path. Currently, cgroup is the
- * only user.
- */
- struct rw_semaphore group_rwsem;
-#endif
oom_flags_t oom_flags;
short oom_score_adj; /* OOM kill score adjustment */