From 2468c7234b366eeb799ee0648cb58f9cba394a54 Mon Sep 17 00:00:00 2001 From: Daisuke Nishimura Date: Wed, 10 Mar 2010 15:22:03 -0800 Subject: cgroup: introduce cancel_attach() Add cancel_attach() operation to struct cgroup_subsys. cancel_attach() can be used when can_attach() operation prepares something for the subsys, but we should rollback what can_attach() operation has prepared if attach task fails after we've succeeded in can_attach(). Signed-off-by: Daisuke Nishimura Acked-by: Li Zefan Reviewed-by: Paul Menage Cc: Balbir Singh Acked-by: KAMEZAWA Hiroyuki Cc: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c9bbcb2a75ae..d08cfe7e12e5 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -428,6 +428,8 @@ struct cgroup_subsys { void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp); int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, struct task_struct *tsk, bool threadgroup); + void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, + struct task_struct *tsk, bool threadgroup); void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp, struct cgroup *old_cgrp, struct task_struct *tsk, bool threadgroup); -- cgit v1.2.3 From d7b9fff711d5e8db8c844161c684017e556c38a0 Mon Sep 17 00:00:00 2001 From: Daisuke Nishimura Date: Wed, 10 Mar 2010 15:22:05 -0800 Subject: cgroup: introduce coalesce css_get() and css_put() Current css_get() and css_put() increment/decrement css->refcnt one by one. This patch add a new function __css_get(), which takes "count" as a arg and increment the css->refcnt by "count". And this patch also add a new arg("count") to __css_put() and change the function to decrement the css->refcnt by "count". These coalesce version of __css_get()/__css_put() will be used to improve performance of memcg's moving charge feature later, where instead of calling css_get()/css_put() repeatedly, these new functions will be used. No change is needed for current users of css_get()/css_put(). Signed-off-by: Daisuke Nishimura Acked-by: Paul Menage Cc: Balbir Singh Acked-by: KAMEZAWA Hiroyuki Cc: Li Zefan Cc: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index d08cfe7e12e5..14160b5b693f 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -76,6 +76,12 @@ enum { CSS_REMOVED, /* This CSS is dead */ }; +/* Caller must verify that the css is not for root cgroup */ +static inline void __css_get(struct cgroup_subsys_state *css, int count) +{ + atomic_add(count, &css->refcnt); +} + /* * Call css_get() to hold a reference on the css; it can be used * for a reference obtained via: @@ -87,7 +93,7 @@ static inline void css_get(struct cgroup_subsys_state *css) { /* We don't need to reference count the root state */ if (!test_bit(CSS_ROOT, &css->flags)) - atomic_inc(&css->refcnt); + __css_get(css, 1); } static inline bool css_is_removed(struct cgroup_subsys_state *css) @@ -118,11 +124,11 @@ static inline bool css_tryget(struct cgroup_subsys_state *css) * css_get() or css_tryget() */ -extern void __css_put(struct cgroup_subsys_state *css); +extern void __css_put(struct cgroup_subsys_state *css, int count); static inline void css_put(struct cgroup_subsys_state *css) { if (!test_bit(CSS_ROOT, &css->flags)) - __css_put(css); + __css_put(css, 1); } /* bits in struct cgroup flags field */ -- cgit v1.2.3 From aae8aab40367036931608fdaf9e2dc568b516f19 Mon Sep 17 00:00:00 2001 From: Ben Blum Date: Wed, 10 Mar 2010 15:22:07 -0800 Subject: cgroups: revamp subsys array This patch series provides the ability for cgroup subsystems to be compiled as modules both within and outside the kernel tree. This is mainly useful for classifiers and subsystems that hook into components that are already modules. cls_cgroup and blkio-cgroup serve as the example use cases for this feature. It provides an interface cgroup_load_subsys() and cgroup_unload_subsys() which modular subsystems can use to register and depart during runtime. The net_cls classifier subsystem serves as the example for a subsystem which can be converted into a module using these changes. Patch #1 sets up the subsys[] array so its contents can be dynamic as modules appear and (eventually) disappear. Iterations over the array are modified to handle when subsystems are absent, and the dynamic section of the array is protected by cgroup_mutex. Patch #2 implements an interface for modules to load subsystems, called cgroup_load_subsys, similar to cgroup_init_subsys, and adds a module pointer in struct cgroup_subsys. Patch #3 adds a mechanism for unloading modular subsystems, which includes a more advanced rework of the rudimentary reference counting introduced in patch 2. Patch #4 modifies the net_cls subsystem, which already had some module declarations, to be configurable as a module, which also serves as a simple proof-of-concept. Part of implementing patches 2 and 4 involved updating css pointers in each css_set when the module appears or leaves. In doing this, it was discovered that css_sets always remain linked to the dummy cgroup, regardless of whether or not any subsystems are actually bound to it (i.e., not mounted on an actual hierarchy). The subsystem loading and unloading code therefore should keep in mind the special cases where the added subsystem is the only one in the dummy cgroup (and therefore all css_sets need to be linked back into it) and where the removed subsys was the only one in the dummy cgroup (and therefore all css_sets should be unlinked from it) - however, as all css_sets always stay attached to the dummy cgroup anyway, these cases are ignored. Any fix that addresses this issue should also make sure these cases are addressed in the subsystem loading and unloading code. This patch: Make subsys[] able to be dynamically populated to support modular subsystems This patch reworks the way the subsys[] array is used so that subsystems can register themselves after boot time, and enables the internals of cgroups to be able to handle when subsystems are not present or may appear/disappear. Signed-off-by: Ben Blum Acked-by: Li Zefan Cc: Paul Menage Cc: "David S. Miller" Cc: KAMEZAWA Hiroyuki Cc: Lai Jiangshan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 14160b5b693f..28319a9fe569 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -40,13 +40,19 @@ extern int cgroupstats_build(struct cgroupstats *stats, extern const struct file_operations proc_cgroup_operations; -/* Define the enumeration of all cgroup subsystems */ +/* Define the enumeration of all builtin cgroup subsystems */ #define SUBSYS(_x) _x ## _subsys_id, enum cgroup_subsys_id { #include - CGROUP_SUBSYS_COUNT + CGROUP_BUILTIN_SUBSYS_COUNT }; #undef SUBSYS +/* + * This define indicates the maximum number of subsystems that can be loaded + * at once. We limit to this many since cgroupfs_root has subsys_bits to keep + * track of all of them. + */ +#define CGROUP_SUBSYS_COUNT (BITS_PER_BYTE*sizeof(unsigned long)) /* Per-subsystem/per-cgroup state maintained by the system. */ struct cgroup_subsys_state { -- cgit v1.2.3 From e6a1105ba08b265023dd71a4174fb4a29ebc7083 Mon Sep 17 00:00:00 2001 From: Ben Blum Date: Wed, 10 Mar 2010 15:22:09 -0800 Subject: cgroups: subsystem module loading interface Add interface between cgroups subsystem management and module loading This patch implements rudimentary module-loading support for cgroups - namely, a cgroup_load_subsys (similar to cgroup_init_subsys) for use as a module initcall, and a struct module pointer in struct cgroup_subsys. Several functions that might be wanted by modules have had EXPORT_SYMBOL added to them, but it's unclear exactly which functions want it and which won't. Signed-off-by: Ben Blum Acked-by: Li Zefan Cc: Paul Menage Cc: "David S. Miller" Cc: KAMEZAWA Hiroyuki Cc: Lai Jiangshan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 28319a9fe569..402ce477c47e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -37,6 +37,7 @@ extern void cgroup_post_fork(struct task_struct *p); extern void cgroup_exit(struct task_struct *p, int run_callbacks); extern int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry); +extern int cgroup_load_subsys(struct cgroup_subsys *ss); extern const struct file_operations proc_cgroup_operations; @@ -486,6 +487,9 @@ struct cgroup_subsys { /* used when use_id == true */ struct idr idr; spinlock_t id_lock; + + /* should be defined only by modular subsystems */ + struct module *module; }; #define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys; -- cgit v1.2.3 From cf5d5941fda647fe3d2f2d00cf9e0245236a5f08 Mon Sep 17 00:00:00 2001 From: Ben Blum Date: Wed, 10 Mar 2010 15:22:09 -0800 Subject: cgroups: subsystem module unloading Provides support for unloading modular subsystems. This patch adds a new function cgroup_unload_subsys which is to be used for removing a loaded subsystem during module deletion. Reference counting of the subsystems' modules is moved from once (at load time) to once per attached hierarchy (in parse_cgroupfs_options and rebind_subsystems) (i.e., 0 or 1). Signed-off-by: Ben Blum Acked-by: Li Zefan Cc: Paul Menage Cc: "David S. Miller" Cc: KAMEZAWA Hiroyuki Cc: Lai Jiangshan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 402ce477c47e..2a59d3101e5d 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -38,6 +38,7 @@ extern void cgroup_exit(struct task_struct *p, int run_callbacks); extern int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry); extern int cgroup_load_subsys(struct cgroup_subsys *ss); +extern void cgroup_unload_subsys(struct cgroup_subsys *ss); extern const struct file_operations proc_cgroup_operations; @@ -271,7 +272,8 @@ struct css_set { /* * Set of subsystem states, one for each subsystem. This array * is immutable after creation apart from the init_css_set - * during subsystem registration (at boot time). + * during subsystem registration (at boot time) and modular subsystem + * loading/unloading. */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; -- cgit v1.2.3 From 0dea116876eefc9c7ca9c5d74fe665481e499fa3 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 10 Mar 2010 15:22:20 -0800 Subject: cgroup: implement eventfd-based generic API for notifications This patchset introduces eventfd-based API for notifications in cgroups and implements memory notifications on top of it. It uses statistics in memory controler to track memory usage. Output of time(1) on building kernel on tmpfs: Root cgroup before changes: make -j2 506.37 user 60.93s system 193% cpu 4:52.77 total Non-root cgroup before changes: make -j2 507.14 user 62.66s system 193% cpu 4:54.74 total Root cgroup after changes (0 thresholds): make -j2 507.13 user 62.20s system 193% cpu 4:53.55 total Non-root cgroup after changes (0 thresholds): make -j2 507.70 user 64.20s system 193% cpu 4:55.70 total Root cgroup after changes (1 thresholds, never crossed): make -j2 506.97 user 62.20s system 193% cpu 4:53.90 total Non-root cgroup after changes (1 thresholds, never crossed): make -j2 507.55 user 64.08s system 193% cpu 4:55.63 total This patch: Introduce the write-only file "cgroup.event_control" in every cgroup. To register new notification handler you need: - create an eventfd; - open a control file to be monitored. Callbacks register_event() and unregister_event() must be defined for the control file; - write " " to cgroup.event_control. Interpretation of args is defined by control file implementation; eventfd will be woken up by control file implementation or when the cgroup is removed. To unregister notification handler just close eventfd. If you need notification functionality for a control file you have to implement callbacks register_event() and unregister_event() in the struct cftype. [kamezawa.hiroyu@jp.fujitsu.com: Kconfig fix] Signed-off-by: Kirill A. Shutemov Reviewed-by: KAMEZAWA Hiroyuki Paul Menage Cc: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Cc: Dan Malek Cc: Vladislav Buzov Cc: Daisuke Nishimura Cc: Alexander Shishkin Cc: Davide Libenzi Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 2a59d3101e5d..b4f2201321cd 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -235,6 +235,10 @@ struct cgroup { /* For RCU-protected deletion */ struct rcu_head rcu_head; + + /* List of events which userspace want to recieve */ + struct list_head event_list; + spinlock_t event_list_lock; }; /* @@ -378,6 +382,26 @@ struct cftype { int (*trigger)(struct cgroup *cgrp, unsigned int event); int (*release)(struct inode *inode, struct file *file); + + /* + * register_event() callback will be used to add new userspace + * waiter for changes related to the cftype. Implement it if + * you want to provide this functionality. Use eventfd_signal() + * on eventfd to send notification to userspace. + */ + int (*register_event)(struct cgroup *cgrp, struct cftype *cft, + struct eventfd_ctx *eventfd, const char *args); + /* + * unregister_event() callback will be called when userspace + * closes the eventfd or on cgroup removing. + * This callback must be implemented, if you want provide + * notification functionality. + * + * Be careful. It can be called after destroy(), so you have + * to keep all nesessary data, until all events are removed. + */ + int (*unregister_event)(struct cgroup *cgrp, struct cftype *cft, + struct eventfd_ctx *eventfd); }; struct cgroup_scanner { -- cgit v1.2.3 From a0a4db548edcce067c1201ef25cf2bc29f32dca4 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 10 Mar 2010 15:22:34 -0800 Subject: cgroups: remove events before destroying subsystem state objects Events should be removed after rmdir of cgroup directory, but before destroying subsystem state objects. Let's take reference to cgroup directory dentry to do that. Signed-off-by: Kirill A. Shutemov Acked-by: KAMEZAWA Hiroyuki Cc: Paul Menage Acked-by: Li Zefan Cc: Balbir Singh Cc: Pavel Emelyanov Cc: Dan Malek Cc: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux/cgroup.h') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b4f2201321cd..b8ad1ea99586 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -396,9 +396,6 @@ struct cftype { * closes the eventfd or on cgroup removing. * This callback must be implemented, if you want provide * notification functionality. - * - * Be careful. It can be called after destroy(), so you have - * to keep all nesessary data, until all events are removed. */ int (*unregister_event)(struct cgroup *cgrp, struct cftype *cft, struct eventfd_ctx *eventfd); -- cgit v1.2.3