From 90129625d9203a917fc1d3e4768976ba90d71b44 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 5 Jan 2019 00:38:03 -0500 Subject: cgroup: start switching to fs_context Unfortunately, cgroup is tangled into kernfs infrastructure. To avoid converting all kernfs-based filesystems at once, we need to untangle the remount part of things, instead of having it go through kernfs_sop_remount_fs(). Fortunately, it's not hard to do. This commit just gets cgroup/cgroup1 to use fs_context to deliver options on mount and remount paths. Parsing those is going to be done in the next commits; for now we do pretty much what legacy case does. Signed-off-by: Al Viro --- kernel/cgroup/cgroup-internal.h | 14 +++++ kernel/cgroup/cgroup-v1.c | 9 +-- kernel/cgroup/cgroup.c | 134 +++++++++++++++++++++++++++++----------- 3 files changed, 116 insertions(+), 41 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index c9a35f09e4b9..a89cb0ba7a68 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -7,6 +7,7 @@ #include #include #include +#include #define TRACE_CGROUP_PATH_LEN 1024 extern spinlock_t trace_cgroup_path_lock; @@ -36,6 +37,18 @@ extern void __init enable_debug_cgroup(void); } \ } while (0) +/* + * The cgroup filesystem superblock creation/mount context. + */ +struct cgroup_fs_context { + char *data; +}; + +static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc) +{ + return fc->fs_private; +} + /* * A cgroup can be associated with multiple css_sets as different tasks may * belong to different cgroups on different hierarchies. In the other @@ -255,5 +268,6 @@ void cgroup1_check_for_release(struct cgroup *cgrp); struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, void *data, unsigned long magic, struct cgroup_namespace *ns); +int cgroup1_reconfigure(struct fs_context *ctx); #endif /* __CGROUP_INTERNAL_H */ diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index f94a7229974e..e377e19dd3e6 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1046,17 +1046,19 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) return 0; } -static int cgroup1_remount(struct kernfs_root *kf_root, int *flags, char *data) +int cgroup1_reconfigure(struct fs_context *fc) { - int ret = 0; + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + struct kernfs_root *kf_root = kernfs_root_from_sb(fc->root->d_sb); struct cgroup_root *root = cgroup_root_from_kf(kf_root); + int ret = 0; struct cgroup_sb_opts opts; u16 added_mask, removed_mask; cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); /* See what subsystems are wanted */ - ret = parse_cgroupfs_options(data, &opts); + ret = parse_cgroupfs_options(ctx->data, &opts); if (ret) goto out_unlock; @@ -1106,7 +1108,6 @@ static int cgroup1_remount(struct kernfs_root *kf_root, int *flags, char *data) struct kernfs_syscall_ops cgroup1_kf_syscall_ops = { .rename = cgroup1_rename, .show_options = cgroup1_show_options, - .remount_fs = cgroup1_remount, .mkdir = cgroup_mkdir, .rmdir = cgroup_rmdir, .show_path = cgroup_show_path, diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 7fd9f22e406d..7f7db5f967e3 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1811,12 +1811,13 @@ static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root return 0; } -static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) +static int cgroup_reconfigure(struct fs_context *fc) { + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); unsigned int root_flags; int ret; - ret = parse_cgroup_root_flags(data, &root_flags); + ret = parse_cgroup_root_flags(ctx->data, &root_flags); if (ret) return ret; @@ -2067,21 +2068,98 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, return dentry; } -static struct dentry *cgroup_mount(struct file_system_type *fs_type, - int flags, const char *unused_dev_name, - void *data) +/* + * Destroy a cgroup filesystem context. + */ +static void cgroup_fs_context_free(struct fs_context *fc) +{ + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + + kfree(ctx); +} + +static int cgroup_parse_monolithic(struct fs_context *fc, void *data) +{ + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + + ctx->data = data; + if (ctx->data) + security_sb_eat_lsm_opts(ctx->data, &fc->security); + return 0; +} + +static int cgroup_get_tree(struct fs_context *fc) { struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; - struct dentry *dentry; + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + unsigned int root_flags; + struct dentry *root; int ret; - get_cgroup_ns(ns); + /* Check if the caller has permission to mount. */ + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) + return -EPERM; + + ret = parse_cgroup_root_flags(ctx->data, &root_flags); + if (ret) + return ret; + + cgrp_dfl_visible = true; + cgroup_get_live(&cgrp_dfl_root.cgrp); + + root = cgroup_do_mount(&cgroup2_fs_type, fc->sb_flags, &cgrp_dfl_root, + CGROUP2_SUPER_MAGIC, ns); + if (IS_ERR(root)) + return PTR_ERR(root); + + apply_cgroup_root_flags(root_flags); + fc->root = root; + return 0; +} + +static int cgroup1_get_tree(struct fs_context *fc) +{ + struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + struct dentry *root; /* Check if the caller has permission to mount. */ - if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) { - put_cgroup_ns(ns); - return ERR_PTR(-EPERM); - } + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) + return -EPERM; + + root = cgroup1_mount(&cgroup_fs_type, fc->sb_flags, ctx->data, + CGROUP_SUPER_MAGIC, ns); + if (IS_ERR(root)) + return PTR_ERR(root); + + fc->root = root; + return 0; +} + +static const struct fs_context_operations cgroup_fs_context_ops = { + .free = cgroup_fs_context_free, + .parse_monolithic = cgroup_parse_monolithic, + .get_tree = cgroup_get_tree, + .reconfigure = cgroup_reconfigure, +}; + +static const struct fs_context_operations cgroup1_fs_context_ops = { + .free = cgroup_fs_context_free, + .parse_monolithic = cgroup_parse_monolithic, + .get_tree = cgroup1_get_tree, + .reconfigure = cgroup1_reconfigure, +}; + +/* + * Initialise the cgroup filesystem creation/reconfiguration context. + */ +static int cgroup_init_fs_context(struct fs_context *fc) +{ + struct cgroup_fs_context *ctx; + + ctx = kzalloc(sizeof(struct cgroup_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; /* * The first time anyone tries to mount a cgroup, enable the list @@ -2090,29 +2168,12 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, if (!use_task_css_set_links) cgroup_enable_task_cg_lists(); - if (fs_type == &cgroup2_fs_type) { - unsigned int root_flags; - - ret = parse_cgroup_root_flags(data, &root_flags); - if (ret) { - put_cgroup_ns(ns); - return ERR_PTR(ret); - } - - cgrp_dfl_visible = true; - cgroup_get_live(&cgrp_dfl_root.cgrp); - - dentry = cgroup_do_mount(&cgroup2_fs_type, flags, &cgrp_dfl_root, - CGROUP2_SUPER_MAGIC, ns); - if (!IS_ERR(dentry)) - apply_cgroup_root_flags(root_flags); - } else { - dentry = cgroup1_mount(&cgroup_fs_type, flags, data, - CGROUP_SUPER_MAGIC, ns); - } - - put_cgroup_ns(ns); - return dentry; + fc->fs_private = ctx; + if (fc->fs_type == &cgroup2_fs_type) + fc->ops = &cgroup_fs_context_ops; + else + fc->ops = &cgroup1_fs_context_ops; + return 0; } static void cgroup_kill_sb(struct super_block *sb) @@ -2136,14 +2197,14 @@ static void cgroup_kill_sb(struct super_block *sb) struct file_system_type cgroup_fs_type = { .name = "cgroup", - .mount = cgroup_mount, + .init_fs_context = cgroup_init_fs_context, .kill_sb = cgroup_kill_sb, .fs_flags = FS_USERNS_MOUNT, }; static struct file_system_type cgroup2_fs_type = { .name = "cgroup2", - .mount = cgroup_mount, + .init_fs_context = cgroup_init_fs_context, .kill_sb = cgroup_kill_sb, .fs_flags = FS_USERNS_MOUNT, }; @@ -5268,7 +5329,6 @@ int cgroup_rmdir(struct kernfs_node *kn) static struct kernfs_syscall_ops cgroup_kf_syscall_ops = { .show_options = cgroup_show_options, - .remount_fs = cgroup_remount, .mkdir = cgroup_mkdir, .rmdir = cgroup_rmdir, .show_path = cgroup_show_path, -- cgit v1.2.3 From 7feeef58690a5ea8c5033d43e696ef41b28d82eb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 16 Jan 2019 21:23:02 -0500 Subject: cgroup: fold cgroup1_mount() into cgroup1_get_tree() Signed-off-by: Al Viro --- kernel/cgroup/cgroup-internal.h | 4 +--- kernel/cgroup/cgroup-v1.c | 26 +++++++++++++++++--------- kernel/cgroup/cgroup.c | 19 ------------------- 3 files changed, 18 insertions(+), 31 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index a89cb0ba7a68..37836d598ff8 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -265,9 +265,7 @@ bool cgroup1_ssid_disabled(int ssid); void cgroup1_pidlist_destroy_all(struct cgroup *cgrp); void cgroup1_release_agent(struct work_struct *work); void cgroup1_check_for_release(struct cgroup *cgrp); -struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, - void *data, unsigned long magic, - struct cgroup_namespace *ns); +int cgroup1_get_tree(struct fs_context *fc); int cgroup1_reconfigure(struct fs_context *ctx); #endif /* __CGROUP_INTERNAL_H */ diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index e377e19dd3e6..7ae3810dcbdf 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1113,20 +1113,24 @@ struct kernfs_syscall_ops cgroup1_kf_syscall_ops = { .show_path = cgroup_show_path, }; -struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, - void *data, unsigned long magic, - struct cgroup_namespace *ns) +int cgroup1_get_tree(struct fs_context *fc) { + struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); struct cgroup_sb_opts opts; struct cgroup_root *root; struct cgroup_subsys *ss; struct dentry *dentry; int i, ret; + /* Check if the caller has permission to mount. */ + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) + return -EPERM; + cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); /* First find the desired set of subsystems */ - ret = parse_cgroupfs_options(data, &opts); + ret = parse_cgroupfs_options(ctx->data, &opts); if (ret) goto out_unlock; @@ -1228,19 +1232,23 @@ out_free: kfree(opts.name); if (ret) - return ERR_PTR(ret); + return ret; - dentry = cgroup_do_mount(&cgroup_fs_type, flags, root, + dentry = cgroup_do_mount(&cgroup_fs_type, fc->sb_flags, root, CGROUP_SUPER_MAGIC, ns); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); - if (!IS_ERR(dentry) && percpu_ref_is_dying(&root->cgrp.self.refcnt)) { + if (percpu_ref_is_dying(&root->cgrp.self.refcnt)) { struct super_block *sb = dentry->d_sb; dput(dentry); deactivate_locked_super(sb); msleep(10); - dentry = ERR_PTR(restart_syscall()); + return restart_syscall(); } - return dentry; + + fc->root = dentry; + return 0; } static int __init cgroup1_wq_init(void) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 7f7db5f967e3..0652f74064a2 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2117,25 +2117,6 @@ static int cgroup_get_tree(struct fs_context *fc) return 0; } -static int cgroup1_get_tree(struct fs_context *fc) -{ - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; - struct cgroup_fs_context *ctx = cgroup_fc2context(fc); - struct dentry *root; - - /* Check if the caller has permission to mount. */ - if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) - return -EPERM; - - root = cgroup1_mount(&cgroup_fs_type, fc->sb_flags, ctx->data, - CGROUP_SUPER_MAGIC, ns); - if (IS_ERR(root)) - return PTR_ERR(root); - - fc->root = root; - return 0; -} - static const struct fs_context_operations cgroup_fs_context_ops = { .free = cgroup_fs_context_free, .parse_monolithic = cgroup_parse_monolithic, -- cgit v1.2.3 From f5dfb5315d340abd71bec523be9b114d5ac410de Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 16 Jan 2019 23:42:38 -0500 Subject: cgroup: take options parsing into ->parse_monolithic() Store the results in cgroup_fs_context. There's a nasty twist caused by the enabling/disabling subsystems - we can't do the checks sensitive to that until cgroup_mutex gets grabbed. Frankly, these checks are complete bullshit (e.g. all,none combination is accepted if all subsystems are disabled; so's cpusets,none and all,cpusets when cpusets is disabled, etc.), but touching that would be a userland-visible behaviour change ;-/ So we do parsing in ->parse_monolithic() and have the consistency checks done in check_cgroupfs_options(), with the latter called (on already parsed options) from cgroup1_get_tree() and cgroup1_reconfigure(). Freeing the strdup'ed strings is done from fs_context destructor, which somewhat simplifies the life for cgroup1_{get_tree,reconfigure}(). Signed-off-by: Al Viro --- kernel/cgroup/cgroup-internal.h | 23 ++++--- kernel/cgroup/cgroup-v1.c | 143 +++++++++++++++++++--------------------- kernel/cgroup/cgroup.c | 54 ++++++++------- 3 files changed, 104 insertions(+), 116 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 37836d598ff8..e627ff193dba 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -41,7 +41,15 @@ extern void __init enable_debug_cgroup(void); * The cgroup filesystem superblock creation/mount context. */ struct cgroup_fs_context { - char *data; + unsigned int flags; /* CGRP_ROOT_* flags */ + + /* cgroup1 bits */ + bool cpuset_clone_children; + bool none; /* User explicitly requested empty subsystem */ + bool all_ss; /* Seen 'all' option */ + u16 subsys_mask; /* Selected subsystems */ + char *name; /* Hierarchy name */ + char *release_agent; /* Path for release notifications */ }; static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc) @@ -130,16 +138,6 @@ struct cgroup_mgctx { #define DEFINE_CGROUP_MGCTX(name) \ struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name) -struct cgroup_sb_opts { - u16 subsys_mask; - unsigned int flags; - char *release_agent; - bool cpuset_clone_children; - char *name; - /* User explicitly requested empty subsystem */ - bool none; -}; - extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; extern struct cgroup_subsys *cgroup_subsys[]; @@ -210,7 +208,7 @@ int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, struct cgroup_namespace *ns); void cgroup_free_root(struct cgroup_root *root); -void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts); +void init_cgroup_root(struct cgroup_root *root, struct cgroup_fs_context *ctx); int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask); int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, @@ -266,6 +264,7 @@ void cgroup1_pidlist_destroy_all(struct cgroup *cgrp); void cgroup1_release_agent(struct work_struct *work); void cgroup1_check_for_release(struct cgroup *cgrp); int cgroup1_get_tree(struct fs_context *fc); +int parse_cgroup1_options(char *data, struct cgroup_fs_context *ctx); int cgroup1_reconfigure(struct fs_context *ctx); #endif /* __CGROUP_INTERNAL_H */ diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 7ae3810dcbdf..5c93643090e9 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -906,61 +906,47 @@ static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_roo return 0; } -static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) +int parse_cgroup1_options(char *data, struct cgroup_fs_context *ctx) { char *token, *o = data; - bool all_ss = false, one_ss = false; - u16 mask = U16_MAX; struct cgroup_subsys *ss; - int nr_opts = 0; int i; -#ifdef CONFIG_CPUSETS - mask = ~((u16)1 << cpuset_cgrp_id); -#endif - - memset(opts, 0, sizeof(*opts)); - while ((token = strsep(&o, ",")) != NULL) { - nr_opts++; - if (!*token) return -EINVAL; if (!strcmp(token, "none")) { /* Explicitly have no subsystems */ - opts->none = true; + ctx->none = true; continue; } if (!strcmp(token, "all")) { - /* Mutually exclusive option 'all' + subsystem name */ - if (one_ss) - return -EINVAL; - all_ss = true; + ctx->all_ss = true; continue; } if (!strcmp(token, "noprefix")) { - opts->flags |= CGRP_ROOT_NOPREFIX; + ctx->flags |= CGRP_ROOT_NOPREFIX; continue; } if (!strcmp(token, "clone_children")) { - opts->cpuset_clone_children = true; + ctx->cpuset_clone_children = true; continue; } if (!strcmp(token, "cpuset_v2_mode")) { - opts->flags |= CGRP_ROOT_CPUSET_V2_MODE; + ctx->flags |= CGRP_ROOT_CPUSET_V2_MODE; continue; } if (!strcmp(token, "xattr")) { - opts->flags |= CGRP_ROOT_XATTR; + ctx->flags |= CGRP_ROOT_XATTR; continue; } if (!strncmp(token, "release_agent=", 14)) { /* Specifying two release agents is forbidden */ - if (opts->release_agent) + if (ctx->release_agent) return -EINVAL; - opts->release_agent = + ctx->release_agent = kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL); - if (!opts->release_agent) + if (!ctx->release_agent) return -ENOMEM; continue; } @@ -983,12 +969,12 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) return -EINVAL; } /* Specifying two names is forbidden */ - if (opts->name) + if (ctx->name) return -EINVAL; - opts->name = kstrndup(name, + ctx->name = kstrndup(name, MAX_CGROUP_ROOT_NAMELEN - 1, GFP_KERNEL); - if (!opts->name) + if (!ctx->name) return -ENOMEM; continue; @@ -997,38 +983,51 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) for_each_subsys(ss, i) { if (strcmp(token, ss->legacy_name)) continue; - if (!cgroup_ssid_enabled(i)) - continue; - if (cgroup1_ssid_disabled(i)) - continue; - - /* Mutually exclusive option 'all' + subsystem name */ - if (all_ss) - return -EINVAL; - opts->subsys_mask |= (1 << i); - one_ss = true; - + ctx->subsys_mask |= (1 << i); break; } if (i == CGROUP_SUBSYS_COUNT) return -ENOENT; } + return 0; +} + +static int check_cgroupfs_options(struct cgroup_fs_context *ctx) +{ + u16 mask = U16_MAX; + u16 enabled = 0; + struct cgroup_subsys *ss; + int i; + +#ifdef CONFIG_CPUSETS + mask = ~((u16)1 << cpuset_cgrp_id); +#endif + for_each_subsys(ss, i) + if (cgroup_ssid_enabled(i) && !cgroup1_ssid_disabled(i)) + enabled |= 1 << i; + + ctx->subsys_mask &= enabled; /* - * If the 'all' option was specified select all the subsystems, - * otherwise if 'none', 'name=' and a subsystem name options were - * not specified, let's default to 'all' + * In absense of 'none', 'name=' or subsystem name options, + * let's default to 'all'. */ - if (all_ss || (!one_ss && !opts->none && !opts->name)) - for_each_subsys(ss, i) - if (cgroup_ssid_enabled(i) && !cgroup1_ssid_disabled(i)) - opts->subsys_mask |= (1 << i); + if (!ctx->subsys_mask && !ctx->none && !ctx->name) + ctx->all_ss = true; + + if (ctx->all_ss) { + /* Mutually exclusive option 'all' + subsystem name */ + if (ctx->subsys_mask) + return -EINVAL; + /* 'all' => select all the subsystems */ + ctx->subsys_mask = enabled; + } /* * We either have to specify by name or by subsystems. (So all * empty hierarchies must have a name). */ - if (!opts->subsys_mask && !opts->name) + if (!ctx->subsys_mask && !ctx->name) return -EINVAL; /* @@ -1036,11 +1035,11 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) * with the old cpuset, so we allow noprefix only if mounting just * the cpuset subsystem. */ - if ((opts->flags & CGRP_ROOT_NOPREFIX) && (opts->subsys_mask & mask)) + if ((ctx->flags & CGRP_ROOT_NOPREFIX) && (ctx->subsys_mask & mask)) return -EINVAL; /* Can't specify "none" and some subsystems */ - if (opts->subsys_mask && opts->none) + if (ctx->subsys_mask && ctx->none) return -EINVAL; return 0; @@ -1052,28 +1051,27 @@ int cgroup1_reconfigure(struct fs_context *fc) struct kernfs_root *kf_root = kernfs_root_from_sb(fc->root->d_sb); struct cgroup_root *root = cgroup_root_from_kf(kf_root); int ret = 0; - struct cgroup_sb_opts opts; u16 added_mask, removed_mask; cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); /* See what subsystems are wanted */ - ret = parse_cgroupfs_options(ctx->data, &opts); + ret = check_cgroupfs_options(ctx); if (ret) goto out_unlock; - if (opts.subsys_mask != root->subsys_mask || opts.release_agent) + if (ctx->subsys_mask != root->subsys_mask || ctx->release_agent) pr_warn("option changes via remount are deprecated (pid=%d comm=%s)\n", task_tgid_nr(current), current->comm); - added_mask = opts.subsys_mask & ~root->subsys_mask; - removed_mask = root->subsys_mask & ~opts.subsys_mask; + added_mask = ctx->subsys_mask & ~root->subsys_mask; + removed_mask = root->subsys_mask & ~ctx->subsys_mask; /* Don't allow flags or name to change at remount */ - if ((opts.flags ^ root->flags) || - (opts.name && strcmp(opts.name, root->name))) { + if ((ctx->flags ^ root->flags) || + (ctx->name && strcmp(ctx->name, root->name))) { pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n", - opts.flags, opts.name ?: "", root->flags, root->name); + ctx->flags, ctx->name ?: "", root->flags, root->name); ret = -EINVAL; goto out_unlock; } @@ -1090,17 +1088,15 @@ int cgroup1_reconfigure(struct fs_context *fc) WARN_ON(rebind_subsystems(&cgrp_dfl_root, removed_mask)); - if (opts.release_agent) { + if (ctx->release_agent) { spin_lock(&release_agent_path_lock); - strcpy(root->release_agent_path, opts.release_agent); + strcpy(root->release_agent_path, ctx->release_agent); spin_unlock(&release_agent_path_lock); } trace_cgroup_remount(root); out_unlock: - kfree(opts.release_agent); - kfree(opts.name); mutex_unlock(&cgroup_mutex); return ret; } @@ -1117,7 +1113,6 @@ int cgroup1_get_tree(struct fs_context *fc) { struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup_fs_context *ctx = cgroup_fc2context(fc); - struct cgroup_sb_opts opts; struct cgroup_root *root; struct cgroup_subsys *ss; struct dentry *dentry; @@ -1130,7 +1125,7 @@ int cgroup1_get_tree(struct fs_context *fc) cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); /* First find the desired set of subsystems */ - ret = parse_cgroupfs_options(ctx->data, &opts); + ret = check_cgroupfs_options(ctx); if (ret) goto out_unlock; @@ -1142,7 +1137,7 @@ int cgroup1_get_tree(struct fs_context *fc) * starting. Testing ref liveliness is good enough. */ for_each_subsys(ss, i) { - if (!(opts.subsys_mask & (1 << i)) || + if (!(ctx->subsys_mask & (1 << i)) || ss->root == &cgrp_dfl_root) continue; @@ -1166,8 +1161,8 @@ int cgroup1_get_tree(struct fs_context *fc) * name matches but sybsys_mask doesn't, we should fail. * Remember whether name matched. */ - if (opts.name) { - if (strcmp(opts.name, root->name)) + if (ctx->name) { + if (strcmp(ctx->name, root->name)) continue; name_match = true; } @@ -1176,15 +1171,15 @@ int cgroup1_get_tree(struct fs_context *fc) * If we asked for subsystems (or explicitly for no * subsystems) then they must match. */ - if ((opts.subsys_mask || opts.none) && - (opts.subsys_mask != root->subsys_mask)) { + if ((ctx->subsys_mask || ctx->none) && + (ctx->subsys_mask != root->subsys_mask)) { if (!name_match) continue; ret = -EBUSY; goto out_unlock; } - if (root->flags ^ opts.flags) + if (root->flags ^ ctx->flags) pr_warn("new mount options do not match the existing superblock, will be ignored\n"); ret = 0; @@ -1196,7 +1191,7 @@ int cgroup1_get_tree(struct fs_context *fc) * specification is allowed for already existing hierarchies but we * can't create new one without subsys specification. */ - if (!opts.subsys_mask && !opts.none) { + if (!ctx->subsys_mask && !ctx->none) { ret = -EINVAL; goto out_unlock; } @@ -1213,9 +1208,9 @@ int cgroup1_get_tree(struct fs_context *fc) goto out_unlock; } - init_cgroup_root(root, &opts); + init_cgroup_root(root, ctx); - ret = cgroup_setup_root(root, opts.subsys_mask); + ret = cgroup_setup_root(root, ctx->subsys_mask); if (ret) cgroup_free_root(root); @@ -1223,14 +1218,10 @@ out_unlock: if (!ret && !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) { mutex_unlock(&cgroup_mutex); msleep(10); - ret = restart_syscall(); - goto out_free; + return restart_syscall(); } mutex_unlock(&cgroup_mutex); out_free: - kfree(opts.release_agent); - kfree(opts.name); - if (ret) return ret; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 0652f74064a2..33da9eef3ef4 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1814,14 +1814,8 @@ static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root static int cgroup_reconfigure(struct fs_context *fc) { struct cgroup_fs_context *ctx = cgroup_fc2context(fc); - unsigned int root_flags; - int ret; - - ret = parse_cgroup_root_flags(ctx->data, &root_flags); - if (ret) - return ret; - apply_cgroup_root_flags(root_flags); + apply_cgroup_root_flags(ctx->flags); return 0; } @@ -1909,7 +1903,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent); } -void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts) +void init_cgroup_root(struct cgroup_root *root, struct cgroup_fs_context *ctx) { struct cgroup *cgrp = &root->cgrp; @@ -1919,12 +1913,12 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts) init_cgroup_housekeeping(cgrp); idr_init(&root->cgroup_idr); - root->flags = opts->flags; - if (opts->release_agent) - strscpy(root->release_agent_path, opts->release_agent, PATH_MAX); - if (opts->name) - strscpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN); - if (opts->cpuset_clone_children) + root->flags = ctx->flags; + if (ctx->release_agent) + strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX); + if (ctx->name) + strscpy(root->name, ctx->name, MAX_CGROUP_ROOT_NAMELEN); + if (ctx->cpuset_clone_children) set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); } @@ -2075,6 +2069,8 @@ static void cgroup_fs_context_free(struct fs_context *fc) { struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + kfree(ctx->name); + kfree(ctx->release_agent); kfree(ctx); } @@ -2082,28 +2078,30 @@ static int cgroup_parse_monolithic(struct fs_context *fc, void *data) { struct cgroup_fs_context *ctx = cgroup_fc2context(fc); - ctx->data = data; - if (ctx->data) - security_sb_eat_lsm_opts(ctx->data, &fc->security); - return 0; + if (data) + security_sb_eat_lsm_opts(data, &fc->security); + return parse_cgroup_root_flags(data, &ctx->flags); +} + +static int cgroup1_parse_monolithic(struct fs_context *fc, void *data) +{ + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + + if (data) + security_sb_eat_lsm_opts(data, &fc->security); + return parse_cgroup1_options(data, ctx); } static int cgroup_get_tree(struct fs_context *fc) { struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup_fs_context *ctx = cgroup_fc2context(fc); - unsigned int root_flags; struct dentry *root; - int ret; /* Check if the caller has permission to mount. */ if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) return -EPERM; - ret = parse_cgroup_root_flags(ctx->data, &root_flags); - if (ret) - return ret; - cgrp_dfl_visible = true; cgroup_get_live(&cgrp_dfl_root.cgrp); @@ -2112,7 +2110,7 @@ static int cgroup_get_tree(struct fs_context *fc) if (IS_ERR(root)) return PTR_ERR(root); - apply_cgroup_root_flags(root_flags); + apply_cgroup_root_flags(ctx->flags); fc->root = root; return 0; } @@ -2126,7 +2124,7 @@ static const struct fs_context_operations cgroup_fs_context_ops = { static const struct fs_context_operations cgroup1_fs_context_ops = { .free = cgroup_fs_context_free, - .parse_monolithic = cgroup_parse_monolithic, + .parse_monolithic = cgroup1_parse_monolithic, .get_tree = cgroup1_get_tree, .reconfigure = cgroup1_reconfigure, }; @@ -5376,11 +5374,11 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) */ int __init cgroup_init_early(void) { - static struct cgroup_sb_opts __initdata opts; + static struct cgroup_fs_context __initdata ctx; struct cgroup_subsys *ss; int i; - init_cgroup_root(&cgrp_dfl_root, &opts); + init_cgroup_root(&cgrp_dfl_root, &ctx); cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF; RCU_INIT_POINTER(init_task.cgroups, &init_css_set); -- cgit v1.2.3 From 8d2451f4994fa60a57617282bab91b98266a00b1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Jan 2019 00:15:11 -0500 Subject: cgroup1: switch to option-by-option parsing [dhowells should be the author - it's carved out of his patch] Signed-off-by: Al Viro --- kernel/cgroup/cgroup-internal.h | 3 +- kernel/cgroup/cgroup-v1.c | 192 +++++++++++++++++++++++----------------- kernel/cgroup/cgroup.c | 20 ++--- 3 files changed, 117 insertions(+), 98 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index e627ff193dba..a7b5a41f170c 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -257,14 +257,15 @@ extern const struct proc_ns_operations cgroupns_operations; */ extern struct cftype cgroup1_base_files[]; extern struct kernfs_syscall_ops cgroup1_kf_syscall_ops; +extern const struct fs_parameter_description cgroup1_fs_parameters; int proc_cgroupstats_show(struct seq_file *m, void *v); bool cgroup1_ssid_disabled(int ssid); void cgroup1_pidlist_destroy_all(struct cgroup *cgrp); void cgroup1_release_agent(struct work_struct *work); void cgroup1_check_for_release(struct cgroup *cgrp); +int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param); int cgroup1_get_tree(struct fs_context *fc); -int parse_cgroup1_options(char *data, struct cgroup_fs_context *ctx); int cgroup1_reconfigure(struct fs_context *ctx); #endif /* __CGROUP_INTERNAL_H */ diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 5c93643090e9..725e9f6fe80d 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -13,9 +13,12 @@ #include #include #include +#include #include +#define cg_invalf(fc, fmt, ...) ({ pr_err(fmt, ## __VA_ARGS__); -EINVAL; }) + /* * pidlists linger the following amount before being destroyed. The goal * is avoiding frequent destruction in the middle of consecutive read calls @@ -906,94 +909,117 @@ static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_roo return 0; } -int parse_cgroup1_options(char *data, struct cgroup_fs_context *ctx) -{ - char *token, *o = data; - struct cgroup_subsys *ss; - int i; +enum cgroup1_param { + Opt_all, + Opt_clone_children, + Opt_cpuset_v2_mode, + Opt_name, + Opt_none, + Opt_noprefix, + Opt_release_agent, + Opt_xattr, +}; - while ((token = strsep(&o, ",")) != NULL) { - if (!*token) - return -EINVAL; - if (!strcmp(token, "none")) { - /* Explicitly have no subsystems */ - ctx->none = true; - continue; - } - if (!strcmp(token, "all")) { - ctx->all_ss = true; - continue; - } - if (!strcmp(token, "noprefix")) { - ctx->flags |= CGRP_ROOT_NOPREFIX; - continue; - } - if (!strcmp(token, "clone_children")) { - ctx->cpuset_clone_children = true; - continue; - } - if (!strcmp(token, "cpuset_v2_mode")) { - ctx->flags |= CGRP_ROOT_CPUSET_V2_MODE; - continue; - } - if (!strcmp(token, "xattr")) { - ctx->flags |= CGRP_ROOT_XATTR; - continue; - } - if (!strncmp(token, "release_agent=", 14)) { - /* Specifying two release agents is forbidden */ - if (ctx->release_agent) - return -EINVAL; - ctx->release_agent = - kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL); - if (!ctx->release_agent) - return -ENOMEM; - continue; - } - if (!strncmp(token, "name=", 5)) { - const char *name = token + 5; - - /* blocked by boot param? */ - if (cgroup_no_v1_named) - return -ENOENT; - /* Can't specify an empty name */ - if (!strlen(name)) - return -EINVAL; - /* Must match [\w.-]+ */ - for (i = 0; i < strlen(name); i++) { - char c = name[i]; - if (isalnum(c)) - continue; - if ((c == '.') || (c == '-') || (c == '_')) - continue; - return -EINVAL; - } - /* Specifying two names is forbidden */ - if (ctx->name) - return -EINVAL; - ctx->name = kstrndup(name, - MAX_CGROUP_ROOT_NAMELEN - 1, - GFP_KERNEL); - if (!ctx->name) - return -ENOMEM; +static const struct fs_parameter_spec cgroup1_param_specs[] = { + fsparam_flag ("all", Opt_all), + fsparam_flag ("clone_children", Opt_clone_children), + fsparam_flag ("cpuset_v2_mode", Opt_cpuset_v2_mode), + fsparam_string("name", Opt_name), + fsparam_flag ("none", Opt_none), + fsparam_flag ("noprefix", Opt_noprefix), + fsparam_string("release_agent", Opt_release_agent), + fsparam_flag ("xattr", Opt_xattr), + {} +}; - continue; - } +const struct fs_parameter_description cgroup1_fs_parameters = { + .name = "cgroup1", + .specs = cgroup1_param_specs, +}; +int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + struct cgroup_subsys *ss; + struct fs_parse_result result; + int opt, i; + + opt = fs_parse(fc, &cgroup1_fs_parameters, param, &result); + if (opt == -ENOPARAM) { + if (strcmp(param->key, "source") == 0) { + fc->source = param->string; + param->string = NULL; + return 0; + } for_each_subsys(ss, i) { - if (strcmp(token, ss->legacy_name)) + if (strcmp(param->key, ss->legacy_name)) continue; ctx->subsys_mask |= (1 << i); - break; + return 0; } - if (i == CGROUP_SUBSYS_COUNT) + return cg_invalf(fc, "cgroup1: Unknown subsys name '%s'", param->key); + } + if (opt < 0) + return opt; + + switch (opt) { + case Opt_none: + /* Explicitly have no subsystems */ + ctx->none = true; + break; + case Opt_all: + ctx->all_ss = true; + break; + case Opt_noprefix: + ctx->flags |= CGRP_ROOT_NOPREFIX; + break; + case Opt_clone_children: + ctx->cpuset_clone_children = true; + break; + case Opt_cpuset_v2_mode: + ctx->flags |= CGRP_ROOT_CPUSET_V2_MODE; + break; + case Opt_xattr: + ctx->flags |= CGRP_ROOT_XATTR; + break; + case Opt_release_agent: + /* Specifying two release agents is forbidden */ + if (ctx->release_agent) + return cg_invalf(fc, "cgroup1: release_agent respecified"); + ctx->release_agent = param->string; + param->string = NULL; + break; + case Opt_name: + /* blocked by boot param? */ + if (cgroup_no_v1_named) return -ENOENT; + /* Can't specify an empty name */ + if (!param->size) + return cg_invalf(fc, "cgroup1: Empty name"); + if (param->size > MAX_CGROUP_ROOT_NAMELEN - 1) + return cg_invalf(fc, "cgroup1: Name too long"); + /* Must match [\w.-]+ */ + for (i = 0; i < param->size; i++) { + char c = param->string[i]; + if (isalnum(c)) + continue; + if ((c == '.') || (c == '-') || (c == '_')) + continue; + return cg_invalf(fc, "cgroup1: Invalid name"); + } + /* Specifying two names is forbidden */ + if (ctx->name) + return cg_invalf(fc, "cgroup1: name respecified"); + ctx->name = param->string; + param->string = NULL; + break; } return 0; } -static int check_cgroupfs_options(struct cgroup_fs_context *ctx) +static int check_cgroupfs_options(struct fs_context *fc) { + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); u16 mask = U16_MAX; u16 enabled = 0; struct cgroup_subsys *ss; @@ -1018,7 +1044,7 @@ static int check_cgroupfs_options(struct cgroup_fs_context *ctx) if (ctx->all_ss) { /* Mutually exclusive option 'all' + subsystem name */ if (ctx->subsys_mask) - return -EINVAL; + return cg_invalf(fc, "cgroup1: subsys name conflicts with all"); /* 'all' => select all the subsystems */ ctx->subsys_mask = enabled; } @@ -1028,7 +1054,7 @@ static int check_cgroupfs_options(struct cgroup_fs_context *ctx) * empty hierarchies must have a name). */ if (!ctx->subsys_mask && !ctx->name) - return -EINVAL; + return cg_invalf(fc, "cgroup1: Need name or subsystem set"); /* * Option noprefix was introduced just for backward compatibility @@ -1036,11 +1062,11 @@ static int check_cgroupfs_options(struct cgroup_fs_context *ctx) * the cpuset subsystem. */ if ((ctx->flags & CGRP_ROOT_NOPREFIX) && (ctx->subsys_mask & mask)) - return -EINVAL; + return cg_invalf(fc, "cgroup1: noprefix used incorrectly"); /* Can't specify "none" and some subsystems */ if (ctx->subsys_mask && ctx->none) - return -EINVAL; + return cg_invalf(fc, "cgroup1: none used incorrectly"); return 0; } @@ -1056,7 +1082,7 @@ int cgroup1_reconfigure(struct fs_context *fc) cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); /* See what subsystems are wanted */ - ret = check_cgroupfs_options(ctx); + ret = check_cgroupfs_options(fc); if (ret) goto out_unlock; @@ -1070,7 +1096,7 @@ int cgroup1_reconfigure(struct fs_context *fc) /* Don't allow flags or name to change at remount */ if ((ctx->flags ^ root->flags) || (ctx->name && strcmp(ctx->name, root->name))) { - pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n", + cg_invalf(fc, "option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"", ctx->flags, ctx->name ?: "", root->flags, root->name); ret = -EINVAL; goto out_unlock; @@ -1125,7 +1151,7 @@ int cgroup1_get_tree(struct fs_context *fc) cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); /* First find the desired set of subsystems */ - ret = check_cgroupfs_options(ctx); + ret = check_cgroupfs_options(fc); if (ret) goto out_unlock; @@ -1192,7 +1218,7 @@ int cgroup1_get_tree(struct fs_context *fc) * can't create new one without subsys specification. */ if (!ctx->subsys_mask && !ctx->none) { - ret = -EINVAL; + ret = cg_invalf(fc, "cgroup1: No subsys list or none specified"); goto out_unlock; } diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 33da9eef3ef4..faba00caa197 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2083,15 +2083,6 @@ static int cgroup_parse_monolithic(struct fs_context *fc, void *data) return parse_cgroup_root_flags(data, &ctx->flags); } -static int cgroup1_parse_monolithic(struct fs_context *fc, void *data) -{ - struct cgroup_fs_context *ctx = cgroup_fc2context(fc); - - if (data) - security_sb_eat_lsm_opts(data, &fc->security); - return parse_cgroup1_options(data, ctx); -} - static int cgroup_get_tree(struct fs_context *fc) { struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; @@ -2124,7 +2115,7 @@ static const struct fs_context_operations cgroup_fs_context_ops = { static const struct fs_context_operations cgroup1_fs_context_ops = { .free = cgroup_fs_context_free, - .parse_monolithic = cgroup1_parse_monolithic, + .parse_param = cgroup1_parse_param, .get_tree = cgroup1_get_tree, .reconfigure = cgroup1_reconfigure, }; @@ -2175,10 +2166,11 @@ static void cgroup_kill_sb(struct super_block *sb) } struct file_system_type cgroup_fs_type = { - .name = "cgroup", - .init_fs_context = cgroup_init_fs_context, - .kill_sb = cgroup_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .name = "cgroup", + .init_fs_context = cgroup_init_fs_context, + .parameters = &cgroup1_fs_parameters, + .kill_sb = cgroup_kill_sb, + .fs_flags = FS_USERNS_MOUNT, }; static struct file_system_type cgroup2_fs_type = { -- cgit v1.2.3 From e34a98d5b226b84a3ed6da93e7a92e65cc1c81ba Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Jan 2019 00:22:58 -0500 Subject: cgroup2: switch to option-by-option parsing [again, carved out of patch by dhowells] [NB: we probably want to handle "source" in parse_param here] Signed-off-by: Al Viro --- kernel/cgroup/cgroup.c | 62 +++++++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 29 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index faba00caa197..d0cddfbdf5cf 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -1772,26 +1773,37 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, return len; } -static int parse_cgroup_root_flags(char *data, unsigned int *root_flags) -{ - char *token; +enum cgroup2_param { + Opt_nsdelegate, + nr__cgroup2_params +}; - *root_flags = 0; +static const struct fs_parameter_spec cgroup2_param_specs[] = { + fsparam_flag ("nsdelegate", Opt_nsdelegate), + {} +}; - if (!data || *data == '\0') - return 0; +static const struct fs_parameter_description cgroup2_fs_parameters = { + .name = "cgroup2", + .specs = cgroup2_param_specs, +}; - while ((token = strsep(&data, ",")) != NULL) { - if (!strcmp(token, "nsdelegate")) { - *root_flags |= CGRP_ROOT_NS_DELEGATE; - continue; - } +static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + struct fs_parse_result result; + int opt; - pr_err("cgroup2: unknown option \"%s\"\n", token); - return -EINVAL; - } + opt = fs_parse(fc, &cgroup2_fs_parameters, param, &result); + if (opt < 0) + return opt; - return 0; + switch (opt) { + case Opt_nsdelegate: + ctx->flags |= CGRP_ROOT_NS_DELEGATE; + return 0; + } + return -EINVAL; } static void apply_cgroup_root_flags(unsigned int root_flags) @@ -2074,15 +2086,6 @@ static void cgroup_fs_context_free(struct fs_context *fc) kfree(ctx); } -static int cgroup_parse_monolithic(struct fs_context *fc, void *data) -{ - struct cgroup_fs_context *ctx = cgroup_fc2context(fc); - - if (data) - security_sb_eat_lsm_opts(data, &fc->security); - return parse_cgroup_root_flags(data, &ctx->flags); -} - static int cgroup_get_tree(struct fs_context *fc) { struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; @@ -2108,7 +2111,7 @@ static int cgroup_get_tree(struct fs_context *fc) static const struct fs_context_operations cgroup_fs_context_ops = { .free = cgroup_fs_context_free, - .parse_monolithic = cgroup_parse_monolithic, + .parse_param = cgroup2_parse_param, .get_tree = cgroup_get_tree, .reconfigure = cgroup_reconfigure, }; @@ -2174,10 +2177,11 @@ struct file_system_type cgroup_fs_type = { }; static struct file_system_type cgroup2_fs_type = { - .name = "cgroup2", - .init_fs_context = cgroup_init_fs_context, - .kill_sb = cgroup_kill_sb, - .fs_flags = FS_USERNS_MOUNT, + .name = "cgroup2", + .init_fs_context = cgroup_init_fs_context, + .parameters = &cgroup2_fs_parameters, + .kill_sb = cgroup_kill_sb, + .fs_flags = FS_USERNS_MOUNT, }; int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, -- cgit v1.2.3 From cf6299b1d00555cd10dc30d95b300d7084128a2c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Jan 2019 02:25:51 -0500 Subject: cgroup: stash cgroup_root reference into cgroup_fs_context Note that this reference is *NOT* contributing to refcount of cgroup_root in question and is valid only until cgroup_do_mount() returns. Signed-off-by: Al Viro --- kernel/cgroup/cgroup-internal.h | 3 ++- kernel/cgroup/cgroup-v1.c | 4 +++- kernel/cgroup/cgroup.c | 7 +++++-- 3 files changed, 10 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index a7b5a41f170c..3c1613a7648c 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -41,6 +41,7 @@ extern void __init enable_debug_cgroup(void); * The cgroup filesystem superblock creation/mount context. */ struct cgroup_fs_context { + struct cgroup_root *root; unsigned int flags; /* CGRP_ROOT_* flags */ /* cgroup1 bits */ @@ -208,7 +209,7 @@ int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, struct cgroup_namespace *ns); void cgroup_free_root(struct cgroup_root *root); -void init_cgroup_root(struct cgroup_root *root, struct cgroup_fs_context *ctx); +void init_cgroup_root(struct cgroup_fs_context *ctx); int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask); int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 725e9f6fe80d..45a198c63d6e 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1208,6 +1208,7 @@ int cgroup1_get_tree(struct fs_context *fc) if (root->flags ^ ctx->flags) pr_warn("new mount options do not match the existing superblock, will be ignored\n"); + ctx->root = root; ret = 0; goto out_unlock; } @@ -1234,7 +1235,8 @@ int cgroup1_get_tree(struct fs_context *fc) goto out_unlock; } - init_cgroup_root(root, ctx); + ctx->root = root; + init_cgroup_root(ctx); ret = cgroup_setup_root(root, ctx->subsys_mask); if (ret) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index d0cddfbdf5cf..57f43f63363a 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1915,8 +1915,9 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent); } -void init_cgroup_root(struct cgroup_root *root, struct cgroup_fs_context *ctx) +void init_cgroup_root(struct cgroup_fs_context *ctx) { + struct cgroup_root *root = ctx->root; struct cgroup *cgrp = &root->cgrp; INIT_LIST_HEAD(&root->root_list); @@ -2098,6 +2099,7 @@ static int cgroup_get_tree(struct fs_context *fc) cgrp_dfl_visible = true; cgroup_get_live(&cgrp_dfl_root.cgrp); + ctx->root = &cgrp_dfl_root; root = cgroup_do_mount(&cgroup2_fs_type, fc->sb_flags, &cgrp_dfl_root, CGROUP2_SUPER_MAGIC, ns); @@ -5374,7 +5376,8 @@ int __init cgroup_init_early(void) struct cgroup_subsys *ss; int i; - init_cgroup_root(&cgrp_dfl_root, &ctx); + ctx.root = &cgrp_dfl_root; + init_cgroup_root(&ctx); cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF; RCU_INIT_POINTER(init_task.cgroups, &init_css_set); -- cgit v1.2.3 From 71d883c37e8d4484207708af56685abb39703b04 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Jan 2019 02:44:07 -0500 Subject: cgroup_do_mount(): massage calling conventions pass it fs_context instead of fs_type/flags/root triple, have it return int instead of dentry and make it deal with setting fc->root. Signed-off-by: Al Viro --- kernel/cgroup/cgroup-internal.h | 3 +-- kernel/cgroup/cgroup-v1.c | 17 +++++----------- kernel/cgroup/cgroup.c | 45 +++++++++++++++++++++-------------------- 3 files changed, 29 insertions(+), 36 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 3c1613a7648c..f7fd54f2973f 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -212,8 +212,7 @@ void cgroup_free_root(struct cgroup_root *root); void init_cgroup_root(struct cgroup_fs_context *ctx); int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask); int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); -struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, - struct cgroup_root *root, unsigned long magic, +int cgroup_do_mount(struct fs_context *fc, unsigned long magic, struct cgroup_namespace *ns); int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp); diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 45a198c63d6e..05f05d773adf 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1141,7 +1141,6 @@ int cgroup1_get_tree(struct fs_context *fc) struct cgroup_fs_context *ctx = cgroup_fc2context(fc); struct cgroup_root *root; struct cgroup_subsys *ss; - struct dentry *dentry; int i, ret; /* Check if the caller has permission to mount. */ @@ -1253,21 +1252,15 @@ out_free: if (ret) return ret; - dentry = cgroup_do_mount(&cgroup_fs_type, fc->sb_flags, root, - CGROUP_SUPER_MAGIC, ns); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - if (percpu_ref_is_dying(&root->cgrp.self.refcnt)) { - struct super_block *sb = dentry->d_sb; - dput(dentry); + ret = cgroup_do_mount(fc, CGROUP_SUPER_MAGIC, ns); + if (!ret && percpu_ref_is_dying(&root->cgrp.self.refcnt)) { + struct super_block *sb = fc->root->d_sb; + dput(fc->root); deactivate_locked_super(sb); msleep(10); return restart_syscall(); } - - fc->root = dentry; - return 0; + return ret; } static int __init cgroup1_wq_init(void) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 57f43f63363a..64360a46d4df 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2036,43 +2036,48 @@ out: return ret; } -struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags, - struct cgroup_root *root, unsigned long magic, - struct cgroup_namespace *ns) +int cgroup_do_mount(struct fs_context *fc, unsigned long magic, + struct cgroup_namespace *ns) { - struct dentry *dentry; + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); bool new_sb = false; + int ret = 0; - dentry = kernfs_mount(fs_type, flags, root->kf_root, magic, &new_sb); + fc->root = kernfs_mount(fc->fs_type, fc->sb_flags, ctx->root->kf_root, + magic, &new_sb); + if (IS_ERR(fc->root)) + ret = PTR_ERR(fc->root); /* * In non-init cgroup namespace, instead of root cgroup's dentry, * we return the dentry corresponding to the cgroupns->root_cgrp. */ - if (!IS_ERR(dentry) && ns != &init_cgroup_ns) { + if (!ret && ns != &init_cgroup_ns) { struct dentry *nsdentry; - struct super_block *sb = dentry->d_sb; + struct super_block *sb = fc->root->d_sb; struct cgroup *cgrp; mutex_lock(&cgroup_mutex); spin_lock_irq(&css_set_lock); - cgrp = cset_cgroup_from_root(ns->root_cset, root); + cgrp = cset_cgroup_from_root(ns->root_cset, ctx->root); spin_unlock_irq(&css_set_lock); mutex_unlock(&cgroup_mutex); nsdentry = kernfs_node_dentry(cgrp->kn, sb); - dput(dentry); - if (IS_ERR(nsdentry)) + dput(fc->root); + fc->root = nsdentry; + if (IS_ERR(nsdentry)) { + ret = PTR_ERR(nsdentry); deactivate_locked_super(sb); - dentry = nsdentry; + } } if (!new_sb) - cgroup_put(&root->cgrp); + cgroup_put(&ctx->root->cgrp); - return dentry; + return ret; } /* @@ -2091,7 +2096,7 @@ static int cgroup_get_tree(struct fs_context *fc) { struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup_fs_context *ctx = cgroup_fc2context(fc); - struct dentry *root; + int ret; /* Check if the caller has permission to mount. */ if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) @@ -2101,14 +2106,10 @@ static int cgroup_get_tree(struct fs_context *fc) cgroup_get_live(&cgrp_dfl_root.cgrp); ctx->root = &cgrp_dfl_root; - root = cgroup_do_mount(&cgroup2_fs_type, fc->sb_flags, &cgrp_dfl_root, - CGROUP2_SUPER_MAGIC, ns); - if (IS_ERR(root)) - return PTR_ERR(root); - - apply_cgroup_root_flags(ctx->flags); - fc->root = root; - return 0; + ret = cgroup_do_mount(fc, CGROUP2_SUPER_MAGIC, ns); + if (!ret) + apply_cgroup_root_flags(ctx->flags); + return ret; } static const struct fs_context_operations cgroup_fs_context_ops = { -- cgit v1.2.3 From 6678889f0726910fc884c54f951d8c5646a04819 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Jan 2019 09:42:30 -0500 Subject: cgroup1_get_tree(): separate "get cgroup_root to use" into a separate helper Signed-off-by: Al Viro --- kernel/cgroup/cgroup-v1.c | 87 +++++++++++++++++++++++++---------------------- 1 file changed, 46 insertions(+), 41 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 05f05d773adf..0d71fc98e73d 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1135,7 +1135,15 @@ struct kernfs_syscall_ops cgroup1_kf_syscall_ops = { .show_path = cgroup_show_path, }; -int cgroup1_get_tree(struct fs_context *fc) +/* + * The guts of cgroup1 mount - find or create cgroup_root to use. + * Called with cgroup_mutex held; returns 0 on success, -E... on + * error and positive - in case when the candidate is busy dying. + * On success it stashes a reference to cgroup_root into given + * cgroup_fs_context; that reference is *NOT* counting towards the + * cgroup_root refcount. + */ +static int cgroup1_root_to_use(struct fs_context *fc) { struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup_fs_context *ctx = cgroup_fc2context(fc); @@ -1143,16 +1151,10 @@ int cgroup1_get_tree(struct fs_context *fc) struct cgroup_subsys *ss; int i, ret; - /* Check if the caller has permission to mount. */ - if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) - return -EPERM; - - cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); - /* First find the desired set of subsystems */ ret = check_cgroupfs_options(fc); if (ret) - goto out_unlock; + return ret; /* * Destruction of cgroup root is asynchronous, so subsystems may @@ -1166,12 +1168,8 @@ int cgroup1_get_tree(struct fs_context *fc) ss->root == &cgrp_dfl_root) continue; - if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) { - mutex_unlock(&cgroup_mutex); - msleep(10); - ret = restart_syscall(); - goto out_free; - } + if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) + return 1; /* restart */ cgroup_put(&ss->root->cgrp); } @@ -1200,16 +1198,14 @@ int cgroup1_get_tree(struct fs_context *fc) (ctx->subsys_mask != root->subsys_mask)) { if (!name_match) continue; - ret = -EBUSY; - goto out_unlock; + return -EBUSY; } if (root->flags ^ ctx->flags) pr_warn("new mount options do not match the existing superblock, will be ignored\n"); ctx->root = root; - ret = 0; - goto out_unlock; + return 0; } /* @@ -1217,22 +1213,16 @@ int cgroup1_get_tree(struct fs_context *fc) * specification is allowed for already existing hierarchies but we * can't create new one without subsys specification. */ - if (!ctx->subsys_mask && !ctx->none) { - ret = cg_invalf(fc, "cgroup1: No subsys list or none specified"); - goto out_unlock; - } + if (!ctx->subsys_mask && !ctx->none) + return cg_invalf(fc, "cgroup1: No subsys list or none specified"); /* Hierarchies may only be created in the initial cgroup namespace. */ - if (ns != &init_cgroup_ns) { - ret = -EPERM; - goto out_unlock; - } + if (ns != &init_cgroup_ns) + return -EPERM; root = kzalloc(sizeof(*root), GFP_KERNEL); - if (!root) { - ret = -ENOMEM; - goto out_unlock; - } + if (!root) + return -ENOMEM; ctx->root = root; init_cgroup_root(ctx); @@ -1240,23 +1230,38 @@ int cgroup1_get_tree(struct fs_context *fc) ret = cgroup_setup_root(root, ctx->subsys_mask); if (ret) cgroup_free_root(root); + return ret; +} + +int cgroup1_get_tree(struct fs_context *fc) +{ + struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; + struct cgroup_fs_context *ctx = cgroup_fc2context(fc); + int ret; + + /* Check if the caller has permission to mount. */ + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) + return -EPERM; + + cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); + + ret = cgroup1_root_to_use(fc); + if (!ret && !percpu_ref_tryget_live(&ctx->root->cgrp.self.refcnt)) + ret = 1; /* restart */ -out_unlock: - if (!ret && !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) { - mutex_unlock(&cgroup_mutex); - msleep(10); - return restart_syscall(); - } mutex_unlock(&cgroup_mutex); -out_free: - if (ret) - return ret; - ret = cgroup_do_mount(fc, CGROUP_SUPER_MAGIC, ns); - if (!ret && percpu_ref_is_dying(&root->cgrp.self.refcnt)) { + if (!ret) + ret = cgroup_do_mount(fc, CGROUP_SUPER_MAGIC, ns); + + if (!ret && percpu_ref_is_dying(&ctx->root->cgrp.self.refcnt)) { struct super_block *sb = fc->root->d_sb; dput(fc->root); deactivate_locked_super(sb); + ret = 1; + } + + if (unlikely(ret > 0)) { msleep(10); return restart_syscall(); } -- cgit v1.2.3 From cca8f32714d3a8bb4d109c9d7d790fd705b734e5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 17 Jan 2019 10:14:26 -0500 Subject: cgroup: store a reference to cgroup_ns into cgroup_fs_context ... and trim cgroup_do_mount() arguments (renaming it to cgroup_do_get_tree()) Signed-off-by: Al Viro --- kernel/cgroup/cgroup-internal.h | 4 ++-- kernel/cgroup/cgroup-v1.c | 8 +++----- kernel/cgroup/cgroup.c | 17 ++++++++++++----- 3 files changed, 17 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index f7fd54f2973f..37cf709b7a0e 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -42,6 +42,7 @@ extern void __init enable_debug_cgroup(void); */ struct cgroup_fs_context { struct cgroup_root *root; + struct cgroup_namespace *ns; unsigned int flags; /* CGRP_ROOT_* flags */ /* cgroup1 bits */ @@ -212,8 +213,7 @@ void cgroup_free_root(struct cgroup_root *root); void init_cgroup_root(struct cgroup_fs_context *ctx); int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask); int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); -int cgroup_do_mount(struct fs_context *fc, unsigned long magic, - struct cgroup_namespace *ns); +int cgroup_do_get_tree(struct fs_context *fc); int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp); void cgroup_migrate_finish(struct cgroup_mgctx *mgctx); diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 0d71fc98e73d..571ef3447426 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1145,7 +1145,6 @@ struct kernfs_syscall_ops cgroup1_kf_syscall_ops = { */ static int cgroup1_root_to_use(struct fs_context *fc) { - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup_fs_context *ctx = cgroup_fc2context(fc); struct cgroup_root *root; struct cgroup_subsys *ss; @@ -1217,7 +1216,7 @@ static int cgroup1_root_to_use(struct fs_context *fc) return cg_invalf(fc, "cgroup1: No subsys list or none specified"); /* Hierarchies may only be created in the initial cgroup namespace. */ - if (ns != &init_cgroup_ns) + if (ctx->ns != &init_cgroup_ns) return -EPERM; root = kzalloc(sizeof(*root), GFP_KERNEL); @@ -1235,12 +1234,11 @@ static int cgroup1_root_to_use(struct fs_context *fc) int cgroup1_get_tree(struct fs_context *fc) { - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup_fs_context *ctx = cgroup_fc2context(fc); int ret; /* Check if the caller has permission to mount. */ - if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) + if (!ns_capable(ctx->ns->user_ns, CAP_SYS_ADMIN)) return -EPERM; cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); @@ -1252,7 +1250,7 @@ int cgroup1_get_tree(struct fs_context *fc) mutex_unlock(&cgroup_mutex); if (!ret) - ret = cgroup_do_mount(fc, CGROUP_SUPER_MAGIC, ns); + ret = cgroup_do_get_tree(fc); if (!ret && percpu_ref_is_dying(&ctx->root->cgrp.self.refcnt)) { struct super_block *sb = fc->root->d_sb; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 64360a46d4df..0c6bef234a7c 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2036,13 +2036,17 @@ out: return ret; } -int cgroup_do_mount(struct fs_context *fc, unsigned long magic, - struct cgroup_namespace *ns) +int cgroup_do_get_tree(struct fs_context *fc) { struct cgroup_fs_context *ctx = cgroup_fc2context(fc); bool new_sb = false; + unsigned long magic; int ret = 0; + if (fc->fs_type == &cgroup2_fs_type) + magic = CGROUP2_SUPER_MAGIC; + else + magic = CGROUP_SUPER_MAGIC; fc->root = kernfs_mount(fc->fs_type, fc->sb_flags, ctx->root->kf_root, magic, &new_sb); if (IS_ERR(fc->root)) @@ -2052,7 +2056,7 @@ int cgroup_do_mount(struct fs_context *fc, unsigned long magic, * In non-init cgroup namespace, instead of root cgroup's dentry, * we return the dentry corresponding to the cgroupns->root_cgrp. */ - if (!ret && ns != &init_cgroup_ns) { + if (!ret && ctx->ns != &init_cgroup_ns) { struct dentry *nsdentry; struct super_block *sb = fc->root->d_sb; struct cgroup *cgrp; @@ -2060,7 +2064,7 @@ int cgroup_do_mount(struct fs_context *fc, unsigned long magic, mutex_lock(&cgroup_mutex); spin_lock_irq(&css_set_lock); - cgrp = cset_cgroup_from_root(ns->root_cset, ctx->root); + cgrp = cset_cgroup_from_root(ctx->ns->root_cset, ctx->root); spin_unlock_irq(&css_set_lock); mutex_unlock(&cgroup_mutex); @@ -2089,6 +2093,7 @@ static void cgroup_fs_context_free(struct fs_context *fc) kfree(ctx->name); kfree(ctx->release_agent); + put_cgroup_ns(ctx->ns); kfree(ctx); } @@ -2106,7 +2111,7 @@ static int cgroup_get_tree(struct fs_context *fc) cgroup_get_live(&cgrp_dfl_root.cgrp); ctx->root = &cgrp_dfl_root; - ret = cgroup_do_mount(fc, CGROUP2_SUPER_MAGIC, ns); + ret = cgroup_do_get_tree(fc); if (!ret) apply_cgroup_root_flags(ctx->flags); return ret; @@ -2144,6 +2149,8 @@ static int cgroup_init_fs_context(struct fs_context *fc) if (!use_task_css_set_links) cgroup_enable_task_cg_lists(); + ctx->ns = current->nsproxy->cgroup_ns; + get_cgroup_ns(ctx->ns); fc->fs_private = ctx; if (fc->fs_type == &cgroup2_fs_type) fc->ops = &cgroup_fs_context_ops; -- cgit v1.2.3 From 23bf1b6be9c291a7130118dcc7384f72ac04d813 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 1 Nov 2018 23:07:26 +0000 Subject: kernfs, sysfs, cgroup, intel_rdt: Support fs_context Make kernfs support superblock creation/mount/remount with fs_context. This requires that sysfs, cgroup and intel_rdt, which are built on kernfs, be made to support fs_context also. Notes: (1) A kernfs_fs_context struct is created to wrap fs_context and the kernfs mount parameters are moved in here (or are in fs_context). (2) kernfs_mount{,_ns}() are made into kernfs_get_tree(). The extra namespace tag parameter is passed in the context if desired (3) kernfs_free_fs_context() is provided as a destructor for the kernfs_fs_context struct, but for the moment it does nothing except get called in the right places. (4) sysfs doesn't wrap kernfs_fs_context since it has no parameters to pass, but possibly this should be done anyway in case someone wants to add a parameter in future. (5) A cgroup_fs_context struct is created to wrap kernfs_fs_context and the cgroup v1 and v2 mount parameters are all moved there. (6) cgroup1 parameter parsing error messages are now handled by invalf(), which allows userspace to collect them directly. (7) cgroup1 parameter cleanup is now done in the context destructor rather than in the mount/get_tree and remount functions. Weirdies: (*) cgroup_do_get_tree() calls cset_cgroup_from_root() with locks held, but then uses the resulting pointer after dropping the locks. I'm told this is okay and needs commenting. (*) The cgroup refcount web. This really needs documenting. (*) cgroup2 only has one root? Add a suggestion from Thomas Gleixner in which the RDT enablement code is placed into its own function. [folded a leak fix from Andrey Vagin] Signed-off-by: David Howells cc: Greg Kroah-Hartman cc: Tejun Heo cc: Li Zefan cc: Johannes Weiner cc: cgroups@vger.kernel.org cc: fenghua.yu@intel.com Signed-off-by: Al Viro --- kernel/cgroup/cgroup-internal.h | 5 ++++- kernel/cgroup/cgroup.c | 31 ++++++++++++++----------------- 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 37cf709b7a0e..30e39f3932ad 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -41,6 +41,7 @@ extern void __init enable_debug_cgroup(void); * The cgroup filesystem superblock creation/mount context. */ struct cgroup_fs_context { + struct kernfs_fs_context kfc; struct cgroup_root *root; struct cgroup_namespace *ns; unsigned int flags; /* CGRP_ROOT_* flags */ @@ -56,7 +57,9 @@ struct cgroup_fs_context { static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc) { - return fc->fs_private; + struct kernfs_fs_context *kfc = fc->fs_private; + + return container_of(kfc, struct cgroup_fs_context, kfc); } /* diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 0c6bef234a7c..747e5b17f9da 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2039,18 +2039,14 @@ out: int cgroup_do_get_tree(struct fs_context *fc) { struct cgroup_fs_context *ctx = cgroup_fc2context(fc); - bool new_sb = false; - unsigned long magic; - int ret = 0; + int ret; + ctx->kfc.root = ctx->root->kf_root; if (fc->fs_type == &cgroup2_fs_type) - magic = CGROUP2_SUPER_MAGIC; + ctx->kfc.magic = CGROUP2_SUPER_MAGIC; else - magic = CGROUP_SUPER_MAGIC; - fc->root = kernfs_mount(fc->fs_type, fc->sb_flags, ctx->root->kf_root, - magic, &new_sb); - if (IS_ERR(fc->root)) - ret = PTR_ERR(fc->root); + ctx->kfc.magic = CGROUP_SUPER_MAGIC; + ret = kernfs_get_tree(fc); /* * In non-init cgroup namespace, instead of root cgroup's dentry, @@ -2078,7 +2074,7 @@ int cgroup_do_get_tree(struct fs_context *fc) } } - if (!new_sb) + if (!ctx->kfc.new_sb_created) cgroup_put(&ctx->root->cgrp); return ret; @@ -2094,19 +2090,15 @@ static void cgroup_fs_context_free(struct fs_context *fc) kfree(ctx->name); kfree(ctx->release_agent); put_cgroup_ns(ctx->ns); + kernfs_free_fs_context(fc); kfree(ctx); } static int cgroup_get_tree(struct fs_context *fc) { - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup_fs_context *ctx = cgroup_fc2context(fc); int ret; - /* Check if the caller has permission to mount. */ - if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) - return -EPERM; - cgrp_dfl_visible = true; cgroup_get_live(&cgrp_dfl_root.cgrp); ctx->root = &cgrp_dfl_root; @@ -2132,7 +2124,8 @@ static const struct fs_context_operations cgroup1_fs_context_ops = { }; /* - * Initialise the cgroup filesystem creation/reconfiguration context. + * Initialise the cgroup filesystem creation/reconfiguration context. Notably, + * we select the namespace we're going to use. */ static int cgroup_init_fs_context(struct fs_context *fc) { @@ -2151,11 +2144,15 @@ static int cgroup_init_fs_context(struct fs_context *fc) ctx->ns = current->nsproxy->cgroup_ns; get_cgroup_ns(ctx->ns); - fc->fs_private = ctx; + fc->fs_private = &ctx->kfc; if (fc->fs_type == &cgroup2_fs_type) fc->ops = &cgroup_fs_context_ops; else fc->ops = &cgroup1_fs_context_ops; + if (fc->user_ns) + put_user_ns(fc->user_ns); + fc->user_ns = get_user_ns(ctx->ns->user_ns); + fc->global = true; return 0; } -- cgit v1.2.3 From a18753747385b8b98577a18adc8ec99fda679044 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 1 Nov 2018 23:07:25 +0000 Subject: cpuset: Use fs_context Make the cpuset filesystem use the filesystem context. This is potentially tricky as the cpuset fs is almost an alias for the cgroup filesystem, but with some special parameters. This can, however, be handled by setting up an appropriate cgroup filesystem and returning the root directory of that as the root dir of this one. Signed-off-by: David Howells cc: Tejun Heo Signed-off-by: Al Viro --- kernel/cgroup/cpuset.c | 56 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 479743db6c37..9758b03834ac 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -372,25 +373,52 @@ static inline bool is_in_v2_mode(void) * users. If someone tries to mount the "cpuset" filesystem, we * silently switch it to mount "cgroup" instead */ -static struct dentry *cpuset_mount(struct file_system_type *fs_type, - int flags, const char *unused_dev_name, void *data) -{ - struct file_system_type *cgroup_fs = get_fs_type("cgroup"); - struct dentry *ret = ERR_PTR(-ENODEV); - if (cgroup_fs) { - char mountopts[] = - "cpuset,noprefix," - "release_agent=/sbin/cpuset_release_agent"; - ret = cgroup_fs->mount(cgroup_fs, flags, - unused_dev_name, mountopts); - put_filesystem(cgroup_fs); +static int cpuset_get_tree(struct fs_context *fc) +{ + struct file_system_type *cgroup_fs; + struct fs_context *new_fc; + int ret; + + cgroup_fs = get_fs_type("cgroup"); + if (!cgroup_fs) + return -ENODEV; + + new_fc = fs_context_for_mount(cgroup_fs, fc->sb_flags); + if (IS_ERR(new_fc)) { + ret = PTR_ERR(new_fc); + } else { + static const char agent_path[] = "/sbin/cpuset_release_agent"; + ret = vfs_parse_fs_string(new_fc, "cpuset", NULL, 0); + if (!ret) + ret = vfs_parse_fs_string(new_fc, "noprefix", NULL, 0); + if (!ret) + ret = vfs_parse_fs_string(new_fc, "release_agent", + agent_path, sizeof(agent_path) - 1); + if (!ret) + ret = vfs_get_tree(new_fc); + if (!ret) { /* steal the result */ + fc->root = new_fc->root; + new_fc->root = NULL; + } + put_fs_context(new_fc); } + put_filesystem(cgroup_fs); return ret; } +static const struct fs_context_operations cpuset_fs_context_ops = { + .get_tree = cpuset_get_tree, +}; + +static int cpuset_init_fs_context(struct fs_context *fc) +{ + fc->ops = &cpuset_fs_context_ops; + return 0; +} + static struct file_system_type cpuset_fs_type = { - .name = "cpuset", - .mount = cpuset_mount, + .name = "cpuset", + .init_fs_context = cpuset_init_fs_context, }; /* -- cgit v1.2.3 From 06a2ae56b5b88fa57cd56e0b99bd874135efdf58 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 1 Nov 2018 23:07:26 +0000 Subject: vfs: Add some logging to the core users of the fs_context log Add some logging to the core users of the fs_context log so that information can be extracted from them as to the reason for failure. Signed-off-by: David Howells Signed-off-by: Al Viro --- kernel/cgroup/cgroup-v1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 571ef3447426..c126b34fd4ff 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -17,7 +17,7 @@ #include -#define cg_invalf(fc, fmt, ...) ({ pr_err(fmt, ## __VA_ARGS__); -EINVAL; }) +#define cg_invalf(fc, fmt, ...) invalf(fc, fmt, ## __VA_ARGS__) /* * pidlists linger the following amount before being destroyed. The goal -- cgit v1.2.3