diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-30 18:32:21 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-30 18:32:21 -0800 |
commit | 8b0fdf631cf6a31f60a9ed3e1c0f37a9715de807 (patch) | |
tree | 50bab0b8c054df37f397d581251ba7df1484e061 | |
parent | 168fe32a072a4b8dc81a3aebf0e5e588d38e2955 (diff) | |
parent | 36735a6a2b5e042db1af956ce4bcc13f3ff99e21 (diff) | |
download | linux-8b0fdf631cf6a31f60a9ed3e1c0f37a9715de807.tar.gz linux-8b0fdf631cf6a31f60a9ed3e1c0f37a9715de807.tar.bz2 linux-8b0fdf631cf6a31f60a9ed3e1c0f37a9715de807.zip |
Merge branch 'work.mqueue' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull mqueue/bpf vfs cleanups from Al Viro:
"mqueue and bpf go through rather painful and similar contortions to
create objects in their dentry trees. Provide a primitive for doing
that without abusing ->mknod(), switch bpf and mqueue to it.
Another mqueue-related thing that has ended up in that branch is
on-demand creation of internal mount (based upon the work of Giuseppe
Scrivano)"
* 'work.mqueue' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
mqueue: switch to on-demand creation of internal mount
tidy do_mq_open() up a bit
mqueue: clean prepare_open() up
do_mq_open(): move all work prior to dentry_open() into a helper
mqueue: fold mq_attr_ok() into mqueue_get_inode()
move dentry_open() calls up into do_mq_open()
mqueue: switch to vfs_mkobj(), quit abusing ->d_fsdata
bpf_obj_do_pin(): switch to vfs_mkobj(), quit abusing ->mknod()
new primitive: vfs_mkobj()
-rw-r--r-- | fs/namei.c | 21 | ||||
-rw-r--r-- | include/linux/fs.h | 4 | ||||
-rw-r--r-- | ipc/mqueue.c | 241 | ||||
-rw-r--r-- | kernel/bpf/inode.c | 50 |
4 files changed, 158 insertions, 158 deletions
diff --git a/fs/namei.c b/fs/namei.c index 4e3fc58dae72..7c221fb0836b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2895,6 +2895,27 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, } EXPORT_SYMBOL(vfs_create); +int vfs_mkobj(struct dentry *dentry, umode_t mode, + int (*f)(struct dentry *, umode_t, void *), + void *arg) +{ + struct inode *dir = dentry->d_parent->d_inode; + int error = may_create(dir, dentry); + if (error) + return error; + + mode &= S_IALLUGO; + mode |= S_IFREG; + error = security_inode_create(dir, dentry, mode); + if (error) + return error; + error = f(dentry, mode, arg); + if (!error) + fsnotify_create(dir, dentry); + return error; +} +EXPORT_SYMBOL(vfs_mkobj); + bool may_open_dev(const struct path *path) { return !(path->mnt->mnt_flags & MNT_NODEV) && diff --git a/include/linux/fs.h b/include/linux/fs.h index 569c51d37312..9798a133e718 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1608,6 +1608,10 @@ extern int vfs_whiteout(struct inode *, struct dentry *); extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag); +int vfs_mkobj(struct dentry *, umode_t, + int (*f)(struct dentry *, umode_t, void *), + void *); + /* * VFS file helper functions. */ diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 3bc5bb7d6827..690ae6665500 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -270,13 +270,30 @@ static struct inode *mqueue_get_inode(struct super_block *sb, * that means the min(mq_maxmsg, max_priorities) * struct * posix_msg_tree_node. */ + + ret = -EINVAL; + if (info->attr.mq_maxmsg <= 0 || info->attr.mq_msgsize <= 0) + goto out_inode; + if (capable(CAP_SYS_RESOURCE)) { + if (info->attr.mq_maxmsg > HARD_MSGMAX || + info->attr.mq_msgsize > HARD_MSGSIZEMAX) + goto out_inode; + } else { + if (info->attr.mq_maxmsg > ipc_ns->mq_msg_max || + info->attr.mq_msgsize > ipc_ns->mq_msgsize_max) + goto out_inode; + } + ret = -EOVERFLOW; + /* check for overflow */ + if (info->attr.mq_msgsize > ULONG_MAX/info->attr.mq_maxmsg) + goto out_inode; mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) + min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) * sizeof(struct posix_msg_tree_node); - - mq_bytes = mq_treesize + (info->attr.mq_maxmsg * - info->attr.mq_msgsize); - + mq_bytes = info->attr.mq_maxmsg * info->attr.mq_msgsize; + if (mq_bytes + mq_treesize < mq_bytes) + goto out_inode; + mq_bytes += mq_treesize; spin_lock(&mq_lock); if (u->mq_bytes + mq_bytes < u->mq_bytes || u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) { @@ -308,8 +325,9 @@ err: static int mqueue_fill_super(struct super_block *sb, void *data, int silent) { struct inode *inode; - struct ipc_namespace *ns = sb->s_fs_info; + struct ipc_namespace *ns = data; + sb->s_fs_info = ns; sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; @@ -326,18 +344,44 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent) return 0; } +static struct file_system_type mqueue_fs_type; +/* + * Return value is pinned only by reference in ->mq_mnt; it will + * live until ipcns dies. Caller does not need to drop it. + */ +static struct vfsmount *mq_internal_mount(void) +{ + struct ipc_namespace *ns = current->nsproxy->ipc_ns; + struct vfsmount *m = ns->mq_mnt; + if (m) + return m; + m = kern_mount_data(&mqueue_fs_type, ns); + spin_lock(&mq_lock); + if (unlikely(ns->mq_mnt)) { + spin_unlock(&mq_lock); + if (!IS_ERR(m)) + kern_unmount(m); + return ns->mq_mnt; + } + if (!IS_ERR(m)) + ns->mq_mnt = m; + spin_unlock(&mq_lock); + return m; +} + static struct dentry *mqueue_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - struct ipc_namespace *ns; - if (flags & SB_KERNMOUNT) { - ns = data; - data = NULL; - } else { - ns = current->nsproxy->ipc_ns; - } - return mount_ns(fs_type, flags, data, ns, ns->user_ns, mqueue_fill_super); + struct vfsmount *m; + if (flags & SB_KERNMOUNT) + return mount_nodev(fs_type, flags, data, mqueue_fill_super); + m = mq_internal_mount(); + if (IS_ERR(m)) + return ERR_CAST(m); + atomic_inc(&m->mnt_sb->s_active); + down_write(&m->mnt_sb->s_umount); + return dget(m->mnt_root); } static void init_once(void *foo) @@ -416,11 +460,11 @@ static void mqueue_evict_inode(struct inode *inode) put_ipc_ns(ipc_ns); } -static int mqueue_create(struct inode *dir, struct dentry *dentry, - umode_t mode, bool excl) +static int mqueue_create_attr(struct dentry *dentry, umode_t mode, void *arg) { + struct inode *dir = dentry->d_parent->d_inode; struct inode *inode; - struct mq_attr *attr = dentry->d_fsdata; + struct mq_attr *attr = arg; int error; struct ipc_namespace *ipc_ns; @@ -461,6 +505,12 @@ out_unlock: return error; } +static int mqueue_create(struct inode *dir, struct dentry *dentry, + umode_t mode, bool excl) +{ + return mqueue_create_attr(dentry, mode, NULL); +} + static int mqueue_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(dentry); @@ -691,96 +741,46 @@ static void remove_notification(struct mqueue_inode_info *info) info->notify_user_ns = NULL; } -static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr) -{ - int mq_treesize; - unsigned long total_size; - - if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0) - return -EINVAL; - if (capable(CAP_SYS_RESOURCE)) { - if (attr->mq_maxmsg > HARD_MSGMAX || - attr->mq_msgsize > HARD_MSGSIZEMAX) - return -EINVAL; - } else { - if (attr->mq_maxmsg > ipc_ns->mq_msg_max || - attr->mq_msgsize > ipc_ns->mq_msgsize_max) - return -EINVAL; - } - /* check for overflow */ - if (attr->mq_msgsize > ULONG_MAX/attr->mq_maxmsg) - return -EOVERFLOW; - mq_treesize = attr->mq_maxmsg * sizeof(struct msg_msg) + - min_t(unsigned int, attr->mq_maxmsg, MQ_PRIO_MAX) * - sizeof(struct posix_msg_tree_node); - total_size = attr->mq_maxmsg * attr->mq_msgsize; - if (total_size + mq_treesize < total_size) - return -EOVERFLOW; - return 0; -} - -/* - * Invoked when creating a new queue via sys_mq_open - */ -static struct file *do_create(struct ipc_namespace *ipc_ns, struct inode *dir, - struct path *path, int oflag, umode_t mode, +static int prepare_open(struct dentry *dentry, int oflag, int ro, + umode_t mode, struct filename *name, struct mq_attr *attr) { - const struct cred *cred = current_cred(); - int ret; - - if (attr) { - ret = mq_attr_ok(ipc_ns, attr); - if (ret) - return ERR_PTR(ret); - /* store for use during create */ - path->dentry->d_fsdata = attr; - } else { - struct mq_attr def_attr; - - def_attr.mq_maxmsg = min(ipc_ns->mq_msg_max, - ipc_ns->mq_msg_default); - def_attr.mq_msgsize = min(ipc_ns->mq_msgsize_max, - ipc_ns->mq_msgsize_default); - ret = mq_attr_ok(ipc_ns, &def_attr); - if (ret) - return ERR_PTR(ret); - } - - mode &= ~current_umask(); - ret = vfs_create(dir, path->dentry, mode, true); - path->dentry->d_fsdata = NULL; - if (ret) - return ERR_PTR(ret); - return dentry_open(path, oflag, cred); -} - -/* Opens existing queue */ -static struct file *do_open(struct path *path, int oflag) -{ static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE, MAY_READ | MAY_WRITE }; int acc; + + if (d_really_is_negative(dentry)) { + if (!(oflag & O_CREAT)) + return -ENOENT; + if (ro) + return ro; + audit_inode_parent_hidden(name, dentry->d_parent); + return vfs_mkobj(dentry, mode & ~current_umask(), + mqueue_create_attr, attr); + } + /* it already existed */ + audit_inode(name, dentry, 0); + if ((oflag & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) + return -EEXIST; if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) - return ERR_PTR(-EINVAL); + return -EINVAL; acc = oflag2acc[oflag & O_ACCMODE]; - if (inode_permission(d_inode(path->dentry), acc)) - return ERR_PTR(-EACCES); - return dentry_open(path, oflag, current_cred()); + return inode_permission(d_inode(dentry), acc); } static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, struct mq_attr *attr) { - struct path path; - struct file *filp; + struct vfsmount *mnt = mq_internal_mount(); + struct dentry *root; struct filename *name; + struct path path; int fd, error; - struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; - struct vfsmount *mnt = ipc_ns->mq_mnt; - struct dentry *root = mnt->mnt_root; int ro; + if (IS_ERR(mnt)) + return PTR_ERR(mnt); + audit_mq_open(oflag, mode, attr); if (IS_ERR(name = getname(u_name))) @@ -791,7 +791,7 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, goto out_putname; ro = mnt_want_write(mnt); /* we'll drop it in any case */ - error = 0; + root = mnt->mnt_root; inode_lock(d_inode(root)); path.dentry = lookup_one_len(name->name, root, strlen(name->name)); if (IS_ERR(path.dentry)) { @@ -799,38 +799,14 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, goto out_putfd; } path.mnt = mntget(mnt); - - if (oflag & O_CREAT) { - if (d_really_is_positive(path.dentry)) { /* entry already exists */ - audit_inode(name, path.dentry, 0); - if (oflag & O_EXCL) { - error = -EEXIST; - goto out; - } - filp = do_open(&path, oflag); - } else { - if (ro) { - error = ro; - goto out; - } - audit_inode_parent_hidden(name, root); - filp = do_create(ipc_ns, d_inode(root), &path, - oflag, mode, attr); - } - } else { - if (d_really_is_negative(path.dentry)) { - error = -ENOENT; - goto out; - } - audit_inode(name, path.dentry, 0); - filp = do_open(&path, oflag); + error = prepare_open(path.dentry, oflag, ro, mode, name, attr); + if (!error) { + struct file *file = dentry_open(&path, oflag, current_cred()); + if (!IS_ERR(file)) + fd_install(fd, file); + else + error = PTR_ERR(file); } - - if (!IS_ERR(filp)) - fd_install(fd, filp); - else - error = PTR_ERR(filp); -out: path_put(&path); out_putfd: if (error) { @@ -864,6 +840,9 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; struct vfsmount *mnt = ipc_ns->mq_mnt; + if (!mnt) + return -ENOENT; + name = getname(u_name); if (IS_ERR(name)) return PTR_ERR(name); @@ -1590,28 +1569,26 @@ int mq_init_ns(struct ipc_namespace *ns) ns->mq_msgsize_max = DFLT_MSGSIZEMAX; ns->mq_msg_default = DFLT_MSG; ns->mq_msgsize_default = DFLT_MSGSIZE; + ns->mq_mnt = NULL; - ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns); - if (IS_ERR(ns->mq_mnt)) { - int err = PTR_ERR(ns->mq_mnt); - ns->mq_mnt = NULL; - return err; - } return 0; } void mq_clear_sbinfo(struct ipc_namespace *ns) { - ns->mq_mnt->mnt_sb->s_fs_info = NULL; + if (ns->mq_mnt) + ns->mq_mnt->mnt_sb->s_fs_info = NULL; } void mq_put_mnt(struct ipc_namespace *ns) { - kern_unmount(ns->mq_mnt); + if (ns->mq_mnt) + kern_unmount(ns->mq_mnt); } static int __init init_mqueue_fs(void) { + struct vfsmount *m; int error; mqueue_inode_cachep = kmem_cache_create("mqueue_inode_cache", @@ -1633,6 +1610,10 @@ static int __init init_mqueue_fs(void) if (error) goto out_filesystem; + m = kern_mount_data(&mqueue_fs_type, &init_ipc_ns); + if (IS_ERR(m)) + goto out_filesystem; + init_ipc_ns.mq_mnt = m; return 0; out_filesystem: diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 5bb5e49ef4c3..81e2f6995adb 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -150,39 +150,29 @@ static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) return 0; } -static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry, - umode_t mode, const struct inode_operations *iops) +static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw, + const struct inode_operations *iops) { - struct inode *inode; - - inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFREG); + struct inode *dir = dentry->d_parent->d_inode; + struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode); if (IS_ERR(inode)) return PTR_ERR(inode); inode->i_op = iops; - inode->i_private = dentry->d_fsdata; + inode->i_private = raw; bpf_dentry_finalize(dentry, inode, dir); return 0; } -static int bpf_mkobj(struct inode *dir, struct dentry *dentry, umode_t mode, - dev_t devt) +static int bpf_mkprog(struct dentry *dentry, umode_t mode, void *arg) { - enum bpf_type type = MINOR(devt); - - if (MAJOR(devt) != UNNAMED_MAJOR || !S_ISREG(mode) || - dentry->d_fsdata == NULL) - return -EPERM; + return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops); +} - switch (type) { - case BPF_TYPE_PROG: - return bpf_mkobj_ops(dir, dentry, mode, &bpf_prog_iops); - case BPF_TYPE_MAP: - return bpf_mkobj_ops(dir, dentry, mode, &bpf_map_iops); - default: - return -EPERM; - } +static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg) +{ + return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops); } static struct dentry * @@ -218,7 +208,6 @@ static int bpf_symlink(struct inode *dir, struct dentry *dentry, static const struct inode_operations bpf_dir_iops = { .lookup = bpf_lookup, - .mknod = bpf_mkobj, .mkdir = bpf_mkdir, .symlink = bpf_symlink, .rmdir = simple_rmdir, @@ -234,7 +223,6 @@ static int bpf_obj_do_pin(const struct filename *pathname, void *raw, struct inode *dir; struct path path; umode_t mode; - dev_t devt; int ret; dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0); @@ -242,9 +230,8 @@ static int bpf_obj_do_pin(const struct filename *pathname, void *raw, return PTR_ERR(dentry); mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); - devt = MKDEV(UNNAMED_MAJOR, type); - ret = security_path_mknod(&path, dentry, mode, devt); + ret = security_path_mknod(&path, dentry, mode, 0); if (ret) goto out; @@ -254,9 +241,16 @@ static int bpf_obj_do_pin(const struct filename *pathname, void *raw, goto out; } - dentry->d_fsdata = raw; - ret = vfs_mknod(dir, dentry, mode, devt); - dentry->d_fsdata = NULL; + switch (type) { + case BPF_TYPE_PROG: + ret = vfs_mkobj(dentry, mode, bpf_mkprog, raw); + break; + case BPF_TYPE_MAP: + ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw); + break; + default: + ret = -EPERM; + } out: done_path_create(&path, dentry); return ret; |