summaryrefslogtreecommitdiffstats
path: root/security/landlock
diff options
context:
space:
mode:
Diffstat (limited to 'security/landlock')
-rw-r--r--security/landlock/Kconfig21
-rw-r--r--security/landlock/Makefile4
-rw-r--r--security/landlock/common.h20
-rw-r--r--security/landlock/cred.c46
-rw-r--r--security/landlock/cred.h58
-rw-r--r--security/landlock/fs.c692
-rw-r--r--security/landlock/fs.h70
-rw-r--r--security/landlock/limits.h21
-rw-r--r--security/landlock/object.c67
-rw-r--r--security/landlock/object.h91
-rw-r--r--security/landlock/ptrace.c120
-rw-r--r--security/landlock/ptrace.h14
-rw-r--r--security/landlock/ruleset.c473
-rw-r--r--security/landlock/ruleset.h165
-rw-r--r--security/landlock/setup.c40
-rw-r--r--security/landlock/setup.h18
-rw-r--r--security/landlock/syscalls.c451
17 files changed, 2371 insertions, 0 deletions
diff --git a/security/landlock/Kconfig b/security/landlock/Kconfig
new file mode 100644
index 000000000000..8e33c4e8ffb8
--- /dev/null
+++ b/security/landlock/Kconfig
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config SECURITY_LANDLOCK
+ bool "Landlock support"
+ depends on SECURITY && !ARCH_EPHEMERAL_INODES
+ select SECURITY_PATH
+ help
+ Landlock is a sandboxing mechanism that enables processes to restrict
+ themselves (and their future children) by gradually enforcing
+ tailored access control policies. A Landlock security policy is a
+ set of access rights (e.g. open a file in read-only, make a
+ directory, etc.) tied to a file hierarchy. Such policy can be
+ configured and enforced by any processes for themselves using the
+ dedicated system calls: landlock_create_ruleset(),
+ landlock_add_rule(), and landlock_restrict_self().
+
+ See Documentation/userspace-api/landlock.rst for further information.
+
+ If you are unsure how to answer this question, answer N. Otherwise,
+ you should also prepend "landlock," to the content of CONFIG_LSM to
+ enable Landlock at boot time.
diff --git a/security/landlock/Makefile b/security/landlock/Makefile
new file mode 100644
index 000000000000..7bbd2f413b3e
--- /dev/null
+++ b/security/landlock/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_SECURITY_LANDLOCK) := landlock.o
+
+landlock-y := setup.o syscalls.o object.o ruleset.o \
+ cred.o ptrace.o fs.o
diff --git a/security/landlock/common.h b/security/landlock/common.h
new file mode 100644
index 000000000000..5dc0fe15707d
--- /dev/null
+++ b/security/landlock/common.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - Common constants and helpers
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#ifndef _SECURITY_LANDLOCK_COMMON_H
+#define _SECURITY_LANDLOCK_COMMON_H
+
+#define LANDLOCK_NAME "landlock"
+
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) LANDLOCK_NAME ": " fmt
+
+#endif /* _SECURITY_LANDLOCK_COMMON_H */
diff --git a/security/landlock/cred.c b/security/landlock/cred.c
new file mode 100644
index 000000000000..6725af24c684
--- /dev/null
+++ b/security/landlock/cred.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - Credential hooks
+ *
+ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#include <linux/cred.h>
+#include <linux/lsm_hooks.h>
+
+#include "common.h"
+#include "cred.h"
+#include "ruleset.h"
+#include "setup.h"
+
+static int hook_cred_prepare(struct cred *const new,
+ const struct cred *const old, const gfp_t gfp)
+{
+ struct landlock_ruleset *const old_dom = landlock_cred(old)->domain;
+
+ if (old_dom) {
+ landlock_get_ruleset(old_dom);
+ landlock_cred(new)->domain = old_dom;
+ }
+ return 0;
+}
+
+static void hook_cred_free(struct cred *const cred)
+{
+ struct landlock_ruleset *const dom = landlock_cred(cred)->domain;
+
+ if (dom)
+ landlock_put_ruleset_deferred(dom);
+}
+
+static struct security_hook_list landlock_hooks[] __lsm_ro_after_init = {
+ LSM_HOOK_INIT(cred_prepare, hook_cred_prepare),
+ LSM_HOOK_INIT(cred_free, hook_cred_free),
+};
+
+__init void landlock_add_cred_hooks(void)
+{
+ security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),
+ LANDLOCK_NAME);
+}
diff --git a/security/landlock/cred.h b/security/landlock/cred.h
new file mode 100644
index 000000000000..5f99d3decade
--- /dev/null
+++ b/security/landlock/cred.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - Credential hooks
+ *
+ * Copyright © 2019-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2019-2020 ANSSI
+ */
+
+#ifndef _SECURITY_LANDLOCK_CRED_H
+#define _SECURITY_LANDLOCK_CRED_H
+
+#include <linux/cred.h>
+#include <linux/init.h>
+#include <linux/rcupdate.h>
+
+#include "ruleset.h"
+#include "setup.h"
+
+struct landlock_cred_security {
+ struct landlock_ruleset *domain;
+};
+
+static inline struct landlock_cred_security *landlock_cred(
+ const struct cred *cred)
+{
+ return cred->security + landlock_blob_sizes.lbs_cred;
+}
+
+static inline const struct landlock_ruleset *landlock_get_current_domain(void)
+{
+ return landlock_cred(current_cred())->domain;
+}
+
+/*
+ * The call needs to come from an RCU read-side critical section.
+ */
+static inline const struct landlock_ruleset *landlock_get_task_domain(
+ const struct task_struct *const task)
+{
+ return landlock_cred(__task_cred(task))->domain;
+}
+
+static inline bool landlocked(const struct task_struct *const task)
+{
+ bool has_dom;
+
+ if (task == current)
+ return !!landlock_get_current_domain();
+
+ rcu_read_lock();
+ has_dom = !!landlock_get_task_domain(task);
+ rcu_read_unlock();
+ return has_dom;
+}
+
+__init void landlock_add_cred_hooks(void);
+
+#endif /* _SECURITY_LANDLOCK_CRED_H */
diff --git a/security/landlock/fs.c b/security/landlock/fs.c
new file mode 100644
index 000000000000..97b8e421f617
--- /dev/null
+++ b/security/landlock/fs.c
@@ -0,0 +1,692 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - Filesystem management and hooks
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/bits.h>
+#include <linux/compiler_types.h>
+#include <linux/dcache.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/limits.h>
+#include <linux/list.h>
+#include <linux/lsm_hooks.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/path.h>
+#include <linux/rcupdate.h>
+#include <linux/spinlock.h>
+#include <linux/stat.h>
+#include <linux/types.h>
+#include <linux/wait_bit.h>
+#include <linux/workqueue.h>
+#include <uapi/linux/landlock.h>
+
+#include "common.h"
+#include "cred.h"
+#include "fs.h"
+#include "limits.h"
+#include "object.h"
+#include "ruleset.h"
+#include "setup.h"
+
+/* Underlying object management */
+
+static void release_inode(struct landlock_object *const object)
+ __releases(object->lock)
+{
+ struct inode *const inode = object->underobj;
+ struct super_block *sb;
+
+ if (!inode) {
+ spin_unlock(&object->lock);
+ return;
+ }
+
+ /*
+ * Protects against concurrent use by hook_sb_delete() of the reference
+ * to the underlying inode.
+ */
+ object->underobj = NULL;
+ /*
+ * Makes sure that if the filesystem is concurrently unmounted,
+ * hook_sb_delete() will wait for us to finish iput().
+ */
+ sb = inode->i_sb;
+ atomic_long_inc(&landlock_superblock(sb)->inode_refs);
+ spin_unlock(&object->lock);
+ /*
+ * Because object->underobj was not NULL, hook_sb_delete() and
+ * get_inode_object() guarantee that it is safe to reset
+ * landlock_inode(inode)->object while it is not NULL. It is therefore
+ * not necessary to lock inode->i_lock.
+ */
+ rcu_assign_pointer(landlock_inode(inode)->object, NULL);
+ /*
+ * Now, new rules can safely be tied to @inode with get_inode_object().
+ */
+
+ iput(inode);
+ if (atomic_long_dec_and_test(&landlock_superblock(sb)->inode_refs))
+ wake_up_var(&landlock_superblock(sb)->inode_refs);
+}
+
+static const struct landlock_object_underops landlock_fs_underops = {
+ .release = release_inode
+};
+
+/* Ruleset management */
+
+static struct landlock_object *get_inode_object(struct inode *const inode)
+{
+ struct landlock_object *object, *new_object;
+ struct landlock_inode_security *inode_sec = landlock_inode(inode);
+
+ rcu_read_lock();
+retry:
+ object = rcu_dereference(inode_sec->object);
+ if (object) {
+ if (likely(refcount_inc_not_zero(&object->usage))) {
+ rcu_read_unlock();
+ return object;
+ }
+ /*
+ * We are racing with release_inode(), the object is going
+ * away. Wait for release_inode(), then retry.
+ */
+ spin_lock(&object->lock);
+ spin_unlock(&object->lock);
+ goto retry;
+ }
+ rcu_read_unlock();
+
+ /*
+ * If there is no object tied to @inode, then create a new one (without
+ * holding any locks).
+ */
+ new_object = landlock_create_object(&landlock_fs_underops, inode);
+ if (IS_ERR(new_object))
+ return new_object;
+
+ /*
+ * Protects against concurrent calls to get_inode_object() or
+ * hook_sb_delete().
+ */
+ spin_lock(&inode->i_lock);
+ if (unlikely(rcu_access_pointer(inode_sec->object))) {
+ /* Someone else just created the object, bail out and retry. */
+ spin_unlock(&inode->i_lock);
+ kfree(new_object);
+
+ rcu_read_lock();
+ goto retry;
+ }
+
+ /*
+ * @inode will be released by hook_sb_delete() on its superblock
+ * shutdown, or by release_inode() when no more ruleset references the
+ * related object.
+ */
+ ihold(inode);
+ rcu_assign_pointer(inode_sec->object, new_object);
+ spin_unlock(&inode->i_lock);
+ return new_object;
+}
+
+/* All access rights that can be tied to files. */
+#define ACCESS_FILE ( \
+ LANDLOCK_ACCESS_FS_EXECUTE | \
+ LANDLOCK_ACCESS_FS_WRITE_FILE | \
+ LANDLOCK_ACCESS_FS_READ_FILE)
+
+/*
+ * @path: Should have been checked by get_path_from_fd().
+ */
+int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
+ const struct path *const path, u32 access_rights)
+{
+ int err;
+ struct landlock_object *object;
+
+ /* Files only get access rights that make sense. */
+ if (!d_is_dir(path->dentry) && (access_rights | ACCESS_FILE) !=
+ ACCESS_FILE)
+ return -EINVAL;
+ if (WARN_ON_ONCE(ruleset->num_layers != 1))
+ return -EINVAL;
+
+ /* Transforms relative access rights to absolute ones. */
+ access_rights |= LANDLOCK_MASK_ACCESS_FS & ~ruleset->fs_access_masks[0];
+ object = get_inode_object(d_backing_inode(path->dentry));
+ if (IS_ERR(object))
+ return PTR_ERR(object);
+ mutex_lock(&ruleset->lock);
+ err = landlock_insert_rule(ruleset, object, access_rights);
+ mutex_unlock(&ruleset->lock);
+ /*
+ * No need to check for an error because landlock_insert_rule()
+ * increments the refcount for the new object if needed.
+ */
+ landlock_put_object(object);
+ return err;
+}
+
+/* Access-control management */
+
+static inline u64 unmask_layers(
+ const struct landlock_ruleset *const domain,
+ const struct path *const path, const u32 access_request,
+ u64 layer_mask)
+{
+ const struct landlock_rule *rule;
+ const struct inode *inode;
+ size_t i;
+
+ if (d_is_negative(path->dentry))
+ /* Ignore nonexistent leafs. */
+ return layer_mask;
+ inode = d_backing_inode(path->dentry);
+ rcu_read_lock();
+ rule = landlock_find_rule(domain,
+ rcu_dereference(landlock_inode(inode)->object));
+ rcu_read_unlock();
+ if (!rule)
+ return layer_mask;
+
+ /*
+ * An access is granted if, for each policy layer, at least one rule
+ * encountered on the pathwalk grants the requested accesses,
+ * regardless of their position in the layer stack. We must then check
+ * the remaining layers for each inode, from the first added layer to
+ * the last one.
+ */
+ for (i = 0; i < rule->num_layers; i++) {
+ const struct landlock_layer *const layer = &rule->layers[i];
+ const u64 layer_level = BIT_ULL(layer->level - 1);
+
+ /* Checks that the layer grants access to the full request. */
+ if ((layer->access & access_request) == access_request) {
+ layer_mask &= ~layer_level;
+
+ if (layer_mask == 0)
+ return layer_mask;
+ }
+ }
+ return layer_mask;
+}
+
+static int check_access_path(const struct landlock_ruleset *const domain,
+ const struct path *const path, u32 access_request)
+{
+ bool allowed = false;
+ struct path walker_path;
+ u64 layer_mask;
+ size_t i;
+
+ /* Make sure all layers can be checked. */
+ BUILD_BUG_ON(BITS_PER_TYPE(layer_mask) < LANDLOCK_MAX_NUM_LAYERS);
+
+ if (!access_request)
+ return 0;
+ if (WARN_ON_ONCE(!domain || !path))
+ return 0;
+ /*
+ * Allows access to pseudo filesystems that will never be mountable
+ * (e.g. sockfs, pipefs), but can still be reachable through
+ * /proc/<pid>/fd/<file-descriptor> .
+ */
+ if ((path->dentry->d_sb->s_flags & SB_NOUSER) ||
+ (d_is_positive(path->dentry) &&
+ unlikely(IS_PRIVATE(d_backing_inode(path->dentry)))))
+ return 0;
+ if (WARN_ON_ONCE(domain->num_layers < 1))
+ return -EACCES;
+
+ /* Saves all layers handling a subset of requested accesses. */
+ layer_mask = 0;
+ for (i = 0; i < domain->num_layers; i++) {
+ if (domain->fs_access_masks[i] & access_request)
+ layer_mask |= BIT_ULL(i);
+ }
+ /* An access request not handled by the domain is allowed. */
+ if (layer_mask == 0)
+ return 0;
+
+ walker_path = *path;
+ path_get(&walker_path);
+ /*
+ * We need to walk through all the hierarchy to not miss any relevant
+ * restriction.
+ */
+ while (true) {
+ struct dentry *parent_dentry;
+
+ layer_mask = unmask_layers(domain, &walker_path,
+ access_request, layer_mask);
+ if (layer_mask == 0) {
+ /* Stops when a rule from each layer grants access. */
+ allowed = true;
+ break;
+ }
+
+jump_up:
+ if (walker_path.dentry == walker_path.mnt->mnt_root) {
+ if (follow_up(&walker_path)) {
+ /* Ignores hidden mount points. */
+ goto jump_up;
+ } else {
+ /*
+ * Stops at the real root. Denies access
+ * because not all layers have granted access.
+ */
+ allowed = false;
+ break;
+ }
+ }
+ if (unlikely(IS_ROOT(walker_path.dentry))) {
+ /*
+ * Stops at disconnected root directories. Only allows
+ * access to internal filesystems (e.g. nsfs, which is
+ * reachable through /proc/<pid>/ns/<namespace>).
+ */
+ allowed = !!(walker_path.mnt->mnt_flags & MNT_INTERNAL);
+ break;
+ }
+ parent_dentry = dget_parent(walker_path.dentry);
+ dput(walker_path.dentry);
+ walker_path.dentry = parent_dentry;
+ }
+ path_put(&walker_path);
+ return allowed ? 0 : -EACCES;
+}
+
+static inline int current_check_access_path(const struct path *const path,
+ const u32 access_request)
+{
+ const struct landlock_ruleset *const dom =
+ landlock_get_current_domain();
+
+ if (!dom)
+ return 0;
+ return check_access_path(dom, path, access_request);
+}
+
+/* Inode hooks */
+
+static void hook_inode_free_security(struct inode *const inode)
+{
+ /*
+ * All inodes must already have been untied from their object by
+ * release_inode() or hook_sb_delete().
+ */
+ WARN_ON_ONCE(landlock_inode(inode)->object);
+}
+
+/* Super-block hooks */
+
+/*
+ * Release the inodes used in a security policy.
+ *
+ * Cf. fsnotify_unmount_inodes() and invalidate_inodes()
+ */
+static void hook_sb_delete(struct super_block *const sb)
+{
+ struct inode *inode, *prev_inode = NULL;
+
+ if (!landlock_initialized)
+ return;
+
+ spin_lock(&sb->s_inode_list_lock);
+ list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+ struct landlock_object *object;
+
+ /* Only handles referenced inodes. */
+ if (!atomic_read(&inode->i_count))
+ continue;
+
+ /*
+ * Protects against concurrent modification of inode (e.g.
+ * from get_inode_object()).
+ */
+ spin_lock(&inode->i_lock);
+ /*
+ * Checks I_FREEING and I_WILL_FREE to protect against a race
+ * condition when release_inode() just called iput(), which
+ * could lead to a NULL dereference of inode->security or a
+ * second call to iput() for the same Landlock object. Also
+ * checks I_NEW because such inode cannot be tied to an object.
+ */
+ if (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
+
+ rcu_read_lock();
+ object = rcu_dereference(landlock_inode(inode)->object);
+ if (!object) {
+ rcu_read_unlock();
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
+ /* Keeps a reference to this inode until the next loop walk. */
+ __iget(inode);
+ spin_unlock(&inode->i_lock);
+
+ /*
+ * If there is no concurrent release_inode() ongoing, then we
+ * are in charge of calling iput() on this inode, otherwise we
+ * will just wait for it to finish.
+ */
+ spin_lock(&object->lock);
+ if (object->underobj == inode) {
+ object->underobj = NULL;
+ spin_unlock(&object->lock);
+ rcu_read_unlock();
+
+ /*
+ * Because object->underobj was not NULL,
+ * release_inode() and get_inode_object() guarantee
+ * that it is safe to reset
+ * landlock_inode(inode)->object while it is not NULL.
+ * It is therefore not necessary to lock inode->i_lock.
+ */
+ rcu_assign_pointer(landlock_inode(inode)->object, NULL);
+ /*
+ * At this point, we own the ihold() reference that was
+ * originally set up by get_inode_object() and the
+ * __iget() reference that we just set in this loop
+ * walk. Therefore the following call to iput() will
+ * not sleep nor drop the inode because there is now at
+ * least two references to it.
+ */
+ iput(inode);
+ } else {
+ spin_unlock(&object->lock);
+ rcu_read_unlock();
+ }
+
+ if (prev_inode) {
+ /*
+ * At this point, we still own the __iget() reference
+ * that we just set in this loop walk. Therefore we
+ * can drop the list lock and know that the inode won't
+ * disappear from under us until the next loop walk.
+ */
+ spin_unlock(&sb->s_inode_list_lock);
+ /*
+ * We can now actually put the inode reference from the
+ * previous loop walk, which is not needed anymore.
+ */
+ iput(prev_inode);
+ cond_resched();
+ spin_lock(&sb->s_inode_list_lock);
+ }
+ prev_inode = inode;
+ }
+ spin_unlock(&sb->s_inode_list_lock);
+
+ /* Puts the inode reference from the last loop walk, if any. */
+ if (prev_inode)
+ iput(prev_inode);
+ /* Waits for pending iput() in release_inode(). */
+ wait_var_event(&landlock_superblock(sb)->inode_refs, !atomic_long_read(
+ &landlock_superblock(sb)->inode_refs));
+}
+
+/*
+ * Because a Landlock security policy is defined according to the filesystem
+ * topology (i.e. the mount namespace), changing it may grant access to files
+ * not previously allowed.
+ *
+ * To make it simple, deny any filesystem topology modification by landlocked
+ * processes. Non-landlocked processes may still change the namespace of a
+ * landlocked process, but this kind of threat must be handled by a system-wide
+ * access-control security policy.
+ *
+ * This could be lifted in the future if Landlock can safely handle mount
+ * namespace updates requested by a landlocked process. Indeed, we could
+ * update the current domain (which is currently read-only) by taking into
+ * account the accesses of the source and the destination of a new mount point.
+ * However, it would also require to make all the child domains dynamically
+ * inherit these new constraints. Anyway, for backward compatibility reasons,
+ * a dedicated user space option would be required (e.g. as a ruleset flag).
+ */
+static int hook_sb_mount(const char *const dev_name,
+ const struct path *const path, const char *const type,
+ const unsigned long flags, void *const data)
+{
+ if (!landlock_get_current_domain())
+ return 0;
+ return -EPERM;
+}
+
+static int hook_move_mount(const struct path *const from_path,
+ const struct path *const to_path)
+{
+ if (!landlock_get_current_domain())
+ return 0;
+ return -EPERM;
+}
+
+/*
+ * Removing a mount point may reveal a previously hidden file hierarchy, which
+ * may then grant access to files, which may have previously been forbidden.
+ */
+static int hook_sb_umount(struct vfsmount *const mnt, const int flags)
+{
+ if (!landlock_get_current_domain())
+ return 0;
+ return -EPERM;
+}
+
+static int hook_sb_remount(struct super_block *const sb, void *const mnt_opts)
+{
+ if (!landlock_get_current_domain())
+ return 0;
+ return -EPERM;
+}
+
+/*
+ * pivot_root(2), like mount(2), changes the current mount namespace. It must
+ * then be forbidden for a landlocked process.
+ *
+ * However, chroot(2) may be allowed because it only changes the relative root
+ * directory of the current process. Moreover, it can be used to restrict the
+ * view of the filesystem.
+ */
+static int hook_sb_pivotroot(const struct path *const old_path,
+ const struct path *const new_path)
+{
+ if (!landlock_get_current_domain())
+ return 0;
+ return -EPERM;
+}
+
+/* Path hooks */
+
+static inline u32 get_mode_access(const umode_t mode)
+{
+ switch (mode & S_IFMT) {
+ case S_IFLNK:
+ return LANDLOCK_ACCESS_FS_MAKE_SYM;
+ case 0:
+ /* A zero mode translates to S_IFREG. */
+ case S_IFREG:
+ return LANDLOCK_ACCESS_FS_MAKE_REG;
+ case S_IFDIR:
+ return LANDLOCK_ACCESS_FS_MAKE_DIR;
+ case S_IFCHR:
+ return LANDLOCK_ACCESS_FS_MAKE_CHAR;
+ case S_IFBLK:
+ return LANDLOCK_ACCESS_FS_MAKE_BLOCK;
+ case S_IFIFO:
+ return LANDLOCK_ACCESS_FS_MAKE_FIFO;
+ case S_IFSOCK:
+ return LANDLOCK_ACCESS_FS_MAKE_SOCK;
+ default:
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+}
+
+/*
+ * Creating multiple links or renaming may lead to privilege escalations if not
+ * handled properly. Indeed, we must be sure that the source doesn't gain more
+ * privileges by being accessible from the destination. This is getting more
+ * complex when dealing with multiple layers. The whole picture can be seen as
+ * a multilayer partial ordering problem. A future version of Landlock will
+ * deal with that.
+ */
+static int hook_path_link(struct dentry *const old_dentry,
+ const struct path *const new_dir,
+ struct dentry *const new_dentry)
+{
+ const struct landlock_ruleset *const dom =
+ landlock_get_current_domain();
+
+ if (!dom)
+ return 0;
+ /* The mount points are the same for old and new paths, cf. EXDEV. */
+ if (old_dentry->d_parent != new_dir->dentry)
+ /* Gracefully forbids reparenting. */
+ return -EXDEV;
+ if (unlikely(d_is_negative(old_dentry)))
+ return -ENOENT;
+ return check_access_path(dom, new_dir,
+ get_mode_access(d_backing_inode(old_dentry)->i_mode));
+}
+
+static inline u32 maybe_remove(const struct dentry *const dentry)
+{
+ if (d_is_negative(dentry))
+ return 0;
+ return d_is_dir(dentry) ? LANDLOCK_ACCESS_FS_REMOVE_DIR :
+ LANDLOCK_ACCESS_FS_REMOVE_FILE;
+}
+
+static int hook_path_rename(const struct path *const old_dir,
+ struct dentry *const old_dentry,
+ const struct path *const new_dir,
+ struct dentry *const new_dentry)
+{
+ const struct landlock_ruleset *const dom =
+ landlock_get_current_domain();
+
+ if (!dom)
+ return 0;
+ /* The mount points are the same for old and new paths, cf. EXDEV. */
+ if (old_dir->dentry != new_dir->dentry)
+ /* Gracefully forbids reparenting. */
+ return -EXDEV;
+ if (unlikely(d_is_negative(old_dentry)))
+ return -ENOENT;
+ /* RENAME_EXCHANGE is handled because directories are the same. */
+ return check_access_path(dom, old_dir, maybe_remove(old_dentry) |
+ maybe_remove(new_dentry) |
+ get_mode_access(d_backing_inode(old_dentry)->i_mode));
+}
+
+static int hook_path_mkdir(const struct path *const dir,
+ struct dentry *const dentry, const umode_t mode)
+{
+ return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_DIR);
+}
+
+static int hook_path_mknod(const struct path *const dir,
+ struct dentry *const dentry, const umode_t mode,
+ const unsigned int dev)
+{
+ const struct landlock_ruleset *const dom =
+ landlock_get_current_domain();
+
+ if (!dom)
+ return 0;
+ return check_access_path(dom, dir, get_mode_access(mode));
+}
+
+static int hook_path_symlink(const struct path *const dir,
+ struct dentry *const dentry, const char *const old_name)
+{
+ return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_SYM);
+}
+
+static int hook_path_unlink(const struct path *const dir,
+ struct dentry *const dentry)
+{
+ return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_FILE);
+}
+
+static int hook_path_rmdir(const struct path *const dir,
+ struct dentry *const dentry)
+{
+ return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_DIR);
+}
+
+/* File hooks */
+
+static inline u32 get_file_access(const struct file *const file)
+{
+ u32 access = 0;
+
+ if (file->f_mode & FMODE_READ) {
+ /* A directory can only be opened in read mode. */
+ if (S_ISDIR(file_inode(file)->i_mode))
+ return LANDLOCK_ACCESS_FS_READ_DIR;
+ access = LANDLOCK_ACCESS_FS_READ_FILE;
+ }
+ if (file->f_mode & FMODE_WRITE)
+ access |= LANDLOCK_ACCESS_FS_WRITE_FILE;
+ /* __FMODE_EXEC is indeed part of f_flags, not f_mode. */
+ if (file->f_flags & __FMODE_EXEC)
+ access |= LANDLOCK_ACCESS_FS_EXECUTE;
+ return access;
+}
+
+static int hook_file_open(struct file *const file)
+{
+ const struct landlock_ruleset *const dom =
+ landlock_get_current_domain();
+
+ if (!dom)
+ return 0;
+ /*
+ * Because a file may be opened with O_PATH, get_file_access() may
+ * return 0. This case will be handled with a future Landlock
+ * evolution.
+ */
+ return check_access_path(dom, &file->f_path, get_file_access(file));
+}
+
+static struct security_hook_list landlock_hooks[] __lsm_ro_after_init = {
+ LSM_HOOK_INIT(inode_free_security, hook_inode_free_security),
+
+ LSM_HOOK_INIT(sb_delete, hook_sb_delete),
+ LSM_HOOK_INIT(sb_mount, hook_sb_mount),
+ LSM_HOOK_INIT(move_mount, hook_move_mount),
+ LSM_HOOK_INIT(sb_umount, hook_sb_umount),
+ LSM_HOOK_INIT(sb_remount, hook_sb_remount),
+ LSM_HOOK_INIT(sb_pivotroot, hook_sb_pivotroot),
+
+ LSM_HOOK_INIT(path_link, hook_path_link),
+ LSM_HOOK_INIT(path_rename, hook_path_rename),
+ LSM_HOOK_INIT(path_mkdir, hook_path_mkdir),
+ LSM_HOOK_INIT(path_mknod, hook_path_mknod),
+ LSM_HOOK_INIT(path_symlink, hook_path_symlink),
+ LSM_HOOK_INIT(path_unlink, hook_path_unlink),
+ LSM_HOOK_INIT(path_rmdir, hook_path_rmdir),
+
+ LSM_HOOK_INIT(file_open, hook_file_open),
+};
+
+__init void landlock_add_fs_hooks(void)
+{
+ security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),
+ LANDLOCK_NAME);
+}
diff --git a/security/landlock/fs.h b/security/landlock/fs.h
new file mode 100644
index 000000000000..187284b421c9
--- /dev/null
+++ b/security/landlock/fs.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - Filesystem management and hooks
+ *
+ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#ifndef _SECURITY_LANDLOCK_FS_H
+#define _SECURITY_LANDLOCK_FS_H
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/rcupdate.h>
+
+#include "ruleset.h"
+#include "setup.h"
+
+/**
+ * struct landlock_inode_security - Inode security blob
+ *
+ * Enable to reference a &struct landlock_object tied to an inode (i.e.
+ * underlying object).
+ */
+struct landlock_inode_security {
+ /**
+ * @object: Weak pointer to an allocated object. All assignments of a
+ * new object are protected by the underlying inode->i_lock. However,
+ * atomically disassociating @object from the inode is only protected
+ * by @object->lock, from the time @object's usage refcount drops to
+ * zero to the time this pointer is nulled out (cf. release_inode() and
+ * hook_sb_delete()). Indeed, such disassociation doesn't require
+ * inode->i_lock thanks to the careful rcu_access_pointer() check
+ * performed by get_inode_object().
+ */
+ struct landlock_object __rcu *object;
+};
+
+/**
+ * struct landlock_superblock_security - Superblock security blob
+ *
+ * Enable hook_sb_delete() to wait for concurrent calls to release_inode().
+ */
+struct landlock_superblock_security {
+ /**
+ * @inode_refs: Number of pending inodes (from this superblock) that
+ * are being released by release_inode().
+ * Cf. struct super_block->s_fsnotify_inode_refs .
+ */
+ atomic_long_t inode_refs;
+};
+
+static inline struct landlock_inode_security *landlock_inode(
+ const struct inode *const inode)
+{
+ return inode->i_security + landlock_blob_sizes.lbs_inode;
+}
+
+static inline struct landlock_superblock_security *landlock_superblock(
+ const struct super_block *const superblock)
+{
+ return superblock->s_security + landlock_blob_sizes.lbs_superblock;
+}
+
+__init void landlock_add_fs_hooks(void);
+
+int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
+ const struct path *const path, u32 access_hierarchy);
+
+#endif /* _SECURITY_LANDLOCK_FS_H */
diff --git a/security/landlock/limits.h b/security/landlock/limits.h
new file mode 100644
index 000000000000..2a0a1095ee27
--- /dev/null
+++ b/security/landlock/limits.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - Limits for different components
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#ifndef _SECURITY_LANDLOCK_LIMITS_H
+#define _SECURITY_LANDLOCK_LIMITS_H
+
+#include <linux/limits.h>
+#include <uapi/linux/landlock.h>
+
+#define LANDLOCK_MAX_NUM_LAYERS 64
+#define LANDLOCK_MAX_NUM_RULES U32_MAX
+
+#define LANDLOCK_LAST_ACCESS_FS LANDLOCK_ACCESS_FS_MAKE_SYM
+#define LANDLOCK_MASK_ACCESS_FS ((LANDLOCK_LAST_ACCESS_FS << 1) - 1)
+
+#endif /* _SECURITY_LANDLOCK_LIMITS_H */
diff --git a/security/landlock/object.c b/security/landlock/object.c
new file mode 100644
index 000000000000..d674fdf9ff04
--- /dev/null
+++ b/security/landlock/object.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - Object management
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#include <linux/bug.h>
+#include <linux/compiler_types.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/refcount.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "object.h"
+
+struct landlock_object *landlock_create_object(
+ const struct landlock_object_underops *const underops,
+ void *const underobj)
+{
+ struct landlock_object *new_object;
+
+ if (WARN_ON_ONCE(!underops || !underobj))
+ return ERR_PTR(-ENOENT);
+ new_object = kzalloc(sizeof(*new_object), GFP_KERNEL_ACCOUNT);
+ if (!new_object)
+ return ERR_PTR(-ENOMEM);
+ refcount_set(&new_object->usage, 1);
+ spin_lock_init(&new_object->lock);
+ new_object->underops = underops;
+ new_object->underobj = underobj;
+ return new_object;
+}
+
+/*
+ * The caller must own the object (i.e. thanks to object->usage) to safely put
+ * it.
+ */
+void landlock_put_object(struct landlock_object *const object)
+{
+ /*
+ * The call to @object->underops->release(object) might sleep, e.g.
+ * because of iput().
+ */
+ might_sleep();
+ if (!object)
+ return;
+
+ /*
+ * If the @object's refcount cannot drop to zero, we can just decrement
+ * the refcount without holding a lock. Otherwise, the decrement must
+ * happen under @object->lock for synchronization with things like
+ * get_inode_object().
+ */
+ if (refcount_dec_and_lock(&object->usage, &object->lock)) {
+ __acquire(&object->lock);
+ /*
+ * With @object->lock initially held, remove the reference from
+ * @object->underobj to @object (if it still exists).
+ */
+ object->underops->release(object);
+ kfree_rcu(object, rcu_free);
+ }
+}
diff --git a/security/landlock/object.h b/security/landlock/object.h
new file mode 100644
index 000000000000..3f80674c6c8d
--- /dev/null
+++ b/security/landlock/object.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - Object management
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#ifndef _SECURITY_LANDLOCK_OBJECT_H
+#define _SECURITY_LANDLOCK_OBJECT_H
+
+#include <linux/compiler_types.h>
+#include <linux/refcount.h>
+#include <linux/spinlock.h>
+
+struct landlock_object;
+
+/**
+ * struct landlock_object_underops - Operations on an underlying object
+ */
+struct landlock_object_underops {
+ /**
+ * @release: Releases the underlying object (e.g. iput() for an inode).
+ */
+ void (*release)(struct landlock_object *const object)
+ __releases(object->lock);
+};
+
+/**
+ * struct landlock_object - Security blob tied to a kernel object
+ *
+ * The goal of this structure is to enable to tie a set of ephemeral access
+ * rights (pertaining to different domains) to a kernel object (e.g an inode)
+ * in a safe way. This implies to handle concurrent use and modification.
+ *
+ * The lifetime of a &struct landlock_object depends on the rules referring to
+ * it.
+ */
+struct landlock_object {
+ /**
+ * @usage: This counter is used to tie an object to the rules matching
+ * it or to keep it alive while adding a new rule. If this counter
+ * reaches zero, this struct must not be modified, but this counter can
+ * still be read from within an RCU read-side critical section. When
+ * adding a new rule to an object with a usage counter of zero, we must
+ * wait until the pointer to this object is set to NULL (or recycled).
+ */
+ refcount_t usage;
+ /**
+ * @lock: Protects against concurrent modifications. This lock must be
+ * held from the time @usage drops to zero until any weak references
+ * from @underobj to this object have been cleaned up.
+ *
+ * Lock ordering: inode->i_lock nests inside this.
+ */
+ spinlock_t lock;
+ /**
+ * @underobj: Used when cleaning up an object and to mark an object as
+ * tied to its underlying kernel structure. This pointer is protected
+ * by @lock. Cf. landlock_release_inodes() and release_inode().
+ */
+ void *underobj;
+ union {
+ /**
+ * @rcu_free: Enables lockless use of @usage, @lock and
+ * @underobj from within an RCU read-side critical section.
+ * @rcu_free and @underops are only used by
+ * landlock_put_object().
+ */
+ struct rcu_head rcu_free;
+ /**
+ * @underops: Enables landlock_put_object() to release the
+ * underlying object (e.g. inode).
+ */
+ const struct landlock_object_underops *underops;
+ };
+};
+
+struct landlock_object *landlock_create_object(
+ const struct landlock_object_underops *const underops,
+ void *const underobj);
+
+void landlock_put_object(struct landlock_object *const object);
+
+static inline void landlock_get_object(struct landlock_object *const object)
+{
+ if (object)
+ refcount_inc(&object->usage);
+}
+
+#endif /* _SECURITY_LANDLOCK_OBJECT_H */
diff --git a/security/landlock/ptrace.c b/security/landlock/ptrace.c
new file mode 100644
index 000000000000..f55b82446de2
--- /dev/null
+++ b/security/landlock/ptrace.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - Ptrace hooks
+ *
+ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2019-2020 ANSSI
+ */
+
+#include <asm/current.h>
+#include <linux/cred.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/lsm_hooks.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+
+#include "common.h"
+#include "cred.h"
+#include "ptrace.h"
+#include "ruleset.h"
+#include "setup.h"
+
+/**
+ * domain_scope_le - Checks domain ordering for scoped ptrace
+ *
+ * @parent: Parent domain.
+ * @child: Potential child of @parent.
+ *
+ * Checks if the @parent domain is less or equal to (i.e. an ancestor, which
+ * means a subset of) the @child domain.
+ */
+static bool domain_scope_le(const struct landlock_ruleset *const parent,
+ const struct landlock_ruleset *const child)
+{
+ const struct landlock_hierarchy *walker;
+
+ if (!parent)
+ return true;
+ if (!child)
+ return false;
+ for (walker = child->hierarchy; walker; walker = walker->parent) {
+ if (walker == parent->hierarchy)
+ /* @parent is in the scoped hierarchy of @child. */
+ return true;
+ }
+ /* There is no relationship between @parent and @child. */
+ return false;
+}
+
+static bool task_is_scoped(const struct task_struct *const parent,
+ const struct task_struct *const child)
+{
+ bool is_scoped;
+ const struct landlock_ruleset *dom_parent, *dom_child;
+
+ rcu_read_lock();
+ dom_parent = landlock_get_task_domain(parent);
+ dom_child = landlock_get_task_domain(child);
+ is_scoped = domain_scope_le(dom_parent, dom_child);
+ rcu_read_unlock();
+ return is_scoped;
+}
+
+static int task_ptrace(const struct task_struct *const parent,
+ const struct task_struct *const child)
+{
+ /* Quick return for non-landlocked tasks. */
+ if (!landlocked(parent))
+ return 0;
+ if (task_is_scoped(parent, child))
+ return 0;
+ return -EPERM;
+}
+
+/**
+ * hook_ptrace_access_check - Determines whether the current process may access
+ * another
+ *
+ * @child: Process to be accessed.
+ * @mode: Mode of attachment.
+ *
+ * If the current task has Landlock rules, then the child must have at least
+ * the same rules. Else denied.
+ *
+ * Determines whether a process may access another, returning 0 if permission
+ * granted, -errno if denied.
+ */
+static int hook_ptrace_access_check(struct task_struct *const child,
+ const unsigned int mode)
+{
+ return task_ptrace(current, child);
+}
+
+/**
+ * hook_ptrace_traceme - Determines whether another process may trace the
+ * current one
+ *
+ * @parent: Task proposed to be the tracer.
+ *
+ * If the parent has Landlock rules, then the current task must have the same
+ * or more rules. Else denied.
+ *
+ * Determines whether the nominated task is permitted to trace the current
+ * process, returning 0 if permission is granted, -errno if denied.
+ */
+static int hook_ptrace_traceme(struct task_struct *const parent)
+{
+ return task_ptrace(parent, current);
+}
+
+static struct security_hook_list landlock_hooks[] __lsm_ro_after_init = {
+ LSM_HOOK_INIT(ptrace_access_check, hook_ptrace_access_check),
+ LSM_HOOK_INIT(ptrace_traceme, hook_ptrace_traceme),
+};
+
+__init void landlock_add_ptrace_hooks(void)
+{
+ security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),
+ LANDLOCK_NAME);
+}
diff --git a/security/landlock/ptrace.h b/security/landlock/ptrace.h
new file mode 100644
index 000000000000..265b220ae3bf
--- /dev/null
+++ b/security/landlock/ptrace.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - Ptrace hooks
+ *
+ * Copyright © 2017-2019 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2019 ANSSI
+ */
+
+#ifndef _SECURITY_LANDLOCK_PTRACE_H
+#define _SECURITY_LANDLOCK_PTRACE_H
+
+__init void landlock_add_ptrace_hooks(void);
+
+#endif /* _SECURITY_LANDLOCK_PTRACE_H */
diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
new file mode 100644
index 000000000000..ec72b9262bf3
--- /dev/null
+++ b/security/landlock/ruleset.c
@@ -0,0 +1,473 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - Ruleset management
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#include <linux/bits.h>
+#include <linux/bug.h>
+#include <linux/compiler_types.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/lockdep.h>
+#include <linux/overflow.h>
+#include <linux/rbtree.h>
+#include <linux/refcount.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+#include "limits.h"
+#include "object.h"
+#include "ruleset.h"
+
+static struct landlock_ruleset *create_ruleset(const u32 num_layers)
+{
+ struct landlock_ruleset *new_ruleset;
+
+ new_ruleset = kzalloc(struct_size(new_ruleset, fs_access_masks,
+ num_layers), GFP_KERNEL_ACCOUNT);
+ if (!new_ruleset)
+ return ERR_PTR(-ENOMEM);
+ refcount_set(&new_ruleset->usage, 1);
+ mutex_init(&new_ruleset->lock);
+ new_ruleset->root = RB_ROOT;
+ new_ruleset->num_layers = num_layers;
+ /*
+ * hierarchy = NULL
+ * num_rules = 0
+ * fs_access_masks[] = 0
+ */
+ return new_ruleset;
+}
+
+struct landlock_ruleset *landlock_create_ruleset(const u32 fs_access_mask)
+{
+ struct landlock_ruleset *new_ruleset;
+
+ /* Informs about useless ruleset. */
+ if (!fs_access_mask)
+ return ERR_PTR(-ENOMSG);
+ new_ruleset = create_ruleset(1);
+ if (!IS_ERR(new_ruleset))
+ new_ruleset->fs_access_masks[0] = fs_access_mask;
+ return new_ruleset;
+}
+
+static void build_check_rule(void)
+{
+ const struct landlock_rule rule = {
+ .num_layers = ~0,
+ };
+
+ BUILD_BUG_ON(rule.num_layers < LANDLOCK_MAX_NUM_LAYERS);
+}
+
+static struct landlock_rule *create_rule(
+ struct landlock_object *const object,
+ const struct landlock_layer (*const layers)[],
+ const u32 num_layers,
+ const struct landlock_layer *const new_layer)
+{
+ struct landlock_rule *new_rule;
+ u32 new_num_layers;
+
+ build_check_rule();
+ if (new_layer) {
+ /* Should already be checked by landlock_merge_ruleset(). */
+ if (WARN_ON_ONCE(num_layers >= LANDLOCK_MAX_NUM_LAYERS))
+ return ERR_PTR(-E2BIG);
+ new_num_layers = num_layers + 1;
+ } else {
+ new_num_layers = num_layers;
+ }
+ new_rule = kzalloc(struct_size(new_rule, layers, new_num_layers),
+ GFP_KERNEL_ACCOUNT);
+ if (!new_rule)
+ return ERR_PTR(-ENOMEM);
+ RB_CLEAR_NODE(&new_rule->node);
+ landlock_get_object(object);
+ new_rule->object = object;
+ new_rule->num_layers = new_num_layers;
+ /* Copies the original layer stack. */
+ memcpy(new_rule->layers, layers,
+ flex_array_size(new_rule, layers, num_layers));
+ if (new_layer)
+ /* Adds a copy of @new_layer on the layer stack. */
+ new_rule->layers[new_rule->num_layers - 1] = *new_layer;
+ return new_rule;
+}
+
+static void free_rule(struct landlock_rule *const rule)
+{
+ might_sleep();
+ if (!rule)
+ return;
+ landlock_put_object(rule->object);
+ kfree(rule);
+}
+
+static void build_check_ruleset(void)
+{
+ const struct landlock_ruleset ruleset = {
+ .num_rules = ~0,
+ .num_layers = ~0,
+ };
+ typeof(ruleset.fs_access_masks[0]) fs_access_mask = ~0;
+
+ BUILD_BUG_ON(ruleset.num_rules < LANDLOCK_MAX_NUM_RULES);
+ BUILD_BUG_ON(ruleset.num_layers < LANDLOCK_MAX_NUM_LAYERS);
+ BUILD_BUG_ON(fs_access_mask < LANDLOCK_MASK_ACCESS_FS);
+}
+
+/**
+ * insert_rule - Create and insert a rule in a ruleset
+ *
+ * @ruleset: The ruleset to be updated.
+ * @object: The object to build the new rule with. The underlying kernel
+ * object must be held by the caller.
+ * @layers: One or multiple layers to be copied into the new rule.
+ * @num_layers: The number of @layers entries.
+ *
+ * When user space requests to add a new rule to a ruleset, @layers only
+ * contains one entry and this entry is not assigned to any level. In this
+ * case, the new rule will extend @ruleset, similarly to a boolean OR between
+ * access rights.
+ *
+ * When merging a ruleset in a domain, or copying a domain, @layers will be
+ * added to @ruleset as new constraints, similarly to a boolean AND between
+ * access rights.
+ */
+static int insert_rule(struct landlock_ruleset *const ruleset,
+ struct landlock_object *const object,
+ const struct landlock_layer (*const layers)[],
+ size_t num_layers)
+{
+ struct rb_node **walker_node;
+ struct rb_node *parent_node = NULL;
+ struct landlock_rule *new_rule;
+
+ might_sleep();
+ lockdep_assert_held(&ruleset->lock);
+ if (WARN_ON_ONCE(!object || !layers))
+ return -ENOENT;
+ walker_node = &(ruleset->root.rb_node);
+ while (*walker_node) {
+ struct landlock_rule *const this = rb_entry(*walker_node,
+ struct landlock_rule, node);
+
+ if (this->object != object) {
+ parent_node = *walker_node;
+ if (this->object < object)
+ walker_node = &((*walker_node)->rb_right);
+ else
+ walker_node = &((*walker_node)->rb_left);
+ continue;
+ }
+
+ /* Only a single-level layer should match an existing rule. */
+ if (WARN_ON_ONCE(num_layers != 1))
+ return -EINVAL;
+
+ /* If there is a matching rule, updates it. */
+ if ((*layers)[0].level == 0) {
+ /*
+ * Extends access rights when the request comes from
+ * landlock_add_rule(2), i.e. @ruleset is not a domain.
+ */
+ if (WARN_ON_ONCE(this->num_layers != 1))
+ return -EINVAL;
+ if (WARN_ON_ONCE(this->layers[0].level != 0))
+ return -EINVAL;
+ this->layers[0].access |= (*layers)[0].access;
+ return 0;
+ }
+
+ if (WARN_ON_ONCE(this->layers[0].level == 0))
+ return -EINVAL;
+
+ /*
+ * Intersects access rights when it is a merge between a
+ * ruleset and a domain.
+ */
+ new_rule = create_rule(object, &this->layers, this->num_layers,
+ &(*layers)[0]);
+ if (IS_ERR(new_rule))
+ return PTR_ERR(new_rule);
+ rb_replace_node(&this->node, &new_rule->node, &ruleset->root);
+ free_rule(this);
+ return 0;
+ }
+
+ /* There is no match for @object. */
+ build_check_ruleset();
+ if (ruleset->num_rules >= LANDLOCK_MAX_NUM_RULES)
+ return -E2BIG;
+ new_rule = create_rule(object, layers, num_layers, NULL);
+ if (IS_ERR(new_rule))
+ return PTR_ERR(new_rule);
+ rb_link_node(&new_rule->node, parent_node, walker_node);
+ rb_insert_color(&new_rule->node, &ruleset->root);
+ ruleset->num_rules++;
+ return 0;
+}
+
+static void build_check_layer(void)
+{
+ const struct landlock_layer layer = {
+ .level = ~0,
+ .access = ~0,
+ };
+
+ BUILD_BUG_ON(layer.level < LANDLOCK_MAX_NUM_LAYERS);
+ BUILD_BUG_ON(layer.access < LANDLOCK_MASK_ACCESS_FS);
+}
+
+/* @ruleset must be locked by the caller. */
+int landlock_insert_rule(struct landlock_ruleset *const ruleset,
+ struct landlock_object *const object, const u32 access)
+{
+ struct landlock_layer layers[] = {{
+ .access = access,
+ /* When @level is zero, insert_rule() extends @ruleset. */
+ .level = 0,
+ }};
+
+ build_check_layer();
+ return insert_rule(ruleset, object, &layers, ARRAY_SIZE(layers));
+}
+
+static inline void get_hierarchy(struct landlock_hierarchy *const hierarchy)
+{
+ if (hierarchy)
+ refcount_inc(&hierarchy->usage);
+}
+
+static void put_hierarchy(struct landlock_hierarchy *hierarchy)
+{
+ while (hierarchy && refcount_dec_and_test(&hierarchy->usage)) {
+ const struct landlock_hierarchy *const freeme = hierarchy;
+
+ hierarchy = hierarchy->parent;
+ kfree(freeme);
+ }
+}
+
+static int merge_ruleset(struct landlock_ruleset *const dst,
+ struct landlock_ruleset *const src)
+{
+ struct landlock_rule *walker_rule, *next_rule;
+ int err = 0;
+
+ might_sleep();
+ /* Should already be checked by landlock_merge_ruleset() */
+ if (WARN_ON_ONCE(!src))
+ return 0;
+ /* Only merge into a domain. */
+ if (WARN_ON_ONCE(!dst || !dst->hierarchy))
+ return -EINVAL;
+
+ /* Locks @dst first because we are its only owner. */
+ mutex_lock(&dst->lock);
+ mutex_lock_nested(&src->lock, SINGLE_DEPTH_NESTING);
+
+ /* Stacks the new layer. */
+ if (WARN_ON_ONCE(src->num_layers != 1 || dst->num_layers < 1)) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+ dst->fs_access_masks[dst->num_layers - 1] = src->fs_access_masks[0];
+
+ /* Merges the @src tree. */
+ rbtree_postorder_for_each_entry_safe(walker_rule, next_rule,
+ &src->root, node) {
+ struct landlock_layer layers[] = {{
+ .level = dst->num_layers,
+ }};
+
+ if (WARN_ON_ONCE(walker_rule->num_layers != 1)) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+ if (WARN_ON_ONCE(walker_rule->layers[0].level != 0)) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+ layers[0].access = walker_rule->layers[0].access;
+ err = insert_rule(dst, walker_rule->object, &layers,
+ ARRAY_SIZE(layers));
+ if (err)
+ goto out_unlock;
+ }
+
+out_unlock:
+ mutex_unlock(&src->lock);
+ mutex_unlock(&dst->lock);
+ return err;
+}
+
+static int inherit_ruleset(struct landlock_ruleset *const parent,
+ struct landlock_ruleset *const child)
+{
+ struct landlock_rule *walker_rule, *next_rule;
+ int err = 0;
+
+ might_sleep();
+ if (!parent)
+ return 0;
+
+ /* Locks @child first because we are its only owner. */
+ mutex_lock(&child->lock);
+ mutex_lock_nested(&parent->lock, SINGLE_DEPTH_NESTING);
+
+ /* Copies the @parent tree. */
+ rbtree_postorder_for_each_entry_safe(walker_rule, next_rule,
+ &parent->root, node) {
+ err = insert_rule(child, walker_rule->object,
+ &walker_rule->layers, walker_rule->num_layers);
+ if (err)
+ goto out_unlock;
+ }
+
+ if (WARN_ON_ONCE(child->num_layers <= parent->num_layers)) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+ /* Copies the parent layer stack and leaves a space for the new layer. */
+ memcpy(child->fs_access_masks, parent->fs_access_masks,
+ flex_array_size(parent, fs_access_masks, parent->num_layers));
+
+ if (WARN_ON_ONCE(!parent->hierarchy)) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+ get_hierarchy(parent->hierarchy);
+ child->hierarchy->parent = parent->hierarchy;
+
+out_unlock:
+ mutex_unlock(&parent->lock);
+ mutex_unlock(&child->lock);
+ return err;
+}
+
+static void free_ruleset(struct landlock_ruleset *const ruleset)
+{
+ struct landlock_rule *freeme, *next;
+
+ might_sleep();
+ rbtree_postorder_for_each_entry_safe(freeme, next, &ruleset->root,
+ node)
+ free_rule(freeme);
+ put_hierarchy(ruleset->hierarchy);
+ kfree(ruleset);
+}
+
+void landlock_put_ruleset(struct landlock_ruleset *const ruleset)
+{
+ might_sleep();
+ if (ruleset && refcount_dec_and_test(&ruleset->usage))
+ free_ruleset(ruleset);
+}
+
+static void free_ruleset_work(struct work_struct *const work)
+{
+ struct landlock_ruleset *ruleset;
+
+ ruleset = container_of(work, struct landlock_ruleset, work_free);
+ free_ruleset(ruleset);
+}
+
+void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset)
+{
+ if (ruleset && refcount_dec_and_test(&ruleset->usage)) {
+ INIT_WORK(&ruleset->work_free, free_ruleset_work);
+ schedule_work(&ruleset->work_free);
+ }
+}
+
+/**
+ * landlock_merge_ruleset - Merge a ruleset with a domain
+ *
+ * @parent: Parent domain.
+ * @ruleset: New ruleset to be merged.
+ *
+ * Returns the intersection of @parent and @ruleset, or returns @parent if
+ * @ruleset is empty, or returns a duplicate of @ruleset if @parent is empty.
+ */
+struct landlock_ruleset *landlock_merge_ruleset(
+ struct landlock_ruleset *const parent,
+ struct landlock_ruleset *const ruleset)
+{
+ struct landlock_ruleset *new_dom;
+ u32 num_layers;
+ int err;
+
+ might_sleep();
+ if (WARN_ON_ONCE(!ruleset || parent == ruleset))
+ return ERR_PTR(-EINVAL);
+
+ if (parent) {
+ if (parent->num_layers >= LANDLOCK_MAX_NUM_LAYERS)
+ return ERR_PTR(-E2BIG);
+ num_layers = parent->num_layers + 1;
+ } else {
+ num_layers = 1;
+ }
+
+ /* Creates a new domain... */
+ new_dom = create_ruleset(num_layers);
+ if (IS_ERR(new_dom))
+ return new_dom;
+ new_dom->hierarchy = kzalloc(sizeof(*new_dom->hierarchy),
+ GFP_KERNEL_ACCOUNT);
+ if (!new_dom->hierarchy) {
+ err = -ENOMEM;
+ goto out_put_dom;
+ }
+ refcount_set(&new_dom->hierarchy->usage, 1);
+
+ /* ...as a child of @parent... */
+ err = inherit_ruleset(parent, new_dom);
+ if (err)
+ goto out_put_dom;
+
+ /* ...and including @ruleset. */
+ err = merge_ruleset(new_dom, ruleset);
+ if (err)
+ goto out_put_dom;
+
+ return new_dom;
+
+out_put_dom:
+ landlock_put_ruleset(new_dom);
+ return ERR_PTR(err);
+}
+
+/*
+ * The returned access has the same lifetime as @ruleset.
+ */
+const struct landlock_rule *landlock_find_rule(
+ const struct landlock_ruleset *const ruleset,
+ const struct landlock_object *const object)
+{
+ const struct rb_node *node;
+
+ if (!object)
+ return NULL;
+ node = ruleset->root.rb_node;
+ while (node) {
+ struct landlock_rule *this = rb_entry(node,
+ struct landlock_rule, node);
+
+ if (this->object == object)
+ return this;
+ if (this->object < object)
+ node = node->rb_right;
+ else
+ node = node->rb_left;
+ }
+ return NULL;
+}
diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h
new file mode 100644
index 000000000000..2d3ed7ec5a0a
--- /dev/null
+++ b/security/landlock/ruleset.h
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - Ruleset management
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#ifndef _SECURITY_LANDLOCK_RULESET_H
+#define _SECURITY_LANDLOCK_RULESET_H
+
+#include <linux/mutex.h>
+#include <linux/rbtree.h>
+#include <linux/refcount.h>
+#include <linux/workqueue.h>
+
+#include "object.h"
+
+/**
+ * struct landlock_layer - Access rights for a given layer
+ */
+struct landlock_layer {
+ /**
+ * @level: Position of this layer in the layer stack.
+ */
+ u16 level;
+ /**
+ * @access: Bitfield of allowed actions on the kernel object. They are
+ * relative to the object type (e.g. %LANDLOCK_ACTION_FS_READ).
+ */
+ u16 access;
+};
+
+/**
+ * struct landlock_rule - Access rights tied to an object
+ */
+struct landlock_rule {
+ /**
+ * @node: Node in the ruleset's red-black tree.
+ */
+ struct rb_node node;
+ /**
+ * @object: Pointer to identify a kernel object (e.g. an inode). This
+ * is used as a key for this ruleset element. This pointer is set once
+ * and never modified. It always points to an allocated object because
+ * each rule increments the refcount of its object.
+ */
+ struct landlock_object *object;
+ /**
+ * @num_layers: Number of entries in @layers.
+ */
+ u32 num_layers;
+ /**
+ * @layers: Stack of layers, from the latest to the newest, implemented
+ * as a flexible array member (FAM).
+ */
+ struct landlock_layer layers[];
+};
+
+/**
+ * struct landlock_hierarchy - Node in a ruleset hierarchy
+ */
+struct landlock_hierarchy {
+ /**
+ * @parent: Pointer to the parent node, or NULL if it is a root
+ * Landlock domain.
+ */
+ struct landlock_hierarchy *parent;
+ /**
+ * @usage: Number of potential children domains plus their parent
+ * domain.
+ */
+ refcount_t usage;
+};
+
+/**
+ * struct landlock_ruleset - Landlock ruleset
+ *
+ * This data structure must contain unique entries, be updatable, and quick to
+ * match an object.
+ */
+struct landlock_ruleset {
+ /**
+ * @root: Root of a red-black tree containing &struct landlock_rule
+ * nodes. Once a ruleset is tied to a process (i.e. as a domain), this
+ * tree is immutable until @usage reaches zero.
+ */
+ struct rb_root root;
+ /**
+ * @hierarchy: Enables hierarchy identification even when a parent
+ * domain vanishes. This is needed for the ptrace protection.
+ */
+ struct landlock_hierarchy *hierarchy;
+ union {
+ /**
+ * @work_free: Enables to free a ruleset within a lockless
+ * section. This is only used by
+ * landlock_put_ruleset_deferred() when @usage reaches zero.
+ * The fields @lock, @usage, @num_rules, @num_layers and
+ * @fs_access_masks are then unused.
+ */
+ struct work_struct work_free;
+ struct {
+ /**
+ * @lock: Protects against concurrent modifications of
+ * @root, if @usage is greater than zero.
+ */
+ struct mutex lock;
+ /**
+ * @usage: Number of processes (i.e. domains) or file
+ * descriptors referencing this ruleset.
+ */
+ refcount_t usage;
+ /**
+ * @num_rules: Number of non-overlapping (i.e. not for
+ * the same object) rules in this ruleset.
+ */
+ u32 num_rules;
+ /**
+ * @num_layers: Number of layers that are used in this
+ * ruleset. This enables to check that all the layers
+ * allow an access request. A value of 0 identifies a
+ * non-merged ruleset (i.e. not a domain).
+ */
+ u32 num_layers;
+ /**
+ * @fs_access_masks: Contains the subset of filesystem
+ * actions that are restricted by a ruleset. A domain
+ * saves all layers of merged rulesets in a stack
+ * (FAM), starting from the first layer to the last
+ * one. These layers are used when merging rulesets,
+ * for user space backward compatibility (i.e.
+ * future-proof), and to properly handle merged
+ * rulesets without overlapping access rights. These
+ * layers are set once and never changed for the
+ * lifetime of the ruleset.
+ */
+ u16 fs_access_masks[];
+ };
+ };
+};
+
+struct landlock_ruleset *landlock_create_ruleset(const u32 fs_access_mask);
+
+void landlock_put_ruleset(struct landlock_ruleset *const ruleset);
+void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset);
+
+int landlock_insert_rule(struct landlock_ruleset *const ruleset,
+ struct landlock_object *const object, const u32 access);
+
+struct landlock_ruleset *landlock_merge_ruleset(
+ struct landlock_ruleset *const parent,
+ struct landlock_ruleset *const ruleset);
+
+const struct landlock_rule *landlock_find_rule(
+ const struct landlock_ruleset *const ruleset,
+ const struct landlock_object *const object);
+
+static inline void landlock_get_ruleset(struct landlock_ruleset *const ruleset)
+{
+ if (ruleset)
+ refcount_inc(&ruleset->usage);
+}
+
+#endif /* _SECURITY_LANDLOCK_RULESET_H */
diff --git a/security/landlock/setup.c b/security/landlock/setup.c
new file mode 100644
index 000000000000..f8e8e980454c
--- /dev/null
+++ b/security/landlock/setup.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - Security framework setup
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#include <linux/init.h>
+#include <linux/lsm_hooks.h>
+
+#include "common.h"
+#include "cred.h"
+#include "fs.h"
+#include "ptrace.h"
+#include "setup.h"
+
+bool landlock_initialized __lsm_ro_after_init = false;
+
+struct lsm_blob_sizes landlock_blob_sizes __lsm_ro_after_init = {
+ .lbs_cred = sizeof(struct landlock_cred_security),
+ .lbs_inode = sizeof(struct landlock_inode_security),
+ .lbs_superblock = sizeof(struct landlock_superblock_security),
+};
+
+static int __init landlock_init(void)
+{
+ landlock_add_cred_hooks();
+ landlock_add_ptrace_hooks();
+ landlock_add_fs_hooks();
+ landlock_initialized = true;
+ pr_info("Up and running.\n");
+ return 0;
+}
+
+DEFINE_LSM(LANDLOCK_NAME) = {
+ .name = LANDLOCK_NAME,
+ .init = landlock_init,
+ .blobs = &landlock_blob_sizes,
+};
diff --git a/security/landlock/setup.h b/security/landlock/setup.h
new file mode 100644
index 000000000000..1daffab1ab4b
--- /dev/null
+++ b/security/landlock/setup.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - Security framework setup
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#ifndef _SECURITY_LANDLOCK_SETUP_H
+#define _SECURITY_LANDLOCK_SETUP_H
+
+#include <linux/lsm_hooks.h>
+
+extern bool landlock_initialized;
+
+extern struct lsm_blob_sizes landlock_blob_sizes;
+
+#endif /* _SECURITY_LANDLOCK_SETUP_H */
diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c
new file mode 100644
index 000000000000..32396962f04d
--- /dev/null
+++ b/security/landlock/syscalls.c
@@ -0,0 +1,451 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - System call implementations and user space interfaces
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#include <asm/current.h>
+#include <linux/anon_inodes.h>
+#include <linux/build_bug.h>
+#include <linux/capability.h>
+#include <linux/compiler_types.h>
+#include <linux/dcache.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/limits.h>
+#include <linux/mount.h>
+#include <linux/path.h>
+#include <linux/sched.h>
+#include <linux/security.h>
+#include <linux/stddef.h>
+#include <linux/syscalls.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <uapi/linux/landlock.h>
+
+#include "cred.h"
+#include "fs.h"
+#include "limits.h"
+#include "ruleset.h"
+#include "setup.h"
+
+/**
+ * copy_min_struct_from_user - Safe future-proof argument copying
+ *
+ * Extend copy_struct_from_user() to check for consistent user buffer.
+ *
+ * @dst: Kernel space pointer or NULL.
+ * @ksize: Actual size of the data pointed to by @dst.
+ * @ksize_min: Minimal required size to be copied.
+ * @src: User space pointer or NULL.
+ * @usize: (Alleged) size of the data pointed to by @src.
+ */
+static __always_inline int copy_min_struct_from_user(void *const dst,
+ const size_t ksize, const size_t ksize_min,
+ const void __user *const src, const size_t usize)
+{
+ /* Checks buffer inconsistencies. */
+ BUILD_BUG_ON(!dst);
+ if (!src)
+ return -EFAULT;
+
+ /* Checks size ranges. */
+ BUILD_BUG_ON(ksize <= 0);
+ BUILD_BUG_ON(ksize < ksize_min);
+ if (usize < ksize_min)
+ return -EINVAL;
+ if (usize > PAGE_SIZE)
+ return -E2BIG;
+
+ /* Copies user buffer and fills with zeros. */
+ return copy_struct_from_user(dst, ksize, src, usize);
+}
+
+/*
+ * This function only contains arithmetic operations with constants, leading to
+ * BUILD_BUG_ON(). The related code is evaluated and checked at build time,
+ * but it is then ignored thanks to compiler optimizations.
+ */
+static void build_check_abi(void)
+{
+ struct landlock_ruleset_attr ruleset_attr;
+ struct landlock_path_beneath_attr path_beneath_attr;
+ size_t ruleset_size, path_beneath_size;
+
+ /*
+ * For each user space ABI structures, first checks that there is no
+ * hole in them, then checks that all architectures have the same
+ * struct size.
+ */
+ ruleset_size = sizeof(ruleset_attr.handled_access_fs);
+ BUILD_BUG_ON(sizeof(ruleset_attr) != ruleset_size);
+ BUILD_BUG_ON(sizeof(ruleset_attr) != 8);
+
+ path_beneath_size = sizeof(path_beneath_attr.allowed_access);
+ path_beneath_size += sizeof(path_beneath_attr.parent_fd);
+ BUILD_BUG_ON(sizeof(path_beneath_attr) != path_beneath_size);
+ BUILD_BUG_ON(sizeof(path_beneath_attr) != 12);
+}
+
+/* Ruleset handling */
+
+static int fop_ruleset_release(struct inode *const inode,
+ struct file *const filp)
+{
+ struct landlock_ruleset *ruleset = filp->private_data;
+
+ landlock_put_ruleset(ruleset);
+ return 0;
+}
+
+static ssize_t fop_dummy_read(struct file *const filp, char __user *const buf,
+ const size_t size, loff_t *const ppos)
+{
+ /* Dummy handler to enable FMODE_CAN_READ. */
+ return -EINVAL;
+}
+
+static ssize_t fop_dummy_write(struct file *const filp,
+ const char __user *const buf, const size_t size,
+ loff_t *const ppos)
+{
+ /* Dummy handler to enable FMODE_CAN_WRITE. */
+ return -EINVAL;
+}
+
+/*
+ * A ruleset file descriptor enables to build a ruleset by adding (i.e.
+ * writing) rule after rule, without relying on the task's context. This
+ * reentrant design is also used in a read way to enforce the ruleset on the
+ * current task.
+ */
+static const struct file_operations ruleset_fops = {
+ .release = fop_ruleset_release,
+ .read = fop_dummy_read,
+ .write = fop_dummy_write,
+};
+
+#define LANDLOCK_ABI_VERSION 1
+
+/**
+ * sys_landlock_create_ruleset - Create a new ruleset
+ *
+ * @attr: Pointer to a &struct landlock_ruleset_attr identifying the scope of
+ * the new ruleset.
+ * @size: Size of the pointed &struct landlock_ruleset_attr (needed for
+ * backward and forward compatibility).
+ * @flags: Supported value: %LANDLOCK_CREATE_RULESET_VERSION.
+ *
+ * This system call enables to create a new Landlock ruleset, and returns the
+ * related file descriptor on success.
+ *
+ * If @flags is %LANDLOCK_CREATE_RULESET_VERSION and @attr is NULL and @size is
+ * 0, then the returned value is the highest supported Landlock ABI version
+ * (starting at 1).
+ *
+ * Possible returned errors are:
+ *
+ * - EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time;
+ * - EINVAL: unknown @flags, or unknown access, or too small @size;
+ * - E2BIG or EFAULT: @attr or @size inconsistencies;
+ * - ENOMSG: empty &landlock_ruleset_attr.handled_access_fs.
+ */
+SYSCALL_DEFINE3(landlock_create_ruleset,
+ const struct landlock_ruleset_attr __user *const, attr,
+ const size_t, size, const __u32, flags)
+{
+ struct landlock_ruleset_attr ruleset_attr;
+ struct landlock_ruleset *ruleset;
+ int err, ruleset_fd;
+
+ /* Build-time checks. */
+ build_check_abi();
+
+ if (!landlock_initialized)
+ return -EOPNOTSUPP;
+
+ if (flags) {
+ if ((flags == LANDLOCK_CREATE_RULESET_VERSION)
+ && !attr && !size)
+ return LANDLOCK_ABI_VERSION;
+ return -EINVAL;
+ }
+
+ /* Copies raw user space buffer. */
+ err = copy_min_struct_from_user(&ruleset_attr, sizeof(ruleset_attr),
+ offsetofend(typeof(ruleset_attr), handled_access_fs),
+ attr, size);
+ if (err)
+ return err;
+
+ /* Checks content (and 32-bits cast). */
+ if ((ruleset_attr.handled_access_fs | LANDLOCK_MASK_ACCESS_FS) !=
+ LANDLOCK_MASK_ACCESS_FS)
+ return -EINVAL;
+
+ /* Checks arguments and transforms to kernel struct. */
+ ruleset = landlock_create_ruleset(ruleset_attr.handled_access_fs);
+ if (IS_ERR(ruleset))
+ return PTR_ERR(ruleset);
+
+ /* Creates anonymous FD referring to the ruleset. */
+ ruleset_fd = anon_inode_getfd("landlock-ruleset", &ruleset_fops,
+ ruleset, O_RDWR | O_CLOEXEC);
+ if (ruleset_fd < 0)
+ landlock_put_ruleset(ruleset);
+ return ruleset_fd;
+}
+
+/*
+ * Returns an owned ruleset from a FD. It is thus needed to call
+ * landlock_put_ruleset() on the return value.
+ */
+static struct landlock_ruleset *get_ruleset_from_fd(const int fd,
+ const fmode_t mode)
+{
+ struct fd ruleset_f;
+ struct landlock_ruleset *ruleset;
+
+ ruleset_f = fdget(fd);
+ if (!ruleset_f.file)
+ return ERR_PTR(-EBADF);
+
+ /* Checks FD type and access right. */
+ if (ruleset_f.file->f_op != &ruleset_fops) {
+ ruleset = ERR_PTR(-EBADFD);
+ goto out_fdput;
+ }
+ if (!(ruleset_f.file->f_mode & mode)) {
+ ruleset = ERR_PTR(-EPERM);
+ goto out_fdput;
+ }
+ ruleset = ruleset_f.file->private_data;
+ if (WARN_ON_ONCE(ruleset->num_layers != 1)) {
+ ruleset = ERR_PTR(-EINVAL);
+ goto out_fdput;
+ }
+ landlock_get_ruleset(ruleset);
+
+out_fdput:
+ fdput(ruleset_f);
+ return ruleset;
+}
+
+/* Path handling */
+
+/*
+ * @path: Must call put_path(@path) after the call if it succeeded.
+ */
+static int get_path_from_fd(const s32 fd, struct path *const path)
+{
+ struct fd f;
+ int err = 0;
+
+ BUILD_BUG_ON(!__same_type(fd,
+ ((struct landlock_path_beneath_attr *)NULL)->parent_fd));
+
+ /* Handles O_PATH. */
+ f = fdget_raw(fd);
+ if (!f.file)
+ return -EBADF;
+ /*
+ * Forbids ruleset FDs, internal filesystems (e.g. nsfs), including
+ * pseudo filesystems that will never be mountable (e.g. sockfs,
+ * pipefs).
+ */
+ if ((f.file->f_op == &ruleset_fops) ||
+ (f.file->f_path.mnt->mnt_flags & MNT_INTERNAL) ||
+ (f.file->f_path.dentry->d_sb->s_flags & SB_NOUSER) ||
+ d_is_negative(f.file->f_path.dentry) ||
+ IS_PRIVATE(d_backing_inode(f.file->f_path.dentry))) {
+ err = -EBADFD;
+ goto out_fdput;
+ }
+ *path = f.file->f_path;
+ path_get(path);
+
+out_fdput:
+ fdput(f);
+ return err;
+}
+
+/**
+ * sys_landlock_add_rule - Add a new rule to a ruleset
+ *
+ * @ruleset_fd: File descriptor tied to the ruleset that should be extended
+ * with the new rule.
+ * @rule_type: Identify the structure type pointed to by @rule_attr (only
+ * LANDLOCK_RULE_PATH_BENEATH for now).
+ * @rule_attr: Pointer to a rule (only of type &struct
+ * landlock_path_beneath_attr for now).
+ * @flags: Must be 0.
+ *
+ * This system call enables to define a new rule and add it to an existing
+ * ruleset.
+ *
+ * Possible returned errors are:
+ *
+ * - EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time;
+ * - EINVAL: @flags is not 0, or inconsistent access in the rule (i.e.
+ * &landlock_path_beneath_attr.allowed_access is not a subset of the rule's
+ * accesses);
+ * - ENOMSG: Empty accesses (e.g. &landlock_path_beneath_attr.allowed_access);
+ * - EBADF: @ruleset_fd is not a file descriptor for the current thread, or a
+ * member of @rule_attr is not a file descriptor as expected;
+ * - EBADFD: @ruleset_fd is not a ruleset file descriptor, or a member of
+ * @rule_attr is not the expected file descriptor type (e.g. file open
+ * without O_PATH);
+ * - EPERM: @ruleset_fd has no write access to the underlying ruleset;
+ * - EFAULT: @rule_attr inconsistency.
+ */
+SYSCALL_DEFINE4(landlock_add_rule,
+ const int, ruleset_fd, const enum landlock_rule_type, rule_type,
+ const void __user *const, rule_attr, const __u32, flags)
+{
+ struct landlock_path_beneath_attr path_beneath_attr;
+ struct path path;
+ struct landlock_ruleset *ruleset;
+ int res, err;
+
+ if (!landlock_initialized)
+ return -EOPNOTSUPP;
+
+ /* No flag for now. */
+ if (flags)
+ return -EINVAL;
+
+ if (rule_type != LANDLOCK_RULE_PATH_BENEATH)
+ return -EINVAL;
+
+ /* Copies raw user space buffer, only one type for now. */
+ res = copy_from_user(&path_beneath_attr, rule_attr,
+ sizeof(path_beneath_attr));
+ if (res)
+ return -EFAULT;
+
+ /* Gets and checks the ruleset. */
+ ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_WRITE);
+ if (IS_ERR(ruleset))
+ return PTR_ERR(ruleset);
+
+ /*
+ * Informs about useless rule: empty allowed_access (i.e. deny rules)
+ * are ignored in path walks.
+ */
+ if (!path_beneath_attr.allowed_access) {
+ err = -ENOMSG;
+ goto out_put_ruleset;
+ }
+ /*
+ * Checks that allowed_access matches the @ruleset constraints
+ * (ruleset->fs_access_masks[0] is automatically upgraded to 64-bits).
+ */
+ if ((path_beneath_attr.allowed_access | ruleset->fs_access_masks[0]) !=
+ ruleset->fs_access_masks[0]) {
+ err = -EINVAL;
+ goto out_put_ruleset;
+ }
+
+ /* Gets and checks the new rule. */
+ err = get_path_from_fd(path_beneath_attr.parent_fd, &path);
+ if (err)
+ goto out_put_ruleset;
+
+ /* Imports the new rule. */
+ err = landlock_append_fs_rule(ruleset, &path,
+ path_beneath_attr.allowed_access);
+ path_put(&path);
+
+out_put_ruleset:
+ landlock_put_ruleset(ruleset);
+ return err;
+}
+
+/* Enforcement */
+
+/**
+ * sys_landlock_restrict_self - Enforce a ruleset on the calling thread
+ *
+ * @ruleset_fd: File descriptor tied to the ruleset to merge with the target.
+ * @flags: Must be 0.
+ *
+ * This system call enables to enforce a Landlock ruleset on the current
+ * thread. Enforcing a ruleset requires that the task has CAP_SYS_ADMIN in its
+ * namespace or is running with no_new_privs. This avoids scenarios where
+ * unprivileged tasks can affect the behavior of privileged children.
+ *
+ * Possible returned errors are:
+ *
+ * - EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time;
+ * - EINVAL: @flags is not 0.
+ * - EBADF: @ruleset_fd is not a file descriptor for the current thread;
+ * - EBADFD: @ruleset_fd is not a ruleset file descriptor;
+ * - EPERM: @ruleset_fd has no read access to the underlying ruleset, or the
+ * current thread is not running with no_new_privs, or it doesn't have
+ * CAP_SYS_ADMIN in its namespace.
+ * - E2BIG: The maximum number of stacked rulesets is reached for the current
+ * thread.
+ */
+SYSCALL_DEFINE2(landlock_restrict_self,
+ const int, ruleset_fd, const __u32, flags)
+{
+ struct landlock_ruleset *new_dom, *ruleset;
+ struct cred *new_cred;
+ struct landlock_cred_security *new_llcred;
+ int err;
+
+ if (!landlock_initialized)
+ return -EOPNOTSUPP;
+
+ /* No flag for now. */
+ if (flags)
+ return -EINVAL;
+
+ /*
+ * Similar checks as for seccomp(2), except that an -EPERM may be
+ * returned.
+ */
+ if (!task_no_new_privs(current) &&
+ !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
+ return -EPERM;
+
+ /* Gets and checks the ruleset. */
+ ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_READ);
+ if (IS_ERR(ruleset))
+ return PTR_ERR(ruleset);
+
+ /* Prepares new credentials. */
+ new_cred = prepare_creds();
+ if (!new_cred) {
+ err = -ENOMEM;
+ goto out_put_ruleset;
+ }
+ new_llcred = landlock_cred(new_cred);
+
+ /*
+ * There is no possible race condition while copying and manipulating
+ * the current credentials because they are dedicated per thread.
+ */
+ new_dom = landlock_merge_ruleset(new_llcred->domain, ruleset);
+ if (IS_ERR(new_dom)) {
+ err = PTR_ERR(new_dom);
+ goto out_put_creds;
+ }
+
+ /* Replaces the old (prepared) domain. */
+ landlock_put_ruleset(new_llcred->domain);
+ new_llcred->domain = new_dom;
+
+ landlock_put_ruleset(ruleset);
+ return commit_creds(new_cred);
+
+out_put_creds:
+ abort_creds(new_cred);
+
+out_put_ruleset:
+ landlock_put_ruleset(ruleset);
+ return err;
+}