diff options
author | Alexei Starovoitov <ast@kernel.org> | 2023-03-02 20:14:46 -0800 |
---|---|---|
committer | Daniel Borkmann <daniel@iogearbox.net> | 2023-03-03 17:42:20 +0100 |
commit | 6fcd486b3a0a628c41f12b3a7329a18a2c74b351 (patch) | |
tree | ac60957b666a5eac1e50dd7bb125bb7c106acb2f /kernel/bpf | |
parent | 0047d8343f6042c4feea24072ef254d47b8a33b3 (diff) | |
download | linux-stable-6fcd486b3a0a628c41f12b3a7329a18a2c74b351.tar.gz linux-stable-6fcd486b3a0a628c41f12b3a7329a18a2c74b351.tar.bz2 linux-stable-6fcd486b3a0a628c41f12b3a7329a18a2c74b351.zip |
bpf: Refactor RCU enforcement in the verifier.
bpf_rcu_read_lock/unlock() are only available in clang compiled kernels. Lack
of such key mechanism makes it impossible for sleepable bpf programs to use RCU
pointers.
Allow bpf_rcu_read_lock/unlock() in GCC compiled kernels (though GCC doesn't
support btf_type_tag yet) and allowlist certain field dereferences in important
data structures like tast_struct, cgroup, socket that are used by sleepable
programs either as RCU pointer or full trusted pointer (which is valid outside
of RCU CS). Use BTF_TYPE_SAFE_RCU and BTF_TYPE_SAFE_TRUSTED macros for such
tagging. They will be removed once GCC supports btf_type_tag.
With that refactor check_ptr_to_btf_access(). Make it strict in enforcing
PTR_TRUSTED and PTR_UNTRUSTED while deprecating old PTR_TO_BTF_ID without
modifier flags. There is a chance that this strict enforcement might break
existing programs (especially on GCC compiled kernels), but this cleanup has to
start sooner than later. Note PTR_TO_CTX access still yields old deprecated
PTR_TO_BTF_ID. Once it's converted to strict PTR_TRUSTED or PTR_UNTRUSTED the
kfuncs and helpers will be able to default to KF_TRUSTED_ARGS. KF_RCU will
remain as a weaker version of KF_TRUSTED_ARGS where obj refcnt could be 0.
Adjust rcu_read_lock selftest to run on gcc and clang compiled kernels.
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/bpf/20230303041446.3630-7-alexei.starovoitov@gmail.com
Diffstat (limited to 'kernel/bpf')
-rw-r--r-- | kernel/bpf/btf.c | 16 | ||||
-rw-r--r-- | kernel/bpf/cpumask.c | 40 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 178 |
3 files changed, 154 insertions, 80 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index c5e1d6955491..a8cb09e5973b 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6163,6 +6163,7 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf, const char *tname, *mname, *tag_value; u32 vlen, elem_id, mid; + *flag = 0; again: tname = __btf_name_by_offset(btf, t->name_off); if (!btf_type_is_struct(t)) { @@ -6329,6 +6330,15 @@ error: * of this field or inside of this struct */ if (btf_type_is_struct(mtype)) { + if (BTF_INFO_KIND(mtype->info) == BTF_KIND_UNION && + btf_type_vlen(mtype) != 1) + /* + * walking unions yields untrusted pointers + * with exception of __bpf_md_ptr and other + * unions with a single member + */ + *flag |= PTR_UNTRUSTED; + /* our field must be inside that union or struct */ t = mtype; @@ -6373,7 +6383,7 @@ error: stype = btf_type_skip_modifiers(btf, mtype->type, &id); if (btf_type_is_struct(stype)) { *next_btf_id = id; - *flag = tmp_flag; + *flag |= tmp_flag; return WALK_PTR; } } @@ -8357,7 +8367,7 @@ out: bool btf_nested_type_is_trusted(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off) + int off, const char *suffix) { struct btf *btf = reg->btf; const struct btf_type *walk_type, *safe_type; @@ -8374,7 +8384,7 @@ bool btf_nested_type_is_trusted(struct bpf_verifier_log *log, tname = btf_name_by_offset(btf, walk_type->name_off); - ret = snprintf(safe_tname, sizeof(safe_tname), "%s__safe_fields", tname); + ret = snprintf(safe_tname, sizeof(safe_tname), "%s%s", tname, suffix); if (ret < 0) return false; diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c index 2b3fbbfebdc5..b6587ec40f1b 100644 --- a/kernel/bpf/cpumask.c +++ b/kernel/bpf/cpumask.c @@ -427,26 +427,26 @@ BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_cpumask_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL) -BTF_ID_FLAGS(func, bpf_cpumask_first, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_clear_cpu, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_test_cpu, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_test_and_set_cpu, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_test_and_clear_cpu, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_setall, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_clear, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_and, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_or, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_xor, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_equal, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_intersects, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_subset, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_empty, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_full, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_any, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_any_and, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_cpumask_first, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_clear_cpu, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_test_cpu, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_test_and_set_cpu, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_test_and_clear_cpu, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_setall, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_clear, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_and, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_or, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_xor, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_equal, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_intersects, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_subset, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_empty, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_full, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_any, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_any_and, KF_RCU) BTF_SET8_END(cpumask_kfunc_btf_ids) static const struct btf_kfunc_id_set cpumask_kfunc_set = { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a095055d7ef4..c2adf3c24c64 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5073,29 +5073,76 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val) return 0; } -#define BTF_TYPE_SAFE_NESTED(__type) __PASTE(__type, __safe_fields) +#define BTF_TYPE_SAFE_RCU(__type) __PASTE(__type, __safe_rcu) +#define BTF_TYPE_SAFE_TRUSTED(__type) __PASTE(__type, __safe_trusted) -BTF_TYPE_SAFE_NESTED(struct task_struct) { +/* + * Allow list few fields as RCU trusted or full trusted. + * This logic doesn't allow mix tagging and will be removed once GCC supports + * btf_type_tag. + */ + +/* RCU trusted: these fields are trusted in RCU CS and never NULL */ +BTF_TYPE_SAFE_RCU(struct task_struct) { const cpumask_t *cpus_ptr; struct css_set __rcu *cgroups; + struct task_struct __rcu *real_parent; + struct task_struct *group_leader; }; -BTF_TYPE_SAFE_NESTED(struct css_set) { +BTF_TYPE_SAFE_RCU(struct css_set) { struct cgroup *dfl_cgrp; }; -static bool nested_ptr_is_trusted(struct bpf_verifier_env *env, - struct bpf_reg_state *reg, - int off) +/* full trusted: these fields are trusted even outside of RCU CS and never NULL */ +BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) { + __bpf_md_ptr(struct seq_file *, seq); +}; + +BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct task_struct *, task); +}; + +BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) { + struct file *file; +}; + +BTF_TYPE_SAFE_TRUSTED(struct file) { + struct inode *f_inode; +}; + +BTF_TYPE_SAFE_TRUSTED(struct dentry) { + /* no negative dentry-s in places where bpf can see it */ + struct inode *d_inode; +}; + +BTF_TYPE_SAFE_TRUSTED(struct socket) { + struct sock *sk; +}; + +static bool type_is_rcu(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, + int off) { - /* If its parent is not trusted, it can't regain its trusted status. */ - if (!is_trusted_reg(reg)) - return false; + BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct)); + BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set)); - BTF_TYPE_EMIT(BTF_TYPE_SAFE_NESTED(struct task_struct)); - BTF_TYPE_EMIT(BTF_TYPE_SAFE_NESTED(struct css_set)); + return btf_nested_type_is_trusted(&env->log, reg, off, "__safe_rcu"); +} - return btf_nested_type_is_trusted(&env->log, reg, off); +static bool type_is_trusted(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, + int off) +{ + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta)); + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task)); + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm)); + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file)); + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry)); + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket)); + + return btf_nested_type_is_trusted(&env->log, reg, off, "__safe_trusted"); } static int check_ptr_to_btf_access(struct bpf_verifier_env *env, @@ -5181,49 +5228,58 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, if (ret < 0) return ret; - /* If this is an untrusted pointer, all pointers formed by walking it - * also inherit the untrusted flag. - */ - if (type_flag(reg->type) & PTR_UNTRUSTED) - flag |= PTR_UNTRUSTED; + if (ret != PTR_TO_BTF_ID) { + /* just mark; */ - /* By default any pointer obtained from walking a trusted pointer is no - * longer trusted, unless the field being accessed has explicitly been - * marked as inheriting its parent's state of trust. - * - * An RCU-protected pointer can also be deemed trusted if we are in an - * RCU read region. This case is handled below. - */ - if (nested_ptr_is_trusted(env, reg, off)) { - flag |= PTR_TRUSTED; - /* - * task->cgroups is trusted. It provides a stronger guarantee - * than __rcu tag on 'cgroups' field in 'struct task_struct'. - * Clear MEM_RCU in such case. + } else if (type_flag(reg->type) & PTR_UNTRUSTED) { + /* If this is an untrusted pointer, all pointers formed by walking it + * also inherit the untrusted flag. + */ + flag = PTR_UNTRUSTED; + + } else if (is_trusted_reg(reg) || is_rcu_reg(reg)) { + /* By default any pointer obtained from walking a trusted pointer is no + * longer trusted, unless the field being accessed has explicitly been + * marked as inheriting its parent's state of trust (either full or RCU). + * For example: + * 'cgroups' pointer is untrusted if task->cgroups dereference + * happened in a sleepable program outside of bpf_rcu_read_lock() + * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU). + * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED. + * + * A regular RCU-protected pointer with __rcu tag can also be deemed + * trusted if we are in an RCU CS. Such pointer can be NULL. */ - flag &= ~MEM_RCU; + if (type_is_trusted(env, reg, off)) { + flag |= PTR_TRUSTED; + } else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) { + if (type_is_rcu(env, reg, off)) { + /* ignore __rcu tag and mark it MEM_RCU */ + flag |= MEM_RCU; + } else if (flag & MEM_RCU) { + /* __rcu tagged pointers can be NULL */ + flag |= PTR_MAYBE_NULL; + } else if (flag & (MEM_PERCPU | MEM_USER)) { + /* keep as-is */ + } else { + /* walking unknown pointers yields untrusted pointer */ + flag = PTR_UNTRUSTED; + } + } else { + /* + * If not in RCU CS or MEM_RCU pointer can be NULL then + * aggressively mark as untrusted otherwise such + * pointers will be plain PTR_TO_BTF_ID without flags + * and will be allowed to be passed into helpers for + * compat reasons. + */ + flag = PTR_UNTRUSTED; + } } else { + /* Old compat. Deprecated */ flag &= ~PTR_TRUSTED; } - if (flag & MEM_RCU) { - /* Mark value register as MEM_RCU only if it is protected by - * bpf_rcu_read_lock() and the ptr reg is rcu or trusted. MEM_RCU - * itself can already indicate trustedness inside the rcu - * read lock region. Also mark rcu pointer as PTR_MAYBE_NULL since - * it could be null in some cases. - */ - if (in_rcu_cs(env) && (is_trusted_reg(reg) || is_rcu_reg(reg))) - flag |= PTR_MAYBE_NULL; - else - flag &= ~MEM_RCU; - } else if (reg->type & MEM_RCU) { - /* ptr (reg) is marked as MEM_RCU, but the struct field is not tagged - * with __rcu. Mark the flag as PTR_UNTRUSTED conservatively. - */ - flag |= PTR_UNTRUSTED; - } - if (atype == BPF_READ && value_regno >= 0) mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag); @@ -10049,10 +10105,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta); rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta); - if ((rcu_lock || rcu_unlock) && !env->rcu_tag_supported) { - verbose(env, "no vmlinux btf rcu tag support for kfunc %s\n", func_name); - return -EACCES; - } if (env->cur_state->active_rcu_lock) { struct bpf_func_state *state; @@ -14911,8 +14963,22 @@ static int do_check(struct bpf_verifier_env *env) * src_reg == stack|map in some other branch. * Reject it. */ - verbose(env, "same insn cannot be used with different pointers\n"); - return -EINVAL; + if (base_type(src_reg_type) == PTR_TO_BTF_ID && + base_type(*prev_src_type) == PTR_TO_BTF_ID) { + /* + * Have to support a use case when one path through + * the program yields TRUSTED pointer while another + * is UNTRUSTED. Fallback to UNTRUSTED to generate + * BPF_PROBE_MEM. + */ + *prev_src_type = PTR_TO_BTF_ID | PTR_UNTRUSTED; + } else { + verbose(env, + "The same insn cannot be used with different pointers: %s", + reg_type_str(env, src_reg_type)); + verbose(env, " != %s\n", reg_type_str(env, *prev_src_type)); + return -EINVAL; + } } } else if (class == BPF_STX) { @@ -17984,8 +18050,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) env->bypass_spec_v1 = bpf_bypass_spec_v1(); env->bypass_spec_v4 = bpf_bypass_spec_v4(); env->bpf_capable = bpf_capable(); - env->rcu_tag_supported = btf_vmlinux && - btf_find_by_name_kind(btf_vmlinux, "rcu", BTF_KIND_TYPE_TAG) > 0; if (is_priv) env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; |