author		Alexei Starovoitov <ast@kernel.org>	2023-11-09 10:45:45 -0800
committer	Alexei Starovoitov <ast@kernel.org>	2023-11-09 19:07:52 -0800
commit		3f6d04d742d9fbd492a79e28e7cfe4e2a97c66e5 (patch)
tree		c234716003d73512a51a9e4e43b0a3cecf0c38ef
parent		82ce364c6087e31ff9837380a4641a856284064c (diff)
parent		e9ed8df7187cfdce1075d0ee591544ac15d072f1 (diff)
Merge branch 'allow-bpf_refcount_acquire-of-mapval-obtained-via-direct-ld'
Dave Marchevsky says:

====================
Allow bpf_refcount_acquire of mapval obtained via direct LD

Consider this BPF program:

  struct cgv_node {
    int d;
    struct bpf_refcount r;
    struct bpf_rb_node rb;
  };

  struct val_stash {
    struct cgv_node __kptr *v;
  };

  struct {
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __type(key, int);
    __type(value, struct val_stash);
    __uint(max_entries, 10);
  } array_map SEC(".maps");

  long bpf_program(void *ctx)
  {
    struct val_stash *mapval;
    struct cgv_node *p;
    int idx = 0;

    mapval = bpf_map_lookup_elem(&array_map, &idx);
    if (!mapval || !mapval->v) { /* omitted */ }

    p = bpf_refcount_acquire(mapval->v); /* Verification FAILs here */

    /* Add p to some tree */

    return 0;
  }

Verification fails on the refcount_acquire:

  160: (79) r1 = *(u64 *)(r8 +8)  ; R1_w=untrusted_ptr_or_null_cgv_node(id=11,off=0,imm=0) R8_w=map_value(id=10,off=0,ks=8,vs=16,imm=0) refs=6
  161: (b7) r2 = 0                ; R2_w=0 refs=6
  162: (85) call bpf_refcount_acquire_impl#117824
  arg#0 is neither owning or non-owning ref

The above verifier dump is actually from sched_ext's scx_flatcg [0],
which is the motivating usecase for this series' changes. Specifically,
scx_flatcg stashes a rb_node type w/ cgroup-specific info (struct
cgv_node) in a map when the cgroup is created, then later puts that
cgroup's node in a rbtree in .enqueue . Making struct cgv_node
refcounted would simplify the code a bit by virtue of allowing us to
remove the kptr_xchg's, but "later puts that cgroup's node in a rbtree"
is not possible without a refcount_acquire, which suffers from the
above verification failure.

If we get rid of the PTR_UNTRUSTED flag, and add MEM_ALLOC | NON_OWN_REF,
mapval->v would be a non-owning ref and verification would succeed. Due
to the most recent set of refcount changes [1], which modified
bpf_obj_drop behavior to not reuse a refcounted graph node's underlying
memory until after an RCU grace period, this is safe to do. Once
mapval->v has the correct flags it _is_ a non-owning reference and
verification of the motivating example will succeed.

  [0]: https://github.com/sched-ext/sched_ext/blob/52911e1040a0f94b9c426dddcc00be5364a7ae9f/tools/sched_ext/scx_flatcg.bpf.c#L275
  [1]: https://lore.kernel.org/bpf/20230821193311.3290257-1-davemarchevsky@fb.com/

Summary of patches:
  * Patch 1 fixes an issue with bpf_refcount_acquire verification
    letting MAYBE_NULL ptrs through
    * Patch 2 tests Patch 1's fix
  * Patch 3 broadens the use of "free only after RCU GP" to all
    user-allocated types
    * Patch 4 is a small nonfunctional refactoring
  * Patch 5 changes the verifier to mark a direct LD of a stashed graph
    node kptr as a non-owning ref
    * Patch 6 tests Patch 5's verifier changes

Changelog:

v1 -> v2: https://lore.kernel.org/bpf/20231025214007.2920506-1-davemarchevsky@fb.com/

Series title changed to "Allow bpf_refcount_acquire of mapval obtained
via direct LD". V1's title was mistakenly truncated.

  * Patch 5 ("bpf: Mark direct ld of stashed bpf_{rb,list}_node as
    non-owning ref")
    * Direct LD of a percpu kptr should not have the MEM_ALLOC flag (Yonghong)
  * Patch 6 ("selftests/bpf: Test bpf_refcount_acquire of node obtained
    via direct ld")
    * Test read from stashed value as well
====================

Link: https://lore.kernel.org/r/20231107085639.3016113-1-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
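Editor's illustration (not part of the patch set): a minimal sketch of the
pattern this merge enables, combining the motivating example from the commit
message with the shape of the new selftests below. The struct, map, and field
names come from the commit message; the includes, the SEC("tc") attach point,
and the program name are assumptions modeled on the BPF selftests.

  #include <vmlinux.h>
  #include <bpf/bpf_helpers.h>
  #include "bpf_experimental.h"

  struct cgv_node {
  	int d;
  	struct bpf_refcount r;
  	struct bpf_rb_node rb;
  };

  struct val_stash {
  	struct cgv_node __kptr *v;
  };

  struct {
  	__uint(type, BPF_MAP_TYPE_ARRAY);
  	__type(key, int);
  	__type(value, struct val_stash);
  	__uint(max_entries, 10);
  } array_map SEC(".maps");

  SEC("tc")
  long acquire_stashed_node(void *ctx)
  {
  	struct val_stash *mapval;
  	struct cgv_node *p;
  	int idx = 0;

  	mapval = bpf_map_lookup_elem(&array_map, &idx);
  	if (!mapval || !mapval->v)
  		return 1;

  	/* With this series, the direct load of mapval->v inside an RCU
  	 * critical section (non-sleepable programs are implicitly in one)
  	 * is marked MEM_ALLOC | NON_OWN_REF instead of PTR_UNTRUSTED, so
  	 * it can be passed to bpf_refcount_acquire() without a prior
  	 * bpf_kptr_xchg().
  	 */
  	p = bpf_refcount_acquire(mapval->v);
  	if (!p) /* kfunc is KF_RET_NULL, so the result must be checked */
  		return 2;

  	/* p is now an owning reference; add it to an rbtree or drop it */
  	bpf_obj_drop(p);
  	return 0;
  }

  char _license[] SEC("license") = "GPL";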
-rw-r--r--	include/linux/bpf.h	4
-rw-r--r--	kernel/bpf/btf.c	11
-rw-r--r--	kernel/bpf/helpers.c	7
-rw-r--r--	kernel/bpf/verifier.c	36
-rw-r--r--	tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c	33
-rw-r--r--	tools/testing/selftests/bpf/progs/local_kptr_stash.c	71
-rw-r--r--	tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c	19
7 files changed, 162 insertions, 19 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index eb84caf133df..4001d11be151 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -186,8 +186,8 @@ enum btf_field_type {
BPF_LIST_NODE = (1 << 6),
BPF_RB_ROOT = (1 << 7),
BPF_RB_NODE = (1 << 8),
- BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD |
- BPF_RB_NODE | BPF_RB_ROOT,
+ BPF_GRAPH_NODE = BPF_RB_NODE | BPF_LIST_NODE,
+ BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD,
BPF_REFCOUNT = (1 << 9),
};
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 15d71d2986d3..63cf4128fc05 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3840,9 +3840,6 @@ end:
return ERR_PTR(ret);
}
-#define GRAPH_ROOT_MASK (BPF_LIST_HEAD | BPF_RB_ROOT)
-#define GRAPH_NODE_MASK (BPF_LIST_NODE | BPF_RB_NODE)
-
int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
{
int i;
@@ -3855,13 +3852,13 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
* Hence we only need to ensure that bpf_{list_head,rb_root} ownership
* does not form cycles.
*/
- if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & GRAPH_ROOT_MASK))
+ if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & BPF_GRAPH_ROOT))
return 0;
for (i = 0; i < rec->cnt; i++) {
struct btf_struct_meta *meta;
u32 btf_id;
- if (!(rec->fields[i].type & GRAPH_ROOT_MASK))
+ if (!(rec->fields[i].type & BPF_GRAPH_ROOT))
continue;
btf_id = rec->fields[i].graph_root.value_btf_id;
meta = btf_find_struct_meta(btf, btf_id);
@@ -3873,7 +3870,7 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
* to check ownership cycle for a type unless it's also a
* node type.
*/
- if (!(rec->field_mask & GRAPH_NODE_MASK))
+ if (!(rec->field_mask & BPF_GRAPH_NODE))
continue;
/* We need to ensure ownership acyclicity among all types. The
@@ -3909,7 +3906,7 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
* - A is both an root and node.
* - B is only an node.
*/
- if (meta->record->field_mask & GRAPH_ROOT_MASK)
+ if (meta->record->field_mask & BPF_GRAPH_ROOT)
return -ELOOP;
}
return 0;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 174f02a9e703..03517db5cfb3 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1937,10 +1937,7 @@ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu)
ma = &bpf_global_percpu_ma;
else
ma = &bpf_global_ma;
- if (rec && rec->refcount_off >= 0)
- bpf_mem_free_rcu(ma, p);
- else
- bpf_mem_free(ma, p);
+ bpf_mem_free_rcu(ma, p);
}
__bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
@@ -2520,7 +2517,7 @@ BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_percpu_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_percpu_obj_drop_impl, KF_RELEASE)
-BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL | KF_RCU)
BTF_ID_FLAGS(func, bpf_list_push_front_impl)
BTF_ID_FLAGS(func, bpf_list_push_back_impl)
BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 993e4677bbe9..9ae6eae13471 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5557,10 +5557,23 @@ BTF_SET_END(rcu_protected_types)
static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
{
if (!btf_is_kernel(btf))
- return false;
+ return true;
return btf_id_set_contains(&rcu_protected_types, btf_id);
}
+static struct btf_record *kptr_pointee_btf_record(struct btf_field *kptr_field)
+{
+ struct btf_struct_meta *meta;
+
+ if (btf_is_kernel(kptr_field->kptr.btf))
+ return NULL;
+
+ meta = btf_find_struct_meta(kptr_field->kptr.btf,
+ kptr_field->kptr.btf_id);
+
+ return meta ? meta->record : NULL;
+}
+
static bool rcu_safe_kptr(const struct btf_field *field)
{
const struct btf_field_kptr *kptr = &field->kptr;
@@ -5571,12 +5584,25 @@ static bool rcu_safe_kptr(const struct btf_field *field)
static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field)
{
+ struct btf_record *rec;
+ u32 ret;
+
+ ret = PTR_MAYBE_NULL;
if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) {
- if (kptr_field->type != BPF_KPTR_PERCPU)
- return PTR_MAYBE_NULL | MEM_RCU;
- return PTR_MAYBE_NULL | MEM_RCU | MEM_PERCPU;
+ ret |= MEM_RCU;
+ if (kptr_field->type == BPF_KPTR_PERCPU)
+ ret |= MEM_PERCPU;
+ else if (!btf_is_kernel(kptr_field->kptr.btf))
+ ret |= MEM_ALLOC;
+
+ rec = kptr_pointee_btf_record(kptr_field);
+ if (rec && btf_record_has_field(rec, BPF_GRAPH_NODE))
+ ret |= NON_OWN_REF;
+ } else {
+ ret |= PTR_UNTRUSTED;
}
- return PTR_MAYBE_NULL | PTR_UNTRUSTED;
+
+ return ret;
}
static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
diff --git a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c
index b25b870f87ba..e6e50a394472 100644
--- a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c
+++ b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c
@@ -73,6 +73,37 @@ static void test_local_kptr_stash_unstash(void)
local_kptr_stash__destroy(skel);
}
+static void test_refcount_acquire_without_unstash(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct local_kptr_stash *skel;
+ int ret;
+
+ skel = local_kptr_stash__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash),
+ &opts);
+ ASSERT_OK(ret, "refcount_acquire_without_unstash run");
+ ASSERT_EQ(opts.retval, 2, "refcount_acquire_without_unstash retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_refcounted_node), &opts);
+ ASSERT_OK(ret, "stash_refcounted_node run");
+ ASSERT_OK(opts.retval, "stash_refcounted_node retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash),
+ &opts);
+ ASSERT_OK(ret, "refcount_acquire_without_unstash (2) run");
+ ASSERT_EQ(opts.retval, 42, "refcount_acquire_without_unstash (2) retval");
+
+ local_kptr_stash__destroy(skel);
+}
+
static void test_local_kptr_stash_fail(void)
{
RUN_TESTS(local_kptr_stash_fail);
@@ -86,6 +117,8 @@ void test_local_kptr_stash(void)
test_local_kptr_stash_plain();
if (test__start_subtest("local_kptr_stash_unstash"))
test_local_kptr_stash_unstash();
+ if (test__start_subtest("refcount_acquire_without_unstash"))
+ test_refcount_acquire_without_unstash();
if (test__start_subtest("local_kptr_stash_fail"))
test_local_kptr_stash_fail();
}
diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c
index b567a666d2b8..1769fdff6aea 100644
--- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c
+++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c
@@ -14,6 +14,24 @@ struct node_data {
struct bpf_rb_node node;
};
+struct refcounted_node {
+ long data;
+ struct bpf_rb_node rb_node;
+ struct bpf_refcount refcount;
+};
+
+struct stash {
+ struct bpf_spin_lock l;
+ struct refcounted_node __kptr *stashed;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct stash);
+ __uint(max_entries, 10);
+} refcounted_node_stash SEC(".maps");
+
struct plain_local {
long key;
long data;
@@ -38,6 +56,7 @@ struct map_value {
* Had to do the same w/ bpf_kfunc_call_test_release below
*/
struct node_data *just_here_because_btf_bug;
+struct refcounted_node *just_here_because_btf_bug2;
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
@@ -132,4 +151,56 @@ long stash_test_ref_kfunc(void *ctx)
return 0;
}
+SEC("tc")
+long refcount_acquire_without_unstash(void *ctx)
+{
+ struct refcounted_node *p;
+ struct stash *s;
+ int ret = 0;
+
+ s = bpf_map_lookup_elem(&refcounted_node_stash, &ret);
+ if (!s)
+ return 1;
+
+ if (!s->stashed)
+ /* refcount_acquire failure is expected when no refcounted_node
+ * has been stashed before this program executes
+ */
+ return 2;
+
+ p = bpf_refcount_acquire(s->stashed);
+ if (!p)
+ return 3;
+
+ ret = s->stashed ? s->stashed->data : -1;
+ bpf_obj_drop(p);
+ return ret;
+}
+
+/* Helper for refcount_acquire_without_unstash test */
+SEC("tc")
+long stash_refcounted_node(void *ctx)
+{
+ struct refcounted_node *p;
+ struct stash *s;
+ int key = 0;
+
+ s = bpf_map_lookup_elem(&refcounted_node_stash, &key);
+ if (!s)
+ return 1;
+
+ p = bpf_obj_new(typeof(*p));
+ if (!p)
+ return 2;
+ p->data = 42;
+
+ p = bpf_kptr_xchg(&s->stashed, p);
+ if (p) {
+ bpf_obj_drop(p);
+ return 3;
+ }
+
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
index 1ef07f6ee580..1553b9c16aa7 100644
--- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
@@ -54,6 +54,25 @@ long rbtree_refcounted_node_ref_escapes(void *ctx)
}
SEC("?tc")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+long refcount_acquire_maybe_null(void *ctx)
+{
+ struct node_acquire *n, *m;
+
+ n = bpf_obj_new(typeof(*n));
+ /* Intentionally not testing !n
+ * it's MAYBE_NULL for refcount_acquire
+ */
+ m = bpf_refcount_acquire(n);
+ if (m)
+ bpf_obj_drop(m);
+ if (n)
+ bpf_obj_drop(n);
+
+ return 0;
+}
+
+SEC("?tc")
__failure __msg("Unreleased reference id=3 alloc_insn=9")
long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx)
{