diff options
author | Jakub Kicinski <kuba@kernel.org> | 2024-01-18 09:54:24 -0800 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2024-01-18 09:54:24 -0800 |
commit | 4349efc52b83af3851df7a4199567ad50b3d1f03 (patch) | |
tree | 2f8475b57657b0f5666266df5005622c9aa05200 /tools/testing | |
parent | 9cfd3b502153810b66ac0ce47f1fba682228f2d2 (diff) | |
parent | 35ac085a94efe82d906d3a812612d432aa267cbe (diff) | |
download | linux-stable-4349efc52b83af3851df7a4199567ad50b3d1f03.tar.gz linux-stable-4349efc52b83af3851df7a4199567ad50b3d1f03.tar.bz2 linux-stable-4349efc52b83af3851df7a4199567ad50b3d1f03.zip |
Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Daniel Borkmann says:
====================
pull-request: bpf 2024-01-18
We've added 10 non-merge commits during the last 5 day(s) which contain
a total of 12 files changed, 806 insertions(+), 51 deletions(-).
The main changes are:
1) Fix an issue in bpf_iter_udp under backward progress which prevents
user space process from finishing iteration, from Martin KaFai Lau.
2) Fix BPF verifier to reject variable offset alu on registers with a type
of PTR_TO_FLOW_KEYS to prevent oob access, from Hao Sun.
3) Follow up fixes for kernel- and libbpf-side logic around handling
arg:ctx tagged arguments of BPF global subprogs, from Andrii Nakryiko.
* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
libbpf: warn on unexpected __arg_ctx type when rewriting BTF
selftests/bpf: add tests confirming type logic in kernel for __arg_ctx
bpf: enforce types for __arg_ctx-tagged arguments in global subprogs
bpf: extract bpf_ctx_convert_map logic and make it more reusable
libbpf: feature-detect arg:ctx tag support in kernel
selftests/bpf: Add test for alu on PTR_TO_FLOW_KEYS
bpf: Reject variable offset alu on PTR_TO_FLOW_KEYS
selftests/bpf: Test udp and tcp iter batching
bpf: Avoid iter->offset making backward progress in bpf_iter_udp
bpf: iter_udp: Retry with a larger batch size without going back to the previous bucket
====================
Link: https://lore.kernel.org/r/20240118153936.11769-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'tools/testing')
7 files changed, 453 insertions, 3 deletions
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c new file mode 100644 index 000000000000..0c365f36c73b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2024 Meta + +#include <test_progs.h> +#include "network_helpers.h" +#include "sock_iter_batch.skel.h" + +#define TEST_NS "sock_iter_batch_netns" + +static const int nr_soreuse = 4; + +static void do_test(int sock_type, bool onebyone) +{ + int err, i, nread, to_read, total_read, iter_fd = -1; + int first_idx, second_idx, indices[nr_soreuse]; + struct bpf_link *link = NULL; + struct sock_iter_batch *skel; + int *fds[2] = {}; + + skel = sock_iter_batch__open(); + if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open")) + return; + + /* Prepare 2 buckets of sockets in the kernel hashtable */ + for (i = 0; i < ARRAY_SIZE(fds); i++) { + int local_port; + + fds[i] = start_reuseport_server(AF_INET6, sock_type, "::1", 0, 0, + nr_soreuse); + if (!ASSERT_OK_PTR(fds[i], "start_reuseport_server")) + goto done; + local_port = get_socket_local_port(*fds[i]); + if (!ASSERT_GE(local_port, 0, "get_socket_local_port")) + goto done; + skel->rodata->ports[i] = ntohs(local_port); + } + + err = sock_iter_batch__load(skel); + if (!ASSERT_OK(err, "sock_iter_batch__load")) + goto done; + + link = bpf_program__attach_iter(sock_type == SOCK_STREAM ? + skel->progs.iter_tcp_soreuse : + skel->progs.iter_udp_soreuse, + NULL); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter")) + goto done; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_GE(iter_fd, 0, "bpf_iter_create")) + goto done; + + /* Test reading a bucket (either from fds[0] or fds[1]). + * Only read "nr_soreuse - 1" number of sockets + * from a bucket and leave one socket out from + * that bucket on purpose. + */ + to_read = (nr_soreuse - 1) * sizeof(*indices); + total_read = 0; + first_idx = -1; + do { + nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read); + if (nread <= 0 || nread % sizeof(*indices)) + break; + total_read += nread; + + if (first_idx == -1) + first_idx = indices[0]; + for (i = 0; i < nread / sizeof(*indices); i++) + ASSERT_EQ(indices[i], first_idx, "first_idx"); + } while (total_read < to_read); + ASSERT_EQ(nread, onebyone ? sizeof(*indices) : to_read, "nread"); + ASSERT_EQ(total_read, to_read, "total_read"); + + free_fds(fds[first_idx], nr_soreuse); + fds[first_idx] = NULL; + + /* Read the "whole" second bucket */ + to_read = nr_soreuse * sizeof(*indices); + total_read = 0; + second_idx = !first_idx; + do { + nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read); + if (nread <= 0 || nread % sizeof(*indices)) + break; + total_read += nread; + + for (i = 0; i < nread / sizeof(*indices); i++) + ASSERT_EQ(indices[i], second_idx, "second_idx"); + } while (total_read <= to_read); + ASSERT_EQ(nread, 0, "nread"); + /* Both so_reuseport ports should be in different buckets, so + * total_read must equal to the expected to_read. + * + * For a very unlikely case, both ports collide at the same bucket, + * the bucket offset (i.e. 3) will be skipped and it cannot + * expect the to_read number of bytes. + */ + if (skel->bss->bucket[0] != skel->bss->bucket[1]) + ASSERT_EQ(total_read, to_read, "total_read"); + +done: + for (i = 0; i < ARRAY_SIZE(fds); i++) + free_fds(fds[i], nr_soreuse); + if (iter_fd < 0) + close(iter_fd); + bpf_link__destroy(link); + sock_iter_batch__destroy(skel); +} + +void test_sock_iter_batch(void) +{ + struct nstoken *nstoken = NULL; + + SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null"); + SYS(done, "ip netns add %s", TEST_NS); + SYS(done, "ip -net %s link set dev lo up", TEST_NS); + + nstoken = open_netns(TEST_NS); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto done; + + if (test__start_subtest("tcp")) { + do_test(SOCK_STREAM, true); + do_test(SOCK_STREAM, false); + } + if (test__start_subtest("udp")) { + do_test(SOCK_DGRAM, true); + do_test(SOCK_DGRAM, false); + } + close_netns(nstoken); + +done: + SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null"); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c index 67d4ef9e62b3..e905cbaf6b3d 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -47,6 +47,19 @@ static void subtest_ctx_arg_rewrite(void) struct btf *btf = NULL; __u32 info_len = sizeof(info); int err, fd, i; + struct btf *kern_btf = NULL; + + kern_btf = btf__load_vmlinux_btf(); + if (!ASSERT_OK_PTR(kern_btf, "kern_btf_load")) + return; + + /* simple detection of kernel native arg:ctx tag support */ + if (btf__find_by_name_kind(kern_btf, "bpf_subprog_arg_info", BTF_KIND_STRUCT) > 0) { + test__skip(); + btf__free(kern_btf); + return; + } + btf__free(kern_btf); skel = test_global_func_ctx_args__open(); if (!ASSERT_OK_PTR(skel, "skel_open")) diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index 1bdc680b0e0e..e8bd4b7b5ef7 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -72,6 +72,8 @@ #define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr #define inet_dport sk.__sk_common.skc_dport +#define udp_portaddr_hash inet.sk.__sk_common.skc_u16hashes[1] + #define ir_loc_addr req.__req_common.skc_rcv_saddr #define ir_num req.__req_common.skc_num #define ir_rmt_addr req.__req_common.skc_daddr @@ -85,6 +87,7 @@ #define sk_rmem_alloc sk_backlog.rmem_alloc #define sk_refcnt __sk_common.skc_refcnt #define sk_state __sk_common.skc_state +#define sk_net __sk_common.skc_net #define sk_v6_daddr __sk_common.skc_v6_daddr #define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr #define sk_flags __sk_common.skc_flags diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c new file mode 100644 index 000000000000..ffbbfe1fa1c1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2024 Meta + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include <bpf/bpf_endian.h> +#include "bpf_tracing_net.h" +#include "bpf_kfuncs.h" + +#define ATTR __always_inline +#include "test_jhash.h" + +static bool ipv6_addr_loopback(const struct in6_addr *a) +{ + return (a->s6_addr32[0] | a->s6_addr32[1] | + a->s6_addr32[2] | (a->s6_addr32[3] ^ bpf_htonl(1))) == 0; +} + +volatile const __u16 ports[2]; +unsigned int bucket[2]; + +SEC("iter/tcp") +int iter_tcp_soreuse(struct bpf_iter__tcp *ctx) +{ + struct sock *sk = (struct sock *)ctx->sk_common; + struct inet_hashinfo *hinfo; + unsigned int hash; + struct net *net; + int idx; + + if (!sk) + return 0; + + sk = bpf_rdonly_cast(sk, bpf_core_type_id_kernel(struct sock)); + if (sk->sk_family != AF_INET6 || + sk->sk_state != TCP_LISTEN || + !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr)) + return 0; + + if (sk->sk_num == ports[0]) + idx = 0; + else if (sk->sk_num == ports[1]) + idx = 1; + else + return 0; + + /* bucket selection as in inet_lhash2_bucket_sk() */ + net = sk->sk_net.net; + hash = jhash2(sk->sk_v6_rcv_saddr.s6_addr32, 4, net->hash_mix); + hash ^= sk->sk_num; + hinfo = net->ipv4.tcp_death_row.hashinfo; + bucket[idx] = hash & hinfo->lhash2_mask; + bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx)); + + return 0; +} + +#define udp_sk(ptr) container_of(ptr, struct udp_sock, inet.sk) + +SEC("iter/udp") +int iter_udp_soreuse(struct bpf_iter__udp *ctx) +{ + struct sock *sk = (struct sock *)ctx->udp_sk; + struct udp_table *udptable; + int idx; + + if (!sk) + return 0; + + sk = bpf_rdonly_cast(sk, bpf_core_type_id_kernel(struct sock)); + if (sk->sk_family != AF_INET6 || + !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr)) + return 0; + + if (sk->sk_num == ports[0]) + idx = 0; + else if (sk->sk_num == ports[1]) + idx = 1; + else + return 0; + + /* bucket selection as in udp_hashslot2() */ + udptable = sk->sk_net.net->ipv4.udp_table; + bucket[idx] = udp_sk(sk)->udp_portaddr_hash & udptable->mask; + bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx)); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_jhash.h b/tools/testing/selftests/bpf/progs/test_jhash.h index c300734d26f6..ef53559bbbdf 100644 --- a/tools/testing/selftests/bpf/progs/test_jhash.h +++ b/tools/testing/selftests/bpf/progs/test_jhash.h @@ -69,3 +69,34 @@ u32 jhash(const void *key, u32 length, u32 initval) return c; } + +static __always_inline u32 jhash2(const u32 *k, u32 length, u32 initval) +{ + u32 a, b, c; + + /* Set up the internal state */ + a = b = c = JHASH_INITVAL + (length<<2) + initval; + + /* Handle most of the key */ + while (length > 3) { + a += k[0]; + b += k[1]; + c += k[2]; + __jhash_mix(a, b, c); + length -= 3; + k += 3; + } + + /* Handle the last 3 u32's */ + switch (length) { + case 3: c += k[2]; + case 2: b += k[1]; + case 1: a += k[0]; + __jhash_final(a, b, c); + break; + case 0: /* Nothing left to add */ + break; + } + + return c; +} diff --git a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c index 9eeb2d89cda8..67dddd941891 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c @@ -3,6 +3,7 @@ #include <vmlinux.h> #include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> #include "bpf_misc.h" #include "xdp_metadata.h" #include "bpf_kfuncs.h" @@ -138,25 +139,182 @@ __weak int subprog_ctx_tag(void *ctx __arg_ctx) return bpf_get_stack(ctx, stack, sizeof(stack), 0); } +__weak int raw_tp_canonical(struct bpf_raw_tracepoint_args *ctx __arg_ctx) +{ + return 0; +} + +__weak int raw_tp_u64_array(u64 *ctx __arg_ctx) +{ + return 0; +} + SEC("?raw_tp") __success __log_level(2) int arg_tag_ctx_raw_tp(void *ctx) { - return subprog_ctx_tag(ctx); + return subprog_ctx_tag(ctx) + raw_tp_canonical(ctx) + raw_tp_u64_array(ctx); +} + +SEC("?raw_tp.w") +__success __log_level(2) +int arg_tag_ctx_raw_tp_writable(void *ctx) +{ + return subprog_ctx_tag(ctx) + raw_tp_canonical(ctx) + raw_tp_u64_array(ctx); +} + +SEC("?tp_btf/sys_enter") +__success __log_level(2) +int arg_tag_ctx_raw_tp_btf(void *ctx) +{ + return subprog_ctx_tag(ctx) + raw_tp_canonical(ctx) + raw_tp_u64_array(ctx); +} + +struct whatever { }; + +__weak int tp_whatever(struct whatever *ctx __arg_ctx) +{ + return 0; } SEC("?tp") __success __log_level(2) int arg_tag_ctx_tp(void *ctx) { - return subprog_ctx_tag(ctx); + return subprog_ctx_tag(ctx) + tp_whatever(ctx); +} + +__weak int kprobe_subprog_pt_regs(struct pt_regs *ctx __arg_ctx) +{ + return 0; +} + +__weak int kprobe_subprog_typedef(bpf_user_pt_regs_t *ctx __arg_ctx) +{ + return 0; } SEC("?kprobe") __success __log_level(2) int arg_tag_ctx_kprobe(void *ctx) { - return subprog_ctx_tag(ctx); + return subprog_ctx_tag(ctx) + + kprobe_subprog_pt_regs(ctx) + + kprobe_subprog_typedef(ctx); +} + +__weak int perf_subprog_regs( +#if defined(bpf_target_riscv) + struct user_regs_struct *ctx __arg_ctx +#elif defined(bpf_target_s390) + /* user_pt_regs typedef is anonymous struct, so only `void *` works */ + void *ctx __arg_ctx +#elif defined(bpf_target_loongarch) || defined(bpf_target_arm64) || defined(bpf_target_powerpc) + struct user_pt_regs *ctx __arg_ctx +#else + struct pt_regs *ctx __arg_ctx +#endif +) +{ + return 0; +} + +__weak int perf_subprog_typedef(bpf_user_pt_regs_t *ctx __arg_ctx) +{ + return 0; +} + +__weak int perf_subprog_canonical(struct bpf_perf_event_data *ctx __arg_ctx) +{ + return 0; +} + +SEC("?perf_event") +__success __log_level(2) +int arg_tag_ctx_perf(void *ctx) +{ + return subprog_ctx_tag(ctx) + + perf_subprog_regs(ctx) + + perf_subprog_typedef(ctx) + + perf_subprog_canonical(ctx); +} + +__weak int iter_subprog_void(void *ctx __arg_ctx) +{ + return 0; +} + +__weak int iter_subprog_typed(struct bpf_iter__task *ctx __arg_ctx) +{ + return 0; +} + +SEC("?iter/task") +__success __log_level(2) +int arg_tag_ctx_iter_task(struct bpf_iter__task *ctx) +{ + return (iter_subprog_void(ctx) + iter_subprog_typed(ctx)) & 1; +} + +__weak int tracing_subprog_void(void *ctx __arg_ctx) +{ + return 0; +} + +__weak int tracing_subprog_u64(u64 *ctx __arg_ctx) +{ + return 0; +} + +int acc; + +SEC("?fentry/" SYS_PREFIX "sys_nanosleep") +__success __log_level(2) +int BPF_PROG(arg_tag_ctx_fentry) +{ + acc += tracing_subprog_void(ctx) + tracing_subprog_u64(ctx); + return 0; +} + +SEC("?fexit/" SYS_PREFIX "sys_nanosleep") +__success __log_level(2) +int BPF_PROG(arg_tag_ctx_fexit) +{ + acc += tracing_subprog_void(ctx) + tracing_subprog_u64(ctx); + return 0; +} + +SEC("?fmod_ret/" SYS_PREFIX "sys_nanosleep") +__success __log_level(2) +int BPF_PROG(arg_tag_ctx_fmod_ret) +{ + return tracing_subprog_void(ctx) + tracing_subprog_u64(ctx); +} + +SEC("?lsm/bpf") +__success __log_level(2) +int BPF_PROG(arg_tag_ctx_lsm) +{ + return tracing_subprog_void(ctx) + tracing_subprog_u64(ctx); +} + +SEC("?struct_ops/test_1") +__success __log_level(2) +int BPF_PROG(arg_tag_ctx_struct_ops) +{ + return tracing_subprog_void(ctx) + tracing_subprog_u64(ctx); +} + +SEC(".struct_ops") +struct bpf_dummy_ops dummy_1 = { + .test_1 = (void *)arg_tag_ctx_struct_ops, +}; + +SEC("?syscall") +__success __log_level(2) +int arg_tag_ctx_syscall(void *ctx) +{ + return tracing_subprog_void(ctx) + tracing_subprog_u64(ctx) + tp_whatever(ctx); } __weak int subprog_dynptr(struct bpf_dynptr *dptr) diff --git a/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c b/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c index 71814a753216..a9ab37d3b9e2 100644 --- a/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c +++ b/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c @@ -146,4 +146,23 @@ l0_%=: exit; \ : __clobber_all); } +SEC("flow_dissector") +__description("flow_keys illegal alu op with variable offset") +__failure __msg("R7 pointer arithmetic on flow_keys prohibited") +__naked void flow_keys_illegal_variable_offset_alu(void) +{ + asm volatile(" \ + r6 = r1; \ + r7 = *(u64*)(r6 + %[flow_keys_off]); \ + r8 = 8; \ + r8 /= 1; \ + r8 &= 8; \ + r7 += r8; \ + r0 = *(u64*)(r7 + 0); \ + exit; \ +" : + : __imm_const(flow_keys_off, offsetof(struct __sk_buff, flow_keys)) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; |