From 47ea7417b0744324424405fc1207e266053237a9 Mon Sep 17 00:00:00 2001 From: James Hilliard Date: Sun, 31 Jul 2022 17:26:49 -0600 Subject: libbpf: Skip empty sections in bpf_object__init_global_data_maps The GNU assembler generates an empty .bss section. This is a well established behavior in GAS that happens in all supported targets. The LLVM assembler doesn't generate an empty .bss section. bpftool chokes on the empty .bss section. Additionally in bpf_object__elf_collect the sec_desc->data is not initialized when a section is not recognized. In this case, this happens with .comment. So we must check that sec_desc->data is initialized before checking if the size is 0. Signed-off-by: James Hilliard Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20220731232649.4668-1-james.hilliard1@gmail.com --- tools/lib/bpf/libbpf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 50d41815f431..77e3797cf75a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1642,6 +1642,10 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj) for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) { sec_desc = &obj->efile.secs[sec_idx]; + /* Skip recognized sections with size 0. */ + if (sec_desc->data && sec_desc->data->d_size == 0) + continue; + switch (sec_desc->sec_type) { case SEC_DATA: sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); -- cgit v1.2.3 From 3045f42a64324d339125a8a1a1763bb9e1e08300 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 31 Jul 2022 19:51:09 -0700 Subject: libbpf: Initialize err in probe_map_create GCC-11 warns about the possibly unitialized err variable in probe_map_create: libbpf_probes.c: In function 'probe_map_create': libbpf_probes.c:361:38: error: 'err' may be used uninitialized in this function [-Werror=maybe-uninitialized] 361 | return fd < 0 && err == exp_err ? 1 : 0; | ~~~~^~~~~~~~~~ Fixes: 878d8def0603 ("libbpf: Rework feature-probing APIs") Signed-off-by: Florian Fainelli Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20220801025109.1206633-1-f.fainelli@gmail.com --- tools/lib/bpf/libbpf_probes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 0b5398786bf3..6d495656f554 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -193,7 +193,7 @@ static int probe_map_create(enum bpf_map_type map_type) LIBBPF_OPTS(bpf_map_create_opts, opts); int key_size, value_size, max_entries; __u32 btf_key_type_id = 0, btf_value_type_id = 0; - int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err; + int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err = 0; key_size = sizeof(__u32); value_size = sizeof(__u32); -- cgit v1.2.3 From d55dfe587bc0670f90564a962615723fe7749ab1 Mon Sep 17 00:00:00 2001 From: Manu Bretelle Date: Mon, 1 Aug 2022 06:24:09 -0700 Subject: bpftool: Remove BPF_OBJ_NAME_LEN restriction when looking up bpf program by name bpftool was limiting the length of names to BPF_OBJ_NAME_LEN in prog_parse fds. Since commit b662000aff84 ("bpftool: Adding support for BTF program names") we can get the full program name from BTF. This patch removes the restriction of name length when running `bpftool prog show name ${name}`. Test: Tested against some internal program names that were longer than `BPF_OBJ_NAME_LEN`, here a redacted example of what was ran to test. # previous behaviour $ sudo bpftool prog show name some_long_program_name Error: can't parse name # with the patch $ sudo ./bpftool prog show name some_long_program_name 123456789: tracing name some_long_program_name tag taghexa gpl .... ... ... ... # too long sudo ./bpftool prog show name $(python3 -c 'print("A"*128)') Error: can't parse name # not too long but no match $ sudo ./bpftool prog show name $(python3 -c 'print("A"*127)') Signed-off-by: Manu Bretelle Signed-off-by: Andrii Nakryiko Tested-by: Jiri Olsa Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20220801132409.4147849-1-chantr4@gmail.com --- tools/bpf/bpftool/common.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 067e9ea59e3b..8727765add88 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -722,6 +722,7 @@ print_all_levels(__maybe_unused enum libbpf_print_level level, static int prog_fd_by_nametag(void *nametag, int **fds, bool tag) { + char prog_name[MAX_PROG_FULL_NAME]; unsigned int id = 0; int fd, nb_fds = 0; void *tmp; @@ -754,12 +755,20 @@ static int prog_fd_by_nametag(void *nametag, int **fds, bool tag) goto err_close_fd; } - if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) || - (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) { + if (tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) { close(fd); continue; } + if (!tag) { + get_prog_full_name(&info, fd, prog_name, + sizeof(prog_name)); + if (strncmp(nametag, prog_name, sizeof(prog_name))) { + close(fd); + continue; + } + } + if (nb_fds > 0) { tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); if (!tmp) { @@ -820,7 +829,7 @@ int prog_parse_fds(int *argc, char ***argv, int **fds) NEXT_ARGP(); name = **argv; - if (strlen(name) > BPF_OBJ_NAME_LEN - 1) { + if (strlen(name) > MAX_PROG_FULL_NAME - 1) { p_err("can't parse name"); return -1; } -- cgit v1.2.3 From d25f40ff68aa61c838947bb9adee6c6b36e77453 Mon Sep 17 00:00:00 2001 From: James Hilliard Date: Wed, 3 Aug 2022 09:14:03 -0600 Subject: libbpf: Ensure functions with always_inline attribute are inline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC expects the always_inline attribute to only be set on inline functions, as such we should make all functions with this attribute use the __always_inline macro which makes the function inline and sets the attribute. Fixes errors like: /home/buildroot/bpf-next/tools/testing/selftests/bpf/tools/include/bpf/bpf_tracing.h:439:1: error: ‘always_inline’ function might not be inlinable [-Werror=attributes] 439 | ____##name(unsigned long long *ctx, ##args) | ^~~~ Signed-off-by: James Hilliard Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20220803151403.793024-1-james.hilliard1@gmail.com --- tools/lib/bpf/bpf_tracing.h | 14 +++++++------- tools/lib/bpf/usdt.bpf.h | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 43ca3aff2292..5fdb93da423b 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -426,7 +426,7 @@ struct pt_regs; */ #define BPF_PROG(name, args...) \ name(unsigned long long *ctx); \ -static __attribute__((always_inline)) typeof(name(0)) \ +static __always_inline typeof(name(0)) \ ____##name(unsigned long long *ctx, ##args); \ typeof(name(0)) name(unsigned long long *ctx) \ { \ @@ -435,7 +435,7 @@ typeof(name(0)) name(unsigned long long *ctx) \ return ____##name(___bpf_ctx_cast(args)); \ _Pragma("GCC diagnostic pop") \ } \ -static __attribute__((always_inline)) typeof(name(0)) \ +static __always_inline typeof(name(0)) \ ____##name(unsigned long long *ctx, ##args) struct pt_regs; @@ -460,7 +460,7 @@ struct pt_regs; */ #define BPF_KPROBE(name, args...) \ name(struct pt_regs *ctx); \ -static __attribute__((always_inline)) typeof(name(0)) \ +static __always_inline typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args); \ typeof(name(0)) name(struct pt_regs *ctx) \ { \ @@ -469,7 +469,7 @@ typeof(name(0)) name(struct pt_regs *ctx) \ return ____##name(___bpf_kprobe_args(args)); \ _Pragma("GCC diagnostic pop") \ } \ -static __attribute__((always_inline)) typeof(name(0)) \ +static __always_inline typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args) #define ___bpf_kretprobe_args0() ctx @@ -484,7 +484,7 @@ ____##name(struct pt_regs *ctx, ##args) */ #define BPF_KRETPROBE(name, args...) \ name(struct pt_regs *ctx); \ -static __attribute__((always_inline)) typeof(name(0)) \ +static __always_inline typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args); \ typeof(name(0)) name(struct pt_regs *ctx) \ { \ @@ -540,7 +540,7 @@ static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) #define BPF_KSYSCALL(name, args...) \ name(struct pt_regs *ctx); \ extern _Bool LINUX_HAS_SYSCALL_WRAPPER __kconfig; \ -static __attribute__((always_inline)) typeof(name(0)) \ +static __always_inline typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args); \ typeof(name(0)) name(struct pt_regs *ctx) \ { \ @@ -555,7 +555,7 @@ typeof(name(0)) name(struct pt_regs *ctx) \ return ____##name(___bpf_syscall_args(args)); \ _Pragma("GCC diagnostic pop") \ } \ -static __attribute__((always_inline)) typeof(name(0)) \ +static __always_inline typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args) #define BPF_KPROBE_SYSCALL BPF_KSYSCALL diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h index 4f2adc0bd6ca..fdfd235e52c4 100644 --- a/tools/lib/bpf/usdt.bpf.h +++ b/tools/lib/bpf/usdt.bpf.h @@ -232,7 +232,7 @@ long bpf_usdt_cookie(struct pt_regs *ctx) */ #define BPF_USDT(name, args...) \ name(struct pt_regs *ctx); \ -static __attribute__((always_inline)) typeof(name(0)) \ +static __always_inline typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args); \ typeof(name(0)) name(struct pt_regs *ctx) \ { \ @@ -241,7 +241,7 @@ typeof(name(0)) name(struct pt_regs *ctx) \ return ____##name(___bpf_usdt_args(args)); \ _Pragma("GCC diagnostic pop") \ } \ -static __attribute__((always_inline)) typeof(name(0)) \ +static __always_inline typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args) #endif /* __USDT_BPF_H__ */ -- cgit v1.2.3 From 5653f55ebd767b4ef47414ee7f852517993eda6f Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Fri, 5 Aug 2022 10:14:05 -0700 Subject: selftests/bpf: Clean up sys_nanosleep uses This patch cleans up a few things: * dynptr_fail.c: There is no sys_nanosleep tracepoint. dynptr_fail only tests that the prog load fails, so just SEC("?raw_tp") suffices here. * test_bpf_cookie: There is no sys_nanosleep kprobe. The prog is loaded in userspace through bpf_program__attach_kprobe_opts passing in SYS_NANOSLEEP_KPROBE_NAME, so just SEC("k{ret}probe") suffices here. * test_helper_restricted: There is no sys_nanosleep kprobe. test_helper_restricted only tests that the prog load fails, so just SEC("?kprobe")( suffices here. There are no functional changes. Suggested-by: Andrii Nakryiko Signed-off-by: Joanne Koong Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220805171405.2272103-1-joannelkoong@gmail.com --- tools/testing/selftests/bpf/progs/dynptr_fail.c | 56 +++++++++++----------- .../testing/selftests/bpf/progs/test_bpf_cookie.c | 4 +- .../selftests/bpf/progs/test_helper_restricted.c | 4 +- 3 files changed, 32 insertions(+), 32 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index 0a26c243e6e9..b5e0a87f0a36 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -65,7 +65,7 @@ static int get_map_val_dynptr(struct bpf_dynptr *ptr) /* Every bpf_ringbuf_reserve_dynptr call must have a corresponding * bpf_ringbuf_submit/discard_dynptr call */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int ringbuf_missing_release1(void *ctx) { struct bpf_dynptr ptr; @@ -77,7 +77,7 @@ int ringbuf_missing_release1(void *ctx) return 0; } -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int ringbuf_missing_release2(void *ctx) { struct bpf_dynptr ptr1, ptr2; @@ -112,7 +112,7 @@ static int missing_release_callback_fn(__u32 index, void *data) } /* Any dynptr initialized within a callback must have bpf_dynptr_put called */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int ringbuf_missing_release_callback(void *ctx) { bpf_loop(10, missing_release_callback_fn, NULL, 0); @@ -120,7 +120,7 @@ int ringbuf_missing_release_callback(void *ctx) } /* Can't call bpf_ringbuf_submit/discard_dynptr on a non-initialized dynptr */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int ringbuf_release_uninit_dynptr(void *ctx) { struct bpf_dynptr ptr; @@ -132,7 +132,7 @@ int ringbuf_release_uninit_dynptr(void *ctx) } /* A dynptr can't be used after it has been invalidated */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int use_after_invalid(void *ctx) { struct bpf_dynptr ptr; @@ -151,7 +151,7 @@ int use_after_invalid(void *ctx) } /* Can't call non-dynptr ringbuf APIs on a dynptr ringbuf sample */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int ringbuf_invalid_api(void *ctx) { struct bpf_dynptr ptr; @@ -173,7 +173,7 @@ done: } /* Can't add a dynptr to a map */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int add_dynptr_to_map1(void *ctx) { struct bpf_dynptr ptr; @@ -190,7 +190,7 @@ int add_dynptr_to_map1(void *ctx) } /* Can't add a struct with an embedded dynptr to a map */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int add_dynptr_to_map2(void *ctx) { struct test_info x; @@ -207,7 +207,7 @@ int add_dynptr_to_map2(void *ctx) } /* A data slice can't be accessed out of bounds */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int data_slice_out_of_bounds_ringbuf(void *ctx) { struct bpf_dynptr ptr; @@ -227,7 +227,7 @@ done: return 0; } -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int data_slice_out_of_bounds_map_value(void *ctx) { __u32 key = 0, map_val; @@ -247,7 +247,7 @@ int data_slice_out_of_bounds_map_value(void *ctx) } /* A data slice can't be used after it has been released */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int data_slice_use_after_release(void *ctx) { struct bpf_dynptr ptr; @@ -273,7 +273,7 @@ done: } /* A data slice must be first checked for NULL */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int data_slice_missing_null_check1(void *ctx) { struct bpf_dynptr ptr; @@ -293,7 +293,7 @@ int data_slice_missing_null_check1(void *ctx) } /* A data slice can't be dereferenced if it wasn't checked for null */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int data_slice_missing_null_check2(void *ctx) { struct bpf_dynptr ptr; @@ -315,7 +315,7 @@ done: /* Can't pass in a dynptr as an arg to a helper function that doesn't take in a * dynptr argument */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int invalid_helper1(void *ctx) { struct bpf_dynptr ptr; @@ -329,7 +329,7 @@ int invalid_helper1(void *ctx) } /* A dynptr can't be passed into a helper function at a non-zero offset */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int invalid_helper2(void *ctx) { struct bpf_dynptr ptr; @@ -344,7 +344,7 @@ int invalid_helper2(void *ctx) } /* A bpf_dynptr is invalidated if it's been written into */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int invalid_write1(void *ctx) { struct bpf_dynptr ptr; @@ -365,7 +365,7 @@ int invalid_write1(void *ctx) * A bpf_dynptr can't be used as a dynptr if it has been written into at a fixed * offset */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int invalid_write2(void *ctx) { struct bpf_dynptr ptr; @@ -388,7 +388,7 @@ int invalid_write2(void *ctx) * A bpf_dynptr can't be used as a dynptr if it has been written into at a * non-const offset */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int invalid_write3(void *ctx) { struct bpf_dynptr ptr; @@ -419,7 +419,7 @@ static int invalid_write4_callback(__u32 index, void *data) /* If the dynptr is written into in a callback function, it should * be invalidated as a dynptr */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int invalid_write4(void *ctx) { struct bpf_dynptr ptr; @@ -436,7 +436,7 @@ int invalid_write4(void *ctx) /* A globally-defined bpf_dynptr can't be used (it must reside as a stack frame) */ struct bpf_dynptr global_dynptr; -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int global(void *ctx) { /* this should fail */ @@ -448,7 +448,7 @@ int global(void *ctx) } /* A direct read should fail */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int invalid_read1(void *ctx) { struct bpf_dynptr ptr; @@ -464,7 +464,7 @@ int invalid_read1(void *ctx) } /* A direct read at an offset should fail */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int invalid_read2(void *ctx) { struct bpf_dynptr ptr; @@ -479,7 +479,7 @@ int invalid_read2(void *ctx) } /* A direct read at an offset into the lower stack slot should fail */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int invalid_read3(void *ctx) { struct bpf_dynptr ptr1, ptr2; @@ -505,7 +505,7 @@ static int invalid_read4_callback(__u32 index, void *data) } /* A direct read within a callback function should fail */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int invalid_read4(void *ctx) { struct bpf_dynptr ptr; @@ -520,7 +520,7 @@ int invalid_read4(void *ctx) } /* Initializing a dynptr on an offset should fail */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int invalid_offset(void *ctx) { struct bpf_dynptr ptr; @@ -534,7 +534,7 @@ int invalid_offset(void *ctx) } /* Can't release a dynptr twice */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int release_twice(void *ctx) { struct bpf_dynptr ptr; @@ -560,7 +560,7 @@ static int release_twice_callback_fn(__u32 index, void *data) /* Test that releasing a dynptr twice, where one of the releases happens * within a calback function, fails */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int release_twice_callback(void *ctx) { struct bpf_dynptr ptr; @@ -575,7 +575,7 @@ int release_twice_callback(void *ctx) } /* Reject unsupported local mem types for dynptr_from_mem API */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int dynptr_from_mem_invalid_api(void *ctx) { struct bpf_dynptr ptr; diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c index 22d0ac8709b4..5a3a80f751c4 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c @@ -28,14 +28,14 @@ static void update(void *ctx, __u64 *res) *res |= bpf_get_attach_cookie(ctx); } -SEC("kprobe/sys_nanosleep") +SEC("kprobe") int handle_kprobe(struct pt_regs *ctx) { update(ctx, &kprobe_res); return 0; } -SEC("kretprobe/sys_nanosleep") +SEC("kretprobe") int handle_kretprobe(struct pt_regs *ctx) { update(ctx, &kretprobe_res); diff --git a/tools/testing/selftests/bpf/progs/test_helper_restricted.c b/tools/testing/selftests/bpf/progs/test_helper_restricted.c index 20ef9d433b97..5715c569ec03 100644 --- a/tools/testing/selftests/bpf/progs/test_helper_restricted.c +++ b/tools/testing/selftests/bpf/progs/test_helper_restricted.c @@ -72,7 +72,7 @@ int tp_timer(void *ctx) return 0; } -SEC("?kprobe/sys_nanosleep") +SEC("?kprobe") int kprobe_timer(void *ctx) { timer_work(); @@ -104,7 +104,7 @@ int tp_spin_lock(void *ctx) return 0; } -SEC("?kprobe/sys_nanosleep") +SEC("?kprobe") int kprobe_spin_lock(void *ctx) { spin_lock_work(); -- cgit v1.2.3 From e19db6762c18ab1ddf7a3ef4d0023780c24dc1e8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 3 Aug 2022 14:42:02 -0700 Subject: libbpf: Reject legacy 'maps' ELF section Add explicit error message if BPF object file is still using legacy BPF map definitions in SEC("maps"). Before this change, if BPF object file is still using legacy map definition user will see a bit confusing: libbpf: elf: skipping unrecognized data section(4) maps libbpf: prog 'handler': bad map relo against 'server_map' in section 'maps' Now libbpf will be explicit about rejecting "maps" ELF section: libbpf: elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+ Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220803214202.23750-1-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 77e3797cf75a..d3d94704583f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -591,7 +591,6 @@ struct elf_state { size_t strtabidx; struct elf_sec_desc *secs; int sec_cnt; - int maps_shndx; int btf_maps_shndx; __u32 btf_maps_sec_btf_id; int text_shndx; @@ -1272,7 +1271,6 @@ static struct bpf_object *bpf_object__new(const char *path, */ obj->efile.obj_buf = obj_buf; obj->efile.obj_buf_sz = obj_buf_sz; - obj->efile.maps_shndx = -1; obj->efile.btf_maps_shndx = -1; obj->efile.st_ops_shndx = -1; obj->kconfig_map_idx = -1; @@ -3363,7 +3361,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) if (err) return err; } else if (strcmp(name, "maps") == 0) { - obj->efile.maps_shndx = idx; + pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n"); + return -ENOTSUP; } else if (strcmp(name, MAPS_ELF_SEC) == 0) { obj->efile.btf_maps_shndx = idx; } else if (strcmp(name, BTF_ELF_SEC) == 0) { @@ -3895,8 +3894,7 @@ static bool bpf_object__shndx_is_data(const struct bpf_object *obj, static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, int shndx) { - return shndx == obj->efile.maps_shndx || - shndx == obj->efile.btf_maps_shndx; + return shndx == obj->efile.btf_maps_shndx; } static enum libbpf_map_type -- cgit v1.2.3 From 0c9a7a7e2049859d7869e15dd8f70ca5aeae460e Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 2 Aug 2022 14:46:38 -0700 Subject: bpf: Verifier cleanups This patch cleans up a few things in the verifier: * type_is_pkt_pointer(): Future work (skb + xdp dynptrs [0]) will be using the reg type PTR_TO_PACKET | PTR_MAYBE_NULL. type_is_pkt_pointer() should return true for any type whose base type is PTR_TO_PACKET, regardless of flags attached to it. * reg_type_may_be_refcounted_or_null(): Get the base type at the start of the function to avoid having to recompute it / improve readability * check_func_proto(): remove unnecessary 'meta' arg * check_helper_call(): Use switch casing on the base type of return value instead of nested ifs on the full type There are no functional behavior changes. [0] https://lore.kernel.org/bpf/20220726184706.954822-1-joannelkoong@gmail.com/ Signed-off-by: Joanne Koong Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20220802214638.3643235-1-joannelkoong@gmail.com --- kernel/bpf/verifier.c | 50 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 096fdac70165..843a966cd02b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -427,6 +427,7 @@ static void verbose_invalid_scalar(struct bpf_verifier_env *env, static bool type_is_pkt_pointer(enum bpf_reg_type type) { + type = base_type(type); return type == PTR_TO_PACKET || type == PTR_TO_PACKET_META; } @@ -456,10 +457,9 @@ static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type) { - return base_type(type) == PTR_TO_SOCKET || - base_type(type) == PTR_TO_TCP_SOCK || - base_type(type) == PTR_TO_MEM || - base_type(type) == PTR_TO_BTF_ID; + type = base_type(type); + return type == PTR_TO_SOCKET || type == PTR_TO_TCP_SOCK || + type == PTR_TO_MEM || type == PTR_TO_BTF_ID; } static bool type_is_rdonly_mem(u32 type) @@ -6498,8 +6498,7 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn) return true; } -static int check_func_proto(const struct bpf_func_proto *fn, int func_id, - struct bpf_call_arg_meta *meta) +static int check_func_proto(const struct bpf_func_proto *fn, int func_id) { return check_raw_mode_ok(fn) && check_arg_pair_ok(fn) && @@ -7218,7 +7217,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn memset(&meta, 0, sizeof(meta)); meta.pkt_access = fn->pkt_access; - err = check_func_proto(fn, func_id, &meta); + err = check_func_proto(fn, func_id); if (err) { verbose(env, "kernel subsystem misconfigured func %s#%d\n", func_id_name(func_id), func_id); @@ -7359,13 +7358,17 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn /* update return register (already marked as written above) */ ret_type = fn->ret_type; - ret_flag = type_flag(fn->ret_type); - if (ret_type == RET_INTEGER) { + ret_flag = type_flag(ret_type); + + switch (base_type(ret_type)) { + case RET_INTEGER: /* sets type to SCALAR_VALUE */ mark_reg_unknown(env, regs, BPF_REG_0); - } else if (ret_type == RET_VOID) { + break; + case RET_VOID: regs[BPF_REG_0].type = NOT_INIT; - } else if (base_type(ret_type) == RET_PTR_TO_MAP_VALUE) { + break; + case RET_PTR_TO_MAP_VALUE: /* There is no offset yet applied, variable or fixed */ mark_reg_known_zero(env, regs, BPF_REG_0); /* remember map_ptr, so that check_map_access() @@ -7384,20 +7387,26 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn map_value_has_spin_lock(meta.map_ptr)) { regs[BPF_REG_0].id = ++env->id_gen; } - } else if (base_type(ret_type) == RET_PTR_TO_SOCKET) { + break; + case RET_PTR_TO_SOCKET: mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag; - } else if (base_type(ret_type) == RET_PTR_TO_SOCK_COMMON) { + break; + case RET_PTR_TO_SOCK_COMMON: mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag; - } else if (base_type(ret_type) == RET_PTR_TO_TCP_SOCK) { + break; + case RET_PTR_TO_TCP_SOCK: mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag; - } else if (base_type(ret_type) == RET_PTR_TO_ALLOC_MEM) { + break; + case RET_PTR_TO_ALLOC_MEM: mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag; regs[BPF_REG_0].mem_size = meta.mem_size; - } else if (base_type(ret_type) == RET_PTR_TO_MEM_OR_BTF_ID) { + break; + case RET_PTR_TO_MEM_OR_BTF_ID: + { const struct btf_type *t; mark_reg_known_zero(env, regs, BPF_REG_0); @@ -7429,7 +7438,10 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs[BPF_REG_0].btf = meta.ret_btf; regs[BPF_REG_0].btf_id = meta.ret_btf_id; } - } else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) { + break; + } + case RET_PTR_TO_BTF_ID: + { struct btf *ret_btf; int ret_btf_id; @@ -7450,7 +7462,9 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn } regs[BPF_REG_0].btf = ret_btf; regs[BPF_REG_0].btf_id = ret_btf_id; - } else { + break; + } + default: verbose(env, "unknown return type %u of func %s#%d\n", base_type(ret_type), func_id_name(func_id), func_id); return -EINVAL; -- cgit v1.2.3 From 9e32084ef1c33a87a736d6ce3fcb95b60dac9aa1 Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Sat, 6 Aug 2022 18:20:21 +0800 Subject: libbpf: Do not require executable permission for shared libraries Currently, resolve_full_path() requires executable permission for both programs and shared libraries. This causes failures on distos like Debian since the shared libraries are not installed executable and Linux is not requiring shared libraries to have executable permissions. Let's remove executable permission check for shared libraries. Reported-by: Goro Fuji Signed-off-by: Hengqi Chen Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220806102021.3867130-1-hengqi.chen@gmail.com --- tools/lib/bpf/libbpf.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index d3d94704583f..f7364ea82ac1 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -10664,15 +10664,17 @@ static const char *arch_specific_lib_paths(void) static int resolve_full_path(const char *file, char *result, size_t result_sz) { const char *search_paths[3] = {}; - int i; + int i, perm; if (str_has_sfx(file, ".so") || strstr(file, ".so.")) { search_paths[0] = getenv("LD_LIBRARY_PATH"); search_paths[1] = "/usr/lib64:/usr/lib"; search_paths[2] = arch_specific_lib_paths(); + perm = R_OK; } else { search_paths[0] = getenv("PATH"); search_paths[1] = "/usr/bin:/usr/sbin"; + perm = R_OK | X_OK; } for (i = 0; i < ARRAY_SIZE(search_paths); i++) { @@ -10691,8 +10693,8 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz) if (!seg_len) continue; snprintf(result, result_sz, "%.*s/%s", seg_len, s, file); - /* ensure it is an executable file/link */ - if (access(result, R_OK | X_OK) < 0) + /* ensure it has required permissions */ + if (access(result, perm) < 0) continue; pr_debug("resolved '%s' to '%s'\n", file, result); return 0; -- cgit v1.2.3 From ca34ce29fc4b0e929cc6aada40829d17ab50fee4 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Mon, 8 Aug 2022 09:47:23 -0700 Subject: bpf: Improve docstring for BPF_F_USER_BUILD_ID flag Most tools which use bpf_get_stack or bpf_get_stackid symbolicate the stack - meaning the stack of addresses in the target process' address space is transformed into meaningful symbol names. The BPF_F_USER_BUILD_ID flag eases this process by finding the build_id of the file-backed vma which the address falls in and translating the address to an offset within the backing file. To be more specific, the offset is a "file offset" from the beginning of the backing file. The symbols in ET_DYN ELF objects have a st_value which is also described as an "offset" - but an offset in the process address space, relative to the base address of the object. It's necessary to translate between the "file offset" and "virtual address offset" during symbolication before they can be directly compared. Failure to do so can lead to confusing bugs, so this patch clarifies language in the documentation in an attempt to keep this from happening. Signed-off-by: Dave Marchevsky Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220808164723.3107500-1-davemarchevsky@fb.com --- include/uapi/linux/bpf.h | 14 ++++++++++++-- tools/include/uapi/linux/bpf.h | 14 ++++++++++++-- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7bf9ba1329be..534e33fb1029 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3008,8 +3008,18 @@ union bpf_attr { * **BPF_F_USER_STACK** * Collect a user space stack instead of a kernel stack. * **BPF_F_USER_BUILD_ID** - * Collect buildid+offset instead of ips for user stack, - * only valid if **BPF_F_USER_STACK** is also specified. + * Collect (build_id, file_offset) instead of ips for user + * stack, only valid if **BPF_F_USER_STACK** is also + * specified. + * + * *file_offset* is an offset relative to the beginning + * of the executable or shared object file backing the vma + * which the *ip* falls in. It is *not* an offset relative + * to that object's base address. Accordingly, it must be + * adjusted by adding (sh_addr - sh_offset), where + * sh_{addr,offset} correspond to the executable section + * containing *file_offset* in the object, for comparisons + * to symbols' st_value to be valid. * * **bpf_get_stack**\ () can collect up to * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 59a217ca2dfd..f58d58e1d547 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3008,8 +3008,18 @@ union bpf_attr { * **BPF_F_USER_STACK** * Collect a user space stack instead of a kernel stack. * **BPF_F_USER_BUILD_ID** - * Collect buildid+offset instead of ips for user stack, - * only valid if **BPF_F_USER_STACK** is also specified. + * Collect (build_id, file_offset) instead of ips for user + * stack, only valid if **BPF_F_USER_STACK** is also + * specified. + * + * *file_offset* is an offset relative to the beginning + * of the executable or shared object file backing the vma + * which the *ip* falls in. It is *not* an offset relative + * to that object's base address. Accordingly, it must be + * adjusted by adding (sh_addr - sh_offset), where + * sh_{addr,offset} correspond to the executable section + * containing *file_offset* in the object, for comparisons + * to symbols' st_value to be valid. * * **bpf_get_stack**\ () can collect up to * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject -- cgit v1.2.3 From fa96b24204af42274ec13dfb2f2e6990d7510e55 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Fri, 5 Aug 2022 14:48:14 -0700 Subject: btf: Add a new kfunc flag which allows to mark a function to be sleepable This allows to declare a kfunc as sleepable and prevents its use in a non sleepable program. Signed-off-by: Benjamin Tissoires Co-developed-by: Yosry Ahmed Signed-off-by: Yosry Ahmed Signed-off-by: Hao Luo Link: https://lore.kernel.org/r/20220805214821.1058337-2-haoluo@google.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/kfuncs.rst | 6 ++++++ include/linux/btf.h | 1 + kernel/bpf/btf.c | 9 +++++++++ 3 files changed, 16 insertions(+) diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index c0b7dae6dbf5..c8b21de1c772 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -146,6 +146,12 @@ that operate (change some property, perform some operation) on an object that was obtained using an acquire kfunc. Such kfuncs need an unchanged pointer to ensure the integrity of the operation being performed on the expected object. +2.4.6 KF_SLEEPABLE flag +----------------------- + +The KF_SLEEPABLE flag is used for kfuncs that may sleep. Such kfuncs can only +be called by sleepable BPF programs (BPF_F_SLEEPABLE). + 2.5 Registering the kfuncs -------------------------- diff --git a/include/linux/btf.h b/include/linux/btf.h index cdb376d53238..976cbdd2981f 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -49,6 +49,7 @@ * for this case. */ #define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */ +#define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */ struct btf; struct btf_member; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 7e64447659f3..d3e4c86b8fcd 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6175,6 +6175,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, { enum bpf_prog_type prog_type = resolve_prog_type(env->prog); bool rel = false, kptr_get = false, trusted_arg = false; + bool sleepable = false; struct bpf_verifier_log *log = &env->log; u32 i, nargs, ref_id, ref_obj_id = 0; bool is_kfunc = btf_is_kernel(btf); @@ -6212,6 +6213,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, rel = kfunc_flags & KF_RELEASE; kptr_get = kfunc_flags & KF_KPTR_GET; trusted_arg = kfunc_flags & KF_TRUSTED_ARGS; + sleepable = kfunc_flags & KF_SLEEPABLE; } /* check that BTF function arguments match actual types that the @@ -6419,6 +6421,13 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, func_name); return -EINVAL; } + + if (sleepable && !env->prog->aux->sleepable) { + bpf_log(log, "kernel function %s is sleepable but the program is not\n", + func_name); + return -EINVAL; + } + /* returns argument register number > 0 in case of reference release kfunc */ return rel ? ref_regno : 0; } -- cgit v1.2.3 From f3a2aebdd6fb90e444d595e46de64e822af419da Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Fri, 5 Aug 2022 14:48:15 -0700 Subject: cgroup: enable cgroup_get_from_file() on cgroup1 cgroup_get_from_file() currently fails with -EBADF if called on cgroup v1. However, the current implementation works on cgroup v1 as well, so the restriction is unnecessary. This enabled cgroup_get_from_fd() to work on cgroup v1, which would be the only thing stopping bpf cgroup_iter from supporting cgroup v1. Signed-off-by: Yosry Ahmed Acked-by: Tejun Heo Signed-off-by: Hao Luo Link: https://lore.kernel.org/r/20220805214821.1058337-3-haoluo@google.com Signed-off-by: Alexei Starovoitov --- kernel/cgroup/cgroup.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index ffaccd6373f1..5f4502aa2b3b 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6132,11 +6132,6 @@ static struct cgroup *cgroup_get_from_file(struct file *f) return ERR_CAST(css); cgrp = css->cgroup; - if (!cgroup_on_dfl(cgrp)) { - cgroup_put(cgrp); - return ERR_PTR(-EBADF); - } - return cgrp; } -- cgit v1.2.3 From be3bb83dab2df838cd9e681e3e9dcde87bfe4f95 Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Fri, 5 Aug 2022 14:48:16 -0700 Subject: bpf, iter: Fix the condition on p when calling stop. In bpf_seq_read, seq->op->next() could return an ERR and jump to the label stop. However, the existing code in stop does not handle the case when p (returned from next()) is an ERR. Adds the handling of ERR of p by converting p into an error and jumping to done. Because all the current implementations do not have a case that returns ERR from next(), so this patch doesn't have behavior changes right now. Acked-by: Yonghong Song Signed-off-by: Hao Luo Link: https://lore.kernel.org/r/20220805214821.1058337-4-haoluo@google.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_iter.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 2726a5950cfa..4b112aa8bba3 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -197,6 +197,11 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, } stop: offs = seq->count; + if (IS_ERR(p)) { + seq->op->stop(seq, NULL); + err = PTR_ERR(p); + goto done; + } /* bpf program called if !p */ seq->op->stop(seq, p); if (!p) { -- cgit v1.2.3 From 6e116280b41b0cbfd90dfe9fa66e07ff348d50d5 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Mon, 25 Jul 2022 10:51:30 +0200 Subject: net: netfilter: Remove ifdefs for code shared by BPF and ctnetlink The current ifdefry for code shared by the BPF and ctnetlink side looks ugly. As per Pablo's request, simplify this by unconditionally compiling in the code. This can be revisited when the shared code between the two grows further. Suggested-by: Pablo Neira Ayuso Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220725085130.11553-1-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/net/netfilter/nf_conntrack_core.h | 6 ------ net/netfilter/nf_conntrack_core.c | 6 ------ 2 files changed, 12 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 3cd3a6e631aa..b2b9de70d9f4 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -86,10 +86,6 @@ extern spinlock_t nf_conntrack_expect_lock; /* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */ -#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ - (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES) || \ - IS_ENABLED(CONFIG_NF_CT_NETLINK)) - static inline void __nf_ct_set_timeout(struct nf_conn *ct, u64 timeout) { if (timeout > INT_MAX) @@ -101,6 +97,4 @@ int __nf_ct_change_timeout(struct nf_conn *ct, u64 cta_timeout); void __nf_ct_change_status(struct nf_conn *ct, unsigned long on, unsigned long off); int nf_ct_change_status_common(struct nf_conn *ct, unsigned int status); -#endif - #endif /* _NF_CONNTRACK_CORE_H */ diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 71c2f4f95d36..da65c6e8eeeb 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2807,10 +2807,6 @@ err_expect: return ret; } -#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ - (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES) || \ - IS_ENABLED(CONFIG_NF_CT_NETLINK)) - /* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */ int __nf_ct_change_timeout(struct nf_conn *ct, u64 timeout) @@ -2866,5 +2862,3 @@ int nf_ct_change_status_common(struct nf_conn *ct, unsigned int status) return 0; } EXPORT_SYMBOL_GPL(nf_ct_change_status_common); - -#endif -- cgit v1.2.3 From b2d8ef19c6e7ed71ba5092feb0710063a751834f Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Mon, 8 Aug 2022 10:15:59 -0700 Subject: bpf: Cleanup check_refcount_ok Discussion around a recently-submitted patch provided historical context for check_refcount_ok [0]. Specifically, the function and its helpers - may_be_acquire_function and arg_type_may_be_refcounted - predate the OBJ_RELEASE type flag and the addition of many more helpers with acquire/release semantics. The purpose of check_refcount_ok is to ensure: 1) Helper doesn't have multiple uses of return reg's ref_obj_id 2) Helper with release semantics only has one arg needing to be released, since that's tracked using meta->ref_obj_id With current verifier, it's safe to remove check_refcount_ok and its helpers. Since addition of OBJ_RELEASE type flag, case 2) has been handled by the arg_type_is_release check in check_func_arg. To ensure case 1) won't result in verifier silently prioritizing one use of ref_obj_id, this patch adds a helper_multiple_ref_obj_use check which fails loudly if a helper passes > 1 test for use of ref_obj_id. [0]: lore.kernel.org/bpf/20220713234529.4154673-1-davemarchevsky@fb.com Signed-off-by: Dave Marchevsky Acked-by: Martin KaFai Lau Acked-by: Joanne Koong Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220808171559.3251090-1-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 74 ++++++++++++++++++++------------------------------- 1 file changed, 29 insertions(+), 45 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 843a966cd02b..01e7f48b4d8c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -467,25 +467,11 @@ static bool type_is_rdonly_mem(u32 type) return type & MEM_RDONLY; } -static bool arg_type_may_be_refcounted(enum bpf_arg_type type) -{ - return type == ARG_PTR_TO_SOCK_COMMON; -} - static bool type_may_be_null(u32 type) { return type & PTR_MAYBE_NULL; } -static bool may_be_acquire_function(enum bpf_func_id func_id) -{ - return func_id == BPF_FUNC_sk_lookup_tcp || - func_id == BPF_FUNC_sk_lookup_udp || - func_id == BPF_FUNC_skc_lookup_tcp || - func_id == BPF_FUNC_map_lookup_elem || - func_id == BPF_FUNC_ringbuf_reserve; -} - static bool is_acquire_function(enum bpf_func_id func_id, const struct bpf_map *map) { @@ -518,6 +504,26 @@ static bool is_ptr_cast_function(enum bpf_func_id func_id) func_id == BPF_FUNC_skc_to_tcp_request_sock; } +static bool is_dynptr_acquire_function(enum bpf_func_id func_id) +{ + return func_id == BPF_FUNC_dynptr_data; +} + +static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id, + const struct bpf_map *map) +{ + int ref_obj_uses = 0; + + if (is_ptr_cast_function(func_id)) + ref_obj_uses++; + if (is_acquire_function(func_id, map)) + ref_obj_uses++; + if (is_dynptr_acquire_function(func_id)) + ref_obj_uses++; + + return ref_obj_uses > 1; +} + static bool is_cmpxchg_insn(const struct bpf_insn *insn) { return BPF_CLASS(insn->code) == BPF_STX && @@ -6453,33 +6459,6 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn) return true; } -static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id) -{ - int count = 0; - - if (arg_type_may_be_refcounted(fn->arg1_type)) - count++; - if (arg_type_may_be_refcounted(fn->arg2_type)) - count++; - if (arg_type_may_be_refcounted(fn->arg3_type)) - count++; - if (arg_type_may_be_refcounted(fn->arg4_type)) - count++; - if (arg_type_may_be_refcounted(fn->arg5_type)) - count++; - - /* A reference acquiring function cannot acquire - * another refcounted ptr. - */ - if (may_be_acquire_function(func_id) && count) - return false; - - /* We only support one arg being unreferenced at the moment, - * which is sufficient for the helper functions we have right now. - */ - return count <= 1; -} - static bool check_btf_id_ok(const struct bpf_func_proto *fn) { int i; @@ -6502,8 +6481,7 @@ static int check_func_proto(const struct bpf_func_proto *fn, int func_id) { return check_raw_mode_ok(fn) && check_arg_pair_ok(fn) && - check_btf_id_ok(fn) && - check_refcount_ok(fn, func_id) ? 0 : -EINVAL; + check_btf_id_ok(fn) ? 0 : -EINVAL; } /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] @@ -7473,6 +7451,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn if (type_may_be_null(regs[BPF_REG_0].type)) regs[BPF_REG_0].id = ++env->id_gen; + if (helper_multiple_ref_obj_use(func_id, meta.map_ptr)) { + verbose(env, "verifier internal error: func %s#%d sets ref_obj_id more than once\n", + func_id_name(func_id), func_id); + return -EFAULT; + } + if (is_ptr_cast_function(func_id)) { /* For release_reference() */ regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; @@ -7485,10 +7469,10 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs[BPF_REG_0].id = id; /* For release_reference() */ regs[BPF_REG_0].ref_obj_id = id; - } else if (func_id == BPF_FUNC_dynptr_data) { + } else if (is_dynptr_acquire_function(func_id)) { int dynptr_id = 0, i; - /* Find the id of the dynptr we're acquiring a reference to */ + /* Find the id of the dynptr we're tracking the reference of */ for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { if (arg_type_is_dynptr(fn->arg_type[i])) { if (dynptr_id) { -- cgit v1.2.3 From c8996c98f703b09afe77a1d247dae691c9849dc1 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 9 Aug 2022 08:08:02 +0200 Subject: bpf: Add BPF-helper for accessing CLOCK_TAI Commit 3dc6ffae2da2 ("timekeeping: Introduce fast accessor to clock tai") introduced a fast and NMI-safe accessor for CLOCK_TAI. Especially in time sensitive networks (TSN), where all nodes are synchronized by Precision Time Protocol (PTP), it's helpful to have the possibility to generate timestamps based on CLOCK_TAI instead of CLOCK_MONOTONIC. With a BPF helper for TAI in place, it becomes very convenient to correlate activity across different machines in the network. Use cases for such a BPF helper include functionalities such as Tx launch time (e.g. ETF and TAPRIO Qdiscs) and timestamping. Note: CLOCK_TAI is nothing new per se, only the NMI-safe variant of it is. Signed-off-by: Jesper Dangaard Brouer [Kurt: Wrote changelog and renamed helper] Signed-off-by: Kurt Kanzenbach Link: https://lore.kernel.org/r/20220809060803.5773-2-kurt@linutronix.de Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 13 +++++++++++++ kernel/bpf/core.c | 1 + kernel/bpf/helpers.c | 14 ++++++++++++++ tools/include/uapi/linux/bpf.h | 13 +++++++++++++ 5 files changed, 42 insertions(+) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 20c26aed7896..a627a02cf8ab 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2349,6 +2349,7 @@ extern const struct bpf_func_proto bpf_get_numa_node_id_proto; extern const struct bpf_func_proto bpf_tail_call_proto; extern const struct bpf_func_proto bpf_ktime_get_ns_proto; extern const struct bpf_func_proto bpf_ktime_get_boot_ns_proto; +extern const struct bpf_func_proto bpf_ktime_get_tai_ns_proto; extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; extern const struct bpf_func_proto bpf_get_current_comm_proto; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 534e33fb1029..7d1e2794d83e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5341,6 +5341,18 @@ union bpf_attr { * **-EACCES** if the SYN cookie is not valid. * * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. + * + * u64 bpf_ktime_get_tai_ns(void) + * Description + * A nonsettable system-wide clock derived from wall-clock time but + * ignoring leap seconds. This clock does not experience + * discontinuities and backwards jumps caused by NTP inserting leap + * seconds as CLOCK_REALTIME does. + * + * See: **clock_gettime**\ (**CLOCK_TAI**) + * Return + * Current *ktime*. + * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5551,6 +5563,7 @@ union bpf_attr { FN(tcp_raw_gen_syncookie_ipv6), \ FN(tcp_raw_check_syncookie_ipv4), \ FN(tcp_raw_check_syncookie_ipv6), \ + FN(ktime_get_tai_ns), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index c1e10d088dbb..639437f36928 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2623,6 +2623,7 @@ const struct bpf_func_proto bpf_get_numa_node_id_proto __weak; const struct bpf_func_proto bpf_ktime_get_ns_proto __weak; const struct bpf_func_proto bpf_ktime_get_boot_ns_proto __weak; const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto __weak; +const struct bpf_func_proto bpf_ktime_get_tai_ns_proto __weak; const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak; const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 1f961f9982d2..a95eb9fb01ff 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -198,6 +198,18 @@ const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = { .ret_type = RET_INTEGER, }; +BPF_CALL_0(bpf_ktime_get_tai_ns) +{ + /* NMI safe access to clock tai */ + return ktime_get_tai_fast_ns(); +} + +const struct bpf_func_proto bpf_ktime_get_tai_ns_proto = { + .func = bpf_ktime_get_tai_ns, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + BPF_CALL_0(bpf_get_current_pid_tgid) { struct task_struct *task = current; @@ -1617,6 +1629,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_ktime_get_ns_proto; case BPF_FUNC_ktime_get_boot_ns: return &bpf_ktime_get_boot_ns_proto; + case BPF_FUNC_ktime_get_tai_ns: + return &bpf_ktime_get_tai_ns_proto; case BPF_FUNC_ringbuf_output: return &bpf_ringbuf_output_proto; case BPF_FUNC_ringbuf_reserve: diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index f58d58e1d547..e174ad28aeb7 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5341,6 +5341,18 @@ union bpf_attr { * **-EACCES** if the SYN cookie is not valid. * * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. + * + * u64 bpf_ktime_get_tai_ns(void) + * Description + * A nonsettable system-wide clock derived from wall-clock time but + * ignoring leap seconds. This clock does not experience + * discontinuities and backwards jumps caused by NTP inserting leap + * seconds as CLOCK_REALTIME does. + * + * See: **clock_gettime**\ (**CLOCK_TAI**) + * Return + * Current *ktime*. + * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5551,6 +5563,7 @@ union bpf_attr { FN(tcp_raw_gen_syncookie_ipv6), \ FN(tcp_raw_check_syncookie_ipv4), \ FN(tcp_raw_check_syncookie_ipv6), \ + FN(ktime_get_tai_ns), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3 From 64e15820b987cc8e5864a8b907dfc17861e6ab5a Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Tue, 9 Aug 2022 08:08:03 +0200 Subject: selftests/bpf: Add BPF-helper test for CLOCK_TAI access Add BPF-helper test case for CLOCK_TAI access. The added test verifies that: * Timestamps are generated * Timestamps are moving forward * Timestamps are reasonable Signed-off-by: Kurt Kanzenbach Link: https://lore.kernel.org/r/20220809060803.5773-3-kurt@linutronix.de Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/time_tai.c | 74 +++++++++++++++++++++++ tools/testing/selftests/bpf/progs/test_time_tai.c | 24 ++++++++ 2 files changed, 98 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/time_tai.c create mode 100644 tools/testing/selftests/bpf/progs/test_time_tai.c diff --git a/tools/testing/selftests/bpf/prog_tests/time_tai.c b/tools/testing/selftests/bpf/prog_tests/time_tai.c new file mode 100644 index 000000000000..a31119823666 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/time_tai.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2022 Linutronix GmbH */ + +#include +#include + +#include "test_time_tai.skel.h" + +#include +#include + +#define TAI_THRESHOLD 1000000000ULL /* 1s */ +#define NSEC_PER_SEC 1000000000ULL + +static __u64 ts_to_ns(const struct timespec *ts) +{ + return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec; +} + +void test_time_tai(void) +{ + struct __sk_buff skb = { + .cb[0] = 0, + .cb[1] = 0, + .tstamp = 0, + }; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .ctx_in = &skb, + .ctx_size_in = sizeof(skb), + .ctx_out = &skb, + .ctx_size_out = sizeof(skb), + ); + struct test_time_tai *skel; + struct timespec now_tai; + __u64 ts1, ts2, now; + int ret, prog_fd; + + /* Open and load */ + skel = test_time_tai__open_and_load(); + if (!ASSERT_OK_PTR(skel, "tai_open")) + return; + + /* Run test program */ + prog_fd = bpf_program__fd(skel->progs.time_tai); + ret = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(ret, "test_run"); + + /* Retrieve generated TAI timestamps */ + ts1 = skb.tstamp; + ts2 = skb.cb[0] | ((__u64)skb.cb[1] << 32); + + /* TAI != 0 */ + ASSERT_NEQ(ts1, 0, "tai_ts1"); + ASSERT_NEQ(ts2, 0, "tai_ts2"); + + /* TAI is moving forward only */ + ASSERT_GT(ts2, ts1, "tai_forward"); + + /* Check for future */ + ret = clock_gettime(CLOCK_TAI, &now_tai); + ASSERT_EQ(ret, 0, "tai_gettime"); + now = ts_to_ns(&now_tai); + + ASSERT_TRUE(now > ts1, "tai_future_ts1"); + ASSERT_TRUE(now > ts2, "tai_future_ts2"); + + /* Check for reasonable range */ + ASSERT_TRUE(now - ts1 < TAI_THRESHOLD, "tai_range_ts1"); + ASSERT_TRUE(now - ts2 < TAI_THRESHOLD, "tai_range_ts2"); + + test_time_tai__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_time_tai.c b/tools/testing/selftests/bpf/progs/test_time_tai.c new file mode 100644 index 000000000000..7ea0863f3ddb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_time_tai.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2022 Linutronix GmbH */ + +#include +#include + +char _license[] SEC("license") = "GPL"; + +SEC("tc") +int time_tai(struct __sk_buff *skb) +{ + __u64 ts1, ts2; + + /* Get TAI timestamps */ + ts1 = bpf_ktime_get_tai_ns(); + ts2 = bpf_ktime_get_tai_ns(); + + /* Save TAI timestamps (Note: skb->hwtstamp is read-only) */ + skb->tstamp = ts1; + skb->cb[0] = ts2 & 0xffffffff; + skb->cb[1] = ts2 >> 32; + + return 0; +} -- cgit v1.2.3 From a00ed8430199abbc9d9bf43ea31795bfe98998ca Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sun, 7 Aug 2022 10:51:16 -0700 Subject: bpf: Always return corresponding btf_type in __get_type_size() Currently in funciton __get_type_size(), the corresponding btf_type is returned only in invalid cases. Let us always return btf_type regardless of valid or invalid cases. Such a new functionality will be used in subsequent patches. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20220807175116.4179242-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/btf.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index d3e4c86b8fcd..903719b89238 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5864,26 +5864,25 @@ again: } static int __get_type_size(struct btf *btf, u32 btf_id, - const struct btf_type **bad_type) + const struct btf_type **ret_type) { const struct btf_type *t; + *ret_type = btf_type_by_id(btf, 0); if (!btf_id) /* void */ return 0; t = btf_type_by_id(btf, btf_id); while (t && btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); - if (!t) { - *bad_type = btf_type_by_id(btf, 0); + if (!t) return -EINVAL; - } + *ret_type = t; if (btf_type_is_ptr(t)) /* kernel size of pointer. Not BPF's size of pointer*/ return sizeof(void *); if (btf_type_is_int(t) || btf_is_any_enum(t)) return t->size; - *bad_type = t; return -EINVAL; } -- cgit v1.2.3 From d020b2360b350b9f91b1769f9c84fe2d22f643db Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Tue, 9 Aug 2022 11:11:09 -0600 Subject: selftests/bpf: Fix vmtest.sh -h to not require root MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set the exit trap only after argument parsing is done. This way argument parse failure or `-h` will not require sudo. Reasoning is that it's confusing that a help message would require root access. Signed-off-by: Daniel Xu Signed-off-by: Daniel Borkmann Acked-by: Daniel Müller Link: https://lore.kernel.org/bpf/6a802aa37758e5a7e6aa5de294634f5518005e2b.1660064925.git.dxu@dxuuu.xyz --- tools/testing/selftests/bpf/vmtest.sh | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index b86ae4a2e5c5..976ef7585b33 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -307,6 +307,20 @@ update_kconfig() fi } +catch() +{ + local exit_code=$1 + local exit_status_file="${OUTPUT_DIR}/${EXIT_STATUS_FILE}" + # This is just a cleanup and the directory may + # have already been unmounted. So, don't let this + # clobber the error code we intend to return. + unmount_image || true + if [[ -f "${exit_status_file}" ]]; then + exit_code="$(cat ${exit_status_file})" + fi + exit ${exit_code} +} + main() { local script_dir="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)" @@ -353,6 +367,8 @@ main() done shift $((OPTIND -1)) + trap 'catch "$?"' EXIT + if [[ $# -eq 0 && "${debug_shell}" == "no" ]]; then echo "No command specified, will run ${DEFAULT_COMMAND} in the vm" else @@ -409,20 +425,4 @@ main() fi } -catch() -{ - local exit_code=$1 - local exit_status_file="${OUTPUT_DIR}/${EXIT_STATUS_FILE}" - # This is just a cleanup and the directory may - # have already been unmounted. So, don't let this - # clobber the error code we intend to return. - unmount_image || true - if [[ -f "${exit_status_file}" ]]; then - exit_code="$(cat ${exit_status_file})" - fi - exit ${exit_code} -} - -trap 'catch "$?"' EXIT - main "$@" -- cgit v1.2.3 From a7be0ab1eb1949f3564739784b4360e1233305f6 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Tue, 9 Aug 2022 11:11:10 -0600 Subject: selftests/bpf: Fix vmtest.sh getopts optstring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before, you could see the following errors: $ ./vmtest.sh -j ./vmtest.sh: option requires an argument -- j ./vmtest.sh: line 357: OPTARG: unbound variable $ ./vmtest.sh -z ./vmtest.sh: illegal option -- z ./vmtest.sh: line 357: OPTARG: unbound variable Fix by adding ':' as first character of optstring. Reason is that getopts requires ':' as the first character for OPTARG to be set in the `?` and `:` error cases. Note that the ':' as the first character of the optstring switches getopts to silent mode. The desire to run in this mode seems to have been there all along, as the script takes care of reporting errors. Signed-off-by: Daniel Xu Signed-off-by: Daniel Borkmann Acked-by: Daniel Müller Link: https://lore.kernel.org/bpf/0f93b56198328b6b4da7b4cf4662d05c3edb5fd2.1660064925.git.dxu@dxuuu.xyz --- tools/testing/selftests/bpf/vmtest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index 976ef7585b33..a29aa05ebb3e 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -333,7 +333,7 @@ main() local exit_command="poweroff -f" local debug_shell="no" - while getopts 'hskid:j:' opt; do + while getopts ':hskid:j:' opt; do case ${opt} in i) update_image="yes" -- cgit v1.2.3 From 883743422ced8c961ab05dc63ec81b75a4e56052 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 9 Aug 2022 14:40:54 -0700 Subject: bpf: Fix ref_obj_id for dynptr data slices in verifier When a data slice is obtained from a dynptr (through the bpf_dynptr_data API), the ref obj id of the dynptr must be found and then associated with the data slice. The ref obj id of the dynptr must be found *before* the caller saved regs are reset. Without this fix, the ref obj id tracking is not correct for dynptrs that are at an offset from the frame pointer. Please also note that the data slice's ref obj id must be assigned after the ret types are parsed, since RET_PTR_TO_ALLOC_MEM-type return regs get zero-marked. Fixes: 34d4ef5775f7 ("bpf: Add dynptr data slices") Signed-off-by: Joanne Koong Acked-by: David Vernet Link: https://lore.kernel.org/r/20220809214055.4050604-1-joannelkoong@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 01e7f48b4d8c..28b02dc67a2a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -504,7 +504,7 @@ static bool is_ptr_cast_function(enum bpf_func_id func_id) func_id == BPF_FUNC_skc_to_tcp_request_sock; } -static bool is_dynptr_acquire_function(enum bpf_func_id func_id) +static bool is_dynptr_ref_function(enum bpf_func_id func_id) { return func_id == BPF_FUNC_dynptr_data; } @@ -518,7 +518,7 @@ static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id, ref_obj_uses++; if (is_acquire_function(func_id, map)) ref_obj_uses++; - if (is_dynptr_acquire_function(func_id)) + if (is_dynptr_ref_function(func_id)) ref_obj_uses++; return ref_obj_uses > 1; @@ -7320,6 +7320,23 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn } } break; + case BPF_FUNC_dynptr_data: + for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { + if (arg_type_is_dynptr(fn->arg_type[i])) { + if (meta.ref_obj_id) { + verbose(env, "verifier internal error: meta.ref_obj_id already set\n"); + return -EFAULT; + } + /* Find the id of the dynptr we're tracking the reference of */ + meta.ref_obj_id = stack_slot_get_id(env, ®s[BPF_REG_1 + i]); + break; + } + } + if (i == MAX_BPF_FUNC_REG_ARGS) { + verbose(env, "verifier internal error: no dynptr in bpf_dynptr_data()\n"); + return -EFAULT; + } + break; } if (err) @@ -7457,7 +7474,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return -EFAULT; } - if (is_ptr_cast_function(func_id)) { + if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) { /* For release_reference() */ regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; } else if (is_acquire_function(func_id, meta.map_ptr)) { @@ -7469,21 +7486,6 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs[BPF_REG_0].id = id; /* For release_reference() */ regs[BPF_REG_0].ref_obj_id = id; - } else if (is_dynptr_acquire_function(func_id)) { - int dynptr_id = 0, i; - - /* Find the id of the dynptr we're tracking the reference of */ - for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { - if (arg_type_is_dynptr(fn->arg_type[i])) { - if (dynptr_id) { - verbose(env, "verifier internal error: multiple dynptr args in func\n"); - return -EFAULT; - } - dynptr_id = stack_slot_get_id(env, ®s[BPF_REG_1 + i]); - } - } - /* For release_reference() */ - regs[BPF_REG_0].ref_obj_id = dynptr_id; } do_refine_retval_range(regs, fn->ret_type, func_id, &meta); -- cgit v1.2.3 From dc444be8bae45019396aedd53c745e685a4eb235 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Tue, 9 Aug 2022 14:40:55 -0700 Subject: selftests/bpf: add extra test for using dynptr data slice after release Add an additional test, "data_slice_use_after_release2", for ensuring that data slices are correctly invalidated by the verifier after the dynptr whose ref obj id they track is released. In particular, this tests data slice invalidation for dynptrs located at a non-zero offset from the frame pointer. Signed-off-by: Joanne Koong Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20220809214055.4050604-2-joannelkoong@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/dynptr.c | 3 +- tools/testing/selftests/bpf/progs/dynptr_fail.c | 38 ++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index 3c7aa82b98e2..bcf80b9f7c27 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -22,7 +22,8 @@ static struct { {"add_dynptr_to_map2", "invalid indirect read from stack"}, {"data_slice_out_of_bounds_ringbuf", "value is outside of the allowed memory range"}, {"data_slice_out_of_bounds_map_value", "value is outside of the allowed memory range"}, - {"data_slice_use_after_release", "invalid mem access 'scalar'"}, + {"data_slice_use_after_release1", "invalid mem access 'scalar'"}, + {"data_slice_use_after_release2", "invalid mem access 'scalar'"}, {"data_slice_missing_null_check1", "invalid mem access 'mem_or_null'"}, {"data_slice_missing_null_check2", "invalid mem access 'mem_or_null'"}, {"invalid_helper1", "invalid indirect read from stack"}, diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index b5e0a87f0a36..b0f08ff024fb 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -248,7 +248,7 @@ int data_slice_out_of_bounds_map_value(void *ctx) /* A data slice can't be used after it has been released */ SEC("?raw_tp") -int data_slice_use_after_release(void *ctx) +int data_slice_use_after_release1(void *ctx) { struct bpf_dynptr ptr; struct sample *sample; @@ -272,6 +272,42 @@ done: return 0; } +/* A data slice can't be used after it has been released. + * + * This tests the case where the data slice tracks a dynptr (ptr2) + * that is at a non-zero offset from the frame pointer (ptr1 is at fp, + * ptr2 is at fp - 16). + */ +SEC("?raw_tp") +int data_slice_use_after_release2(void *ctx) +{ + struct bpf_dynptr ptr1, ptr2; + struct sample *sample; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr1); + bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr2); + + sample = bpf_dynptr_data(&ptr2, 0, sizeof(*sample)); + if (!sample) + goto done; + + sample->pid = 23; + + bpf_ringbuf_submit_dynptr(&ptr2, 0); + + /* this should fail */ + sample->pid = 23; + + bpf_ringbuf_submit_dynptr(&ptr1, 0); + + return 0; + +done: + bpf_ringbuf_discard_dynptr(&ptr2, 0); + bpf_ringbuf_discard_dynptr(&ptr1, 0); + return 0; +} + /* A data slice must be first checked for NULL */ SEC("?raw_tp") int data_slice_missing_null_check1(void *ctx) -- cgit v1.2.3 From 3143d10b094596f3e5d5964b2660375e586652a3 Mon Sep 17 00:00:00 2001 From: Shibin Koikkara Reeny Date: Wed, 3 Aug 2022 14:43:54 +0000 Subject: selftests/xsk: Update poll test cases Poll test case was not testing all the functionality of the poll feature in the test suite. This patch updates the poll test case which contains 2 test cases to test the RX and the TX poll functionality and additional 2 more test cases to check the timeout feature of the poll event. Poll test suite has 4 test cases: 1. TEST_TYPE_RX_POLL: Check if RX path POLLIN function works as expect. TX path can use any method to send the traffic. 2. TEST_TYPE_TX_POLL: Check if TX path POLLOUT function works as expect. RX path can use any method to receive the traffic. 3. TEST_TYPE_POLL_RXQ_EMPTY: Call poll function with parameter POLLIN on empty RX queue will cause timeout. If timeout then test case passes. 4. TEST_TYPE_POLL_TXQ_FULL: When TX queue is filled and packets are not cleaned by the kernel then if we invoke the poll function with POLLOUT it should trigger timeout. Signed-off-by: Shibin Koikkara Reeny Signed-off-by: Daniel Borkmann Reviewed-by: Maciej Fijalkowski Link: https://lore.kernel.org/bpf/20220803144354.98122-1-shibin.koikkara.reeny@intel.com --- tools/testing/selftests/bpf/xskxceiver.c | 166 ++++++++++++++++++++++++------- tools/testing/selftests/bpf/xskxceiver.h | 8 +- 2 files changed, 134 insertions(+), 40 deletions(-) diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 74d56d971baf..20b44ab32a06 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -244,6 +244,11 @@ static void gen_udp_hdr(u32 payload, void *pkt, struct ifobject *ifobject, memset32_htonl(pkt + PKT_HDR_SIZE, payload, UDP_PKT_DATA_SIZE); } +static bool is_umem_valid(struct ifobject *ifobj) +{ + return !!ifobj->umem->umem; +} + static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr) { udp_hdr->check = 0; @@ -817,12 +822,13 @@ static int complete_pkts(struct xsk_socket_info *xsk, int batch_size) return TEST_PASS; } -static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds) +static int receive_pkts(struct test_spec *test, struct pollfd *fds) { - struct timeval tv_end, tv_now, tv_timeout = {RECV_TMOUT, 0}; + struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0}; + struct pkt_stream *pkt_stream = test->ifobj_rx->pkt_stream; u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkts_sent = 0; - struct pkt_stream *pkt_stream = ifobj->pkt_stream; - struct xsk_socket_info *xsk = ifobj->xsk; + struct xsk_socket_info *xsk = test->ifobj_rx->xsk; + struct ifobject *ifobj = test->ifobj_rx; struct xsk_umem_info *umem = xsk->umem; struct pkt *pkt; int ret; @@ -843,17 +849,28 @@ static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds) } kick_rx(xsk); + if (ifobj->use_poll) { + ret = poll(fds, 1, POLL_TMOUT); + if (ret < 0) + exit_with_error(-ret); + + if (!ret) { + if (!is_umem_valid(test->ifobj_tx)) + return TEST_PASS; + + ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__); + return TEST_FAILURE; - rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); - if (!rcvd) { - if (xsk_ring_prod__needs_wakeup(&umem->fq)) { - ret = poll(fds, 1, POLL_TMOUT); - if (ret < 0) - exit_with_error(-ret); } - continue; + + if (!(fds->revents & POLLIN)) + continue; } + rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); + if (!rcvd) + continue; + if (ifobj->use_fill_ring) { ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); while (ret != rcvd) { @@ -900,13 +917,35 @@ static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds) return TEST_PASS; } -static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb) +static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fds, + bool timeout) { struct xsk_socket_info *xsk = ifobject->xsk; - u32 i, idx, valid_pkts = 0; + bool use_poll = ifobject->use_poll; + u32 i, idx, ret, valid_pkts = 0; + + while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) { + if (use_poll) { + ret = poll(fds, 1, POLL_TMOUT); + if (timeout) { + if (ret < 0) { + ksft_print_msg("ERROR: [%s] Poll error %d\n", + __func__, ret); + return TEST_FAILURE; + } + if (ret == 0) + return TEST_PASS; + break; + } + if (ret <= 0) { + ksft_print_msg("ERROR: [%s] Poll error %d\n", + __func__, ret); + return TEST_FAILURE; + } + } - while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) complete_pkts(xsk, BATCH_SIZE); + } for (i = 0; i < BATCH_SIZE; i++) { struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); @@ -933,11 +972,27 @@ static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb) xsk_ring_prod__submit(&xsk->tx, i); xsk->outstanding_tx += valid_pkts; - if (complete_pkts(xsk, i)) - return TEST_FAILURE; - usleep(10); - return TEST_PASS; + if (use_poll) { + ret = poll(fds, 1, POLL_TMOUT); + if (ret <= 0) { + if (ret == 0 && timeout) + return TEST_PASS; + + ksft_print_msg("ERROR: [%s] Poll error %d\n", __func__, ret); + return TEST_FAILURE; + } + } + + if (!timeout) { + if (complete_pkts(xsk, i)) + return TEST_FAILURE; + + usleep(10); + return TEST_PASS; + } + + return TEST_CONTINUE; } static void wait_for_tx_completion(struct xsk_socket_info *xsk) @@ -948,29 +1003,19 @@ static void wait_for_tx_completion(struct xsk_socket_info *xsk) static int send_pkts(struct test_spec *test, struct ifobject *ifobject) { + bool timeout = !is_umem_valid(test->ifobj_rx); struct pollfd fds = { }; - u32 pkt_cnt = 0; + u32 pkt_cnt = 0, ret; fds.fd = xsk_socket__fd(ifobject->xsk->xsk); fds.events = POLLOUT; while (pkt_cnt < ifobject->pkt_stream->nb_pkts) { - int err; - - if (ifobject->use_poll) { - int ret; - - ret = poll(&fds, 1, POLL_TMOUT); - if (ret <= 0) - continue; - - if (!(fds.revents & POLLOUT)) - continue; - } - - err = __send_pkts(ifobject, &pkt_cnt); - if (err || test->fail) + ret = __send_pkts(ifobject, &pkt_cnt, &fds, timeout); + if ((ret || test->fail) && !timeout) return TEST_FAILURE; + else if (ret == TEST_PASS && timeout) + return ret; } wait_for_tx_completion(ifobject->xsk); @@ -1235,7 +1280,7 @@ static void *worker_testapp_validate_rx(void *arg) pthread_barrier_wait(&barr); - err = receive_pkts(ifobject, &fds); + err = receive_pkts(test, &fds); if (!err && ifobject->validation_func) err = ifobject->validation_func(ifobject); @@ -1251,6 +1296,33 @@ static void *worker_testapp_validate_rx(void *arg) pthread_exit(NULL); } +static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj, + enum test_type type) +{ + pthread_t t0; + + if (pthread_barrier_init(&barr, NULL, 2)) + exit_with_error(errno); + + test->current_step++; + if (type == TEST_TYPE_POLL_RXQ_TMOUT) + pkt_stream_reset(ifobj->pkt_stream); + pkts_in_flight = 0; + + /*Spawn thread */ + pthread_create(&t0, NULL, ifobj->func_ptr, test); + + if (type != TEST_TYPE_POLL_TXQ_TMOUT) + pthread_barrier_wait(&barr); + + if (pthread_barrier_destroy(&barr)) + exit_with_error(errno); + + pthread_join(t0, NULL); + + return !!test->fail; +} + static int testapp_validate_traffic(struct test_spec *test) { struct ifobject *ifobj_tx = test->ifobj_tx; @@ -1548,12 +1620,30 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_ pkt_stream_restore_default(test); break; - case TEST_TYPE_POLL: - test->ifobj_tx->use_poll = true; + case TEST_TYPE_RX_POLL: test->ifobj_rx->use_poll = true; - test_spec_set_name(test, "POLL"); + test_spec_set_name(test, "POLL_RX"); testapp_validate_traffic(test); break; + case TEST_TYPE_TX_POLL: + test->ifobj_tx->use_poll = true; + test_spec_set_name(test, "POLL_TX"); + testapp_validate_traffic(test); + break; + case TEST_TYPE_POLL_TXQ_TMOUT: + test_spec_set_name(test, "POLL_TXQ_FULL"); + test->ifobj_tx->use_poll = true; + /* create invalid frame by set umem frame_size and pkt length equal to 2048 */ + test->ifobj_tx->umem->frame_size = 2048; + pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048); + testapp_validate_traffic_single_thread(test, test->ifobj_tx, type); + pkt_stream_restore_default(test); + break; + case TEST_TYPE_POLL_RXQ_TMOUT: + test_spec_set_name(test, "POLL_RXQ_EMPTY"); + test->ifobj_rx->use_poll = true; + testapp_validate_traffic_single_thread(test, test->ifobj_rx, type); + break; case TEST_TYPE_ALIGNED_INV_DESC: test_spec_set_name(test, "ALIGNED_INV_DESC"); testapp_invalid_desc(test); diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index 3d17053f98e5..ee97576757a9 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -27,6 +27,7 @@ #define TEST_PASS 0 #define TEST_FAILURE -1 +#define TEST_CONTINUE 1 #define MAX_INTERFACES 2 #define MAX_INTERFACE_NAME_CHARS 7 #define MAX_INTERFACES_NAMESPACE_CHARS 10 @@ -48,7 +49,7 @@ #define SOCK_RECONF_CTR 10 #define BATCH_SIZE 64 #define POLL_TMOUT 1000 -#define RECV_TMOUT 3 +#define THREAD_TMOUT 3 #define DEFAULT_PKT_CNT (4 * 1024) #define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4) #define UMEM_SIZE (DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE) @@ -68,7 +69,10 @@ enum test_type { TEST_TYPE_RUN_TO_COMPLETION, TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME, TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT, - TEST_TYPE_POLL, + TEST_TYPE_RX_POLL, + TEST_TYPE_TX_POLL, + TEST_TYPE_POLL_RXQ_TMOUT, + TEST_TYPE_POLL_TXQ_TMOUT, TEST_TYPE_UNALIGNED, TEST_TYPE_ALIGNED_INV_DESC, TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME, -- cgit v1.2.3 From 4dd48c6f1f83290d4bc61b43e61d86f8bc6c310e Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Wed, 10 Aug 2022 08:59:03 +0200 Subject: bpf: add destructive kfunc flag Add KF_DESTRUCTIVE flag for destructive functions. Functions with this flag set will require CAP_SYS_BOOT capabilities. Signed-off-by: Artem Savkov Link: https://lore.kernel.org/r/20220810065905.475418-2-asavkov@redhat.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/kfuncs.rst | 9 +++++++++ include/linux/btf.h | 3 ++- kernel/bpf/verifier.c | 5 +++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index c8b21de1c772..781731749e55 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -152,6 +152,15 @@ ensure the integrity of the operation being performed on the expected object. The KF_SLEEPABLE flag is used for kfuncs that may sleep. Such kfuncs can only be called by sleepable BPF programs (BPF_F_SLEEPABLE). +2.4.7 KF_DESTRUCTIVE flag +-------------------------- + +The KF_DESTRUCTIVE flag is used to indicate functions calling which is +destructive to the system. For example such a call can result in system +rebooting or panicking. Due to this additional restrictions apply to these +calls. At the moment they only require CAP_SYS_BOOT capability, but more can be +added later. + 2.5 Registering the kfuncs -------------------------- diff --git a/include/linux/btf.h b/include/linux/btf.h index 976cbdd2981f..ad93c2d9cc1c 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -49,7 +49,8 @@ * for this case. */ #define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */ -#define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */ +#define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */ +#define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */ struct btf; struct btf_member; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 28b02dc67a2a..2c1f8069f7b7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7584,6 +7584,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, func_name); return -EACCES; } + if (*kfunc_flags & KF_DESTRUCTIVE && !capable(CAP_SYS_BOOT)) { + verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capabilities\n"); + return -EACCES; + } + acq = *kfunc_flags & KF_ACQUIRE; /* Check the arguments */ -- cgit v1.2.3 From 133790596406ce2658f0864eb7eac64987c2b12f Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Wed, 10 Aug 2022 08:59:04 +0200 Subject: bpf: export crash_kexec() as destructive kfunc Allow properly marked bpf programs to call crash_kexec(). Signed-off-by: Artem Savkov Link: https://lore.kernel.org/r/20220810065905.475418-3-asavkov@redhat.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a95eb9fb01ff..3c1b9bbcf971 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1725,3 +1725,21 @@ bpf_base_func_proto(enum bpf_func_id func_id) return NULL; } } + +BTF_SET8_START(tracing_btf_ids) +#ifdef CONFIG_KEXEC_CORE +BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) +#endif +BTF_SET8_END(tracing_btf_ids) + +static const struct btf_kfunc_id_set tracing_kfunc_set = { + .owner = THIS_MODULE, + .set = &tracing_btf_ids, +}; + +static int __init kfunc_init(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &tracing_kfunc_set); +} + +late_initcall(kfunc_init); -- cgit v1.2.3 From e338945816754a1c362f606b8e2029f2c023e51c Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Wed, 10 Aug 2022 08:59:05 +0200 Subject: selftests/bpf: add destructive kfunc test Add a test checking that programs calling destructive kfuncs can only do so if they have CAP_SYS_BOOT capabilities. Signed-off-by: Artem Savkov Link: https://lore.kernel.org/r/20220810065905.475418-4-asavkov@redhat.com Signed-off-by: Alexei Starovoitov --- net/bpf/test_run.c | 5 +++ .../testing/selftests/bpf/prog_tests/kfunc_call.c | 36 ++++++++++++++++++++++ .../selftests/bpf/progs/kfunc_call_destructive.c | 14 +++++++++ 3 files changed, 55 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/kfunc_call_destructive.c diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index cbc9cd5058cb..afa7125252f6 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -695,6 +695,10 @@ noinline void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) { } +noinline void bpf_kfunc_call_test_destructive(void) +{ +} + __diag_pop(); ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO); @@ -719,6 +723,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1) BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1) BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2) BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE) BTF_SET8_END(test_sk_check_kfunc_ids) static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c index c00eb974eb85..351fafa006fb 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -5,6 +5,9 @@ #include "kfunc_call_test.lskel.h" #include "kfunc_call_test_subprog.skel.h" #include "kfunc_call_test_subprog.lskel.h" +#include "kfunc_call_destructive.skel.h" + +#include "cap_helpers.h" static void test_main(void) { @@ -86,6 +89,36 @@ static void test_subprog_lskel(void) kfunc_call_test_subprog_lskel__destroy(skel); } +static int test_destructive_open_and_load(void) +{ + struct kfunc_call_destructive *skel; + int err; + + skel = kfunc_call_destructive__open(); + if (!ASSERT_OK_PTR(skel, "prog_open")) + return -1; + + err = kfunc_call_destructive__load(skel); + + kfunc_call_destructive__destroy(skel); + + return err; +} + +static void test_destructive(void) +{ + __u64 save_caps = 0; + + ASSERT_OK(test_destructive_open_and_load(), "succesful_load"); + + if (!ASSERT_OK(cap_disable_effective(1ULL << CAP_SYS_BOOT, &save_caps), "drop_caps")) + return; + + ASSERT_EQ(test_destructive_open_and_load(), -13, "no_caps_failure"); + + cap_enable_effective(save_caps, NULL); +} + void test_kfunc_call(void) { if (test__start_subtest("main")) @@ -96,4 +129,7 @@ void test_kfunc_call(void) if (test__start_subtest("subprog_lskel")) test_subprog_lskel(); + + if (test__start_subtest("destructive")) + test_destructive(); } diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c new file mode 100644 index 000000000000..767472bc5a97 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +extern void bpf_kfunc_call_test_destructive(void) __ksym; + +SEC("tc") +int kfunc_destructive_test(void) +{ + bpf_kfunc_call_test_destructive(); + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From d7c5802faff6e7f50d18db40fdcb7e50590177f5 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 10 Aug 2022 11:34:25 -0700 Subject: libbpf: preserve errno across pr_warn/pr_info/pr_debug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As suggested in [0], make sure that libbpf_print saves and restored errno and as such guaranteed that no matter what actual print callback user installs, macros like pr_warn/pr_info/pr_debug are completely transparent as far as errno goes. While libbpf code is pretty careful about not clobbering important errno values accidentally with pr_warn(), it's a trivial change to make sure that pr_warn can be used anywhere without a risk of clobbering errno. No functional changes, just future proofing. [0] https://github.com/libbpf/libbpf/pull/536 Signed-off-by: Andrii Nakryiko Acked-by: Daniel Müller Link: https://lore.kernel.org/r/20220810183425.1998735-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index f7364ea82ac1..917d975bd4c6 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -223,13 +223,18 @@ __printf(2, 3) void libbpf_print(enum libbpf_print_level level, const char *format, ...) { va_list args; + int old_errno; if (!__libbpf_pr) return; + old_errno = errno; + va_start(args, format); __libbpf_pr(level, format, args); va_end(args); + + errno = old_errno; } static void pr_perm_msg(int err) -- cgit v1.2.3 From 083818156d1e98f22b1ac612a3957bc553e7ba57 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Wed, 10 Aug 2022 15:18:26 +0000 Subject: bpf: Remove unneeded memset in queue_stack_map creation __GFP_ZERO will clear the memory, so we don't need to memset it. Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20220810151840.16394-2-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/queue_stack_maps.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index a1c0794ae49d..8a5e060de63b 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -78,8 +78,6 @@ static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr) if (!qs) return ERR_PTR(-ENOMEM); - memset(qs, 0, sizeof(*qs)); - bpf_map_init_from_attr(&qs->map, attr); qs->size = size; -- cgit v1.2.3 From 8f58ee54c2eae790f50c51dfa64a153601451f08 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Wed, 10 Aug 2022 15:18:27 +0000 Subject: bpf: Use bpf_map_area_free instread of kvfree bpf_map_area_alloc() should be paired with bpf_map_area_free(). Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20220810151840.16394-3-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/ringbuf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index ded4faeca192..3fb54feb39d4 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -116,7 +116,7 @@ static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node) err_free_pages: for (i = 0; i < nr_pages; i++) __free_page(pages[i]); - kvfree(pages); + bpf_map_area_free(pages); return NULL; } @@ -190,7 +190,7 @@ static void bpf_ringbuf_free(struct bpf_ringbuf *rb) vunmap(rb); for (i = 0; i < nr_pages; i++) __free_page(pages[i]); - kvfree(pages); + bpf_map_area_free(pages); } static void ringbuf_map_free(struct bpf_map *map) -- cgit v1.2.3 From 992c9e13f5939437037627c67bcb51e674b64265 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Wed, 10 Aug 2022 15:18:28 +0000 Subject: bpf: Make __GFP_NOWARN consistent in bpf map creation Some of the bpf maps are created with __GFP_NOWARN, i.e. arraymap, bloom_filter, bpf_local_storage, bpf_struct_ops, lpm_trie, queue_stack_maps, reuseport_array, stackmap and xskmap, while others are created without __GFP_NOWARN, i.e. cpumap, devmap, hashtab, local_storage, offload, ringbuf and sock_map. But there are not key differences between the creation of these maps. So let make this allocation flag consistent in all bpf maps creation. Then we can use a generic helper to alloc all bpf maps. Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20220810151840.16394-4-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/cpumap.c | 2 +- kernel/bpf/devmap.c | 2 +- kernel/bpf/hashtab.c | 2 +- kernel/bpf/local_storage.c | 4 ++-- kernel/bpf/offload.c | 2 +- kernel/bpf/ringbuf.c | 2 +- net/core/sock_map.c | 4 ++-- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index f4860ac756cd..b25ca9d603a6 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -97,7 +97,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) attr->map_flags & ~BPF_F_NUMA_NODE) return ERR_PTR(-EINVAL); - cmap = kzalloc(sizeof(*cmap), GFP_USER | __GFP_ACCOUNT); + cmap = kzalloc(sizeof(*cmap), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT); if (!cmap) return ERR_PTR(-ENOMEM); diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index a0e02b009487..88feaa094de8 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -163,7 +163,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) if (!capable(CAP_NET_ADMIN)) return ERR_PTR(-EPERM); - dtab = kzalloc(sizeof(*dtab), GFP_USER | __GFP_ACCOUNT); + dtab = kzalloc(sizeof(*dtab), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT); if (!dtab) return ERR_PTR(-ENOMEM); diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index da7578426a46..f1e5303fe26e 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -495,7 +495,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) struct bpf_htab *htab; int err, i; - htab = kzalloc(sizeof(*htab), GFP_USER | __GFP_ACCOUNT); + htab = kzalloc(sizeof(*htab), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT); if (!htab) return ERR_PTR(-ENOMEM); diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 49ef0ce040c7..a64255e20f87 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -313,8 +313,8 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) /* max_entries is not used and enforced to be 0 */ return ERR_PTR(-EINVAL); - map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map), - __GFP_ZERO | GFP_USER | __GFP_ACCOUNT, numa_node); + map = kzalloc_node(sizeof(struct bpf_cgroup_storage_map), + GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT, numa_node); if (!map) return ERR_PTR(-ENOMEM); diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index bd09290e3648..5a629a1b971c 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -372,7 +372,7 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr) attr->map_type != BPF_MAP_TYPE_HASH) return ERR_PTR(-EINVAL); - offmap = kzalloc(sizeof(*offmap), GFP_USER); + offmap = kzalloc(sizeof(*offmap), GFP_USER | __GFP_NOWARN); if (!offmap) return ERR_PTR(-ENOMEM); diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 3fb54feb39d4..df8062cb258c 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -164,7 +164,7 @@ static struct bpf_map *ringbuf_map_alloc(union bpf_attr *attr) return ERR_PTR(-E2BIG); #endif - rb_map = kzalloc(sizeof(*rb_map), GFP_USER | __GFP_ACCOUNT); + rb_map = kzalloc(sizeof(*rb_map), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT); if (!rb_map) return ERR_PTR(-ENOMEM); diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 028813dfecb0..763d77162d0c 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -41,7 +41,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) attr->map_flags & ~SOCK_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); - stab = kzalloc(sizeof(*stab), GFP_USER | __GFP_ACCOUNT); + stab = kzalloc(sizeof(*stab), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT); if (!stab) return ERR_PTR(-ENOMEM); @@ -1076,7 +1076,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) if (attr->key_size > MAX_BPF_STACK) return ERR_PTR(-E2BIG); - htab = kzalloc(sizeof(*htab), GFP_USER | __GFP_ACCOUNT); + htab = kzalloc(sizeof(*htab), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT); if (!htab) return ERR_PTR(-ENOMEM); -- cgit v1.2.3 From 73cf09a36bf7bfb3e5a3ff23755c36d49137c44d Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Wed, 10 Aug 2022 15:18:29 +0000 Subject: bpf: Use bpf_map_area_alloc consistently on bpf map creation Let's use the generic helper bpf_map_area_alloc() instead of the open-coded kzalloc helpers in bpf maps creation path. Signed-off-by: Yafang Shao Link: https://lore.kernel.org/r/20220810151840.16394-5-laoar.shao@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 6 +++--- kernel/bpf/cpumap.c | 6 +++--- kernel/bpf/devmap.c | 6 +++--- kernel/bpf/hashtab.c | 6 +++--- kernel/bpf/local_storage.c | 5 ++--- kernel/bpf/lpm_trie.c | 4 ++-- kernel/bpf/offload.c | 6 +++--- kernel/bpf/ringbuf.c | 6 +++--- net/core/sock_map.c | 12 ++++++------ 9 files changed, 28 insertions(+), 29 deletions(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 8ce40fd869f6..4ee2e7286c23 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -582,7 +582,7 @@ void bpf_local_storage_map_free(struct bpf_local_storage_map *smap, synchronize_rcu(); kvfree(smap->buckets); - kfree(smap); + bpf_map_area_free(smap); } int bpf_local_storage_map_alloc_check(union bpf_attr *attr) @@ -610,7 +610,7 @@ struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr) unsigned int i; u32 nbuckets; - smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT); + smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE); if (!smap) return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&smap->map, attr); @@ -623,7 +623,7 @@ struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr) smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets, GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT); if (!smap->buckets) { - kfree(smap); + bpf_map_area_free(smap); return ERR_PTR(-ENOMEM); } diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index b25ca9d603a6..b5ba34ddd4b6 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -97,7 +97,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) attr->map_flags & ~BPF_F_NUMA_NODE) return ERR_PTR(-EINVAL); - cmap = kzalloc(sizeof(*cmap), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT); + cmap = bpf_map_area_alloc(sizeof(*cmap), NUMA_NO_NODE); if (!cmap) return ERR_PTR(-ENOMEM); @@ -118,7 +118,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) return &cmap->map; free_cmap: - kfree(cmap); + bpf_map_area_free(cmap); return ERR_PTR(err); } @@ -623,7 +623,7 @@ static void cpu_map_free(struct bpf_map *map) __cpu_map_entry_replace(cmap, i, NULL); /* call_rcu */ } bpf_map_area_free(cmap->cpu_map); - kfree(cmap); + bpf_map_area_free(cmap); } /* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 88feaa094de8..f9a87dcc5535 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -163,13 +163,13 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) if (!capable(CAP_NET_ADMIN)) return ERR_PTR(-EPERM); - dtab = kzalloc(sizeof(*dtab), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT); + dtab = bpf_map_area_alloc(sizeof(*dtab), NUMA_NO_NODE); if (!dtab) return ERR_PTR(-ENOMEM); err = dev_map_init_map(dtab, attr); if (err) { - kfree(dtab); + bpf_map_area_free(dtab); return ERR_PTR(err); } @@ -240,7 +240,7 @@ static void dev_map_free(struct bpf_map *map) bpf_map_area_free(dtab->netdev_map); } - kfree(dtab); + bpf_map_area_free(dtab); } static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index f1e5303fe26e..8392f7f8a8ac 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -495,7 +495,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) struct bpf_htab *htab; int err, i; - htab = kzalloc(sizeof(*htab), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT); + htab = bpf_map_area_alloc(sizeof(*htab), NUMA_NO_NODE); if (!htab) return ERR_PTR(-ENOMEM); @@ -579,7 +579,7 @@ free_map_locked: bpf_map_area_free(htab->buckets); free_htab: lockdep_unregister_key(&htab->lockdep_key); - kfree(htab); + bpf_map_area_free(htab); return ERR_PTR(err); } @@ -1496,7 +1496,7 @@ static void htab_map_free(struct bpf_map *map) for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) free_percpu(htab->map_locked[i]); lockdep_unregister_key(&htab->lockdep_key); - kfree(htab); + bpf_map_area_free(htab); } static void htab_map_seq_show_elem(struct bpf_map *map, void *key, diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index a64255e20f87..098cf336fae6 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -313,8 +313,7 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) /* max_entries is not used and enforced to be 0 */ return ERR_PTR(-EINVAL); - map = kzalloc_node(sizeof(struct bpf_cgroup_storage_map), - GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT, numa_node); + map = bpf_map_area_alloc(sizeof(struct bpf_cgroup_storage_map), numa_node); if (!map) return ERR_PTR(-ENOMEM); @@ -346,7 +345,7 @@ static void cgroup_storage_map_free(struct bpf_map *_map) WARN_ON(!RB_EMPTY_ROOT(&map->root)); WARN_ON(!list_empty(&map->list)); - kfree(map); + bpf_map_area_free(map); } static int cgroup_storage_delete_elem(struct bpf_map *map, void *key) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index d789e3b831ad..d833496e9e42 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -558,7 +558,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) attr->value_size > LPM_VAL_SIZE_MAX) return ERR_PTR(-EINVAL); - trie = kzalloc(sizeof(*trie), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT); + trie = bpf_map_area_alloc(sizeof(*trie), NUMA_NO_NODE); if (!trie) return ERR_PTR(-ENOMEM); @@ -609,7 +609,7 @@ static void trie_free(struct bpf_map *map) } out: - kfree(trie); + bpf_map_area_free(trie); } static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 5a629a1b971c..13e4efc971e6 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -372,7 +372,7 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr) attr->map_type != BPF_MAP_TYPE_HASH) return ERR_PTR(-EINVAL); - offmap = kzalloc(sizeof(*offmap), GFP_USER | __GFP_NOWARN); + offmap = bpf_map_area_alloc(sizeof(*offmap), NUMA_NO_NODE); if (!offmap) return ERR_PTR(-ENOMEM); @@ -404,7 +404,7 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr) err_unlock: up_write(&bpf_devs_lock); rtnl_unlock(); - kfree(offmap); + bpf_map_area_free(offmap); return ERR_PTR(err); } @@ -428,7 +428,7 @@ void bpf_map_offload_map_free(struct bpf_map *map) up_write(&bpf_devs_lock); rtnl_unlock(); - kfree(offmap); + bpf_map_area_free(offmap); } int bpf_map_offload_lookup_elem(struct bpf_map *map, void *key, void *value) diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index df8062cb258c..b483aea35f41 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -164,7 +164,7 @@ static struct bpf_map *ringbuf_map_alloc(union bpf_attr *attr) return ERR_PTR(-E2BIG); #endif - rb_map = kzalloc(sizeof(*rb_map), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT); + rb_map = bpf_map_area_alloc(sizeof(*rb_map), NUMA_NO_NODE); if (!rb_map) return ERR_PTR(-ENOMEM); @@ -172,7 +172,7 @@ static struct bpf_map *ringbuf_map_alloc(union bpf_attr *attr) rb_map->rb = bpf_ringbuf_alloc(attr->max_entries, rb_map->map.numa_node); if (!rb_map->rb) { - kfree(rb_map); + bpf_map_area_free(rb_map); return ERR_PTR(-ENOMEM); } @@ -199,7 +199,7 @@ static void ringbuf_map_free(struct bpf_map *map) rb_map = container_of(map, struct bpf_ringbuf_map, map); bpf_ringbuf_free(rb_map->rb); - kfree(rb_map); + bpf_map_area_free(rb_map); } static void *ringbuf_map_lookup_elem(struct bpf_map *map, void *key) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 763d77162d0c..d0c43384d8bf 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -41,7 +41,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) attr->map_flags & ~SOCK_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); - stab = kzalloc(sizeof(*stab), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT); + stab = bpf_map_area_alloc(sizeof(*stab), NUMA_NO_NODE); if (!stab) return ERR_PTR(-ENOMEM); @@ -52,7 +52,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) sizeof(struct sock *), stab->map.numa_node); if (!stab->sks) { - kfree(stab); + bpf_map_area_free(stab); return ERR_PTR(-ENOMEM); } @@ -361,7 +361,7 @@ static void sock_map_free(struct bpf_map *map) synchronize_rcu(); bpf_map_area_free(stab->sks); - kfree(stab); + bpf_map_area_free(stab); } static void sock_map_release_progs(struct bpf_map *map) @@ -1076,7 +1076,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) if (attr->key_size > MAX_BPF_STACK) return ERR_PTR(-E2BIG); - htab = kzalloc(sizeof(*htab), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT); + htab = bpf_map_area_alloc(sizeof(*htab), NUMA_NO_NODE); if (!htab) return ERR_PTR(-ENOMEM); @@ -1106,7 +1106,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) return &htab->map; free_htab: - kfree(htab); + bpf_map_area_free(htab); return ERR_PTR(err); } @@ -1159,7 +1159,7 @@ static void sock_hash_free(struct bpf_map *map) synchronize_rcu(); bpf_map_area_free(htab->buckets); - kfree(htab); + bpf_map_area_free(htab); } static void *sock_hash_lookup_sys(struct bpf_map *map, void *key) -- cgit v1.2.3 From 10b62d6a38f7c92e9f41983bb7d7669c9fa6e287 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 11 Aug 2022 11:40:20 +0800 Subject: libbpf: Add names for auxiliary maps The bpftool self-created maps can appear in final map show output due to deferred removal in kernel. These maps don't have a name, which would make users confused about where it comes from. With a libbpf_ prefix name, users could know who created these maps. It also could make some tests (like test_offload.py, which skip base maps without names as a workaround) filter them out. Kernel adds bpf prog/map name support in the same merge commit fadad670a8ab ("Merge branch 'bpf-extend-info'"). So we can also use kernel_supports(NULL, FEAT_PROG_NAME) to check if kernel supports map name. As discussed [1], Let's make bpf_map_create accept non-null name string, and silently ignore the name if kernel doesn't support. [1] https://lore.kernel.org/bpf/CAEf4BzYL1TQwo1231s83pjTdFPk9XWWhfZC5=KzkU-VO0k=0Ug@mail.gmail.com/ Signed-off-by: Hangbin Liu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220811034020.529685-1-liuhangbin@gmail.com --- tools/lib/bpf/bpf.c | 2 +- tools/lib/bpf/libbpf.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index efcc06dafbd9..6a96e665dc5d 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -183,7 +183,7 @@ int bpf_map_create(enum bpf_map_type map_type, return libbpf_err(-EINVAL); attr.map_type = map_type; - if (map_name) + if (map_name && kernel_supports(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); attr.key_size = key_size; attr.value_size = value_size; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 917d975bd4c6..3f01f5cd8a4c 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4437,7 +4437,7 @@ static int probe_kern_global_data(void) }; int ret, map, insn_cnt = ARRAY_SIZE(insns); - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL); + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL); if (map < 0) { ret = -errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4570,7 +4570,7 @@ static int probe_kern_array_mmap(void) LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); int fd; - fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(int), 1, &opts); + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); return probe_fd(fd); } @@ -4617,7 +4617,7 @@ static int probe_prog_bind_map(void) }; int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL); + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL); if (map < 0) { ret = -errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); -- cgit v1.2.3 From 54c939773b2d2c2e6676743c180cb2049bb3a40a Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 12 Aug 2022 16:37:25 +0100 Subject: bpftool: Fix a typo in a comment This is the wrong library name: libcap, not libpcap. Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220812153727.224500-1-quentin@isovalent.com --- tools/bpf/bpftool/feature.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 7ecabf7947fb..36cf0f1517c9 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -1147,7 +1147,7 @@ exit_free: return res; #else /* Detection assumes user has specific privileges. - * We do not use libpcap so let's approximate, and restrict usage to + * We do not use libcap so let's approximate, and restrict usage to * root user only. */ if (geteuid()) { -- cgit v1.2.3 From 4961d0772578e8737afe61370743f3bc22867111 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Fri, 12 Aug 2022 16:37:27 +0100 Subject: bpf: Clear up confusion in bpf_skb_adjust_room()'s documentation Adding or removing room space _below_ layers 2 or 3, as the description mentions, is ambiguous. This was written with a mental image of the packet with layer 2 at the top, layer 3 under it, and so on. But it has led users to believe that it was on lower layers (before the beginning of the L2 and L3 headers respectively). Let's make it more explicit, and specify between which layers the room space is adjusted. Reported-by: Rumen Telbizov Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220812153727.224500-3-quentin@isovalent.com --- include/uapi/linux/bpf.h | 6 ++++-- tools/include/uapi/linux/bpf.h | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7d1e2794d83e..934a2a8beb87 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2573,10 +2573,12 @@ union bpf_attr { * There are two supported modes at this time: * * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer - * (room space is added or removed below the layer 2 header). + * (room space is added or removed between the layer 2 and + * layer 3 headers). * * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer - * (room space is added or removed below the layer 3 header). + * (room space is added or removed between the layer 3 and + * layer 4 headers). * * The following flags are supported at this time: * diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e174ad28aeb7..1d6085e15fc8 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2573,10 +2573,12 @@ union bpf_attr { * There are two supported modes at this time: * * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer - * (room space is added or removed below the layer 2 header). + * (room space is added or removed between the layer 2 and + * layer 3 headers). * * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer - * (room space is added or removed below the layer 3 header). + * (room space is added or removed between the layer 3 and + * layer 4 headers). * * The following flags are supported at this time: * -- cgit v1.2.3 From cea558855c39b7f1f02ff50dcf701ca6596bc964 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Mon, 15 Aug 2022 17:22:05 +0100 Subject: bpftool: Clear errno after libcap's checks When bpftool is linked against libcap, the library runs a "constructor" function to compute the number of capabilities of the running kernel [0], at the beginning of the execution of the program. As part of this, it performs multiple calls to prctl(). Some of these may fail, and set errno to a non-zero value: # strace -e prctl ./bpftool version prctl(PR_CAPBSET_READ, CAP_MAC_OVERRIDE) = 1 prctl(PR_CAPBSET_READ, 0x30 /* CAP_??? */) = -1 EINVAL (Invalid argument) prctl(PR_CAPBSET_READ, CAP_CHECKPOINT_RESTORE) = 1 prctl(PR_CAPBSET_READ, 0x2c /* CAP_??? */) = -1 EINVAL (Invalid argument) prctl(PR_CAPBSET_READ, 0x2a /* CAP_??? */) = -1 EINVAL (Invalid argument) prctl(PR_CAPBSET_READ, 0x29 /* CAP_??? */) = -1 EINVAL (Invalid argument) ** fprintf added at the top of main(): we have errno == 1 ./bpftool v7.0.0 using libbpf v1.0 features: libbfd, libbpf_strict, skeletons +++ exited with 0 +++ This has been addressed in libcap 2.63 [1], but until this version is available everywhere, we can fix it on bpftool side. Let's clean errno at the beginning of the main() function, to make sure that these checks do not interfere with the batch mode, where we error out if errno is set after a bpftool command. [0] https://git.kernel.org/pub/scm/libs/libcap/libcap.git/tree/libcap/cap_alloc.c?h=libcap-2.65#n20 [1] https://git.kernel.org/pub/scm/libs/libcap/libcap.git/commit/?id=f25a1b7e69f7b33e6afb58b3e38f3450b7d2d9a0 Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220815162205.45043-1-quentin@isovalent.com --- tools/bpf/bpftool/main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 451cefc2d0da..ccd7457f92bf 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -435,6 +435,16 @@ int main(int argc, char **argv) setlinebuf(stdout); +#ifdef USE_LIBCAP + /* Libcap < 2.63 hooks before main() to compute the number of + * capabilities of the running kernel, and doing so it calls prctl() + * which may fail and set errno to non-zero. + * Let's reset errno to make sure this does not interfere with the + * batch mode. + */ + errno = 0; +#endif + last_do_help = do_help; pretty_output = false; json_output = false; -- cgit v1.2.3 From e81fbd4c1ba7b128a198c2843665e1186db449b6 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Thu, 11 Aug 2022 15:55:25 -0600 Subject: selftests/bpf: Add existing connection bpf_*_ct_lookup() test Add a test where we do a conntrack lookup on an existing connection. This is nice because it's a more realistic test than artifically creating a ct entry and looking it up afterwards. Signed-off-by: Daniel Xu Signed-off-by: Daniel Borkmann Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/bpf/de5a617832f38f8b5631cc87e2a836da7c94d497.1660254747.git.dxu@dxuuu.xyz --- tools/testing/selftests/bpf/prog_tests/bpf_nf.c | 59 +++++++++++++++++++++++++ tools/testing/selftests/bpf/progs/test_bpf_nf.c | 18 ++++++++ 2 files changed, 77 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index 7a74a1579076..88a2c0bdefec 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -24,10 +24,34 @@ enum { TEST_TC_BPF, }; +#define TIMEOUT_MS 3000 + +static int connect_to_server(int srv_fd) +{ + int fd = -1; + + fd = socket(AF_INET, SOCK_STREAM, 0); + if (!ASSERT_GE(fd, 0, "socket")) + goto out; + + if (!ASSERT_EQ(connect_fd_to_fd(fd, srv_fd, TIMEOUT_MS), 0, "connect_fd_to_fd")) { + close(fd); + fd = -1; + } +out: + return fd; +} + static void test_bpf_nf_ct(int mode) { + const char *iptables = "iptables -t raw %s PREROUTING -j CT"; + int srv_fd = -1, client_fd = -1, srv_client_fd = -1; + struct sockaddr_in peer_addr = {}; struct test_bpf_nf *skel; int prog_fd, err; + socklen_t len; + u16 srv_port; + char cmd[64]; LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = &pkt_v4, .data_size_in = sizeof(pkt_v4), @@ -38,6 +62,32 @@ static void test_bpf_nf_ct(int mode) if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load")) return; + /* Enable connection tracking */ + snprintf(cmd, sizeof(cmd), iptables, "-A"); + if (!ASSERT_OK(system(cmd), "iptables")) + goto end; + + srv_port = (mode == TEST_XDP) ? 5005 : 5006; + srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", srv_port, TIMEOUT_MS); + if (!ASSERT_GE(srv_fd, 0, "start_server")) + goto end; + + client_fd = connect_to_server(srv_fd); + if (!ASSERT_GE(client_fd, 0, "connect_to_server")) + goto end; + + len = sizeof(peer_addr); + srv_client_fd = accept(srv_fd, (struct sockaddr *)&peer_addr, &len); + if (!ASSERT_GE(srv_client_fd, 0, "accept")) + goto end; + if (!ASSERT_EQ(len, sizeof(struct sockaddr_in), "sockaddr len")) + goto end; + + skel->bss->saddr = peer_addr.sin_addr.s_addr; + skel->bss->sport = peer_addr.sin_port; + skel->bss->daddr = peer_addr.sin_addr.s_addr; + skel->bss->dport = htons(srv_port); + if (mode == TEST_XDP) prog_fd = bpf_program__fd(skel->progs.nf_xdp_ct_test); else @@ -63,7 +113,16 @@ static void test_bpf_nf_ct(int mode) ASSERT_LE(skel->bss->test_delta_timeout, 10, "Test for max ct timeout update"); /* expected status is IPS_SEEN_REPLY */ ASSERT_EQ(skel->bss->test_status, 2, "Test for ct status update "); + ASSERT_EQ(skel->data->test_exist_lookup, 0, "Test existing connection lookup"); end: + if (srv_client_fd != -1) + close(srv_client_fd); + if (client_fd != -1) + close(client_fd); + if (srv_fd != -1) + close(srv_fd); + snprintf(cmd, sizeof(cmd), iptables, "-D"); + system(cmd); test_bpf_nf__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c index 196cd8dfe42a..84e0fd479794 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c @@ -23,6 +23,11 @@ int test_insert_entry = -EAFNOSUPPORT; int test_succ_lookup = -ENOENT; u32 test_delta_timeout = 0; u32 test_status = 0; +__be32 saddr = 0; +__be16 sport = 0; +__be32 daddr = 0; +__be16 dport = 0; +int test_exist_lookup = -ENOENT; struct nf_conn; @@ -160,6 +165,19 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, } test_alloc_entry = 0; } + + bpf_tuple.ipv4.saddr = saddr; + bpf_tuple.ipv4.daddr = daddr; + bpf_tuple.ipv4.sport = sport; + bpf_tuple.ipv4.dport = dport; + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); + if (ct) { + test_exist_lookup = 0; + bpf_ct_release(ct); + } else { + test_exist_lookup = opts_def.error; + } } SEC("xdp") -- cgit v1.2.3 From 99799de2cba2d399acf65f49a986b3d5cf0732ab Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Thu, 11 Aug 2022 15:55:26 -0600 Subject: selftests/bpf: Add connmark read test Test that the prog can read from the connection mark. This test is nice because it ensures progs can interact with netfilter subsystem correctly. Signed-off-by: Daniel Xu Signed-off-by: Daniel Borkmann Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/bpf/d3bc620a491e4c626c20d80631063922cbe13e2b.1660254747.git.dxu@dxuuu.xyz --- tools/testing/selftests/bpf/prog_tests/bpf_nf.c | 3 ++- tools/testing/selftests/bpf/progs/test_bpf_nf.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index 88a2c0bdefec..544bf90ac2a7 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -44,7 +44,7 @@ out: static void test_bpf_nf_ct(int mode) { - const char *iptables = "iptables -t raw %s PREROUTING -j CT"; + const char *iptables = "iptables -t raw %s PREROUTING -j CONNMARK --set-mark 42/0"; int srv_fd = -1, client_fd = -1, srv_client_fd = -1; struct sockaddr_in peer_addr = {}; struct test_bpf_nf *skel; @@ -114,6 +114,7 @@ static void test_bpf_nf_ct(int mode) /* expected status is IPS_SEEN_REPLY */ ASSERT_EQ(skel->bss->test_status, 2, "Test for ct status update "); ASSERT_EQ(skel->data->test_exist_lookup, 0, "Test existing connection lookup"); + ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark"); end: if (srv_client_fd != -1) close(srv_client_fd); diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c index 84e0fd479794..2722441850cc 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c @@ -28,6 +28,7 @@ __be16 sport = 0; __be32 daddr = 0; __be16 dport = 0; int test_exist_lookup = -ENOENT; +u32 test_exist_lookup_mark = 0; struct nf_conn; @@ -174,6 +175,8 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, sizeof(opts_def)); if (ct) { test_exist_lookup = 0; + if (ct->mark == 42) + test_exist_lookup_mark = 43; bpf_ct_release(ct); } else { test_exist_lookup = opts_def.error; -- cgit v1.2.3 From 8308bf207ce6963adb42791cfb260dc6552b6665 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Thu, 11 Aug 2022 15:55:27 -0600 Subject: selftests/bpf: Update CI kconfig The previous selftest changes require two kconfig changes in bpf-ci. Signed-off-by: Daniel Xu Signed-off-by: Daniel Borkmann Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/bpf/2c27c6ebf7a03954915f83560653752450389564.1660254747.git.dxu@dxuuu.xyz --- tools/testing/selftests/bpf/config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index fabf0c014349..3fc46f9cfb22 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -50,9 +50,11 @@ CONFIG_NET_SCHED=y CONFIG_NETDEVSIM=m CONFIG_NETFILTER=y CONFIG_NETFILTER_SYNPROXY=y +CONFIG_NETFILTER_XT_CONNMARK=y CONFIG_NETFILTER_XT_MATCH_STATE=y CONFIG_NETFILTER_XT_TARGET_CT=y CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_MARK=y CONFIG_NF_DEFRAG_IPV4=y CONFIG_NF_DEFRAG_IPV6=y CONFIG_RC_CORE=y -- cgit v1.2.3 From 1f235777c3a4ab115162fe7d45b82be534b9ae2e Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 13 Aug 2022 08:09:36 +0800 Subject: libbpf: Making bpf_prog_load() ignore name if kernel doesn't support Similar with commit 10b62d6a38f7 ("libbpf: Add names for auxiliary maps"), let's make bpf_prog_load() also ignore name if kernel doesn't support program name. To achieve this, we need to call sys_bpf_prog_load() directly in probe_kern_prog_name() to avoid circular dependency. sys_bpf_prog_load() also need to be exported in the libbpf_internal.h file. Signed-off-by: Hangbin Liu Signed-off-by: Andrii Nakryiko Acked-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20220813000936.6464-1-liuhangbin@gmail.com --- tools/lib/bpf/bpf.c | 6 ++---- tools/lib/bpf/libbpf.c | 13 +++++++++++-- tools/lib/bpf/libbpf_internal.h | 3 +++ 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 6a96e665dc5d..575867d69496 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -84,9 +84,7 @@ static inline int sys_bpf_fd(enum bpf_cmd cmd, union bpf_attr *attr, return ensure_good_fd(fd); } -#define PROG_LOAD_ATTEMPTS 5 - -static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) +int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) { int fd; @@ -263,7 +261,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0); attr.kern_version = OPTS_GET(opts, kern_version, 0); - if (prog_name) + if (prog_name && kernel_supports(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); attr.license = ptr_to_u64(license); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 3f01f5cd8a4c..aa05a99b913d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4415,14 +4415,23 @@ static int probe_fd(int fd) static int probe_kern_prog_name(void) { + const size_t attr_sz = offsetofend(union bpf_attr, prog_name); struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; - int ret, insn_cnt = ARRAY_SIZE(insns); + union bpf_attr attr; + int ret; + + memset(&attr, 0, attr_sz); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.license = ptr_to_u64("GPL"); + attr.insns = ptr_to_u64(insns); + attr.insn_cnt = (__u32)ARRAY_SIZE(insns); + libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); /* make sure loading with name works */ - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "test", "GPL", insns, insn_cnt, NULL); + ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS); return probe_fd(ret); } diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 4135ae0a2bc3..377642ff51fc 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -573,4 +573,7 @@ static inline bool is_pow_of_2(size_t x) return x && (x & (x - 1)) == 0; } +#define PROG_LOAD_ATTEMPTS 5 +int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts); + #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ -- cgit v1.2.3 From 807662cac66af0dfca60ce1cf784063da6ec2f65 Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Tue, 16 Aug 2022 07:52:31 +0200 Subject: selftests/bpf: Fix attach point for non-x86 arches in test_progs/lsm Use SYS_PREFIX macro from bpf_misc.h instead of hard-coded '__x64_' prefix for sys_setdomainname attach point in lsm test. Signed-off-by: Artem Savkov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220816055231.717006-1-asavkov@redhat.com --- tools/testing/selftests/bpf/DENYLIST.s390x | 2 +- tools/testing/selftests/bpf/progs/lsm.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index e33cab34d22f..9d8de15e725e 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -43,7 +43,7 @@ test_bpffs # bpffs test failed 255 test_bprm_opts # failed to auto-attach program 'secure_exec': -524 (trampoline) test_ima # failed to auto-attach program 'ima': -524 (trampoline) test_local_storage # failed to auto-attach program 'unlink_hook': -524 (trampoline) -test_lsm # failed to find kernel BTF type ID of '__x64_sys_setdomainname': -3 (?) +test_lsm # attach unexpected error: -524 (trampoline) test_overhead # attach_fentry unexpected error: -524 (trampoline) test_profiler # unknown func bpf_probe_read_str#45 (overlapping) timer # failed to auto-attach program 'test1': -524 (trampoline) diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c index 33694ef8acfa..d8d8af623bc2 100644 --- a/tools/testing/selftests/bpf/progs/lsm.c +++ b/tools/testing/selftests/bpf/progs/lsm.c @@ -4,6 +4,7 @@ * Copyright 2020 Google LLC. */ +#include "bpf_misc.h" #include "vmlinux.h" #include #include @@ -160,7 +161,7 @@ int BPF_PROG(test_task_free, struct task_struct *task) int copy_test = 0; -SEC("fentry.s/__x64_sys_setdomainname") +SEC("fentry.s/" SYS_PREFIX "sys_setdomainname") int BPF_PROG(test_sys_setdomainname, struct pt_regs *regs) { void *ptr = (void *)PT_REGS_PARM1(regs); -- cgit v1.2.3 From 43cb8cbadffa21e88a65dd1129c86f5552d6c42e Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Tue, 16 Aug 2022 16:40:11 -0700 Subject: libbpf: Allows disabling auto attach Adds libbpf APIs for disabling auto-attach for individual functions. This is motivated by the use case of cgroup iter [1]. Some iter types require their parameters to be non-zero, therefore applying auto-attach on them will fail. With these two new APIs, users who want to use auto-attach and these types of iters can disable auto-attach on the program and perform manual attach. [1] https://lore.kernel.org/bpf/CAEf4BzZ+a2uDo_t6kGBziqdz--m2gh2_EUwkGLDtMd65uwxUjA@mail.gmail.com/ Signed-off-by: Hao Luo Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220816234012.910255-1-haoluo@google.com --- tools/lib/bpf/libbpf.c | 15 ++++++++++++++- tools/lib/bpf/libbpf.h | 2 ++ tools/lib/bpf/libbpf.map | 2 ++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index aa05a99b913d..0159a43c7efd 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -417,6 +417,7 @@ struct bpf_program { int fd; bool autoload; + bool autoattach; bool mark_btf_static; enum bpf_prog_type type; enum bpf_attach_type expected_attach_type; @@ -755,6 +756,8 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, prog->autoload = true; } + prog->autoattach = true; + /* inherit object's log_level */ prog->log_level = obj->log_level; @@ -8314,6 +8317,16 @@ int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) return 0; } +bool bpf_program__autoattach(const struct bpf_program *prog) +{ + return prog->autoattach; +} + +void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach) +{ + prog->autoattach = autoattach; +} + const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog) { return prog->insns; @@ -12346,7 +12359,7 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) struct bpf_program *prog = *s->progs[i].prog; struct bpf_link **link = s->progs[i].link; - if (!prog->autoload) + if (!prog->autoload || !prog->autoattach) continue; /* auto-attaching not supported for this program */ diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 61493c4cddac..88a1ac34b12a 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -260,6 +260,8 @@ LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog); LIBBPF_API const char *bpf_program__section_name(const struct bpf_program *prog); LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog); LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload); +LIBBPF_API bool bpf_program__autoattach(const struct bpf_program *prog); +LIBBPF_API void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach); struct bpf_insn; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 119e6e1ea7f1..2b928dc21af0 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -358,6 +358,8 @@ LIBBPF_1.0.0 { bpf_obj_get_opts; bpf_prog_query_opts; bpf_program__attach_ksyscall; + bpf_program__autoattach; + bpf_program__set_autoattach; btf__add_enum64; btf__add_enum64_value; libbpf_bpf_attach_type_str; -- cgit v1.2.3 From 738a2f2f9130f98f92ccb3efd94d4879c0a0990c Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Tue, 16 Aug 2022 16:40:12 -0700 Subject: selftests/bpf: Tests libbpf autoattach APIs Adds test for libbpf APIs that toggle bpf program auto-attaching. Signed-off-by: Hao Luo Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220816234012.910255-2-haoluo@google.com --- .../testing/selftests/bpf/prog_tests/autoattach.c | 30 ++++++++++++++++++++++ .../testing/selftests/bpf/progs/test_autoattach.c | 23 +++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/autoattach.c create mode 100644 tools/testing/selftests/bpf/progs/test_autoattach.c diff --git a/tools/testing/selftests/bpf/prog_tests/autoattach.c b/tools/testing/selftests/bpf/prog_tests/autoattach.c new file mode 100644 index 000000000000..dc5e01d279bd --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/autoattach.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Google */ + +#include +#include "test_autoattach.skel.h" + +void test_autoattach(void) +{ + struct test_autoattach *skel; + + skel = test_autoattach__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + goto cleanup; + + /* disable auto-attach for prog2 */ + bpf_program__set_autoattach(skel->progs.prog2, false); + ASSERT_TRUE(bpf_program__autoattach(skel->progs.prog1), "autoattach_prog1"); + ASSERT_FALSE(bpf_program__autoattach(skel->progs.prog2), "autoattach_prog2"); + if (!ASSERT_OK(test_autoattach__attach(skel), "skel_attach")) + goto cleanup; + + usleep(1); + + ASSERT_TRUE(skel->bss->prog1_called, "attached_prog1"); + ASSERT_FALSE(skel->bss->prog2_called, "attached_prog2"); + +cleanup: + test_autoattach__destroy(skel); +} + diff --git a/tools/testing/selftests/bpf/progs/test_autoattach.c b/tools/testing/selftests/bpf/progs/test_autoattach.c new file mode 100644 index 000000000000..11a44493ebce --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_autoattach.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Google */ + +#include "vmlinux.h" +#include + +bool prog1_called = false; +bool prog2_called = false; + +SEC("raw_tp/sys_enter") +int prog1(const void *ctx) +{ + prog1_called = true; + return 0; +} + +SEC("raw_tp/sys_exit") +int prog2(const void *ctx) +{ + prog2_called = true; + return 0; +} + -- cgit v1.2.3 From d4e6d684f3bea46a2fc195765c77a3b26bcb080e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 15 Aug 2022 17:19:26 -0700 Subject: libbpf: Fix potential NULL dereference when parsing ELF Fix if condition filtering empty ELF sections to prevent NULL dereference. Fixes: 47ea7417b074 ("libbpf: Skip empty sections in bpf_object__init_global_data_maps") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Hao Luo Link: https://lore.kernel.org/bpf/20220816001929.369487-2-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 0159a43c7efd..146d35526b87 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1649,7 +1649,7 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj) sec_desc = &obj->efile.secs[sec_idx]; /* Skip recognized sections with size 0. */ - if (sec_desc->data && sec_desc->data->d_size == 0) + if (!sec_desc->data || sec_desc->data->d_size == 0) continue; switch (sec_desc->sec_type) { -- cgit v1.2.3 From 813847a31447feba6119df4ee77a7c0c7a77fc72 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 15 Aug 2022 17:19:27 -0700 Subject: libbpf: Streamline bpf_attr and perf_event_attr initialization Make sure that entire libbpf code base is initializing bpf_attr and perf_event_attr with memset(0). Also for bpf_attr make sure we clear and pass to kernel only relevant parts of bpf_attr. bpf_attr is a huge union of independent sub-command attributes, so there is no need to clear and pass entire union bpf_attr, which over time grows quite a lot and for most commands this growth is completely irrelevant. Few cases where we were relying on compiler initialization of BPF UAPI structs (like bpf_prog_info, bpf_map_info, etc) with `= {};` were switched to memset(0) pattern for future-proofing. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Hao Luo Link: https://lore.kernel.org/bpf/20220816001929.369487-3-andrii@kernel.org --- tools/lib/bpf/bpf.c | 173 +++++++++++++++++++++++++----------------- tools/lib/bpf/libbpf.c | 43 +++++++---- tools/lib/bpf/netlink.c | 3 +- tools/lib/bpf/skel_internal.h | 10 ++- 4 files changed, 138 insertions(+), 91 deletions(-) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 575867d69496..e3a0bd7efa2f 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -105,7 +105,7 @@ int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) */ int probe_memcg_account(void) { - const size_t prog_load_attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); + const size_t attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); struct bpf_insn insns[] = { BPF_EMIT_CALL(BPF_FUNC_ktime_get_coarse_ns), BPF_EXIT_INSN(), @@ -115,13 +115,13 @@ int probe_memcg_account(void) int prog_fd; /* attempt loading freplace trying to use custom BTF */ - memset(&attr, 0, prog_load_attr_sz); + memset(&attr, 0, attr_sz); attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; attr.insns = ptr_to_u64(insns); attr.insn_cnt = insn_cnt; attr.license = ptr_to_u64("GPL"); - prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, prog_load_attr_sz); + prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, attr_sz); if (prog_fd >= 0) { close(prog_fd); return 1; @@ -232,6 +232,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, size_t insn_cnt, const struct bpf_prog_load_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, fd_array); void *finfo = NULL, *linfo = NULL; const char *func_info, *line_info; __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; @@ -251,7 +252,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, if (attempts == 0) attempts = PROG_LOAD_ATTEMPTS; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.prog_type = prog_type; attr.expected_attach_type = OPTS_GET(opts, expected_attach_type, 0); @@ -314,7 +315,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, attr.log_level = log_level; } - fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); + fd = sys_bpf_prog_load(&attr, attr_sz, attempts); if (fd >= 0) return fd; @@ -354,7 +355,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, break; } - fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); + fd = sys_bpf_prog_load(&attr, attr_sz, attempts); if (fd >= 0) goto done; } @@ -368,7 +369,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, attr.log_size = log_size; attr.log_level = 1; - fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); + fd = sys_bpf_prog_load(&attr, attr_sz, attempts); } done: /* free() doesn't affect errno, so we don't need to restore it */ @@ -380,127 +381,136 @@ done: int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags) { + const size_t attr_sz = offsetofend(union bpf_attr, flags); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); attr.flags = flags; - ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_map_lookup_elem(int fd, const void *key, void *value) { + const size_t attr_sz = offsetofend(union bpf_attr, flags); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); - ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags) { + const size_t attr_sz = offsetofend(union bpf_attr, flags); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); attr.flags = flags; - ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value) { + const size_t attr_sz = offsetofend(union bpf_attr, flags); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); - ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags) { + const size_t attr_sz = offsetofend(union bpf_attr, flags); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); attr.flags = flags; - ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_map_delete_elem(int fd, const void *key) { + const size_t attr_sz = offsetofend(union bpf_attr, flags); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; attr.key = ptr_to_u64(key); - ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags) { + const size_t attr_sz = offsetofend(union bpf_attr, flags); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.flags = flags; - ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_map_get_next_key(int fd, const void *key, void *next_key) { + const size_t attr_sz = offsetofend(union bpf_attr, next_key); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.next_key = ptr_to_u64(next_key); - ret = sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_map_freeze(int fd) { + const size_t attr_sz = offsetofend(union bpf_attr, map_fd); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; - ret = sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_FREEZE, &attr, attr_sz); return libbpf_err_errno(ret); } @@ -509,13 +519,14 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch, __u32 *count, const struct bpf_map_batch_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, batch); union bpf_attr attr; int ret; if (!OPTS_VALID(opts, bpf_map_batch_opts)) return libbpf_err(-EINVAL); - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.batch.map_fd = fd; attr.batch.in_batch = ptr_to_u64(in_batch); attr.batch.out_batch = ptr_to_u64(out_batch); @@ -525,7 +536,7 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch, attr.batch.elem_flags = OPTS_GET(opts, elem_flags, 0); attr.batch.flags = OPTS_GET(opts, flags, 0); - ret = sys_bpf(cmd, &attr, sizeof(attr)); + ret = sys_bpf(cmd, &attr, attr_sz); *count = attr.batch.count; return libbpf_err_errno(ret); @@ -564,14 +575,15 @@ int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *co int bpf_obj_pin(int fd, const char *pathname) { + const size_t attr_sz = offsetofend(union bpf_attr, file_flags); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.pathname = ptr_to_u64((void *)pathname); attr.bpf_fd = fd; - ret = sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr)); + ret = sys_bpf(BPF_OBJ_PIN, &attr, attr_sz); return libbpf_err_errno(ret); } @@ -582,17 +594,18 @@ int bpf_obj_get(const char *pathname) int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, file_flags); union bpf_attr attr; int fd; if (!OPTS_VALID(opts, bpf_obj_get_opts)) return libbpf_err(-EINVAL); - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.pathname = ptr_to_u64((void *)pathname); attr.file_flags = OPTS_GET(opts, file_flags, 0); - fd = sys_bpf_fd(BPF_OBJ_GET, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_OBJ_GET, &attr, attr_sz); return libbpf_err_errno(fd); } @@ -610,20 +623,21 @@ int bpf_prog_attach_opts(int prog_fd, int target_fd, enum bpf_attach_type type, const struct bpf_prog_attach_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, replace_bpf_fd); union bpf_attr attr; int ret; if (!OPTS_VALID(opts, bpf_prog_attach_opts)) return libbpf_err(-EINVAL); - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.target_fd = target_fd; attr.attach_bpf_fd = prog_fd; attr.attach_type = type; attr.attach_flags = OPTS_GET(opts, flags, 0); attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0); - ret = sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); + ret = sys_bpf(BPF_PROG_ATTACH, &attr, attr_sz); return libbpf_err_errno(ret); } @@ -634,28 +648,30 @@ int bpf_prog_attach_xattr(int prog_fd, int target_fd, int bpf_prog_detach(int target_fd, enum bpf_attach_type type) { + const size_t attr_sz = offsetofend(union bpf_attr, replace_bpf_fd); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.target_fd = target_fd; attr.attach_type = type; - ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); + ret = sys_bpf(BPF_PROG_DETACH, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) { + const size_t attr_sz = offsetofend(union bpf_attr, replace_bpf_fd); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.target_fd = target_fd; attr.attach_bpf_fd = prog_fd; attr.attach_type = type; - ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); + ret = sys_bpf(BPF_PROG_DETACH, &attr, attr_sz); return libbpf_err_errno(ret); } @@ -663,6 +679,7 @@ int bpf_link_create(int prog_fd, int target_fd, enum bpf_attach_type attach_type, const struct bpf_link_create_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, link_create); __u32 target_btf_id, iter_info_len; union bpf_attr attr; int fd, err; @@ -681,7 +698,7 @@ int bpf_link_create(int prog_fd, int target_fd, return libbpf_err(-EINVAL); } - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.link_create.prog_fd = prog_fd; attr.link_create.target_fd = target_fd; attr.link_create.attach_type = attach_type; @@ -725,7 +742,7 @@ int bpf_link_create(int prog_fd, int target_fd, break; } proceed: - fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, attr_sz); if (fd >= 0) return fd; /* we'll get EINVAL if LINK_CREATE doesn't support attaching fentry @@ -761,44 +778,47 @@ proceed: int bpf_link_detach(int link_fd) { + const size_t attr_sz = offsetofend(union bpf_attr, link_detach); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.link_detach.link_fd = link_fd; - ret = sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr)); + ret = sys_bpf(BPF_LINK_DETACH, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_link_update(int link_fd, int new_prog_fd, const struct bpf_link_update_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, link_update); union bpf_attr attr; int ret; if (!OPTS_VALID(opts, bpf_link_update_opts)) return libbpf_err(-EINVAL); - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.link_update.link_fd = link_fd; attr.link_update.new_prog_fd = new_prog_fd; attr.link_update.flags = OPTS_GET(opts, flags, 0); attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0); - ret = sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr)); + ret = sys_bpf(BPF_LINK_UPDATE, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_iter_create(int link_fd) { + const size_t attr_sz = offsetofend(union bpf_attr, iter_create); union bpf_attr attr; int fd; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.iter_create.link_fd = link_fd; - fd = sys_bpf_fd(BPF_ITER_CREATE, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_ITER_CREATE, &attr, attr_sz); return libbpf_err_errno(fd); } @@ -806,13 +826,14 @@ int bpf_prog_query_opts(int target_fd, enum bpf_attach_type type, struct bpf_prog_query_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, query); union bpf_attr attr; int ret; if (!OPTS_VALID(opts, bpf_prog_query_opts)) return libbpf_err(-EINVAL); - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.query.target_fd = target_fd; attr.query.attach_type = type; @@ -821,7 +842,7 @@ int bpf_prog_query_opts(int target_fd, attr.query.prog_ids = ptr_to_u64(OPTS_GET(opts, prog_ids, NULL)); attr.query.prog_attach_flags = ptr_to_u64(OPTS_GET(opts, prog_attach_flags, NULL)); - ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr)); + ret = sys_bpf(BPF_PROG_QUERY, &attr, attr_sz); OPTS_SET(opts, attach_flags, attr.query.attach_flags); OPTS_SET(opts, prog_cnt, attr.query.prog_cnt); @@ -850,13 +871,14 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, test); union bpf_attr attr; int ret; if (!OPTS_VALID(opts, bpf_test_run_opts)) return libbpf_err(-EINVAL); - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.test.prog_fd = prog_fd; attr.test.batch_size = OPTS_GET(opts, batch_size, 0); attr.test.cpu = OPTS_GET(opts, cpu, 0); @@ -872,7 +894,7 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts) attr.test.data_in = ptr_to_u64(OPTS_GET(opts, data_in, NULL)); attr.test.data_out = ptr_to_u64(OPTS_GET(opts, data_out, NULL)); - ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); + ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, attr_sz); OPTS_SET(opts, data_size_out, attr.test.data_size_out); OPTS_SET(opts, ctx_size_out, attr.test.ctx_size_out); @@ -884,13 +906,14 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts) static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd) { + const size_t attr_sz = offsetofend(union bpf_attr, open_flags); union bpf_attr attr; int err; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.start_id = start_id; - err = sys_bpf(cmd, &attr, sizeof(attr)); + err = sys_bpf(cmd, &attr, attr_sz); if (!err) *next_id = attr.next_id; @@ -919,80 +942,84 @@ int bpf_link_get_next_id(__u32 start_id, __u32 *next_id) int bpf_prog_get_fd_by_id(__u32 id) { + const size_t attr_sz = offsetofend(union bpf_attr, open_flags); union bpf_attr attr; int fd; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.prog_id = id; - fd = sys_bpf_fd(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_PROG_GET_FD_BY_ID, &attr, attr_sz); return libbpf_err_errno(fd); } int bpf_map_get_fd_by_id(__u32 id) { + const size_t attr_sz = offsetofend(union bpf_attr, open_flags); union bpf_attr attr; int fd; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_id = id; - fd = sys_bpf_fd(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_MAP_GET_FD_BY_ID, &attr, attr_sz); return libbpf_err_errno(fd); } int bpf_btf_get_fd_by_id(__u32 id) { + const size_t attr_sz = offsetofend(union bpf_attr, open_flags); union bpf_attr attr; int fd; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.btf_id = id; - fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, attr_sz); return libbpf_err_errno(fd); } int bpf_link_get_fd_by_id(__u32 id) { + const size_t attr_sz = offsetofend(union bpf_attr, open_flags); union bpf_attr attr; int fd; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.link_id = id; - fd = sys_bpf_fd(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_LINK_GET_FD_BY_ID, &attr, attr_sz); return libbpf_err_errno(fd); } int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len) { + const size_t attr_sz = offsetofend(union bpf_attr, info); union bpf_attr attr; int err; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.info.bpf_fd = bpf_fd; attr.info.info_len = *info_len; attr.info.info = ptr_to_u64(info); - err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)); - + err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, attr_sz); if (!err) *info_len = attr.info.info_len; - return libbpf_err_errno(err); } int bpf_raw_tracepoint_open(const char *name, int prog_fd) { + const size_t attr_sz = offsetofend(union bpf_attr, raw_tracepoint); union bpf_attr attr; int fd; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.raw_tracepoint.name = ptr_to_u64(name); attr.raw_tracepoint.prog_fd = prog_fd; - fd = sys_bpf_fd(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_RAW_TRACEPOINT_OPEN, &attr, attr_sz); return libbpf_err_errno(fd); } @@ -1048,16 +1075,18 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, __u64 *probe_addr) { - union bpf_attr attr = {}; + const size_t attr_sz = offsetofend(union bpf_attr, task_fd_query); + union bpf_attr attr; int err; + memset(&attr, 0, attr_sz); attr.task_fd_query.pid = pid; attr.task_fd_query.fd = fd; attr.task_fd_query.flags = flags; attr.task_fd_query.buf = ptr_to_u64(buf); attr.task_fd_query.buf_len = *buf_len; - err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr)); + err = sys_bpf(BPF_TASK_FD_QUERY, &attr, attr_sz); *buf_len = attr.task_fd_query.buf_len; *prog_id = attr.task_fd_query.prog_id; @@ -1070,30 +1099,32 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, int bpf_enable_stats(enum bpf_stats_type type) { + const size_t attr_sz = offsetofend(union bpf_attr, enable_stats); union bpf_attr attr; int fd; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.enable_stats.type = type; - fd = sys_bpf_fd(BPF_ENABLE_STATS, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_ENABLE_STATS, &attr, attr_sz); return libbpf_err_errno(fd); } int bpf_prog_bind_map(int prog_fd, int map_fd, const struct bpf_prog_bind_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, prog_bind_map); union bpf_attr attr; int ret; if (!OPTS_VALID(opts, bpf_prog_bind_opts)) return libbpf_err(-EINVAL); - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.prog_bind_map.prog_fd = prog_fd; attr.prog_bind_map.map_fd = map_fd; attr.prog_bind_map.flags = OPTS_GET(opts, flags, 0); - ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr)); + ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, attr_sz); return libbpf_err_errno(ret); } diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 146d35526b87..21fc3fc7f44c 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4287,11 +4287,12 @@ int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) int bpf_map__reuse_fd(struct bpf_map *map, int fd) { - struct bpf_map_info info = {}; + struct bpf_map_info info; __u32 len = sizeof(info), name_len; int new_fd, err; char *new_name; + memset(&info, 0, len); err = bpf_obj_get_info_by_fd(fd, &info, &len); if (err && errno == EINVAL) err = bpf_get_map_info_from_fdinfo(fd, &info); @@ -4833,13 +4834,12 @@ bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) { - struct bpf_map_info map_info = {}; + struct bpf_map_info map_info; char msg[STRERR_BUFSIZE]; - __u32 map_info_len; + __u32 map_info_len = sizeof(map_info); int err; - map_info_len = sizeof(map_info); - + memset(&map_info, 0, map_info_len); err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len); if (err && errno == EINVAL) err = bpf_get_map_info_from_fdinfo(map_fd, &map_info); @@ -9007,11 +9007,12 @@ int libbpf_find_vmlinux_btf_id(const char *name, static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) { - struct bpf_prog_info info = {}; + struct bpf_prog_info info; __u32 info_len = sizeof(info); struct btf *btf; int err; + memset(&info, 0, info_len); err = bpf_obj_get_info_by_fd(attach_prog_fd, &info, &info_len); if (err) { pr_warn("failed bpf_obj_get_info_by_fd for FD %d: %d\n", @@ -9839,13 +9840,16 @@ static int determine_uprobe_retprobe_bit(void) static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, uint64_t offset, int pid, size_t ref_ctr_off) { - struct perf_event_attr attr = {}; + const size_t attr_sz = sizeof(struct perf_event_attr); + struct perf_event_attr attr; char errmsg[STRERR_BUFSIZE]; int type, pfd; if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS)) return -EINVAL; + memset(&attr, 0, attr_sz); + type = uprobe ? determine_uprobe_perf_type() : determine_kprobe_perf_type(); if (type < 0) { @@ -9866,7 +9870,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, } attr.config |= 1 << bit; } - attr.size = sizeof(attr); + attr.size = attr_sz; attr.type = type; attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT; attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */ @@ -9965,7 +9969,8 @@ static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retpro static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, const char *kfunc_name, size_t offset, int pid) { - struct perf_event_attr attr = {}; + const size_t attr_sz = sizeof(struct perf_event_attr); + struct perf_event_attr attr; char errmsg[STRERR_BUFSIZE]; int type, pfd, err; @@ -9984,7 +9989,9 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); goto err_clean_legacy; } - attr.size = sizeof(attr); + + memset(&attr, 0, attr_sz); + attr.size = attr_sz; attr.config = type; attr.type = PERF_TYPE_TRACEPOINT; @@ -10441,6 +10448,7 @@ static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retpro static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, const char *binary_path, size_t offset, int pid) { + const size_t attr_sz = sizeof(struct perf_event_attr); struct perf_event_attr attr; int type, pfd, err; @@ -10458,8 +10466,8 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, goto err_clean_legacy; } - memset(&attr, 0, sizeof(attr)); - attr.size = sizeof(attr); + memset(&attr, 0, attr_sz); + attr.size = attr_sz; attr.config = type; attr.type = PERF_TYPE_TRACEPOINT; @@ -10998,7 +11006,8 @@ static int determine_tracepoint_id(const char *tp_category, static int perf_event_open_tracepoint(const char *tp_category, const char *tp_name) { - struct perf_event_attr attr = {}; + const size_t attr_sz = sizeof(struct perf_event_attr); + struct perf_event_attr attr; char errmsg[STRERR_BUFSIZE]; int tp_id, pfd, err; @@ -11010,8 +11019,9 @@ static int perf_event_open_tracepoint(const char *tp_category, return tp_id; } + memset(&attr, 0, attr_sz); attr.type = PERF_TYPE_TRACEPOINT; - attr.size = sizeof(attr); + attr.size = attr_sz; attr.config = tp_id; pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */, @@ -11631,12 +11641,15 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, void *ctx, const struct perf_buffer_opts *opts) { + const size_t attr_sz = sizeof(struct perf_event_attr); struct perf_buffer_params p = {}; - struct perf_event_attr attr = {}; + struct perf_event_attr attr; if (!OPTS_VALID(opts, perf_buffer_opts)) return libbpf_err_ptr(-EINVAL); + memset(&attr, 0, attr_sz); + attr.size = attr_sz; attr.config = PERF_COUNT_SW_BPF_OUTPUT; attr.type = PERF_TYPE_SOFTWARE; attr.sample_type = PERF_SAMPLE_RAW; diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 6c013168032d..35104580870c 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -587,11 +587,12 @@ static int get_tc_info(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn, static int tc_add_fd_and_name(struct libbpf_nla_req *req, int fd) { - struct bpf_prog_info info = {}; + struct bpf_prog_info info; __u32 info_len = sizeof(info); char name[256]; int len, ret; + memset(&info, 0, info_len); ret = bpf_obj_get_info_by_fd(fd, &info, &info_len); if (ret < 0) return ret; diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h index bd6f4505e7b1..365d769e0357 100644 --- a/tools/lib/bpf/skel_internal.h +++ b/tools/lib/bpf/skel_internal.h @@ -285,6 +285,8 @@ static inline int skel_link_create(int prog_fd, int target_fd, static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) { + const size_t prog_load_attr_sz = offsetofend(union bpf_attr, fd_array); + const size_t test_run_attr_sz = offsetofend(union bpf_attr, test); int map_fd = -1, prog_fd = -1, key = 0, err; union bpf_attr attr; @@ -302,7 +304,7 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) goto out; } - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, prog_load_attr_sz); attr.prog_type = BPF_PROG_TYPE_SYSCALL; attr.insns = (long) opts->insns; attr.insn_cnt = opts->insns_sz / sizeof(struct bpf_insn); @@ -313,18 +315,18 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) attr.log_size = opts->ctx->log_size; attr.log_buf = opts->ctx->log_buf; attr.prog_flags = BPF_F_SLEEPABLE; - err = prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + err = prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, prog_load_attr_sz); if (prog_fd < 0) { opts->errstr = "failed to load loader prog"; set_err; goto out; } - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, test_run_attr_sz); attr.test.prog_fd = prog_fd; attr.test.ctx_in = (long) opts->ctx; attr.test.ctx_size_in = opts->ctx->sz; - err = skel_sys_bpf(BPF_PROG_RUN, &attr, sizeof(attr)); + err = skel_sys_bpf(BPF_PROG_RUN, &attr, test_run_attr_sz); if (err < 0 || (int)attr.test.retval < 0) { opts->errstr = "failed to execute loader prog"; if (err < 0) { -- cgit v1.2.3 From abf84b64e36b175c9c4dd4ecbad2af4329c00041 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 15 Aug 2022 17:19:28 -0700 Subject: libbpf: Clean up deprecated and legacy aliases Remove three missed deprecated APIs that were aliased to new APIs: bpf_object__unload, bpf_prog_attach_xattr and btf__load. Also move legacy API libbpf_find_kernel_btf (aliased to btf__load_vmlinux_btf) into libbpf_legacy.h. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Hao Luo Link: https://lore.kernel.org/bpf/20220816001929.369487-4-andrii@kernel.org --- tools/lib/bpf/bpf.c | 5 ----- tools/lib/bpf/btf.c | 2 -- tools/lib/bpf/btf.h | 1 - tools/lib/bpf/libbpf.c | 2 -- tools/lib/bpf/libbpf_legacy.h | 2 ++ 5 files changed, 2 insertions(+), 10 deletions(-) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index e3a0bd7efa2f..1d49a0352836 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -641,11 +641,6 @@ int bpf_prog_attach_opts(int prog_fd, int target_fd, return libbpf_err_errno(ret); } -__attribute__((alias("bpf_prog_attach_opts"))) -int bpf_prog_attach_xattr(int prog_fd, int target_fd, - enum bpf_attach_type type, - const struct bpf_prog_attach_opts *opts); - int bpf_prog_detach(int target_fd, enum bpf_attach_type type) { const size_t attr_sz = offsetofend(union bpf_attr, replace_bpf_fd); diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 2d14f1a52d7a..361131518d63 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1225,8 +1225,6 @@ int btf__load_into_kernel(struct btf *btf) return btf_load_into_kernel(btf, NULL, 0, 0); } -int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel"))); - int btf__fd(const struct btf *btf) { return btf->fd; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 583760df83b4..ae543144ee30 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -116,7 +116,6 @@ LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_b LIBBPF_API struct btf *btf__load_vmlinux_btf(void); LIBBPF_API struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf); -LIBBPF_API struct btf *libbpf_find_kernel_btf(void); LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id); LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 21fc3fc7f44c..3ad139285fad 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -7263,8 +7263,6 @@ static int bpf_object_unload(struct bpf_object *obj) return 0; } -int bpf_object__unload(struct bpf_object *obj) __attribute__((alias("bpf_object_unload"))); - static int bpf_object__sanitize_maps(struct bpf_object *obj) { struct bpf_map *m; diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h index 5b7e0155db6a..1e1be467bede 100644 --- a/tools/lib/bpf/libbpf_legacy.h +++ b/tools/lib/bpf/libbpf_legacy.h @@ -125,6 +125,8 @@ struct bpf_map; struct btf; struct btf_ext; +LIBBPF_API struct btf *libbpf_find_kernel_btf(void); + LIBBPF_API enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog); LIBBPF_API enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog); LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); -- cgit v1.2.3 From df78da27260c915039b348b164bbc53fa372ba70 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 15 Aug 2022 17:19:29 -0700 Subject: selftests/bpf: Few fixes for selftests/bpf built in release mode Fix few issues found when building and running test_progs in release mode. First, potentially uninitialized idx variable in xskxceiver, force-initialize to zero to satisfy compiler. Few instances of defining uprobe trigger functions break in release mode unless marked as noinline, due to being static. Add noinline to make sure everything works. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Hao Luo Link: https://lore.kernel.org/bpf/20220816001929.369487-5-andrii@kernel.org --- tools/testing/selftests/bpf/prog_tests/attach_probe.c | 6 +++--- tools/testing/selftests/bpf/prog_tests/bpf_cookie.c | 2 +- tools/testing/selftests/bpf/prog_tests/task_pt_regs.c | 2 +- tools/testing/selftests/bpf/xskxceiver.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index 0b899d2d8ea7..9566d9d2f6ee 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -6,19 +6,19 @@ volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes"))); /* uprobe attach point */ -static void trigger_func(void) +static noinline void trigger_func(void) { asm volatile (""); } /* attach point for byname uprobe */ -static void trigger_func2(void) +static noinline void trigger_func2(void) { asm volatile (""); } /* attach point for byname sleepable uprobe */ -static void trigger_func3(void) +static noinline void trigger_func3(void) { asm volatile (""); } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 2974b44f80fa..2be2d61954bc 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -13,7 +13,7 @@ #include "kprobe_multi.skel.h" /* uprobe attach point */ -static void trigger_func(void) +static noinline void trigger_func(void) { asm volatile (""); } diff --git a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c index 61935e7e056a..f000734a3d1f 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c +++ b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c @@ -4,7 +4,7 @@ #include "test_task_pt_regs.skel.h" /* uprobe attach point */ -static void trigger_func(void) +static noinline void trigger_func(void) { asm volatile (""); } diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 20b44ab32a06..14b4737b223c 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -922,7 +922,7 @@ static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fd { struct xsk_socket_info *xsk = ifobject->xsk; bool use_poll = ifobject->use_poll; - u32 i, idx, ret, valid_pkts = 0; + u32 i, idx = 0, ret, valid_pkts = 0; while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) { if (use_poll) { -- cgit v1.2.3