From 7bd3a33ae6d2b820bc44a206f9b81b96840219fd Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 17 Jun 2020 11:31:32 -0700 Subject: libbpf: Bump version to 0.1.0 Bump libbpf version to 0.1.0, as new development cycle starts. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200617183132.1970836-1-andriin@fb.com --- tools/lib/bpf/libbpf.map | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index f732c77b7ed0..c914347f5065 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -270,3 +270,6 @@ LIBBPF_0.0.9 { ring_buffer__new; ring_buffer__poll; } LIBBPF_0.0.8; + +LIBBPF_0.1.0 { +} LIBBPF_0.0.9; -- cgit v1.2.3 From d56b74b9e1b8d747171dc6ff60315c00c41562ce Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 18 Jun 2020 16:46:32 -0700 Subject: tools/bpf: Add verifier tests for 32bit pointer/scalar arithmetic Added two test_verifier subtests for 32bit pointer/scalar arithmetic with BPF_SUB operator. They are passing verifier now. 
Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200618234632.3321367-1-yhs@fb.com --- .../selftests/bpf/verifier/value_ptr_arith.c | 38 ++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index 97ee658e1242..ed4e76b24649 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -836,3 +836,41 @@ .errstr = "R0 invalid mem access 'inv'", .errstr_unpriv = "R0 pointer -= pointer prohibited", }, +{ + "32bit pkt_ptr -= scalar", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 40), + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_8, 2), + BPF_ALU32_REG(BPF_MOV, BPF_REG_4, BPF_REG_7), + BPF_ALU32_REG(BPF_SUB, BPF_REG_6, BPF_REG_4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "32bit scalar -= pkt_ptr", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 40), + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_8, 2), + BPF_ALU32_REG(BPF_MOV, BPF_REG_4, BPF_REG_6), + BPF_ALU32_REG(BPF_SUB, BPF_REG_4, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, -- cgit v1.2.3 From bb8dc2695a7db4f35c1de94d212f86229bb4a5d2 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 19 Jun 2020 
15:20:24 -0700 Subject: tools/bpftool: Relicense bpftool's BPF profiler prog as dual-license GPL/BSD Relicense it to be compatible with the rest of bpftool files. Suggested-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200619222024.519774-1-andriin@fb.com --- tools/bpf/bpftool/skeleton/profiler.bpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c index 20034c12f7c5..c9d196ddb670 100644 --- a/tools/bpf/bpftool/skeleton/profiler.bpf.c +++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (c) 2020 Facebook #include "profiler.h" #include @@ -116,4 +116,4 @@ int BPF_PROG(fexit_XXX) return 0; } -char LICENSE[] SEC("license") = "GPL"; +char LICENSE[] SEC("license") = "Dual BSD/GPL"; -- cgit v1.2.3 From 41c48f3a98231738c5ce79f6f2aa6e40ba924d18 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 19 Jun 2020 14:11:43 -0700 Subject: bpf: Support access to bpf map fields There are multiple use-cases when it's convenient to have access to bpf map fields, both `struct bpf_map` and map type specific struct-s such as `struct bpf_array`, `struct bpf_htab`, etc. For example while working with sock arrays it can be necessary to calculate the key based on map->max_entries (some_hash % max_entries). Currently this is solved by communicating max_entries via "out-of-band" channel, e.g. via additional map with known key to get info about target map. That works, but is not very convenient and error-prone while working with many maps. In other cases necessary data is dynamic (i.e. unknown at loading time) and it's impossible to get it at all. 
For example while working with a hash table it can be convenient to know how much capacity is already used (bpf_htab.count.counter for BPF_F_NO_PREALLOC case). At the same time kernel knows this info and can provide it to bpf program. Fill this gap by adding support to access bpf map fields from bpf program for both `struct bpf_map` and map type specific fields. Support is implemented via btf_struct_access() so that a user can define their own `struct bpf_map` or map type specific struct in their program with only necessary fields and preserve_access_index attribute, cast a map to this struct and use a field. For example: struct bpf_map { __u32 max_entries; } __attribute__((preserve_access_index)); struct bpf_array { struct bpf_map map; __u32 elem_size; } __attribute__((preserve_access_index)); struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 4); __type(key, __u32); __type(value, __u32); } m_array SEC(".maps"); SEC("cgroup_skb/egress") int cg_skb(void *ctx) { struct bpf_array *array = (struct bpf_array *)&m_array; struct bpf_map *map = (struct bpf_map *)&m_array; /* .. use map->max_entries or array->map.max_entries .. */ } Similarly to other btf_struct_access() use-cases (e.g. struct tcp_sock in net/ipv4/bpf_tcp_ca.c) the patch allows access to any fields of corresponding struct. Only reading from map fields is supported. For btf_struct_access() to work there should be a way to know btf id of a struct that corresponds to a map type. To get btf id there should be a way to get a stringified name of map-specific struct, such as "bpf_array", "bpf_htab", etc for a map type. Two new fields are added to `struct bpf_map_ops` to handle it: * .map_btf_name keeps a btf name of a struct returned by map_alloc(); * .map_btf_id is used to cache btf id of that struct. 
To make btf id calculation cheaper, the ids are calculated once while preparing btf_vmlinux and cached the same way as is done for the btf_id field of `struct bpf_func_proto`. While calculating btf ids, struct names are NOT checked for collision. Collisions will be checked as a part of the work to prepare btf ids used in the verifier at compile time, which should land soon. The only known collision for `struct bpf_htab` (kernel/bpf/hashtab.c vs net/core/sock_map.c) was fixed earlier. Both new fields .map_btf_name and .map_btf_id must be set for a map type for the feature to work. If neither is set for a map type, the verifier will return ENOTSUPP on an attempt to access a map_ptr of the corresponding type. If just one of them is set, it's a verifier misconfiguration. Only `struct bpf_array` for BPF_MAP_TYPE_ARRAY and `struct bpf_htab` for BPF_MAP_TYPE_HASH are supported by this patch. Other map types will be supported separately. The feature is available only for CONFIG_DEBUG_INFO_BTF=y and gated by perfmon_capable() so that unpriv programs won't have access to bpf map fields. 
Signed-off-by: Andrey Ignatov Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/6479686a0cd1e9067993df57b4c3eef0e276fec9.1592600985.git.rdna@fb.com --- tools/testing/selftests/bpf/verifier/map_ptr_mixing.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c index cd26ee6b7b1d..1f2b8c4cb26d 100644 --- a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c +++ b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c @@ -56,7 +56,7 @@ .fixup_map_in_map = { 16 }, .fixup_map_array_48b = { 13 }, .result = REJECT, - .errstr = "R0 invalid mem access 'map_ptr'", + .errstr = "only read from bpf_array is supported", }, { "cond: two branches returning different map pointers for lookup (tail, tail)", -- cgit v1.2.3 From b1b53d413f16c6b5078edb127e660e67332e4d2f Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 19 Jun 2020 14:11:45 -0700 Subject: selftests/bpf: Test access to bpf map pointer Add selftests to test access to map pointers from bpf program for all map types except struct_ops (that one would need additional work). verifier test focuses mostly on scenarios that must be rejected. prog_tests test focuses on accessing multiple fields both scalar and a nested struct from bpf program and verifies that those fields have expected values. 
Signed-off-by: Andrey Ignatov Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/139a6a17f8016491e39347849b951525335c6eb4.1592600985.git.rdna@fb.com --- tools/testing/selftests/bpf/prog_tests/map_ptr.c | 32 ++ tools/testing/selftests/bpf/progs/map_ptr_kern.c | 686 +++++++++++++++++++++++ tools/testing/selftests/bpf/verifier/map_ptr.c | 62 ++ 3 files changed, 780 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/map_ptr.c create mode 100644 tools/testing/selftests/bpf/progs/map_ptr_kern.c create mode 100644 tools/testing/selftests/bpf/verifier/map_ptr.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c new file mode 100644 index 000000000000..c230a573c373 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include +#include + +#include "map_ptr_kern.skel.h" + +void test_map_ptr(void) +{ + struct map_ptr_kern *skel; + __u32 duration = 0, retval; + char buf[128]; + int err; + + skel = map_ptr_kern__open_and_load(); + if (CHECK(!skel, "skel_open_load", "open_load failed\n")) + return; + + err = bpf_prog_test_run(bpf_program__fd(skel->progs.cg_skb), 1, &pkt_v4, + sizeof(pkt_v4), buf, NULL, &retval, NULL); + + if (CHECK(err, "test_run", "err=%d errno=%d\n", err, errno)) + goto cleanup; + + if (CHECK(!retval, "retval", "retval=%d map_type=%u line=%u\n", retval, + skel->bss->g_map_type, skel->bss->g_line)) + goto cleanup; + +cleanup: + map_ptr_kern__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c new file mode 100644 index 000000000000..473665cac67e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c @@ -0,0 +1,686 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include 
+#include + +#define LOOP_BOUND 0xf +#define MAX_ENTRIES 8 +#define HALF_ENTRIES (MAX_ENTRIES >> 1) + +_Static_assert(MAX_ENTRIES < LOOP_BOUND, "MAX_ENTRIES must be < LOOP_BOUND"); + +enum bpf_map_type g_map_type = BPF_MAP_TYPE_UNSPEC; +__u32 g_line = 0; + +#define VERIFY_TYPE(type, func) ({ \ + g_map_type = type; \ + if (!func()) \ + return 0; \ +}) + + +#define VERIFY(expr) ({ \ + g_line = __LINE__; \ + if (!(expr)) \ + return 0; \ +}) + +struct bpf_map_memory { + __u32 pages; +} __attribute__((preserve_access_index)); + +struct bpf_map { + enum bpf_map_type map_type; + __u32 key_size; + __u32 value_size; + __u32 max_entries; + __u32 id; + struct bpf_map_memory memory; +} __attribute__((preserve_access_index)); + +static inline int check_bpf_map_fields(struct bpf_map *map, __u32 key_size, + __u32 value_size, __u32 max_entries) +{ + VERIFY(map->map_type == g_map_type); + VERIFY(map->key_size == key_size); + VERIFY(map->value_size == value_size); + VERIFY(map->max_entries == max_entries); + VERIFY(map->id > 0); + VERIFY(map->memory.pages > 0); + + return 1; +} + +static inline int check_bpf_map_ptr(struct bpf_map *indirect, + struct bpf_map *direct) +{ + VERIFY(indirect->map_type == direct->map_type); + VERIFY(indirect->key_size == direct->key_size); + VERIFY(indirect->value_size == direct->value_size); + VERIFY(indirect->max_entries == direct->max_entries); + VERIFY(indirect->id == direct->id); + VERIFY(indirect->memory.pages == direct->memory.pages); + + return 1; +} + +static inline int check(struct bpf_map *indirect, struct bpf_map *direct, + __u32 key_size, __u32 value_size, __u32 max_entries) +{ + VERIFY(check_bpf_map_ptr(indirect, direct)); + VERIFY(check_bpf_map_fields(indirect, key_size, value_size, + max_entries)); + return 1; +} + +static inline int check_default(struct bpf_map *indirect, + struct bpf_map *direct) +{ + VERIFY(check(indirect, direct, sizeof(__u32), sizeof(__u32), + MAX_ENTRIES)); + return 1; +} + +typedef struct { + int counter; +} 
atomic_t; + +struct bpf_htab { + struct bpf_map map; + atomic_t count; + __u32 n_buckets; + __u32 elem_size; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(map_flags, BPF_F_NO_PREALLOC); /* to test bpf_htab.count */ + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_hash SEC(".maps"); + +static inline int check_hash(void) +{ + struct bpf_htab *hash = (struct bpf_htab *)&m_hash; + struct bpf_map *map = (struct bpf_map *)&m_hash; + int i; + + VERIFY(check_default(&hash->map, map)); + + VERIFY(hash->n_buckets == MAX_ENTRIES); + VERIFY(hash->elem_size == 64); + + VERIFY(hash->count.counter == 0); + for (i = 0; i < HALF_ENTRIES; ++i) { + const __u32 key = i; + const __u32 val = 1; + + if (bpf_map_update_elem(hash, &key, &val, 0)) + return 0; + } + VERIFY(hash->count.counter == HALF_ENTRIES); + + return 1; +} + +struct bpf_array { + struct bpf_map map; + __u32 elem_size; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_array SEC(".maps"); + +static inline int check_array(void) +{ + struct bpf_array *array = (struct bpf_array *)&m_array; + struct bpf_map *map = (struct bpf_map *)&m_array; + int i, n_lookups = 0, n_keys = 0; + + VERIFY(check_default(&array->map, map)); + + VERIFY(array->elem_size == 8); + + for (i = 0; i < array->map.max_entries && i < LOOP_BOUND; ++i) { + const __u32 key = i; + __u32 *val = bpf_map_lookup_elem(array, &key); + + ++n_lookups; + if (val) + ++n_keys; + } + + VERIFY(n_lookups == MAX_ENTRIES); + VERIFY(n_keys == MAX_ENTRIES); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_prog_array SEC(".maps"); + +static inline int check_prog_array(void) +{ + struct bpf_array *prog_array = (struct bpf_array *)&m_prog_array; + struct 
bpf_map *map = (struct bpf_map *)&m_prog_array; + + VERIFY(check_default(&prog_array->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_perf_event_array SEC(".maps"); + +static inline int check_perf_event_array(void) +{ + struct bpf_array *perf_event_array = (struct bpf_array *)&m_perf_event_array; + struct bpf_map *map = (struct bpf_map *)&m_perf_event_array; + + VERIFY(check_default(&perf_event_array->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_percpu_hash SEC(".maps"); + +static inline int check_percpu_hash(void) +{ + struct bpf_htab *percpu_hash = (struct bpf_htab *)&m_percpu_hash; + struct bpf_map *map = (struct bpf_map *)&m_percpu_hash; + + VERIFY(check_default(&percpu_hash->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_percpu_array SEC(".maps"); + +static inline int check_percpu_array(void) +{ + struct bpf_array *percpu_array = (struct bpf_array *)&m_percpu_array; + struct bpf_map *map = (struct bpf_map *)&m_percpu_array; + + VERIFY(check_default(&percpu_array->map, map)); + + return 1; +} + +struct bpf_stack_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_STACK_TRACE); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u64); +} m_stack_trace SEC(".maps"); + +static inline int check_stack_trace(void) +{ + struct bpf_stack_map *stack_trace = + (struct bpf_stack_map *)&m_stack_trace; + struct bpf_map *map = (struct bpf_map *)&m_stack_trace; + + VERIFY(check(&stack_trace->map, map, sizeof(__u32), sizeof(__u64), + MAX_ENTRIES)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY); + 
__uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_cgroup_array SEC(".maps"); + +static inline int check_cgroup_array(void) +{ + struct bpf_array *cgroup_array = (struct bpf_array *)&m_cgroup_array; + struct bpf_map *map = (struct bpf_map *)&m_cgroup_array; + + VERIFY(check_default(&cgroup_array->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_lru_hash SEC(".maps"); + +static inline int check_lru_hash(void) +{ + struct bpf_htab *lru_hash = (struct bpf_htab *)&m_lru_hash; + struct bpf_map *map = (struct bpf_map *)&m_lru_hash; + + VERIFY(check_default(&lru_hash->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_lru_percpu_hash SEC(".maps"); + +static inline int check_lru_percpu_hash(void) +{ + struct bpf_htab *lru_percpu_hash = (struct bpf_htab *)&m_lru_percpu_hash; + struct bpf_map *map = (struct bpf_map *)&m_lru_percpu_hash; + + VERIFY(check_default(&lru_percpu_hash->map, map)); + + return 1; +} + +struct lpm_trie { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct lpm_key { + struct bpf_lpm_trie_key trie_key; + __u32 data; +}; + +struct { + __uint(type, BPF_MAP_TYPE_LPM_TRIE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __uint(max_entries, MAX_ENTRIES); + __type(key, struct lpm_key); + __type(value, __u32); +} m_lpm_trie SEC(".maps"); + +static inline int check_lpm_trie(void) +{ + struct lpm_trie *lpm_trie = (struct lpm_trie *)&m_lpm_trie; + struct bpf_map *map = (struct bpf_map *)&m_lpm_trie; + + VERIFY(check(&lpm_trie->map, map, sizeof(struct lpm_key), sizeof(__u32), + MAX_ENTRIES)); + + return 1; +} + +struct inner_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} inner_map SEC(".maps"); + +struct { + 
__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); + __array(values, struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); + }); +} m_array_of_maps SEC(".maps") = { + .values = { (void *)&inner_map, 0, 0, 0, 0, 0, 0, 0, 0 }, +}; + +static inline int check_array_of_maps(void) +{ + struct bpf_array *array_of_maps = (struct bpf_array *)&m_array_of_maps; + struct bpf_map *map = (struct bpf_map *)&m_array_of_maps; + + VERIFY(check_default(&array_of_maps->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); + __array(values, struct inner_map); +} m_hash_of_maps SEC(".maps") = { + .values = { + [2] = &inner_map, + }, +}; + +static inline int check_hash_of_maps(void) +{ + struct bpf_htab *hash_of_maps = (struct bpf_htab *)&m_hash_of_maps; + struct bpf_map *map = (struct bpf_map *)&m_hash_of_maps; + + VERIFY(check_default(&hash_of_maps->map, map)); + + return 1; +} + +struct bpf_dtab { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_devmap SEC(".maps"); + +static inline int check_devmap(void) +{ + struct bpf_dtab *devmap = (struct bpf_dtab *)&m_devmap; + struct bpf_map *map = (struct bpf_map *)&m_devmap; + + VERIFY(check_default(&devmap->map, map)); + + return 1; +} + +struct bpf_stab { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_sockmap SEC(".maps"); + +static inline int check_sockmap(void) +{ + struct bpf_stab *sockmap = (struct bpf_stab *)&m_sockmap; + struct bpf_map *map = (struct bpf_map *)&m_sockmap; + + 
VERIFY(check_default(&sockmap->map, map)); + + return 1; +} + +struct bpf_cpu_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_CPUMAP); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_cpumap SEC(".maps"); + +static inline int check_cpumap(void) +{ + struct bpf_cpu_map *cpumap = (struct bpf_cpu_map *)&m_cpumap; + struct bpf_map *map = (struct bpf_map *)&m_cpumap; + + VERIFY(check_default(&cpumap->map, map)); + + return 1; +} + +struct xsk_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_XSKMAP); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_xskmap SEC(".maps"); + +static inline int check_xskmap(void) +{ + struct xsk_map *xskmap = (struct xsk_map *)&m_xskmap; + struct bpf_map *map = (struct bpf_map *)&m_xskmap; + + VERIFY(check_default(&xskmap->map, map)); + + return 1; +} + +struct bpf_shtab { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_sockhash SEC(".maps"); + +static inline int check_sockhash(void) +{ + struct bpf_shtab *sockhash = (struct bpf_shtab *)&m_sockhash; + struct bpf_map *map = (struct bpf_map *)&m_sockhash; + + VERIFY(check_default(&sockhash->map, map)); + + return 1; +} + +struct bpf_cgroup_storage_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); + __type(key, struct bpf_cgroup_storage_key); + __type(value, __u32); +} m_cgroup_storage SEC(".maps"); + +static inline int check_cgroup_storage(void) +{ + struct bpf_cgroup_storage_map *cgroup_storage = + (struct bpf_cgroup_storage_map *)&m_cgroup_storage; + struct bpf_map *map = (struct bpf_map *)&m_cgroup_storage; + + VERIFY(check(&cgroup_storage->map, map, + sizeof(struct 
bpf_cgroup_storage_key), sizeof(__u32), 0)); + + return 1; +} + +struct reuseport_array { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_reuseport_sockarray SEC(".maps"); + +static inline int check_reuseport_sockarray(void) +{ + struct reuseport_array *reuseport_sockarray = + (struct reuseport_array *)&m_reuseport_sockarray; + struct bpf_map *map = (struct bpf_map *)&m_reuseport_sockarray; + + VERIFY(check_default(&reuseport_sockarray->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); + __type(key, struct bpf_cgroup_storage_key); + __type(value, __u32); +} m_percpu_cgroup_storage SEC(".maps"); + +static inline int check_percpu_cgroup_storage(void) +{ + struct bpf_cgroup_storage_map *percpu_cgroup_storage = + (struct bpf_cgroup_storage_map *)&m_percpu_cgroup_storage; + struct bpf_map *map = (struct bpf_map *)&m_percpu_cgroup_storage; + + VERIFY(check(&percpu_cgroup_storage->map, map, + sizeof(struct bpf_cgroup_storage_key), sizeof(__u32), 0)); + + return 1; +} + +struct bpf_queue_stack { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_QUEUE); + __uint(max_entries, MAX_ENTRIES); + __type(value, __u32); +} m_queue SEC(".maps"); + +static inline int check_queue(void) +{ + struct bpf_queue_stack *queue = (struct bpf_queue_stack *)&m_queue; + struct bpf_map *map = (struct bpf_map *)&m_queue; + + VERIFY(check(&queue->map, map, 0, sizeof(__u32), MAX_ENTRIES)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_STACK); + __uint(max_entries, MAX_ENTRIES); + __type(value, __u32); +} m_stack SEC(".maps"); + +static inline int check_stack(void) +{ + struct bpf_queue_stack *stack = (struct bpf_queue_stack *)&m_stack; + struct bpf_map *map = (struct bpf_map *)&m_stack; + + VERIFY(check(&stack->map, map, 0, 
sizeof(__u32), MAX_ENTRIES)); + + return 1; +} + +struct bpf_sk_storage_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, __u32); + __type(value, __u32); +} m_sk_storage SEC(".maps"); + +static inline int check_sk_storage(void) +{ + struct bpf_sk_storage_map *sk_storage = + (struct bpf_sk_storage_map *)&m_sk_storage; + struct bpf_map *map = (struct bpf_map *)&m_sk_storage; + + VERIFY(check(&sk_storage->map, map, sizeof(__u32), sizeof(__u32), 0)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_devmap_hash SEC(".maps"); + +static inline int check_devmap_hash(void) +{ + struct bpf_dtab *devmap_hash = (struct bpf_dtab *)&m_devmap_hash; + struct bpf_map *map = (struct bpf_map *)&m_devmap_hash; + + VERIFY(check_default(&devmap_hash->map, map)); + + return 1; +} + +struct bpf_ringbuf_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1 << 12); +} m_ringbuf SEC(".maps"); + +static inline int check_ringbuf(void) +{ + struct bpf_ringbuf_map *ringbuf = (struct bpf_ringbuf_map *)&m_ringbuf; + struct bpf_map *map = (struct bpf_map *)&m_ringbuf; + + VERIFY(check(&ringbuf->map, map, 0, 0, 1 << 12)); + + return 1; +} + +SEC("cgroup_skb/egress") +int cg_skb(void *ctx) +{ + VERIFY_TYPE(BPF_MAP_TYPE_HASH, check_hash); + VERIFY_TYPE(BPF_MAP_TYPE_ARRAY, check_array); + VERIFY_TYPE(BPF_MAP_TYPE_PROG_ARRAY, check_prog_array); + VERIFY_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, check_perf_event_array); + VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_HASH, check_percpu_hash); + VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, check_percpu_array); + VERIFY_TYPE(BPF_MAP_TYPE_STACK_TRACE, check_stack_trace); + VERIFY_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, check_cgroup_array); + 
VERIFY_TYPE(BPF_MAP_TYPE_LRU_HASH, check_lru_hash); + VERIFY_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, check_lru_percpu_hash); + VERIFY_TYPE(BPF_MAP_TYPE_LPM_TRIE, check_lpm_trie); + VERIFY_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, check_array_of_maps); + VERIFY_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, check_hash_of_maps); + VERIFY_TYPE(BPF_MAP_TYPE_DEVMAP, check_devmap); + VERIFY_TYPE(BPF_MAP_TYPE_SOCKMAP, check_sockmap); + VERIFY_TYPE(BPF_MAP_TYPE_CPUMAP, check_cpumap); + VERIFY_TYPE(BPF_MAP_TYPE_XSKMAP, check_xskmap); + VERIFY_TYPE(BPF_MAP_TYPE_SOCKHASH, check_sockhash); + VERIFY_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, check_cgroup_storage); + VERIFY_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, + check_reuseport_sockarray); + VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + check_percpu_cgroup_storage); + VERIFY_TYPE(BPF_MAP_TYPE_QUEUE, check_queue); + VERIFY_TYPE(BPF_MAP_TYPE_STACK, check_stack); + VERIFY_TYPE(BPF_MAP_TYPE_SK_STORAGE, check_sk_storage); + VERIFY_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, check_devmap_hash); + VERIFY_TYPE(BPF_MAP_TYPE_RINGBUF, check_ringbuf); + + return 1; +} + +__u32 _version SEC("version") = 1; +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c new file mode 100644 index 000000000000..b52209db8250 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/map_ptr.c @@ -0,0 +1,62 @@ +{ + "bpf_map_ptr: read with negative offset rejected", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result_unpriv = REJECT, + .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .result = REJECT, + .errstr = "R1 is bpf_array invalid negative access: off=-8", +}, +{ + "bpf_map_ptr: write rejected", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), 
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result_unpriv = REJECT, + .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .result = REJECT, + .errstr = "only read from bpf_array is supported", +}, +{ + "bpf_map_ptr: read non-existent field rejected", + .insns = { + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result_unpriv = REJECT, + .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .result = REJECT, + .errstr = "cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4", +}, +{ + "bpf_map_ptr: read ops field accepted", + .insns = { + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result_unpriv = REJECT, + .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .result = ACCEPT, + .retval = 1, +}, -- cgit v1.2.3 From 1bdb6c9a1c43fdf9b83b2331dfc6229bd2e71d9b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 20 Jun 2020 23:21:12 -0700 Subject: libbpf: Add a bunch of attribute getters/setters for map definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a bunch of getter for various aspects of BPF map. Some of these attribute (e.g., key_size, value_size, type, etc) are available right now in struct bpf_map_def, but this patch adds getter allowing to fetch them individually. bpf_map_def approach isn't very scalable, when ABI stability requirements are taken into account. 
It's much easier to extend libbpf and add support for new features when each aspect of a BPF map has a separate getter/setter. Getters follow the common naming convention of not explicitly having "get" in their names: bpf_map__type() returns map type, bpf_map__key_size() returns key_size. Setters, though, explicitly have "set" in their names: bpf_map__set_type(), bpf_map__set_key_size(). This patch ensures we now have a getter and a setter for the following map attributes: - type; - max_entries; - map_flags; - numa_node; - key_size; - value_size; - ifindex. bpf_map__resize() enforces an unnecessary restriction of max_entries > 0. It is unnecessary, because libbpf actually supports zero max_entries for some cases (e.g., for PERF_EVENT_ARRAY map) and treats it specially during map creation time. To allow setting max_entries=0, a new bpf_map__set_max_entries() setter is added. bpf_map__resize()'s behavior is preserved for backwards compatibility reasons. Map ifindex getter is added as well. There is a setter already, but no corresponding getter. Fix this asymmetry as well. bpf_map__set_ifindex() itself is converted from a void function into an error-returning one, similar to other setters. The only error returned right now is -EBUSY, if the BPF map is already loaded and has a corresponding FD. One lacking attribute with no ability to get/set or even specify it declaratively is numa_node. This patch fixes this gap: it adds a programmatic getter/setter, as well as support for the numa_node field in BTF-defined maps. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20200621062112.3006313-1-andriin@fb.com --- tools/lib/bpf/libbpf.c | 100 +++++++++++++++++++++++++++++++++++++++++++---- tools/lib/bpf/libbpf.h | 30 ++++++++++++-- tools/lib/bpf/libbpf.map | 14 +++++++ 3 files changed, 134 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 477c679ed945..259a6360475f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -310,6 +310,7 @@ struct bpf_map { int map_ifindex; int inner_map_fd; struct bpf_map_def def; + __u32 numa_node; __u32 btf_var_idx; __u32 btf_key_type_id; __u32 btf_value_type_id; @@ -1957,6 +1958,10 @@ static int parse_btf_map_def(struct bpf_object *obj, return -EINVAL; pr_debug("map '%s': found map_flags = %u.\n", map->name, map->def.map_flags); + } else if (strcmp(name, "numa_node") == 0) { + if (!get_map_field_int(map->name, obj->btf, m, &map->numa_node)) + return -EINVAL; + pr_debug("map '%s': found numa_node = %u.\n", map->name, map->numa_node); } else if (strcmp(name, "key_size") == 0) { __u32 sz; @@ -3222,20 +3227,27 @@ err_free_new_name: return err; } -int bpf_map__resize(struct bpf_map *map, __u32 max_entries) +__u32 bpf_map__max_entries(const struct bpf_map *map) { - if (!map || !max_entries) - return -EINVAL; + return map->def.max_entries; +} - /* If map already created, its attributes can't be changed. 
*/ +int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) +{ if (map->fd >= 0) return -EBUSY; - map->def.max_entries = max_entries; - return 0; } +int bpf_map__resize(struct bpf_map *map, __u32 max_entries) +{ + if (!map || !max_entries) + return -EINVAL; + + return bpf_map__set_max_entries(map, max_entries); +} + static int bpf_object__probe_loading(struct bpf_object *obj) { @@ -3603,6 +3615,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map) create_attr.map_flags = def->map_flags; create_attr.key_size = def->key_size; create_attr.value_size = def->value_size; + create_attr.numa_node = map->numa_node; if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) { int nr_cpus; @@ -7088,6 +7101,71 @@ const char *bpf_map__name(const struct bpf_map *map) return map ? map->name : NULL; } +enum bpf_map_type bpf_map__type(const struct bpf_map *map) +{ + return map->def.type; +} + +int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type) +{ + if (map->fd >= 0) + return -EBUSY; + map->def.type = type; + return 0; +} + +__u32 bpf_map__map_flags(const struct bpf_map *map) +{ + return map->def.map_flags; +} + +int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags) +{ + if (map->fd >= 0) + return -EBUSY; + map->def.map_flags = flags; + return 0; +} + +__u32 bpf_map__numa_node(const struct bpf_map *map) +{ + return map->numa_node; +} + +int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node) +{ + if (map->fd >= 0) + return -EBUSY; + map->numa_node = numa_node; + return 0; +} + +__u32 bpf_map__key_size(const struct bpf_map *map) +{ + return map->def.key_size; +} + +int bpf_map__set_key_size(struct bpf_map *map, __u32 size) +{ + if (map->fd >= 0) + return -EBUSY; + map->def.key_size = size; + return 0; +} + +__u32 bpf_map__value_size(const struct bpf_map *map) +{ + return map->def.value_size; +} + +int bpf_map__set_value_size(struct bpf_map *map, __u32 size) +{ + if (map->fd >= 0) + return -EBUSY; + 
map->def.value_size = size; + return 0; +} + __u32 bpf_map__btf_key_type_id(const struct bpf_map *map) { return map ? map->btf_key_type_id : 0; @@ -7140,9 +7218,17 @@ bool bpf_map__is_internal(const struct bpf_map *map) return map->libbpf_type != LIBBPF_MAP_UNSPEC; } -void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) +__u32 bpf_map__ifindex(const struct bpf_map *map) +{ + return map->map_ifindex; +} + +int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) { + if (map->fd >= 0) + return -EBUSY; map->map_ifindex = ifindex; + return 0; } int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 334437af3014..fdd279fb1866 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -418,11 +418,38 @@ bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj); LIBBPF_API struct bpf_map * bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj); +/* get/set map FD */ LIBBPF_API int bpf_map__fd(const struct bpf_map *map); +LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); +/* get map definition */ LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map); +/* get map name */ LIBBPF_API const char *bpf_map__name(const struct bpf_map *map); +/* get/set map type */ +LIBBPF_API enum bpf_map_type bpf_map__type(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type); +/* get/set map size (max_entries) */ +LIBBPF_API __u32 bpf_map__max_entries(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries); +LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); +/* get/set map flags */ +LIBBPF_API __u32 bpf_map__map_flags(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags); +/* get/set map NUMA node */ +LIBBPF_API __u32 bpf_map__numa_node(const struct bpf_map *map); 
+LIBBPF_API int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node); +/* get/set map key size */ +LIBBPF_API __u32 bpf_map__key_size(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_key_size(struct bpf_map *map, __u32 size); +/* get/set map value size */ +LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_value_size(struct bpf_map *map, __u32 size); +/* get map key/value BTF type IDs */ LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map); LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map); +/* get/set map if_index */ +LIBBPF_API __u32 bpf_map__ifindex(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, @@ -430,11 +457,8 @@ LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, LIBBPF_API void *bpf_map__priv(const struct bpf_map *map); LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size); -LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); -LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map); -LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path); LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index c914347f5065..9914e0db4859 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -272,4 +272,18 @@ LIBBPF_0.0.9 { } LIBBPF_0.0.8; LIBBPF_0.1.0 { + global: + bpf_map__ifindex; + bpf_map__key_size; + bpf_map__map_flags; + 
bpf_map__max_entries; + bpf_map__numa_node; + bpf_map__set_key_size; + bpf_map__set_map_flags; + bpf_map__set_max_entries; + bpf_map__set_numa_node; + bpf_map__set_type; + bpf_map__set_value_size; + bpf_map__type; + bpf_map__value_size; } LIBBPF_0.0.9; -- cgit v1.2.3 From 2e33efe32e019328916ce653dc1265d637261993 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 19 Jun 2020 16:16:55 -0700 Subject: libbpf: Generalize libbpf externs support Switch existing Kconfig externs to be just one of few possible kinds of more generic externs. This refactoring is in preparation for ksymbol extern support, added in the follow up patch. There are no functional changes intended. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Reviewed-by: Hao Luo Link: https://lore.kernel.org/bpf/20200619231703.738941-2-andriin@fb.com --- tools/lib/bpf/libbpf.c | 346 +++++++++++++++++++++++++++++-------------------- 1 file changed, 206 insertions(+), 140 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 259a6360475f..ffccb5af32a5 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -330,24 +330,35 @@ struct bpf_map { enum extern_type { EXT_UNKNOWN, - EXT_CHAR, - EXT_BOOL, - EXT_INT, - EXT_TRISTATE, - EXT_CHAR_ARR, + EXT_KCFG, +}; + +enum kcfg_type { + KCFG_UNKNOWN, + KCFG_CHAR, + KCFG_BOOL, + KCFG_INT, + KCFG_TRISTATE, + KCFG_CHAR_ARR, }; struct extern_desc { - const char *name; + enum extern_type type; int sym_idx; int btf_id; - enum extern_type type; - int sz; - int align; - int data_off; - bool is_signed; - bool is_weak; + int sec_btf_id; + const char *name; bool is_set; + bool is_weak; + union { + struct { + enum kcfg_type type; + int sz; + int align; + int data_off; + bool is_signed; + } kcfg; + }; }; static LIST_HEAD(bpf_objects_list); @@ -1424,19 +1435,19 @@ static struct extern_desc *find_extern_by_name(const struct bpf_object *obj, return NULL; } -static int set_ext_value_tri(struct extern_desc *ext, 
void *ext_val, - char value) +static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val, + char value) { - switch (ext->type) { - case EXT_BOOL: + switch (ext->kcfg.type) { + case KCFG_BOOL: if (value == 'm') { - pr_warn("extern %s=%c should be tristate or char\n", + pr_warn("extern (kcfg) %s=%c should be tristate or char\n", ext->name, value); return -EINVAL; } *(bool *)ext_val = value == 'y' ? true : false; break; - case EXT_TRISTATE: + case KCFG_TRISTATE: if (value == 'y') *(enum libbpf_tristate *)ext_val = TRI_YES; else if (value == 'm') @@ -1444,14 +1455,14 @@ static int set_ext_value_tri(struct extern_desc *ext, void *ext_val, else /* value == 'n' */ *(enum libbpf_tristate *)ext_val = TRI_NO; break; - case EXT_CHAR: + case KCFG_CHAR: *(char *)ext_val = value; break; - case EXT_UNKNOWN: - case EXT_INT: - case EXT_CHAR_ARR: + case KCFG_UNKNOWN: + case KCFG_INT: + case KCFG_CHAR_ARR: default: - pr_warn("extern %s=%c should be bool, tristate, or char\n", + pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n", ext->name, value); return -EINVAL; } @@ -1459,29 +1470,29 @@ static int set_ext_value_tri(struct extern_desc *ext, void *ext_val, return 0; } -static int set_ext_value_str(struct extern_desc *ext, char *ext_val, - const char *value) +static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val, + const char *value) { size_t len; - if (ext->type != EXT_CHAR_ARR) { - pr_warn("extern %s=%s should char array\n", ext->name, value); + if (ext->kcfg.type != KCFG_CHAR_ARR) { + pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value); return -EINVAL; } len = strlen(value); if (value[len - 1] != '"') { - pr_warn("extern '%s': invalid string config '%s'\n", + pr_warn("extern (kcfg) '%s': invalid string config '%s'\n", ext->name, value); return -EINVAL; } /* strip quotes */ len -= 2; - if (len >= ext->sz) { - pr_warn("extern '%s': long string config %s of (%zu bytes) truncated to %d bytes\n", - ext->name, value, len, 
ext->sz - 1); - len = ext->sz - 1; + if (len >= ext->kcfg.sz) { + pr_warn("extern (kcfg) '%s': long string config %s of (%zu bytes) truncated to %d bytes\n", + ext->name, value, len, ext->kcfg.sz - 1); + len = ext->kcfg.sz - 1; } memcpy(ext_val, value + 1, len); ext_val[len] = '\0'; @@ -1508,11 +1519,11 @@ static int parse_u64(const char *value, __u64 *res) return 0; } -static bool is_ext_value_in_range(const struct extern_desc *ext, __u64 v) +static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v) { - int bit_sz = ext->sz * 8; + int bit_sz = ext->kcfg.sz * 8; - if (ext->sz == 8) + if (ext->kcfg.sz == 8) return true; /* Validate that value stored in u64 fits in integer of `ext->sz` @@ -1527,26 +1538,26 @@ static bool is_ext_value_in_range(const struct extern_desc *ext, __u64 v) * For unsigned target integer, check that all the (64 - Y) bits are * zero. */ - if (ext->is_signed) + if (ext->kcfg.is_signed) return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz); else return (v >> bit_sz) == 0; } -static int set_ext_value_num(struct extern_desc *ext, void *ext_val, - __u64 value) +static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val, + __u64 value) { - if (ext->type != EXT_INT && ext->type != EXT_CHAR) { - pr_warn("extern %s=%llu should be integer\n", + if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) { + pr_warn("extern (kcfg) %s=%llu should be integer\n", ext->name, (unsigned long long)value); return -EINVAL; } - if (!is_ext_value_in_range(ext, value)) { - pr_warn("extern %s=%llu value doesn't fit in %d bytes\n", - ext->name, (unsigned long long)value, ext->sz); + if (!is_kcfg_value_in_range(ext, value)) { + pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n", + ext->name, (unsigned long long)value, ext->kcfg.sz); return -ERANGE; } - switch (ext->sz) { + switch (ext->kcfg.sz) { case 1: *(__u8 *)ext_val = value; break; case 2: *(__u16 *)ext_val = value; break; case 4: *(__u32 *)ext_val = value; break; @@ 
-1592,30 +1603,30 @@ static int bpf_object__process_kconfig_line(struct bpf_object *obj, if (!ext || ext->is_set) return 0; - ext_val = data + ext->data_off; + ext_val = data + ext->kcfg.data_off; value = sep + 1; switch (*value) { case 'y': case 'n': case 'm': - err = set_ext_value_tri(ext, ext_val, *value); + err = set_kcfg_value_tri(ext, ext_val, *value); break; case '"': - err = set_ext_value_str(ext, ext_val, value); + err = set_kcfg_value_str(ext, ext_val, value); break; default: /* assume integer */ err = parse_u64(value, &num); if (err) { - pr_warn("extern %s=%s should be integer\n", + pr_warn("extern (kcfg) %s=%s should be integer\n", ext->name, value); return err; } - err = set_ext_value_num(ext, ext_val, num); + err = set_kcfg_value_num(ext, ext_val, num); break; } if (err) return err; - pr_debug("extern %s=%s\n", ext->name, value); + pr_debug("extern (kcfg) %s=%s\n", ext->name, value); return 0; } @@ -1686,16 +1697,20 @@ static int bpf_object__read_kconfig_mem(struct bpf_object *obj, static int bpf_object__init_kconfig_map(struct bpf_object *obj) { - struct extern_desc *last_ext; + struct extern_desc *last_ext = NULL, *ext; size_t map_sz; - int err; + int i, err; - if (obj->nr_extern == 0) - return 0; + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + if (ext->type == EXT_KCFG) + last_ext = ext; + } - last_ext = &obj->externs[obj->nr_extern - 1]; - map_sz = last_ext->data_off + last_ext->sz; + if (!last_ext) + return 0; + map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz; err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG, obj->efile.symbols_shndx, NULL, map_sz); @@ -2714,8 +2729,33 @@ static int find_extern_btf_id(const struct btf *btf, const char *ext_name) return -ENOENT; } -static enum extern_type find_extern_type(const struct btf *btf, int id, - bool *is_signed) +static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) { + const struct btf_var_secinfo *vs; + const struct btf_type *t; + int i, j, n; + + if 
(!btf) + return -ESRCH; + + n = btf__get_nr_types(btf); + for (i = 1; i <= n; i++) { + t = btf__type_by_id(btf, i); + + if (!btf_is_datasec(t)) + continue; + + vs = btf_var_secinfos(t); + for (j = 0; j < btf_vlen(t); j++, vs++) { + if (vs->type == ext_btf_id) + return i; + } + } + + return -ENOENT; +} + +static enum kcfg_type find_kcfg_type(const struct btf *btf, int id, + bool *is_signed) { const struct btf_type *t; const char *name; @@ -2730,29 +2770,29 @@ static enum extern_type find_extern_type(const struct btf *btf, int id, int enc = btf_int_encoding(t); if (enc & BTF_INT_BOOL) - return t->size == 1 ? EXT_BOOL : EXT_UNKNOWN; + return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN; if (is_signed) *is_signed = enc & BTF_INT_SIGNED; if (t->size == 1) - return EXT_CHAR; + return KCFG_CHAR; if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1))) - return EXT_UNKNOWN; - return EXT_INT; + return KCFG_UNKNOWN; + return KCFG_INT; } case BTF_KIND_ENUM: if (t->size != 4) - return EXT_UNKNOWN; + return KCFG_UNKNOWN; if (strcmp(name, "libbpf_tristate")) - return EXT_UNKNOWN; - return EXT_TRISTATE; + return KCFG_UNKNOWN; + return KCFG_TRISTATE; case BTF_KIND_ARRAY: if (btf_array(t)->nelems == 0) - return EXT_UNKNOWN; - if (find_extern_type(btf, btf_array(t)->type, NULL) != EXT_CHAR) - return EXT_UNKNOWN; - return EXT_CHAR_ARR; + return KCFG_UNKNOWN; + if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR) + return KCFG_UNKNOWN; + return KCFG_CHAR_ARR; default: - return EXT_UNKNOWN; + return KCFG_UNKNOWN; } } @@ -2761,23 +2801,29 @@ static int cmp_externs(const void *_a, const void *_b) const struct extern_desc *a = _a; const struct extern_desc *b = _b; - /* descending order by alignment requirements */ - if (a->align != b->align) - return a->align > b->align ? -1 : 1; - /* ascending order by size, within same alignment class */ - if (a->sz != b->sz) - return a->sz < b->sz ? -1 : 1; + if (a->type != b->type) + return a->type < b->type ? 
-1 : 1; + + if (a->type == EXT_KCFG) { + /* descending order by alignment requirements */ + if (a->kcfg.align != b->kcfg.align) + return a->kcfg.align > b->kcfg.align ? -1 : 1; + /* ascending order by size, within same alignment class */ + if (a->kcfg.sz != b->kcfg.sz) + return a->kcfg.sz < b->kcfg.sz ? -1 : 1; + } + /* resolve ties by name */ return strcmp(a->name, b->name); } static int bpf_object__collect_externs(struct bpf_object *obj) { + struct btf_type *sec, *kcfg_sec = NULL; const struct btf_type *t; struct extern_desc *ext; - int i, n, off, btf_id; - struct btf_type *sec; - const char *ext_name; + int i, n, off; + const char *ext_name, *sec_name; Elf_Scn *scn; GElf_Shdr sh; @@ -2823,22 +2869,39 @@ static int bpf_object__collect_externs(struct bpf_object *obj) ext->name = btf__name_by_offset(obj->btf, t->name_off); ext->sym_idx = i; ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK; - ext->sz = btf__resolve_size(obj->btf, t->type); - if (ext->sz <= 0) { - pr_warn("failed to resolve size of extern '%s': %d\n", - ext_name, ext->sz); - return ext->sz; - } - ext->align = btf__align_of(obj->btf, t->type); - if (ext->align <= 0) { - pr_warn("failed to determine alignment of extern '%s': %d\n", - ext_name, ext->align); - return -EINVAL; - } - ext->type = find_extern_type(obj->btf, t->type, - &ext->is_signed); - if (ext->type == EXT_UNKNOWN) { - pr_warn("extern '%s' type is unsupported\n", ext_name); + + ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id); + if (ext->sec_btf_id <= 0) { + pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n", + ext_name, ext->btf_id, ext->sec_btf_id); + return ext->sec_btf_id; + } + sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id); + sec_name = btf__name_by_offset(obj->btf, sec->name_off); + + if (strcmp(sec_name, KCONFIG_SEC) == 0) { + kcfg_sec = sec; + ext->type = EXT_KCFG; + ext->kcfg.sz = btf__resolve_size(obj->btf, t->type); + if (ext->kcfg.sz <= 0) { + pr_warn("failed to resolve size of extern 
(kcfg) '%s': %d\n", + ext_name, ext->kcfg.sz); + return ext->kcfg.sz; + } + ext->kcfg.align = btf__align_of(obj->btf, t->type); + if (ext->kcfg.align <= 0) { + pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n", + ext_name, ext->kcfg.align); + return -EINVAL; + } + ext->kcfg.type = find_kcfg_type(obj->btf, t->type, + &ext->kcfg.is_signed); + if (ext->kcfg.type == KCFG_UNKNOWN) { + pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name); + return -ENOTSUP; + } + } else { + pr_warn("unrecognized extern section '%s'\n", sec_name); return -ENOTSUP; } } @@ -2847,42 +2910,40 @@ static int bpf_object__collect_externs(struct bpf_object *obj) if (!obj->nr_extern) return 0; - /* sort externs by (alignment, size, name) and calculate their offsets - * within a map */ + /* sort externs by type, for kcfg ones also by (align, size, name) */ qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs); - off = 0; - for (i = 0; i < obj->nr_extern; i++) { - ext = &obj->externs[i]; - ext->data_off = roundup(off, ext->align); - off = ext->data_off + ext->sz; - pr_debug("extern #%d: symbol %d, off %u, name %s\n", - i, ext->sym_idx, ext->data_off, ext->name); - } - btf_id = btf__find_by_name(obj->btf, KCONFIG_SEC); - if (btf_id <= 0) { - pr_warn("no BTF info found for '%s' datasec\n", KCONFIG_SEC); - return -ESRCH; - } + if (kcfg_sec) { + sec = kcfg_sec; + /* for kcfg externs calculate their offsets within a .kconfig map */ + off = 0; + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + if (ext->type != EXT_KCFG) + continue; - sec = (struct btf_type *)btf__type_by_id(obj->btf, btf_id); - sec->size = off; - n = btf_vlen(sec); - for (i = 0; i < n; i++) { - struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; - - t = btf__type_by_id(obj->btf, vs->type); - ext_name = btf__name_by_offset(obj->btf, t->name_off); - ext = find_extern_by_name(obj, ext_name); - if (!ext) { - pr_warn("failed to find extern definition for BTF var '%s'\n", - ext_name); - 
return -ESRCH; + ext->kcfg.data_off = roundup(off, ext->kcfg.align); + off = ext->kcfg.data_off + ext->kcfg.sz; + pr_debug("extern #%d (kcfg): symbol %d, off %u, name %s\n", + i, ext->sym_idx, ext->kcfg.data_off, ext->name); + } + sec->size = off; + n = btf_vlen(sec); + for (i = 0; i < n; i++) { + struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; + + t = btf__type_by_id(obj->btf, vs->type); + ext_name = btf__name_by_offset(obj->btf, t->name_off); + ext = find_extern_by_name(obj, ext_name); + if (!ext) { + pr_warn("failed to find extern definition for BTF var '%s'\n", + ext_name); + return -ESRCH; + } + btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED; + vs->offset = ext->kcfg.data_off; } - vs->offset = ext->data_off; - btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED; } - return 0; } @@ -3012,11 +3073,11 @@ static int bpf_program__record_reloc(struct bpf_program *prog, sym_idx); return -LIBBPF_ERRNO__RELOC; } - pr_debug("found extern #%d '%s' (sym %d, off %u) for insn %u\n", - i, ext->name, ext->sym_idx, ext->data_off, insn_idx); + pr_debug("found extern #%d '%s' (sym %d) for insn %u\n", + i, ext->name, ext->sym_idx, insn_idx); reloc_desc->type = RELO_EXTERN; reloc_desc->insn_idx = insn_idx; - reloc_desc->sym_off = ext->data_off; + reloc_desc->sym_off = i; /* sym_off stores extern index */ return 0; } @@ -4941,6 +5002,7 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) for (i = 0; i < prog->nr_reloc; i++) { struct reloc_desc *relo = &prog->reloc_desc[i]; struct bpf_insn *insn = &prog->insns[relo->insn_idx]; + struct extern_desc *ext; if (relo->insn_idx + 1 >= (int)prog->insns_cnt) { pr_warn("relocation out of range: '%s'\n", @@ -4959,9 +5021,10 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) insn[0].imm = obj->maps[relo->map_idx].fd; break; case RELO_EXTERN: + ext = &obj->externs[relo->sym_off]; insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; - insn[1].imm = 
relo->sym_off; + insn[1].imm = ext->kcfg.data_off; break; case RELO_CALL: err = bpf_program__reloc_text(prog, obj, relo); @@ -5585,30 +5648,33 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, { bool need_config = false; struct extern_desc *ext; + void *kcfg_data = NULL; int err, i; - void *data; if (obj->nr_extern == 0) return 0; - data = obj->maps[obj->kconfig_map_idx].mmaped; + if (obj->kconfig_map_idx >= 0) + kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped; for (i = 0; i < obj->nr_extern; i++) { ext = &obj->externs[i]; - if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { - void *ext_val = data + ext->data_off; + if (ext->type == EXT_KCFG && + strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { + void *ext_val = kcfg_data + ext->kcfg.data_off; __u32 kver = get_kernel_version(); if (!kver) { pr_warn("failed to get kernel version\n"); return -EINVAL; } - err = set_ext_value_num(ext, ext_val, kver); + err = set_kcfg_value_num(ext, ext_val, kver); if (err) return err; - pr_debug("extern %s=0x%x\n", ext->name, kver); - } else if (strncmp(ext->name, "CONFIG_", 7) == 0) { + pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver); + } else if (ext->type == EXT_KCFG && + strncmp(ext->name, "CONFIG_", 7) == 0) { need_config = true; } else { pr_warn("unrecognized extern '%s'\n", ext->name); @@ -5616,20 +5682,20 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, } } if (need_config && extra_kconfig) { - err = bpf_object__read_kconfig_mem(obj, extra_kconfig, data); + err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data); if (err) return -EINVAL; need_config = false; for (i = 0; i < obj->nr_extern; i++) { ext = &obj->externs[i]; - if (!ext->is_set) { + if (ext->type == EXT_KCFG && !ext->is_set) { need_config = true; break; } } } if (need_config) { - err = bpf_object__read_kconfig_file(obj, data); + err = bpf_object__read_kconfig_file(obj, kcfg_data); if (err) return -EINVAL; } -- cgit v1.2.3 From 
1c0c7074fefd769f62dda155e881ca90c9e3e75a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 19 Jun 2020 16:16:56 -0700 Subject: libbpf: Add support for extracting kernel symbol addresses Add support for another (in addition to existing Kconfig) special kind of externs in BPF code, kernel symbol externs. Such externs allow BPF code to "know" kernel symbol address and either use it for comparisons with kernel data structures (e.g., struct file's f_op pointer, to distinguish different kinds of file), or, with the help of bpf_probe_read_kernel(), to follow pointers and read data from global variables. Kernel symbol addresses are found through /proc/kallsyms, which should be present in the system. Currently, such kernel symbol variables are typeless: they have to be defined as `extern const void <symbol>` and the only operation you can do (in C code) with them is to take their address. Such externs should reside in a special section '.ksyms'. bpf_helpers.h header provides __ksym macro for this. Strong vs weak semantics stays the same as with Kconfig externs. If symbol is not found in /proc/kallsyms, this will be a failure for strong (non-weak) extern, but will be defaulted to 0 for weak externs. If the same symbol is defined multiple times in /proc/kallsyms, then it will be an error if any of the associated addresses differs. In that case, address is ambiguous, so libbpf errs on the side of caution, rather than confusing user with randomly chosen address. In the future, once kernel is extended with variables BTF information, such ksym externs will be supported in a typed version, which will allow BPF program to read variable's contents directly, similarly to how it's done for fentry/fexit input arguments. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Reviewed-by: Hao Luo Link: https://lore.kernel.org/bpf/20200619231703.738941-3-andriin@fb.com --- tools/lib/bpf/bpf_helpers.h | 1 + tools/lib/bpf/btf.h | 5 ++ tools/lib/bpf/libbpf.c | 144 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 144 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index f67dce2af802..a510d8ed716f 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -75,5 +75,6 @@ enum libbpf_tristate { }; #define __kconfig __attribute__((section(".kconfig"))) +#define __ksym __attribute__((section(".ksyms"))) #endif diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 70c1b7ec2bd0..06cd1731c154 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -168,6 +168,11 @@ static inline bool btf_kflag(const struct btf_type *t) return BTF_INFO_KFLAG(t->info); } +static inline bool btf_is_void(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_UNKN; +} + static inline bool btf_is_int(const struct btf_type *t) { return btf_kind(t) == BTF_KIND_INT; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ffccb5af32a5..18461deb1b19 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -285,6 +285,7 @@ struct bpf_struct_ops { #define BSS_SEC ".bss" #define RODATA_SEC ".rodata" #define KCONFIG_SEC ".kconfig" +#define KSYMS_SEC ".ksyms" #define STRUCT_OPS_SEC ".struct_ops" enum libbpf_map_type { @@ -331,6 +332,7 @@ struct bpf_map { enum extern_type { EXT_UNKNOWN, EXT_KCFG, + EXT_KSYM, }; enum kcfg_type { @@ -358,6 +360,9 @@ struct extern_desc { int data_off; bool is_signed; } kcfg; + struct { + unsigned long long addr; + } ksym; }; }; @@ -2817,9 +2822,25 @@ static int cmp_externs(const void *_a, const void *_b) return strcmp(a->name, b->name); } +static int find_int_btf_id(const struct btf *btf) +{ + const struct btf_type *t; + int i, n; + + n = 
btf__get_nr_types(btf); + for (i = 1; i <= n; i++) { + t = btf__type_by_id(btf, i); + + if (btf_is_int(t) && btf_int_bits(t) == 32) + return i; + } + + return 0; +} + static int bpf_object__collect_externs(struct bpf_object *obj) { - struct btf_type *sec, *kcfg_sec = NULL; + struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL; const struct btf_type *t; struct extern_desc *ext; int i, n, off; @@ -2900,6 +2921,17 @@ static int bpf_object__collect_externs(struct bpf_object *obj) pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name); return -ENOTSUP; } + } else if (strcmp(sec_name, KSYMS_SEC) == 0) { + const struct btf_type *vt; + + ksym_sec = sec; + ext->type = EXT_KSYM; + + vt = skip_mods_and_typedefs(obj->btf, t->type, NULL); + if (!btf_is_void(vt)) { + pr_warn("extern (ksym) '%s' is not typeless (void)\n", ext_name); + return -ENOTSUP; + } } else { pr_warn("unrecognized extern section '%s'\n", sec_name); return -ENOTSUP; @@ -2913,6 +2945,46 @@ static int bpf_object__collect_externs(struct bpf_object *obj) /* sort externs by type, for kcfg ones also by (align, size, name) */ qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs); + /* for .ksyms section, we need to turn all externs into allocated + * variables in BTF to pass kernel verification; we do this by + * pretending that each extern is a 8-byte variable + */ + if (ksym_sec) { + /* find existing 4-byte integer type in BTF to use for fake + * extern variables in DATASEC + */ + int int_btf_id = find_int_btf_id(obj->btf); + + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + if (ext->type != EXT_KSYM) + continue; + pr_debug("extern (ksym) #%d: symbol %d, name %s\n", + i, ext->sym_idx, ext->name); + } + + sec = ksym_sec; + n = btf_vlen(sec); + for (i = 0, off = 0; i < n; i++, off += sizeof(int)) { + struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; + struct btf_type *vt; + + vt = (void *)btf__type_by_id(obj->btf, vs->type); + ext_name = btf__name_by_offset(obj->btf, 
vt->name_off); + ext = find_extern_by_name(obj, ext_name); + if (!ext) { + pr_warn("failed to find extern definition for BTF var '%s'\n", + ext_name); + return -ESRCH; + } + btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED; + vt->type = int_btf_id; + vs->offset = off; + vs->size = sizeof(int); + } + sec->size = off; + } + if (kcfg_sec) { sec = kcfg_sec; /* for kcfg externs calculate their offsets within a .kconfig map */ @@ -2924,7 +2996,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) ext->kcfg.data_off = roundup(off, ext->kcfg.align); off = ext->kcfg.data_off + ext->kcfg.sz; - pr_debug("extern #%d (kcfg): symbol %d, off %u, name %s\n", + pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n", i, ext->sym_idx, ext->kcfg.data_off, ext->name); } sec->size = off; @@ -5022,9 +5094,14 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) break; case RELO_EXTERN: ext = &obj->externs[relo->sym_off]; - insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; - insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; - insn[1].imm = ext->kcfg.data_off; + if (ext->type == EXT_KCFG) { + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; + insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; + insn[1].imm = ext->kcfg.data_off; + } else /* EXT_KSYM */ { + insn[0].imm = (__u32)ext->ksym.addr; + insn[1].imm = ext->ksym.addr >> 32; + } break; case RELO_CALL: err = bpf_program__reloc_text(prog, obj, relo); @@ -5643,10 +5720,58 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj) return 0; } +static int bpf_object__read_kallsyms_file(struct bpf_object *obj) +{ + char sym_type, sym_name[500]; + unsigned long long sym_addr; + struct extern_desc *ext; + int ret, err = 0; + FILE *f; + + f = fopen("/proc/kallsyms", "r"); + if (!f) { + err = -errno; + pr_warn("failed to open /proc/kallsyms: %d\n", err); + return err; + } + + while (true) { + ret = fscanf(f, "%llx %c %499s%*[^\n]\n", + &sym_addr, &sym_type, sym_name); + if (ret == EOF && feof(f)) + break; + if (ret != 3) { 
+ pr_warn("failed to read kallsyms entry: %d\n", ret); + err = -EINVAL; + goto out; + } + + ext = find_extern_by_name(obj, sym_name); + if (!ext || ext->type != EXT_KSYM) + continue; + + if (ext->is_set && ext->ksym.addr != sym_addr) { + pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n", + sym_name, ext->ksym.addr, sym_addr); + err = -EINVAL; + goto out; + } + if (!ext->is_set) { + ext->is_set = true; + ext->ksym.addr = sym_addr; + pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr); + } + } + +out: + fclose(f); + return err; +} + static int bpf_object__resolve_externs(struct bpf_object *obj, const char *extra_kconfig) { - bool need_config = false; + bool need_config = false, need_kallsyms = false; struct extern_desc *ext; void *kcfg_data = NULL; int err, i; @@ -5676,6 +5801,8 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, } else if (ext->type == EXT_KCFG && strncmp(ext->name, "CONFIG_", 7) == 0) { need_config = true; + } else if (ext->type == EXT_KSYM) { + need_kallsyms = true; } else { pr_warn("unrecognized extern '%s'\n", ext->name); return -EINVAL; @@ -5699,6 +5826,11 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, if (err) return -EINVAL; } + if (need_kallsyms) { + err = bpf_object__read_kallsyms_file(obj); + if (err) + return -EINVAL; + } for (i = 0; i < obj->nr_extern; i++) { ext = &obj->externs[i]; -- cgit v1.2.3 From b7ddfab20a6af3a0e366000eada63adf6a7683e7 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 19 Jun 2020 16:16:57 -0700 Subject: selftests/bpf: Add __ksym extern selftest Validate libbpf is able to handle weak and strong kernel symbol externs in BPF code correctly. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Reviewed-by: Hao Luo Link: https://lore.kernel.org/bpf/20200619231703.738941-4-andriin@fb.com --- tools/testing/selftests/bpf/prog_tests/ksyms.c | 71 ++++++++++++++++++++++++++ tools/testing/selftests/bpf/progs/test_ksyms.c | 32 ++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/ksyms.c create mode 100644 tools/testing/selftests/bpf/progs/test_ksyms.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms.c b/tools/testing/selftests/bpf/prog_tests/ksyms.c new file mode 100644 index 000000000000..e3d6777226a8 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/ksyms.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include +#include "test_ksyms.skel.h" +#include + +static int duration; + +static __u64 kallsyms_find(const char *sym) +{ + char type, name[500]; + __u64 addr, res = 0; + FILE *f; + + f = fopen("/proc/kallsyms", "r"); + if (CHECK(!f, "kallsyms_fopen", "failed to open: %d\n", errno)) + return 0; + + while (fscanf(f, "%llx %c %499s%*[^\n]\n", &addr, &type, name) > 0) { + if (strcmp(name, sym) == 0) { + res = addr; + goto out; + } + } + + CHECK(false, "not_found", "symbol %s not found\n", sym); +out: + fclose(f); + return res; +} + +void test_ksyms(void) +{ + __u64 link_fops_addr = kallsyms_find("bpf_link_fops"); + const char *btf_path = "/sys/kernel/btf/vmlinux"; + struct test_ksyms *skel; + struct test_ksyms__data *data; + struct stat st; + __u64 btf_size; + int err; + + if (CHECK(stat(btf_path, &st), "stat_btf", "err %d\n", errno)) + return; + btf_size = st.st_size; + + skel = test_ksyms__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n")) + return; + + err = test_ksyms__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + /* trigger tracepoint */ + usleep(1); + + data = 
skel->data; + CHECK(data->out__bpf_link_fops != link_fops_addr, "bpf_link_fops", + "got 0x%llx, exp 0x%llx\n", + data->out__bpf_link_fops, link_fops_addr); + CHECK(data->out__bpf_link_fops1 != 0, "bpf_link_fops1", + "got %llu, exp %llu\n", data->out__bpf_link_fops1, (__u64)0); + CHECK(data->out__btf_size != btf_size, "btf_size", + "got %llu, exp %llu\n", data->out__btf_size, btf_size); + CHECK(data->out__per_cpu_start != 0, "__per_cpu_start", + "got %llu, exp %llu\n", data->out__per_cpu_start, (__u64)0); + +cleanup: + test_ksyms__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_ksyms.c b/tools/testing/selftests/bpf/progs/test_ksyms.c new file mode 100644 index 000000000000..6c9cbb5a3bdf --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ksyms.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include +#include +#include + +__u64 out__bpf_link_fops = -1; +__u64 out__bpf_link_fops1 = -1; +__u64 out__btf_size = -1; +__u64 out__per_cpu_start = -1; + +extern const void bpf_link_fops __ksym; +extern const void __start_BTF __ksym; +extern const void __stop_BTF __ksym; +extern const void __per_cpu_start __ksym; +/* non-existing symbol, weak, default to zero */ +extern const void bpf_link_fops1 __ksym __weak; + +SEC("raw_tp/sys_enter") +int handler(const void *ctx) +{ + out__bpf_link_fops = (__u64)&bpf_link_fops; + out__btf_size = (__u64)(&__stop_BTF - &__start_BTF); + out__per_cpu_start = (__u64)&__per_cpu_start; + + out__bpf_link_fops1 = (__u64)&bpf_link_fops1; + + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From a479b8ce4ed1457f814be6f67a8447a9af38f235 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 19 Jun 2020 16:16:58 -0700 Subject: tools/bpftool: Move map/prog parsing logic into common Move functions that parse map and prog by id/tag/name/etc outside of map.c/prog.c, respectively. 
These functions are used outside of those files and are generic enough to be in common. This also makes heavy-weight map.c and prog.c more decoupled from the rest of bpftool files and facilitates more lightweight bootstrap bpftool variant. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200619231703.738941-5-andriin@fb.com --- tools/bpf/bpftool/common.c | 308 +++++++++++++++++++++++++++++++++++++++++++++ tools/bpf/bpftool/main.h | 2 + tools/bpf/bpftool/map.c | 156 ----------------------- tools/bpf/bpftool/prog.c | 152 ---------------------- 4 files changed, 310 insertions(+), 308 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index c47bdc65de8e..6c864c3683fc 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -581,3 +581,311 @@ print_all_levels(__maybe_unused enum libbpf_print_level level, { return vfprintf(stderr, format, args); } + +static int prog_fd_by_nametag(void *nametag, int **fds, bool tag) +{ + unsigned int id = 0; + int fd, nb_fds = 0; + void *tmp; + int err; + + while (true) { + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + + err = bpf_prog_get_next_id(id, &id); + if (err) { + if (errno != ENOENT) { + p_err("%s", strerror(errno)); + goto err_close_fds; + } + return nb_fds; + } + + fd = bpf_prog_get_fd_by_id(id); + if (fd < 0) { + p_err("can't get prog by id (%u): %s", + id, strerror(errno)); + goto err_close_fds; + } + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + p_err("can't get prog info (%u): %s", + id, strerror(errno)); + goto err_close_fd; + } + + if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) || + (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) { + close(fd); + continue; + } + + if (nb_fds > 0) { + tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); + if (!tmp) { + p_err("failed to realloc"); + goto err_close_fd; + } + *fds = tmp; + } + 
(*fds)[nb_fds++] = fd; + } + +err_close_fd: + close(fd); +err_close_fds: + while (--nb_fds >= 0) + close((*fds)[nb_fds]); + return -1; +} + +int prog_parse_fds(int *argc, char ***argv, int **fds) +{ + if (is_prefix(**argv, "id")) { + unsigned int id; + char *endptr; + + NEXT_ARGP(); + + id = strtoul(**argv, &endptr, 0); + if (*endptr) { + p_err("can't parse %s as ID", **argv); + return -1; + } + NEXT_ARGP(); + + (*fds)[0] = bpf_prog_get_fd_by_id(id); + if ((*fds)[0] < 0) { + p_err("get by id (%u): %s", id, strerror(errno)); + return -1; + } + return 1; + } else if (is_prefix(**argv, "tag")) { + unsigned char tag[BPF_TAG_SIZE]; + + NEXT_ARGP(); + + if (sscanf(**argv, BPF_TAG_FMT, tag, tag + 1, tag + 2, + tag + 3, tag + 4, tag + 5, tag + 6, tag + 7) + != BPF_TAG_SIZE) { + p_err("can't parse tag"); + return -1; + } + NEXT_ARGP(); + + return prog_fd_by_nametag(tag, fds, true); + } else if (is_prefix(**argv, "name")) { + char *name; + + NEXT_ARGP(); + + name = **argv; + if (strlen(name) > BPF_OBJ_NAME_LEN - 1) { + p_err("can't parse name"); + return -1; + } + NEXT_ARGP(); + + return prog_fd_by_nametag(name, fds, false); + } else if (is_prefix(**argv, "pinned")) { + char *path; + + NEXT_ARGP(); + + path = **argv; + NEXT_ARGP(); + + (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG); + if ((*fds)[0] < 0) + return -1; + return 1; + } + + p_err("expected 'id', 'tag', 'name' or 'pinned', got: '%s'?", **argv); + return -1; +} + +int prog_parse_fd(int *argc, char ***argv) +{ + int *fds = NULL; + int nb_fds, fd; + + fds = malloc(sizeof(int)); + if (!fds) { + p_err("mem alloc failed"); + return -1; + } + nb_fds = prog_parse_fds(argc, argv, &fds); + if (nb_fds != 1) { + if (nb_fds > 1) { + p_err("several programs match this handle"); + while (nb_fds--) + close(fds[nb_fds]); + } + fd = -1; + goto exit_free; + } + + fd = fds[0]; +exit_free: + free(fds); + return fd; +} + +static int map_fd_by_name(char *name, int **fds) +{ + unsigned int id = 0; + int fd, nb_fds = 0; + void *tmp; 
+ int err; + + while (true) { + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + + err = bpf_map_get_next_id(id, &id); + if (err) { + if (errno != ENOENT) { + p_err("%s", strerror(errno)); + goto err_close_fds; + } + return nb_fds; + } + + fd = bpf_map_get_fd_by_id(id); + if (fd < 0) { + p_err("can't get map by id (%u): %s", + id, strerror(errno)); + goto err_close_fds; + } + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + p_err("can't get map info (%u): %s", + id, strerror(errno)); + goto err_close_fd; + } + + if (strncmp(name, info.name, BPF_OBJ_NAME_LEN)) { + close(fd); + continue; + } + + if (nb_fds > 0) { + tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); + if (!tmp) { + p_err("failed to realloc"); + goto err_close_fd; + } + *fds = tmp; + } + (*fds)[nb_fds++] = fd; + } + +err_close_fd: + close(fd); +err_close_fds: + while (--nb_fds >= 0) + close((*fds)[nb_fds]); + return -1; +} + +int map_parse_fds(int *argc, char ***argv, int **fds) +{ + if (is_prefix(**argv, "id")) { + unsigned int id; + char *endptr; + + NEXT_ARGP(); + + id = strtoul(**argv, &endptr, 0); + if (*endptr) { + p_err("can't parse %s as ID", **argv); + return -1; + } + NEXT_ARGP(); + + (*fds)[0] = bpf_map_get_fd_by_id(id); + if ((*fds)[0] < 0) { + p_err("get map by id (%u): %s", id, strerror(errno)); + return -1; + } + return 1; + } else if (is_prefix(**argv, "name")) { + char *name; + + NEXT_ARGP(); + + name = **argv; + if (strlen(name) > BPF_OBJ_NAME_LEN - 1) { + p_err("can't parse name"); + return -1; + } + NEXT_ARGP(); + + return map_fd_by_name(name, fds); + } else if (is_prefix(**argv, "pinned")) { + char *path; + + NEXT_ARGP(); + + path = **argv; + NEXT_ARGP(); + + (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP); + if ((*fds)[0] < 0) + return -1; + return 1; + } + + p_err("expected 'id', 'name' or 'pinned', got: '%s'?", **argv); + return -1; +} + +int map_parse_fd(int *argc, char ***argv) +{ + int *fds = NULL; + int nb_fds, fd; + + fds = malloc(sizeof(int)); + 
if (!fds) { + p_err("mem alloc failed"); + return -1; + } + nb_fds = map_parse_fds(argc, argv, &fds); + if (nb_fds != 1) { + if (nb_fds > 1) { + p_err("several maps match this handle"); + while (nb_fds--) + close(fds[nb_fds]); + } + fd = -1; + goto exit_free; + } + + fd = fds[0]; +exit_free: + free(fds); + return fd; +} + +int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) +{ + int err; + int fd; + + fd = map_parse_fd(argc, argv); + if (fd < 0) + return -1; + + err = bpf_obj_get_info_by_fd(fd, info, info_len); + if (err) { + p_err("can't get map info: %s", strerror(errno)); + close(fd); + return err; + } + + return fd; +} diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 5cdf0bc049bd..4338ab9d86d4 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -210,7 +210,9 @@ int do_iter(int argc, char **argv); int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what); int prog_parse_fd(int *argc, char ***argv); +int prog_parse_fds(int *argc, char ***argv, int **fds); int map_parse_fd(int *argc, char ***argv); +int map_parse_fds(int *argc, char ***argv, int **fds); int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len); struct bpf_prog_linfo; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index c5fac8068ba1..b9eee19b094c 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -92,162 +92,6 @@ static void *alloc_value(struct bpf_map_info *info) return malloc(info->value_size); } -static int map_fd_by_name(char *name, int **fds) -{ - unsigned int id = 0; - int fd, nb_fds = 0; - void *tmp; - int err; - - while (true) { - struct bpf_map_info info = {}; - __u32 len = sizeof(info); - - err = bpf_map_get_next_id(id, &id); - if (err) { - if (errno != ENOENT) { - p_err("%s", strerror(errno)); - goto err_close_fds; - } - return nb_fds; - } - - fd = bpf_map_get_fd_by_id(id); - if (fd < 0) { - p_err("can't get map by id (%u): %s", - id, 
strerror(errno)); - goto err_close_fds; - } - - err = bpf_obj_get_info_by_fd(fd, &info, &len); - if (err) { - p_err("can't get map info (%u): %s", - id, strerror(errno)); - goto err_close_fd; - } - - if (strncmp(name, info.name, BPF_OBJ_NAME_LEN)) { - close(fd); - continue; - } - - if (nb_fds > 0) { - tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); - if (!tmp) { - p_err("failed to realloc"); - goto err_close_fd; - } - *fds = tmp; - } - (*fds)[nb_fds++] = fd; - } - -err_close_fd: - close(fd); -err_close_fds: - while (--nb_fds >= 0) - close((*fds)[nb_fds]); - return -1; -} - -static int map_parse_fds(int *argc, char ***argv, int **fds) -{ - if (is_prefix(**argv, "id")) { - unsigned int id; - char *endptr; - - NEXT_ARGP(); - - id = strtoul(**argv, &endptr, 0); - if (*endptr) { - p_err("can't parse %s as ID", **argv); - return -1; - } - NEXT_ARGP(); - - (*fds)[0] = bpf_map_get_fd_by_id(id); - if ((*fds)[0] < 0) { - p_err("get map by id (%u): %s", id, strerror(errno)); - return -1; - } - return 1; - } else if (is_prefix(**argv, "name")) { - char *name; - - NEXT_ARGP(); - - name = **argv; - if (strlen(name) > BPF_OBJ_NAME_LEN - 1) { - p_err("can't parse name"); - return -1; - } - NEXT_ARGP(); - - return map_fd_by_name(name, fds); - } else if (is_prefix(**argv, "pinned")) { - char *path; - - NEXT_ARGP(); - - path = **argv; - NEXT_ARGP(); - - (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP); - if ((*fds)[0] < 0) - return -1; - return 1; - } - - p_err("expected 'id', 'name' or 'pinned', got: '%s'?", **argv); - return -1; -} - -int map_parse_fd(int *argc, char ***argv) -{ - int *fds = NULL; - int nb_fds, fd; - - fds = malloc(sizeof(int)); - if (!fds) { - p_err("mem alloc failed"); - return -1; - } - nb_fds = map_parse_fds(argc, argv, &fds); - if (nb_fds != 1) { - if (nb_fds > 1) { - p_err("several maps match this handle"); - while (nb_fds--) - close(fds[nb_fds]); - } - fd = -1; - goto exit_free; - } - - fd = fds[0]; -exit_free: - free(fds); - return fd; -} - -int 
map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) -{ - int err; - int fd; - - fd = map_parse_fd(argc, argv); - if (fd < 0) - return -1; - - err = bpf_obj_get_info_by_fd(fd, info, info_len); - if (err) { - p_err("can't get map info: %s", strerror(errno)); - close(fd); - return err; - } - - return fd; -} - static int do_dump_btf(const struct btf_dumper *d, struct bpf_map_info *map_info, void *key, void *value) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index a5eff83496f2..53d47610ff58 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -86,158 +86,6 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) strftime(buf, size, "%FT%T%z", &load_tm); } -static int prog_fd_by_nametag(void *nametag, int **fds, bool tag) -{ - unsigned int id = 0; - int fd, nb_fds = 0; - void *tmp; - int err; - - while (true) { - struct bpf_prog_info info = {}; - __u32 len = sizeof(info); - - err = bpf_prog_get_next_id(id, &id); - if (err) { - if (errno != ENOENT) { - p_err("%s", strerror(errno)); - goto err_close_fds; - } - return nb_fds; - } - - fd = bpf_prog_get_fd_by_id(id); - if (fd < 0) { - p_err("can't get prog by id (%u): %s", - id, strerror(errno)); - goto err_close_fds; - } - - err = bpf_obj_get_info_by_fd(fd, &info, &len); - if (err) { - p_err("can't get prog info (%u): %s", - id, strerror(errno)); - goto err_close_fd; - } - - if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) || - (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) { - close(fd); - continue; - } - - if (nb_fds > 0) { - tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); - if (!tmp) { - p_err("failed to realloc"); - goto err_close_fd; - } - *fds = tmp; - } - (*fds)[nb_fds++] = fd; - } - -err_close_fd: - close(fd); -err_close_fds: - while (--nb_fds >= 0) - close((*fds)[nb_fds]); - return -1; -} - -static int prog_parse_fds(int *argc, char ***argv, int **fds) -{ - if (is_prefix(**argv, "id")) { - unsigned int id; - char *endptr; 
- - NEXT_ARGP(); - - id = strtoul(**argv, &endptr, 0); - if (*endptr) { - p_err("can't parse %s as ID", **argv); - return -1; - } - NEXT_ARGP(); - - (*fds)[0] = bpf_prog_get_fd_by_id(id); - if ((*fds)[0] < 0) { - p_err("get by id (%u): %s", id, strerror(errno)); - return -1; - } - return 1; - } else if (is_prefix(**argv, "tag")) { - unsigned char tag[BPF_TAG_SIZE]; - - NEXT_ARGP(); - - if (sscanf(**argv, BPF_TAG_FMT, tag, tag + 1, tag + 2, - tag + 3, tag + 4, tag + 5, tag + 6, tag + 7) - != BPF_TAG_SIZE) { - p_err("can't parse tag"); - return -1; - } - NEXT_ARGP(); - - return prog_fd_by_nametag(tag, fds, true); - } else if (is_prefix(**argv, "name")) { - char *name; - - NEXT_ARGP(); - - name = **argv; - if (strlen(name) > BPF_OBJ_NAME_LEN - 1) { - p_err("can't parse name"); - return -1; - } - NEXT_ARGP(); - - return prog_fd_by_nametag(name, fds, false); - } else if (is_prefix(**argv, "pinned")) { - char *path; - - NEXT_ARGP(); - - path = **argv; - NEXT_ARGP(); - - (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG); - if ((*fds)[0] < 0) - return -1; - return 1; - } - - p_err("expected 'id', 'tag', 'name' or 'pinned', got: '%s'?", **argv); - return -1; -} - -int prog_parse_fd(int *argc, char ***argv) -{ - int *fds = NULL; - int nb_fds, fd; - - fds = malloc(sizeof(int)); - if (!fds) { - p_err("mem alloc failed"); - return -1; - } - nb_fds = prog_parse_fds(argc, argv, &fds); - if (nb_fds != 1) { - if (nb_fds > 1) { - p_err("several programs match this handle"); - while (nb_fds--) - close(fds[nb_fds]); - } - fd = -1; - goto exit_free; - } - - fd = fds[0]; -exit_free: - free(fds); - return fd; -} - static void show_prog_maps(int fd, __u32 num_maps) { struct bpf_prog_info info = {}; -- cgit v1.2.3 From 16e9b187aba60f9014f11a7e95b878850b6c95e5 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 19 Jun 2020 16:16:59 -0700 Subject: tools/bpftool: Minimize bootstrap bpftool Build minimal "bootstrap mode" bpftool to enable skeleton (and, later, vmlinux.h generation), 
instead of building almost complete, but slightly different (w/o skeletons, etc) bpftool to bootstrap complete bpftool build. Current approach doesn't scale well (engineering-wise) when adding more BPF programs to bpftool and other complicated functionality, as it requires constant adjusting of the code to work in both bootstrapped mode and normal mode. So it's better to build only minimal bpftool version that supports only BPF skeleton code generation and BTF-to-C conversion. Thankfully, this is quite easy to accomplish due to internal modularity of bpftool commands. This will also allow to keep adding new functionality to bpftool in general, without the need to care about bootstrap mode for those new parts of bpftool. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200619231703.738941-6-andriin@fb.com --- tools/bpf/bpftool/.gitignore | 2 +- tools/bpf/bpftool/Makefile | 30 +++++++++++++----------------- tools/bpf/bpftool/main.c | 11 +++++++++-- tools/bpf/bpftool/main.h | 27 +++++++++++++++------------ 4 files changed, 38 insertions(+), 32 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore index 26cde83e1ca3..ce721adf3161 100644 --- a/tools/bpf/bpftool/.gitignore +++ b/tools/bpf/bpftool/.gitignore @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only *.d -/_bpftool +/bpftool-bootstrap /bpftool bpftool*.8 bpf-helpers.* diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 9e85f101be85..eec2da4d45d2 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -116,40 +116,36 @@ CFLAGS += -DHAVE_LIBBFD_SUPPORT SRCS += $(BFD_SRCS) endif +BPFTOOL_BOOTSTRAP := $(if $(OUTPUT),$(OUTPUT)bpftool-bootstrap,./bpftool-bootstrap) + +BOOTSTRAP_OBJS = $(addprefix $(OUTPUT),main.o common.o json_writer.o gen.o btf.o) OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o -_OBJS = $(filter-out 
$(OUTPUT)prog.o,$(OBJS)) $(OUTPUT)_prog.o -ifeq ($(feature-clang-bpf-global-var),1) - __OBJS = $(OBJS) -else - __OBJS = $(_OBJS) +ifneq ($(feature-clang-bpf-global-var),1) + CFLAGS += -DBPFTOOL_WITHOUT_SKELETONS endif -$(OUTPUT)_prog.o: prog.c - $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -DBPFTOOL_WITHOUT_SKELETONS -o $@ $< - -$(OUTPUT)_bpftool: $(_OBJS) $(LIBBPF) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(_OBJS) $(LIBS) - skeleton/profiler.bpf.o: skeleton/profiler.bpf.c $(LIBBPF) $(QUIET_CLANG)$(CLANG) \ -I$(srctree)/tools/include/uapi/ \ -I$(LIBBPF_PATH) -I$(srctree)/tools/lib \ -g -O2 -target bpf -c $< -o $@ -profiler.skel.h: $(OUTPUT)_bpftool skeleton/profiler.bpf.o - $(QUIET_GEN)$(OUTPUT)./_bpftool gen skeleton skeleton/profiler.bpf.o > $@ +profiler.skel.h: $(BPFTOOL_BOOTSTRAP) skeleton/profiler.bpf.o + $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton skeleton/profiler.bpf.o > $@ $(OUTPUT)prog.o: prog.c profiler.skel.h - $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< $(OUTPUT)feature.o: | zdep -$(OUTPUT)bpftool: $(__OBJS) $(LIBBPF) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(__OBJS) $(LIBS) +$(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) $(LIBS) + +$(OUTPUT)bpftool: $(OBJS) $(LIBBPF) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) $(OUTPUT)%.o: %.c $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< @@ -157,7 +153,7 @@ $(OUTPUT)%.o: %.c clean: $(LIBBPF)-clean $(call QUIET_CLEAN, bpftool) $(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d - $(Q)$(RM) -- $(OUTPUT)_bpftool profiler.skel.h skeleton/profiler.bpf.o + $(Q)$(RM) -- $(BPFTOOL_BOOTSTRAP) profiler.skel.h skeleton/profiler.bpf.o $(Q)$(RM) -r -- $(OUTPUT)libbpf/ $(call QUIET_CLEAN, core-gen) $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 46bd716a9d86..bf4d7487552a 
100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -92,9 +92,16 @@ int cmd_select(const struct cmd *cmds, int argc, char **argv, if (argc < 1 && cmds[0].func) return cmds[0].func(argc, argv); - for (i = 0; cmds[i].func; i++) - if (is_prefix(*argv, cmds[i].cmd)) + for (i = 0; cmds[i].cmd; i++) { + if (is_prefix(*argv, cmds[i].cmd)) { + if (!cmds[i].func) { + p_err("command '%s' is not supported in bootstrap mode", + cmds[i].cmd); + return -1; + } return cmds[i].func(argc - 1, argv + 1); + } + } help(argc - 1, argv + 1); diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 4338ab9d86d4..aad7be74e8a7 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -194,19 +194,22 @@ int mount_bpffs_for_pin(const char *name); int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***)); int do_pin_fd(int fd, const char *name); -int do_prog(int argc, char **arg); -int do_map(int argc, char **arg); -int do_link(int argc, char **arg); -int do_event_pipe(int argc, char **argv); -int do_cgroup(int argc, char **arg); -int do_perf(int argc, char **arg); -int do_net(int argc, char **arg); -int do_tracelog(int argc, char **arg); -int do_feature(int argc, char **argv); -int do_btf(int argc, char **argv); +/* commands available in bootstrap mode */ int do_gen(int argc, char **argv); -int do_struct_ops(int argc, char **argv); -int do_iter(int argc, char **argv); +int do_btf(int argc, char **argv); + +/* non-bootstrap only commands */ +int do_prog(int argc, char **arg) __weak; +int do_map(int argc, char **arg) __weak; +int do_link(int argc, char **arg) __weak; +int do_event_pipe(int argc, char **argv) __weak; +int do_cgroup(int argc, char **arg) __weak; +int do_perf(int argc, char **arg) __weak; +int do_net(int argc, char **arg) __weak; +int do_tracelog(int argc, char **arg) __weak; +int do_feature(int argc, char **argv) __weak; +int do_struct_ops(int argc, char **argv) __weak; +int do_iter(int argc, char **argv) __weak; 
int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what); int prog_parse_fd(int *argc, char ***argv); -- cgit v1.2.3 From 05aca6da3b5ab3c5c6003dbbefc9580d9a6a308b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 19 Jun 2020 16:17:00 -0700 Subject: tools/bpftool: Generalize BPF skeleton support and generate vmlinux.h Adapt Makefile to support BPF skeleton generation beyond single profiler.bpf.c case. Also add vmlinux.h generation and switch profiler.bpf.c to use it. clang-bpf-global-var feature is extended and renamed to clang-bpf-co-re to check for support of preserve_access_index attribute, which, together with BTF for global variables, is the minimum requirement for modern BPF programs. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200619231703.738941-7-andriin@fb.com --- tools/bpf/bpftool/.gitignore | 3 +- tools/bpf/bpftool/Makefile | 42 ++++++++++++++++------ tools/bpf/bpftool/skeleton/profiler.bpf.c | 3 +- tools/bpf/bpftool/skeleton/profiler.h | 46 ------------------------- tools/build/feature/Makefile | 4 +-- tools/build/feature/test-clang-bpf-co-re.c | 9 +++++ tools/build/feature/test-clang-bpf-global-var.c | 4 --- 7 files changed, 45 insertions(+), 66 deletions(-) delete mode 100644 tools/bpf/bpftool/skeleton/profiler.h create mode 100644 tools/build/feature/test-clang-bpf-co-re.c delete mode 100644 tools/build/feature/test-clang-bpf-global-var.c (limited to 'tools') diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore index ce721adf3161..3e601bcfd461 100644 --- a/tools/bpf/bpftool/.gitignore +++ b/tools/bpf/bpftool/.gitignore @@ -7,4 +7,5 @@ bpf-helpers.* FEATURE-DUMP.bpftool feature libbpf -profiler.skel.h +/*.skel.h +/vmlinux.h diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index eec2da4d45d2..bdb6e38c6c5c 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -42,6 +42,7 @@ CFLAGS += 
-O2 CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers CFLAGS += $(filter-out -Wswitch-enum,$(EXTRA_WARNINGS)) CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \ + -I$(if $(OUTPUT),$(OUTPUT),.) \ -I$(srctree)/kernel/bpf/ \ -I$(srctree)/tools/include \ -I$(srctree)/tools/include/uapi \ @@ -61,9 +62,9 @@ CLANG ?= clang FEATURE_USER = .bpftool FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib libcap \ - clang-bpf-global-var + clang-bpf-co-re FEATURE_DISPLAY = libbfd disassembler-four-args zlib libcap \ - clang-bpf-global-var + clang-bpf-co-re check_feat := 1 NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall @@ -121,20 +122,38 @@ BPFTOOL_BOOTSTRAP := $(if $(OUTPUT),$(OUTPUT)bpftool-bootstrap,./bpftool-bootstr BOOTSTRAP_OBJS = $(addprefix $(OUTPUT),main.o common.o json_writer.o gen.o btf.o) OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o -ifneq ($(feature-clang-bpf-global-var),1) - CFLAGS += -DBPFTOOL_WITHOUT_SKELETONS -endif +VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \ + $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ + ../../../vmlinux \ + /sys/kernel/btf/vmlinux \ + /boot/vmlinux-$(shell uname -r) +VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) + +ifneq ($(VMLINUX_BTF),) +ifeq ($(feature-clang-bpf-co-re),1) + +BUILD_BPF_SKELS := 1 + +$(OUTPUT)vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL_BOOTSTRAP) + $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) btf dump file $< format c > $@ -skeleton/profiler.bpf.o: skeleton/profiler.bpf.c $(LIBBPF) +$(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF) $(QUIET_CLANG)$(CLANG) \ + -I$(if $(OUTPUT),$(OUTPUT),.) 
\ + -I$(srctree)/tools/include/uapi/ \ - -I$(LIBBPF_PATH) -I$(srctree)/tools/lib \ + -I$(LIBBPF_PATH) \ + -I$(srctree)/tools/lib \ -g -O2 -target bpf -c $< -o $@ -profiler.skel.h: $(BPFTOOL_BOOTSTRAP) skeleton/profiler.bpf.o - $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton skeleton/profiler.bpf.o > $@ +$(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP) + $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton $< > $@ + +$(OUTPUT)prog.o: $(OUTPUT)profiler.skel.h + +endif +endif -$(OUTPUT)prog.o: prog.c profiler.skel.h +CFLAGS += $(if $(BUILD_BPF_SKELS),,-DBPFTOOL_WITHOUT_SKELETONS) $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< @@ -153,7 +172,7 @@ $(OUTPUT)%.o: %.c clean: $(LIBBPF)-clean $(call QUIET_CLEAN, bpftool) $(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d - $(Q)$(RM) -- $(BPFTOOL_BOOTSTRAP) profiler.skel.h skeleton/profiler.bpf.o + $(Q)$(RM) -- $(BPFTOOL_BOOTSTRAP) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h $(Q)$(RM) -r -- $(OUTPUT)libbpf/ $(call QUIET_CLEAN, core-gen) $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool @@ -188,6 +207,7 @@ FORCE: zdep: @if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi +.SECONDARY: .PHONY: all FORCE clean install uninstall zdep .PHONY: doc doc-clean doc-install doc-uninstall .DEFAULT_GOAL := all diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c index c9d196ddb670..4e3512f700c0 100644 --- a/tools/bpf/bpftool/skeleton/profiler.bpf.c +++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (c) 2020 Facebook -#include "profiler.h" -#include +#include #include #include diff --git a/tools/bpf/bpftool/skeleton/profiler.h b/tools/bpf/bpftool/skeleton/profiler.h deleted file mode 100644 index 1f767e9510f7..000000000000 --- a/tools/bpf/bpftool/skeleton/profiler.h +++ /dev/null @@ -1,46 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 
-#ifndef __PROFILER_H -#define __PROFILER_H - -/* useful typedefs from vmlinux.h */ - -typedef signed char __s8; -typedef unsigned char __u8; -typedef short int __s16; -typedef short unsigned int __u16; -typedef int __s32; -typedef unsigned int __u32; -typedef long long int __s64; -typedef long long unsigned int __u64; - -typedef __s8 s8; -typedef __u8 u8; -typedef __s16 s16; -typedef __u16 u16; -typedef __s32 s32; -typedef __u32 u32; -typedef __s64 s64; -typedef __u64 u64; - -enum { - false = 0, - true = 1, -}; - -#ifdef __CHECKER__ -#define __bitwise__ __attribute__((bitwise)) -#else -#define __bitwise__ -#endif - -typedef __u16 __bitwise__ __le16; -typedef __u16 __bitwise__ __be16; -typedef __u32 __bitwise__ __le32; -typedef __u32 __bitwise__ __be32; -typedef __u64 __bitwise__ __le64; -typedef __u64 __bitwise__ __be64; - -typedef __u16 __bitwise__ __sum16; -typedef __u32 __bitwise__ __wsum; - -#endif /* __PROFILER_H */ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index b1f0321180f5..88371f7f0369 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -68,7 +68,7 @@ FILES= \ test-llvm-version.bin \ test-libaio.bin \ test-libzstd.bin \ - test-clang-bpf-global-var.bin \ + test-clang-bpf-co-re.bin \ test-file-handle.bin \ test-libpfm4.bin @@ -325,7 +325,7 @@ $(OUTPUT)test-libaio.bin: $(OUTPUT)test-libzstd.bin: $(BUILD) -lzstd -$(OUTPUT)test-clang-bpf-global-var.bin: +$(OUTPUT)test-clang-bpf-co-re.bin: $(CLANG) -S -g -target bpf -o - $(patsubst %.bin,%.c,$(@F)) | \ grep BTF_KIND_VAR diff --git a/tools/build/feature/test-clang-bpf-co-re.c b/tools/build/feature/test-clang-bpf-co-re.c new file mode 100644 index 000000000000..cb5265bfdd83 --- /dev/null +++ b/tools/build/feature/test-clang-bpf-co-re.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +struct test { + int a; + int b; +} __attribute__((preserve_access_index)); + +volatile struct test global_value_for_test = {}; diff 
--git a/tools/build/feature/test-clang-bpf-global-var.c b/tools/build/feature/test-clang-bpf-global-var.c deleted file mode 100644 index 221f1481d52e..000000000000 --- a/tools/build/feature/test-clang-bpf-global-var.c +++ /dev/null @@ -1,4 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (c) 2020 Facebook - -volatile int global_value_for_test = 1; -- cgit v1.2.3 From bd9bedf84b87289b9a87eebfe7917e54373e99f9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 19 Jun 2020 16:17:01 -0700 Subject: libbpf: Wrap source argument of BPF_CORE_READ macro in parentheses Wrap source argument of BPF_CORE_READ family of macros into parentheses to allow uses like this: BPF_CORE_READ((struct cast_struct *)src, a, b, c); Fixes: 7db3822ab991 ("libbpf: Add BPF_CORE_READ/BPF_CORE_READ_INTO helpers") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200619231703.738941-8-andriin@fb.com --- tools/lib/bpf/bpf_core_read.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 7009dc90e012..eae5cccff761 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -217,7 +217,7 @@ enum bpf_field_info_kind { */ #define BPF_CORE_READ_INTO(dst, src, a, ...) \ ({ \ - ___core_read(bpf_core_read, dst, src, a, ##__VA_ARGS__) \ + ___core_read(bpf_core_read, dst, (src), a, ##__VA_ARGS__) \ }) /* @@ -227,7 +227,7 @@ enum bpf_field_info_kind { */ #define BPF_CORE_READ_STR_INTO(dst, src, a, ...) \ ({ \ - ___core_read(bpf_core_read_str, dst, src, a, ##__VA_ARGS__) \ + ___core_read(bpf_core_read_str, dst, (src), a, ##__VA_ARGS__)\ }) /* @@ -254,8 +254,8 @@ enum bpf_field_info_kind { */ #define BPF_CORE_READ(src, a, ...) 
\ ({ \ - ___type(src, a, ##__VA_ARGS__) __r; \ - BPF_CORE_READ_INTO(&__r, src, a, ##__VA_ARGS__); \ + ___type((src), a, ##__VA_ARGS__) __r; \ + BPF_CORE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \ __r; \ }) -- cgit v1.2.3 From d53dee3fe0138610fcce5721bae9414377c41ec3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 19 Jun 2020 16:17:02 -0700 Subject: tools/bpftool: Show info for processes holding BPF map/prog/link/btf FDs Add bpf_iter-based way to find all the processes that hold open FDs against BPF object (map, prog, link, btf). bpftool always attempts to discover this, but will silently give up if kernel doesn't yet support bpf_iter BPF programs. Process name and PID are emitted for each process (task group). Sample output for each of 4 BPF objects: $ sudo ./bpftool prog show 2694: cgroup_device tag 8c42dee26e8cd4c2 gpl loaded_at 2020-06-16T15:34:32-0700 uid 0 xlated 648B jited 409B memlock 4096B pids systemd(1) 2907: cgroup_skb name egress tag 9ad187367cf2b9e8 gpl loaded_at 2020-06-16T18:06:54-0700 uid 0 xlated 48B jited 59B memlock 4096B map_ids 2436 btf_id 1202 pids test_progs(2238417), test_progs(2238445) $ sudo ./bpftool map show 2436: array name test_cgr.bss flags 0x400 key 4B value 8B max_entries 1 memlock 8192B btf_id 1202 pids test_progs(2238417), test_progs(2238445) 2445: array name pid_iter.rodata flags 0x480 key 4B value 4B max_entries 1 memlock 8192B btf_id 1214 frozen pids bpftool(2239612) $ sudo ./bpftool link show 61: cgroup prog 2908 cgroup_id 375301 attach_type egress pids test_progs(2238417), test_progs(2238445) 62: cgroup prog 2908 cgroup_id 375344 attach_type egress pids test_progs(2238417), test_progs(2238445) $ sudo ./bpftool btf show 1202: size 1527B prog_ids 2908,2907 map_ids 2436 pids test_progs(2238417), test_progs(2238445) 1242: size 34684B pids bpftool(2258892) Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200619231703.738941-9-andriin@fb.com 
--- tools/bpf/bpftool/Makefile | 2 + tools/bpf/bpftool/btf.c | 6 + tools/bpf/bpftool/link.c | 7 + tools/bpf/bpftool/main.c | 1 + tools/bpf/bpftool/main.h | 27 ++++ tools/bpf/bpftool/map.c | 7 + tools/bpf/bpftool/pids.c | 229 ++++++++++++++++++++++++++++++ tools/bpf/bpftool/prog.c | 7 + tools/bpf/bpftool/skeleton/pid_iter.bpf.c | 80 +++++++++++ tools/bpf/bpftool/skeleton/pid_iter.h | 12 ++ 10 files changed, 378 insertions(+) create mode 100644 tools/bpf/bpftool/pids.c create mode 100644 tools/bpf/bpftool/skeleton/pid_iter.bpf.c create mode 100644 tools/bpf/bpftool/skeleton/pid_iter.h (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index bdb6e38c6c5c..06f436e8191a 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -150,6 +150,8 @@ $(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP) $(OUTPUT)prog.o: $(OUTPUT)profiler.skel.h +$(OUTPUT)pids.o: $(OUTPUT)pid_iter.skel.h + endif endif diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index faac8189b285..fc9bc7a23db6 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -809,6 +809,7 @@ show_btf_plain(struct bpf_btf_info *info, int fd, printf("%s%u", n++ == 0 ? 
" map_ids " : ",", obj->obj_id); } + emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); printf("\n"); } @@ -841,6 +842,9 @@ show_btf_json(struct bpf_btf_info *info, int fd, jsonw_uint(json_wtr, obj->obj_id); } jsonw_end_array(json_wtr); /* map_ids */ + + emit_obj_refs_json(&refs_table, info->id, json_wtr); /* pids */ + jsonw_end_object(json_wtr); /* btf object */ } @@ -893,6 +897,7 @@ static int do_show(int argc, char **argv) close(fd); return err; } + build_obj_refs_table(&refs_table, BPF_OBJ_BTF); if (fd >= 0) { err = show_btf(fd, &btf_prog_table, &btf_map_table); @@ -939,6 +944,7 @@ static int do_show(int argc, char **argv) exit_free: delete_btf_table(&btf_prog_table); delete_btf_table(&btf_map_table); + delete_obj_refs_table(&refs_table); return err; } diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index fca57ee8fafe..7329f3134283 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -143,6 +143,9 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) } jsonw_end_array(json_wtr); } + + emit_obj_refs_json(&refs_table, info->id, json_wtr); + jsonw_end_object(json_wtr); return 0; @@ -212,6 +215,7 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) printf("\n\tpinned %s", obj->path); } } + emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); printf("\n"); @@ -257,6 +261,7 @@ static int do_show(int argc, char **argv) if (show_pinned) build_pinned_obj_table(&link_table, BPF_OBJ_LINK); + build_obj_refs_table(&refs_table, BPF_OBJ_LINK); if (argc == 2) { fd = link_parse_fd(&argc, &argv); @@ -296,6 +301,8 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); + delete_obj_refs_table(&refs_table); + return errno == ENOENT ? 
0 : -1; } diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index bf4d7487552a..4a191fcbeb82 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -31,6 +31,7 @@ bool relaxed_maps; struct pinned_obj_table prog_table; struct pinned_obj_table map_table; struct pinned_obj_table link_table; +struct obj_refs_table refs_table; static void __noreturn clean_and_exit(int i) { diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index aad7be74e8a7..ce26271e5f0c 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -127,11 +127,13 @@ static const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = { extern const char * const map_type_name[]; extern const size_t map_type_name_size; +/* keep in sync with the definition in skeleton/pid_iter.bpf.c */ enum bpf_obj_type { BPF_OBJ_UNKNOWN, BPF_OBJ_PROG, BPF_OBJ_MAP, BPF_OBJ_LINK, + BPF_OBJ_BTF, }; extern const char *bin_name; @@ -139,12 +141,14 @@ extern const char *bin_name; extern json_writer_t *json_wtr; extern bool json_output; extern bool show_pinned; +extern bool show_pids; extern bool block_mount; extern bool verifier_logs; extern bool relaxed_maps; extern struct pinned_obj_table prog_table; extern struct pinned_obj_table map_table; extern struct pinned_obj_table link_table; +extern struct obj_refs_table refs_table; void __printf(1, 2) p_err(const char *fmt, ...); void __printf(1, 2) p_info(const char *fmt, ...); @@ -168,12 +172,35 @@ struct pinned_obj { struct hlist_node hash; }; +struct obj_refs_table { + DECLARE_HASHTABLE(table, 16); +}; + +struct obj_ref { + int pid; + char comm[16]; +}; + +struct obj_refs { + struct hlist_node node; + __u32 id; + int ref_cnt; + struct obj_ref *refs; +}; + struct btf; struct bpf_line_info; int build_pinned_obj_table(struct pinned_obj_table *table, enum bpf_obj_type type); void delete_pinned_obj_table(struct pinned_obj_table *tab); +__weak int build_obj_refs_table(struct obj_refs_table *table, + enum bpf_obj_type type); 
+__weak void delete_obj_refs_table(struct obj_refs_table *table); +__weak void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, + json_writer_t *json_wtr); +__weak void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, + const char *prefix); void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode); void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode); diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index b9eee19b094c..0a6a5d82d380 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -509,6 +509,8 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) jsonw_end_array(json_wtr); } + emit_obj_refs_json(&refs_table, info->id, json_wtr); + jsonw_end_object(json_wtr); return 0; @@ -596,6 +598,8 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) if (frozen) printf("%sfrozen", info->btf_id ? " " : ""); + emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); + printf("\n"); return 0; } @@ -654,6 +658,7 @@ static int do_show(int argc, char **argv) if (show_pinned) build_pinned_obj_table(&map_table, BPF_OBJ_MAP); + build_obj_refs_table(&refs_table, BPF_OBJ_MAP); if (argc == 2) return do_show_subset(argc, argv); @@ -697,6 +702,8 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); + delete_obj_refs_table(&refs_table); + return errno == ENOENT ? 
0 : -1; } diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c new file mode 100644 index 000000000000..3474a91743ff --- /dev/null +++ b/tools/bpf/bpftool/pids.c @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2020 Facebook */ +#include <errno.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <bpf/bpf.h> + +#include "main.h" +#include "skeleton/pid_iter.h" + +#ifdef BPFTOOL_WITHOUT_SKELETONS +/* no PID iterator skeleton: callers use all four entry points unconditionally, so each needs a silent stub */ +int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) +{ + return -ENOTSUP; +} +void delete_obj_refs_table(struct obj_refs_table *table) {} +void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix) {} +void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, json_writer_t *json_wtr) {} +#else /* BPFTOOL_WITHOUT_SKELETONS */ + +#include "pid_iter.skel.h" + +static void add_ref(struct obj_refs_table *table, struct pid_iter_entry *e) +{ + struct obj_refs *refs; + struct obj_ref *ref; + void *tmp; + int i; + + hash_for_each_possible(table->table, refs, node, e->id) { + if (refs->id != e->id) + continue; + + for (i = 0; i < refs->ref_cnt; i++) { + if (refs->refs[i].pid == e->pid) + return; + } + + tmp = realloc(refs->refs, (refs->ref_cnt + 1) * sizeof(*ref)); + if (!tmp) { + p_err("failed to re-alloc memory for ID %u, PID %d, COMM %s...", + e->id, e->pid, e->comm); + return; + } + refs->refs = tmp; + ref = &refs->refs[refs->ref_cnt]; + ref->pid = e->pid; + memcpy(ref->comm, e->comm, sizeof(ref->comm)); + refs->ref_cnt++; + + return; + } + + /* new ref */ + refs = calloc(1, sizeof(*refs)); + if (!refs) { + p_err("failed to alloc memory for ID %u, PID %d, COMM %s...", + e->id, e->pid, e->comm); + return; + } + + refs->id = e->id; + refs->refs = malloc(sizeof(*refs->refs)); + if (!refs->refs) { + free(refs); + p_err("failed to alloc memory for ID %u, PID %d, COMM %s...", + e->id, e->pid, e->comm); + return; + } + ref = &refs->refs[0]; + ref->pid = e->pid; + memcpy(ref->comm, e->comm, sizeof(ref->comm)); + refs->ref_cnt = 1; + hash_add(table->table, &refs->node, e->id); +} + +static int
__printf(2, 0) +libbpf_print_none(__maybe_unused enum libbpf_print_level level, + __maybe_unused const char *format, + __maybe_unused va_list args) +{ + return 0; +} + +int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) +{ + struct pid_iter_entry *e; + /* fit whole entries only, so a full read() never ends mid-record */ + char buf[4096 / sizeof(*e) * sizeof(*e)]; + struct pid_iter_bpf *skel; + int err, ret, fd = -1, i; + libbpf_print_fn_t default_print; + + hash_init(table->table); + set_max_rlimit(); + + skel = pid_iter_bpf__open(); + if (!skel) { + p_err("failed to open PID iterator skeleton"); + return -1; + } + + skel->rodata->obj_type = type; + + /* we don't want output polluted with libbpf errors if bpf_iter is not + * supported + */ + default_print = libbpf_set_print(libbpf_print_none); + err = pid_iter_bpf__load(skel); + libbpf_set_print(default_print); + if (err) { + /* too bad, kernel doesn't support BPF iterators yet */ + err = 0; + goto out; + } + err = pid_iter_bpf__attach(skel); + if (err) { + /* if we loaded above successfully, attach has to succeed */ + p_err("failed to attach PID iterator: %d", err); + goto out; + } + + fd = bpf_iter_create(bpf_link__fd(skel->links.iter)); + if (fd < 0) { + err = -errno; + p_err("failed to create PID iterator session: %d", err); + goto out; + } + + while (true) { + ret = read(fd, buf, sizeof(buf)); + if (ret < 0) { + err = -errno; + p_err("failed to read PID iterator output: %d", err); + goto out; + } + if (ret == 0) + break; + if (ret % sizeof(*e)) { + err = -EINVAL; + p_err("invalid PID iterator output format"); + goto out; + } + ret /= sizeof(*e); + + e = (void *)buf; + for (i = 0; i < ret; i++, e++) + add_ref(table, e); + } + err = 0; +out: + if (fd >= 0) + close(fd); + pid_iter_bpf__destroy(skel); + return err; +} + +void delete_obj_refs_table(struct obj_refs_table *table) +{ + struct obj_refs *refs; + struct hlist_node *tmp; + unsigned int bkt; + + hash_for_each_safe(table->table, bkt, tmp, refs, node) { + hash_del(&refs->node); + free(refs->refs); + free(refs); + } +} +
+void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, json_writer_t *json_wtr) +{ + struct obj_refs *refs; + struct obj_ref *ref; + int i; + + if (hash_empty(table->table)) + return; + + hash_for_each_possible(table->table, refs, node, id) { + if (refs->id != id) + continue; + if (refs->ref_cnt == 0) + break; + + jsonw_name(json_wtr, "pids"); + jsonw_start_array(json_wtr); + for (i = 0; i < refs->ref_cnt; i++) { + ref = &refs->refs[i]; + jsonw_start_object(json_wtr); + jsonw_int_field(json_wtr, "pid", ref->pid); + jsonw_string_field(json_wtr, "comm", ref->comm); + jsonw_end_object(json_wtr); + } + jsonw_end_array(json_wtr); + break; + } +} + +void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix) +{ + struct obj_refs *refs; + struct obj_ref *ref; + int i; + + if (hash_empty(table->table)) + return; + + hash_for_each_possible(table->table, refs, node, id) { + if (refs->id != id) + continue; + if (refs->ref_cnt == 0) + break; + + printf("%s", prefix); + for (i = 0; i < refs->ref_cnt; i++) { + ref = &refs->refs[i]; + printf("%s%s(%d)", i == 0 ? 
"" : ", ", ref->comm, ref->pid); + } + break; + } +} + + +#endif diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 53d47610ff58..e21fa8ad2efa 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -190,6 +190,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) jsonw_end_array(json_wtr); } + emit_obj_refs_json(&refs_table, info->id, json_wtr); + jsonw_end_object(json_wtr); } @@ -256,6 +258,8 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) if (info->btf_id) printf("\n\tbtf_id %d", info->btf_id); + emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); + printf("\n"); } @@ -321,6 +325,7 @@ static int do_show(int argc, char **argv) if (show_pinned) build_pinned_obj_table(&prog_table, BPF_OBJ_PROG); + build_obj_refs_table(&refs_table, BPF_OBJ_PROG); if (argc == 2) return do_show_subset(argc, argv); @@ -362,6 +367,8 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); + delete_obj_refs_table(&refs_table); + return err; } diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c new file mode 100644 index 000000000000..8468a608911e --- /dev/null +++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (c) 2020 Facebook */ +#include +#include +#include +#include +#include "pid_iter.h" + +/* keep in sync with the definition in main.h */ +enum bpf_obj_type { + BPF_OBJ_UNKNOWN, + BPF_OBJ_PROG, + BPF_OBJ_MAP, + BPF_OBJ_LINK, + BPF_OBJ_BTF, +}; + +extern const void bpf_link_fops __ksym; +extern const void bpf_map_fops __ksym; +extern const void bpf_prog_fops __ksym; +extern const void btf_fops __ksym; + +const volatile enum bpf_obj_type obj_type = BPF_OBJ_UNKNOWN; + +static __always_inline __u32 get_obj_id(void *ent, enum bpf_obj_type type) +{ + switch (type) { + case BPF_OBJ_PROG: + return BPF_CORE_READ((struct bpf_prog *)ent, aux, id); + case 
BPF_OBJ_MAP: + return BPF_CORE_READ((struct bpf_map *)ent, id); + case BPF_OBJ_BTF: + return BPF_CORE_READ((struct btf *)ent, id); + case BPF_OBJ_LINK: + return BPF_CORE_READ((struct bpf_link *)ent, id); + default: + return 0; + } +} + +SEC("iter/task_file") +int iter(struct bpf_iter__task_file *ctx) +{ + struct file *file = ctx->file; + struct task_struct *task = ctx->task; + struct pid_iter_entry e; + const void *fops; + + if (!file || !task) + return 0; + + switch (obj_type) { + case BPF_OBJ_PROG: + fops = &bpf_prog_fops; + break; + case BPF_OBJ_MAP: + fops = &bpf_map_fops; + break; + case BPF_OBJ_BTF: + fops = &btf_fops; + break; + case BPF_OBJ_LINK: + fops = &bpf_link_fops; + break; + default: + return 0; + } + + if (file->f_op != fops) + return 0; + + e.pid = task->tgid; + e.id = get_obj_id(file->private_data, obj_type); + bpf_probe_read(&e.comm, sizeof(e.comm), task->group_leader->comm); + bpf_seq_write(ctx->meta->seq, &e, sizeof(e)); + + return 0; +} + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/bpf/bpftool/skeleton/pid_iter.h b/tools/bpf/bpftool/skeleton/pid_iter.h new file mode 100644 index 000000000000..5692cf257adb --- /dev/null +++ b/tools/bpf/bpftool/skeleton/pid_iter.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (c) 2020 Facebook */ +#ifndef __PID_ITER_H +#define __PID_ITER_H + +struct pid_iter_entry { + __u32 id; + int pid; + char comm[16]; +}; + +#endif -- cgit v1.2.3 From 075c776658190681d2bf9997306f871d6c8a9b36 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 19 Jun 2020 16:17:03 -0700 Subject: tools/bpftool: Add documentation and sample output for process info Add statements about bpftool being able to discover process info, holding reference to BPF map, prog, link, or BTF. Show example output as well. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200619231703.738941-10-andriin@fb.com --- tools/bpf/bpftool/Documentation/bpftool-btf.rst | 5 +++++ tools/bpf/bpftool/Documentation/bpftool-link.rst | 13 ++++++++++++- tools/bpf/bpftool/Documentation/bpftool-map.rst | 8 +++++++- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 11 +++++++++++ 4 files changed, 35 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst index ce3a724f50c1..896f4c6c2870 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst @@ -36,6 +36,11 @@ DESCRIPTION otherwise list all BTF objects currently loaded on the system. + Since Linux 5.8 bpftool is able to discover information about + processes that hold open file descriptors (FDs) against BTF + objects. On such kernels bpftool will automatically emit this + information as well. + **bpftool btf dump** *BTF_SRC* Dump BTF entries from a given *BTF_SRC*. diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst index 0e43d7b06c11..38b0949a185b 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-link.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst @@ -37,6 +37,11 @@ DESCRIPTION zero or more named attributes, some of which depend on type of link. + Since Linux 5.8 bpftool is able to discover information about + processes that hold open file descriptors (FDs) against BPF + links. On such kernels bpftool will automatically emit this + information as well. + **bpftool link pin** *LINK* *FILE* Pin link *LINK* as *FILE*. 
@@ -82,6 +87,7 @@ EXAMPLES 10: cgroup prog 25 cgroup_id 614 attach_type egress + pids test_progs(223) **# bpftool --json --pretty link show** @@ -91,7 +97,12 @@ EXAMPLES "type": "cgroup", "prog_id": 25, "cgroup_id": 614, - "attach_type": "egress" + "attach_type": "egress", + "pids": [{ + "pid": 223, + "comm": "test_progs" + } + ] } ] diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 31101643e57c..5bc2123e9944 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -62,6 +62,11 @@ DESCRIPTION Output will start with map ID followed by map type and zero or more named attributes (depending on kernel version). + Since Linux 5.8 bpftool is able to discover information about + processes that hold open file descriptors (FDs) against BPF + maps. On such kernels bpftool will automatically emit this + information as well. + **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*] Create a new map with given parameters and pin it to *bpffs* as *FILE*. @@ -180,7 +185,8 @@ EXAMPLES :: 10: hash name some_map flags 0x0 - key 4B value 8B max_entries 2048 memlock 167936B + key 4B value 8B max_entries 2048 memlock 167936B + pids systemd(1) The following three commands are equivalent: diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 2b254959d488..412ea3d9bf7f 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -75,6 +75,11 @@ DESCRIPTION program run. Activation or deactivation of the feature is performed via the **kernel.bpf_stats_enabled** sysctl knob. + Since Linux 5.8 bpftool is able to discover information about + processes that hold open file descriptors (FDs) against BPF + programs. 
On such kernels bpftool will automatically emit this + information as well. + **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** | **linum** }] Dump eBPF instructions of the programs from the kernel. By default, eBPF will be disassembled and printed to standard @@ -243,6 +248,7 @@ EXAMPLES 10: xdp name some_prog tag 005a3d2123620c8b gpl run_time_ns 81632 run_cnt 10 loaded_at 2017-09-29T20:11:00+0000 uid 0 xlated 528B jited 370B memlock 4096B map_ids 10 + pids systemd(1) **# bpftool --json --pretty prog show** @@ -262,6 +268,11 @@ EXAMPLES "bytes_jited": 370, "bytes_memlock": 4096, "map_ids": [10 + ], + "pids": [{ + "pid": 1, + "comm": "systemd" + } ] } ] -- cgit v1.2.3 From bdb7b79b4ce864a724250e1d35948c46f135de36 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 22 Jun 2020 20:22:21 -0700 Subject: bpf: Switch most helper return values from 32-bit int to 64-bit long Switch most of BPF helper definitions from returning int to long. These definitions are coming from comments in BPF UAPI header and are used to generate bpf_helper_defs.h (under libbpf) to be later included and used from BPF programs. In actual in-kernel implementation, all the helpers are defined as returning u64, but due to some historical reasons, most of them are actually defined as returning int in UAPI (usually, to return 0 on success, and negative value on error). This actually causes Clang to quite often generate sub-optimal code, because compiler believes that return value is 32-bit, and in a lot of cases has to be up-converted (usually with a pair of 32-bit bit shifts) to 64-bit values, before they can be used further in BPF code. Besides just "polluting" the code, these 32-bit shifts quite often cause problems for cases in which return value matters. This is especially the case for the family of bpf_probe_read_str() functions. 
There are few other similar helpers (e.g., bpf_read_branch_records()), in which return value is used by BPF program logic to record variable-length data and process it. For such cases, BPF program logic carefully manages offsets within some array or map to read variable-length data. For such uses, it's crucial for BPF verifier to track possible range of register values to prove that all the accesses happen within given memory bounds. Those extraneous zero-extending bit shifts, inserted by Clang (and quite often interleaved with other code, which makes the issues even more challenging and sometimes requires employing extra per-variable compiler barriers), throws off verifier logic and makes it mark registers as having unknown variable offset. We'll study this pattern a bit later below. Another common pattern is to check return of BPF helper for non-zero state to detect error conditions and attempt alternative actions in such case. Even in this simple and straightforward case, this 32-bit vs BPF's native 64-bit mode quite often leads to sub-optimal and unnecessary extra code. We'll look at this pattern as well. Clang's BPF target supports two modes of code generation: ALU32, in which it is capable of using lower 32-bit parts of registers, and no-ALU32, in which only full 64-bit registers are being used. ALU32 mode somewhat mitigates the above described problems, but not in all cases. This patch switches all the cases in which BPF helpers return 0 or negative error from returning int to returning long. It is shown below that such change in definition leads to equivalent or better code. No-ALU32 mode benefits more, but ALU32 mode doesn't degrade or still gets improved code generation. Another class of cases switched from int to long are bpf_probe_read_str()-like helpers, which encode successful case as non-negative values, while still returning negative value for errors. 
In all of such cases, correctness is preserved due to two's complement encoding of negative values and the fact that all helpers return values with 32-bit absolute value. Two's complement ensures that for negative values higher 32 bits are all ones and when truncated, leave valid negative 32-bit value with the same value. Non-negative values have upper 32 bits set to zero and similarly preserve value when high 32 bits are truncated. This means that just casting to int/u32 is correct and efficient (and in ALU32 mode doesn't require any extra shifts). To minimize the chances of regressions, two code patterns were investigated, as mentioned above. For both patterns, BPF assembly was analyzed in ALU32/NO-ALU32 compiler modes, both with current 32-bit int return type and new 64-bit long return type. Case 1. Variable-length data reading and concatenation. This is quite ubiquitous pattern in tracing/monitoring applications, reading data like process's environment variables, file path, etc. In such case, many pieces of string-like variable-length data are read into a single big buffer, and at the end of the process, only a part of array containing actual data is sent to user-space for further processing. This case is tested in test_varlen.c selftest (in the next patch). Code flow is roughly as follows: void *payload = &sample->payload; u64 len; len = bpf_probe_read_kernel_str(payload, MAX_SZ1, &source_data1); if (len <= MAX_SZ1) { payload += len; sample->len1 = len; } len = bpf_probe_read_kernel_str(payload, MAX_SZ2, &source_data2); if (len <= MAX_SZ2) { payload += len; sample->len2 = len; } /* and so on */ sample->total_len = payload - &sample->payload; /* send over, e.g., perf buffer */ There could be two variations with slightly different code generated: when len is 64-bit integer and when it is 32-bit integer. Both variations were analysed. BPF assembly instructions between two successive invocations of bpf_probe_read_kernel_str() were used to check code regressions. 
Results are below, followed by short analysis. Left side is using helpers with int return type, the right one is after the switch to long. ALU32 + INT ALU32 + LONG =========== ============ 64-BIT (13 insns): 64-BIT (10 insns): ------------------------------------ ------------------------------------ 17: call 115 17: call 115 18: if w0 > 256 goto +9 18: if r0 > 256 goto +6 19: w1 = w0 19: r1 = 0 ll 20: r1 <<= 32 21: *(u64 *)(r1 + 0) = r0 21: r1 s>>= 32 22: r6 = 0 ll 22: r2 = 0 ll 24: r6 += r0 24: *(u64 *)(r2 + 0) = r1 00000000000000c8 : 25: r6 = 0 ll 25: r1 = r6 27: r6 += r1 26: w2 = 256 00000000000000e0 : 27: r3 = 0 ll 28: r1 = r6 29: call 115 29: w2 = 256 30: r3 = 0 ll 32: call 115 32-BIT (11 insns): 32-BIT (12 insns): ------------------------------------ ------------------------------------ 17: call 115 17: call 115 18: if w0 > 256 goto +7 18: if w0 > 256 goto +8 19: r1 = 0 ll 19: r1 = 0 ll 21: *(u32 *)(r1 + 0) = r0 21: *(u32 *)(r1 + 0) = r0 22: w1 = w0 22: r0 <<= 32 23: r6 = 0 ll 23: r0 >>= 32 25: r6 += r1 24: r6 = 0 ll 00000000000000d0 : 26: r6 += r0 26: r1 = r6 00000000000000d8 : 27: w2 = 256 27: r1 = r6 28: r3 = 0 ll 28: w2 = 256 30: call 115 29: r3 = 0 ll 31: call 115 In ALU32 mode, the variant using 64-bit length variable clearly wins and avoids unnecessary zero-extension bit shifts. In practice, this is even more important and good, because BPF code won't need to do extra checks to "prove" that payload/len are within good bounds. 32-bit len is one instruction longer. Clang decided to do 64-to-32 casting with two bit shifts, instead of equivalent `w1 = w0` assignment. The former uses extra register. The latter might potentially lose some range information, but not for 32-bit value. So in this case, verifier infers that r0 is [0, 256] after check at 18:, and shifting 32 bits left/right keeps that range intact. We should probably look into Clang's logic and see why it chooses bitshifts over sub-register assignments for this. 
NO-ALU32 + INT NO-ALU32 + LONG ============== =============== 64-BIT (14 insns): 64-BIT (10 insns): ------------------------------------ ------------------------------------ 17: call 115 17: call 115 18: r0 <<= 32 18: if r0 > 256 goto +6 19: r1 = r0 19: r1 = 0 ll 20: r1 >>= 32 21: *(u64 *)(r1 + 0) = r0 21: if r1 > 256 goto +7 22: r6 = 0 ll 22: r0 s>>= 32 24: r6 += r0 23: r1 = 0 ll 00000000000000c8 : 25: *(u64 *)(r1 + 0) = r0 25: r1 = r6 26: r6 = 0 ll 26: r2 = 256 28: r6 += r0 27: r3 = 0 ll 00000000000000e8 : 29: call 115 29: r1 = r6 30: r2 = 256 31: r3 = 0 ll 33: call 115 32-BIT (13 insns): 32-BIT (13 insns): ------------------------------------ ------------------------------------ 17: call 115 17: call 115 18: r1 = r0 18: r1 = r0 19: r1 <<= 32 19: r1 <<= 32 20: r1 >>= 32 20: r1 >>= 32 21: if r1 > 256 goto +6 21: if r1 > 256 goto +6 22: r2 = 0 ll 22: r2 = 0 ll 24: *(u32 *)(r2 + 0) = r0 24: *(u32 *)(r2 + 0) = r0 25: r6 = 0 ll 25: r6 = 0 ll 27: r6 += r1 27: r6 += r1 00000000000000e0 : 00000000000000e0 : 28: r1 = r6 28: r1 = r6 29: r2 = 256 29: r2 = 256 30: r3 = 0 ll 30: r3 = 0 ll 32: call 115 32: call 115 In NO-ALU32 mode, for the case of 64-bit len variable, Clang generates much superior code, as expected, eliminating unnecessary bit shifts. For 32-bit len, code is identical. So overall, only ALU-32 32-bit len case is more-or-less equivalent and the difference stems from internal Clang decision, rather than compiler lacking enough information about types. Case 2. Let's look at the simpler case of checking return result of BPF helper for errors. 
The code is very simple: long bla; if (bpf_probe_read_kernel(&bla, sizeof(bla), 0)) return 1; else return 0; ALU32 + CHECK (9 insns) ALU32 + CHECK (9 insns) ==================================== ==================================== 0: r1 = r10 0: r1 = r10 1: r1 += -8 1: r1 += -8 2: w2 = 8 2: w2 = 8 3: r3 = 0 3: r3 = 0 4: call 113 4: call 113 5: w1 = w0 5: r1 = r0 6: w0 = 1 6: w0 = 1 7: if w1 != 0 goto +1 7: if r1 != 0 goto +1 8: w0 = 0 8: w0 = 0 0000000000000048 : 0000000000000048 : 9: exit 9: exit Almost identical code, the only difference is the use of full register assignment (r1 = r0) vs half-registers (w1 = w0) in instruction #5. On 32-bit architectures, new BPF assembly might be slightly less optimal, in theory. But one can argue that's not a big issue, given that use of full registers is still prevalent (e.g., for parameter passing). NO-ALU32 + CHECK (11 insns) NO-ALU32 + CHECK (9 insns) ==================================== ==================================== 0: r1 = r10 0: r1 = r10 1: r1 += -8 1: r1 += -8 2: r2 = 8 2: r2 = 8 3: r3 = 0 3: r3 = 0 4: call 113 4: call 113 5: r1 = r0 5: r1 = r0 6: r1 <<= 32 6: r0 = 1 7: r1 >>= 32 7: if r1 != 0 goto +1 8: r0 = 1 8: r0 = 0 9: if r1 != 0 goto +1 0000000000000048 : 10: r0 = 0 9: exit 0000000000000058 : 11: exit NO-ALU32 is a clear improvement, getting rid of unnecessary zero-extension bit shifts. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200623032224.4020118-1-andriin@fb.com --- tools/include/uapi/linux/bpf.h | 192 ++++++++++++++++++++--------------------- 1 file changed, 96 insertions(+), 96 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 19684813faae..be0efee49093 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -653,7 +653,7 @@ union bpf_attr { * Map value associated to *key*, or **NULL** if no entry was * found.
* - * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) + * long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) * Description * Add or update the value of the entry associated to *key* in * *map* with *value*. *flags* is one of: @@ -671,13 +671,13 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_delete_elem(struct bpf_map *map, const void *key) + * long bpf_map_delete_elem(struct bpf_map *map, const void *key) * Description * Delete entry with *key* from *map*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr) * Description * For tracing programs, safely attempt to read *size* bytes from * kernel space address *unsafe_ptr* and store the data in *dst*. @@ -695,7 +695,7 @@ union bpf_attr { * Return * Current *ktime*. * - * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...) + * long bpf_trace_printk(const char *fmt, u32 fmt_size, ...) * Description * This helper is a "printk()-like" facility for debugging. It * prints a message defined by format *fmt* (of size *fmt_size*) @@ -775,7 +775,7 @@ union bpf_attr { * Return * The SMP id of the processor running the program. * - * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) + * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description * Store *len* bytes from address *from* into the packet * associated to *skb*, at *offset*. *flags* are a combination of @@ -792,7 +792,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. 
* - * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) + * long bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) * Description * Recompute the layer 3 (e.g. IP) checksum for the packet * associated to *skb*. Computation is incremental, so the helper @@ -817,7 +817,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) + * long bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) * Description * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the * packet associated to *skb*. Computation is incremental, so the @@ -849,7 +849,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) + * long bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) * Description * This special helper is used to trigger a "tail call", or in * other words, to jump into another eBPF program. The same stack @@ -880,7 +880,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) + * long bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) * Description * Clone and redirect the packet associated to *skb* to another * net device of index *ifindex*. Both ingress and egress @@ -916,7 +916,7 @@ union bpf_attr { * A 64-bit integer containing the current GID and UID, and * created as such: *current_gid* **<< 32 \|** *current_uid*. * - * int bpf_get_current_comm(void *buf, u32 size_of_buf) + * long bpf_get_current_comm(void *buf, u32 size_of_buf) * Description * Copy the **comm** attribute of the current task into *buf* of * *size_of_buf*. 
The **comm** attribute contains the name of @@ -953,7 +953,7 @@ union bpf_attr { * Return * The classid, or 0 for the default unconfigured classid. * - * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) + * long bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) * Description * Push a *vlan_tci* (VLAN tag control information) of protocol * *vlan_proto* to the packet associated to *skb*, then update @@ -969,7 +969,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_vlan_pop(struct sk_buff *skb) + * long bpf_skb_vlan_pop(struct sk_buff *skb) * Description * Pop a VLAN header from the packet associated to *skb*. * @@ -981,7 +981,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * long bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) * Description * Get tunnel metadata. This helper takes a pointer *key* to an * empty **struct bpf_tunnel_key** of **size**, that will be @@ -1032,7 +1032,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * long bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) * Description * Populate tunnel metadata for packet associated to *skb.* The * tunnel metadata is set to the contents of *key*, of *size*. The @@ -1098,7 +1098,7 @@ union bpf_attr { * The value of the perf event counter read from the map, or a * negative error code in case of failure. * - * int bpf_redirect(u32 ifindex, u64 flags) + * long bpf_redirect(u32 ifindex, u64 flags) * Description * Redirect the packet to another net device of index *ifindex*. 
* This helper is somewhat similar to **bpf_clone_redirect**\ @@ -1145,7 +1145,7 @@ union bpf_attr { * The realm of the route for the packet associated to *skb*, or 0 * if none was found. * - * int bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -1190,7 +1190,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len) + * long bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len) * Description * This helper was provided as an easy way to load data from a * packet. It can be used to load *len* bytes from *offset* from @@ -1207,7 +1207,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags) + * long bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags) * Description * Walk a user or a kernel stack and return its id. To achieve * this, the helper needs *ctx*, which is a pointer to the context @@ -1276,7 +1276,7 @@ union bpf_attr { * The checksum result, or a negative error code in case of * failure. * - * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) + * long bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) * Description * Retrieve tunnel options metadata for the packet associated to * *skb*, and store the raw tunnel option data to the buffer *opt* @@ -1294,7 +1294,7 @@ union bpf_attr { * Return * The size of the option data retrieved. 
* - * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) + * long bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) * Description * Set tunnel options metadata for the packet associated to *skb* * to the option data contained in the raw buffer *opt* of *size*. @@ -1304,7 +1304,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) + * long bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) * Description * Change the protocol of the *skb* to *proto*. Currently * supported are transition from IPv4 to IPv6, and from IPv6 to @@ -1331,7 +1331,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_change_type(struct sk_buff *skb, u32 type) + * long bpf_skb_change_type(struct sk_buff *skb, u32 type) * Description * Change the packet type for the packet associated to *skb*. This * comes down to setting *skb*\ **->pkt_type** to *type*, except @@ -1358,7 +1358,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) + * long bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) * Description * Check whether *skb* is a descendant of the cgroup2 held by * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. @@ -1389,7 +1389,7 @@ union bpf_attr { * Return * A pointer to the current task struct. * - * int bpf_probe_write_user(void *dst, const void *src, u32 len) + * long bpf_probe_write_user(void *dst, const void *src, u32 len) * Description * Attempt in a safe way to write *len* bytes from the buffer * *src* to *dst* in memory. It only works for threads that are in @@ -1408,7 +1408,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. 
* - * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) + * long bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) * Description * Check whether the probe is being run is the context of a given * subset of the cgroup2 hierarchy. The cgroup2 to test is held by @@ -1420,7 +1420,7 @@ union bpf_attr { * * 1, if the *skb* task does not belong to the cgroup2. * * A negative error code, if an error occurred. * - * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) + * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) * Description * Resize (trim or grow) the packet associated to *skb* to the * new *len*. The *flags* are reserved for future usage, and must @@ -1444,7 +1444,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_pull_data(struct sk_buff *skb, u32 len) + * long bpf_skb_pull_data(struct sk_buff *skb, u32 len) * Description * Pull in non-linear data in case the *skb* is non-linear and not * all of *len* are part of the linear section. Make *len* bytes @@ -1500,7 +1500,7 @@ union bpf_attr { * recalculation the next time the kernel tries to access this * hash or when the **bpf_get_hash_recalc**\ () helper is called. * - * int bpf_get_numa_node_id(void) + * long bpf_get_numa_node_id(void) * Description * Return the id of the current NUMA node. The primary use case * for this helper is the selection of sockets for the local NUMA @@ -1511,7 +1511,7 @@ union bpf_attr { * Return * The id of current NUMA node. * - * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) + * long bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) * Description * Grows headroom of packet associated to *skb* and adjusts the * offset of the MAC header accordingly, adding *len* bytes of @@ -1532,7 +1532,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. 
* - * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) * Description * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that * it is possible to use a negative value for *delta*. This helper @@ -1547,7 +1547,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe kernel address * *unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for @@ -1595,14 +1595,14 @@ union bpf_attr { * is returned (note that **overflowuid** might also be the actual * UID value for the socket). * - * u32 bpf_set_hash(struct sk_buff *skb, u32 hash) + * long bpf_set_hash(struct sk_buff *skb, u32 hash) * Description * Set the full hash for *skb* (set the field *skb*\ **->hash**) * to value *hash*. * Return * 0 * - * int bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) + * long bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **setsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1630,7 +1630,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) + * long bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) * Description * Grow or shrink the room for data in the packet associated to * *skb* by *len_diff*, and according to the selected *mode*. @@ -1676,7 +1676,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. 
* - * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) * Description * Redirect the packet to the endpoint referenced by *map* at * index *key*. Depending on its type, this *map* can contain @@ -1697,7 +1697,7 @@ union bpf_attr { * **XDP_REDIRECT** on success, or the value of the two lower bits * of the *flags* argument on error. * - * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) + * long bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) * Description * Redirect the packet to the socket referenced by *map* (of type * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and @@ -1708,7 +1708,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) + * long bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a *map* referencing sockets. The * *skops* is used as a new value for the entry associated to @@ -1727,7 +1727,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) * Description * Adjust the address pointed by *xdp_md*\ **->data_meta** by * *delta* (which can be positive or negative). Note that this @@ -1756,7 +1756,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) + * long bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) * Description * Read the value of a perf event counter, and store it into *buf* * of size *buf_size*. 
This helper relies on a *map* of type @@ -1806,7 +1806,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) + * long bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) * Description * For en eBPF program attached to a perf event, retrieve the * value of the event counter associated to *ctx* and store it in @@ -1817,7 +1817,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) + * long bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **getsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1842,7 +1842,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_override_return(struct pt_regs *regs, u64 rc) + * long bpf_override_return(struct pt_regs *regs, u64 rc) * Description * Used for error injection, this helper uses kprobes to override * the return value of the probed function, and to set it to *rc*. @@ -1867,7 +1867,7 @@ union bpf_attr { * Return * 0 * - * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) + * long bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) * Description * Attempt to set the value of the **bpf_sock_ops_cb_flags** field * for the full TCP socket associated to *bpf_sock_ops* to @@ -1911,7 +1911,7 @@ union bpf_attr { * be set is returned (which comes down to 0 if all bits were set * as required). 
* - * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) + * long bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) * Description * This helper is used in programs implementing policies at the * socket level. If the message *msg* is allowed to pass (i.e. if @@ -1925,7 +1925,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) + * long bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) * Description * For socket policies, apply the verdict of the eBPF program to * the next *bytes* (number of bytes) of message *msg*. @@ -1959,7 +1959,7 @@ union bpf_attr { * Return * 0 * - * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) + * long bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) * Description * For socket policies, prevent the execution of the verdict eBPF * program for message *msg* until *bytes* (byte number) have been @@ -1977,7 +1977,7 @@ union bpf_attr { * Return * 0 * - * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) + * long bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) * Description * For socket policies, pull in non-linear data from user space * for *msg* and set pointers *msg*\ **->data** and *msg*\ @@ -2008,7 +2008,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) + * long bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) * Description * Bind the socket associated to *ctx* to the address pointed by * *addr*, of length *addr_len*. This allows for making outgoing @@ -2026,7 +2026,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. 
* - * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) * Description * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is * possible to both shrink and grow the packet tail. @@ -2040,7 +2040,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) + * long bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) * Description * Retrieve the XFRM state (IP transform framework, see also * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*. @@ -2056,7 +2056,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags) + * long bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. * To achieve this, the helper needs *ctx*, which is a pointer @@ -2089,7 +2089,7 @@ union bpf_attr { * A non-negative value equal to or less than *size* on success, * or a negative error in case of failure. * - * int bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) + * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) * Description * This helper is similar to **bpf_skb_load_bytes**\ () in that * it provides an easy way to load *len* bytes from *offset* @@ -2111,7 +2111,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) + * long bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) * Description * Do FIB lookup in kernel tables using parameters in *params*. 
* If lookup is successful and result shows packet is to be @@ -2142,7 +2142,7 @@ union bpf_attr { * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the * packet is not forwarded or needs assist from full stack * - * int bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) + * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a sockhash *map* referencing sockets. * The *skops* is used as a new value for the entry associated to @@ -2161,7 +2161,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) + * long bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) * Description * This helper is used in programs implementing policies at the * socket level. If the message *msg* is allowed to pass (i.e. if @@ -2175,7 +2175,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) + * long bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) * Description * This helper is used in programs implementing policies at the * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. @@ -2189,7 +2189,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) + * long bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) * Description * Encapsulate the packet associated to *skb* within a Layer 3 * protocol header. This header is provided in the buffer at @@ -2226,7 +2226,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. 
* - * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) + * long bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) * Description * Store *len* bytes from address *from* into the packet * associated to *skb*, at *offset*. Only the flags, tag and TLVs @@ -2241,7 +2241,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) + * long bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) * Description * Adjust the size allocated to TLVs in the outermost IPv6 * Segment Routing Header contained in the packet associated to @@ -2257,7 +2257,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) + * long bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) * Description * Apply an IPv6 Segment Routing action of type *action* to the * packet associated to *skb*. Each action takes a parameter @@ -2286,7 +2286,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_rc_repeat(void *ctx) + * long bpf_rc_repeat(void *ctx) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded repeat key message. This delays @@ -2305,7 +2305,7 @@ union bpf_attr { * Return * 0 * - * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) + * long bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded key press with *scancode*, @@ -2370,7 +2370,7 @@ union bpf_attr { * Return * A pointer to the local storage area. 
* - * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) + * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description * Select a **SO_REUSEPORT** socket from a * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. @@ -2471,7 +2471,7 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. * - * int bpf_sk_release(struct bpf_sock *sock) + * long bpf_sk_release(struct bpf_sock *sock) * Description * Release the reference held by *sock*. *sock* must be a * non-**NULL** pointer that was returned from @@ -2479,7 +2479,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * long bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) * Description * Push an element *value* in *map*. *flags* is one of: * @@ -2489,19 +2489,19 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * long bpf_map_pop_elem(struct bpf_map *map, void *value) * Description * Pop an element from *map*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * long bpf_map_peek_elem(struct bpf_map *map, void *value) * Description * Get an element from *map* without removing it. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) + * long bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) * Description * For socket policies, insert *len* bytes into *msg* at offset * *start*. @@ -2517,7 +2517,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. 
* - * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) + * long bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) * Description * Will remove *len* bytes from a *msg* starting at byte *start*. * This may result in **ENOMEM** errors under certain situations if @@ -2529,7 +2529,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) + * long bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded pointer movement. @@ -2543,7 +2543,7 @@ union bpf_attr { * Return * 0 * - * int bpf_spin_lock(struct bpf_spin_lock *lock) + * long bpf_spin_lock(struct bpf_spin_lock *lock) * Description * Acquire a spinlock represented by the pointer *lock*, which is * stored as part of a value of a map. Taking the lock allows to @@ -2591,7 +2591,7 @@ union bpf_attr { * Return * 0 * - * int bpf_spin_unlock(struct bpf_spin_lock *lock) + * long bpf_spin_unlock(struct bpf_spin_lock *lock) * Description * Release the *lock* previously locked by a call to * **bpf_spin_lock**\ (\ *lock*\ ). @@ -2614,7 +2614,7 @@ union bpf_attr { * A **struct bpf_tcp_sock** pointer on success, or **NULL** in * case of failure. * - * int bpf_skb_ecn_set_ce(struct sk_buff *skb) + * long bpf_skb_ecn_set_ce(struct sk_buff *skb) * Description * Set ECN (Explicit Congestion Notification) field of IP header * to **CE** (Congestion Encountered) if current value is **ECT** @@ -2651,7 +2651,7 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. 
* - * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) * Description * Check whether *iph* and *th* contain a valid SYN cookie ACK for * the listening socket in *sk*. @@ -2666,7 +2666,7 @@ union bpf_attr { * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative * error otherwise. * - * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) + * long bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) * Description * Get name of sysctl in /proc/sys/ and copy it into provided by * program buffer *buf* of size *buf_len*. @@ -2682,7 +2682,7 @@ union bpf_attr { * **-E2BIG** if the buffer wasn't big enough (*buf* will contain * truncated name in this case). * - * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * long bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) * Description * Get current value of sysctl as it is presented in /proc/sys * (incl. newline, etc), and copy it as a string into provided @@ -2701,7 +2701,7 @@ union bpf_attr { * **-EINVAL** if current value was unavailable, e.g. because * sysctl is uninitialized and read returns -EIO for it. * - * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * long bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) * Description * Get new value being written by user space to sysctl (before * the actual write happens) and copy it as a string into @@ -2718,7 +2718,7 @@ union bpf_attr { * * **-EINVAL** if sysctl is being read. 
* - * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) + * long bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) * Description * Override new value being written by user space to sysctl with * value provided by program in buffer *buf* of size *buf_len*. @@ -2735,7 +2735,7 @@ union bpf_attr { * * **-EINVAL** if sysctl is being read. * - * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) + * long bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) * Description * Convert the initial part of the string from buffer *buf* of * size *buf_len* to a long integer according to the given base @@ -2759,7 +2759,7 @@ union bpf_attr { * * **-ERANGE** if resulting value was out of range. * - * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) + * long bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) * Description * Convert the initial part of the string from buffer *buf* of * size *buf_len* to an unsigned long integer according to the @@ -2810,7 +2810,7 @@ union bpf_attr { * **NULL** if not found or there was an error in adding * a new bpf-local-storage. * - * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) + * long bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) * Description * Delete a bpf-local-storage from a *sk*. * Return @@ -2818,7 +2818,7 @@ union bpf_attr { * * **-ENOENT** if the bpf-local-storage cannot be found. * - * int bpf_send_signal(u32 sig) + * long bpf_send_signal(u32 sig) * Description * Send signal *sig* to the process of the current task. * The signal may be delivered to any of this process's threads. 
@@ -2859,7 +2859,7 @@ union bpf_attr { * * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 * - * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -2883,21 +2883,21 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr) * Description * Safely attempt to read *size* bytes from user space address * *unsafe_ptr* and store the data in *dst*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) * Description * Safely attempt to read *size* bytes from kernel space address * *unsafe_ptr* and store the data in *dst*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe user address * *unsafe_ptr* to *dst*. The *size* should include the @@ -2941,7 +2941,7 @@ union bpf_attr { * including the trailing NUL character. On error, a negative * value. * - * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr* * to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply. 
@@ -2949,14 +2949,14 @@ union bpf_attr { * On success, the strictly positive length of the string, including * the trailing NUL character. On error, a negative value. * - * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt) + * long bpf_tcp_send_ack(void *tp, u32 rcv_nxt) * Description * Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**. * *rcv_nxt* is the ack_seq to be sent out. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_send_signal_thread(u32 sig) + * long bpf_send_signal_thread(u32 sig) * Description * Send signal *sig* to the thread corresponding to the current task. * Return @@ -2976,7 +2976,7 @@ union bpf_attr { * Return * The 64 bit jiffies * - * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) + * long bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) * Description * For an eBPF program attached to a perf event, retrieve the * branch records (**struct perf_branch_entry**) associated to *ctx* @@ -2995,7 +2995,7 @@ union bpf_attr { * * **-ENOENT** if architecture does not support branch records. * - * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) + * long bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) * Description * Returns 0 on success, values for *pid* and *tgid* as seen from the current * *namespace* will be returned in *nsdata*. @@ -3007,7 +3007,7 @@ union bpf_attr { * * **-ENOENT** if pidns does not exists for the current task. * - * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -3062,7 +3062,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. 
* - * int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) + * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) * Description * Assign the *sk* to the *skb*. When combined with appropriate * routing configuration to receive the packet towards the socket, @@ -3097,7 +3097,7 @@ union bpf_attr { * Return * Current *ktime*. * - * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len) + * long bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len) * Description * **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print * out the format string. @@ -3126,7 +3126,7 @@ union bpf_attr { * * **-EOVERFLOW** if an overflow happened: The same object will be tried again. * - * int bpf_seq_write(struct seq_file *m, const void *data, u32 len) + * long bpf_seq_write(struct seq_file *m, const void *data, u32 len) * Description * **bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data. * The *m* represents the seq_file. The *data* and *len* represent the @@ -3221,7 +3221,7 @@ union bpf_attr { * Return * Requested value, or 0, if flags are not recognized. * - * int bpf_csum_level(struct sk_buff *skb, u64 level) + * long bpf_csum_level(struct sk_buff *skb, u64 level) * Description * Change the skbs checksum level by one layer up or down, or * reset it entirely to none in order to have the stack perform -- cgit v1.2.3 From 5e85c6bb8e74bd9daa4f5815da373d4ac2cb1a35 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 22 Jun 2020 20:22:22 -0700 Subject: selftests/bpf: Add variable-length data concatenation pattern test Add selftest that validates variable-length data reading and concatenation with one big shared data array. This is a common pattern in production use for monitoring and tracing applications, that potentially can read a lot of data, but overall read much less.
Such pattern allows to determine precisely what amount of data needs to be sent over perfbuf/ringbuf and maximize efficiency. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200623032224.4020118-2-andriin@fb.com --- tools/testing/selftests/bpf/prog_tests/varlen.c | 56 +++++++++++++++ tools/testing/selftests/bpf/progs/test_varlen.c | 96 +++++++++++++++++++++++++ 2 files changed, 152 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/varlen.c create mode 100644 tools/testing/selftests/bpf/progs/test_varlen.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/varlen.c b/tools/testing/selftests/bpf/prog_tests/varlen.c new file mode 100644 index 000000000000..7533565e096d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/varlen.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include +#include +#include "test_varlen.skel.h" + +#define CHECK_VAL(got, exp) \ + CHECK((got) != (exp), "check", "got %ld != exp %ld\n", \ + (long)(got), (long)(exp)) + +void test_varlen(void) +{ + int duration = 0, err; + struct test_varlen* skel; + struct test_varlen__bss *bss; + struct test_varlen__data *data; + const char str1[] = "Hello, "; + const char str2[] = "World!"; + const char exp_str[] = "Hello, \0World!\0"; + const int size1 = sizeof(str1); + const int size2 = sizeof(str2); + + skel = test_varlen__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + bss = skel->bss; + data = skel->data; + + err = test_varlen__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + bss->test_pid = getpid(); + + /* trigger everything */ + memcpy(bss->buf_in1, str1, size1); + memcpy(bss->buf_in2, str2, size2); + bss->capture = true; + usleep(1); + bss->capture = false; + + CHECK_VAL(bss->payload1_len1, size1); + CHECK_VAL(bss->payload1_len2, size2); + 
CHECK_VAL(bss->total1, size1 + size2); + CHECK(memcmp(bss->payload1, exp_str, size1 + size2), "content_check", + "doesn't match!"); + + CHECK_VAL(data->payload2_len1, size1); + CHECK_VAL(data->payload2_len2, size2); + CHECK_VAL(data->total2, size1 + size2); + CHECK(memcmp(data->payload2, exp_str, size1 + size2), "content_check", + "doesn't match!"); +cleanup: + test_varlen__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c new file mode 100644 index 000000000000..09691852debf --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_varlen.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include "vmlinux.h" +#include +#include +#include + +#define MAX_LEN 256 + +char buf_in1[MAX_LEN] = {}; +char buf_in2[MAX_LEN] = {}; + +int test_pid = 0; +bool capture = false; + +/* .bss */ +long payload1_len1 = 0; +long payload1_len2 = 0; +long total1 = 0; +char payload1[MAX_LEN + MAX_LEN] = {}; + +/* .data */ +int payload2_len1 = -1; +int payload2_len2 = -1; +int total2 = -1; +char payload2[MAX_LEN + MAX_LEN] = { 1 }; + +SEC("raw_tp/sys_enter") +int handler64(void *regs) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + void *payload = payload1; + u64 len; + + /* ignore irrelevant invocations */ + if (test_pid != pid || !capture) + return 0; + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]); + if (len <= MAX_LEN) { + payload += len; + payload1_len1 = len; + } + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]); + if (len <= MAX_LEN) { + payload += len; + payload1_len2 = len; + } + + total1 = payload - (void *)payload1; + + return 0; +} + +SEC("tp_btf/sys_enter") +int handler32(void *regs) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + void *payload = payload2; + u32 len; + + /* ignore irrelevant invocations */ + if (test_pid != pid || !capture) + return 0; + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, 
&buf_in1[0]); + if (len <= MAX_LEN) { + payload += len; + payload2_len1 = len; + } + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]); + if (len <= MAX_LEN) { + payload += len; + payload2_len2 = len; + } + + total2 = payload - (void *)payload2; + + return 0; +} + +SEC("tp_btf/sys_exit") +int handler_exit(void *regs) +{ + long bla; + + if (bpf_probe_read_kernel(&bla, sizeof(bla), 0)) + return 1; + else + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; -- cgit v1.2.3 From 2fde1747c986cac28fa66d0cffd7577db042640b Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 22 Jun 2020 20:22:23 -0700 Subject: selftests/bpf: Add variable-length data concat pattern less than test Extend original variable-length tests with a case to catch a common existing pattern of testing for < 0 for errors. Note because verifier also tracks upper bounds and we know it can not be greater than MAX_LEN here we can skip upper bound check. In ALU64 enabled compilation converting from long->int return types in probe helpers results in extra instruction pattern, <<= 32, s >>= 32. The trade-off is the non-ALU64 case works. If you really care about every extra insn (XDP case?) then you probably should be using original int type. In addition adding a sext insn to bpf might help the verifier in the general case to avoid these types of tricks. 
Signed-off-by: John Fastabend Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200623032224.4020118-3-andriin@fb.com --- tools/testing/selftests/bpf/prog_tests/varlen.c | 12 +++++ tools/testing/selftests/bpf/progs/test_varlen.c | 70 +++++++++++++++++++++++-- 2 files changed, 78 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/varlen.c b/tools/testing/selftests/bpf/prog_tests/varlen.c index 7533565e096d..c75525eab02c 100644 --- a/tools/testing/selftests/bpf/prog_tests/varlen.c +++ b/tools/testing/selftests/bpf/prog_tests/varlen.c @@ -51,6 +51,18 @@ void test_varlen(void) CHECK_VAL(data->total2, size1 + size2); CHECK(memcmp(data->payload2, exp_str, size1 + size2), "content_check", "doesn't match!"); + + CHECK_VAL(data->payload3_len1, size1); + CHECK_VAL(data->payload3_len2, size2); + CHECK_VAL(data->total3, size1 + size2); + CHECK(memcmp(data->payload3, exp_str, size1 + size2), "content_check", + "doesn't match!"); + + CHECK_VAL(data->payload4_len1, size1); + CHECK_VAL(data->payload4_len2, size2); + CHECK_VAL(data->total4, size1 + size2); + CHECK(memcmp(data->payload4, exp_str, size1 + size2), "content_check", + "doesn't match!"); cleanup: test_varlen__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c index 09691852debf..cd4b72c55dfe 100644 --- a/tools/testing/selftests/bpf/progs/test_varlen.c +++ b/tools/testing/selftests/bpf/progs/test_varlen.c @@ -26,8 +26,18 @@ int payload2_len2 = -1; int total2 = -1; char payload2[MAX_LEN + MAX_LEN] = { 1 }; +int payload3_len1 = -1; +int payload3_len2 = -1; +int total3= -1; +char payload3[MAX_LEN + MAX_LEN] = { 1 }; + +int payload4_len1 = -1; +int payload4_len2 = -1; +int total4= -1; +char payload4[MAX_LEN + MAX_LEN] = { 1 }; + SEC("raw_tp/sys_enter") -int handler64(void *regs) +int handler64_unsigned(void *regs) { int pid = bpf_get_current_pid_tgid() 
>> 32; void *payload = payload1; @@ -54,8 +64,34 @@ int handler64(void *regs) return 0; } -SEC("tp_btf/sys_enter") -int handler32(void *regs) +SEC("raw_tp/sys_exit") +int handler64_signed(void *regs) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + void *payload = payload3; + long len; + + /* ignore irrelevant invocations */ + if (test_pid != pid || !capture) + return 0; + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]); + if (len >= 0) { + payload += len; + payload3_len1 = len; + } + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]); + if (len >= 0) { + payload += len; + payload3_len2 = len; + } + total3 = payload - (void *)payload3; + + return 0; +} + +SEC("tp/raw_syscalls/sys_enter") +int handler32_unsigned(void *regs) { int pid = bpf_get_current_pid_tgid() >> 32; void *payload = payload2; @@ -82,7 +118,33 @@ int handler32(void *regs) return 0; } -SEC("tp_btf/sys_exit") +SEC("tp/raw_syscalls/sys_exit") +int handler32_signed(void *regs) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + void *payload = payload4; + int len; + + /* ignore irrelevant invocations */ + if (test_pid != pid || !capture) + return 0; + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]); + if (len >= 0) { + payload += len; + payload4_len1 = len; + } + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]); + if (len >= 0) { + payload += len; + payload4_len2 = len; + } + total4 = payload - (void *)payload4; + + return 0; +} + +SEC("tp/syscalls/sys_exit_getpid") int handler_exit(void *regs) { long bla; -- cgit v1.2.3 From 9d9d8cc21e3827b89e414f990016836290de3038 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 23 Jun 2020 12:37:10 +0200 Subject: tools, bpftool: Correctly evaluate $(BUILD_BPF_SKELS) in Makefile Currently, if the clang-bpf-co-re feature is not available, the build fails with e.g. 
CC prog.o prog.c:1462:10: fatal error: profiler.skel.h: No such file or directory 1462 | #include "profiler.skel.h" | ^~~~~~~~~~~~~~~~~ This is due to the fact that the BPFTOOL_WITHOUT_SKELETONS macro is not defined, despite BUILD_BPF_SKELS not being set. Fix this by correctly evaluating $(BUILD_BPF_SKELS) when deciding on whether to add -DBPFTOOL_WITHOUT_SKELETONS to CFLAGS. Fixes: 05aca6da3b5a ("tools/bpftool: Generalize BPF skeleton support and generate vmlinux.h") Signed-off-by: Tobias Klauser Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200623103710.10370-1-tklauser@distanz.ch --- tools/bpf/bpftool/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 06f436e8191a..8c6563e56ffc 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -155,7 +155,7 @@ $(OUTPUT)pids.o: $(OUTPUT)pid_iter.skel.h endif endif -CFLAGS += $(if BUILD_BPF_SKELS,,-DBPFTOOL_WITHOUT_SKELETONS) +CFLAGS += $(if $(BUILD_BPF_SKELS),,-DBPFTOOL_WITHOUT_SKELETONS) $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< -- cgit v1.2.3 From 54b66c2255fadc8d78e88b5ffd99b19f7f754f5a Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 23 Jun 2020 22:36:00 +0100 Subject: tools, bpftool: Fix variable shadowing in emit_obj_refs_json() Building bpftool yields the following complaint: pids.c: In function 'emit_obj_refs_json': pids.c:175:80: warning: declaration of 'json_wtr' shadows a global declaration [-Wshadow] 175 | void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, json_writer_t *json_wtr) | ~~~~~~~~~~~~~~~^~~~~~~~ In file included from pids.c:11: main.h:141:23: note: shadowed declaration is here 141 | extern json_writer_t *json_wtr; | ^~~~~~~~ Let's rename the variable. v2: - Rename the variable instead of calling the global json_wtr directly. 
Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200623213600.16643-1-quentin@isovalent.com --- tools/bpf/bpftool/pids.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index 3474a91743ff..2709be4de2b1 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -172,7 +172,8 @@ void delete_obj_refs_table(struct obj_refs_table *table) } } -void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, json_writer_t *json_wtr) +void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, + json_writer_t *json_writer) { struct obj_refs *refs; struct obj_ref *ref; @@ -187,16 +188,16 @@ void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, json_writer_t *j if (refs->ref_cnt == 0) break; - jsonw_name(json_wtr, "pids"); - jsonw_start_array(json_wtr); + jsonw_name(json_writer, "pids"); + jsonw_start_array(json_writer); for (i = 0; i < refs->ref_cnt; i++) { ref = &refs->refs[i]; - jsonw_start_object(json_wtr); - jsonw_int_field(json_wtr, "pid", ref->pid); - jsonw_string_field(json_wtr, "comm", ref->comm); - jsonw_end_object(json_wtr); + jsonw_start_object(json_writer); + jsonw_int_field(json_writer, "pid", ref->pid); + jsonw_string_field(json_writer, "comm", ref->comm); + jsonw_end_object(json_writer); } - jsonw_end_array(json_wtr); + jsonw_end_array(json_writer); break; } } -- cgit v1.2.3 From 135c783f4794fbdeace4a969dea6eabd27f8a501 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 23 Jun 2020 09:42:07 +0100 Subject: libbpf: Fix spelling mistake "kallasyms" -> "kallsyms" There is a spelling mistake in a pr_warn message. Fix it. 
Signed-off-by: Colin Ian King Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200623084207.149253-1-colin.king@canonical.com --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 18461deb1b19..deea27aadcef 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5741,7 +5741,7 @@ static int bpf_object__read_kallsyms_file(struct bpf_object *obj) if (ret == EOF && feof(f)) break; if (ret != 3) { - pr_warn("failed to read kallasyms entry: %d\n", ret); + pr_warn("failed to read kallsyms entry: %d\n", ret); err = -EINVAL; goto out; } -- cgit v1.2.3 From 192b6638eea5d40c99964291671fc0371b858f6e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jun 2020 21:38:05 -0700 Subject: libbpf: Prevent loading vmlinux BTF twice Prevent loading/parsing vmlinux BTF twice in some cases: for CO-RE relocations and for BTF-aware hooks (tp_btf, fentry/fexit, etc). 
Fixes: a6ed02cac690 ("libbpf: Load btf_vmlinux only once per object.") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200624043805.1794620-1-andriin@fb.com --- tools/lib/bpf/libbpf.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index deea27aadcef..6b4955d170ff 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2504,22 +2504,31 @@ static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog) static int bpf_object__load_vmlinux_btf(struct bpf_object *obj) { + bool need_vmlinux_btf = false; struct bpf_program *prog; int err; + /* CO-RE relocations need kernel BTF */ + if (obj->btf_ext && obj->btf_ext->field_reloc_info.len) + need_vmlinux_btf = true; + bpf_object__for_each_program(prog, obj) { if (libbpf_prog_needs_vmlinux_btf(prog)) { - obj->btf_vmlinux = libbpf_find_kernel_btf(); - if (IS_ERR(obj->btf_vmlinux)) { - err = PTR_ERR(obj->btf_vmlinux); - pr_warn("Error loading vmlinux BTF: %d\n", err); - obj->btf_vmlinux = NULL; - return err; - } - return 0; + need_vmlinux_btf = true; + break; } } + if (!need_vmlinux_btf) + return 0; + + obj->btf_vmlinux = libbpf_find_kernel_btf(); + if (IS_ERR(obj->btf_vmlinux)) { + err = PTR_ERR(obj->btf_vmlinux); + pr_warn("Error loading vmlinux BTF: %d\n", err); + obj->btf_vmlinux = NULL; + return err; + } return 0; } @@ -4945,8 +4954,8 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path) if (targ_btf_path) targ_btf = btf__parse_elf(targ_btf_path, NULL); else - targ_btf = libbpf_find_kernel_btf(); - if (IS_ERR(targ_btf)) { + targ_btf = obj->btf_vmlinux; + if (IS_ERR_OR_NULL(targ_btf)) { pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf)); return PTR_ERR(targ_btf); } @@ -4987,7 +4996,9 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path) } out: - btf__free(targ_btf); + /* 
obj->btf_vmlinux is freed at the end of object load phase */ + if (targ_btf != obj->btf_vmlinux) + btf__free(targ_btf); if (!IS_ERR_OR_NULL(cand_cache)) { hashmap__for_each_entry(cand_cache, entry, i) { bpf_core_free_cands(entry->value); -- cgit v1.2.3 From fea549b030152d5336dbd960b357a4d4b841a851 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 24 Jun 2020 11:10:59 -0700 Subject: selftests/bpf: Workaround for get_stack_rawtp test. ./test_progs-no_alu32 -t get_stack_raw_tp fails due to: 52: (85) call bpf_get_stack#67 53: (bf) r8 = r0 54: (bf) r1 = r8 55: (67) r1 <<= 32 56: (c7) r1 s>>= 32 ; if (usize < 0) 57: (c5) if r1 s< 0x0 goto pc+26 R0=inv(id=0,smax_value=800) R1_w=inv(id=0,umax_value=800,var_off=(0x0; 0x3ff)) R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=1600,imm=0) R8_w=inv(id=0,smax_value=800) R9=inv800 ; ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0); 58: (1f) r9 -= r8 ; ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0); 59: (bf) r2 = r7 60: (0f) r2 += r1 regs=1 stack=0 before 52: (85) call bpf_get_stack#67 ; ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0); 61: (bf) r1 = r6 62: (bf) r3 = r9 63: (b7) r4 = 0 64: (85) call bpf_get_stack#67 R0=inv(id=0,smax_value=800) R1_w=ctx(id=0,off=0,imm=0) R2_w=map_value(id=0,off=0,ks=4,vs=1600,umax_value=800,var_off=(0x0; 0x3ff),s32_max_value=1023,u32_max_value=1023) R3_w=inv(id=0,umax_value=9223372036854776608) R3 unbounded memory access, use 'var &= const' or 'if (var < const)' In the C code: usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK); if (usize < 0) return 0; ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0); if (ksize < 0) return 0; We used to have problem with pointer arith in R2. Now it's a problem with two integers in R3. 'if (usize < 0)' is comparing R1 and makes it [0,800], but R8 stays [-inf,800]. Both registers represent the same 'usize' variable. 
Then R9 -= R8 is doing 800 - [-inf, 800] so the result of "max_len - usize" looks unbounded to the verifier while it's obvious in C code that "max_len - usize" should be [0, 800]. To work around the problem, convert ksize and usize variables from int to long. Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c index 29817a703984..b6a6eb279e54 100644 --- a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c +++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c @@ -57,8 +57,9 @@ struct { SEC("raw_tracepoint/sys_enter") int bpf_prog1(void *ctx) { - int max_len, max_buildid_len, usize, ksize, total_size; + int max_len, max_buildid_len, total_size; struct stack_trace_t *data; + long usize, ksize; void *raw_data; __u32 key = 0; -- cgit v1.2.3 From f9bcf96837f158db6ea982d15cd2c8161ca6bc23 Mon Sep 17 00:00:00 2001 From: Dmitry Yakunin Date: Sat, 20 Jun 2020 18:30:52 +0300 Subject: bpf: Add SO_KEEPALIVE and related options to bpf_setsockopt This patch adds support of SO_KEEPALIVE flag and TCP related options to bpf_setsockopt() routine. This is helpful if we want to enable or tune TCP keepalive for applications which don't do it in the userspace code.
v3: - update kernel-doc in uapi (Nikita Vetoshkin ) v4: - update kernel-doc in tools too (Alexei Starovoitov) - add test to selftests (Alexei Starovoitov) Signed-off-by: Dmitry Yakunin Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200620153052.9439-3-zeil@yandex-team.ru --- tools/include/uapi/linux/bpf.h | 7 ++++-- tools/testing/selftests/bpf/progs/connect4_prog.c | 27 +++++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 9d3923e6b860..d9737d51dd19 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1621,10 +1621,13 @@ union bpf_attr { * * * **SOL_SOCKET**, which supports the following *optname*\ s: * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, - * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**. + * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**, + * **SO_BINDTODEVICE**, **SO_KEEPALIVE**. * * **IPPROTO_TCP**, which supports the following *optname*\ s: * **TCP_CONGESTION**, **TCP_BPF_IW**, - * **TCP_BPF_SNDCWND_CLAMP**. + * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, + * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, + * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. 
* Return diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c index 1ab2c5eba86c..b1b2773c0b9d 100644 --- a/tools/testing/selftests/bpf/progs/connect4_prog.c +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c @@ -104,6 +104,30 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx) return 0; } +static __inline int set_keepalive(struct bpf_sock_addr *ctx) +{ + int zero = 0, one = 1; + + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one))) + return 1; + if (ctx->type == SOCK_STREAM) { + if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPIDLE, &one, sizeof(one))) + return 1; + if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPINTVL, &one, sizeof(one))) + return 1; + if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPCNT, &one, sizeof(one))) + return 1; + if (bpf_setsockopt(ctx, SOL_TCP, TCP_SYNCNT, &one, sizeof(one))) + return 1; + if (bpf_setsockopt(ctx, SOL_TCP, TCP_USER_TIMEOUT, &one, sizeof(one))) + return 1; + } + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &zero, sizeof(zero))) + return 1; + + return 0; +} + SEC("cgroup/connect4") int connect_v4_prog(struct bpf_sock_addr *ctx) { @@ -121,6 +145,9 @@ int connect_v4_prog(struct bpf_sock_addr *ctx) if (bind_to_device(ctx)) return 0; + if (set_keepalive(ctx)) + return 0; + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) return 0; else if (ctx->type == SOCK_STREAM) -- cgit v1.2.3 From af7ec13833619e17f03aa73a785a2f871da6d66b Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:09 -0700 Subject: bpf: Add bpf_skc_to_tcp6_sock() helper The helper is used in tracing programs to cast a socket pointer to a tcp6_sock pointer. The return value could be NULL if the casting is illegal. A new helper return type RET_PTR_TO_BTF_ID_OR_NULL is added so the verifier is able to deduce proper return types for the helper. Different from the previous BTF_ID based helpers, the bpf_skc_to_tcp6_sock() argument can be several possible btf_ids. 
More specifically, all possible socket data structures with sock_common appearing first in the memory layout. This patch only added socket types related to tcp and udp. All possible argument btf_id and return value btf_id for helper bpf_skc_to_tcp6_sock() are pre-calculated and cached. In the future, it is even possible to precompute these btf_id's at kernel build time. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200623230809.3988195-1-yhs@fb.com --- tools/include/uapi/linux/bpf.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d9737d51dd19..e90ad07b291a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3255,6 +3255,12 @@ union bpf_attr { * case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level * is returned or the error code -EACCES in case the skb is not * subject to CHECKSUM_UNNECESSARY. + * + * struct tcp6_sock *bpf_skc_to_tcp6_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp6_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3392,7 +3398,8 @@ union bpf_attr { FN(ringbuf_submit), \ FN(ringbuf_discard), \ FN(ringbuf_query), \ - FN(csum_level), + FN(csum_level), \ + FN(skc_to_tcp6_sock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 478cfbdf5f13dfe09cfd0b1cbac821f5e27f6108 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:11 -0700 Subject: bpf: Add bpf_skc_to_{tcp, tcp_timewait, tcp_request}_sock() helpers Three more helpers are added to cast a sock_common pointer to a tcp_sock, tcp_timewait_sock or a tcp_request_sock for tracing programs.
Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200623230811.3988277-1-yhs@fb.com --- tools/include/uapi/linux/bpf.h | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e90ad07b291a..b9412ab275f3 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3261,6 +3261,24 @@ union bpf_attr { * Dynamically cast a *sk* pointer to a *tcp6_sock* pointer. * Return * *sk* if casting is valid, or NULL otherwise. + * + * struct tcp_sock *bpf_skc_to_tcp_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. + * + * struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. + * + * struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3399,7 +3417,10 @@ union bpf_attr { FN(ringbuf_discard), \ FN(ringbuf_query), \ FN(csum_level), \ - FN(skc_to_tcp6_sock), + FN(skc_to_tcp6_sock), \ + FN(skc_to_tcp_sock), \ + FN(skc_to_tcp_timewait_sock), \ + FN(skc_to_tcp_request_sock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 0d4fad3e57df2bf61e8ffc8d12a34b1caf9b8835 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:15 -0700 Subject: bpf: Add bpf_skc_to_udp6_sock() helper The helper is used in tracing programs to cast a socket pointer to a udp6_sock pointer. 
The return value could be NULL if the casting is illegal. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Cc: Eric Dumazet Link: https://lore.kernel.org/bpf/20200623230815.3988481-1-yhs@fb.com --- tools/include/uapi/linux/bpf.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b9412ab275f3..0cb8ec948816 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3279,6 +3279,12 @@ union bpf_attr { * Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer. * Return * *sk* if casting is valid, or NULL otherwise. + * + * struct udp6_sock *bpf_skc_to_udp6_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *udp6_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3420,7 +3426,8 @@ union bpf_attr { FN(skc_to_tcp6_sock), \ FN(skc_to_tcp_sock), \ FN(skc_to_tcp_timewait_sock), \ - FN(skc_to_tcp_request_sock), + FN(skc_to_tcp_request_sock), \ + FN(skc_to_udp6_sock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 84544f5637ff3501876ba96bd48ca900317e08fb Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:16 -0700 Subject: selftests/bpf: Move newer bpf_iter_* type redefining to a new header file Commit b9f4c01f3e0b ("selftest/bpf: Make bpf_iter selftest compilable against old vmlinux.h") and Commit dda18a5c0b75 ("selftests/bpf: Convert bpf_iter_test_kern{3, 4}.c to define own bpf_iter_meta") redefined newly introduced types in bpf programs so the bpf program can still compile properly with old kernels although loading may fail. 
Since this patch set introduced new types and the same workaround is needed, let us move the workaround to a separate header file so that it does not clutter bpf programs. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200623230816.3988656-1-yhs@fb.com --- tools/testing/selftests/bpf/progs/bpf_iter.h | 49 ++++++++++++++++++++++ .../testing/selftests/bpf/progs/bpf_iter_bpf_map.c | 18 +------- .../selftests/bpf/progs/bpf_iter_ipv6_route.c | 18 +------- .../testing/selftests/bpf/progs/bpf_iter_netlink.c | 18 +------- tools/testing/selftests/bpf/progs/bpf_iter_task.c | 18 +------- .../selftests/bpf/progs/bpf_iter_task_file.c | 20 +-------- .../selftests/bpf/progs/bpf_iter_test_kern3.c | 17 +------- .../selftests/bpf/progs/bpf_iter_test_kern4.c | 17 +------- .../bpf/progs/bpf_iter_test_kern_common.h | 18 +------- 9 files changed, 57 insertions(+), 136 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter.h (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h new file mode 100644 index 000000000000..3757e88c6406 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020 Facebook */ +/* "undefine" structs in vmlinux.h, because we "override" them below */ +#define bpf_iter_meta bpf_iter_meta___not_used +#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used +#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used +#define bpf_iter__netlink bpf_iter__netlink___not_used +#define bpf_iter__task bpf_iter__task___not_used +#define bpf_iter__task_file bpf_iter__task_file___not_used +#include "vmlinux.h" +#undef bpf_iter_meta +#undef bpf_iter__bpf_map +#undef bpf_iter__ipv6_route +#undef bpf_iter__netlink +#undef bpf_iter__task +#undef bpf_iter__task_file + +struct bpf_iter_meta { + struct
seq_file *seq; + __u64 session_id; + __u64 seq_num; +} __attribute__((preserve_access_index)); + +struct bpf_iter__ipv6_route { + struct bpf_iter_meta *meta; + struct fib6_info *rt; +} __attribute__((preserve_access_index)); + +struct bpf_iter__netlink { + struct bpf_iter_meta *meta; + struct netlink_sock *sk; +} __attribute__((preserve_access_index)); + +struct bpf_iter__task { + struct bpf_iter_meta *meta; + struct task_struct *task; +} __attribute__((preserve_access_index)); + +struct bpf_iter__task_file { + struct bpf_iter_meta *meta; + struct task_struct *task; + __u32 fd; + struct file *file; +} __attribute__((preserve_access_index)); + +struct bpf_iter__bpf_map { + struct bpf_iter_meta *meta; + struct bpf_map *map; +} __attribute__((preserve_access_index)); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c index b57bd6fef208..08651b23edba 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c @@ -1,27 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -/* "undefine" structs in vmlinux.h, because we "override" them below */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__bpf_map +#include "bpf_iter.h" #include #include char _license[] SEC("license") = "GPL"; -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__bpf_map { - struct bpf_iter_meta *meta; - struct bpf_map *map; -} __attribute__((preserve_access_index)); - SEC("iter/bpf_map") int dump_bpf_map(struct bpf_iter__bpf_map *ctx) { diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c index c8e9ca74c87b..93a452d1d136 100644 --- 
a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c @@ -1,25 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -/* "undefine" structs in vmlinux.h, because we "override" them below */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__ipv6_route +#include "bpf_iter.h" #include #include -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__ipv6_route { - struct bpf_iter_meta *meta; - struct fib6_info *rt; -} __attribute__((preserve_access_index)); - char _license[] SEC("license") = "GPL"; extern bool CONFIG_IPV6_SUBTREES __kconfig __weak; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c index e7b8753eac0b..fda5036fdf75 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c @@ -1,11 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -/* "undefine" structs in vmlinux.h, because we "override" them below */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__netlink bpf_iter__netlink___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__netlink +#include "bpf_iter.h" #include #include @@ -14,17 +9,6 @@ char _license[] SEC("license") = "GPL"; #define sk_rmem_alloc sk_backlog.rmem_alloc #define sk_refcnt __sk_common.skc_refcnt -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__netlink { - struct bpf_iter_meta *meta; - struct netlink_sock *sk; -} __attribute__((preserve_access_index)); - static inline struct inode *SOCK_INODE(struct socket *socket) { return 
&container_of(socket, struct socket_alloc, socket)->vfs_inode; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c index ee754021f98e..4983087852a0 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c @@ -1,27 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -/* "undefine" structs in vmlinux.h, because we "override" them below */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__task bpf_iter__task___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__task +#include "bpf_iter.h" #include #include char _license[] SEC("license") = "GPL"; -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__task { - struct bpf_iter_meta *meta; - struct task_struct *task; -} __attribute__((preserve_access_index)); - SEC("iter/task") int dump_task(struct bpf_iter__task *ctx) { diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c index 0f0ec3db20ba..8b787baa2654 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c @@ -1,29 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -/* "undefine" structs in vmlinux.h, because we "override" them below */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__task_file bpf_iter__task_file___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__task_file +#include "bpf_iter.h" #include #include char _license[] SEC("license") = "GPL"; -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__task_file { - struct bpf_iter_meta *meta; - struct task_struct *task; - __u32 fd; - 
struct file *file; -} __attribute__((preserve_access_index)); - SEC("iter/task_file") int dump_task_file(struct bpf_iter__task_file *ctx) { diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c index 13c2c90c835f..2a4647f20c46 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c @@ -1,25 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__task bpf_iter__task___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__task +#include "bpf_iter.h" #include char _license[] SEC("license") = "GPL"; -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__task { - struct bpf_iter_meta *meta; - struct task_struct *task; -} __attribute__((preserve_access_index)); - SEC("iter/task") int dump_task(struct bpf_iter__task *ctx) { diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c index 0aa71b333cf3..ee49493dc125 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c @@ -1,25 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__bpf_map +#include "bpf_iter.h" #include char _license[] SEC("license") = "GPL"; -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__bpf_map { - struct bpf_iter_meta *meta; - struct bpf_map *map; -} __attribute__((preserve_access_index)); - __u32 map1_id = 0, map2_id = 0; __u32 
map1_accessed = 0, map2_accessed = 0; __u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h index dee1339e6905..d5e3df66ad9a 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h @@ -1,27 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2020 Facebook */ -/* "undefine" structs in vmlinux.h, because we "override" them below */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__task bpf_iter__task___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__task +#include "bpf_iter.h" #include char _license[] SEC("license") = "GPL"; int count = 0; -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__task { - struct bpf_iter_meta *meta; - struct task_struct *task; -} __attribute__((preserve_access_index)); - SEC("iter/task") int dump_task(struct bpf_iter__task *ctx) { -- cgit v1.2.3 From 647b502e3d5456f5c240b1587112b163c69732e9 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:17 -0700 Subject: selftests/bpf: Refactor some net macros to bpf_tracing_net.h Refactor bpf_iter_ipv6_route.c and bpf_iter_netlink.c so net macros, originally from various include/linux header files, are moved to a new header file bpf_tracing_net.h. The goal is to improve reuse so networking tracing programs do not need to copy these macros every time they use them. 
Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200623230817.3988962-1-yhs@fb.com --- tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c | 7 +------ tools/testing/selftests/bpf/progs/bpf_iter_netlink.c | 4 +--- tools/testing/selftests/bpf/progs/bpf_tracing_net.h | 16 ++++++++++++++++ 3 files changed, 18 insertions(+), 9 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/bpf_tracing_net.h (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c index 93a452d1d136..d58d9f1642b5 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ #include "bpf_iter.h" +#include "bpf_tracing_net.h" #include #include @@ -8,12 +9,6 @@ char _license[] SEC("license") = "GPL"; extern bool CONFIG_IPV6_SUBTREES __kconfig __weak; -#define RTF_GATEWAY 0x0002 -#define IFNAMSIZ 16 -#define fib_nh_gw_family nh_common.nhc_gw_family -#define fib_nh_gw6 nh_common.nhc_gw.ipv6 -#define fib_nh_dev nh_common.nhc_dev - SEC("iter/ipv6_route") int dump_ipv6_route(struct bpf_iter__ipv6_route *ctx) { diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c index fda5036fdf75..cec82a419800 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c @@ -1,14 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ #include "bpf_iter.h" +#include "bpf_tracing_net.h" #include #include char _license[] SEC("license") = "GPL"; -#define sk_rmem_alloc sk_backlog.rmem_alloc -#define sk_refcnt __sk_common.skc_refcnt - static inline struct inode *SOCK_INODE(struct socket *socket) { return 
&container_of(socket, struct socket_alloc, socket)->vfs_inode; diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h new file mode 100644 index 000000000000..1f38a1098727 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __BPF_TRACING_NET_H__ +#define __BPF_TRACING_NET_H__ + +#define IFNAMSIZ 16 + +#define RTF_GATEWAY 0x0002 + +#define fib_nh_dev nh_common.nhc_dev +#define fib_nh_gw_family nh_common.nhc_gw_family +#define fib_nh_gw6 nh_common.nhc_gw.ipv6 + +#define sk_rmem_alloc sk_backlog.rmem_alloc +#define sk_refcnt __sk_common.skc_refcnt + +#endif -- cgit v1.2.3 From 3982bfaaef7c80ecf6a065cbf9422165a8e36f75 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:19 -0700 Subject: selftests/bpf: Add more common macros to bpf_tracing_net.h These newly added macros will be used in subsequent bpf iterator tcp{4,6} and udp{4,6} programs. 
Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200623230819.3989050-1-yhs@fb.com --- .../testing/selftests/bpf/progs/bpf_tracing_net.h | 35 ++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index 1f38a1098727..01378911252b 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -2,15 +2,50 @@ #ifndef __BPF_TRACING_NET_H__ #define __BPF_TRACING_NET_H__ +#define AF_INET 2 +#define AF_INET6 10 + +#define ICSK_TIME_RETRANS 1 +#define ICSK_TIME_PROBE0 3 +#define ICSK_TIME_LOSS_PROBE 5 +#define ICSK_TIME_REO_TIMEOUT 6 + #define IFNAMSIZ 16 #define RTF_GATEWAY 0x0002 +#define TCP_INFINITE_SSTHRESH 0x7fffffff +#define TCP_PINGPONG_THRESH 3 + #define fib_nh_dev nh_common.nhc_dev #define fib_nh_gw_family nh_common.nhc_gw_family #define fib_nh_gw6 nh_common.nhc_gw.ipv6 +#define inet_daddr sk.__sk_common.skc_daddr +#define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr +#define inet_dport sk.__sk_common.skc_dport + +#define ir_loc_addr req.__req_common.skc_rcv_saddr +#define ir_num req.__req_common.skc_num +#define ir_rmt_addr req.__req_common.skc_daddr +#define ir_rmt_port req.__req_common.skc_dport +#define ir_v6_rmt_addr req.__req_common.skc_v6_daddr +#define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr + +#define sk_family __sk_common.skc_family #define sk_rmem_alloc sk_backlog.rmem_alloc #define sk_refcnt __sk_common.skc_refcnt +#define sk_state __sk_common.skc_state +#define sk_v6_daddr __sk_common.skc_v6_daddr +#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr + +#define s6_addr32 in6_u.u6_addr32 + +#define tw_daddr __tw_common.skc_daddr +#define tw_rcv_saddr __tw_common.skc_rcv_saddr +#define tw_dport __tw_common.skc_dport +#define tw_refcnt 
__tw_common.skc_refcnt +#define tw_v6_daddr __tw_common.skc_v6_daddr +#define tw_v6_rcv_saddr __tw_common.skc_v6_rcv_saddr #endif -- cgit v1.2.3 From 2767c97765cb3d9b54c8e62b468e55cc56854a66 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:20 -0700 Subject: selftests/bpf: Implement sample tcp/tcp6 bpf_iter programs In my VM, I got identical result compared to /proc/net/{tcp,tcp6}. For tcp6: $ cat /proc/net/tcp6 sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode 0: 00000000000000000000000000000000:0016 00000000000000000000000000000000:0000 0A 00000000:00000000 00:00000001 00000000 0 0 17955 1 000000003eb3102e 100 0 0 10 0 $ cat /sys/fs/bpf/p1 sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode 0: 00000000000000000000000000000000:0016 00000000000000000000000000000000:0000 0A 00000000:00000000 00:00000000 00000000 0 0 17955 1 000000003eb3102e 100 0 0 10 0 For tcp: $ cat /proc/net/tcp sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode 0: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000 0 0 2666 1 000000007152e43f 100 0 0 10 0 $ cat /sys/fs/bpf/p2 sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode 1: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000 0 0 2666 1 000000007152e43f 100 0 0 10 0 Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200623230820.3989165-1-yhs@fb.com --- tools/testing/selftests/bpf/progs/bpf_iter.h | 15 ++ tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c | 234 ++++++++++++++++++++ tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c | 250 ++++++++++++++++++++++ 3 files changed, 499 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c create mode 100644 
tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h index 3757e88c6406..bde23e16e777 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter.h +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h @@ -7,6 +7,8 @@ #define bpf_iter__netlink bpf_iter__netlink___not_used #define bpf_iter__task bpf_iter__task___not_used #define bpf_iter__task_file bpf_iter__task_file___not_used +#define bpf_iter__tcp bpf_iter__tcp___not_used +#define tcp6_sock tcp6_sock___not_used #include "vmlinux.h" #undef bpf_iter_meta #undef bpf_iter__bpf_map @@ -14,6 +16,8 @@ #undef bpf_iter__netlink #undef bpf_iter__task #undef bpf_iter__task_file +#undef bpf_iter__tcp +#undef tcp6_sock struct bpf_iter_meta { struct seq_file *seq; @@ -47,3 +51,14 @@ struct bpf_iter__bpf_map { struct bpf_iter_meta *meta; struct bpf_map *map; } __attribute__((preserve_access_index)); + +struct bpf_iter__tcp { + struct bpf_iter_meta *meta; + struct sock_common *sk_common; + uid_t uid; +} __attribute__((preserve_access_index)); + +struct tcp6_sock { + struct tcp_sock tcp; + struct ipv6_pinfo inet6; +} __attribute__((preserve_access_index)); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c new file mode 100644 index 000000000000..30fd587cb325 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include "bpf_tracing_net.h" +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +static int hlist_unhashed_lockless(const struct hlist_node *h) +{ + return !(h->pprev); +} + +static int timer_pending(const struct timer_list * timer) +{ + return !hlist_unhashed_lockless(&timer->entry); +} + +extern unsigned CONFIG_HZ __kconfig; + +#define USER_HZ 100 +#define NSEC_PER_SEC 1000000000ULL +static 
clock_t jiffies_to_clock_t(unsigned long x) +{ + /* The implementation here tailored to a particular + * setting of USER_HZ. + */ + u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ; + u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ; + + if ((tick_nsec % user_hz_nsec) == 0) { + if (CONFIG_HZ < USER_HZ) + return x * (USER_HZ / CONFIG_HZ); + else + return x / (CONFIG_HZ / USER_HZ); + } + return x * tick_nsec/user_hz_nsec; +} + +static clock_t jiffies_delta_to_clock_t(long delta) +{ + if (delta <= 0) + return 0; + + return jiffies_to_clock_t(delta); +} + +static long sock_i_ino(const struct sock *sk) +{ + const struct socket *sk_socket = sk->sk_socket; + const struct inode *inode; + unsigned long ino; + + if (!sk_socket) + return 0; + + inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; + bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); + return ino; +} + +static bool +inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk) +{ + return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; +} + +static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp) +{ + return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH; +} + +static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp, + uid_t uid, __u32 seq_num) +{ + const struct inet_connection_sock *icsk; + const struct fastopen_queue *fastopenq; + const struct inet_sock *inet; + unsigned long timer_expires; + const struct sock *sp; + __u16 destp, srcp; + __be32 dest, src; + int timer_active; + int rx_queue; + int state; + + icsk = &tp->inet_conn; + inet = &icsk->icsk_inet; + sp = &inet->sk; + fastopenq = &icsk->icsk_accept_queue.fastopenq; + + dest = inet->inet_daddr; + src = inet->inet_rcv_saddr; + destp = bpf_ntohs(inet->inet_dport); + srcp = bpf_ntohs(inet->inet_sport); + + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { + timer_active = 1; + timer_expires = icsk->icsk_timeout; + } else 
if (icsk->icsk_pending == ICSK_TIME_PROBE0) { + timer_active = 4; + timer_expires = icsk->icsk_timeout; + } else if (timer_pending(&sp->sk_timer)) { + timer_active = 2; + timer_expires = sp->sk_timer.expires; + } else { + timer_active = 0; + timer_expires = bpf_jiffies64(); + } + + state = sp->sk_state; + if (state == TCP_LISTEN) { + rx_queue = sp->sk_ack_backlog; + } else { + rx_queue = tp->rcv_nxt - tp->copied_seq; + if (rx_queue < 0) + rx_queue = 0; + } + + BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ", + seq_num, src, srcp, destp, destp); + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ", + state, + tp->write_seq - tp->snd_una, rx_queue, + timer_active, + jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()), + icsk->icsk_retransmits, uid, + icsk->icsk_probes_out, + sock_i_ino(sp), + sp->sk_refcnt.refs.counter); + BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n", + tp, + jiffies_to_clock_t(icsk->icsk_rto), + jiffies_to_clock_t(icsk->icsk_ack.ato), + (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk), + tp->snd_cwnd, + state == TCP_LISTEN ? fastopenq->max_qlen + : (tcp_in_initial_slowstart(tp) ? 
-1 : tp->snd_ssthresh) + ); + + return 0; +} + +static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw, + uid_t uid, __u32 seq_num) +{ + struct inet_timewait_sock *tw = &ttw->tw_sk; + __u16 destp, srcp; + __be32 dest, src; + long delta; + + delta = tw->tw_timer.expires - bpf_jiffies64(); + dest = tw->tw_daddr; + src = tw->tw_rcv_saddr; + destp = bpf_ntohs(tw->tw_dport); + srcp = bpf_ntohs(tw->tw_sport); + + BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ", + seq_num, src, srcp, dest, destp); + + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", + tw->tw_substate, 0, 0, + 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, + tw->tw_refcnt.refs.counter, tw); + + return 0; +} + +static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq, + uid_t uid, __u32 seq_num) +{ + struct inet_request_sock *irsk = &treq->req; + struct request_sock *req = &irsk->req; + long ttd; + + ttd = req->rsk_timer.expires - bpf_jiffies64(); + + if (ttd < 0) + ttd = 0; + + BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ", + seq_num, irsk->ir_loc_addr, + irsk->ir_num, irsk->ir_rmt_addr, + bpf_ntohs(irsk->ir_rmt_port)); + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", + TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd), + req->num_timeout, uid, 0, 0, 0, req); + + return 0; +} + +SEC("iter/tcp") +int dump_tcp4(struct bpf_iter__tcp *ctx) +{ + struct sock_common *sk_common = ctx->sk_common; + struct seq_file *seq = ctx->meta->seq; + struct tcp_timewait_sock *tw; + struct tcp_request_sock *req; + struct tcp_sock *tp; + uid_t uid = ctx->uid; + __u32 seq_num; + + if (sk_common == (void *)0) + return 0; + + seq_num = ctx->meta->seq_num; + if (seq_num == 0) + BPF_SEQ_PRINTF(seq, " sl " + "local_address " + "rem_address " + "st tx_queue rx_queue tr tm->when retrnsmt" + " uid timeout inode\n"); + + if (sk_common->skc_family != AF_INET) + return 0; + + tp = bpf_skc_to_tcp_sock(sk_common); + if (tp) + return dump_tcp_sock(seq, 
tp, uid, seq_num); + + tw = bpf_skc_to_tcp_timewait_sock(sk_common); + if (tw) + return dump_tw_sock(seq, tw, uid, seq_num); + + req = bpf_skc_to_tcp_request_sock(sk_common); + if (req) + return dump_req_sock(seq, req, uid, seq_num); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c new file mode 100644 index 000000000000..10dec4392031 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c @@ -0,0 +1,250 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include "bpf_tracing_net.h" +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +static int hlist_unhashed_lockless(const struct hlist_node *h) +{ + return !(h->pprev); +} + +static int timer_pending(const struct timer_list * timer) +{ + return !hlist_unhashed_lockless(&timer->entry); +} + +extern unsigned CONFIG_HZ __kconfig; + +#define USER_HZ 100 +#define NSEC_PER_SEC 1000000000ULL +static clock_t jiffies_to_clock_t(unsigned long x) +{ + /* The implementation here tailored to a particular + * setting of USER_HZ. 
+ */ + u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ; + u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ; + + if ((tick_nsec % user_hz_nsec) == 0) { + if (CONFIG_HZ < USER_HZ) + return x * (USER_HZ / CONFIG_HZ); + else + return x / (CONFIG_HZ / USER_HZ); + } + return x * tick_nsec/user_hz_nsec; +} + +static clock_t jiffies_delta_to_clock_t(long delta) +{ + if (delta <= 0) + return 0; + + return jiffies_to_clock_t(delta); +} + +static long sock_i_ino(const struct sock *sk) +{ + const struct socket *sk_socket = sk->sk_socket; + const struct inode *inode; + unsigned long ino; + + if (!sk_socket) + return 0; + + inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; + bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); + return ino; +} + +static bool +inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk) +{ + return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; +} + +static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp) +{ + return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH; +} + +static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp, + uid_t uid, __u32 seq_num) +{ + const struct inet_connection_sock *icsk; + const struct fastopen_queue *fastopenq; + const struct in6_addr *dest, *src; + const struct inet_sock *inet; + unsigned long timer_expires; + const struct sock *sp; + __u16 destp, srcp; + int timer_active; + int rx_queue; + int state; + + icsk = &tp->tcp.inet_conn; + inet = &icsk->icsk_inet; + sp = &inet->sk; + fastopenq = &icsk->icsk_accept_queue.fastopenq; + + dest = &sp->sk_v6_daddr; + src = &sp->sk_v6_rcv_saddr; + destp = bpf_ntohs(inet->inet_dport); + srcp = bpf_ntohs(inet->inet_sport); + + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { + timer_active = 1; + timer_expires = icsk->icsk_timeout; + } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { + timer_active = 4; + timer_expires = 
icsk->icsk_timeout; + } else if (timer_pending(&sp->sk_timer)) { + timer_active = 2; + timer_expires = sp->sk_timer.expires; + } else { + timer_active = 0; + timer_expires = bpf_jiffies64(); + } + + state = sp->sk_state; + if (state == TCP_LISTEN) { + rx_queue = sp->sk_ack_backlog; + } else { + rx_queue = tp->tcp.rcv_nxt - tp->tcp.copied_seq; + if (rx_queue < 0) + rx_queue = 0; + } + + BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ", + seq_num, + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], srcp, + dest->s6_addr32[0], dest->s6_addr32[1], + dest->s6_addr32[2], dest->s6_addr32[3], destp); + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ", + state, + tp->tcp.write_seq - tp->tcp.snd_una, rx_queue, + timer_active, + jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()), + icsk->icsk_retransmits, uid, + icsk->icsk_probes_out, + sock_i_ino(sp), + sp->sk_refcnt.refs.counter); + BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n", + tp, + jiffies_to_clock_t(icsk->icsk_rto), + jiffies_to_clock_t(icsk->icsk_ack.ato), + (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk), + tp->tcp.snd_cwnd, + state == TCP_LISTEN ? fastopenq->max_qlen + : (tcp_in_initial_slowstart(&tp->tcp) ? 
-1 + : tp->tcp.snd_ssthresh) + ); + + return 0; +} + +static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw, + uid_t uid, __u32 seq_num) +{ + struct inet_timewait_sock *tw = &ttw->tw_sk; + const struct in6_addr *dest, *src; + __u16 destp, srcp; + long delta; + + delta = tw->tw_timer.expires - bpf_jiffies64(); + dest = &tw->tw_v6_daddr; + src = &tw->tw_v6_rcv_saddr; + destp = bpf_ntohs(tw->tw_dport); + srcp = bpf_ntohs(tw->tw_sport); + + BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ", + seq_num, + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], srcp, + dest->s6_addr32[0], dest->s6_addr32[1], + dest->s6_addr32[2], dest->s6_addr32[3], destp); + + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", + tw->tw_substate, 0, 0, + 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, + tw->tw_refcnt.refs.counter, tw); + + return 0; +} + +static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq, + uid_t uid, __u32 seq_num) +{ + struct inet_request_sock *irsk = &treq->req; + struct request_sock *req = &irsk->req; + struct in6_addr *src, *dest; + long ttd; + + ttd = req->rsk_timer.expires - bpf_jiffies64(); + src = &irsk->ir_v6_loc_addr; + dest = &irsk->ir_v6_rmt_addr; + + if (ttd < 0) + ttd = 0; + + BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ", + seq_num, + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], + irsk->ir_num, + dest->s6_addr32[0], dest->s6_addr32[1], + dest->s6_addr32[2], dest->s6_addr32[3], + bpf_ntohs(irsk->ir_rmt_port)); + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", + TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd), + req->num_timeout, uid, 0, 0, 0, req); + + return 0; +} + +SEC("iter/tcp") +int dump_tcp6(struct bpf_iter__tcp *ctx) +{ + struct sock_common *sk_common = ctx->sk_common; + struct seq_file *seq = ctx->meta->seq; + struct tcp_timewait_sock *tw; + struct 
tcp_request_sock *req; + struct tcp6_sock *tp; + uid_t uid = ctx->uid; + __u32 seq_num; + + if (sk_common == (void *)0) + return 0; + + seq_num = ctx->meta->seq_num; + if (seq_num == 0) + BPF_SEQ_PRINTF(seq, " sl " + "local_address " + "remote_address " + "st tx_queue rx_queue tr tm->when retrnsmt" + " uid timeout inode\n"); + + if (sk_common->skc_family != AF_INET6) + return 0; + + tp = bpf_skc_to_tcp6_sock(sk_common); + if (tp) + return dump_tcp6_sock(seq, tp, uid, seq_num); + + tw = bpf_skc_to_tcp_timewait_sock(sk_common); + if (tw) + return dump_tw_sock(seq, tw, uid, seq_num); + + req = bpf_skc_to_tcp_request_sock(sk_common); + if (req) + return dump_req_sock(seq, req, uid, seq_num); + + return 0; +} -- cgit v1.2.3 From ace6d6ec9e9e167047b6c8ca462a0830220640c2 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:22 -0700 Subject: selftests/bpf: Implement sample udp/udp6 bpf_iter programs On my VM, I got identical results between /proc/net/udp[6] and the udp{4,6} bpf iterator. 
For udp6: $ cat /sys/fs/bpf/p1 sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode ref pointer drops 1405: 000080FE00000000FF7CC4D0D9EFE4FE:0222 00000000000000000000000000000000:0000 07 00000000:00000000 00:00000000 00000000 193 0 19183 2 0000000029eab111 0 $ cat /proc/net/udp6 sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode ref pointer drops 1405: 000080FE00000000FF7CC4D0D9EFE4FE:0222 00000000000000000000000000000000:0000 07 00000000:00000000 00:00000000 00000000 193 0 19183 2 0000000029eab111 0 For udp4: $ cat /sys/fs/bpf/p4 sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode ref pointer drops 2007: 00000000:1F90 00000000:0000 07 00000000:00000000 00:00000000 00000000 0 0 72540 2 000000004ede477a 0 $ cat /proc/net/udp sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode ref pointer drops 2007: 00000000:1F90 00000000:0000 07 00000000:00000000 00:00000000 00000000 0 0 72540 2 000000004ede477a 0 Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200623230822.3989299-1-yhs@fb.com --- tools/testing/selftests/bpf/progs/bpf_iter.h | 16 +++++ tools/testing/selftests/bpf/progs/bpf_iter_udp4.c | 71 ++++++++++++++++++++ tools/testing/selftests/bpf/progs/bpf_iter_udp6.c | 79 +++++++++++++++++++++++ 3 files changed, 166 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_udp4.c create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_udp6.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h index bde23e16e777..17db3bac518b 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter.h +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h @@ -9,6 +9,8 @@ #define bpf_iter__task_file bpf_iter__task_file___not_used #define 
bpf_iter__tcp bpf_iter__tcp___not_used #define tcp6_sock tcp6_sock___not_used +#define bpf_iter__udp bpf_iter__udp___not_used +#define udp6_sock udp6_sock___not_used #include "vmlinux.h" #undef bpf_iter_meta #undef bpf_iter__bpf_map @@ -18,6 +20,8 @@ #undef bpf_iter__task_file #undef bpf_iter__tcp #undef tcp6_sock +#undef bpf_iter__udp +#undef udp6_sock struct bpf_iter_meta { struct seq_file *seq; @@ -62,3 +66,15 @@ struct tcp6_sock { struct tcp_sock tcp; struct ipv6_pinfo inet6; } __attribute__((preserve_access_index)); + +struct bpf_iter__udp { + struct bpf_iter_meta *meta; + struct udp_sock *udp_sk; + uid_t uid __attribute__((aligned(8))); + int bucket __attribute__((aligned(8))); +} __attribute__((preserve_access_index)); + +struct udp6_sock { + struct udp_sock udp; + struct ipv6_pinfo inet6; +} __attribute__((preserve_access_index)); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c new file mode 100644 index 000000000000..7053784575e4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include "bpf_tracing_net.h" +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +static long sock_i_ino(const struct sock *sk) +{ + const struct socket *sk_socket = sk->sk_socket; + const struct inode *inode; + unsigned long ino; + + if (!sk_socket) + return 0; + + inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; + bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); + return ino; +} + +SEC("iter/udp") +int dump_udp4(struct bpf_iter__udp *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct udp_sock *udp_sk = ctx->udp_sk; + struct inet_sock *inet; + __u16 srcp, destp; + __be32 dest, src; + __u32 seq_num; + int rqueue; + + if (udp_sk == (void *)0) + return 0; + + seq_num = ctx->meta->seq_num; + if (seq_num == 0) + BPF_SEQ_PRINTF(seq, + 
" sl local_address rem_address st tx_queue " + "rx_queue tr tm->when retrnsmt uid timeout " + "inode ref pointer drops\n"); + + /* filter out udp6 sockets */ + inet = &udp_sk->inet; + if (inet->sk.sk_family == AF_INET6) + return 0; + + inet = &udp_sk->inet; + dest = inet->inet_daddr; + src = inet->inet_rcv_saddr; + srcp = bpf_ntohs(inet->inet_sport); + destp = bpf_ntohs(inet->inet_dport); + rqueue = inet->sk.sk_rmem_alloc.counter - udp_sk->forward_deficit; + + BPF_SEQ_PRINTF(seq, "%5d: %08X:%04X %08X:%04X ", + ctx->bucket, src, srcp, dest, destp); + + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n", + inet->sk.sk_state, + inet->sk.sk_wmem_alloc.refs.counter - 1, + rqueue, + 0, 0L, 0, ctx->uid, 0, + sock_i_ino(&inet->sk), + inet->sk.sk_refcnt.refs.counter, udp_sk, + inet->sk.sk_drops.counter); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c new file mode 100644 index 000000000000..c1175a6ecf43 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include "bpf_tracing_net.h" +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +#define IPV6_SEQ_DGRAM_HEADER \ + " sl " \ + "local_address " \ + "remote_address " \ + "st tx_queue rx_queue tr tm->when retrnsmt" \ + " uid timeout inode ref pointer drops\n" + +static long sock_i_ino(const struct sock *sk) +{ + const struct socket *sk_socket = sk->sk_socket; + const struct inode *inode; + unsigned long ino; + + if (!sk_socket) + return 0; + + inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; + bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); + return ino; +} + +SEC("iter/udp") +int dump_udp6(struct bpf_iter__udp *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct udp_sock *udp_sk = ctx->udp_sk; + const struct in6_addr *dest, *src; + 
struct udp6_sock *udp6_sk; + struct inet_sock *inet; + __u16 srcp, destp; + __u32 seq_num; + int rqueue; + + if (udp_sk == (void *)0) + return 0; + + seq_num = ctx->meta->seq_num; + if (seq_num == 0) + BPF_SEQ_PRINTF(seq, IPV6_SEQ_DGRAM_HEADER); + + udp6_sk = bpf_skc_to_udp6_sock(udp_sk); + if (udp6_sk == (void *)0) + return 0; + + inet = &udp_sk->inet; + srcp = bpf_ntohs(inet->inet_sport); + destp = bpf_ntohs(inet->inet_dport); + rqueue = inet->sk.sk_rmem_alloc.counter - udp_sk->forward_deficit; + dest = &inet->sk.sk_v6_daddr; + src = &inet->sk.sk_v6_rcv_saddr; + + BPF_SEQ_PRINTF(seq, "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ", + ctx->bucket, + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], srcp, + dest->s6_addr32[0], dest->s6_addr32[1], + dest->s6_addr32[2], dest->s6_addr32[3], destp); + + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n", + inet->sk.sk_state, + inet->sk.sk_wmem_alloc.refs.counter - 1, + rqueue, + 0, 0L, 0, ctx->uid, 0, + sock_i_ino(&inet->sk), + inet->sk.sk_refcnt.refs.counter, udp_sk, + inet->sk.sk_drops.counter); + + return 0; +} -- cgit v1.2.3 From cfcd75f9bf12301dfdcfe9ff6dfb240997e7745f Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 23 Jun 2020 16:08:23 -0700 Subject: selftests/bpf: Add tcp/udp iterator programs to selftests Added tcp{4,6} and udp{4,6} bpf programs into test_progs selftest so that they at least can load successfully. $ ./test_progs -n 3 ... #3/7 tcp4:OK #3/8 tcp6:OK #3/9 udp4:OK #3/10 udp6:OK ... 
#3 bpf_iter:OK Summary: 1/16 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200623230823.3989372-1-yhs@fb.com --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 68 +++++++++++++++++++++++ 1 file changed, 68 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 87c29dde1cf9..1e2e0fced6e8 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -6,6 +6,10 @@ #include "bpf_iter_bpf_map.skel.h" #include "bpf_iter_task.skel.h" #include "bpf_iter_task_file.skel.h" +#include "bpf_iter_tcp4.skel.h" +#include "bpf_iter_tcp6.skel.h" +#include "bpf_iter_udp4.skel.h" +#include "bpf_iter_udp6.skel.h" #include "bpf_iter_test_kern1.skel.h" #include "bpf_iter_test_kern2.skel.h" #include "bpf_iter_test_kern3.skel.h" @@ -120,6 +124,62 @@ static void test_task_file(void) bpf_iter_task_file__destroy(skel); } +static void test_tcp4(void) +{ + struct bpf_iter_tcp4 *skel; + + skel = bpf_iter_tcp4__open_and_load(); + if (CHECK(!skel, "bpf_iter_tcp4__open_and_load", + "skeleton open_and_load failed\n")) + return; + + do_dummy_read(skel->progs.dump_tcp4); + + bpf_iter_tcp4__destroy(skel); +} + +static void test_tcp6(void) +{ + struct bpf_iter_tcp6 *skel; + + skel = bpf_iter_tcp6__open_and_load(); + if (CHECK(!skel, "bpf_iter_tcp6__open_and_load", + "skeleton open_and_load failed\n")) + return; + + do_dummy_read(skel->progs.dump_tcp6); + + bpf_iter_tcp6__destroy(skel); +} + +static void test_udp4(void) +{ + struct bpf_iter_udp4 *skel; + + skel = bpf_iter_udp4__open_and_load(); + if (CHECK(!skel, "bpf_iter_udp4__open_and_load", + "skeleton open_and_load failed\n")) + return; + + do_dummy_read(skel->progs.dump_udp4); + + bpf_iter_udp4__destroy(skel); +} + +static void test_udp6(void) +{ + 
struct bpf_iter_udp6 *skel; + + skel = bpf_iter_udp6__open_and_load(); + if (CHECK(!skel, "bpf_iter_udp6__open_and_load", + "skeleton open_and_load failed\n")) + return; + + do_dummy_read(skel->progs.dump_udp6); + + bpf_iter_udp6__destroy(skel); +} + /* The expected string is less than 16 bytes */ static int do_read_with_fd(int iter_fd, const char *expected, bool read_one_char) @@ -394,6 +454,14 @@ void test_bpf_iter(void) test_task(); if (test__start_subtest("task_file")) test_task_file(); + if (test__start_subtest("tcp4")) + test_tcp4(); + if (test__start_subtest("tcp6")) + test_tcp6(); + if (test__start_subtest("udp4")) + test_udp4(); + if (test__start_subtest("udp6")) + test_udp6(); if (test__start_subtest("anon")) test_anon_iter(false); if (test__start_subtest("anon-read-one-char")) -- cgit v1.2.3 From 9023497d8746d355bac8ddbc65797a4f553726fd Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Wed, 24 Jun 2020 16:31:24 +0200 Subject: tools, bpftool: Define prog_type_name array only once Define prog_type_name in prog.c instead of main.h so it is only defined once. This leads to a slight decrease in the binary size of bpftool. 
Before: text data bss dec hex filename 401032 11936 1573160 1986128 1e4e50 bpftool After: text data bss dec hex filename 399024 11168 1573160 1983352 1e4378 bpftool Signed-off-by: Tobias Klauser Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200624143124.12914-1-tklauser@distanz.ch --- tools/bpf/bpftool/feature.c | 4 ++-- tools/bpf/bpftool/link.c | 4 ++-- tools/bpf/bpftool/main.h | 33 ++------------------------------- tools/bpf/bpftool/map.c | 4 ++-- tools/bpf/bpftool/prog.c | 34 ++++++++++++++++++++++++++++++++++ 5 files changed, 42 insertions(+), 37 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 768bf77df886..1cd75807673e 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -695,7 +695,7 @@ section_program_types(bool *supported_types, const char *define_prefix, "/*** eBPF program types ***/", define_prefix); - for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++) + for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++) probe_prog_type(i, supported_types, define_prefix, ifindex); print_end_section(); @@ -741,7 +741,7 @@ section_helpers(bool *supported_types, const char *define_prefix, __u32 ifindex) " %sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n", define_prefix, define_prefix, define_prefix, define_prefix); - for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++) + for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++) probe_helpers_for_progtype(i, supported_types[i], define_prefix, ifindex); diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 7329f3134283..326b8fdf0243 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -108,7 +108,7 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) if (err) return err; - if (prog_info.type < ARRAY_SIZE(prog_type_name)) + if (prog_info.type < prog_type_name_size) 
jsonw_string_field(json_wtr, "prog_type", prog_type_name[prog_info.type]); else @@ -187,7 +187,7 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) if (err) return err; - if (prog_info.type < ARRAY_SIZE(prog_type_name)) + if (prog_info.type < prog_type_name_size) printf("\n\tprog_type %s ", prog_type_name[prog_info.type]); else diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index ce26271e5f0c..269f1cb6aef5 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -56,37 +56,8 @@ #define HELP_SPEC_LINK \ "LINK := { id LINK_ID | pinned FILE }" -static const char * const prog_type_name[] = { - [BPF_PROG_TYPE_UNSPEC] = "unspec", - [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", - [BPF_PROG_TYPE_KPROBE] = "kprobe", - [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls", - [BPF_PROG_TYPE_SCHED_ACT] = "sched_act", - [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint", - [BPF_PROG_TYPE_XDP] = "xdp", - [BPF_PROG_TYPE_PERF_EVENT] = "perf_event", - [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb", - [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock", - [BPF_PROG_TYPE_LWT_IN] = "lwt_in", - [BPF_PROG_TYPE_LWT_OUT] = "lwt_out", - [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", - [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", - [BPF_PROG_TYPE_SK_SKB] = "sk_skb", - [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", - [BPF_PROG_TYPE_SK_MSG] = "sk_msg", - [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", - [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", - [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local", - [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", - [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport", - [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", - [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl", - [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable", - [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt", - [BPF_PROG_TYPE_TRACING] = "tracing", - [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops", - [BPF_PROG_TYPE_EXT] = "ext", -}; +extern const char * const 
prog_type_name[]; +extern const size_t prog_type_name_size; static const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = { [BPF_CGROUP_INET_INGRESS] = "ingress", diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index bbb74d387fb0..42c215b6eae6 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -473,7 +473,7 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) if (owner_prog_type) { unsigned int prog_type = atoi(owner_prog_type); - if (prog_type < ARRAY_SIZE(prog_type_name)) + if (prog_type < prog_type_name_size) jsonw_string_field(json_wtr, "owner_prog_type", prog_type_name[prog_type]); else @@ -558,7 +558,7 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) if (owner_prog_type) { unsigned int prog_type = atoi(owner_prog_type); - if (prog_type < ARRAY_SIZE(prog_type_name)) + if (prog_type < prog_type_name_size) printf("owner_prog_type %s ", prog_type_name[prog_type]); else diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index e21fa8ad2efa..6863c57effd0 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -29,6 +29,40 @@ #include "main.h" #include "xlated_dumper.h" +const char * const prog_type_name[] = { + [BPF_PROG_TYPE_UNSPEC] = "unspec", + [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", + [BPF_PROG_TYPE_KPROBE] = "kprobe", + [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls", + [BPF_PROG_TYPE_SCHED_ACT] = "sched_act", + [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint", + [BPF_PROG_TYPE_XDP] = "xdp", + [BPF_PROG_TYPE_PERF_EVENT] = "perf_event", + [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb", + [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock", + [BPF_PROG_TYPE_LWT_IN] = "lwt_in", + [BPF_PROG_TYPE_LWT_OUT] = "lwt_out", + [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", + [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", + [BPF_PROG_TYPE_SK_SKB] = "sk_skb", + [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", + [BPF_PROG_TYPE_SK_MSG] = "sk_msg", + [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", + 
[BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", + [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local", + [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", + [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport", + [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", + [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl", + [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable", + [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt", + [BPF_PROG_TYPE_TRACING] = "tracing", + [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops", + [BPF_PROG_TYPE_EXT] = "ext", +}; + +const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name); + enum dump_mode { DUMP_JITED, DUMP_XLATED, -- cgit v1.2.3 From 16d37ee3d2b1c30052ba5ebb69556040fc174061 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Wed, 24 Jun 2020 16:31:54 +0200 Subject: tools, bpftool: Define attach_type_name array only once Define attach_type_name in common.c instead of main.h so it is only defined once. This leads to a slight decrease in the binary size of bpftool. 
Before: text data bss dec hex filename 399024 11168 1573160 1983352 1e4378 bpftool After: text data bss dec hex filename 398256 10880 1573160 1982296 1e3f58 bpftool Signed-off-by: Tobias Klauser Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20200624143154.13145-1-tklauser@distanz.ch --- tools/bpf/bpftool/common.c | 36 ++++++++++++++++++++++++++++++++++++ tools/bpf/bpftool/main.h | 36 +----------------------------------- 2 files changed, 37 insertions(+), 35 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 6c864c3683fc..18e5604fe260 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -29,6 +29,42 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif +const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = { + [BPF_CGROUP_INET_INGRESS] = "ingress", + [BPF_CGROUP_INET_EGRESS] = "egress", + [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create", + [BPF_CGROUP_SOCK_OPS] = "sock_ops", + [BPF_CGROUP_DEVICE] = "device", + [BPF_CGROUP_INET4_BIND] = "bind4", + [BPF_CGROUP_INET6_BIND] = "bind6", + [BPF_CGROUP_INET4_CONNECT] = "connect4", + [BPF_CGROUP_INET6_CONNECT] = "connect6", + [BPF_CGROUP_INET4_POST_BIND] = "post_bind4", + [BPF_CGROUP_INET6_POST_BIND] = "post_bind6", + [BPF_CGROUP_INET4_GETPEERNAME] = "getpeername4", + [BPF_CGROUP_INET6_GETPEERNAME] = "getpeername6", + [BPF_CGROUP_INET4_GETSOCKNAME] = "getsockname4", + [BPF_CGROUP_INET6_GETSOCKNAME] = "getsockname6", + [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4", + [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6", + [BPF_CGROUP_SYSCTL] = "sysctl", + [BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4", + [BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6", + [BPF_CGROUP_GETSOCKOPT] = "getsockopt", + [BPF_CGROUP_SETSOCKOPT] = "setsockopt", + + [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser", + [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict", + [BPF_SK_MSG_VERDICT] = "sk_msg_verdict", + [BPF_LIRC_MODE2] = "lirc_mode2", + [BPF_FLOW_DISSECTOR] 
= "flow_dissector", + [BPF_TRACE_RAW_TP] = "raw_tp", + [BPF_TRACE_FENTRY] = "fentry", + [BPF_TRACE_FEXIT] = "fexit", + [BPF_MODIFY_RETURN] = "mod_ret", + [BPF_LSM_MAC] = "lsm_mac", +}; + void p_err(const char *fmt, ...) { va_list ap; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 269f1cb6aef5..78d34e860713 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -59,41 +59,7 @@ extern const char * const prog_type_name[]; extern const size_t prog_type_name_size; -static const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = { - [BPF_CGROUP_INET_INGRESS] = "ingress", - [BPF_CGROUP_INET_EGRESS] = "egress", - [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create", - [BPF_CGROUP_SOCK_OPS] = "sock_ops", - [BPF_CGROUP_DEVICE] = "device", - [BPF_CGROUP_INET4_BIND] = "bind4", - [BPF_CGROUP_INET6_BIND] = "bind6", - [BPF_CGROUP_INET4_CONNECT] = "connect4", - [BPF_CGROUP_INET6_CONNECT] = "connect6", - [BPF_CGROUP_INET4_POST_BIND] = "post_bind4", - [BPF_CGROUP_INET6_POST_BIND] = "post_bind6", - [BPF_CGROUP_INET4_GETPEERNAME] = "getpeername4", - [BPF_CGROUP_INET6_GETPEERNAME] = "getpeername6", - [BPF_CGROUP_INET4_GETSOCKNAME] = "getsockname4", - [BPF_CGROUP_INET6_GETSOCKNAME] = "getsockname6", - [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4", - [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6", - [BPF_CGROUP_SYSCTL] = "sysctl", - [BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4", - [BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6", - [BPF_CGROUP_GETSOCKOPT] = "getsockopt", - [BPF_CGROUP_SETSOCKOPT] = "setsockopt", - - [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser", - [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict", - [BPF_SK_MSG_VERDICT] = "sk_msg_verdict", - [BPF_LIRC_MODE2] = "lirc_mode2", - [BPF_FLOW_DISSECTOR] = "flow_dissector", - [BPF_TRACE_RAW_TP] = "raw_tp", - [BPF_TRACE_FENTRY] = "fentry", - [BPF_TRACE_FEXIT] = "fexit", - [BPF_MODIFY_RETURN] = "mod_ret", - [BPF_LSM_MAC] = "lsm_mac", -}; +extern const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE]; extern 
const char * const map_type_name[]; extern const size_t map_type_name_size; -- cgit v1.2.3 From d929758101fc0674008169dc1de963e3181c587b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 25 Jun 2020 16:26:28 -0700 Subject: libbpf: Support disabling auto-loading BPF programs Currently, bpf_object__load() (and, by extension, the skeleton's load) will always attempt to prepare, relocate, and load into the kernel every single BPF program found inside the BPF object file. This is often convenient, the right thing to do, and what users expect. But there are plenty of cases (especially with BPF development constantly picking up the pace) where a BPF application is intended to work with old kernels, with a potentially reduced set of features. On kernels supporting extra features, however, it would like to take full advantage of them by employing an extra BPF program. This could be a choice of using fentry/fexit over kprobe/kretprobe, if the kernel is recent enough and is built with BTF. Or a BPF program might provide an optimized bpf_iter-based solution that user-space might want to use whenever available. And so on. With libbpf and BPF CO-RE in particular, it's advantageous not to have to maintain two separate BPF object files to achieve this. So to enable such use cases, this patch adds the ability to request that chosen BPF programs not be auto-loaded. In such a case, libbpf won't attempt to perform relocations (which might fail due to an old kernel), won't try to resolve BTF types for BTF-aware (tp_btf/fentry/fexit/etc.) program types, because BTF might not be present, and so on. The skeleton will also automatically skip the auto-attachment step for such not-loaded BPF programs. Overall, this feature simplifies development and deployment of real-world BPF applications with complicated compatibility requirements. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200625232629.3444003-2-andriin@fb.com --- tools/lib/bpf/libbpf.c | 48 ++++++++++++++++++++++++++++++++++++++++-------- tools/lib/bpf/libbpf.h | 2 ++ tools/lib/bpf/libbpf.map | 2 ++ 3 files changed, 44 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6b4955d170ff..4ea7f4f1a691 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -230,6 +230,7 @@ struct bpf_program { struct bpf_insn *insns; size_t insns_cnt, main_prog_cnt; enum bpf_prog_type type; + bool load; struct reloc_desc *reloc_desc; int nr_reloc; @@ -541,6 +542,7 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, prog->instances.fds = NULL; prog->instances.nr = -1; prog->type = BPF_PROG_TYPE_UNSPEC; + prog->load = true; return 0; errout: @@ -2513,6 +2515,8 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj) need_vmlinux_btf = true; bpf_object__for_each_program(prog, obj) { + if (!prog->load) + continue; if (libbpf_prog_needs_vmlinux_btf(prog)) { need_vmlinux_btf = true; break; @@ -5445,6 +5449,12 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) { int err = 0, fd, i, btf_id; + if (prog->obj->loaded) { + pr_warn("prog '%s'('%s'): can't load after object was loaded\n", + prog->name, prog->section_name); + return -EINVAL; + } + if ((prog->type == BPF_PROG_TYPE_TRACING || prog->type == BPF_PROG_TYPE_LSM || prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { @@ -5533,16 +5543,21 @@ static bool bpf_program__is_function_storage(const struct bpf_program *prog, static int bpf_object__load_progs(struct bpf_object *obj, int log_level) { + struct bpf_program *prog; size_t i; int err; for (i = 0; i < obj->nr_programs; i++) { - if (bpf_program__is_function_storage(&obj->programs[i], obj)) + prog = &obj->programs[i]; + if (bpf_program__is_function_storage(prog, 
obj)) continue; - obj->programs[i].log_level |= log_level; - err = bpf_program__load(&obj->programs[i], - obj->license, - obj->kern_version); + if (!prog->load) { + pr_debug("prog '%s'('%s'): skipped loading\n", + prog->name, prog->section_name); + continue; + } + prog->log_level |= log_level; + err = bpf_program__load(prog, obj->license, obj->kern_version); if (err) return err; } @@ -5869,12 +5884,10 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) return -EINVAL; if (obj->loaded) { - pr_warn("object should not be loaded twice\n"); + pr_warn("object '%s': load can't be attempted twice\n", obj->name); return -EINVAL; } - obj->loaded = true; - err = bpf_object__probe_loading(obj); err = err ? : bpf_object__probe_caps(obj); err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); @@ -5889,6 +5902,8 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) btf__free(obj->btf_vmlinux); obj->btf_vmlinux = NULL; + obj->loaded = true; /* doesn't matter if successfully or not */ + if (err) goto out; @@ -6661,6 +6676,20 @@ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy) return title; } +bool bpf_program__autoload(const struct bpf_program *prog) +{ + return prog->load; +} + +int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) +{ + if (prog->obj->loaded) + return -EINVAL; + + prog->load = autoload; + return 0; +} + int bpf_program__fd(const struct bpf_program *prog) { return bpf_program__nth_fd(prog, 0); @@ -9283,6 +9312,9 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) const struct bpf_sec_def *sec_def; const char *sec_name = bpf_program__title(prog, false); + if (!prog->load) + continue; + sec_def = find_sec_def(sec_name); if (!sec_def || !sec_def->attach_fn) continue; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index fdd279fb1866..2335971ed0bd 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -200,6 +200,8 @@ LIBBPF_API void 
bpf_program__set_ifindex(struct bpf_program *prog, LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog); LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy); +LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog); +LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload); /* returns program size in bytes */ LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 9914e0db4859..6544d2cd1ed6 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -286,4 +286,6 @@ LIBBPF_0.1.0 { bpf_map__set_value_size; bpf_map__type; bpf_map__value_size; + bpf_program__autoload; + bpf_program__set_autoload; } LIBBPF_0.0.9; -- cgit v1.2.3 From 5712174c5c9e3d684ad05d4aaed1e14acda4bb74 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 25 Jun 2020 16:26:29 -0700 Subject: selftests/bpf: Test auto-load disabling logic for BPF programs Validate that BPF object with broken (in multiple ways) BPF program can still be successfully loaded, if that broken BPF program is disabled. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200625232629.3444003-3-andriin@fb.com --- tools/testing/selftests/bpf/prog_tests/autoload.c | 41 +++++++++++++++++++++++ tools/testing/selftests/bpf/progs/test_autoload.c | 40 ++++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/autoload.c create mode 100644 tools/testing/selftests/bpf/progs/test_autoload.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/autoload.c b/tools/testing/selftests/bpf/prog_tests/autoload.c new file mode 100644 index 000000000000..3693f7d133eb --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/autoload.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include +#include +#include "test_autoload.skel.h" + +void test_autoload(void) +{ + int duration = 0, err; + struct test_autoload* skel; + + skel = test_autoload__open_and_load(); + /* prog3 should be broken */ + if (CHECK(skel, "skel_open_and_load", "unexpected success\n")) + goto cleanup; + + skel = test_autoload__open(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + goto cleanup; + + /* don't load prog3 */ + bpf_program__set_autoload(skel->progs.prog3, false); + + err = test_autoload__load(skel); + if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err)) + goto cleanup; + + err = test_autoload__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + usleep(1); + + CHECK(!skel->bss->prog1_called, "prog1", "not called\n"); + CHECK(!skel->bss->prog2_called, "prog2", "not called\n"); + CHECK(skel->bss->prog3_called, "prog3", "called?!\n"); + +cleanup: + test_autoload__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_autoload.c b/tools/testing/selftests/bpf/progs/test_autoload.c new file mode 100644 index 000000000000..62c8cdec6d5d --- /dev/null +++ 
b/tools/testing/selftests/bpf/progs/test_autoload.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include "vmlinux.h" +#include +#include +#include + +bool prog1_called = false; +bool prog2_called = false; +bool prog3_called = false; + +SEC("raw_tp/sys_enter") +int prog1(const void *ctx) +{ + prog1_called = true; + return 0; +} + +SEC("raw_tp/sys_exit") +int prog2(const void *ctx) +{ + prog2_called = true; + return 0; +} + +struct fake_kernel_struct { + int whatever; +} __attribute__((preserve_access_index)); + +SEC("fentry/unexisting-kprobe-will-fail-if-loaded") +int prog3(const void *ctx) +{ + struct fake_kernel_struct *fake = (void *)ctx; + fake->whatever = 123; + prog3_called = true; + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From ec23eb705620234421fd48fc2382490fcfbafc37 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 29 Jun 2020 17:47:58 -0700 Subject: tools/bpftool: Allow substituting custom vmlinux.h for the build In some build contexts (e.g., a Travis CI build for an outdated kernel), vmlinux.h, generated from the available kernel, doesn't contain all the types necessary for BPF program compilation. For such a setup, the most maintainable way to deal with this problem is to keep a pre-generated (almost up-to-date) vmlinux.h checked in and use it for compilation purposes. After that, bpftool can deal with the kernel missing some of the features at runtime with no problems. To that effect, allow specifying the path to a custom vmlinux.h to bpftool's Makefile with the VMLINUX_H variable. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200630004759.521530-1-andriin@fb.com --- tools/bpf/bpftool/Makefile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 8c6563e56ffc..273da1615503 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -122,20 +122,24 @@ BPFTOOL_BOOTSTRAP := $(if $(OUTPUT),$(OUTPUT)bpftool-bootstrap,./bpftool-bootstr BOOTSTRAP_OBJS = $(addprefix $(OUTPUT),main.o common.o json_writer.o gen.o btf.o) OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o -VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \ +VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ ../../../vmlinux \ /sys/kernel/btf/vmlinux \ /boot/vmlinux-$(shell uname -r) -VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) +VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) -ifneq ($(VMLINUX_BTF),) +ifneq ($(VMLINUX_BTF)$(VMLINUX_H),) ifeq ($(feature-clang-bpf-co-re),1) BUILD_BPF_SKELS := 1 $(OUTPUT)vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL_BOOTSTRAP) +ifeq ($(VMLINUX_H),) $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) btf dump file $< format c > $@ +else + $(Q)cp "$(VMLINUX_H)" $@ +endif $(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF) $(QUIET_CLANG)$(CLANG) \ -- cgit v1.2.3 From ca4db6389d611eee2eb7c1dfe710b62d8ea06772 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 29 Jun 2020 17:47:59 -0700 Subject: selftests/bpf: Allow substituting custom vmlinux.h for selftests build Similarly to bpftool Makefile, allow to specify custom location of vmlinux.h to be used during the build. This allows simpler testing setups with checked-in pre-generated vmlinux.h. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200630004759.521530-2-andriin@fb.com --- tools/testing/selftests/bpf/Makefile | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 22aaec74ea0a..1f9c696b3edf 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -134,12 +134,12 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) $(call msg,CC,,$@) $(CC) -c $(CFLAGS) -o $@ $< -VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \ +VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ ../../../../vmlinux \ /sys/kernel/btf/vmlinux \ /boot/vmlinux-$(shell uname -r) -VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) +VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) $(OUTPUT)/runqslower: $(BPFOBJ) $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ @@ -182,8 +182,13 @@ $(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR): mkdir -p $@ $(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR) +ifeq ($(VMLINUX_H),) $(call msg,GEN,,$@) $(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ +else + $(call msg,CP,,$@) + cp "$(VMLINUX_H)" $@ +endif # Get Clang's default includes on this system, as opposed to those seen by # '-target bpf'. This fixes "missing" files on some architectures/distros, -- cgit v1.2.3 From 30ad688094bcfe8721bfd4003f6a20c9b6ddf964 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 30 Jun 2020 08:21:24 -0700 Subject: libbpf: Make bpf_endian co-exist with vmlinux.h Make bpf_endian.h compatible with vmlinux.h. It is a frequent request from users wanting to use bpf_endian.h in their BPF applications using CO-RE and vmlinux.h. To achieve that, re-implement byte swap macros and drop all the header includes. 
This way it can be used both with linux header includes, as well as with a vmlinux.h. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200630152125.3631920-2-andriin@fb.com --- tools/lib/bpf/bpf_endian.h | 43 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf_endian.h b/tools/lib/bpf/bpf_endian.h index fbe28008450f..ec9db4feca9f 100644 --- a/tools/lib/bpf/bpf_endian.h +++ b/tools/lib/bpf/bpf_endian.h @@ -2,8 +2,35 @@ #ifndef __BPF_ENDIAN__ #define __BPF_ENDIAN__ -#include -#include +/* + * Isolate byte #n and put it into byte #m, for __u##b type. + * E.g., moving byte #6 (nnnnnnnn) into byte #1 (mmmmmmmm) for __u64: + * 1) xxxxxxxx nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx + * 2) nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx 00000000 + * 3) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn + * 4) 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn 00000000 + */ +#define ___bpf_mvb(x, b, n, m) ((__u##b)(x) << (b-(n+1)*8) >> (b-8) << (m*8)) + +#define ___bpf_swab16(x) ((__u16)( \ + ___bpf_mvb(x, 16, 0, 1) | \ + ___bpf_mvb(x, 16, 1, 0))) + +#define ___bpf_swab32(x) ((__u32)( \ + ___bpf_mvb(x, 32, 0, 3) | \ + ___bpf_mvb(x, 32, 1, 2) | \ + ___bpf_mvb(x, 32, 2, 1) | \ + ___bpf_mvb(x, 32, 3, 0))) + +#define ___bpf_swab64(x) ((__u64)( \ + ___bpf_mvb(x, 64, 0, 7) | \ + ___bpf_mvb(x, 64, 1, 6) | \ + ___bpf_mvb(x, 64, 2, 5) | \ + ___bpf_mvb(x, 64, 3, 4) | \ + ___bpf_mvb(x, 64, 4, 3) | \ + ___bpf_mvb(x, 64, 5, 2) | \ + ___bpf_mvb(x, 64, 6, 1) | \ + ___bpf_mvb(x, 64, 7, 0))) /* LLVM's BPF target selects the endianness of the CPU * it compiles on, or the user specifies (bpfel/bpfeb), @@ -23,16 +50,16 @@ #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ # define __bpf_ntohs(x) __builtin_bswap16(x) # define __bpf_htons(x) __builtin_bswap16(x) -# 
define __bpf_constant_ntohs(x) ___constant_swab16(x) -# define __bpf_constant_htons(x) ___constant_swab16(x) +# define __bpf_constant_ntohs(x) ___bpf_swab16(x) +# define __bpf_constant_htons(x) ___bpf_swab16(x) # define __bpf_ntohl(x) __builtin_bswap32(x) # define __bpf_htonl(x) __builtin_bswap32(x) -# define __bpf_constant_ntohl(x) ___constant_swab32(x) -# define __bpf_constant_htonl(x) ___constant_swab32(x) +# define __bpf_constant_ntohl(x) ___bpf_swab32(x) +# define __bpf_constant_htonl(x) ___bpf_swab32(x) # define __bpf_be64_to_cpu(x) __builtin_bswap64(x) # define __bpf_cpu_to_be64(x) __builtin_bswap64(x) -# define __bpf_constant_be64_to_cpu(x) ___constant_swab64(x) -# define __bpf_constant_cpu_to_be64(x) ___constant_swab64(x) +# define __bpf_constant_be64_to_cpu(x) ___bpf_swab64(x) +# define __bpf_constant_cpu_to_be64(x) ___bpf_swab64(x) #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ # define __bpf_ntohs(x) (x) # define __bpf_htons(x) (x) -- cgit v1.2.3 From 8c18311067d0f0d5f332b9e1f3859eb15e23332d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 30 Jun 2020 08:21:25 -0700 Subject: selftests/bpf: Add byte swapping selftest Add simple selftest validating byte swap built-ins and compile-time macros. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200630152125.3631920-3-andriin@fb.com --- tools/testing/selftests/bpf/prog_tests/endian.c | 53 +++++++++++++++++++++++++ tools/testing/selftests/bpf/progs/test_endian.c | 37 +++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/endian.c create mode 100644 tools/testing/selftests/bpf/progs/test_endian.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/endian.c b/tools/testing/selftests/bpf/prog_tests/endian.c new file mode 100644 index 000000000000..1a11612ace6c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/endian.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include +#include "test_endian.skel.h" + +static int duration; + +#define IN16 0x1234 +#define IN32 0x12345678U +#define IN64 0x123456789abcdef0ULL + +#define OUT16 0x3412 +#define OUT32 0x78563412U +#define OUT64 0xf0debc9a78563412ULL + +void test_endian(void) +{ + struct test_endian* skel; + struct test_endian__bss *bss; + int err; + + skel = test_endian__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + bss = skel->bss; + + bss->in16 = IN16; + bss->in32 = IN32; + bss->in64 = IN64; + + err = test_endian__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + usleep(1); + + CHECK(bss->out16 != OUT16, "out16", "got 0x%llx != exp 0x%llx\n", + (__u64)bss->out16, (__u64)OUT16); + CHECK(bss->out32 != OUT32, "out32", "got 0x%llx != exp 0x%llx\n", + (__u64)bss->out32, (__u64)OUT32); + CHECK(bss->out64 != OUT64, "out16", "got 0x%llx != exp 0x%llx\n", + (__u64)bss->out64, (__u64)OUT64); + + CHECK(bss->const16 != OUT16, "const16", "got 0x%llx != exp 0x%llx\n", + (__u64)bss->const16, (__u64)OUT16); + CHECK(bss->const32 != OUT32, "const32", "got 0x%llx != exp 0x%llx\n", + 
(__u64)bss->const32, (__u64)OUT32); + CHECK(bss->const64 != OUT64, "const64", "got 0x%llx != exp 0x%llx\n", + (__u64)bss->const64, (__u64)OUT64); +cleanup: + test_endian__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_endian.c b/tools/testing/selftests/bpf/progs/test_endian.c new file mode 100644 index 000000000000..ddb687c5d125 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_endian.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include "vmlinux.h" +#include +#include + +#define IN16 0x1234 +#define IN32 0x12345678U +#define IN64 0x123456789abcdef0ULL + +__u16 in16 = 0; +__u32 in32 = 0; +__u64 in64 = 0; + +__u16 out16 = 0; +__u32 out32 = 0; +__u64 out64 = 0; + +__u16 const16 = 0; +__u32 const32 = 0; +__u64 const64 = 0; + +SEC("raw_tp/sys_enter") +int sys_enter(const void *ctx) +{ + out16 = __builtin_bswap16(in16); + out32 = __builtin_bswap32(in32); + out64 = __builtin_bswap64(in64); + const16 = ___bpf_swab16(IN16); + const32 = ___bpf_swab32(IN32); + const64 = ___bpf_swab64(IN64); + + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From fa28dcb82a38f8e3993b0fae9106b1a80b59e4f0 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 29 Jun 2020 23:28:44 -0700 Subject: bpf: Introduce helper bpf_get_task_stack() Introduce helper bpf_get_task_stack(), which dumps stack trace of given task. This is different to bpf_get_stack(), which gets stack trace of current task. One potential use case of bpf_get_task_stack() is to call it from bpf_iter__task and dump all /proc/<pid>/stack to a seq_file. bpf_get_task_stack() uses stack_trace_save_tsk() instead of get_perf_callchain() for kernel stack. The benefit of this choice is that stack_trace_save_tsk() doesn't require changes in arch/. The downside of using stack_trace_save_tsk() is that stack_trace_save_tsk() dumps the stack trace to unsigned long array. For 32-bit systems, we need to translate it to u64 array.
Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200630062846.664389-3-songliubraving@fb.com --- tools/include/uapi/linux/bpf.h | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0cb8ec948816..da9bf35a26f8 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3285,6 +3285,39 @@ union bpf_attr { * Dynamically cast a *sk* pointer to a *udp6_sock* pointer. * Return * *sk* if casting is valid, or NULL otherwise. + * + * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) + * Description + * Return a user or a kernel stack in bpf program provided buffer. + * To achieve this, the helper needs *task*, which is a valid + * pointer to struct task_struct. To store the stacktrace, the + * bpf program provides *buf* with a nonnegative *size*. + * + * The last argument, *flags*, holds the number of stack frames to + * skip (from 0 to 255), masked with + * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set + * the following flags: + * + * **BPF_F_USER_STACK** + * Collect a user space stack instead of a kernel stack. + * **BPF_F_USER_BUILD_ID** + * Collect buildid+offset instead of ips for user stack, + * only valid if **BPF_F_USER_STACK** is also specified. + * + * **bpf_get_task_stack**\ () can collect up to + * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject + * to sufficient large buffer size. Note that + * this limit can be controlled with the **sysctl** program, and + * that it should be manually increased in order to profile long + * user stacks (such as stacks for Java programs). To do so, use: + * + * :: + * + * # sysctl kernel.perf_event_max_stack= + * Return + * A non-negative value equal to or less than *size* on success, + * or a negative error in case of failure. 
+ * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3427,7 +3460,9 @@ union bpf_attr { FN(skc_to_tcp_sock), \ FN(skc_to_tcp_timewait_sock), \ FN(skc_to_tcp_request_sock), \ - FN(skc_to_udp6_sock), + FN(skc_to_udp6_sock), \ + FN(get_task_stack), \ + /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From c7568114bc56cf3ec0bd9eb117bbe7cad3d30e11 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 29 Jun 2020 23:28:46 -0700 Subject: selftests/bpf: Add bpf_iter test with bpf_get_task_stack() The new test is similar to other bpf_iter tests. It dumps all /proc/<pid>/stack to a seq_file. Here is some example output: pid: 2873 num_entries: 3 [<0>] worker_thread+0xc6/0x380 [<0>] kthread+0x135/0x150 [<0>] ret_from_fork+0x22/0x30 pid: 2874 num_entries: 9 [<0>] __bpf_get_stack+0x15e/0x250 [<0>] bpf_prog_22a400774977bb30_dump_task_stack+0x4a/0xb3c [<0>] bpf_iter_run_prog+0x81/0x170 [<0>] __task_seq_show+0x58/0x80 [<0>] bpf_seq_read+0x1c3/0x3b0 [<0>] vfs_read+0x9e/0x170 [<0>] ksys_read+0xa7/0xe0 [<0>] do_syscall_64+0x4c/0xa0 [<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Note: bpf_iter test as-is doesn't print the contents of the seq_file. To see the example above, it is necessary to add printf() to do_dummy_read.
Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200630062846.664389-5-songliubraving@fb.com --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 17 ++++++++++ .../selftests/bpf/progs/bpf_iter_task_stack.c | 37 ++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 1e2e0fced6e8..fed42755416d 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -5,6 +5,7 @@ #include "bpf_iter_netlink.skel.h" #include "bpf_iter_bpf_map.skel.h" #include "bpf_iter_task.skel.h" +#include "bpf_iter_task_stack.skel.h" #include "bpf_iter_task_file.skel.h" #include "bpf_iter_tcp4.skel.h" #include "bpf_iter_tcp6.skel.h" @@ -110,6 +111,20 @@ static void test_task(void) bpf_iter_task__destroy(skel); } +static void test_task_stack(void) +{ + struct bpf_iter_task_stack *skel; + + skel = bpf_iter_task_stack__open_and_load(); + if (CHECK(!skel, "bpf_iter_task_stack__open_and_load", + "skeleton open_and_load failed\n")) + return; + + do_dummy_read(skel->progs.dump_task_stack); + + bpf_iter_task_stack__destroy(skel); +} + static void test_task_file(void) { struct bpf_iter_task_file *skel; @@ -452,6 +467,8 @@ void test_bpf_iter(void) test_bpf_map(); if (test__start_subtest("task")) test_task(); + if (test__start_subtest("task_stack")) + test_task_stack(); if (test__start_subtest("task_file")) test_task_file(); if (test__start_subtest("tcp4")) diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c new file mode 100644 index 000000000000..e40d32a2ed93 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c @@ -0,0 +1,37 @@ +// 
SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +#define MAX_STACK_TRACE_DEPTH 64 +unsigned long entries[MAX_STACK_TRACE_DEPTH]; +#define SIZE_OF_ULONG (sizeof(unsigned long)) + +SEC("iter/task") +int dump_task_stack(struct bpf_iter__task *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct task_struct *task = ctx->task; + long i, retlen; + + if (task == (void *)0) + return 0; + + retlen = bpf_get_task_stack(task, entries, + MAX_STACK_TRACE_DEPTH * SIZE_OF_ULONG, 0); + if (retlen < 0) + return 0; + + BPF_SEQ_PRINTF(seq, "pid: %8u num_entries: %8u\n", task->pid, + retlen / SIZE_OF_ULONG); + for (i = 0; i < MAX_STACK_TRACE_DEPTH; i++) { + if (retlen > i * SIZE_OF_ULONG) + BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)entries[i]); + } + BPF_SEQ_PRINTF(seq, "\n"); + + return 0; +} -- cgit v1.2.3 From 8d821b5db70723d27ee749b4870de90760606918 Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Wed, 1 Jul 2020 10:53:15 -0700 Subject: selftests/bpf: Switch test_vmlinux to use hrtimer_start_range_ns. The test_vmlinux test uses hrtimer_nanosleep as hook to test tracing programs. But in a kernel built by clang, which performs more aggressive inlining, that function gets inlined into its caller SyS_nanosleep. Therefore, even though fentry and kprobe do hook on the function, they aren't triggered by the call to nanosleep in the test. A possible fix is switching to use a function that is less likely to be inlined, such as hrtimer_start_range_ns. The EXPORT_SYMBOL functions shouldn't be inlined based on the description of [1], therefore safe to use for this test. Also the arguments of this function include the duration of sleep, therefore suitable for test verification.
[1] af3b56289be1 time: don't inline EXPORT_SYMBOL functions Tested: In a clang build kernel, before this change, the test fails: test_vmlinux:PASS:skel_open 0 nsec test_vmlinux:PASS:skel_attach 0 nsec test_vmlinux:PASS:tp 0 nsec test_vmlinux:PASS:raw_tp 0 nsec test_vmlinux:PASS:tp_btf 0 nsec test_vmlinux:FAIL:kprobe not called test_vmlinux:FAIL:fentry not called After switching to hrtimer_start_range_ns, the test passes: test_vmlinux:PASS:skel_open 0 nsec test_vmlinux:PASS:skel_attach 0 nsec test_vmlinux:PASS:tp 0 nsec test_vmlinux:PASS:raw_tp 0 nsec test_vmlinux:PASS:tp_btf 0 nsec test_vmlinux:PASS:kprobe 0 nsec test_vmlinux:PASS:fentry 0 nsec Signed-off-by: Hao Luo Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200701175315.1161242-1-haoluo@google.com --- tools/testing/selftests/bpf/progs/test_vmlinux.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c index 5611b564d3b1..29fa09d6a6c6 100644 --- a/tools/testing/selftests/bpf/progs/test_vmlinux.c +++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c @@ -63,20 +63,20 @@ int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id) return 0; } -SEC("kprobe/hrtimer_nanosleep") -int BPF_KPROBE(handle__kprobe, - ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid) +SEC("kprobe/hrtimer_start_range_ns") +int BPF_KPROBE(handle__kprobe, struct hrtimer *timer, ktime_t tim, u64 delta_ns, + const enum hrtimer_mode mode) { - if (rqtp == MY_TV_NSEC) + if (tim == MY_TV_NSEC) kprobe_called = true; return 0; } -SEC("fentry/hrtimer_nanosleep") -int BPF_PROG(handle__fentry, - ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid) +SEC("fentry/hrtimer_start_range_ns") +int BPF_PROG(handle__fentry, struct hrtimer *timer, ktime_t tim, u64 delta_ns, + const enum hrtimer_mode mode) { - if (rqtp == MY_TV_NSEC)
+ if (tim == MY_TV_NSEC) fentry_called = true; return 0; } -- cgit v1.2.3 From 17bbf925c6f86be8c08bc473cb5327c173154596 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 1 Jul 2020 14:28:16 -0700 Subject: tools/bpftool: Turn off -Wnested-externs warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turn off -Wnested-externs to avoid annoying warnings in BUILD_BUG_ON macro when compiling bpftool: In file included from /data/users/andriin/linux/tools/include/linux/build_bug.h:5, from /data/users/andriin/linux/tools/include/linux/kernel.h:8, from /data/users/andriin/linux/kernel/bpf/disasm.h:10, from /data/users/andriin/linux/kernel/bpf/disasm.c:8: /data/users/andriin/linux/kernel/bpf/disasm.c: In function ‘__func_get_name’: /data/users/andriin/linux/tools/include/linux/compiler.h:37:38: warning: nested extern declaration of ‘__compiletime_assert_0’ [-Wnested-externs] _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^~~~~~~~~~~~~~~~~~~~~ /data/users/andriin/linux/tools/include/linux/compiler.h:16:15: note: in definition of macro ‘__compiletime_assert’ extern void prefix ## suffix(void) __compiletime_error(msg); \ ^~~~~~ /data/users/andriin/linux/tools/include/linux/compiler.h:37:2: note: in expansion of macro ‘_compiletime_assert’ _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^~~~~~~~~~~~~~~~~~~ /data/users/andriin/linux/tools/include/linux/build_bug.h:39:37: note: in expansion of macro ‘compiletime_assert’ #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg) ^~~~~~~~~~~~~~~~~~ /data/users/andriin/linux/tools/include/linux/build_bug.h:50:2: note: in expansion of macro ‘BUILD_BUG_ON_MSG’ BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition) ^~~~~~~~~~~~~~~~ /data/users/andriin/linux/kernel/bpf/disasm.c:20:2: note: in expansion of macro ‘BUILD_BUG_ON’ BUILD_BUG_ON(ARRAY_SIZE(func_id_str) != __BPF_FUNC_MAX_ID); ^~~~~~~~~~~~ Signed-off-by: Andrii 
Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200701212816.2072340-1-andriin@fb.com --- tools/bpf/bpftool/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 273da1615503..51bd520ed437 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -40,7 +40,7 @@ bash_compdir ?= /usr/share/bash-completion/completions CFLAGS += -O2 CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -CFLAGS += $(filter-out -Wswitch-enum,$(EXTRA_WARNINGS)) +CFLAGS += $(filter-out -Wswitch-enum -Wnested-externs,$(EXTRA_WARNINGS)) CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \ -I$(if $(OUTPUT),$(OUTPUT),.) \ -I$(srctree)/kernel/bpf/ \ -- cgit v1.2.3 From 6c92bd5cd4650c39dd929565ee172984c680fead Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 1 Jul 2020 23:44:07 +0200 Subject: selftests/bpf: Test_progs indicate to shell on non-actions When a user selects a non-existing test the summary is printed with indication 0 for all info types, and shell "success" (EXIT_SUCCESS) is indicated. This can be understood by a human end-user, but for shell scripting it is useful to indicate a shell failure (EXIT_FAILURE).
Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/159363984736.930467.17956007131403952343.stgit@firesoul --- tools/testing/selftests/bpf/test_progs.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 54fa5fa688ce..da70a4f72f54 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -687,5 +687,8 @@ int main(int argc, char **argv) free_str_set(&env.subtest_selector.whitelist); free(env.subtest_selector.num_set); + if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0) + return EXIT_FAILURE; + return env.fail_cnt ? EXIT_FAILURE : EXIT_SUCCESS; } -- cgit v1.2.3 From 643e7233aa948901dce81d4573c91ed99fdd272e Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 1 Jul 2020 23:44:12 +0200 Subject: selftests/bpf: Test_progs option for getting number of tests It can be practical to get the number of tests that test_progs contains. This could for example be used to create a shell for-loop construct that runs the individual tests. Like: for N in $(seq 1 $(./test_progs -c)); do ./test_progs -n $N 2>&1 > result_test_${N}.log & done ; wait V2: Add the ability to return the count for the selected tests. This is useful for getting a count e.g. after excluding some tests with option -b. The current beakers test script likes to report the max test count upfront.
Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/159363985244.930467.12617117873058936829.stgit@firesoul --- tools/testing/selftests/bpf/test_progs.c | 18 ++++++++++++++++++ tools/testing/selftests/bpf/test_progs.h | 1 + 2 files changed, 19 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index da70a4f72f54..a5dba14b2025 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -366,6 +366,7 @@ enum ARG_KEYS { ARG_TEST_NAME_BLACKLIST = 'b', ARG_VERIFIER_STATS = 's', ARG_VERBOSE = 'v', + ARG_GET_TEST_CNT = 'c', }; static const struct argp_option opts[] = { @@ -379,6 +380,8 @@ static const struct argp_option opts[] = { "Output verifier statistics", }, { "verbose", ARG_VERBOSE, "LEVEL", OPTION_ARG_OPTIONAL, "Verbose output (use -vv or -vvv for progressively verbose output)" }, + { "count", ARG_GET_TEST_CNT, NULL, 0, + "Get number of selected top-level tests " }, {}, }; @@ -511,6 +514,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) } } break; + case ARG_GET_TEST_CNT: + env->get_test_cnt = true; + break; case ARGP_KEY_ARG: argp_usage(state); break; @@ -654,6 +660,11 @@ int main(int argc, char **argv) test->test_num, test->test_name)) continue; + if (env.get_test_cnt) { + env.succ_cnt++; + continue; + } + test->run_test(); /* ensure last sub-test is finalized properly */ if (test->subtest_name) @@ -677,9 +688,16 @@ int main(int argc, char **argv) cleanup_cgroup_environment(); } stdio_restore(); + + if (env.get_test_cnt) { + printf("%d\n", env.succ_cnt); + goto out; + } + fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); +out: free_str_set(&env.test_selector.blacklist); free_str_set(&env.test_selector.whitelist); free(env.test_selector.num_set); diff --git 
a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index f4503c926aca..0030584619c3 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -66,6 +66,7 @@ struct test_env { enum verbosity verbosity; bool jit_enabled; + bool get_test_cnt; struct prog_test_def *test; FILE *stdout; -- cgit v1.2.3 From c1f1f3656eee3a59a40e1805699041ec1c14ab83 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 1 Jul 2020 23:44:17 +0200 Subject: selftests/bpf: Test_progs option for listing test names The program test_progs has a very useful ability to specify a list of test name substrings for selecting which tests to run. This patch adds the ability to list the selected test names without running them. This is practical for seeing which tests get selected with given select arguments (which can also contain an exclude list via --name-blacklist). This output can also be used by shell-scripts in a for-loop: for N in $(./test_progs --list -t xdp); do \ ./test_progs -t $N 2>&1 > result_test_${N}.log & \ done ; wait This feature can also be used for looking up a test number and returning a testname. If the selection was empty then a shell EXIT_FAILURE is returned. This is useful for scripting. e.g.
like this: n=1; while [ $(./test_progs --list -n $n) ] ; do \ ./test_progs -n $n ; n=$(( n+1 )); \ done Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/159363985751.930467.9610992940793316982.stgit@firesoul --- tools/testing/selftests/bpf/test_progs.c | 15 +++++++++++++++ tools/testing/selftests/bpf/test_progs.h | 1 + 2 files changed, 16 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index a5dba14b2025..ef05d2f0e7bb 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -367,6 +367,7 @@ enum ARG_KEYS { ARG_VERIFIER_STATS = 's', ARG_VERBOSE = 'v', ARG_GET_TEST_CNT = 'c', + ARG_LIST_TEST_NAMES = 'l', }; static const struct argp_option opts[] = { @@ -382,6 +383,8 @@ static const struct argp_option opts[] = { "Verbose output (use -vv or -vvv for progressively verbose output)" }, { "count", ARG_GET_TEST_CNT, NULL, 0, "Get number of selected top-level tests " }, + { "list", ARG_LIST_TEST_NAMES, NULL, 0, + "List test names that would run (without running them) " }, {}, }; @@ -517,6 +520,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case ARG_GET_TEST_CNT: env->get_test_cnt = true; break; + case ARG_LIST_TEST_NAMES: + env->list_test_names = true; + break; case ARGP_KEY_ARG: argp_usage(state); break; @@ -665,6 +671,12 @@ int main(int argc, char **argv) continue; } + if (env.list_test_names) { + fprintf(env.stdout, "%s\n", test->test_name); + env.succ_cnt++; + continue; + } + test->run_test(); /* ensure last sub-test is finalized properly */ if (test->subtest_name) @@ -694,6 +706,9 @@ int main(int argc, char **argv) goto out; } + if (env.list_test_names) + goto out; + fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); diff --git 
a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 0030584619c3..ec31f382e7fd 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -67,6 +67,7 @@ struct test_env { bool jit_enabled; bool get_test_cnt; + bool list_test_names; struct prog_test_def *test; FILE *stdout; -- cgit v1.2.3 From 99126abec5e5778583b959cb164f1888374917d3 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 1 Jul 2020 17:48:52 -0700 Subject: bpf: selftests: A few improvements to network_helpers.c This patch makes a few changes to the network_helpers.c 1) Enforce SO_RCVTIMEO and SO_SNDTIMEO This patch enforces timeout to the network fds through setsockopt SO_RCVTIMEO and SO_SNDTIMEO. It will remove the need for SOCK_NONBLOCK that requires a more demanding timeout logic with epoll/select, e.g. epoll_create, epoll_ctrl, and then epoll_wait for timeout. That removes the need for connect_wait() from the cgroup_skb_sk_lookup.c. The needed change is made in cgroup_skb_sk_lookup.c. 2) start_server(): Add optional addr_str and port to start_server(). That removes the need of the start_server_with_port(). The caller can pass addr_str==NULL and/or port==0. I have a future tcp-hdr-opt test that will pass a non-NULL addr_str and it is in general useful for other future tests. "int timeout_ms" is also added to control the timeout on the "accept(listen_fd)". 3) connect_to_fd(): Fully use the server_fd. The server sock address has already been obtained from getsockname(server_fd). The sockaddr includes the family, so the "int family" arg is redundant. Since the server address is obtained from server_fd, there is little reason not to get the server's socket type from the server_fd also. getsockopt(server_fd) can be used to do that, so "int type" arg is also removed. "int timeout_ms" is added. 4) connect_fd_to_fd(): "int timeout_ms" is added. 
Some code is also refactored to connect_fd_to_addr() which is shared with connect_to_fd(). 5) Preserve errno: Some callers need to check errno, e.g. cgroup_skb_sk_lookup.c. Make changes to do it more consistently in save_errno_close() and log_err(). Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200702004852.2103003-1-kafai@fb.com --- tools/testing/selftests/bpf/network_helpers.c | 157 ++++++++++++--------- tools/testing/selftests/bpf/network_helpers.h | 9 +- .../bpf/prog_tests/cgroup_skb_sk_lookup.c | 12 +- .../selftests/bpf/prog_tests/connect_force_port.c | 10 +- .../selftests/bpf/prog_tests/load_bytes_relative.c | 4 +- tools/testing/selftests/bpf/prog_tests/tcp_rtt.c | 4 +- 6 files changed, 110 insertions(+), 86 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index e36dd1a1780d..acd08715be2e 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -7,8 +7,6 @@ #include -#include - #include #include #include @@ -17,8 +15,13 @@ #include "network_helpers.h" #define clean_errno() (errno == 0 ? "None" : strerror(errno)) -#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ - __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__) +#define log_err(MSG, ...) 
({ \ + int __save = errno; \ + fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ + __FILE__, __LINE__, clean_errno(), \ + ##__VA_ARGS__); \ + errno = __save; \ +}) struct ipv4_packet pkt_v4 = { .eth.h_proto = __bpf_constant_htons(ETH_P_IP), @@ -37,7 +40,34 @@ struct ipv6_packet pkt_v6 = { .tcp.doff = 5, }; -int start_server_with_port(int family, int type, __u16 port) +static int settimeo(int fd, int timeout_ms) +{ + struct timeval timeout = { .tv_sec = 3 }; + + if (timeout_ms > 0) { + timeout.tv_sec = timeout_ms / 1000; + timeout.tv_usec = (timeout_ms % 1000) * 1000; + } + + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeout, + sizeof(timeout))) { + log_err("Failed to set SO_RCVTIMEO"); + return -1; + } + + if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeout, + sizeof(timeout))) { + log_err("Failed to set SO_SNDTIMEO"); + return -1; + } + + return 0; +} + +#define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; }) + +int start_server(int family, int type, const char *addr_str, __u16 port, + int timeout_ms) { struct sockaddr_storage addr = {}; socklen_t len; @@ -48,120 +78,119 @@ int start_server_with_port(int family, int type, __u16 port) sin->sin_family = AF_INET; sin->sin_port = htons(port); + if (addr_str && + inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) { + log_err("inet_pton(AF_INET, %s)", addr_str); + return -1; + } len = sizeof(*sin); } else { struct sockaddr_in6 *sin6 = (void *)&addr; sin6->sin6_family = AF_INET6; sin6->sin6_port = htons(port); + if (addr_str && + inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) { + log_err("inet_pton(AF_INET6, %s)", addr_str); + return -1; + } len = sizeof(*sin6); } - fd = socket(family, type | SOCK_NONBLOCK, 0); + fd = socket(family, type, 0); if (fd < 0) { log_err("Failed to create server socket"); return -1; } + if (settimeo(fd, timeout_ms)) + goto error_close; + if (bind(fd, (const struct sockaddr *)&addr, len) < 0) { log_err("Failed to bind socket"); - close(fd); - return -1; + 
goto error_close; } if (type == SOCK_STREAM) { if (listen(fd, 1) < 0) { log_err("Failed to listed on socket"); - close(fd); - return -1; + goto error_close; } } return fd; -} -int start_server(int family, int type) -{ - return start_server_with_port(family, type, 0); +error_close: + save_errno_close(fd); + return -1; } -static const struct timeval timeo_sec = { .tv_sec = 3 }; -static const size_t timeo_optlen = sizeof(timeo_sec); - -int connect_to_fd(int family, int type, int server_fd) +static int connect_fd_to_addr(int fd, + const struct sockaddr_storage *addr, + socklen_t addrlen) { - int fd, save_errno; - - fd = socket(family, type, 0); - if (fd < 0) { - log_err("Failed to create client socket"); + if (connect(fd, (const struct sockaddr *)addr, addrlen)) { + log_err("Failed to connect to server"); return -1; } - if (connect_fd_to_fd(fd, server_fd) < 0 && errno != EINPROGRESS) { - save_errno = errno; - close(fd); - errno = save_errno; - return -1; - } - - return fd; + return 0; } -int connect_fd_to_fd(int client_fd, int server_fd) +int connect_to_fd(int server_fd, int timeout_ms) { struct sockaddr_storage addr; - socklen_t len = sizeof(addr); - int save_errno; + struct sockaddr_in *addr_in; + socklen_t addrlen, optlen; + int fd, type; - if (setsockopt(client_fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec, - timeo_optlen)) { - log_err("Failed to set SO_RCVTIMEO"); + optlen = sizeof(type); + if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) { + log_err("getsockopt(SOL_TYPE)"); return -1; } - if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { + addrlen = sizeof(addr); + if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) { log_err("Failed to get server addr"); return -1; } - if (connect(client_fd, (const struct sockaddr *)&addr, len) < 0) { - if (errno != EINPROGRESS) { - save_errno = errno; - log_err("Failed to connect to server"); - errno = save_errno; - } + addr_in = (struct sockaddr_in *)&addr; + fd = socket(addr_in->sin_family, 
type, 0); + if (fd < 0) { + log_err("Failed to create client socket"); return -1; } - return 0; + if (settimeo(fd, timeout_ms)) + goto error_close; + + if (connect_fd_to_addr(fd, &addr, addrlen)) + goto error_close; + + return fd; + +error_close: + save_errno_close(fd); + return -1; } -int connect_wait(int fd) +int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms) { - struct epoll_event ev = {}, events[2]; - int timeout_ms = 1000; - int efd, nfd; + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); - efd = epoll_create1(EPOLL_CLOEXEC); - if (efd < 0) { - log_err("Failed to open epoll fd"); + if (settimeo(client_fd, timeout_ms)) return -1; - } - - ev.events = EPOLLRDHUP | EPOLLOUT; - ev.data.fd = fd; - if (epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev) < 0) { - log_err("Failed to register fd=%d on epoll fd=%d", fd, efd); - close(efd); + if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { + log_err("Failed to get server addr"); return -1; } - nfd = epoll_wait(efd, events, ARRAY_SIZE(events), timeout_ms); - if (nfd < 0) - log_err("Failed to wait for I/O event on epoll fd=%d", efd); + if (connect_fd_to_addr(client_fd, &addr, len)) + return -1; - close(efd); - return nfd; + return 0; } diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 6a8009605670..f580e82fda58 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -33,10 +33,9 @@ struct ipv6_packet { } __packed; extern struct ipv6_packet pkt_v6; -int start_server(int family, int type); -int start_server_with_port(int family, int type, __u16 port); -int connect_to_fd(int family, int type, int server_fd); -int connect_fd_to_fd(int client_fd, int server_fd); -int connect_wait(int client_fd); +int start_server(int family, int type, const char *addr, __u16 port, + int timeout_ms); +int connect_to_fd(int server_fd, int timeout_ms); +int connect_fd_to_fd(int client_fd, int 
server_fd, int timeout_ms); #endif diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c index 059047af7df3..464edc1c1708 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c @@ -13,7 +13,7 @@ static void run_lookup_test(__u16 *g_serv_port, int out_sk) socklen_t addr_len = sizeof(addr); __u32 duration = 0; - serv_sk = start_server(AF_INET6, SOCK_STREAM); + serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); if (CHECK(serv_sk < 0, "start_server", "failed to start server\n")) return; @@ -24,17 +24,13 @@ static void run_lookup_test(__u16 *g_serv_port, int out_sk) *g_serv_port = addr.sin6_port; /* Client outside of test cgroup should fail to connect by timeout. */ - err = connect_fd_to_fd(out_sk, serv_sk); + err = connect_fd_to_fd(out_sk, serv_sk, 1000); if (CHECK(!err || errno != EINPROGRESS, "connect_fd_to_fd", "unexpected result err %d errno %d\n", err, errno)) goto cleanup; - err = connect_wait(out_sk); - if (CHECK(err, "connect_wait", "unexpected result %d\n", err)) - goto cleanup; - /* Client inside test cgroup should connect just fine. */ - in_sk = connect_to_fd(AF_INET6, SOCK_STREAM, serv_sk); + in_sk = connect_to_fd(serv_sk, 0); if (CHECK(in_sk < 0, "connect_to_fd", "errno %d\n", errno)) goto cleanup; @@ -85,7 +81,7 @@ void test_cgroup_skb_sk_lookup(void) * differs from that of testing cgroup. Moving selftests process to * testing cgroup won't change cgroup id of an already created socket. 
*/ - out_sk = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); + out_sk = socket(AF_INET6, SOCK_STREAM, 0); if (CHECK_FAIL(out_sk < 0)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c index 17bbf76812ca..9229db2f5ca5 100644 --- a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c +++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c @@ -114,7 +114,7 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type) goto close_bpf_object; } - fd = connect_to_fd(family, type, server_fd); + fd = connect_to_fd(server_fd, 0); if (fd < 0) { err = -1; goto close_bpf_object; @@ -137,25 +137,25 @@ void test_connect_force_port(void) if (CHECK_FAIL(cgroup_fd < 0)) return; - server_fd = start_server_with_port(AF_INET, SOCK_STREAM, 60123); + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 60123, 0); if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_STREAM)); close(server_fd); - server_fd = start_server_with_port(AF_INET6, SOCK_STREAM, 60124); + server_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 60124, 0); if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_STREAM)); close(server_fd); - server_fd = start_server_with_port(AF_INET, SOCK_DGRAM, 60123); + server_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 60123, 0); if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_DGRAM)); close(server_fd); - server_fd = start_server_with_port(AF_INET6, SOCK_DGRAM, 60124); + server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 60124, 0); if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_DGRAM)); diff --git a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c index 
c1168e4a9036..5a2a689dbb68 100644 --- a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c +++ b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c @@ -23,7 +23,7 @@ void test_load_bytes_relative(void) if (CHECK_FAIL(cgroup_fd < 0)) return; - server_fd = start_server(AF_INET, SOCK_STREAM); + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; @@ -49,7 +49,7 @@ void test_load_bytes_relative(void) if (CHECK_FAIL(err)) goto close_bpf_object; - client_fd = connect_to_fd(AF_INET, SOCK_STREAM, server_fd); + client_fd = connect_to_fd(server_fd, 0); if (CHECK_FAIL(client_fd < 0)) goto close_bpf_object; close(client_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c index 9013a0c01eed..d207e968e6b1 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c @@ -118,7 +118,7 @@ static int run_test(int cgroup_fd, int server_fd) goto close_bpf_object; } - client_fd = connect_to_fd(AF_INET, SOCK_STREAM, server_fd); + client_fd = connect_to_fd(server_fd, 0); if (client_fd < 0) { err = -1; goto close_bpf_object; @@ -161,7 +161,7 @@ void test_tcp_rtt(void) if (CHECK_FAIL(cgroup_fd < 0)) return; - server_fd = start_server(AF_INET, SOCK_STREAM); + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; -- cgit v1.2.3 From 811d7e375d08312dba23f3b6bf7e58ec14aa5dcb Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 1 Jul 2020 17:48:58 -0700 Subject: bpf: selftests: Restore netns after each test It is common for networking tests creating its netns and making its own setting under this new netns (e.g. changing tcp sysctl). If the test forgot to restore to the original netns, it would affect the result of other tests. This patch saves the original netns at the beginning and then restores it after every test. 
Since the restoring "setns()" call is not expensive, it is done after every test without tracking whether a test has created a new netns or not. The new restore_netns() could also be done in test__end_subtest() such that each subtest will get an automatic netns reset. However, the individual test would lose the flexibility to have total control of the netns for its own subtests. In some cases, forcing a test to do an unnecessary netns re-configure for each subtest is time-consuming. For example, in my VM, forcing a netns re-configure on each subtest in sk_assign.c increased the runtime from 1s to 8s. On top of that, test_progs.c is also doing per-test (instead of per-subtest) cleanup for cgroup. Thus, this patch also does per-test restore_netns(). The only existing per-subtest cleanup is reset_affinity() and no test depends on it. Thus, it is removed from test__end_subtest() to give a consistent expectation to the individual tests. test_progs.c only ensures that any affinity/netns/cgroup change made by an earlier test does not affect the following tests.
Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200702004858.2103728-1-kafai@fb.com --- tools/testing/selftests/bpf/test_progs.c | 23 +++++++++++++++++++++-- tools/testing/selftests/bpf/test_progs.h | 2 ++ 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index ef05d2f0e7bb..104e833d0087 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -121,6 +121,24 @@ static void reset_affinity() { } } +static void save_netns(void) +{ + env.saved_netns_fd = open("/proc/self/ns/net", O_RDONLY); + if (env.saved_netns_fd == -1) { + perror("open(/proc/self/ns/net)"); + exit(-1); + } +} + +static void restore_netns(void) +{ + if (setns(env.saved_netns_fd, CLONE_NEWNET) == -1) { + stdio_restore(); + perror("setns(CLONE_NEWNS)"); + exit(-1); + } +} + void test__end_subtest() { struct prog_test_def *test = env.test; @@ -138,8 +156,6 @@ void test__end_subtest() test->test_num, test->subtest_num, test->subtest_name, sub_error_cnt ? "FAIL" : "OK"); - reset_affinity(); - free(test->subtest_name); test->subtest_name = NULL; } @@ -655,6 +671,7 @@ int main(int argc, char **argv) return -1; } + save_netns(); stdio_hijack(); for (i = 0; i < prog_test_cnt; i++) { struct prog_test_def *test = &prog_test_defs[i]; @@ -696,6 +713,7 @@ int main(int argc, char **argv) test->error_cnt ? 
"FAIL" : "OK"); reset_affinity(); + restore_netns(); if (test->need_cgroup_cleanup) cleanup_cgroup_environment(); } @@ -719,6 +737,7 @@ out: free_str_set(&env.subtest_selector.blacklist); free_str_set(&env.subtest_selector.whitelist); free(env.subtest_selector.num_set); + close(env.saved_netns_fd); if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0) return EXIT_FAILURE; diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index ec31f382e7fd..6e09bf738473 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -80,6 +80,8 @@ struct test_env { int sub_succ_cnt; /* successful sub-tests */ int fail_cnt; /* total failed tests + sub-tests */ int skip_cnt; /* skipped tests */ + + int saved_netns_fd; }; extern struct test_env env; -- cgit v1.2.3 From 8ae4121bd89e3dce27b519ed469efbc15423af18 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 2 Jul 2020 21:31:59 -0700 Subject: bpf: Fix bpftool without skeleton code enabled Fix segfault from bpftool by adding emit_obj_refs_plain when skeleton code is disabled. Tested by deleting BUILD_BPF_SKELS in Makefile. We found this doing backports for Cilium when a testing image pulled in latest bpf-next bpftool, but kept using an older clang-7. 
# ./bpftool prog show Error: bpftool built without PID iterator support 3: cgroup_skb tag 7be49e3934a125ba gpl loaded_at 2020-07-01T08:01:29-0700 uid 0 Segmentation fault Fixes: d53dee3fe013 ("tools/bpftool: Show info for processes holding BPF map/prog/link/btf FDs") Reported-by: Joe Stringer Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/159375071997.14984.17404504293832961401.stgit@john-XPS-13-9370 --- tools/bpf/bpftool/pids.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index 2709be4de2b1..7d5416667c85 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -19,6 +19,7 @@ int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) return -ENOTSUP; } void delete_obj_refs_table(struct obj_refs_table *table) {} +void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix) {} #else /* BPFTOOL_WITHOUT_SKELETONS */ -- cgit v1.2.3 From 9ff79af3331277c69ac61cc75b2392eb3284e305 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Fri, 3 Jul 2020 11:17:19 -0700 Subject: selftests/bpf: Fix compilation error of bpf_iter_task_stack.c BPF selftests show a compilation error as follows: libbpf: invalid relo for 'entries' in special section 0xfff2; forgot to initialize global var?.. Fix it by initializing 'entries' to zeros. 
Fixes: c7568114bc56 ("selftests/bpf: Add bpf_iter test with bpf_get_task_stack()") Reported-by: Jesper Dangaard Brouer Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200703181719.3747072-1-songliubraving@fb.com --- tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c index e40d32a2ed93..50e59a2e142e 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c @@ -7,7 +7,7 @@ char _license[] SEC("license") = "GPL"; #define MAX_STACK_TRACE_DEPTH 64 -unsigned long entries[MAX_STACK_TRACE_DEPTH]; +unsigned long entries[MAX_STACK_TRACE_DEPTH] = {}; #define SIZE_OF_ULONG (sizeof(unsigned long)) SEC("iter/task") -- cgit v1.2.3