From 7bd3a33ae6d2b820bc44a206f9b81b96840219fd Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Wed, 17 Jun 2020 11:31:32 -0700
Subject: libbpf: Bump version to 0.1.0

Bump libbpf version to 0.1.0, as new development cycle starts.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200617183132.1970836-1-andriin@fb.com
---
 tools/lib/bpf/libbpf.map | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index f732c77b7ed0..c914347f5065 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -270,3 +270,6 @@ LIBBPF_0.0.9 {
 		ring_buffer__new;
 		ring_buffer__poll;
 } LIBBPF_0.0.8;
+
+LIBBPF_0.1.0 {
+} LIBBPF_0.0.9;
-- 
cgit v1.2.3


From d56b74b9e1b8d747171dc6ff60315c00c41562ce Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 18 Jun 2020 16:46:32 -0700
Subject: tools/bpf: Add verifier tests for 32bit pointer/scalar arithmetic

Added two test_verifier subtests for 32bit pointer/scalar arithmetic
with BPF_SUB operator. They are passing verifier now.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200618234632.3321367-1-yhs@fb.com
---
 .../selftests/bpf/verifier/value_ptr_arith.c       | 38 ++++++++++++++++++++++
 1 file changed, 38 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
index 97ee658e1242..ed4e76b24649 100644
--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
@@ -836,3 +836,41 @@
 	.errstr = "R0 invalid mem access 'inv'",
 	.errstr_unpriv = "R0 pointer -= pointer prohibited",
 },
+{
+	"32bit pkt_ptr -= scalar",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+		    offsetof(struct __sk_buff, data_end)),
+	BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+		    offsetof(struct __sk_buff, data)),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 40),
+	BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_8, 2),
+	BPF_ALU32_REG(BPF_MOV, BPF_REG_4, BPF_REG_7),
+	BPF_ALU32_REG(BPF_SUB, BPF_REG_6, BPF_REG_4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = ACCEPT,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+	"32bit scalar -= pkt_ptr",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+		    offsetof(struct __sk_buff, data_end)),
+	BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+		    offsetof(struct __sk_buff, data)),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 40),
+	BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_8, 2),
+	BPF_ALU32_REG(BPF_MOV, BPF_REG_4, BPF_REG_6),
+	BPF_ALU32_REG(BPF_SUB, BPF_REG_4, BPF_REG_7),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = ACCEPT,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
-- 
cgit v1.2.3


From bb8dc2695a7db4f35c1de94d212f86229bb4a5d2 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 19 Jun 2020 15:20:24 -0700
Subject: tools/bpftool: Relicense bpftool's BPF profiler prog as dual-license
 GPL/BSD

Relicense it to be compatible with the rest of bpftool files.

Suggested-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200619222024.519774-1-andriin@fb.com
---
 tools/bpf/bpftool/skeleton/profiler.bpf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c
index 20034c12f7c5..c9d196ddb670 100644
--- a/tools/bpf/bpftool/skeleton/profiler.bpf.c
+++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
 // Copyright (c) 2020 Facebook
 #include "profiler.h"
 #include <linux/bpf.h>
@@ -116,4 +116,4 @@ int BPF_PROG(fexit_XXX)
 	return 0;
 }
 
-char LICENSE[] SEC("license") = "GPL";
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
-- 
cgit v1.2.3


From 41c48f3a98231738c5ce79f6f2aa6e40ba924d18 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 19 Jun 2020 14:11:43 -0700
Subject: bpf: Support access to bpf map fields

There are multiple use-cases when it's convenient to have access to bpf
map fields, both `struct bpf_map` and map type specific struct-s such as
`struct bpf_array`, `struct bpf_htab`, etc.

For example while working with sock arrays it can be necessary to
calculate the key based on map->max_entries (some_hash % max_entries).
Currently this is solved by communicating max_entries via "out-of-band"
channel, e.g. via additional map with known key to get info about target
map. That works, but is not very convenient and error-prone while
working with many maps.

In other cases necessary data is dynamic (i.e. unknown at loading time)
and it's impossible to get it at all. For example while working with a
hash table it can be convenient to know how much capacity is already
used (bpf_htab.count.counter for BPF_F_NO_PREALLOC case).

At the same time kernel knows this info and can provide it to bpf
program.

Fill this gap by adding support to access bpf map fields from bpf
program for both `struct bpf_map` and map type specific fields.

Support is implemented via btf_struct_access() so that a user can define
their own `struct bpf_map` or map type specific struct in their program
with only necessary fields and preserve_access_index attribute, cast a
map to this struct and use a field.

For example:

	struct bpf_map {
		__u32 max_entries;
	} __attribute__((preserve_access_index));

	struct bpf_array {
		struct bpf_map map;
		__u32 elem_size;
	} __attribute__((preserve_access_index));

	struct {
		__uint(type, BPF_MAP_TYPE_ARRAY);
		__uint(max_entries, 4);
		__type(key, __u32);
		__type(value, __u32);
	} m_array SEC(".maps");

	SEC("cgroup_skb/egress")
	int cg_skb(void *ctx)
	{
		struct bpf_array *array = (struct bpf_array *)&m_array;
		struct bpf_map *map = (struct bpf_map *)&m_array;

		/* .. use map->max_entries or array->map.max_entries .. */
	}

Similarly to other btf_struct_access() use-cases (e.g. struct tcp_sock
in net/ipv4/bpf_tcp_ca.c) the patch allows access to any fields of
corresponding struct. Only reading from map fields is supported.

For btf_struct_access() to work there should be a way to know btf id of
a struct that corresponds to a map type. To get btf id there should be a
way to get a stringified name of map-specific struct, such as
"bpf_array", "bpf_htab", etc for a map type. Two new fields are added to
`struct bpf_map_ops` to handle it:
* .map_btf_name keeps a btf name of a struct returned by map_alloc();
* .map_btf_id is used to cache btf id of that struct.

To make btf ids calculation cheaper they're calculated once while
preparing btf_vmlinux and cached same way as it's done for btf_id field
of `struct bpf_func_proto`

While calculating btf ids, struct names are NOT checked for collision.
Collisions will be checked as a part of the work to prepare btf ids used
in verifier in compile time that should land soon. The only known
collision for `struct bpf_htab` (kernel/bpf/hashtab.c vs
net/core/sock_map.c) was fixed earlier.

Both new fields .map_btf_name and .map_btf_id must be set for a map type
for the feature to work. If neither is set for a map type, verifier will
return ENOTSUPP on a try to access map_ptr of corresponding type. If
just one of them set, it's verifier misconfiguration.

Only `struct bpf_array` for BPF_MAP_TYPE_ARRAY and `struct bpf_htab` for
BPF_MAP_TYPE_HASH are supported by this patch. Other map types will be
supported separately.

The feature is available only for CONFIG_DEBUG_INFO_BTF=y and gated by
perfmon_capable() so that unpriv programs won't have access to bpf map
fields.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/6479686a0cd1e9067993df57b4c3eef0e276fec9.1592600985.git.rdna@fb.com
---
 tools/testing/selftests/bpf/verifier/map_ptr_mixing.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c
index cd26ee6b7b1d..1f2b8c4cb26d 100644
--- a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c
+++ b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c
@@ -56,7 +56,7 @@
 	.fixup_map_in_map = { 16 },
 	.fixup_map_array_48b = { 13 },
 	.result = REJECT,
-	.errstr = "R0 invalid mem access 'map_ptr'",
+	.errstr = "only read from bpf_array is supported",
 },
 {
 	"cond: two branches returning different map pointers for lookup (tail, tail)",
-- 
cgit v1.2.3


From b1b53d413f16c6b5078edb127e660e67332e4d2f Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 19 Jun 2020 14:11:45 -0700
Subject: selftests/bpf: Test access to bpf map pointer

Add selftests to test access to map pointers from bpf program for all
map types except struct_ops (that one would need additional work).

verifier test focuses mostly on scenarios that must be rejected.

prog_tests test focuses on accessing multiple fields both scalar and a
nested struct from bpf program and verifies that those fields have
expected values.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/139a6a17f8016491e39347849b951525335c6eb4.1592600985.git.rdna@fb.com
---
 tools/testing/selftests/bpf/prog_tests/map_ptr.c |  32 ++
 tools/testing/selftests/bpf/progs/map_ptr_kern.c | 686 +++++++++++++++++++++++
 tools/testing/selftests/bpf/verifier/map_ptr.c   |  62 ++
 3 files changed, 780 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/map_ptr.c
 create mode 100644 tools/testing/selftests/bpf/progs/map_ptr_kern.c
 create mode 100644 tools/testing/selftests/bpf/verifier/map_ptr.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
new file mode 100644
index 000000000000..c230a573c373
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "map_ptr_kern.skel.h"
+
+void test_map_ptr(void)
+{
+	struct map_ptr_kern *skel;
+	__u32 duration = 0, retval;
+	char buf[128];
+	int err;
+
+	skel = map_ptr_kern__open_and_load();
+	if (CHECK(!skel, "skel_open_load", "open_load failed\n"))
+		return;
+
+	err = bpf_prog_test_run(bpf_program__fd(skel->progs.cg_skb), 1, &pkt_v4,
+				sizeof(pkt_v4), buf, NULL, &retval, NULL);
+
+	if (CHECK(err, "test_run", "err=%d errno=%d\n", err, errno))
+		goto cleanup;
+
+	if (CHECK(!retval, "retval", "retval=%d map_type=%u line=%u\n", retval,
+		  skel->bss->g_map_type, skel->bss->g_line))
+		goto cleanup;
+
+cleanup:
+	map_ptr_kern__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
new file mode 100644
index 000000000000..473665cac67e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -0,0 +1,686 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define LOOP_BOUND 0xf
+#define MAX_ENTRIES 8
+#define HALF_ENTRIES (MAX_ENTRIES >> 1)
+
+_Static_assert(MAX_ENTRIES < LOOP_BOUND, "MAX_ENTRIES must be < LOOP_BOUND");
+
+enum bpf_map_type g_map_type = BPF_MAP_TYPE_UNSPEC;
+__u32 g_line = 0;
+
+#define VERIFY_TYPE(type, func) ({	\
+	g_map_type = type;		\
+	if (!func())			\
+		return 0;		\
+})
+
+
+#define VERIFY(expr) ({		\
+	g_line = __LINE__;	\
+	if (!(expr))		\
+		return 0;	\
+})
+
+struct bpf_map_memory {
+	__u32 pages;
+} __attribute__((preserve_access_index));
+
+struct bpf_map {
+	enum bpf_map_type map_type;
+	__u32 key_size;
+	__u32 value_size;
+	__u32 max_entries;
+	__u32 id;
+	struct bpf_map_memory memory;
+} __attribute__((preserve_access_index));
+
+static inline int check_bpf_map_fields(struct bpf_map *map, __u32 key_size,
+				       __u32 value_size, __u32 max_entries)
+{
+	VERIFY(map->map_type == g_map_type);
+	VERIFY(map->key_size == key_size);
+	VERIFY(map->value_size == value_size);
+	VERIFY(map->max_entries == max_entries);
+	VERIFY(map->id > 0);
+	VERIFY(map->memory.pages > 0);
+
+	return 1;
+}
+
+static inline int check_bpf_map_ptr(struct bpf_map *indirect,
+				    struct bpf_map *direct)
+{
+	VERIFY(indirect->map_type == direct->map_type);
+	VERIFY(indirect->key_size == direct->key_size);
+	VERIFY(indirect->value_size == direct->value_size);
+	VERIFY(indirect->max_entries == direct->max_entries);
+	VERIFY(indirect->id == direct->id);
+	VERIFY(indirect->memory.pages == direct->memory.pages);
+
+	return 1;
+}
+
+static inline int check(struct bpf_map *indirect, struct bpf_map *direct,
+			__u32 key_size, __u32 value_size, __u32 max_entries)
+{
+	VERIFY(check_bpf_map_ptr(indirect, direct));
+	VERIFY(check_bpf_map_fields(indirect, key_size, value_size,
+				    max_entries));
+	return 1;
+}
+
+static inline int check_default(struct bpf_map *indirect,
+				struct bpf_map *direct)
+{
+	VERIFY(check(indirect, direct, sizeof(__u32), sizeof(__u32),
+		     MAX_ENTRIES));
+	return 1;
+}
+
+typedef struct {
+	int counter;
+} atomic_t;
+
+struct bpf_htab {
+	struct bpf_map map;
+	atomic_t count;
+	__u32 n_buckets;
+	__u32 elem_size;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(map_flags, BPF_F_NO_PREALLOC); /* to test bpf_htab.count */
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_hash SEC(".maps");
+
+static inline int check_hash(void)
+{
+	struct bpf_htab *hash = (struct bpf_htab *)&m_hash;
+	struct bpf_map *map = (struct bpf_map *)&m_hash;
+	int i;
+
+	VERIFY(check_default(&hash->map, map));
+
+	VERIFY(hash->n_buckets == MAX_ENTRIES);
+	VERIFY(hash->elem_size == 64);
+
+	VERIFY(hash->count.counter == 0);
+	for (i = 0; i < HALF_ENTRIES; ++i) {
+		const __u32 key = i;
+		const __u32 val = 1;
+
+		if (bpf_map_update_elem(hash, &key, &val, 0))
+			return 0;
+	}
+	VERIFY(hash->count.counter == HALF_ENTRIES);
+
+	return 1;
+}
+
+struct bpf_array {
+	struct bpf_map map;
+	__u32 elem_size;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_array SEC(".maps");
+
+static inline int check_array(void)
+{
+	struct bpf_array *array = (struct bpf_array *)&m_array;
+	struct bpf_map *map = (struct bpf_map *)&m_array;
+	int i, n_lookups = 0, n_keys = 0;
+
+	VERIFY(check_default(&array->map, map));
+
+	VERIFY(array->elem_size == 8);
+
+	for (i = 0; i < array->map.max_entries && i < LOOP_BOUND; ++i) {
+		const __u32 key = i;
+		__u32 *val = bpf_map_lookup_elem(array, &key);
+
+		++n_lookups;
+		if (val)
+			++n_keys;
+	}
+
+	VERIFY(n_lookups == MAX_ENTRIES);
+	VERIFY(n_keys == MAX_ENTRIES);
+
+	return 1;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_prog_array SEC(".maps");
+
+static inline int check_prog_array(void)
+{
+	struct bpf_array *prog_array = (struct bpf_array *)&m_prog_array;
+	struct bpf_map *map = (struct bpf_map *)&m_prog_array;
+
+	VERIFY(check_default(&prog_array->map, map));
+
+	return 1;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_perf_event_array SEC(".maps");
+
+static inline int check_perf_event_array(void)
+{
+	struct bpf_array *perf_event_array = (struct bpf_array *)&m_perf_event_array;
+	struct bpf_map *map = (struct bpf_map *)&m_perf_event_array;
+
+	VERIFY(check_default(&perf_event_array->map, map));
+
+	return 1;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_percpu_hash SEC(".maps");
+
+static inline int check_percpu_hash(void)
+{
+	struct bpf_htab *percpu_hash = (struct bpf_htab *)&m_percpu_hash;
+	struct bpf_map *map = (struct bpf_map *)&m_percpu_hash;
+
+	VERIFY(check_default(&percpu_hash->map, map));
+
+	return 1;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_percpu_array SEC(".maps");
+
+static inline int check_percpu_array(void)
+{
+	struct bpf_array *percpu_array = (struct bpf_array *)&m_percpu_array;
+	struct bpf_map *map = (struct bpf_map *)&m_percpu_array;
+
+	VERIFY(check_default(&percpu_array->map, map));
+
+	return 1;
+}
+
+struct bpf_stack_map {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u64);
+} m_stack_trace SEC(".maps");
+
+static inline int check_stack_trace(void)
+{
+	struct bpf_stack_map *stack_trace =
+		(struct bpf_stack_map *)&m_stack_trace;
+	struct bpf_map *map = (struct bpf_map *)&m_stack_trace;
+
+	VERIFY(check(&stack_trace->map, map, sizeof(__u32), sizeof(__u64),
+		     MAX_ENTRIES));
+
+	return 1;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_cgroup_array SEC(".maps");
+
+static inline int check_cgroup_array(void)
+{
+	struct bpf_array *cgroup_array = (struct bpf_array *)&m_cgroup_array;
+	struct bpf_map *map = (struct bpf_map *)&m_cgroup_array;
+
+	VERIFY(check_default(&cgroup_array->map, map));
+
+	return 1;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_LRU_HASH);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_lru_hash SEC(".maps");
+
+static inline int check_lru_hash(void)
+{
+	struct bpf_htab *lru_hash = (struct bpf_htab *)&m_lru_hash;
+	struct bpf_map *map = (struct bpf_map *)&m_lru_hash;
+
+	VERIFY(check_default(&lru_hash->map, map));
+
+	return 1;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_lru_percpu_hash SEC(".maps");
+
+static inline int check_lru_percpu_hash(void)
+{
+	struct bpf_htab *lru_percpu_hash = (struct bpf_htab *)&m_lru_percpu_hash;
+	struct bpf_map *map = (struct bpf_map *)&m_lru_percpu_hash;
+
+	VERIFY(check_default(&lru_percpu_hash->map, map));
+
+	return 1;
+}
+
+struct lpm_trie {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct lpm_key {
+	struct bpf_lpm_trie_key trie_key;
+	__u32 data;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_LPM_TRIE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, struct lpm_key);
+	__type(value, __u32);
+} m_lpm_trie SEC(".maps");
+
+static inline int check_lpm_trie(void)
+{
+	struct lpm_trie *lpm_trie = (struct lpm_trie *)&m_lpm_trie;
+	struct bpf_map *map = (struct bpf_map *)&m_lpm_trie;
+
+	VERIFY(check(&lpm_trie->map, map, sizeof(struct lpm_key), sizeof(__u32),
+		     MAX_ENTRIES));
+
+	return 1;
+}
+
+struct inner_map {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} inner_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+	__array(values, struct {
+		__uint(type, BPF_MAP_TYPE_ARRAY);
+		__uint(max_entries, 1);
+		__type(key, __u32);
+		__type(value, __u32);
+	});
+} m_array_of_maps SEC(".maps") = {
+	.values = { (void *)&inner_map, 0, 0, 0, 0, 0, 0, 0, 0 },
+};
+
+static inline int check_array_of_maps(void)
+{
+	struct bpf_array *array_of_maps = (struct bpf_array *)&m_array_of_maps;
+	struct bpf_map *map = (struct bpf_map *)&m_array_of_maps;
+
+	VERIFY(check_default(&array_of_maps->map, map));
+
+	return 1;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+	__array(values, struct inner_map);
+} m_hash_of_maps SEC(".maps") = {
+	.values = {
+		[2] = &inner_map,
+	},
+};
+
+static inline int check_hash_of_maps(void)
+{
+	struct bpf_htab *hash_of_maps = (struct bpf_htab *)&m_hash_of_maps;
+	struct bpf_map *map = (struct bpf_map *)&m_hash_of_maps;
+
+	VERIFY(check_default(&hash_of_maps->map, map));
+
+	return 1;
+}
+
+struct bpf_dtab {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_DEVMAP);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_devmap SEC(".maps");
+
+static inline int check_devmap(void)
+{
+	struct bpf_dtab *devmap = (struct bpf_dtab *)&m_devmap;
+	struct bpf_map *map = (struct bpf_map *)&m_devmap;
+
+	VERIFY(check_default(&devmap->map, map));
+
+	return 1;
+}
+
+struct bpf_stab {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_sockmap SEC(".maps");
+
+static inline int check_sockmap(void)
+{
+	struct bpf_stab *sockmap = (struct bpf_stab *)&m_sockmap;
+	struct bpf_map *map = (struct bpf_map *)&m_sockmap;
+
+	VERIFY(check_default(&sockmap->map, map));
+
+	return 1;
+}
+
+struct bpf_cpu_map {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_CPUMAP);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_cpumap SEC(".maps");
+
+static inline int check_cpumap(void)
+{
+	struct bpf_cpu_map *cpumap = (struct bpf_cpu_map *)&m_cpumap;
+	struct bpf_map *map = (struct bpf_map *)&m_cpumap;
+
+	VERIFY(check_default(&cpumap->map, map));
+
+	return 1;
+}
+
+struct xsk_map {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_XSKMAP);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_xskmap SEC(".maps");
+
+static inline int check_xskmap(void)
+{
+	struct xsk_map *xskmap = (struct xsk_map *)&m_xskmap;
+	struct bpf_map *map = (struct bpf_map *)&m_xskmap;
+
+	VERIFY(check_default(&xskmap->map, map));
+
+	return 1;
+}
+
+struct bpf_shtab {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKHASH);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_sockhash SEC(".maps");
+
+static inline int check_sockhash(void)
+{
+	struct bpf_shtab *sockhash = (struct bpf_shtab *)&m_sockhash;
+	struct bpf_map *map = (struct bpf_map *)&m_sockhash;
+
+	VERIFY(check_default(&sockhash->map, map));
+
+	return 1;
+}
+
+struct bpf_cgroup_storage_map {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+	__type(key, struct bpf_cgroup_storage_key);
+	__type(value, __u32);
+} m_cgroup_storage SEC(".maps");
+
+static inline int check_cgroup_storage(void)
+{
+	struct bpf_cgroup_storage_map *cgroup_storage =
+		(struct bpf_cgroup_storage_map *)&m_cgroup_storage;
+	struct bpf_map *map = (struct bpf_map *)&m_cgroup_storage;
+
+	VERIFY(check(&cgroup_storage->map, map,
+		     sizeof(struct bpf_cgroup_storage_key), sizeof(__u32), 0));
+
+	return 1;
+}
+
+struct reuseport_array {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_reuseport_sockarray SEC(".maps");
+
+static inline int check_reuseport_sockarray(void)
+{
+	struct reuseport_array *reuseport_sockarray =
+		(struct reuseport_array *)&m_reuseport_sockarray;
+	struct bpf_map *map = (struct bpf_map *)&m_reuseport_sockarray;
+
+	VERIFY(check_default(&reuseport_sockarray->map, map));
+
+	return 1;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
+	__type(key, struct bpf_cgroup_storage_key);
+	__type(value, __u32);
+} m_percpu_cgroup_storage SEC(".maps");
+
+static inline int check_percpu_cgroup_storage(void)
+{
+	struct bpf_cgroup_storage_map *percpu_cgroup_storage =
+		(struct bpf_cgroup_storage_map *)&m_percpu_cgroup_storage;
+	struct bpf_map *map = (struct bpf_map *)&m_percpu_cgroup_storage;
+
+	VERIFY(check(&percpu_cgroup_storage->map, map,
+		     sizeof(struct bpf_cgroup_storage_key), sizeof(__u32), 0));
+
+	return 1;
+}
+
+struct bpf_queue_stack {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_QUEUE);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(value, __u32);
+} m_queue SEC(".maps");
+
+static inline int check_queue(void)
+{
+	struct bpf_queue_stack *queue = (struct bpf_queue_stack *)&m_queue;
+	struct bpf_map *map = (struct bpf_map *)&m_queue;
+
+	VERIFY(check(&queue->map, map, 0, sizeof(__u32), MAX_ENTRIES));
+
+	return 1;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(value, __u32);
+} m_stack SEC(".maps");
+
+static inline int check_stack(void)
+{
+	struct bpf_queue_stack *stack = (struct bpf_queue_stack *)&m_stack;
+	struct bpf_map *map = (struct bpf_map *)&m_stack;
+
+	VERIFY(check(&stack->map, map, 0, sizeof(__u32), MAX_ENTRIES));
+
+	return 1;
+}
+
+struct bpf_sk_storage_map {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_sk_storage SEC(".maps");
+
+static inline int check_sk_storage(void)
+{
+	struct bpf_sk_storage_map *sk_storage =
+		(struct bpf_sk_storage_map *)&m_sk_storage;
+	struct bpf_map *map = (struct bpf_map *)&m_sk_storage;
+
+	VERIFY(check(&sk_storage->map, map, sizeof(__u32), sizeof(__u32), 0));
+
+	return 1;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+	__uint(max_entries, MAX_ENTRIES);
+	__type(key, __u32);
+	__type(value, __u32);
+} m_devmap_hash SEC(".maps");
+
+static inline int check_devmap_hash(void)
+{
+	struct bpf_dtab *devmap_hash = (struct bpf_dtab *)&m_devmap_hash;
+	struct bpf_map *map = (struct bpf_map *)&m_devmap_hash;
+
+	VERIFY(check_default(&devmap_hash->map, map));
+
+	return 1;
+}
+
+struct bpf_ringbuf_map {
+	struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+	__uint(max_entries, 1 << 12);
+} m_ringbuf SEC(".maps");
+
+static inline int check_ringbuf(void)
+{
+	struct bpf_ringbuf_map *ringbuf = (struct bpf_ringbuf_map *)&m_ringbuf;
+	struct bpf_map *map = (struct bpf_map *)&m_ringbuf;
+
+	VERIFY(check(&ringbuf->map, map, 0, 0, 1 << 12));
+
+	return 1;
+}
+
+SEC("cgroup_skb/egress")
+int cg_skb(void *ctx)
+{
+	VERIFY_TYPE(BPF_MAP_TYPE_HASH, check_hash);
+	VERIFY_TYPE(BPF_MAP_TYPE_ARRAY, check_array);
+	VERIFY_TYPE(BPF_MAP_TYPE_PROG_ARRAY, check_prog_array);
+	VERIFY_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, check_perf_event_array);
+	VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_HASH, check_percpu_hash);
+	VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, check_percpu_array);
+	VERIFY_TYPE(BPF_MAP_TYPE_STACK_TRACE, check_stack_trace);
+	VERIFY_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, check_cgroup_array);
+	VERIFY_TYPE(BPF_MAP_TYPE_LRU_HASH, check_lru_hash);
+	VERIFY_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, check_lru_percpu_hash);
+	VERIFY_TYPE(BPF_MAP_TYPE_LPM_TRIE, check_lpm_trie);
+	VERIFY_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, check_array_of_maps);
+	VERIFY_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, check_hash_of_maps);
+	VERIFY_TYPE(BPF_MAP_TYPE_DEVMAP, check_devmap);
+	VERIFY_TYPE(BPF_MAP_TYPE_SOCKMAP, check_sockmap);
+	VERIFY_TYPE(BPF_MAP_TYPE_CPUMAP, check_cpumap);
+	VERIFY_TYPE(BPF_MAP_TYPE_XSKMAP, check_xskmap);
+	VERIFY_TYPE(BPF_MAP_TYPE_SOCKHASH, check_sockhash);
+	VERIFY_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, check_cgroup_storage);
+	VERIFY_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+		    check_reuseport_sockarray);
+	VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+		    check_percpu_cgroup_storage);
+	VERIFY_TYPE(BPF_MAP_TYPE_QUEUE, check_queue);
+	VERIFY_TYPE(BPF_MAP_TYPE_STACK, check_stack);
+	VERIFY_TYPE(BPF_MAP_TYPE_SK_STORAGE, check_sk_storage);
+	VERIFY_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, check_devmap_hash);
+	VERIFY_TYPE(BPF_MAP_TYPE_RINGBUF, check_ringbuf);
+
+	return 1;
+}
+
+__u32 _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c
new file mode 100644
index 000000000000..b52209db8250
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/map_ptr.c
@@ -0,0 +1,62 @@
+{
+	"bpf_map_ptr: read with negative offset rejected",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+	.result = REJECT,
+	.errstr = "R1 is bpf_array invalid negative access: off=-8",
+},
+{
+	"bpf_map_ptr: write rejected",
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 3 },
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+	.result = REJECT,
+	.errstr = "only read from bpf_array is supported",
+},
+{
+	"bpf_map_ptr: read non-existent field rejected",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_6, 0),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 1),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+	.result = REJECT,
+	.errstr = "cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4",
+},
+{
+	"bpf_map_ptr: read ops field accepted",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_6, 0),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_array_48b = { 1 },
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+	.result = ACCEPT,
+	.retval = 1,
+},
-- 
cgit v1.2.3


From 1bdb6c9a1c43fdf9b83b2331dfc6229bd2e71d9b Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Sat, 20 Jun 2020 23:21:12 -0700
Subject: libbpf: Add a bunch of attribute getters/setters for map definitions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a bunch of getter for various aspects of BPF map. Some of these attribute
(e.g., key_size, value_size, type, etc) are available right now in struct
bpf_map_def, but this patch adds getter allowing to fetch them individually.
bpf_map_def approach isn't very scalable, when ABI stability requirements are
taken into account. It's much easier to extend libbpf and add support for new
features, when each aspect of BPF map has separate getter/setter.

Getters follow the common naming convention of not explicitly having "get" in
its name: bpf_map__type() returns map type, bpf_map__key_size() returns
key_size. Setters, though, explicitly have set in their name:
bpf_map__set_type(), bpf_map__set_key_size().

This patch ensures we now have a getter and a setter for the following
map attributes:
  - type;
  - max_entries;
  - map_flags;
  - numa_node;
  - key_size;
  - value_size;
  - ifindex.

bpf_map__resize() enforces unnecessary restriction of max_entries > 0. It is
unnecessary, because libbpf actually supports zero max_entries for some cases
(e.g., for PERF_EVENT_ARRAY map) and treats it specially during map creation
time. To allow setting max_entries=0, new bpf_map__set_max_entries() setter is
added. bpf_map__resize()'s behavior is preserved for backwards compatibility
reasons.

Map ifindex getter is added as well. There is a setter already, but no
corresponding getter. Fix this assymetry as well. bpf_map__set_ifindex()
itself is converted from void function into error-returning one, similar to
other setters. The only error returned right now is -EBUSY, if BPF map is
already loaded and has corresponding FD.

One lacking attribute with no ability to get/set or even specify it
declaratively is numa_node. This patch fixes this gap and both adds
programmatic getter/setter, as well as adds support for numa_node field in
BTF-defined map.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20200621062112.3006313-1-andriin@fb.com
---
 tools/lib/bpf/libbpf.c   | 100 +++++++++++++++++++++++++++++++++++++++++++----
 tools/lib/bpf/libbpf.h   |  30 ++++++++++++--
 tools/lib/bpf/libbpf.map |  14 +++++++
 3 files changed, 134 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 477c679ed945..259a6360475f 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -310,6 +310,7 @@ struct bpf_map {
 	int map_ifindex;
 	int inner_map_fd;
 	struct bpf_map_def def;
+	__u32 numa_node;
 	__u32 btf_var_idx;
 	__u32 btf_key_type_id;
 	__u32 btf_value_type_id;
@@ -1957,6 +1958,10 @@ static int parse_btf_map_def(struct bpf_object *obj,
 				return -EINVAL;
 			pr_debug("map '%s': found map_flags = %u.\n",
 				 map->name, map->def.map_flags);
+		} else if (strcmp(name, "numa_node") == 0) {
+			if (!get_map_field_int(map->name, obj->btf, m, &map->numa_node))
+				return -EINVAL;
+			pr_debug("map '%s': found numa_node = %u.\n", map->name, map->numa_node);
 		} else if (strcmp(name, "key_size") == 0) {
 			__u32 sz;
 
@@ -3222,20 +3227,27 @@ err_free_new_name:
 	return err;
 }
 
-int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
+__u32 bpf_map__max_entries(const struct bpf_map *map)
 {
-	if (!map || !max_entries)
-		return -EINVAL;
+	return map->def.max_entries;
+}
 
-	/* If map already created, its attributes can't be changed. */
+int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
+{
 	if (map->fd >= 0)
 		return -EBUSY;
-
 	map->def.max_entries = max_entries;
-
 	return 0;
 }
 
+int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
+{
+	if (!map || !max_entries)
+		return -EINVAL;
+
+	return bpf_map__set_max_entries(map, max_entries);
+}
+
 static int
 bpf_object__probe_loading(struct bpf_object *obj)
 {
@@ -3603,6 +3615,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
 	create_attr.map_flags = def->map_flags;
 	create_attr.key_size = def->key_size;
 	create_attr.value_size = def->value_size;
+	create_attr.numa_node = map->numa_node;
 
 	if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) {
 		int nr_cpus;
@@ -7088,6 +7101,71 @@ const char *bpf_map__name(const struct bpf_map *map)
 	return map ? map->name : NULL;
 }
 
+enum bpf_map_type bpf_map__type(const struct bpf_map *map)
+{
+	return map->def.type;
+}
+
+int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
+{
+	if (map->fd >= 0)
+		return -EBUSY;
+	map->def.type = type;
+	return 0;
+}
+
+__u32 bpf_map__map_flags(const struct bpf_map *map)
+{
+	return map->def.map_flags;
+}
+
+int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
+{
+	if (map->fd >= 0)
+		return -EBUSY;
+	map->def.map_flags = flags;
+	return 0;
+}
+
+__u32 bpf_map__numa_node(const struct bpf_map *map)
+{
+	return map->numa_node;
+}
+
+int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
+{
+	if (map->fd >= 0)
+		return -EBUSY;
+	map->numa_node = numa_node;
+	return 0;
+}
+
+__u32 bpf_map__key_size(const struct bpf_map *map)
+{
+	return map->def.key_size;
+}
+
+int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
+{
+	if (map->fd >= 0)
+		return -EBUSY;
+	map->def.key_size = size;
+	return 0;
+}
+
+__u32 bpf_map__value_size(const struct bpf_map *map)
+{
+	return map->def.value_size;
+}
+
+int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
+{
+	if (map->fd >= 0)
+		return -EBUSY;
+	map->def.value_size = size;
+	return 0;
+}
+
 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
 {
 	return map ? map->btf_key_type_id : 0;
@@ -7140,9 +7218,17 @@ bool bpf_map__is_internal(const struct bpf_map *map)
 	return map->libbpf_type != LIBBPF_MAP_UNSPEC;
 }
 
-void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
+__u32 bpf_map__ifindex(const struct bpf_map *map)
+{
+	return map->map_ifindex;
+}
+
+int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
 {
+	if (map->fd >= 0)
+		return -EBUSY;
 	map->map_ifindex = ifindex;
+	return 0;
 }
 
 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 334437af3014..fdd279fb1866 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -418,11 +418,38 @@ bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj);
 LIBBPF_API struct bpf_map *
 bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj);
 
+/* get/set map FD */
 LIBBPF_API int bpf_map__fd(const struct bpf_map *map);
+LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
+/* get map definition */
 LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
+/* get map name */
 LIBBPF_API const char *bpf_map__name(const struct bpf_map *map);
+/* get/set map type */
+LIBBPF_API enum bpf_map_type bpf_map__type(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type);
+/* get/set map size (max_entries) */
+LIBBPF_API __u32 bpf_map__max_entries(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries);
+LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
+/* get/set map flags */
+LIBBPF_API __u32 bpf_map__map_flags(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags);
+/* get/set map NUMA node */
+LIBBPF_API __u32 bpf_map__numa_node(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node);
+/* get/set map key size */
+LIBBPF_API __u32 bpf_map__key_size(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_key_size(struct bpf_map *map, __u32 size);
+/* get/set map value size */
+LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_value_size(struct bpf_map *map, __u32 size);
+/* get map key/value BTF type IDs */
 LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map);
 LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map);
+/* get/set map if_index */
+LIBBPF_API __u32 bpf_map__ifindex(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
 
 typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
 LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
@@ -430,11 +457,8 @@ LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
 LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
 LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
 					  const void *data, size_t size);
-LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
-LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
 LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
 LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
-LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
 LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
 LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
 LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index c914347f5065..9914e0db4859 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -272,4 +272,18 @@ LIBBPF_0.0.9 {
 } LIBBPF_0.0.8;
 
 LIBBPF_0.1.0 {
+	global:
+		bpf_map__ifindex;
+		bpf_map__key_size;
+		bpf_map__map_flags;
+		bpf_map__max_entries;
+		bpf_map__numa_node;
+		bpf_map__set_key_size;
+		bpf_map__set_map_flags;
+		bpf_map__set_max_entries;
+		bpf_map__set_numa_node;
+		bpf_map__set_type;
+		bpf_map__set_value_size;
+		bpf_map__type;
+		bpf_map__value_size;
 } LIBBPF_0.0.9;
-- 
cgit v1.2.3


From 2e33efe32e019328916ce653dc1265d637261993 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 19 Jun 2020 16:16:55 -0700
Subject: libbpf: Generalize libbpf externs support

Switch existing Kconfig externs to be just one of few possible kinds of more
generic externs. This refactoring is in preparation for ksymbol extern
support, added in the follow up patch. There are no functional changes
intended.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Hao Luo <haoluo@google.com>
Link: https://lore.kernel.org/bpf/20200619231703.738941-2-andriin@fb.com
---
 tools/lib/bpf/libbpf.c | 346 +++++++++++++++++++++++++++++--------------------
 1 file changed, 206 insertions(+), 140 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 259a6360475f..ffccb5af32a5 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -330,24 +330,35 @@ struct bpf_map {
 
 enum extern_type {
 	EXT_UNKNOWN,
-	EXT_CHAR,
-	EXT_BOOL,
-	EXT_INT,
-	EXT_TRISTATE,
-	EXT_CHAR_ARR,
+	EXT_KCFG,
+};
+
+enum kcfg_type {
+	KCFG_UNKNOWN,
+	KCFG_CHAR,
+	KCFG_BOOL,
+	KCFG_INT,
+	KCFG_TRISTATE,
+	KCFG_CHAR_ARR,
 };
 
 struct extern_desc {
-	const char *name;
+	enum extern_type type;
 	int sym_idx;
 	int btf_id;
-	enum extern_type type;
-	int sz;
-	int align;
-	int data_off;
-	bool is_signed;
-	bool is_weak;
+	int sec_btf_id;
+	const char *name;
 	bool is_set;
+	bool is_weak;
+	union {
+		struct {
+			enum kcfg_type type;
+			int sz;
+			int align;
+			int data_off;
+			bool is_signed;
+		} kcfg;
+	};
 };
 
 static LIST_HEAD(bpf_objects_list);
@@ -1424,19 +1435,19 @@ static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
 	return NULL;
 }
 
-static int set_ext_value_tri(struct extern_desc *ext, void *ext_val,
-			     char value)
+static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
+			      char value)
 {
-	switch (ext->type) {
-	case EXT_BOOL:
+	switch (ext->kcfg.type) {
+	case KCFG_BOOL:
 		if (value == 'm') {
-			pr_warn("extern %s=%c should be tristate or char\n",
+			pr_warn("extern (kcfg) %s=%c should be tristate or char\n",
 				ext->name, value);
 			return -EINVAL;
 		}
 		*(bool *)ext_val = value == 'y' ? true : false;
 		break;
-	case EXT_TRISTATE:
+	case KCFG_TRISTATE:
 		if (value == 'y')
 			*(enum libbpf_tristate *)ext_val = TRI_YES;
 		else if (value == 'm')
@@ -1444,14 +1455,14 @@ static int set_ext_value_tri(struct extern_desc *ext, void *ext_val,
 		else /* value == 'n' */
 			*(enum libbpf_tristate *)ext_val = TRI_NO;
 		break;
-	case EXT_CHAR:
+	case KCFG_CHAR:
 		*(char *)ext_val = value;
 		break;
-	case EXT_UNKNOWN:
-	case EXT_INT:
-	case EXT_CHAR_ARR:
+	case KCFG_UNKNOWN:
+	case KCFG_INT:
+	case KCFG_CHAR_ARR:
 	default:
-		pr_warn("extern %s=%c should be bool, tristate, or char\n",
+		pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n",
 			ext->name, value);
 		return -EINVAL;
 	}
@@ -1459,29 +1470,29 @@ static int set_ext_value_tri(struct extern_desc *ext, void *ext_val,
 	return 0;
 }
 
-static int set_ext_value_str(struct extern_desc *ext, char *ext_val,
-			     const char *value)
+static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
+			      const char *value)
 {
 	size_t len;
 
-	if (ext->type != EXT_CHAR_ARR) {
-		pr_warn("extern %s=%s should char array\n", ext->name, value);
+	if (ext->kcfg.type != KCFG_CHAR_ARR) {
+		pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value);
 		return -EINVAL;
 	}
 
 	len = strlen(value);
 	if (value[len - 1] != '"') {
-		pr_warn("extern '%s': invalid string config '%s'\n",
+		pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
 			ext->name, value);
 		return -EINVAL;
 	}
 
 	/* strip quotes */
 	len -= 2;
-	if (len >= ext->sz) {
-		pr_warn("extern '%s': long string config %s of (%zu bytes) truncated to %d bytes\n",
-			ext->name, value, len, ext->sz - 1);
-		len = ext->sz - 1;
+	if (len >= ext->kcfg.sz) {
+		pr_warn("extern (kcfg) '%s': long string config %s of (%zu bytes) truncated to %d bytes\n",
+			ext->name, value, len, ext->kcfg.sz - 1);
+		len = ext->kcfg.sz - 1;
 	}
 	memcpy(ext_val, value + 1, len);
 	ext_val[len] = '\0';
@@ -1508,11 +1519,11 @@ static int parse_u64(const char *value, __u64 *res)
 	return 0;
 }
 
-static bool is_ext_value_in_range(const struct extern_desc *ext, __u64 v)
+static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
 {
-	int bit_sz = ext->sz * 8;
+	int bit_sz = ext->kcfg.sz * 8;
 
-	if (ext->sz == 8)
+	if (ext->kcfg.sz == 8)
 		return true;
 
 	/* Validate that value stored in u64 fits in integer of `ext->sz`
@@ -1527,26 +1538,26 @@ static bool is_ext_value_in_range(const struct extern_desc *ext, __u64 v)
 	 *  For unsigned target integer, check that all the (64 - Y) bits are
 	 *  zero.
 	 */
-	if (ext->is_signed)
+	if (ext->kcfg.is_signed)
 		return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
 	else
 		return (v >> bit_sz) == 0;
 }
 
-static int set_ext_value_num(struct extern_desc *ext, void *ext_val,
-			     __u64 value)
+static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
+			      __u64 value)
 {
-	if (ext->type != EXT_INT && ext->type != EXT_CHAR) {
-		pr_warn("extern %s=%llu should be integer\n",
+	if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
+		pr_warn("extern (kcfg) %s=%llu should be integer\n",
 			ext->name, (unsigned long long)value);
 		return -EINVAL;
 	}
-	if (!is_ext_value_in_range(ext, value)) {
-		pr_warn("extern %s=%llu value doesn't fit in %d bytes\n",
-			ext->name, (unsigned long long)value, ext->sz);
+	if (!is_kcfg_value_in_range(ext, value)) {
+		pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n",
+			ext->name, (unsigned long long)value, ext->kcfg.sz);
 		return -ERANGE;
 	}
-	switch (ext->sz) {
+	switch (ext->kcfg.sz) {
 		case 1: *(__u8 *)ext_val = value; break;
 		case 2: *(__u16 *)ext_val = value; break;
 		case 4: *(__u32 *)ext_val = value; break;
@@ -1592,30 +1603,30 @@ static int bpf_object__process_kconfig_line(struct bpf_object *obj,
 	if (!ext || ext->is_set)
 		return 0;
 
-	ext_val = data + ext->data_off;
+	ext_val = data + ext->kcfg.data_off;
 	value = sep + 1;
 
 	switch (*value) {
 	case 'y': case 'n': case 'm':
-		err = set_ext_value_tri(ext, ext_val, *value);
+		err = set_kcfg_value_tri(ext, ext_val, *value);
 		break;
 	case '"':
-		err = set_ext_value_str(ext, ext_val, value);
+		err = set_kcfg_value_str(ext, ext_val, value);
 		break;
 	default:
 		/* assume integer */
 		err = parse_u64(value, &num);
 		if (err) {
-			pr_warn("extern %s=%s should be integer\n",
+			pr_warn("extern (kcfg) %s=%s should be integer\n",
 				ext->name, value);
 			return err;
 		}
-		err = set_ext_value_num(ext, ext_val, num);
+		err = set_kcfg_value_num(ext, ext_val, num);
 		break;
 	}
 	if (err)
 		return err;
-	pr_debug("extern %s=%s\n", ext->name, value);
+	pr_debug("extern (kcfg) %s=%s\n", ext->name, value);
 	return 0;
 }
 
@@ -1686,16 +1697,20 @@ static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
 
 static int bpf_object__init_kconfig_map(struct bpf_object *obj)
 {
-	struct extern_desc *last_ext;
+	struct extern_desc *last_ext = NULL, *ext;
 	size_t map_sz;
-	int err;
+	int i, err;
 
-	if (obj->nr_extern == 0)
-		return 0;
+	for (i = 0; i < obj->nr_extern; i++) {
+		ext = &obj->externs[i];
+		if (ext->type == EXT_KCFG)
+			last_ext = ext;
+	}
 
-	last_ext = &obj->externs[obj->nr_extern - 1];
-	map_sz = last_ext->data_off + last_ext->sz;
+	if (!last_ext)
+		return 0;
 
+	map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
 	err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
 					    obj->efile.symbols_shndx,
 					    NULL, map_sz);
@@ -2714,8 +2729,33 @@ static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
 	return -ENOENT;
 }
 
-static enum extern_type find_extern_type(const struct btf *btf, int id,
-					 bool *is_signed)
+static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) {
+	const struct btf_var_secinfo *vs;
+	const struct btf_type *t;
+	int i, j, n;
+
+	if (!btf)
+		return -ESRCH;
+
+	n = btf__get_nr_types(btf);
+	for (i = 1; i <= n; i++) {
+		t = btf__type_by_id(btf, i);
+
+		if (!btf_is_datasec(t))
+			continue;
+
+		vs = btf_var_secinfos(t);
+		for (j = 0; j < btf_vlen(t); j++, vs++) {
+			if (vs->type == ext_btf_id)
+				return i;
+		}
+	}
+
+	return -ENOENT;
+}
+
+static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
+				     bool *is_signed)
 {
 	const struct btf_type *t;
 	const char *name;
@@ -2730,29 +2770,29 @@ static enum extern_type find_extern_type(const struct btf *btf, int id,
 		int enc = btf_int_encoding(t);
 
 		if (enc & BTF_INT_BOOL)
-			return t->size == 1 ? EXT_BOOL : EXT_UNKNOWN;
+			return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
 		if (is_signed)
 			*is_signed = enc & BTF_INT_SIGNED;
 		if (t->size == 1)
-			return EXT_CHAR;
+			return KCFG_CHAR;
 		if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
-			return EXT_UNKNOWN;
-		return EXT_INT;
+			return KCFG_UNKNOWN;
+		return KCFG_INT;
 	}
 	case BTF_KIND_ENUM:
 		if (t->size != 4)
-			return EXT_UNKNOWN;
+			return KCFG_UNKNOWN;
 		if (strcmp(name, "libbpf_tristate"))
-			return EXT_UNKNOWN;
-		return EXT_TRISTATE;
+			return KCFG_UNKNOWN;
+		return KCFG_TRISTATE;
 	case BTF_KIND_ARRAY:
 		if (btf_array(t)->nelems == 0)
-			return EXT_UNKNOWN;
-		if (find_extern_type(btf, btf_array(t)->type, NULL) != EXT_CHAR)
-			return EXT_UNKNOWN;
-		return EXT_CHAR_ARR;
+			return KCFG_UNKNOWN;
+		if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
+			return KCFG_UNKNOWN;
+		return KCFG_CHAR_ARR;
 	default:
-		return EXT_UNKNOWN;
+		return KCFG_UNKNOWN;
 	}
 }
 
@@ -2761,23 +2801,29 @@ static int cmp_externs(const void *_a, const void *_b)
 	const struct extern_desc *a = _a;
 	const struct extern_desc *b = _b;
 
-	/* descending order by alignment requirements */
-	if (a->align != b->align)
-		return a->align > b->align ? -1 : 1;
-	/* ascending order by size, within same alignment class */
-	if (a->sz != b->sz)
-		return a->sz < b->sz ? -1 : 1;
+	if (a->type != b->type)
+		return a->type < b->type ? -1 : 1;
+
+	if (a->type == EXT_KCFG) {
+		/* descending order by alignment requirements */
+		if (a->kcfg.align != b->kcfg.align)
+			return a->kcfg.align > b->kcfg.align ? -1 : 1;
+		/* ascending order by size, within same alignment class */
+		if (a->kcfg.sz != b->kcfg.sz)
+			return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
+	}
+
 	/* resolve ties by name */
 	return strcmp(a->name, b->name);
 }
 
 static int bpf_object__collect_externs(struct bpf_object *obj)
 {
+	struct btf_type *sec, *kcfg_sec = NULL;
 	const struct btf_type *t;
 	struct extern_desc *ext;
-	int i, n, off, btf_id;
-	struct btf_type *sec;
-	const char *ext_name;
+	int i, n, off;
+	const char *ext_name, *sec_name;
 	Elf_Scn *scn;
 	GElf_Shdr sh;
 
@@ -2823,22 +2869,39 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
 		ext->name = btf__name_by_offset(obj->btf, t->name_off);
 		ext->sym_idx = i;
 		ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK;
-		ext->sz = btf__resolve_size(obj->btf, t->type);
-		if (ext->sz <= 0) {
-			pr_warn("failed to resolve size of extern '%s': %d\n",
-				ext_name, ext->sz);
-			return ext->sz;
-		}
-		ext->align = btf__align_of(obj->btf, t->type);
-		if (ext->align <= 0) {
-			pr_warn("failed to determine alignment of extern '%s': %d\n",
-				ext_name, ext->align);
-			return -EINVAL;
-		}
-		ext->type = find_extern_type(obj->btf, t->type,
-					     &ext->is_signed);
-		if (ext->type == EXT_UNKNOWN) {
-			pr_warn("extern '%s' type is unsupported\n", ext_name);
+
+		ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
+		if (ext->sec_btf_id <= 0) {
+			pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
+				ext_name, ext->btf_id, ext->sec_btf_id);
+			return ext->sec_btf_id;
+		}
+		sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
+		sec_name = btf__name_by_offset(obj->btf, sec->name_off);
+
+		if (strcmp(sec_name, KCONFIG_SEC) == 0) {
+			kcfg_sec = sec;
+			ext->type = EXT_KCFG;
+			ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
+			if (ext->kcfg.sz <= 0) {
+				pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
+					ext_name, ext->kcfg.sz);
+				return ext->kcfg.sz;
+			}
+			ext->kcfg.align = btf__align_of(obj->btf, t->type);
+			if (ext->kcfg.align <= 0) {
+				pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
+					ext_name, ext->kcfg.align);
+				return -EINVAL;
+			}
+			ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
+						        &ext->kcfg.is_signed);
+			if (ext->kcfg.type == KCFG_UNKNOWN) {
+				pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name);
+				return -ENOTSUP;
+			}
+		} else {
+			pr_warn("unrecognized extern section '%s'\n", sec_name);
 			return -ENOTSUP;
 		}
 	}
@@ -2847,42 +2910,40 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
 	if (!obj->nr_extern)
 		return 0;
 
-	/* sort externs by (alignment, size, name) and calculate their offsets
-	 * within a map */
+	/* sort externs by type, for kcfg ones also by (align, size, name) */
 	qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
-	off = 0;
-	for (i = 0; i < obj->nr_extern; i++) {
-		ext = &obj->externs[i];
-		ext->data_off = roundup(off, ext->align);
-		off = ext->data_off + ext->sz;
-		pr_debug("extern #%d: symbol %d, off %u, name %s\n",
-			 i, ext->sym_idx, ext->data_off, ext->name);
-	}
 
-	btf_id = btf__find_by_name(obj->btf, KCONFIG_SEC);
-	if (btf_id <= 0) {
-		pr_warn("no BTF info found for '%s' datasec\n", KCONFIG_SEC);
-		return -ESRCH;
-	}
+	if (kcfg_sec) {
+		sec = kcfg_sec;
+		/* for kcfg externs calculate their offsets within a .kconfig map */
+		off = 0;
+		for (i = 0; i < obj->nr_extern; i++) {
+			ext = &obj->externs[i];
+			if (ext->type != EXT_KCFG)
+				continue;
 
-	sec = (struct btf_type *)btf__type_by_id(obj->btf, btf_id);
-	sec->size = off;
-	n = btf_vlen(sec);
-	for (i = 0; i < n; i++) {
-		struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
-
-		t = btf__type_by_id(obj->btf, vs->type);
-		ext_name = btf__name_by_offset(obj->btf, t->name_off);
-		ext = find_extern_by_name(obj, ext_name);
-		if (!ext) {
-			pr_warn("failed to find extern definition for BTF var '%s'\n",
-				ext_name);
-			return -ESRCH;
+			ext->kcfg.data_off = roundup(off, ext->kcfg.align);
+			off = ext->kcfg.data_off + ext->kcfg.sz;
+			pr_debug("extern #%d (kcfg): symbol %d, off %u, name %s\n",
+				 i, ext->sym_idx, ext->kcfg.data_off, ext->name);
+		}
+		sec->size = off;
+		n = btf_vlen(sec);
+		for (i = 0; i < n; i++) {
+			struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
+
+			t = btf__type_by_id(obj->btf, vs->type);
+			ext_name = btf__name_by_offset(obj->btf, t->name_off);
+			ext = find_extern_by_name(obj, ext_name);
+			if (!ext) {
+				pr_warn("failed to find extern definition for BTF var '%s'\n",
+					ext_name);
+				return -ESRCH;
+			}
+			btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+			vs->offset = ext->kcfg.data_off;
 		}
-		vs->offset = ext->data_off;
-		btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
 	}
-
 	return 0;
 }
 
@@ -3012,11 +3073,11 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
 				sym_idx);
 			return -LIBBPF_ERRNO__RELOC;
 		}
-		pr_debug("found extern #%d '%s' (sym %d, off %u) for insn %u\n",
-			 i, ext->name, ext->sym_idx, ext->data_off, insn_idx);
+		pr_debug("found extern #%d '%s' (sym %d) for insn %u\n",
+			 i, ext->name, ext->sym_idx, insn_idx);
 		reloc_desc->type = RELO_EXTERN;
 		reloc_desc->insn_idx = insn_idx;
-		reloc_desc->sym_off = ext->data_off;
+		reloc_desc->sym_off = i; /* sym_off stores extern index */
 		return 0;
 	}
 
@@ -4941,6 +5002,7 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
 	for (i = 0; i < prog->nr_reloc; i++) {
 		struct reloc_desc *relo = &prog->reloc_desc[i];
 		struct bpf_insn *insn = &prog->insns[relo->insn_idx];
+		struct extern_desc *ext;
 
 		if (relo->insn_idx + 1 >= (int)prog->insns_cnt) {
 			pr_warn("relocation out of range: '%s'\n",
@@ -4959,9 +5021,10 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
 			insn[0].imm = obj->maps[relo->map_idx].fd;
 			break;
 		case RELO_EXTERN:
+			ext = &obj->externs[relo->sym_off];
 			insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
 			insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
-			insn[1].imm = relo->sym_off;
+			insn[1].imm = ext->kcfg.data_off;
 			break;
 		case RELO_CALL:
 			err = bpf_program__reloc_text(prog, obj, relo);
@@ -5585,30 +5648,33 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
 {
 	bool need_config = false;
 	struct extern_desc *ext;
+	void *kcfg_data = NULL;
 	int err, i;
-	void *data;
 
 	if (obj->nr_extern == 0)
 		return 0;
 
-	data = obj->maps[obj->kconfig_map_idx].mmaped;
+	if (obj->kconfig_map_idx >= 0)
+		kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
 
 	for (i = 0; i < obj->nr_extern; i++) {
 		ext = &obj->externs[i];
 
-		if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
-			void *ext_val = data + ext->data_off;
+		if (ext->type == EXT_KCFG &&
+		    strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
+			void *ext_val = kcfg_data + ext->kcfg.data_off;
 			__u32 kver = get_kernel_version();
 
 			if (!kver) {
 				pr_warn("failed to get kernel version\n");
 				return -EINVAL;
 			}
-			err = set_ext_value_num(ext, ext_val, kver);
+			err = set_kcfg_value_num(ext, ext_val, kver);
 			if (err)
 				return err;
-			pr_debug("extern %s=0x%x\n", ext->name, kver);
-		} else if (strncmp(ext->name, "CONFIG_", 7) == 0) {
+			pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver);
+		} else if (ext->type == EXT_KCFG &&
+			   strncmp(ext->name, "CONFIG_", 7) == 0) {
 			need_config = true;
 		} else {
 			pr_warn("unrecognized extern '%s'\n", ext->name);
@@ -5616,20 +5682,20 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
 		}
 	}
 	if (need_config && extra_kconfig) {
-		err = bpf_object__read_kconfig_mem(obj, extra_kconfig, data);
+		err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
 		if (err)
 			return -EINVAL;
 		need_config = false;
 		for (i = 0; i < obj->nr_extern; i++) {
 			ext = &obj->externs[i];
-			if (!ext->is_set) {
+			if (ext->type == EXT_KCFG && !ext->is_set) {
 				need_config = true;
 				break;
 			}
 		}
 	}
 	if (need_config) {
-		err = bpf_object__read_kconfig_file(obj, data);
+		err = bpf_object__read_kconfig_file(obj, kcfg_data);
 		if (err)
 			return -EINVAL;
 	}
-- 
cgit v1.2.3


From 1c0c7074fefd769f62dda155e881ca90c9e3e75a Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 19 Jun 2020 16:16:56 -0700
Subject: libbpf: Add support for extracting kernel symbol addresses

Add support for another (in addition to existing Kconfig) special kind of
externs in BPF code, kernel symbol externs. Such externs allow BPF code to
"know" kernel symbol address and either use it for comparisons with kernel
data structures (e.g., struct file's f_op pointer, to distinguish different
kinds of file), or, with the help of bpf_probe_user_kernel(), to follow
pointers and read data from global variables. Kernel symbol addresses are
found through /proc/kallsyms, which should be present in the system.

Currently, such kernel symbol variables are typeless: they have to be defined
as `extern const void <symbol>` and the only operation you can do (in C code)
with them is to take its address. Such extern should reside in a special
section '.ksyms'. bpf_helpers.h header provides __ksym macro for this. Strong
vs weak semantics stays the same as with Kconfig externs. If symbol is not
found in /proc/kallsyms, this will be a failure for strong (non-weak) extern,
but will be defaulted to 0 for weak externs.

If the same symbol is defined multiple times in /proc/kallsyms, then it will
be error if any of the associated addresses differs. In that case, address is
ambiguous, so libbpf falls on the side of caution, rather than confusing user
with randomly chosen address.

In the future, once kernel is extended with variables BTF information, such
ksym externs will be supported in a typed version, which will allow BPF
program to read variable's contents directly, similarly to how it's done for
fentry/fexit input arguments.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Hao Luo <haoluo@google.com>
Link: https://lore.kernel.org/bpf/20200619231703.738941-3-andriin@fb.com
---
 tools/lib/bpf/bpf_helpers.h |   1 +
 tools/lib/bpf/btf.h         |   5 ++
 tools/lib/bpf/libbpf.c      | 144 ++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 144 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index f67dce2af802..a510d8ed716f 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -75,5 +75,6 @@ enum libbpf_tristate {
 };
 
 #define __kconfig __attribute__((section(".kconfig")))
+#define __ksym __attribute__((section(".ksyms")))
 
 #endif
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 70c1b7ec2bd0..06cd1731c154 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -168,6 +168,11 @@ static inline bool btf_kflag(const struct btf_type *t)
 	return BTF_INFO_KFLAG(t->info);
 }
 
+static inline bool btf_is_void(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_UNKN;
+}
+
 static inline bool btf_is_int(const struct btf_type *t)
 {
 	return btf_kind(t) == BTF_KIND_INT;
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index ffccb5af32a5..18461deb1b19 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -285,6 +285,7 @@ struct bpf_struct_ops {
 #define BSS_SEC ".bss"
 #define RODATA_SEC ".rodata"
 #define KCONFIG_SEC ".kconfig"
+#define KSYMS_SEC ".ksyms"
 #define STRUCT_OPS_SEC ".struct_ops"
 
 enum libbpf_map_type {
@@ -331,6 +332,7 @@ struct bpf_map {
 enum extern_type {
 	EXT_UNKNOWN,
 	EXT_KCFG,
+	EXT_KSYM,
 };
 
 enum kcfg_type {
@@ -358,6 +360,9 @@ struct extern_desc {
 			int data_off;
 			bool is_signed;
 		} kcfg;
+		struct {
+			unsigned long long addr;
+		} ksym;
 	};
 };
 
@@ -2817,9 +2822,25 @@ static int cmp_externs(const void *_a, const void *_b)
 	return strcmp(a->name, b->name);
 }
 
+static int find_int_btf_id(const struct btf *btf)
+{
+	const struct btf_type *t;
+	int i, n;
+
+	n = btf__get_nr_types(btf);
+	for (i = 1; i <= n; i++) {
+		t = btf__type_by_id(btf, i);
+
+		if (btf_is_int(t) && btf_int_bits(t) == 32)
+			return i;
+	}
+
+	return 0;
+}
+
 static int bpf_object__collect_externs(struct bpf_object *obj)
 {
-	struct btf_type *sec, *kcfg_sec = NULL;
+	struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
 	const struct btf_type *t;
 	struct extern_desc *ext;
 	int i, n, off;
@@ -2900,6 +2921,17 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
 				pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name);
 				return -ENOTSUP;
 			}
+		} else if (strcmp(sec_name, KSYMS_SEC) == 0) {
+			const struct btf_type *vt;
+
+			ksym_sec = sec;
+			ext->type = EXT_KSYM;
+
+			vt = skip_mods_and_typedefs(obj->btf, t->type, NULL);
+			if (!btf_is_void(vt)) {
+				pr_warn("extern (ksym) '%s' is not typeless (void)\n", ext_name);
+				return -ENOTSUP;
+			}
 		} else {
 			pr_warn("unrecognized extern section '%s'\n", sec_name);
 			return -ENOTSUP;
@@ -2913,6 +2945,46 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
 	/* sort externs by type, for kcfg ones also by (align, size, name) */
 	qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
 
+	/* for .ksyms section, we need to turn all externs into allocated
+	 * variables in BTF to pass kernel verification; we do this by
+	 * pretending that each extern is a 8-byte variable
+	 */
+	if (ksym_sec) {
+		/* find existing 4-byte integer type in BTF to use for fake
+		 * extern variables in DATASEC
+		 */
+		int int_btf_id = find_int_btf_id(obj->btf);
+
+		for (i = 0; i < obj->nr_extern; i++) {
+			ext = &obj->externs[i];
+			if (ext->type != EXT_KSYM)
+				continue;
+			pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
+				 i, ext->sym_idx, ext->name);
+		}
+
+		sec = ksym_sec;
+		n = btf_vlen(sec);
+		for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
+			struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
+			struct btf_type *vt;
+
+			vt = (void *)btf__type_by_id(obj->btf, vs->type);
+			ext_name = btf__name_by_offset(obj->btf, vt->name_off);
+			ext = find_extern_by_name(obj, ext_name);
+			if (!ext) {
+				pr_warn("failed to find extern definition for BTF var '%s'\n",
+					ext_name);
+				return -ESRCH;
+			}
+			btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+			vt->type = int_btf_id;
+			vs->offset = off;
+			vs->size = sizeof(int);
+		}
+		sec->size = off;
+	}
+
 	if (kcfg_sec) {
 		sec = kcfg_sec;
 		/* for kcfg externs calculate their offsets within a .kconfig map */
@@ -2924,7 +2996,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
 
 			ext->kcfg.data_off = roundup(off, ext->kcfg.align);
 			off = ext->kcfg.data_off + ext->kcfg.sz;
-			pr_debug("extern #%d (kcfg): symbol %d, off %u, name %s\n",
+			pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
 				 i, ext->sym_idx, ext->kcfg.data_off, ext->name);
 		}
 		sec->size = off;
@@ -5022,9 +5094,14 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
 			break;
 		case RELO_EXTERN:
 			ext = &obj->externs[relo->sym_off];
-			insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
-			insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
-			insn[1].imm = ext->kcfg.data_off;
+			if (ext->type == EXT_KCFG) {
+				insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+				insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
+				insn[1].imm = ext->kcfg.data_off;
+			} else /* EXT_KSYM */ {
+				insn[0].imm = (__u32)ext->ksym.addr;
+				insn[1].imm = ext->ksym.addr >> 32;
+			}
 			break;
 		case RELO_CALL:
 			err = bpf_program__reloc_text(prog, obj, relo);
@@ -5643,10 +5720,58 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj)
 	return 0;
 }
 
+static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
+{
+	char sym_type, sym_name[500];
+	unsigned long long sym_addr;
+	struct extern_desc *ext;
+	int ret, err = 0;
+	FILE *f;
+
+	f = fopen("/proc/kallsyms", "r");
+	if (!f) {
+		err = -errno;
+		pr_warn("failed to open /proc/kallsyms: %d\n", err);
+		return err;
+	}
+
+	while (true) {
+		ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
+			     &sym_addr, &sym_type, sym_name);
+		if (ret == EOF && feof(f))
+			break;
+		if (ret != 3) {
+			pr_warn("failed to read kallasyms entry: %d\n", ret);
+			err = -EINVAL;
+			goto out;
+		}
+
+		ext = find_extern_by_name(obj, sym_name);
+		if (!ext || ext->type != EXT_KSYM)
+			continue;
+
+		if (ext->is_set && ext->ksym.addr != sym_addr) {
+			pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
+				sym_name, ext->ksym.addr, sym_addr);
+			err = -EINVAL;
+			goto out;
+		}
+		if (!ext->is_set) {
+			ext->is_set = true;
+			ext->ksym.addr = sym_addr;
+			pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr);
+		}
+	}
+
+out:
+	fclose(f);
+	return err;
+}
+
 static int bpf_object__resolve_externs(struct bpf_object *obj,
 				       const char *extra_kconfig)
 {
-	bool need_config = false;
+	bool need_config = false, need_kallsyms = false;
 	struct extern_desc *ext;
 	void *kcfg_data = NULL;
 	int err, i;
@@ -5676,6 +5801,8 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
 		} else if (ext->type == EXT_KCFG &&
 			   strncmp(ext->name, "CONFIG_", 7) == 0) {
 			need_config = true;
+		} else if (ext->type == EXT_KSYM) {
+			need_kallsyms = true;
 		} else {
 			pr_warn("unrecognized extern '%s'\n", ext->name);
 			return -EINVAL;
@@ -5699,6 +5826,11 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,
 		if (err)
 			return -EINVAL;
 	}
+	if (need_kallsyms) {
+		err = bpf_object__read_kallsyms_file(obj);
+		if (err)
+			return -EINVAL;
+	}
 	for (i = 0; i < obj->nr_extern; i++) {
 		ext = &obj->externs[i];
 
-- 
cgit v1.2.3


From b7ddfab20a6af3a0e366000eada63adf6a7683e7 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 19 Jun 2020 16:16:57 -0700
Subject: selftests/bpf: Add __ksym extern selftest

Validate libbpf is able to handle weak and strong kernel symbol externs in BPF
code correctly.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Hao Luo <haoluo@google.com>
Link: https://lore.kernel.org/bpf/20200619231703.738941-4-andriin@fb.com
---
 tools/testing/selftests/bpf/prog_tests/ksyms.c | 71 ++++++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/test_ksyms.c | 32 ++++++++++++
 2 files changed, 103 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/ksyms.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_ksyms.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms.c b/tools/testing/selftests/bpf/prog_tests/ksyms.c
new file mode 100644
index 000000000000..e3d6777226a8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <test_progs.h>
+#include "test_ksyms.skel.h"
+#include <sys/stat.h>
+
+static int duration;
+
+static __u64 kallsyms_find(const char *sym)
+{
+	char type, name[500];
+	__u64 addr, res = 0;
+	FILE *f;
+
+	f = fopen("/proc/kallsyms", "r");
+	if (CHECK(!f, "kallsyms_fopen", "failed to open: %d\n", errno))
+		return 0;
+
+	while (fscanf(f, "%llx %c %499s%*[^\n]\n", &addr, &type, name) > 0) {
+		if (strcmp(name, sym) == 0) {
+			res = addr;
+			goto out;
+		}
+	}
+
+	CHECK(false, "not_found", "symbol %s not found\n", sym);
+out:
+	fclose(f);
+	return res;
+}
+
+void test_ksyms(void)
+{
+	__u64 link_fops_addr = kallsyms_find("bpf_link_fops");
+	const char *btf_path = "/sys/kernel/btf/vmlinux";
+	struct test_ksyms *skel;
+	struct test_ksyms__data *data;
+	struct stat st;
+	__u64 btf_size;
+	int err;
+
+	if (CHECK(stat(btf_path, &st), "stat_btf", "err %d\n", errno))
+		return;
+	btf_size = st.st_size;
+
+	skel = test_ksyms__open_and_load();
+	if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n"))
+		return;
+
+	err = test_ksyms__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	/* trigger tracepoint */
+	usleep(1);
+
+	data = skel->data;
+	CHECK(data->out__bpf_link_fops != link_fops_addr, "bpf_link_fops",
+	      "got 0x%llx, exp 0x%llx\n",
+	      data->out__bpf_link_fops, link_fops_addr);
+	CHECK(data->out__bpf_link_fops1 != 0, "bpf_link_fops1",
+	      "got %llu, exp %llu\n", data->out__bpf_link_fops1, (__u64)0);
+	CHECK(data->out__btf_size != btf_size, "btf_size",
+	      "got %llu, exp %llu\n", data->out__btf_size, btf_size);
+	CHECK(data->out__per_cpu_start != 0, "__per_cpu_start",
+	      "got %llu, exp %llu\n", data->out__per_cpu_start, (__u64)0);
+
+cleanup:
+	test_ksyms__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms.c b/tools/testing/selftests/bpf/progs/test_ksyms.c
new file mode 100644
index 000000000000..6c9cbb5a3bdf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u64 out__bpf_link_fops = -1;
+__u64 out__bpf_link_fops1 = -1;
+__u64 out__btf_size = -1;
+__u64 out__per_cpu_start = -1;
+
+extern const void bpf_link_fops __ksym;
+extern const void __start_BTF __ksym;
+extern const void __stop_BTF __ksym;
+extern const void __per_cpu_start __ksym;
+/* non-existing symbol, weak, default to zero */
+extern const void bpf_link_fops1 __ksym __weak;
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+	out__bpf_link_fops = (__u64)&bpf_link_fops;
+	out__btf_size = (__u64)(&__stop_BTF - &__start_BTF);
+	out__per_cpu_start = (__u64)&__per_cpu_start;
+
+	out__bpf_link_fops1 = (__u64)&bpf_link_fops1;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From a479b8ce4ed1457f814be6f67a8447a9af38f235 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 19 Jun 2020 16:16:58 -0700
Subject: tools/bpftool: Move map/prog parsing logic into common

Move functions that parse map and prog by id/tag/name/etc outside of
map.c/prog.c, respectively. These functions are used outside of those files
and are generic enough to be in common. This also makes heavy-weight map.c and
prog.c more decoupled from the rest of bpftool files and facilitates more
lightweight bootstrap bpftool variant.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20200619231703.738941-5-andriin@fb.com
---
 tools/bpf/bpftool/common.c | 308 +++++++++++++++++++++++++++++++++++++++++++++
 tools/bpf/bpftool/main.h   |   2 +
 tools/bpf/bpftool/map.c    | 156 -----------------------
 tools/bpf/bpftool/prog.c   | 152 ----------------------
 4 files changed, 310 insertions(+), 308 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index c47bdc65de8e..6c864c3683fc 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -581,3 +581,311 @@ print_all_levels(__maybe_unused enum libbpf_print_level level,
 {
 	return vfprintf(stderr, format, args);
 }
+
+static int prog_fd_by_nametag(void *nametag, int **fds, bool tag)
+{
+	unsigned int id = 0;
+	int fd, nb_fds = 0;
+	void *tmp;
+	int err;
+
+	while (true) {
+		struct bpf_prog_info info = {};
+		__u32 len = sizeof(info);
+
+		err = bpf_prog_get_next_id(id, &id);
+		if (err) {
+			if (errno != ENOENT) {
+				p_err("%s", strerror(errno));
+				goto err_close_fds;
+			}
+			return nb_fds;
+		}
+
+		fd = bpf_prog_get_fd_by_id(id);
+		if (fd < 0) {
+			p_err("can't get prog by id (%u): %s",
+			      id, strerror(errno));
+			goto err_close_fds;
+		}
+
+		err = bpf_obj_get_info_by_fd(fd, &info, &len);
+		if (err) {
+			p_err("can't get prog info (%u): %s",
+			      id, strerror(errno));
+			goto err_close_fd;
+		}
+
+		if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) ||
+		    (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) {
+			close(fd);
+			continue;
+		}
+
+		if (nb_fds > 0) {
+			tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
+			if (!tmp) {
+				p_err("failed to realloc");
+				goto err_close_fd;
+			}
+			*fds = tmp;
+		}
+		(*fds)[nb_fds++] = fd;
+	}
+
+err_close_fd:
+	close(fd);
+err_close_fds:
+	while (--nb_fds >= 0)
+		close((*fds)[nb_fds]);
+	return -1;
+}
+
+int prog_parse_fds(int *argc, char ***argv, int **fds)
+{
+	if (is_prefix(**argv, "id")) {
+		unsigned int id;
+		char *endptr;
+
+		NEXT_ARGP();
+
+		id = strtoul(**argv, &endptr, 0);
+		if (*endptr) {
+			p_err("can't parse %s as ID", **argv);
+			return -1;
+		}
+		NEXT_ARGP();
+
+		(*fds)[0] = bpf_prog_get_fd_by_id(id);
+		if ((*fds)[0] < 0) {
+			p_err("get by id (%u): %s", id, strerror(errno));
+			return -1;
+		}
+		return 1;
+	} else if (is_prefix(**argv, "tag")) {
+		unsigned char tag[BPF_TAG_SIZE];
+
+		NEXT_ARGP();
+
+		if (sscanf(**argv, BPF_TAG_FMT, tag, tag + 1, tag + 2,
+			   tag + 3, tag + 4, tag + 5, tag + 6, tag + 7)
+		    != BPF_TAG_SIZE) {
+			p_err("can't parse tag");
+			return -1;
+		}
+		NEXT_ARGP();
+
+		return prog_fd_by_nametag(tag, fds, true);
+	} else if (is_prefix(**argv, "name")) {
+		char *name;
+
+		NEXT_ARGP();
+
+		name = **argv;
+		if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
+			p_err("can't parse name");
+			return -1;
+		}
+		NEXT_ARGP();
+
+		return prog_fd_by_nametag(name, fds, false);
+	} else if (is_prefix(**argv, "pinned")) {
+		char *path;
+
+		NEXT_ARGP();
+
+		path = **argv;
+		NEXT_ARGP();
+
+		(*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG);
+		if ((*fds)[0] < 0)
+			return -1;
+		return 1;
+	}
+
+	p_err("expected 'id', 'tag', 'name' or 'pinned', got: '%s'?", **argv);
+	return -1;
+}
+
+int prog_parse_fd(int *argc, char ***argv)
+{
+	int *fds = NULL;
+	int nb_fds, fd;
+
+	fds = malloc(sizeof(int));
+	if (!fds) {
+		p_err("mem alloc failed");
+		return -1;
+	}
+	nb_fds = prog_parse_fds(argc, argv, &fds);
+	if (nb_fds != 1) {
+		if (nb_fds > 1) {
+			p_err("several programs match this handle");
+			while (nb_fds--)
+				close(fds[nb_fds]);
+		}
+		fd = -1;
+		goto exit_free;
+	}
+
+	fd = fds[0];
+exit_free:
+	free(fds);
+	return fd;
+}
+
+static int map_fd_by_name(char *name, int **fds)
+{
+	unsigned int id = 0;
+	int fd, nb_fds = 0;
+	void *tmp;
+	int err;
+
+	while (true) {
+		struct bpf_map_info info = {};
+		__u32 len = sizeof(info);
+
+		err = bpf_map_get_next_id(id, &id);
+		if (err) {
+			if (errno != ENOENT) {
+				p_err("%s", strerror(errno));
+				goto err_close_fds;
+			}
+			return nb_fds;
+		}
+
+		fd = bpf_map_get_fd_by_id(id);
+		if (fd < 0) {
+			p_err("can't get map by id (%u): %s",
+			      id, strerror(errno));
+			goto err_close_fds;
+		}
+
+		err = bpf_obj_get_info_by_fd(fd, &info, &len);
+		if (err) {
+			p_err("can't get map info (%u): %s",
+			      id, strerror(errno));
+			goto err_close_fd;
+		}
+
+		if (strncmp(name, info.name, BPF_OBJ_NAME_LEN)) {
+			close(fd);
+			continue;
+		}
+
+		if (nb_fds > 0) {
+			tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
+			if (!tmp) {
+				p_err("failed to realloc");
+				goto err_close_fd;
+			}
+			*fds = tmp;
+		}
+		(*fds)[nb_fds++] = fd;
+	}
+
+err_close_fd:
+	close(fd);
+err_close_fds:
+	while (--nb_fds >= 0)
+		close((*fds)[nb_fds]);
+	return -1;
+}
+
+int map_parse_fds(int *argc, char ***argv, int **fds)
+{
+	if (is_prefix(**argv, "id")) {
+		unsigned int id;
+		char *endptr;
+
+		NEXT_ARGP();
+
+		id = strtoul(**argv, &endptr, 0);
+		if (*endptr) {
+			p_err("can't parse %s as ID", **argv);
+			return -1;
+		}
+		NEXT_ARGP();
+
+		(*fds)[0] = bpf_map_get_fd_by_id(id);
+		if ((*fds)[0] < 0) {
+			p_err("get map by id (%u): %s", id, strerror(errno));
+			return -1;
+		}
+		return 1;
+	} else if (is_prefix(**argv, "name")) {
+		char *name;
+
+		NEXT_ARGP();
+
+		name = **argv;
+		if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
+			p_err("can't parse name");
+			return -1;
+		}
+		NEXT_ARGP();
+
+		return map_fd_by_name(name, fds);
+	} else if (is_prefix(**argv, "pinned")) {
+		char *path;
+
+		NEXT_ARGP();
+
+		path = **argv;
+		NEXT_ARGP();
+
+		(*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP);
+		if ((*fds)[0] < 0)
+			return -1;
+		return 1;
+	}
+
+	p_err("expected 'id', 'name' or 'pinned', got: '%s'?", **argv);
+	return -1;
+}
+
+int map_parse_fd(int *argc, char ***argv)
+{
+	int *fds = NULL;
+	int nb_fds, fd;
+
+	fds = malloc(sizeof(int));
+	if (!fds) {
+		p_err("mem alloc failed");
+		return -1;
+	}
+	nb_fds = map_parse_fds(argc, argv, &fds);
+	if (nb_fds != 1) {
+		if (nb_fds > 1) {
+			p_err("several maps match this handle");
+			while (nb_fds--)
+				close(fds[nb_fds]);
+		}
+		fd = -1;
+		goto exit_free;
+	}
+
+	fd = fds[0];
+exit_free:
+	free(fds);
+	return fd;
+}
+
+int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
+{
+	int err;
+	int fd;
+
+	fd = map_parse_fd(argc, argv);
+	if (fd < 0)
+		return -1;
+
+	err = bpf_obj_get_info_by_fd(fd, info, info_len);
+	if (err) {
+		p_err("can't get map info: %s", strerror(errno));
+		close(fd);
+		return err;
+	}
+
+	return fd;
+}
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 5cdf0bc049bd..4338ab9d86d4 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -210,7 +210,9 @@ int do_iter(int argc, char **argv);
 
 int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
 int prog_parse_fd(int *argc, char ***argv);
+int prog_parse_fds(int *argc, char ***argv, int **fds);
 int map_parse_fd(int *argc, char ***argv);
+int map_parse_fds(int *argc, char ***argv, int **fds);
 int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
 
 struct bpf_prog_linfo;
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index c5fac8068ba1..b9eee19b094c 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -92,162 +92,6 @@ static void *alloc_value(struct bpf_map_info *info)
 		return malloc(info->value_size);
 }
 
-static int map_fd_by_name(char *name, int **fds)
-{
-	unsigned int id = 0;
-	int fd, nb_fds = 0;
-	void *tmp;
-	int err;
-
-	while (true) {
-		struct bpf_map_info info = {};
-		__u32 len = sizeof(info);
-
-		err = bpf_map_get_next_id(id, &id);
-		if (err) {
-			if (errno != ENOENT) {
-				p_err("%s", strerror(errno));
-				goto err_close_fds;
-			}
-			return nb_fds;
-		}
-
-		fd = bpf_map_get_fd_by_id(id);
-		if (fd < 0) {
-			p_err("can't get map by id (%u): %s",
-			      id, strerror(errno));
-			goto err_close_fds;
-		}
-
-		err = bpf_obj_get_info_by_fd(fd, &info, &len);
-		if (err) {
-			p_err("can't get map info (%u): %s",
-			      id, strerror(errno));
-			goto err_close_fd;
-		}
-
-		if (strncmp(name, info.name, BPF_OBJ_NAME_LEN)) {
-			close(fd);
-			continue;
-		}
-
-		if (nb_fds > 0) {
-			tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
-			if (!tmp) {
-				p_err("failed to realloc");
-				goto err_close_fd;
-			}
-			*fds = tmp;
-		}
-		(*fds)[nb_fds++] = fd;
-	}
-
-err_close_fd:
-	close(fd);
-err_close_fds:
-	while (--nb_fds >= 0)
-		close((*fds)[nb_fds]);
-	return -1;
-}
-
-static int map_parse_fds(int *argc, char ***argv, int **fds)
-{
-	if (is_prefix(**argv, "id")) {
-		unsigned int id;
-		char *endptr;
-
-		NEXT_ARGP();
-
-		id = strtoul(**argv, &endptr, 0);
-		if (*endptr) {
-			p_err("can't parse %s as ID", **argv);
-			return -1;
-		}
-		NEXT_ARGP();
-
-		(*fds)[0] = bpf_map_get_fd_by_id(id);
-		if ((*fds)[0] < 0) {
-			p_err("get map by id (%u): %s", id, strerror(errno));
-			return -1;
-		}
-		return 1;
-	} else if (is_prefix(**argv, "name")) {
-		char *name;
-
-		NEXT_ARGP();
-
-		name = **argv;
-		if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
-			p_err("can't parse name");
-			return -1;
-		}
-		NEXT_ARGP();
-
-		return map_fd_by_name(name, fds);
-	} else if (is_prefix(**argv, "pinned")) {
-		char *path;
-
-		NEXT_ARGP();
-
-		path = **argv;
-		NEXT_ARGP();
-
-		(*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP);
-		if ((*fds)[0] < 0)
-			return -1;
-		return 1;
-	}
-
-	p_err("expected 'id', 'name' or 'pinned', got: '%s'?", **argv);
-	return -1;
-}
-
-int map_parse_fd(int *argc, char ***argv)
-{
-	int *fds = NULL;
-	int nb_fds, fd;
-
-	fds = malloc(sizeof(int));
-	if (!fds) {
-		p_err("mem alloc failed");
-		return -1;
-	}
-	nb_fds = map_parse_fds(argc, argv, &fds);
-	if (nb_fds != 1) {
-		if (nb_fds > 1) {
-			p_err("several maps match this handle");
-			while (nb_fds--)
-				close(fds[nb_fds]);
-		}
-		fd = -1;
-		goto exit_free;
-	}
-
-	fd = fds[0];
-exit_free:
-	free(fds);
-	return fd;
-}
-
-int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
-{
-	int err;
-	int fd;
-
-	fd = map_parse_fd(argc, argv);
-	if (fd < 0)
-		return -1;
-
-	err = bpf_obj_get_info_by_fd(fd, info, info_len);
-	if (err) {
-		p_err("can't get map info: %s", strerror(errno));
-		close(fd);
-		return err;
-	}
-
-	return fd;
-}
-
 static int do_dump_btf(const struct btf_dumper *d,
 		       struct bpf_map_info *map_info, void *key,
 		       void *value)
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index a5eff83496f2..53d47610ff58 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -86,158 +86,6 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
 		strftime(buf, size, "%FT%T%z", &load_tm);
 }
 
-static int prog_fd_by_nametag(void *nametag, int **fds, bool tag)
-{
-	unsigned int id = 0;
-	int fd, nb_fds = 0;
-	void *tmp;
-	int err;
-
-	while (true) {
-		struct bpf_prog_info info = {};
-		__u32 len = sizeof(info);
-
-		err = bpf_prog_get_next_id(id, &id);
-		if (err) {
-			if (errno != ENOENT) {
-				p_err("%s", strerror(errno));
-				goto err_close_fds;
-			}
-			return nb_fds;
-		}
-
-		fd = bpf_prog_get_fd_by_id(id);
-		if (fd < 0) {
-			p_err("can't get prog by id (%u): %s",
-			      id, strerror(errno));
-			goto err_close_fds;
-		}
-
-		err = bpf_obj_get_info_by_fd(fd, &info, &len);
-		if (err) {
-			p_err("can't get prog info (%u): %s",
-			      id, strerror(errno));
-			goto err_close_fd;
-		}
-
-		if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) ||
-		    (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) {
-			close(fd);
-			continue;
-		}
-
-		if (nb_fds > 0) {
-			tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
-			if (!tmp) {
-				p_err("failed to realloc");
-				goto err_close_fd;
-			}
-			*fds = tmp;
-		}
-		(*fds)[nb_fds++] = fd;
-	}
-
-err_close_fd:
-	close(fd);
-err_close_fds:
-	while (--nb_fds >= 0)
-		close((*fds)[nb_fds]);
-	return -1;
-}
-
-static int prog_parse_fds(int *argc, char ***argv, int **fds)
-{
-	if (is_prefix(**argv, "id")) {
-		unsigned int id;
-		char *endptr;
-
-		NEXT_ARGP();
-
-		id = strtoul(**argv, &endptr, 0);
-		if (*endptr) {
-			p_err("can't parse %s as ID", **argv);
-			return -1;
-		}
-		NEXT_ARGP();
-
-		(*fds)[0] = bpf_prog_get_fd_by_id(id);
-		if ((*fds)[0] < 0) {
-			p_err("get by id (%u): %s", id, strerror(errno));
-			return -1;
-		}
-		return 1;
-	} else if (is_prefix(**argv, "tag")) {
-		unsigned char tag[BPF_TAG_SIZE];
-
-		NEXT_ARGP();
-
-		if (sscanf(**argv, BPF_TAG_FMT, tag, tag + 1, tag + 2,
-			   tag + 3, tag + 4, tag + 5, tag + 6, tag + 7)
-		    != BPF_TAG_SIZE) {
-			p_err("can't parse tag");
-			return -1;
-		}
-		NEXT_ARGP();
-
-		return prog_fd_by_nametag(tag, fds, true);
-	} else if (is_prefix(**argv, "name")) {
-		char *name;
-
-		NEXT_ARGP();
-
-		name = **argv;
-		if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
-			p_err("can't parse name");
-			return -1;
-		}
-		NEXT_ARGP();
-
-		return prog_fd_by_nametag(name, fds, false);
-	} else if (is_prefix(**argv, "pinned")) {
-		char *path;
-
-		NEXT_ARGP();
-
-		path = **argv;
-		NEXT_ARGP();
-
-		(*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG);
-		if ((*fds)[0] < 0)
-			return -1;
-		return 1;
-	}
-
-	p_err("expected 'id', 'tag', 'name' or 'pinned', got: '%s'?", **argv);
-	return -1;
-}
-
-int prog_parse_fd(int *argc, char ***argv)
-{
-	int *fds = NULL;
-	int nb_fds, fd;
-
-	fds = malloc(sizeof(int));
-	if (!fds) {
-		p_err("mem alloc failed");
-		return -1;
-	}
-	nb_fds = prog_parse_fds(argc, argv, &fds);
-	if (nb_fds != 1) {
-		if (nb_fds > 1) {
-			p_err("several programs match this handle");
-			while (nb_fds--)
-				close(fds[nb_fds]);
-		}
-		fd = -1;
-		goto exit_free;
-	}
-
-	fd = fds[0];
-exit_free:
-	free(fds);
-	return fd;
-}
-
 static void show_prog_maps(int fd, __u32 num_maps)
 {
 	struct bpf_prog_info info = {};
-- 
cgit v1.2.3


From 16e9b187aba60f9014f11a7e95b878850b6c95e5 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 19 Jun 2020 16:16:59 -0700
Subject: tools/bpftool: Minimize bootstrap bpftool

Build minimal "bootstrap mode" bpftool to enable skeleton (and, later,
vmlinux.h generation), instead of building almost complete, but slightly
different (w/o skeletons, etc) bpftool to bootstrap complete bpftool build.

Current approach doesn't scale well (engineering-wise) when adding more BPF
programs to bpftool and other complicated functionality, as it requires
constant adjusting of the code to work in both bootstrapped mode and normal
mode.

So it's better to build only minimal bpftool version that supports only BPF
skeleton code generation and BTF-to-C conversion. Thankfully, this is quite
easy to accomplish due to internal modularity of bpftool commands. This will
also allow to keep adding new functionality to bpftool in general, without the
need to care about bootstrap mode for those new parts of bpftool.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20200619231703.738941-6-andriin@fb.com
---
 tools/bpf/bpftool/.gitignore |  2 +-
 tools/bpf/bpftool/Makefile   | 30 +++++++++++++-----------------
 tools/bpf/bpftool/main.c     | 11 +++++++++--
 tools/bpf/bpftool/main.h     | 27 +++++++++++++++------------
 4 files changed, 38 insertions(+), 32 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
index 26cde83e1ca3..ce721adf3161 100644
--- a/tools/bpf/bpftool/.gitignore
+++ b/tools/bpf/bpftool/.gitignore
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 *.d
-/_bpftool
+/bpftool-bootstrap
 /bpftool
 bpftool*.8
 bpf-helpers.*
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 9e85f101be85..eec2da4d45d2 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -116,40 +116,36 @@ CFLAGS += -DHAVE_LIBBFD_SUPPORT
 SRCS += $(BFD_SRCS)
 endif
 
+BPFTOOL_BOOTSTRAP := $(if $(OUTPUT),$(OUTPUT)bpftool-bootstrap,./bpftool-bootstrap)
+
+BOOTSTRAP_OBJS = $(addprefix $(OUTPUT),main.o common.o json_writer.o gen.o btf.o)
 OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
-_OBJS = $(filter-out $(OUTPUT)prog.o,$(OBJS)) $(OUTPUT)_prog.o
 
-ifeq ($(feature-clang-bpf-global-var),1)
-	__OBJS = $(OBJS)
-else
-	__OBJS = $(_OBJS)
+ifneq ($(feature-clang-bpf-global-var),1)
+	CFLAGS += -DBPFTOOL_WITHOUT_SKELETONS
 endif
 
-$(OUTPUT)_prog.o: prog.c
-	$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -DBPFTOOL_WITHOUT_SKELETONS -o $@ $<
-
-$(OUTPUT)_bpftool: $(_OBJS) $(LIBBPF)
-	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(_OBJS) $(LIBS)
-
 skeleton/profiler.bpf.o: skeleton/profiler.bpf.c $(LIBBPF)
 	$(QUIET_CLANG)$(CLANG) \
 		-I$(srctree)/tools/include/uapi/ \
 		-I$(LIBBPF_PATH) -I$(srctree)/tools/lib \
 		-g -O2 -target bpf -c $< -o $@
 
-profiler.skel.h: $(OUTPUT)_bpftool skeleton/profiler.bpf.o
-	$(QUIET_GEN)$(OUTPUT)./_bpftool gen skeleton skeleton/profiler.bpf.o > $@
+profiler.skel.h: $(BPFTOOL_BOOTSTRAP) skeleton/profiler.bpf.o
+	$(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton skeleton/profiler.bpf.o > $@
 
 $(OUTPUT)prog.o: prog.c profiler.skel.h
-	$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
 
 $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
 	$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
 
 $(OUTPUT)feature.o: | zdep
 
-$(OUTPUT)bpftool: $(__OBJS) $(LIBBPF)
-	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(__OBJS) $(LIBS)
+$(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) $(LIBS)
+
+$(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)
 
 $(OUTPUT)%.o: %.c
 	$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
@@ -157,7 +153,7 @@ $(OUTPUT)%.o: %.c
 clean: $(LIBBPF)-clean
 	$(call QUIET_CLEAN, bpftool)
 	$(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
-	$(Q)$(RM) -- $(OUTPUT)_bpftool profiler.skel.h skeleton/profiler.bpf.o
+	$(Q)$(RM) -- $(BPFTOOL_BOOTSTRAP) profiler.skel.h skeleton/profiler.bpf.o
 	$(Q)$(RM) -r -- $(OUTPUT)libbpf/
 	$(call QUIET_CLEAN, core-gen)
 	$(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 46bd716a9d86..bf4d7487552a 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -92,9 +92,16 @@ int cmd_select(const struct cmd *cmds, int argc, char **argv,
 	if (argc < 1 && cmds[0].func)
 		return cmds[0].func(argc, argv);
 
-	for (i = 0; cmds[i].func; i++)
-		if (is_prefix(*argv, cmds[i].cmd))
+	for (i = 0; cmds[i].cmd; i++) {
+		if (is_prefix(*argv, cmds[i].cmd)) {
+			if (!cmds[i].func) {
+				p_err("command '%s' is not supported in bootstrap mode",
+				      cmds[i].cmd);
+				return -1;
+			}
 			return cmds[i].func(argc - 1, argv + 1);
+		}
+	}
 
 	help(argc - 1, argv + 1);
 
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 4338ab9d86d4..aad7be74e8a7 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -194,19 +194,22 @@ int mount_bpffs_for_pin(const char *name);
 int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***));
 int do_pin_fd(int fd, const char *name);
 
-int do_prog(int argc, char **arg);
-int do_map(int argc, char **arg);
-int do_link(int argc, char **arg);
-int do_event_pipe(int argc, char **argv);
-int do_cgroup(int argc, char **arg);
-int do_perf(int argc, char **arg);
-int do_net(int argc, char **arg);
-int do_tracelog(int argc, char **arg);
-int do_feature(int argc, char **argv);
-int do_btf(int argc, char **argv);
+/* commands available in bootstrap mode */
 int do_gen(int argc, char **argv);
-int do_struct_ops(int argc, char **argv);
-int do_iter(int argc, char **argv);
+int do_btf(int argc, char **argv);
+
+/* non-bootstrap only commands */
+int do_prog(int argc, char **arg) __weak;
+int do_map(int argc, char **arg) __weak;
+int do_link(int argc, char **arg) __weak;
+int do_event_pipe(int argc, char **argv) __weak;
+int do_cgroup(int argc, char **arg) __weak;
+int do_perf(int argc, char **arg) __weak;
+int do_net(int argc, char **arg) __weak;
+int do_tracelog(int argc, char **arg) __weak;
+int do_feature(int argc, char **argv) __weak;
+int do_struct_ops(int argc, char **argv) __weak;
+int do_iter(int argc, char **argv) __weak;
 
 int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
 int prog_parse_fd(int *argc, char ***argv);
-- 
cgit v1.2.3


From 05aca6da3b5ab3c5c6003dbbefc9580d9a6a308b Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 19 Jun 2020 16:17:00 -0700
Subject: tools/bpftool: Generalize BPF skeleton support and generate vmlinux.h

Adapt Makefile to support BPF skeleton generation beyond single profiler.bpf.c
case. Also add vmlinux.h generation and switch profiler.bpf.c to use it.

clang-bpf-global-var feature is extended and renamed to clang-bpf-co-re to
check for support of preserve_access_index attribute, which, together with BTF
for global variables, is the minimum requirement for modern BPF programs.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20200619231703.738941-7-andriin@fb.com
---
 tools/bpf/bpftool/.gitignore                    |  3 +-
 tools/bpf/bpftool/Makefile                      | 42 ++++++++++++++++------
 tools/bpf/bpftool/skeleton/profiler.bpf.c       |  3 +-
 tools/bpf/bpftool/skeleton/profiler.h           | 46 -------------------------
 tools/build/feature/Makefile                    |  4 +--
 tools/build/feature/test-clang-bpf-co-re.c      |  9 +++++
 tools/build/feature/test-clang-bpf-global-var.c |  4 ---
 7 files changed, 45 insertions(+), 66 deletions(-)
 delete mode 100644 tools/bpf/bpftool/skeleton/profiler.h
 create mode 100644 tools/build/feature/test-clang-bpf-co-re.c
 delete mode 100644 tools/build/feature/test-clang-bpf-global-var.c

(limited to 'tools')

diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
index ce721adf3161..3e601bcfd461 100644
--- a/tools/bpf/bpftool/.gitignore
+++ b/tools/bpf/bpftool/.gitignore
@@ -7,4 +7,5 @@ bpf-helpers.*
 FEATURE-DUMP.bpftool
 feature
 libbpf
-profiler.skel.h
+/*.skel.h
+/vmlinux.h
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index eec2da4d45d2..bdb6e38c6c5c 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -42,6 +42,7 @@ CFLAGS += -O2
 CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers
 CFLAGS += $(filter-out -Wswitch-enum,$(EXTRA_WARNINGS))
 CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \
+	-I$(if $(OUTPUT),$(OUTPUT),.) \
 	-I$(srctree)/kernel/bpf/ \
 	-I$(srctree)/tools/include \
 	-I$(srctree)/tools/include/uapi \
@@ -61,9 +62,9 @@ CLANG ?= clang
 
 FEATURE_USER = .bpftool
 FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib libcap \
-	clang-bpf-global-var
+	clang-bpf-co-re
 FEATURE_DISPLAY = libbfd disassembler-four-args zlib libcap \
-	clang-bpf-global-var
+	clang-bpf-co-re
 
 check_feat := 1
 NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall
@@ -121,20 +122,38 @@ BPFTOOL_BOOTSTRAP := $(if $(OUTPUT),$(OUTPUT)bpftool-bootstrap,./bpftool-bootstr
 BOOTSTRAP_OBJS = $(addprefix $(OUTPUT),main.o common.o json_writer.o gen.o btf.o)
 OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
 
-ifneq ($(feature-clang-bpf-global-var),1)
-	CFLAGS += -DBPFTOOL_WITHOUT_SKELETONS
-endif
+VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux)				\
+		     $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)	\
+		     ../../../vmlinux					\
+		     /sys/kernel/btf/vmlinux				\
+		     /boot/vmlinux-$(shell uname -r)
+VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+
+ifneq ($(VMLINUX_BTF),)
+ifeq ($(feature-clang-bpf-co-re),1)
+
+BUILD_BPF_SKELS := 1
+
+$(OUTPUT)vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL_BOOTSTRAP)
+	$(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) btf dump file $< format c > $@
 
-skeleton/profiler.bpf.o: skeleton/profiler.bpf.c $(LIBBPF)
+$(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF)
 	$(QUIET_CLANG)$(CLANG) \
+		-I$(if $(OUTPUT),$(OUTPUT),.) \
 		-I$(srctree)/tools/include/uapi/ \
-		-I$(LIBBPF_PATH) -I$(srctree)/tools/lib \
+		-I$(LIBBPF_PATH) \
+		-I$(srctree)/tools/lib \
 		-g -O2 -target bpf -c $< -o $@
 
-profiler.skel.h: $(BPFTOOL_BOOTSTRAP) skeleton/profiler.bpf.o
-	$(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton skeleton/profiler.bpf.o > $@
+$(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP)
+	$(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton $< > $@
+
+$(OUTPUT)prog.o: $(OUTPUT)profiler.skel.h
+
+endif
+endif
 
-$(OUTPUT)prog.o: prog.c profiler.skel.h
+CFLAGS += $(if BUILD_BPF_SKELS,,-DBPFTOOL_WITHOUT_SKELETONS)
 
 $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
 	$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
@@ -153,7 +172,7 @@ $(OUTPUT)%.o: %.c
 clean: $(LIBBPF)-clean
 	$(call QUIET_CLEAN, bpftool)
 	$(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
-	$(Q)$(RM) -- $(BPFTOOL_BOOTSTRAP) profiler.skel.h skeleton/profiler.bpf.o
+	$(Q)$(RM) -- $(BPFTOOL_BOOTSTRAP) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h
 	$(Q)$(RM) -r -- $(OUTPUT)libbpf/
 	$(call QUIET_CLEAN, core-gen)
 	$(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool
@@ -188,6 +207,7 @@ FORCE:
 zdep:
 	@if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi
 
+.SECONDARY:
 .PHONY: all FORCE clean install uninstall zdep
 .PHONY: doc doc-clean doc-install doc-uninstall
 .DEFAULT_GOAL := all
diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c
index c9d196ddb670..4e3512f700c0 100644
--- a/tools/bpf/bpftool/skeleton/profiler.bpf.c
+++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
 // Copyright (c) 2020 Facebook
-#include "profiler.h"
-#include <linux/bpf.h>
+#include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
diff --git a/tools/bpf/bpftool/skeleton/profiler.h b/tools/bpf/bpftool/skeleton/profiler.h
deleted file mode 100644
index 1f767e9510f7..000000000000
--- a/tools/bpf/bpftool/skeleton/profiler.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-#ifndef __PROFILER_H
-#define __PROFILER_H
-
-/* useful typedefs from vmlinux.h */
-
-typedef signed char __s8;
-typedef unsigned char __u8;
-typedef short int __s16;
-typedef short unsigned int __u16;
-typedef int __s32;
-typedef unsigned int __u32;
-typedef long long int __s64;
-typedef long long unsigned int __u64;
-
-typedef __s8 s8;
-typedef __u8 u8;
-typedef __s16 s16;
-typedef __u16 u16;
-typedef __s32 s32;
-typedef __u32 u32;
-typedef __s64 s64;
-typedef __u64 u64;
-
-enum {
-	false = 0,
-	true = 1,
-};
-
-#ifdef __CHECKER__
-#define __bitwise__ __attribute__((bitwise))
-#else
-#define __bitwise__
-#endif
-
-typedef __u16 __bitwise__ __le16;
-typedef __u16 __bitwise__ __be16;
-typedef __u32 __bitwise__ __le32;
-typedef __u32 __bitwise__ __be32;
-typedef __u64 __bitwise__ __le64;
-typedef __u64 __bitwise__ __be64;
-
-typedef __u16 __bitwise__ __sum16;
-typedef __u32 __bitwise__ __wsum;
-
-#endif /* __PROFILER_H */
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index b1f0321180f5..88371f7f0369 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -68,7 +68,7 @@ FILES=                                          \
          test-llvm-version.bin			\
          test-libaio.bin			\
          test-libzstd.bin			\
-         test-clang-bpf-global-var.bin		\
+         test-clang-bpf-co-re.bin		\
          test-file-handle.bin			\
          test-libpfm4.bin
 
@@ -325,7 +325,7 @@ $(OUTPUT)test-libaio.bin:
 $(OUTPUT)test-libzstd.bin:
 	$(BUILD) -lzstd
 
-$(OUTPUT)test-clang-bpf-global-var.bin:
+$(OUTPUT)test-clang-bpf-co-re.bin:
 	$(CLANG) -S -g -target bpf -o - $(patsubst %.bin,%.c,$(@F)) |	\
 		grep BTF_KIND_VAR
 
diff --git a/tools/build/feature/test-clang-bpf-co-re.c b/tools/build/feature/test-clang-bpf-co-re.c
new file mode 100644
index 000000000000..cb5265bfdd83
--- /dev/null
+++ b/tools/build/feature/test-clang-bpf-co-re.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+struct test {
+	int a;
+	int b;
+} __attribute__((preserve_access_index));
+
+volatile struct test global_value_for_test = {};
diff --git a/tools/build/feature/test-clang-bpf-global-var.c b/tools/build/feature/test-clang-bpf-global-var.c
deleted file mode 100644
index 221f1481d52e..000000000000
--- a/tools/build/feature/test-clang-bpf-global-var.c
+++ /dev/null
@@ -1,4 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2020 Facebook
-
-volatile int global_value_for_test = 1;
-- 
cgit v1.2.3


From bd9bedf84b87289b9a87eebfe7917e54373e99f9 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 19 Jun 2020 16:17:01 -0700
Subject: libbpf: Wrap source argument of BPF_CORE_READ macro in parentheses

Wrap source argument of BPF_CORE_READ family of macros into parentheses to
allow uses like this:

BPF_CORE_READ((struct cast_struct *)src, a, b, c);

Fixes: 7db3822ab991 ("libbpf: Add BPF_CORE_READ/BPF_CORE_READ_INTO helpers")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200619231703.738941-8-andriin@fb.com
---
 tools/lib/bpf/bpf_core_read.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h
index 7009dc90e012..eae5cccff761 100644
--- a/tools/lib/bpf/bpf_core_read.h
+++ b/tools/lib/bpf/bpf_core_read.h
@@ -217,7 +217,7 @@ enum bpf_field_info_kind {
  */
 #define BPF_CORE_READ_INTO(dst, src, a, ...)				    \
 	({								    \
-		___core_read(bpf_core_read, dst, src, a, ##__VA_ARGS__)	    \
+		___core_read(bpf_core_read, dst, (src), a, ##__VA_ARGS__)   \
 	})
 
 /*
@@ -227,7 +227,7 @@ enum bpf_field_info_kind {
  */
 #define BPF_CORE_READ_STR_INTO(dst, src, a, ...)			    \
 	({								    \
-		___core_read(bpf_core_read_str, dst, src, a, ##__VA_ARGS__) \
+		___core_read(bpf_core_read_str, dst, (src), a, ##__VA_ARGS__)\
 	})
 
 /*
@@ -254,8 +254,8 @@ enum bpf_field_info_kind {
  */
 #define BPF_CORE_READ(src, a, ...)					    \
 	({								    \
-		___type(src, a, ##__VA_ARGS__) __r;			    \
-		BPF_CORE_READ_INTO(&__r, src, a, ##__VA_ARGS__);	    \
+		___type((src), a, ##__VA_ARGS__) __r;			    \
+		BPF_CORE_READ_INTO(&__r, (src), a, ##__VA_ARGS__);	    \
 		__r;							    \
 	})
 
-- 
cgit v1.2.3


From d53dee3fe0138610fcce5721bae9414377c41ec3 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 19 Jun 2020 16:17:02 -0700
Subject: tools/bpftool: Show info for processes holding BPF map/prog/link/btf
 FDs

Add bpf_iter-based way to find all the processes that hold open FDs against
BPF object (map, prog, link, btf). bpftool always attempts to discover this,
but will silently give up if kernel doesn't yet support bpf_iter BPF programs.
Process name and PID are emitted for each process (task group).

Sample output for each of 4 BPF objects:

$ sudo ./bpftool prog show
2694: cgroup_device  tag 8c42dee26e8cd4c2  gpl
        loaded_at 2020-06-16T15:34:32-0700  uid 0
        xlated 648B  jited 409B  memlock 4096B
        pids systemd(1)
2907: cgroup_skb  name egress  tag 9ad187367cf2b9e8  gpl
        loaded_at 2020-06-16T18:06:54-0700  uid 0
        xlated 48B  jited 59B  memlock 4096B  map_ids 2436
        btf_id 1202
        pids test_progs(2238417), test_progs(2238445)

$ sudo ./bpftool map show
2436: array  name test_cgr.bss  flags 0x400
        key 4B  value 8B  max_entries 1  memlock 8192B
        btf_id 1202
        pids test_progs(2238417), test_progs(2238445)
2445: array  name pid_iter.rodata  flags 0x480
        key 4B  value 4B  max_entries 1  memlock 8192B
        btf_id 1214  frozen
        pids bpftool(2239612)

$ sudo ./bpftool link show
61: cgroup  prog 2908
        cgroup_id 375301  attach_type egress
        pids test_progs(2238417), test_progs(2238445)
62: cgroup  prog 2908
        cgroup_id 375344  attach_type egress
        pids test_progs(2238417), test_progs(2238445)

$ sudo ./bpftool btf show
1202: size 1527B  prog_ids 2908,2907  map_ids 2436
        pids test_progs(2238417), test_progs(2238445)
1242: size 34684B
        pids bpftool(2258892)

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20200619231703.738941-9-andriin@fb.com
---
 tools/bpf/bpftool/Makefile                |   2 +
 tools/bpf/bpftool/btf.c                   |   6 +
 tools/bpf/bpftool/link.c                  |   7 +
 tools/bpf/bpftool/main.c                  |   1 +
 tools/bpf/bpftool/main.h                  |  27 ++++
 tools/bpf/bpftool/map.c                   |   7 +
 tools/bpf/bpftool/pids.c                  | 229 ++++++++++++++++++++++++++++++
 tools/bpf/bpftool/prog.c                  |   7 +
 tools/bpf/bpftool/skeleton/pid_iter.bpf.c |  80 +++++++++++
 tools/bpf/bpftool/skeleton/pid_iter.h     |  12 ++
 10 files changed, 378 insertions(+)
 create mode 100644 tools/bpf/bpftool/pids.c
 create mode 100644 tools/bpf/bpftool/skeleton/pid_iter.bpf.c
 create mode 100644 tools/bpf/bpftool/skeleton/pid_iter.h

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index bdb6e38c6c5c..06f436e8191a 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -150,6 +150,8 @@ $(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP)
 
 $(OUTPUT)prog.o: $(OUTPUT)profiler.skel.h
 
+$(OUTPUT)pids.o: $(OUTPUT)pid_iter.skel.h
+
 endif
 endif
 
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index faac8189b285..fc9bc7a23db6 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -809,6 +809,7 @@ show_btf_plain(struct bpf_btf_info *info, int fd,
 			printf("%s%u", n++ == 0 ? "  map_ids " : ",",
 			       obj->obj_id);
 	}
+	emit_obj_refs_plain(&refs_table, info->id, "\n\tpids ");
 
 	printf("\n");
 }
@@ -841,6 +842,9 @@ show_btf_json(struct bpf_btf_info *info, int fd,
 			jsonw_uint(json_wtr, obj->obj_id);
 	}
 	jsonw_end_array(json_wtr);	/* map_ids */
+
+	emit_obj_refs_json(&refs_table, info->id, json_wtr); /* pids */
+
 	jsonw_end_object(json_wtr);	/* btf object */
 }
 
@@ -893,6 +897,7 @@ static int do_show(int argc, char **argv)
 			close(fd);
 		return err;
 	}
+	build_obj_refs_table(&refs_table, BPF_OBJ_BTF);
 
 	if (fd >= 0) {
 		err = show_btf(fd, &btf_prog_table, &btf_map_table);
@@ -939,6 +944,7 @@ static int do_show(int argc, char **argv)
 exit_free:
 	delete_btf_table(&btf_prog_table);
 	delete_btf_table(&btf_map_table);
+	delete_obj_refs_table(&refs_table);
 
 	return err;
 }
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index fca57ee8fafe..7329f3134283 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -143,6 +143,9 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
 		}
 		jsonw_end_array(json_wtr);
 	}
+
+	emit_obj_refs_json(&refs_table, info->id, json_wtr);
+
 	jsonw_end_object(json_wtr);
 
 	return 0;
@@ -212,6 +215,7 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
 				printf("\n\tpinned %s", obj->path);
 		}
 	}
+	emit_obj_refs_plain(&refs_table, info->id, "\n\tpids ");
 
 	printf("\n");
 
@@ -257,6 +261,7 @@ static int do_show(int argc, char **argv)
 
 	if (show_pinned)
 		build_pinned_obj_table(&link_table, BPF_OBJ_LINK);
+	build_obj_refs_table(&refs_table, BPF_OBJ_LINK);
 
 	if (argc == 2) {
 		fd = link_parse_fd(&argc, &argv);
@@ -296,6 +301,8 @@ static int do_show(int argc, char **argv)
 	if (json_output)
 		jsonw_end_array(json_wtr);
 
+	delete_obj_refs_table(&refs_table);
+
 	return errno == ENOENT ? 0 : -1;
 }
 
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index bf4d7487552a..4a191fcbeb82 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -31,6 +31,7 @@ bool relaxed_maps;
 struct pinned_obj_table prog_table;
 struct pinned_obj_table map_table;
 struct pinned_obj_table link_table;
+struct obj_refs_table refs_table;
 
 static void __noreturn clean_and_exit(int i)
 {
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index aad7be74e8a7..ce26271e5f0c 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -127,11 +127,13 @@ static const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
 extern const char * const map_type_name[];
 extern const size_t map_type_name_size;
 
+/* keep in sync with the definition in skeleton/pid_iter.bpf.c */
 enum bpf_obj_type {
 	BPF_OBJ_UNKNOWN,
 	BPF_OBJ_PROG,
 	BPF_OBJ_MAP,
 	BPF_OBJ_LINK,
+	BPF_OBJ_BTF,
 };
 
 extern const char *bin_name;
@@ -139,12 +141,14 @@ extern const char *bin_name;
 extern json_writer_t *json_wtr;
 extern bool json_output;
 extern bool show_pinned;
+extern bool show_pids;
 extern bool block_mount;
 extern bool verifier_logs;
 extern bool relaxed_maps;
 extern struct pinned_obj_table prog_table;
 extern struct pinned_obj_table map_table;
 extern struct pinned_obj_table link_table;
+extern struct obj_refs_table refs_table;
 
 void __printf(1, 2) p_err(const char *fmt, ...);
 void __printf(1, 2) p_info(const char *fmt, ...);
@@ -168,12 +172,35 @@ struct pinned_obj {
 	struct hlist_node hash;
 };
 
+struct obj_refs_table {
+	DECLARE_HASHTABLE(table, 16);
+};
+
+struct obj_ref {
+	int pid;
+	char comm[16];
+};
+
+struct obj_refs {
+	struct hlist_node node;
+	__u32 id;
+	int ref_cnt;
+	struct obj_ref *refs;
+};
+
 struct btf;
 struct bpf_line_info;
 
 int build_pinned_obj_table(struct pinned_obj_table *table,
 			   enum bpf_obj_type type);
 void delete_pinned_obj_table(struct pinned_obj_table *tab);
+__weak int build_obj_refs_table(struct obj_refs_table *table,
+				enum bpf_obj_type type);
+__weak void delete_obj_refs_table(struct obj_refs_table *table);
+__weak void emit_obj_refs_json(struct obj_refs_table *table, __u32 id,
+			       json_writer_t *json_wtr);
+__weak void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id,
+				const char *prefix);
 void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode);
 void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode);
 
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index b9eee19b094c..0a6a5d82d380 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -509,6 +509,8 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
 		jsonw_end_array(json_wtr);
 	}
 
+	emit_obj_refs_json(&refs_table, info->id, json_wtr);
+
 	jsonw_end_object(json_wtr);
 
 	return 0;
@@ -596,6 +598,8 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
 	if (frozen)
 		printf("%sfrozen", info->btf_id ? "  " : "");
 
+	emit_obj_refs_plain(&refs_table, info->id, "\n\tpids ");
+
 	printf("\n");
 	return 0;
 }
@@ -654,6 +658,7 @@ static int do_show(int argc, char **argv)
 
 	if (show_pinned)
 		build_pinned_obj_table(&map_table, BPF_OBJ_MAP);
+	build_obj_refs_table(&refs_table, BPF_OBJ_MAP);
 
 	if (argc == 2)
 		return do_show_subset(argc, argv);
@@ -697,6 +702,8 @@ static int do_show(int argc, char **argv)
 	if (json_output)
 		jsonw_end_array(json_wtr);
 
+	delete_obj_refs_table(&refs_table);
+
 	return errno == ENOENT ? 0 : -1;
 }
 
diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c
new file mode 100644
index 000000000000..3474a91743ff
--- /dev/null
+++ b/tools/bpf/bpftool/pids.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2020 Facebook */
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+
+#include "main.h"
+#include "skeleton/pid_iter.h"
+
+#ifdef BPFTOOL_WITHOUT_SKELETONS
+
+int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type)
+{
+	p_err("bpftool built without PID iterator support");
+	return -ENOTSUP;
+}
+void delete_obj_refs_table(struct obj_refs_table *table) {}
+
+#else /* BPFTOOL_WITHOUT_SKELETONS */
+
+#include "pid_iter.skel.h"
+
+static void add_ref(struct obj_refs_table *table, struct pid_iter_entry *e)
+{
+	struct obj_refs *refs;
+	struct obj_ref *ref;
+	void *tmp;
+	int i;
+
+	hash_for_each_possible(table->table, refs, node, e->id) {
+		if (refs->id != e->id)
+			continue;
+
+		for (i = 0; i < refs->ref_cnt; i++) {
+			if (refs->refs[i].pid == e->pid)
+				return;
+		}
+
+		tmp = realloc(refs->refs, (refs->ref_cnt + 1) * sizeof(*ref));
+		if (!tmp) {
+			p_err("failed to re-alloc memory for ID %u, PID %d, COMM %s...",
+			      e->id, e->pid, e->comm);
+			return;
+		}
+		refs->refs = tmp;
+		ref = &refs->refs[refs->ref_cnt];
+		ref->pid = e->pid;
+		memcpy(ref->comm, e->comm, sizeof(ref->comm));
+		refs->ref_cnt++;
+
+		return;
+	}
+
+	/* new ref */
+	refs = calloc(1, sizeof(*refs));
+	if (!refs) {
+		p_err("failed to alloc memory for ID %u, PID %d, COMM %s...",
+		      e->id, e->pid, e->comm);
+		return;
+	}
+
+	refs->id = e->id;
+	refs->refs = malloc(sizeof(*refs->refs));
+	if (!refs->refs) {
+		free(refs);
+		p_err("failed to alloc memory for ID %u, PID %d, COMM %s...",
+		      e->id, e->pid, e->comm);
+		return;
+	}
+	ref = &refs->refs[0];
+	ref->pid = e->pid;
+	memcpy(ref->comm, e->comm, sizeof(ref->comm));
+	refs->ref_cnt = 1;
+	hash_add(table->table, &refs->node, e->id);
+}
+
+static int __printf(2, 0)
+libbpf_print_none(__maybe_unused enum libbpf_print_level level,
+		  __maybe_unused const char *format,
+		  __maybe_unused va_list args)
+{
+	return 0;
+}
+
+int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type)
+{
+	char buf[4096];
+	struct pid_iter_bpf *skel;
+	struct pid_iter_entry *e;
+	int err, ret, fd = -1, i;
+	libbpf_print_fn_t default_print;
+
+	hash_init(table->table);
+	set_max_rlimit();
+
+	skel = pid_iter_bpf__open();
+	if (!skel) {
+		p_err("failed to open PID iterator skeleton");
+		return -1;
+	}
+
+	skel->rodata->obj_type = type;
+
+	/* we don't want output polluted with libbpf errors if bpf_iter is not
+	 * supported
+	 */
+	default_print = libbpf_set_print(libbpf_print_none);
+	err = pid_iter_bpf__load(skel);
+	libbpf_set_print(default_print);
+	if (err) {
+		/* too bad, kernel doesn't support BPF iterators yet */
+		err = 0;
+		goto out;
+	}
+	err = pid_iter_bpf__attach(skel);
+	if (err) {
+		/* if we loaded above successfully, attach has to succeed */
+		p_err("failed to attach PID iterator: %d", err);
+		goto out;
+	}
+
+	fd = bpf_iter_create(bpf_link__fd(skel->links.iter));
+	if (fd < 0) {
+		err = -errno;
+		p_err("failed to create PID iterator session: %d", err);
+		goto out;
+	}
+
+	while (true) {
+		ret = read(fd, buf, sizeof(buf));
+		if (ret < 0) {
+			err = -errno;
+			p_err("failed to read PID iterator output: %d", err);
+			goto out;
+		}
+		if (ret == 0)
+			break;
+		if (ret % sizeof(*e)) {
+			err = -EINVAL;
+			p_err("invalid PID iterator output format");
+			goto out;
+		}
+		ret /= sizeof(*e);
+
+		e = (void *)buf;
+		for (i = 0; i < ret; i++, e++) {
+			add_ref(table, e);
+		}
+	}
+	err = 0;
+out:
+	if (fd >= 0)
+		close(fd);
+	pid_iter_bpf__destroy(skel);
+	return err;
+}
+
+void delete_obj_refs_table(struct obj_refs_table *table)
+{
+	struct obj_refs *refs;
+	struct hlist_node *tmp;
+	unsigned int bkt;
+
+	hash_for_each_safe(table->table, bkt, tmp, refs, node) {
+		hash_del(&refs->node);
+		free(refs->refs);
+		free(refs);
+	}
+}
+
+void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, json_writer_t *json_wtr)
+{
+	struct obj_refs *refs;
+	struct obj_ref *ref;
+	int i;
+
+	if (hash_empty(table->table))
+		return;
+
+	hash_for_each_possible(table->table, refs, node, id) {
+		if (refs->id != id)
+			continue;
+		if (refs->ref_cnt == 0)
+			break;
+
+		jsonw_name(json_wtr, "pids");
+		jsonw_start_array(json_wtr);
+		for (i = 0; i < refs->ref_cnt; i++) {
+			ref = &refs->refs[i];
+			jsonw_start_object(json_wtr);
+			jsonw_int_field(json_wtr, "pid", ref->pid);
+			jsonw_string_field(json_wtr, "comm", ref->comm);
+			jsonw_end_object(json_wtr);
+		}
+		jsonw_end_array(json_wtr);
+		break;
+	}
+}
+
+void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix)
+{
+	struct obj_refs *refs;
+	struct obj_ref *ref;
+	int i;
+
+	if (hash_empty(table->table))
+		return;
+
+	hash_for_each_possible(table->table, refs, node, id) {
+		if (refs->id != id)
+			continue;
+		if (refs->ref_cnt == 0)
+			break;
+
+		printf("%s", prefix);
+		for (i = 0; i < refs->ref_cnt; i++) {
+			ref = &refs->refs[i];
+			printf("%s%s(%d)", i == 0 ? "" : ", ", ref->comm, ref->pid);
+		}
+		break;
+	}
+}
+
+
+#endif
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 53d47610ff58..e21fa8ad2efa 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -190,6 +190,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
 		jsonw_end_array(json_wtr);
 	}
 
+	emit_obj_refs_json(&refs_table, info->id, json_wtr);
+
 	jsonw_end_object(json_wtr);
 }
 
@@ -256,6 +258,8 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
 	if (info->btf_id)
 		printf("\n\tbtf_id %d", info->btf_id);
 
+	emit_obj_refs_plain(&refs_table, info->id, "\n\tpids ");
+
 	printf("\n");
 }
 
@@ -321,6 +325,7 @@ static int do_show(int argc, char **argv)
 
 	if (show_pinned)
 		build_pinned_obj_table(&prog_table, BPF_OBJ_PROG);
+	build_obj_refs_table(&refs_table, BPF_OBJ_PROG);
 
 	if (argc == 2)
 		return do_show_subset(argc, argv);
@@ -362,6 +367,8 @@ static int do_show(int argc, char **argv)
 	if (json_output)
 		jsonw_end_array(json_wtr);
 
+	delete_obj_refs_table(&refs_table);
+
 	return err;
 }
 
diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
new file mode 100644
index 000000000000..8468a608911e
--- /dev/null
+++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (c) 2020 Facebook */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_tracing.h>
+#include "pid_iter.h"
+
+/* keep in sync with the definition in main.h */
+enum bpf_obj_type {
+	BPF_OBJ_UNKNOWN,
+	BPF_OBJ_PROG,
+	BPF_OBJ_MAP,
+	BPF_OBJ_LINK,
+	BPF_OBJ_BTF,
+};
+
+extern const void bpf_link_fops __ksym;
+extern const void bpf_map_fops __ksym;
+extern const void bpf_prog_fops __ksym;
+extern const void btf_fops __ksym;
+
+const volatile enum bpf_obj_type obj_type = BPF_OBJ_UNKNOWN;
+
+static __always_inline __u32 get_obj_id(void *ent, enum bpf_obj_type type)
+{
+	switch (type) {
+	case BPF_OBJ_PROG:
+		return BPF_CORE_READ((struct bpf_prog *)ent, aux, id);
+	case BPF_OBJ_MAP:
+		return BPF_CORE_READ((struct bpf_map *)ent, id);
+	case BPF_OBJ_BTF:
+		return BPF_CORE_READ((struct btf *)ent, id);
+	case BPF_OBJ_LINK:
+		return BPF_CORE_READ((struct bpf_link *)ent, id);
+	default:
+		return 0;
+	}
+}
+
+SEC("iter/task_file")
+int iter(struct bpf_iter__task_file *ctx)
+{
+	struct file *file = ctx->file;
+	struct task_struct *task = ctx->task;
+	struct pid_iter_entry e;
+	const void *fops;
+
+	if (!file || !task)
+		return 0;
+
+	switch (obj_type) {
+	case BPF_OBJ_PROG:
+		fops = &bpf_prog_fops;
+		break;
+	case BPF_OBJ_MAP:
+		fops = &bpf_map_fops;
+		break;
+	case BPF_OBJ_BTF:
+		fops = &btf_fops;
+		break;
+	case BPF_OBJ_LINK:
+		fops = &bpf_link_fops;
+		break;
+	default:
+		return 0;
+	}
+
+	if (file->f_op != fops)
+		return 0;
+
+	e.pid = task->tgid;
+	e.id = get_obj_id(file->private_data, obj_type);
+	bpf_probe_read(&e.comm, sizeof(e.comm), task->group_leader->comm);
+	bpf_seq_write(ctx->meta->seq, &e, sizeof(e));
+
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/bpf/bpftool/skeleton/pid_iter.h b/tools/bpf/bpftool/skeleton/pid_iter.h
new file mode 100644
index 000000000000..5692cf257adb
--- /dev/null
+++ b/tools/bpf/bpftool/skeleton/pid_iter.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2020 Facebook */
+#ifndef __PID_ITER_H
+#define __PID_ITER_H
+
+struct pid_iter_entry {
+	__u32 id;
+	int pid;
+	char comm[16];
+};
+
+#endif
-- 
cgit v1.2.3


From 075c776658190681d2bf9997306f871d6c8a9b36 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 19 Jun 2020 16:17:03 -0700
Subject: tools/bpftool: Add documentation and sample output for process info

Add statements about bpftool being able to discover process info, holding
reference to BPF map, prog, link, or BTF. Show example output as well.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20200619231703.738941-10-andriin@fb.com
---
 tools/bpf/bpftool/Documentation/bpftool-btf.rst  |  5 +++++
 tools/bpf/bpftool/Documentation/bpftool-link.rst | 13 ++++++++++++-
 tools/bpf/bpftool/Documentation/bpftool-map.rst  |  8 +++++++-
 tools/bpf/bpftool/Documentation/bpftool-prog.rst | 11 +++++++++++
 4 files changed, 35 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
index ce3a724f50c1..896f4c6c2870 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
@@ -36,6 +36,11 @@ DESCRIPTION
 		  otherwise list all BTF objects currently loaded on the
 		  system.
 
+		  Since Linux 5.8 bpftool is able to discover information about
+		  processes that hold open file descriptors (FDs) against BTF
+		  objects. On such kernels bpftool will automatically emit this
+		  information as well.
+
 	**bpftool btf dump** *BTF_SRC*
 		  Dump BTF entries from a given *BTF_SRC*.
 
diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst
index 0e43d7b06c11..38b0949a185b 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-link.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst
@@ -37,6 +37,11 @@ DESCRIPTION
 		  zero or more named attributes, some of which depend on type
 		  of link.
 
+		  Since Linux 5.8 bpftool is able to discover information about
+		  processes that hold open file descriptors (FDs) against BPF
+		  links. On such kernels bpftool will automatically emit this
+		  information as well.
+
 	**bpftool link pin** *LINK* *FILE*
 		  Pin link *LINK* as *FILE*.
 
@@ -82,6 +87,7 @@ EXAMPLES
 
     10: cgroup  prog 25
             cgroup_id 614  attach_type egress
+            pids test_progs(223)
 
 **# bpftool --json --pretty link show**
 
@@ -91,7 +97,12 @@ EXAMPLES
             "type": "cgroup",
             "prog_id": 25,
             "cgroup_id": 614,
-            "attach_type": "egress"
+            "attach_type": "egress",
+            "pids": [{
+                    "pid": 223,
+                    "comm": "test_progs"
+                }
+            ]
         }
     ]
 
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 31101643e57c..5bc2123e9944 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -62,6 +62,11 @@ DESCRIPTION
 		  Output will start with map ID followed by map type and
 		  zero or more named attributes (depending on kernel version).
 
+		  Since Linux 5.8 bpftool is able to discover information about
+		  processes that hold open file descriptors (FDs) against BPF
+		  maps. On such kernels bpftool will automatically emit this
+		  information as well.
+
 	**bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE*  **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*]
 		  Create a new map with given parameters and pin it to *bpffs*
 		  as *FILE*.
@@ -180,7 +185,8 @@ EXAMPLES
 ::
 
   10: hash  name some_map  flags 0x0
-	key 4B  value 8B  max_entries 2048  memlock 167936B
+        key 4B  value 8B  max_entries 2048  memlock 167936B
+        pids systemd(1)
 
 The following three commands are equivalent:
 
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 2b254959d488..412ea3d9bf7f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -75,6 +75,11 @@ DESCRIPTION
 		  program run. Activation or deactivation of the feature is
 		  performed via the **kernel.bpf_stats_enabled** sysctl knob.
 
+		  Since Linux 5.8 bpftool is able to discover information about
+		  processes that hold open file descriptors (FDs) against BPF
+		  programs. On such kernels bpftool will automatically emit this
+		  information as well.
+
 	**bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** | **linum** }]
 		  Dump eBPF instructions of the programs from the kernel. By
 		  default, eBPF will be disassembled and printed to standard
@@ -243,6 +248,7 @@ EXAMPLES
     10: xdp  name some_prog  tag 005a3d2123620c8b  gpl run_time_ns 81632 run_cnt 10
             loaded_at 2017-09-29T20:11:00+0000  uid 0
             xlated 528B  jited 370B  memlock 4096B  map_ids 10
+            pids systemd(1)
 
 **# bpftool --json --pretty prog show**
 
@@ -262,6 +268,11 @@ EXAMPLES
             "bytes_jited": 370,
             "bytes_memlock": 4096,
             "map_ids": [10
+            ],
+            "pids": [{
+                    "pid": 1,
+                    "comm": "systemd"
+                }
             ]
         }
     ]
-- 
cgit v1.2.3


From bdb7b79b4ce864a724250e1d35948c46f135de36 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 22 Jun 2020 20:22:21 -0700
Subject: bpf: Switch most helper return values from 32-bit int to 64-bit long

Switch most of BPF helper definitions from returning int to long. These
definitions are coming from comments in BPF UAPI header and are used to
generate bpf_helper_defs.h (under libbpf) to be later included and used from
BPF programs.

In actual in-kernel implementation, all the helpers are defined as returning
u64, but due to some historical reasons, most of them are actually defined as
returning int in UAPI (usually, to return 0 on success, and negative value on
error).

This actually causes Clang to quite often generate sub-optimal code, because
compiler believes that return value is 32-bit, and in a lot of cases has to be
up-converted (usually with a pair of 32-bit bit shifts) to 64-bit values,
before they can be used further in BPF code.

Besides just "polluting" the code, these 32-bit shifts quite often cause
problems for cases in which return value matters. This is especially the case
for the family of bpf_probe_read_str() functions. There are few other similar
helpers (e.g., bpf_read_branch_records()), in which return value is used by
BPF program logic to record variable-length data and process it. For such
cases, BPF program logic carefully manages offsets within some array or map to
read variable-length data. For such uses, it's crucial for BPF verifier to
track possible range of register values to prove that all the accesses happen
within given memory bounds. Those extraneous zero-extending bit shifts,
inserted by Clang (and quite often interleaved with other code, which makes
the issues even more challenging and sometimes requires employing extra
per-variable compiler barriers), throws off verifier logic and makes it mark
registers as having unknown variable offset. We'll study this pattern a bit
later below.

Another common pattern is to check return of BPF helper for non-zero state to
detect error conditions and attempt alternative actions in such case. Even in
this simple and straightforward case, this 32-bit vs BPF's native 64-bit mode
quite often leads to sub-optimal and unnecessary extra code. We'll look at
this pattern as well.

Clang's BPF target supports two modes of code generation: ALU32, in which it
is capable of using lower 32-bit parts of registers, and no-ALU32, in which
only full 64-bit registers are being used. ALU32 mode somewhat mitigates the
above described problems, but not in all cases.

This patch switches all the cases in which BPF helpers return 0 or negative
error from returning int to returning long. It is shown below that such change
in definition leads to equivalent or better code. No-ALU32 mode benefits more,
but ALU32 mode doesn't degrade or still gets improved code generation.

Another class of cases switched from int to long are bpf_probe_read_str()-like
helpers, which encode successful case as non-negative values, while still
returning negative value for errors.

In all of such cases, correctness is preserved due to two's complement
encoding of negative values and the fact that all helpers return values with
32-bit absolute value. Two's complement ensures that for negative values
higher 32 bits are all ones and when truncated, leave valid negative 32-bit
value with the same value. Non-negative values have upper 32 bits set to zero
and similarly preserve value when high 32 bits are truncated. This means that
just casting to int/u32 is correct and efficient (and in ALU32 mode doesn't
require any extra shifts).

To minimize the chances of regressions, two code patterns were investigated,
as mentioned above. For both patterns, BPF assembly was analyzed in
ALU32/NO-ALU32 compiler modes, both with current 32-bit int return type and
new 64-bit long return type.

Case 1. Variable-length data reading and concatenation. This is quite
ubiquitous pattern in tracing/monitoring applications, reading data like
process's environment variables, file path, etc. In such case, many pieces of
string-like variable-length data are read into a single big buffer, and at the
end of the process, only a part of array containing actual data is sent to
user-space for further processing. This case is tested in test_varlen.c
selftest (in the next patch). Code flow is roughly as follows:

  void *payload = &sample->payload;
  u64 len;

  len = bpf_probe_read_kernel_str(payload, MAX_SZ1, &source_data1);
  if (len <= MAX_SZ1) {
      payload += len;
      sample->len1 = len;
  }
  len = bpf_probe_read_kernel_str(payload, MAX_SZ2, &source_data2);
  if (len <= MAX_SZ2) {
      payload += len;
      sample->len2 = len;
  }
  /* and so on */
  sample->total_len = payload - &sample->payload;
  /* send over, e.g., perf buffer */

There could be two variations with slightly different code generated: when len
is 64-bit integer and when it is 32-bit integer. Both variations were analysed.
BPF assembly instructions between two successive invocations of
bpf_probe_read_kernel_str() were used to check code regressions. Results are
below, followed by short analysis. Left side is using helpers with int return
type, the right one is after the switch to long.

ALU32 + INT                                ALU32 + LONG
===========                                ============

64-BIT (13 insns):                         64-BIT (10 insns):
------------------------------------       ------------------------------------
  17:   call 115                             17:   call 115
  18:   if w0 > 256 goto +9 <LBB0_4>         18:   if r0 > 256 goto +6 <LBB0_4>
  19:   w1 = w0                              19:   r1 = 0 ll
  20:   r1 <<= 32                            21:   *(u64 *)(r1 + 0) = r0
  21:   r1 s>>= 32                           22:   r6 = 0 ll
  22:   r2 = 0 ll                            24:   r6 += r0
  24:   *(u64 *)(r2 + 0) = r1              00000000000000c8 <LBB0_4>:
  25:   r6 = 0 ll                            25:   r1 = r6
  27:   r6 += r1                             26:   w2 = 256
00000000000000e0 <LBB0_4>:                   27:   r3 = 0 ll
  28:   r1 = r6                              29:   call 115
  29:   w2 = 256
  30:   r3 = 0 ll
  32:   call 115

32-BIT (11 insns):                         32-BIT (12 insns):
------------------------------------       ------------------------------------
  17:   call 115                             17:   call 115
  18:   if w0 > 256 goto +7 <LBB1_4>         18:   if w0 > 256 goto +8 <LBB1_4>
  19:   r1 = 0 ll                            19:   r1 = 0 ll
  21:   *(u32 *)(r1 + 0) = r0                21:   *(u32 *)(r1 + 0) = r0
  22:   w1 = w0                              22:   r0 <<= 32
  23:   r6 = 0 ll                            23:   r0 >>= 32
  25:   r6 += r1                             24:   r6 = 0 ll
00000000000000d0 <LBB1_4>:                   26:   r6 += r0
  26:   r1 = r6                            00000000000000d8 <LBB1_4>:
  27:   w2 = 256                             27:   r1 = r6
  28:   r3 = 0 ll                            28:   w2 = 256
  30:   call 115                             29:   r3 = 0 ll
                                             31:   call 115

In ALU32 mode, the variant using 64-bit length variable clearly wins and
avoids unnecessary zero-extension bit shifts. In practice, this is even more
important and good, because BPF code won't need to do extra checks to "prove"
that payload/len are within good bounds.

32-bit len is one instruction longer. Clang decided to do 64-to-32 casting
with two bit shifts, instead of equivalent `w1 = w0` assignment. The former
uses extra register. The latter might potentially lose some range information,
but not for 32-bit value. So in this case, verifier infers that r0 is [0, 256]
after check at 18:, and shifting 32 bits left/right keeps that range intact.
We should probably look into Clang's logic and see why it chooses bitshifts
over sub-register assignments for this.

NO-ALU32 + INT                             NO-ALU32 + LONG
==============                             ===============

64-BIT (14 insns):                         64-BIT (10 insns):
------------------------------------       ------------------------------------
  17:   call 115                             17:   call 115
  18:   r0 <<= 32                            18:   if r0 > 256 goto +6 <LBB0_4>
  19:   r1 = r0                              19:   r1 = 0 ll
  20:   r1 >>= 32                            21:   *(u64 *)(r1 + 0) = r0
  21:   if r1 > 256 goto +7 <LBB0_4>         22:   r6 = 0 ll
  22:   r0 s>>= 32                           24:   r6 += r0
  23:   r1 = 0 ll                          00000000000000c8 <LBB0_4>:
  25:   *(u64 *)(r1 + 0) = r0                25:   r1 = r6
  26:   r6 = 0 ll                            26:   r2 = 256
  28:   r6 += r0                             27:   r3 = 0 ll
00000000000000e8 <LBB0_4>:                   29:   call 115
  29:   r1 = r6
  30:   r2 = 256
  31:   r3 = 0 ll
  33:   call 115

32-BIT (13 insns):                         32-BIT (13 insns):
------------------------------------       ------------------------------------
  17:   call 115                             17:   call 115
  18:   r1 = r0                              18:   r1 = r0
  19:   r1 <<= 32                            19:   r1 <<= 32
  20:   r1 >>= 32                            20:   r1 >>= 32
  21:   if r1 > 256 goto +6 <LBB1_4>         21:   if r1 > 256 goto +6 <LBB1_4>
  22:   r2 = 0 ll                            22:   r2 = 0 ll
  24:   *(u32 *)(r2 + 0) = r0                24:   *(u32 *)(r2 + 0) = r0
  25:   r6 = 0 ll                            25:   r6 = 0 ll
  27:   r6 += r1                             27:   r6 += r1
00000000000000e0 <LBB1_4>:                 00000000000000e0 <LBB1_4>:
  28:   r1 = r6                              28:   r1 = r6
  29:   r2 = 256                             29:   r2 = 256
  30:   r3 = 0 ll                            30:   r3 = 0 ll
  32:   call 115                             32:   call 115

In NO-ALU32 mode, for the case of 64-bit len variable, Clang generates much
superior code, as expected, eliminating unnecessary bit shifts. For 32-bit
len, code is identical.

So overall, only ALU-32 32-bit len case is more-or-less equivalent and the
difference stems from internal Clang decision, rather than compiler lacking
enough information about types.

Case 2. Let's look at the simpler case of checking return result of BPF helper
for errors. The code is very simple:

  long bla;
  if (bpf_probe_read_kenerl(&bla, sizeof(bla), 0))
      return 1;
  else
      return 0;

ALU32 + CHECK (9 insns)                    ALU32 + CHECK (9 insns)
====================================       ====================================
  0:    r1 = r10                             0:    r1 = r10
  1:    r1 += -8                             1:    r1 += -8
  2:    w2 = 8                               2:    w2 = 8
  3:    r3 = 0                               3:    r3 = 0
  4:    call 113                             4:    call 113
  5:    w1 = w0                              5:    r1 = r0
  6:    w0 = 1                               6:    w0 = 1
  7:    if w1 != 0 goto +1 <LBB2_2>          7:    if r1 != 0 goto +1 <LBB2_2>
  8:    w0 = 0                               8:    w0 = 0
0000000000000048 <LBB2_2>:                 0000000000000048 <LBB2_2>:
  9:    exit                                 9:    exit

Almost identical code, the only difference is the use of full register
assignment (r1 = r0) vs half-registers (w1 = w0) in instruction #5. On 32-bit
architectures, new BPF assembly might be slightly less optimal, in theory. But
one can argue that's not a big issue, given that use of full registers is
still prevalent (e.g., for parameter passing).

NO-ALU32 + CHECK (11 insns)                NO-ALU32 + CHECK (9 insns)
====================================       ====================================
  0:    r1 = r10                             0:    r1 = r10
  1:    r1 += -8                             1:    r1 += -8
  2:    r2 = 8                               2:    r2 = 8
  3:    r3 = 0                               3:    r3 = 0
  4:    call 113                             4:    call 113
  5:    r1 = r0                              5:    r1 = r0
  6:    r1 <<= 32                            6:    r0 = 1
  7:    r1 >>= 32                            7:    if r1 != 0 goto +1 <LBB2_2>
  8:    r0 = 1                               8:    r0 = 0
  9:    if r1 != 0 goto +1 <LBB2_2>        0000000000000048 <LBB2_2>:
 10:    r0 = 0                               9:    exit
0000000000000058 <LBB2_2>:
 11:    exit

NO-ALU32 is a clear improvement, getting rid of unnecessary zero-extension bit
shifts.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200623032224.4020118-1-andriin@fb.com
---
 tools/include/uapi/linux/bpf.h | 192 ++++++++++++++++++++---------------------
 1 file changed, 96 insertions(+), 96 deletions(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 19684813faae..be0efee49093 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -653,7 +653,7 @@ union bpf_attr {
  * 		Map value associated to *key*, or **NULL** if no entry was
  * 		found.
  *
- * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+ * long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
  * 	Description
  * 		Add or update the value of the entry associated to *key* in
  * 		*map* with *value*. *flags* is one of:
@@ -671,13 +671,13 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_map_delete_elem(struct bpf_map *map, const void *key)
+ * long bpf_map_delete_elem(struct bpf_map *map, const void *key)
  * 	Description
  * 		Delete entry with *key* from *map*.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
  * 	Description
  * 		For tracing programs, safely attempt to read *size* bytes from
  * 		kernel space address *unsafe_ptr* and store the data in *dst*.
@@ -695,7 +695,7 @@ union bpf_attr {
  * 	Return
  * 		Current *ktime*.
  *
- * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
+ * long bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
  * 	Description
  * 		This helper is a "printk()-like" facility for debugging. It
  * 		prints a message defined by format *fmt* (of size *fmt_size*)
@@ -775,7 +775,7 @@ union bpf_attr {
  * 	Return
  * 		The SMP id of the processor running the program.
  *
- * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
+ * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
  * 	Description
  * 		Store *len* bytes from address *from* into the packet
  * 		associated to *skb*, at *offset*. *flags* are a combination of
@@ -792,7 +792,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
+ * long bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
  * 	Description
  * 		Recompute the layer 3 (e.g. IP) checksum for the packet
  * 		associated to *skb*. Computation is incremental, so the helper
@@ -817,7 +817,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
+ * long bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
  * 	Description
  * 		Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
  * 		packet associated to *skb*. Computation is incremental, so the
@@ -849,7 +849,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
+ * long bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
  * 	Description
  * 		This special helper is used to trigger a "tail call", or in
  * 		other words, to jump into another eBPF program. The same stack
@@ -880,7 +880,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
+ * long bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
  * 	Description
  * 		Clone and redirect the packet associated to *skb* to another
  * 		net device of index *ifindex*. Both ingress and egress
@@ -916,7 +916,7 @@ union bpf_attr {
  * 		A 64-bit integer containing the current GID and UID, and
  * 		created as such: *current_gid* **<< 32 \|** *current_uid*.
  *
- * int bpf_get_current_comm(void *buf, u32 size_of_buf)
+ * long bpf_get_current_comm(void *buf, u32 size_of_buf)
  * 	Description
  * 		Copy the **comm** attribute of the current task into *buf* of
  * 		*size_of_buf*. The **comm** attribute contains the name of
@@ -953,7 +953,7 @@ union bpf_attr {
  * 	Return
  * 		The classid, or 0 for the default unconfigured classid.
  *
- * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
+ * long bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
  * 	Description
  * 		Push a *vlan_tci* (VLAN tag control information) of protocol
  * 		*vlan_proto* to the packet associated to *skb*, then update
@@ -969,7 +969,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_skb_vlan_pop(struct sk_buff *skb)
+ * long bpf_skb_vlan_pop(struct sk_buff *skb)
  * 	Description
  * 		Pop a VLAN header from the packet associated to *skb*.
  *
@@ -981,7 +981,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * long bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
  * 	Description
  * 		Get tunnel metadata. This helper takes a pointer *key* to an
  * 		empty **struct bpf_tunnel_key** of **size**, that will be
@@ -1032,7 +1032,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * long bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
  * 	Description
  * 		Populate tunnel metadata for packet associated to *skb.* The
  * 		tunnel metadata is set to the contents of *key*, of *size*. The
@@ -1098,7 +1098,7 @@ union bpf_attr {
  * 		The value of the perf event counter read from the map, or a
  * 		negative error code in case of failure.
  *
- * int bpf_redirect(u32 ifindex, u64 flags)
+ * long bpf_redirect(u32 ifindex, u64 flags)
  * 	Description
  * 		Redirect the packet to another net device of index *ifindex*.
  * 		This helper is somewhat similar to **bpf_clone_redirect**\
@@ -1145,7 +1145,7 @@ union bpf_attr {
  * 		The realm of the route for the packet associated to *skb*, or 0
  * 		if none was found.
  *
- * int bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * long bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
  * 	Description
  * 		Write raw *data* blob into a special BPF perf event held by
  * 		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
@@ -1190,7 +1190,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len)
+ * long bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len)
  * 	Description
  * 		This helper was provided as an easy way to load data from a
  * 		packet. It can be used to load *len* bytes from *offset* from
@@ -1207,7 +1207,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags)
+ * long bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags)
  * 	Description
  * 		Walk a user or a kernel stack and return its id. To achieve
  * 		this, the helper needs *ctx*, which is a pointer to the context
@@ -1276,7 +1276,7 @@ union bpf_attr {
  * 		The checksum result, or a negative error code in case of
  * 		failure.
  *
- * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
+ * long bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
  * 	Description
  * 		Retrieve tunnel options metadata for the packet associated to
  * 		*skb*, and store the raw tunnel option data to the buffer *opt*
@@ -1294,7 +1294,7 @@ union bpf_attr {
  * 	Return
  * 		The size of the option data retrieved.
  *
- * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
+ * long bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
  * 	Description
  * 		Set tunnel options metadata for the packet associated to *skb*
  * 		to the option data contained in the raw buffer *opt* of *size*.
@@ -1304,7 +1304,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
+ * long bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
  * 	Description
  * 		Change the protocol of the *skb* to *proto*. Currently
  * 		supported are transition from IPv4 to IPv6, and from IPv6 to
@@ -1331,7 +1331,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_skb_change_type(struct sk_buff *skb, u32 type)
+ * long bpf_skb_change_type(struct sk_buff *skb, u32 type)
  * 	Description
  * 		Change the packet type for the packet associated to *skb*. This
  * 		comes down to setting *skb*\ **->pkt_type** to *type*, except
@@ -1358,7 +1358,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
+ * long bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
  * 	Description
  * 		Check whether *skb* is a descendant of the cgroup2 held by
  * 		*map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
@@ -1389,7 +1389,7 @@ union bpf_attr {
  * 	Return
  * 		A pointer to the current task struct.
  *
- * int bpf_probe_write_user(void *dst, const void *src, u32 len)
+ * long bpf_probe_write_user(void *dst, const void *src, u32 len)
  * 	Description
  * 		Attempt in a safe way to write *len* bytes from the buffer
  * 		*src* to *dst* in memory. It only works for threads that are in
@@ -1408,7 +1408,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
+ * long bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
  * 	Description
  * 		Check whether the probe is being run is the context of a given
  * 		subset of the cgroup2 hierarchy. The cgroup2 to test is held by
@@ -1420,7 +1420,7 @@ union bpf_attr {
  * 		* 1, if the *skb* task does not belong to the cgroup2.
  * 		* A negative error code, if an error occurred.
  *
- * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
+ * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
  * 	Description
  * 		Resize (trim or grow) the packet associated to *skb* to the
  * 		new *len*. The *flags* are reserved for future usage, and must
@@ -1444,7 +1444,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
+ * long bpf_skb_pull_data(struct sk_buff *skb, u32 len)
  * 	Description
  * 		Pull in non-linear data in case the *skb* is non-linear and not
  * 		all of *len* are part of the linear section. Make *len* bytes
@@ -1500,7 +1500,7 @@ union bpf_attr {
  * 		recalculation the next time the kernel tries to access this
  * 		hash or when the **bpf_get_hash_recalc**\ () helper is called.
  *
- * int bpf_get_numa_node_id(void)
+ * long bpf_get_numa_node_id(void)
  * 	Description
  * 		Return the id of the current NUMA node. The primary use case
  * 		for this helper is the selection of sockets for the local NUMA
@@ -1511,7 +1511,7 @@ union bpf_attr {
  * 	Return
  * 		The id of current NUMA node.
  *
- * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
+ * long bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
  * 	Description
  * 		Grows headroom of packet associated to *skb* and adjusts the
  * 		offset of the MAC header accordingly, adding *len* bytes of
@@ -1532,7 +1532,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
+ * long bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
  * 	Description
  * 		Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
  * 		it is possible to use a negative value for *delta*. This helper
@@ -1547,7 +1547,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
  * 	Description
  * 		Copy a NUL terminated string from an unsafe kernel address
  * 		*unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for
@@ -1595,14 +1595,14 @@ union bpf_attr {
  * 		is returned (note that **overflowuid** might also be the actual
  * 		UID value for the socket).
  *
- * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
+ * long bpf_set_hash(struct sk_buff *skb, u32 hash)
  * 	Description
  * 		Set the full hash for *skb* (set the field *skb*\ **->hash**)
  * 		to value *hash*.
  * 	Return
  * 		0
  *
- * int bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
+ * long bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
  * 	Description
  * 		Emulate a call to **setsockopt()** on the socket associated to
  * 		*bpf_socket*, which must be a full socket. The *level* at
@@ -1630,7 +1630,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags)
+ * long bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags)
  * 	Description
  * 		Grow or shrink the room for data in the packet associated to
  * 		*skb* by *len_diff*, and according to the selected *mode*.
@@ -1676,7 +1676,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
  * 	Description
  * 		Redirect the packet to the endpoint referenced by *map* at
  * 		index *key*. Depending on its type, this *map* can contain
@@ -1697,7 +1697,7 @@ union bpf_attr {
  * 		**XDP_REDIRECT** on success, or the value of the two lower bits
  * 		of the *flags* argument on error.
  *
- * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
+ * long bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
  * 	Description
  * 		Redirect the packet to the socket referenced by *map* (of type
  * 		**BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
@@ -1708,7 +1708,7 @@ union bpf_attr {
  * 	Return
  * 		**SK_PASS** on success, or **SK_DROP** on error.
  *
- * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
  * 	Description
  * 		Add an entry to, or update a *map* referencing sockets. The
  * 		*skops* is used as a new value for the entry associated to
@@ -1727,7 +1727,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
+ * long bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
  * 	Description
  * 		Adjust the address pointed by *xdp_md*\ **->data_meta** by
  * 		*delta* (which can be positive or negative). Note that this
@@ -1756,7 +1756,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
+ * long bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
  * 	Description
  * 		Read the value of a perf event counter, and store it into *buf*
  * 		of size *buf_size*. This helper relies on a *map* of type
@@ -1806,7 +1806,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
+ * long bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
  * 	Description
  * 		For en eBPF program attached to a perf event, retrieve the
  * 		value of the event counter associated to *ctx* and store it in
@@ -1817,7 +1817,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
+ * long bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
  * 	Description
  * 		Emulate a call to **getsockopt()** on the socket associated to
  * 		*bpf_socket*, which must be a full socket. The *level* at
@@ -1842,7 +1842,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_override_return(struct pt_regs *regs, u64 rc)
+ * long bpf_override_return(struct pt_regs *regs, u64 rc)
  * 	Description
  * 		Used for error injection, this helper uses kprobes to override
  * 		the return value of the probed function, and to set it to *rc*.
@@ -1867,7 +1867,7 @@ union bpf_attr {
  * 	Return
  * 		0
  *
- * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
+ * long bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
  * 	Description
  * 		Attempt to set the value of the **bpf_sock_ops_cb_flags** field
  * 		for the full TCP socket associated to *bpf_sock_ops* to
@@ -1911,7 +1911,7 @@ union bpf_attr {
  * 		be set is returned (which comes down to 0 if all bits were set
  * 		as required).
  *
- * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
+ * long bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
  * 	Description
  * 		This helper is used in programs implementing policies at the
  * 		socket level. If the message *msg* is allowed to pass (i.e. if
@@ -1925,7 +1925,7 @@ union bpf_attr {
  * 	Return
  * 		**SK_PASS** on success, or **SK_DROP** on error.
  *
- * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * long bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
  * 	Description
  * 		For socket policies, apply the verdict of the eBPF program to
  * 		the next *bytes* (number of bytes) of message *msg*.
@@ -1959,7 +1959,7 @@ union bpf_attr {
  * 	Return
  * 		0
  *
- * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * long bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
  * 	Description
  * 		For socket policies, prevent the execution of the verdict eBPF
  * 		program for message *msg* until *bytes* (byte number) have been
@@ -1977,7 +1977,7 @@ union bpf_attr {
  * 	Return
  * 		0
  *
- * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
+ * long bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
  * 	Description
  * 		For socket policies, pull in non-linear data from user space
  * 		for *msg* and set pointers *msg*\ **->data** and *msg*\
@@ -2008,7 +2008,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
+ * long bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
  * 	Description
  * 		Bind the socket associated to *ctx* to the address pointed by
  * 		*addr*, of length *addr_len*. This allows for making outgoing
@@ -2026,7 +2026,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
+ * long bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
  * 	Description
  * 		Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
  * 		possible to both shrink and grow the packet tail.
@@ -2040,7 +2040,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
+ * long bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
  * 	Description
  * 		Retrieve the XFRM state (IP transform framework, see also
  * 		**ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*.
@@ -2056,7 +2056,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags)
+ * long bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags)
  * 	Description
  * 		Return a user or a kernel stack in bpf program provided buffer.
  * 		To achieve this, the helper needs *ctx*, which is a pointer
@@ -2089,7 +2089,7 @@ union bpf_attr {
  * 		A non-negative value equal to or less than *size* on success,
  * 		or a negative error in case of failure.
  *
- * int bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
+ * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
  * 	Description
  * 		This helper is similar to **bpf_skb_load_bytes**\ () in that
  * 		it provides an easy way to load *len* bytes from *offset*
@@ -2111,7 +2111,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
+ * long bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
  *	Description
  *		Do FIB lookup in kernel tables using parameters in *params*.
  *		If lookup is successful and result shows packet is to be
@@ -2142,7 +2142,7 @@ union bpf_attr {
  *		* > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
  *		  packet is not forwarded or needs assist from full stack
  *
- * int bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
  *	Description
  *		Add an entry to, or update a sockhash *map* referencing sockets.
  *		The *skops* is used as a new value for the entry associated to
@@ -2161,7 +2161,7 @@ union bpf_attr {
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
- * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
  *	Description
  *		This helper is used in programs implementing policies at the
  *		socket level. If the message *msg* is allowed to pass (i.e. if
@@ -2175,7 +2175,7 @@ union bpf_attr {
  *	Return
  *		**SK_PASS** on success, or **SK_DROP** on error.
  *
- * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
  *	Description
  *		This helper is used in programs implementing policies at the
  *		skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
@@ -2189,7 +2189,7 @@ union bpf_attr {
  *	Return
  *		**SK_PASS** on success, or **SK_DROP** on error.
  *
- * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
+ * long bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
  *	Description
  *		Encapsulate the packet associated to *skb* within a Layer 3
  *		protocol header. This header is provided in the buffer at
@@ -2226,7 +2226,7 @@ union bpf_attr {
  *	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
+ * long bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
  *	Description
  *		Store *len* bytes from address *from* into the packet
  *		associated to *skb*, at *offset*. Only the flags, tag and TLVs
@@ -2241,7 +2241,7 @@ union bpf_attr {
  *	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
+ * long bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
  *	Description
  *		Adjust the size allocated to TLVs in the outermost IPv6
  *		Segment Routing Header contained in the packet associated to
@@ -2257,7 +2257,7 @@ union bpf_attr {
  *	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
+ * long bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
  *	Description
  *		Apply an IPv6 Segment Routing action of type *action* to the
  *		packet associated to *skb*. Each action takes a parameter
@@ -2286,7 +2286,7 @@ union bpf_attr {
  *	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_rc_repeat(void *ctx)
+ * long bpf_rc_repeat(void *ctx)
  *	Description
  *		This helper is used in programs implementing IR decoding, to
  *		report a successfully decoded repeat key message. This delays
@@ -2305,7 +2305,7 @@ union bpf_attr {
  *	Return
  *		0
  *
- * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
+ * long bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
  *	Description
  *		This helper is used in programs implementing IR decoding, to
  *		report a successfully decoded key press with *scancode*,
@@ -2370,7 +2370,7 @@ union bpf_attr {
  *	Return
  *		A pointer to the local storage area.
  *
- * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
  *	Description
  *		Select a **SO_REUSEPORT** socket from a
  *		**BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
@@ -2471,7 +2471,7 @@ union bpf_attr {
  *		result is from *reuse*\ **->socks**\ [] using the hash of the
  *		tuple.
  *
- * int bpf_sk_release(struct bpf_sock *sock)
+ * long bpf_sk_release(struct bpf_sock *sock)
  *	Description
  *		Release the reference held by *sock*. *sock* must be a
  *		non-**NULL** pointer that was returned from
@@ -2479,7 +2479,7 @@ union bpf_attr {
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
- * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * long bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
  * 	Description
  * 		Push an element *value* in *map*. *flags* is one of:
  *
@@ -2489,19 +2489,19 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * long bpf_map_pop_elem(struct bpf_map *map, void *value)
  * 	Description
  * 		Pop an element from *map*.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * long bpf_map_peek_elem(struct bpf_map *map, void *value)
  * 	Description
  * 		Get an element from *map* without removing it.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
+ * long bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
  *	Description
  *		For socket policies, insert *len* bytes into *msg* at offset
  *		*start*.
@@ -2517,7 +2517,7 @@ union bpf_attr {
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
- * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
+ * long bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
  *	Description
  *		Will remove *len* bytes from a *msg* starting at byte *start*.
  *		This may result in **ENOMEM** errors under certain situations if
@@ -2529,7 +2529,7 @@ union bpf_attr {
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
- * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y)
+ * long bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y)
  *	Description
  *		This helper is used in programs implementing IR decoding, to
  *		report a successfully decoded pointer movement.
@@ -2543,7 +2543,7 @@ union bpf_attr {
  *	Return
  *		0
  *
- * int bpf_spin_lock(struct bpf_spin_lock *lock)
+ * long bpf_spin_lock(struct bpf_spin_lock *lock)
  *	Description
  *		Acquire a spinlock represented by the pointer *lock*, which is
  *		stored as part of a value of a map. Taking the lock allows to
@@ -2591,7 +2591,7 @@ union bpf_attr {
  *	Return
  *		0
  *
- * int bpf_spin_unlock(struct bpf_spin_lock *lock)
+ * long bpf_spin_unlock(struct bpf_spin_lock *lock)
  *	Description
  *		Release the *lock* previously locked by a call to
  *		**bpf_spin_lock**\ (\ *lock*\ ).
@@ -2614,7 +2614,7 @@ union bpf_attr {
  *		A **struct bpf_tcp_sock** pointer on success, or **NULL** in
  *		case of failure.
  *
- * int bpf_skb_ecn_set_ce(struct sk_buff *skb)
+ * long bpf_skb_ecn_set_ce(struct sk_buff *skb)
  *	Description
  *		Set ECN (Explicit Congestion Notification) field of IP header
  *		to **CE** (Congestion Encountered) if current value is **ECT**
@@ -2651,7 +2651,7 @@ union bpf_attr {
  *		result is from *reuse*\ **->socks**\ [] using the hash of the
  *		tuple.
  *
- * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
  * 	Description
  * 		Check whether *iph* and *th* contain a valid SYN cookie ACK for
  * 		the listening socket in *sk*.
@@ -2666,7 +2666,7 @@ union bpf_attr {
  * 		0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
  * 		error otherwise.
  *
- * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags)
+ * long bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags)
  *	Description
  *		Get name of sysctl in /proc/sys/ and copy it into provided by
  *		program buffer *buf* of size *buf_len*.
@@ -2682,7 +2682,7 @@ union bpf_attr {
  *		**-E2BIG** if the buffer wasn't big enough (*buf* will contain
  *		truncated name in this case).
  *
- * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
+ * long bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
  *	Description
  *		Get current value of sysctl as it is presented in /proc/sys
  *		(incl. newline, etc), and copy it as a string into provided
@@ -2701,7 +2701,7 @@ union bpf_attr {
  *		**-EINVAL** if current value was unavailable, e.g. because
  *		sysctl is uninitialized and read returns -EIO for it.
  *
- * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
+ * long bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
  *	Description
  *		Get new value being written by user space to sysctl (before
  *		the actual write happens) and copy it as a string into
@@ -2718,7 +2718,7 @@ union bpf_attr {
  *
  *		**-EINVAL** if sysctl is being read.
  *
- * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len)
+ * long bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len)
  *	Description
  *		Override new value being written by user space to sysctl with
  *		value provided by program in buffer *buf* of size *buf_len*.
@@ -2735,7 +2735,7 @@ union bpf_attr {
  *
  *		**-EINVAL** if sysctl is being read.
  *
- * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res)
+ * long bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res)
  *	Description
  *		Convert the initial part of the string from buffer *buf* of
  *		size *buf_len* to a long integer according to the given base
@@ -2759,7 +2759,7 @@ union bpf_attr {
  *
  *		**-ERANGE** if resulting value was out of range.
  *
- * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res)
+ * long bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res)
  *	Description
  *		Convert the initial part of the string from buffer *buf* of
  *		size *buf_len* to an unsigned long integer according to the
@@ -2810,7 +2810,7 @@ union bpf_attr {
  *		**NULL** if not found or there was an error in adding
  *		a new bpf-local-storage.
  *
- * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk)
+ * long bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk)
  *	Description
  *		Delete a bpf-local-storage from a *sk*.
  *	Return
@@ -2818,7 +2818,7 @@ union bpf_attr {
  *
  *		**-ENOENT** if the bpf-local-storage cannot be found.
  *
- * int bpf_send_signal(u32 sig)
+ * long bpf_send_signal(u32 sig)
  *	Description
  *		Send signal *sig* to the process of the current task.
  *		The signal may be delivered to any of this process's threads.
@@ -2859,7 +2859,7 @@ union bpf_attr {
  *
  *		**-EPROTONOSUPPORT** IP packet version is not 4 or 6
  *
- * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * long bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
  * 	Description
  * 		Write raw *data* blob into a special BPF perf event held by
  * 		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
@@ -2883,21 +2883,21 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
  * 	Description
  * 		Safely attempt to read *size* bytes from user space address
  * 		*unsafe_ptr* and store the data in *dst*.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
  * 	Description
  * 		Safely attempt to read *size* bytes from kernel space address
  * 		*unsafe_ptr* and store the data in *dst*.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
  * 	Description
  * 		Copy a NUL terminated string from an unsafe user address
  * 		*unsafe_ptr* to *dst*. The *size* should include the
@@ -2941,7 +2941,7 @@ union bpf_attr {
  * 		including the trailing NUL character. On error, a negative
  * 		value.
  *
- * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
  * 	Description
  * 		Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
  * 		to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply.
@@ -2949,14 +2949,14 @@ union bpf_attr {
  * 		On success, the strictly positive length of the string, including
  * 		the trailing NUL character. On error, a negative value.
  *
- * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt)
+ * long bpf_tcp_send_ack(void *tp, u32 rcv_nxt)
  *	Description
  *		Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**.
  *		*rcv_nxt* is the ack_seq to be sent out.
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
- * int bpf_send_signal_thread(u32 sig)
+ * long bpf_send_signal_thread(u32 sig)
  *	Description
  *		Send signal *sig* to the thread corresponding to the current task.
  *	Return
@@ -2976,7 +2976,7 @@ union bpf_attr {
  *	Return
  *		The 64 bit jiffies
  *
- * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
+ * long bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
  *	Description
  *		For an eBPF program attached to a perf event, retrieve the
  *		branch records (**struct perf_branch_entry**) associated to *ctx*
@@ -2995,7 +2995,7 @@ union bpf_attr {
  *
  *		**-ENOENT** if architecture does not support branch records.
  *
- * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size)
+ * long bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size)
  *	Description
  *		Returns 0 on success, values for *pid* and *tgid* as seen from the current
  *		*namespace* will be returned in *nsdata*.
@@ -3007,7 +3007,7 @@ union bpf_attr {
  *
  *		**-ENOENT** if pidns does not exists for the current task.
  *
- * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * long bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
  *	Description
  *		Write raw *data* blob into a special BPF perf event held by
  *		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
@@ -3062,7 +3062,7 @@ union bpf_attr {
  * 	Return
  * 		The id is returned or 0 in case the id could not be retrieved.
  *
- * int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
+ * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
  *	Description
  *		Assign the *sk* to the *skb*. When combined with appropriate
  *		routing configuration to receive the packet towards the socket,
@@ -3097,7 +3097,7 @@ union bpf_attr {
  * 	Return
  * 		Current *ktime*.
  *
- * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
+ * long bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
  * 	Description
  * 		**bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print
  * 		out the format string.
@@ -3126,7 +3126,7 @@ union bpf_attr {
  *
  *		**-EOVERFLOW** if an overflow happened: The same object will be tried again.
  *
- * int bpf_seq_write(struct seq_file *m, const void *data, u32 len)
+ * long bpf_seq_write(struct seq_file *m, const void *data, u32 len)
  * 	Description
  * 		**bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data.
  * 		The *m* represents the seq_file. The *data* and *len* represent the
@@ -3221,7 +3221,7 @@ union bpf_attr {
  *	Return
  *		Requested value, or 0, if flags are not recognized.
  *
- * int bpf_csum_level(struct sk_buff *skb, u64 level)
+ * long bpf_csum_level(struct sk_buff *skb, u64 level)
  * 	Description
  * 		Change the skbs checksum level by one layer up or down, or
  * 		reset it entirely to none in order to have the stack perform
-- 
cgit v1.2.3


From 5e85c6bb8e74bd9daa4f5815da373d4ac2cb1a35 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 22 Jun 2020 20:22:22 -0700
Subject: selftests/bpf: Add variable-length data concatenation pattern test

Add selftest that validates variable-length data reading and concatentation
with one big shared data array. This is a common pattern in production use for
monitoring and tracing applications, that potentially can read a lot of data,
but overall read much less. Such pattern allows to determine precisely what
amount of data needs to be sent over perfbuf/ringbuf and maximize efficiency.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200623032224.4020118-2-andriin@fb.com
---
 tools/testing/selftests/bpf/prog_tests/varlen.c | 56 +++++++++++++++
 tools/testing/selftests/bpf/progs/test_varlen.c | 96 +++++++++++++++++++++++++
 2 files changed, 152 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/varlen.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_varlen.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/varlen.c b/tools/testing/selftests/bpf/prog_tests/varlen.c
new file mode 100644
index 000000000000..7533565e096d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/varlen.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <time.h>
+#include "test_varlen.skel.h"
+
+#define CHECK_VAL(got, exp) \
+	CHECK((got) != (exp), "check", "got %ld != exp %ld\n", \
+	      (long)(got), (long)(exp))
+
+void test_varlen(void)
+{
+	int duration = 0, err;
+	struct test_varlen* skel;
+	struct test_varlen__bss *bss;
+	struct test_varlen__data *data;
+	const char str1[] = "Hello, ";
+	const char str2[] = "World!";
+	const char exp_str[] = "Hello, \0World!\0";
+	const int size1 = sizeof(str1);
+	const int size2 = sizeof(str2);
+
+	skel = test_varlen__open_and_load();
+	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+		return;
+	bss = skel->bss;
+	data = skel->data;
+
+	err = test_varlen__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	bss->test_pid = getpid();
+
+	/* trigger everything */
+	memcpy(bss->buf_in1, str1, size1);
+	memcpy(bss->buf_in2, str2, size2);
+	bss->capture = true;
+	usleep(1);
+	bss->capture = false;
+
+	CHECK_VAL(bss->payload1_len1, size1);
+	CHECK_VAL(bss->payload1_len2, size2);
+	CHECK_VAL(bss->total1, size1 + size2);
+	CHECK(memcmp(bss->payload1, exp_str, size1 + size2), "content_check",
+	      "doesn't match!");
+
+	CHECK_VAL(data->payload2_len1, size1);
+	CHECK_VAL(data->payload2_len2, size2);
+	CHECK_VAL(data->total2, size1 + size2);
+	CHECK(memcmp(data->payload2, exp_str, size1 + size2), "content_check",
+	      "doesn't match!");
+cleanup:
+	test_varlen__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c
new file mode 100644
index 000000000000..09691852debf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_varlen.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#define MAX_LEN 256
+
+char buf_in1[MAX_LEN] = {};
+char buf_in2[MAX_LEN] = {};
+
+int test_pid = 0;
+bool capture = false;
+
+/* .bss */
+long payload1_len1 = 0;
+long payload1_len2 = 0;
+long total1 = 0;
+char payload1[MAX_LEN + MAX_LEN] = {};
+
+/* .data */
+int payload2_len1 = -1;
+int payload2_len2 = -1;
+int total2 = -1;
+char payload2[MAX_LEN + MAX_LEN] = { 1 };
+
+SEC("raw_tp/sys_enter")
+int handler64(void *regs)
+{
+	int pid = bpf_get_current_pid_tgid() >> 32;
+	void *payload = payload1;
+	u64 len;
+
+	/* ignore irrelevant invocations */
+	if (test_pid != pid || !capture)
+		return 0;
+
+	len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+	if (len <= MAX_LEN) {
+		payload += len;
+		payload1_len1 = len;
+	}
+
+	len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+	if (len <= MAX_LEN) {
+		payload += len;
+		payload1_len2 = len;
+	}
+
+	total1 = payload - (void *)payload1;
+
+	return 0;
+}
+
+SEC("tp_btf/sys_enter")
+int handler32(void *regs)
+{
+	int pid = bpf_get_current_pid_tgid() >> 32;
+	void *payload = payload2;
+	u32 len;
+
+	/* ignore irrelevant invocations */
+	if (test_pid != pid || !capture)
+		return 0;
+
+	len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+	if (len <= MAX_LEN) {
+		payload += len;
+		payload2_len1 = len;
+	}
+
+	len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+	if (len <= MAX_LEN) {
+		payload += len;
+		payload2_len2 = len;
+	}
+
+	total2 = payload - (void *)payload2;
+
+	return 0;
+}
+
+SEC("tp_btf/sys_exit")
+int handler_exit(void *regs)
+{
+	long bla;
+
+	if (bpf_probe_read_kernel(&bla, sizeof(bla), 0))
+		return 1;
+	else
+		return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From 2fde1747c986cac28fa66d0cffd7577db042640b Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Mon, 22 Jun 2020 20:22:23 -0700
Subject: selftests/bpf: Add variable-length data concat pattern less than test

Extend original variable-length tests with a case to catch a common
existing pattern of testing for < 0 for errors. Note because
verifier also tracks upper bounds and we know it can not be greater
than MAX_LEN here we can skip upper bound check.

In ALU64 enabled compilation converting from long->int return types
in probe helpers results in extra instruction pattern, <<= 32, s >>= 32.
The trade-off is the non-ALU64 case works. If you really care about
every extra insn (XDP case?) then you probably should be using original
int type.

In addition adding a sext insn to bpf might help the verifier in the
general case to avoid these types of tricks.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200623032224.4020118-3-andriin@fb.com
---
 tools/testing/selftests/bpf/prog_tests/varlen.c | 12 +++++
 tools/testing/selftests/bpf/progs/test_varlen.c | 70 +++++++++++++++++++++++--
 2 files changed, 78 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/varlen.c b/tools/testing/selftests/bpf/prog_tests/varlen.c
index 7533565e096d..c75525eab02c 100644
--- a/tools/testing/selftests/bpf/prog_tests/varlen.c
+++ b/tools/testing/selftests/bpf/prog_tests/varlen.c
@@ -51,6 +51,18 @@ void test_varlen(void)
 	CHECK_VAL(data->total2, size1 + size2);
 	CHECK(memcmp(data->payload2, exp_str, size1 + size2), "content_check",
 	      "doesn't match!");
+
+	CHECK_VAL(data->payload3_len1, size1);
+	CHECK_VAL(data->payload3_len2, size2);
+	CHECK_VAL(data->total3, size1 + size2);
+	CHECK(memcmp(data->payload3, exp_str, size1 + size2), "content_check",
+	      "doesn't match!");
+
+	CHECK_VAL(data->payload4_len1, size1);
+	CHECK_VAL(data->payload4_len2, size2);
+	CHECK_VAL(data->total4, size1 + size2);
+	CHECK(memcmp(data->payload4, exp_str, size1 + size2), "content_check",
+	      "doesn't match!");
 cleanup:
 	test_varlen__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c
index 09691852debf..cd4b72c55dfe 100644
--- a/tools/testing/selftests/bpf/progs/test_varlen.c
+++ b/tools/testing/selftests/bpf/progs/test_varlen.c
@@ -26,8 +26,18 @@ int payload2_len2 = -1;
 int total2 = -1;
 char payload2[MAX_LEN + MAX_LEN] = { 1 };
 
+int payload3_len1 = -1;
+int payload3_len2 = -1;
+int total3= -1;
+char payload3[MAX_LEN + MAX_LEN] = { 1 };
+
+int payload4_len1 = -1;
+int payload4_len2 = -1;
+int total4= -1;
+char payload4[MAX_LEN + MAX_LEN] = { 1 };
+
 SEC("raw_tp/sys_enter")
-int handler64(void *regs)
+int handler64_unsigned(void *regs)
 {
 	int pid = bpf_get_current_pid_tgid() >> 32;
 	void *payload = payload1;
@@ -54,8 +64,34 @@ int handler64(void *regs)
 	return 0;
 }
 
-SEC("tp_btf/sys_enter")
-int handler32(void *regs)
+SEC("raw_tp/sys_exit")
+int handler64_signed(void *regs)
+{
+	int pid = bpf_get_current_pid_tgid() >> 32;
+	void *payload = payload3;
+	long len;
+
+	/* ignore irrelevant invocations */
+	if (test_pid != pid || !capture)
+		return 0;
+
+	len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+	if (len >= 0) {
+		payload += len;
+		payload3_len1 = len;
+	}
+	len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+	if (len >= 0) {
+		payload += len;
+		payload3_len2 = len;
+	}
+	total3 = payload - (void *)payload3;
+
+	return 0;
+}
+
+SEC("tp/raw_syscalls/sys_enter")
+int handler32_unsigned(void *regs)
 {
 	int pid = bpf_get_current_pid_tgid() >> 32;
 	void *payload = payload2;
@@ -82,7 +118,33 @@ int handler32(void *regs)
 	return 0;
 }
 
-SEC("tp_btf/sys_exit")
+SEC("tp/raw_syscalls/sys_exit")
+int handler32_signed(void *regs)
+{
+	int pid = bpf_get_current_pid_tgid() >> 32;
+	void *payload = payload4;
+	int len;
+
+	/* ignore irrelevant invocations */
+	if (test_pid != pid || !capture)
+		return 0;
+
+	len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+	if (len >= 0) {
+		payload += len;
+		payload4_len1 = len;
+	}
+	len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+	if (len >= 0) {
+		payload += len;
+		payload4_len2 = len;
+	}
+	total4 = payload - (void *)payload4;
+
+	return 0;
+}
+
+SEC("tp/syscalls/sys_exit_getpid")
 int handler_exit(void *regs)
 {
 	long bla;
-- 
cgit v1.2.3


From 9d9d8cc21e3827b89e414f990016836290de3038 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Tue, 23 Jun 2020 12:37:10 +0200
Subject: tools, bpftool: Correctly evaluate $(BUILD_BPF_SKELS) in Makefile

Currently, if the clang-bpf-co-re feature is not available, the build
fails with e.g.

  CC       prog.o
prog.c:1462:10: fatal error: profiler.skel.h: No such file or directory
 1462 | #include "profiler.skel.h"
      |          ^~~~~~~~~~~~~~~~~

This is due to the fact that the BPFTOOL_WITHOUT_SKELETONS macro is not
defined, despite BUILD_BPF_SKELS not being set. Fix this by correctly
evaluating $(BUILD_BPF_SKELS) when deciding on whether to add
-DBPFTOOL_WITHOUT_SKELETONS to CFLAGS.

Fixes: 05aca6da3b5a ("tools/bpftool: Generalize BPF skeleton support and generate vmlinux.h")
Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200623103710.10370-1-tklauser@distanz.ch
---
 tools/bpf/bpftool/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 06f436e8191a..8c6563e56ffc 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -155,7 +155,7 @@ $(OUTPUT)pids.o: $(OUTPUT)pid_iter.skel.h
 endif
 endif
 
-CFLAGS += $(if BUILD_BPF_SKELS,,-DBPFTOOL_WITHOUT_SKELETONS)
+CFLAGS += $(if $(BUILD_BPF_SKELS),,-DBPFTOOL_WITHOUT_SKELETONS)
 
 $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
 	$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
-- 
cgit v1.2.3


From 54b66c2255fadc8d78e88b5ffd99b19f7f754f5a Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Tue, 23 Jun 2020 22:36:00 +0100
Subject: tools, bpftool: Fix variable shadowing in emit_obj_refs_json()

Building bpftool yields the following complaint:

    pids.c: In function 'emit_obj_refs_json':
    pids.c:175:80: warning: declaration of 'json_wtr' shadows a global declaration [-Wshadow]
      175 | void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, json_writer_t *json_wtr)
          |                                                                 ~~~~~~~~~~~~~~~^~~~~~~~
    In file included from pids.c:11:
    main.h:141:23: note: shadowed declaration is here
      141 | extern json_writer_t *json_wtr;
          |                       ^~~~~~~~

Let's rename the variable.

v2:
- Rename the variable instead of calling the global json_wtr directly.

Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200623213600.16643-1-quentin@isovalent.com
---
 tools/bpf/bpftool/pids.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c
index 3474a91743ff..2709be4de2b1 100644
--- a/tools/bpf/bpftool/pids.c
+++ b/tools/bpf/bpftool/pids.c
@@ -172,7 +172,8 @@ void delete_obj_refs_table(struct obj_refs_table *table)
 	}
 }
 
-void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, json_writer_t *json_wtr)
+void emit_obj_refs_json(struct obj_refs_table *table, __u32 id,
+			json_writer_t *json_writer)
 {
 	struct obj_refs *refs;
 	struct obj_ref *ref;
@@ -187,16 +188,16 @@ void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, json_writer_t *j
 		if (refs->ref_cnt == 0)
 			break;
 
-		jsonw_name(json_wtr, "pids");
-		jsonw_start_array(json_wtr);
+		jsonw_name(json_writer, "pids");
+		jsonw_start_array(json_writer);
 		for (i = 0; i < refs->ref_cnt; i++) {
 			ref = &refs->refs[i];
-			jsonw_start_object(json_wtr);
-			jsonw_int_field(json_wtr, "pid", ref->pid);
-			jsonw_string_field(json_wtr, "comm", ref->comm);
-			jsonw_end_object(json_wtr);
+			jsonw_start_object(json_writer);
+			jsonw_int_field(json_writer, "pid", ref->pid);
+			jsonw_string_field(json_writer, "comm", ref->comm);
+			jsonw_end_object(json_writer);
 		}
-		jsonw_end_array(json_wtr);
+		jsonw_end_array(json_writer);
 		break;
 	}
 }
-- 
cgit v1.2.3


From 135c783f4794fbdeace4a969dea6eabd27f8a501 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 23 Jun 2020 09:42:07 +0100
Subject: libbpf: Fix spelling mistake "kallasyms" -> "kallsyms"

There is a spelling mistake in a pr_warn message. Fix it.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200623084207.149253-1-colin.king@canonical.com
---
 tools/lib/bpf/libbpf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 18461deb1b19..deea27aadcef 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -5741,7 +5741,7 @@ static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
 		if (ret == EOF && feof(f))
 			break;
 		if (ret != 3) {
-			pr_warn("failed to read kallasyms entry: %d\n", ret);
+			pr_warn("failed to read kallsyms entry: %d\n", ret);
 			err = -EINVAL;
 			goto out;
 		}
-- 
cgit v1.2.3


From 192b6638eea5d40c99964291671fc0371b858f6e Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 23 Jun 2020 21:38:05 -0700
Subject: libbpf: Prevent loading vmlinux BTF twice

Prevent loading/parsing vmlinux BTF twice in some cases: for CO-RE relocations
and for BTF-aware hooks (tp_btf, fentry/fexit, etc).

Fixes: a6ed02cac690 ("libbpf: Load btf_vmlinux only once per object.")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200624043805.1794620-1-andriin@fb.com
---
 tools/lib/bpf/libbpf.c | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index deea27aadcef..6b4955d170ff 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2504,22 +2504,31 @@ static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
 
 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
 {
+	bool need_vmlinux_btf = false;
 	struct bpf_program *prog;
 	int err;
 
+	/* CO-RE relocations need kernel BTF */
+	if (obj->btf_ext && obj->btf_ext->field_reloc_info.len)
+		need_vmlinux_btf = true;
+
 	bpf_object__for_each_program(prog, obj) {
 		if (libbpf_prog_needs_vmlinux_btf(prog)) {
-			obj->btf_vmlinux = libbpf_find_kernel_btf();
-			if (IS_ERR(obj->btf_vmlinux)) {
-				err = PTR_ERR(obj->btf_vmlinux);
-				pr_warn("Error loading vmlinux BTF: %d\n", err);
-				obj->btf_vmlinux = NULL;
-				return err;
-			}
-			return 0;
+			need_vmlinux_btf = true;
+			break;
 		}
 	}
 
+	if (!need_vmlinux_btf)
+		return 0;
+
+	obj->btf_vmlinux = libbpf_find_kernel_btf();
+	if (IS_ERR(obj->btf_vmlinux)) {
+		err = PTR_ERR(obj->btf_vmlinux);
+		pr_warn("Error loading vmlinux BTF: %d\n", err);
+		obj->btf_vmlinux = NULL;
+		return err;
+	}
 	return 0;
 }
 
@@ -4945,8 +4954,8 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
 	if (targ_btf_path)
 		targ_btf = btf__parse_elf(targ_btf_path, NULL);
 	else
-		targ_btf = libbpf_find_kernel_btf();
-	if (IS_ERR(targ_btf)) {
+		targ_btf = obj->btf_vmlinux;
+	if (IS_ERR_OR_NULL(targ_btf)) {
 		pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf));
 		return PTR_ERR(targ_btf);
 	}
@@ -4987,7 +4996,9 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
 	}
 
 out:
-	btf__free(targ_btf);
+	/* obj->btf_vmlinux is freed at the end of object load phase */
+	if (targ_btf != obj->btf_vmlinux)
+		btf__free(targ_btf);
 	if (!IS_ERR_OR_NULL(cand_cache)) {
 		hashmap__for_each_entry(cand_cache, entry, i) {
 			bpf_core_free_cands(entry->value);
-- 
cgit v1.2.3


From fea549b030152d5336dbd960b357a4d4b841a851 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 24 Jun 2020 11:10:59 -0700
Subject: selftests/bpf: Workaround for get_stack_rawtp test.

./test_progs-no_alu32 -t get_stack_raw_tp
fails due to:

52: (85) call bpf_get_stack#67
53: (bf) r8 = r0
54: (bf) r1 = r8
55: (67) r1 <<= 32
56: (c7) r1 s>>= 32
; if (usize < 0)
57: (c5) if r1 s< 0x0 goto pc+26
 R0=inv(id=0,smax_value=800) R1_w=inv(id=0,umax_value=800,var_off=(0x0; 0x3ff)) R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=1600,imm=0) R8_w=inv(id=0,smax_value=800) R9=inv800
; ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
58: (1f) r9 -= r8
; ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
59: (bf) r2 = r7
60: (0f) r2 += r1
regs=1 stack=0 before 52: (85) call bpf_get_stack#67
; ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
61: (bf) r1 = r6
62: (bf) r3 = r9
63: (b7) r4 = 0
64: (85) call bpf_get_stack#67
 R0=inv(id=0,smax_value=800) R1_w=ctx(id=0,off=0,imm=0) R2_w=map_value(id=0,off=0,ks=4,vs=1600,umax_value=800,var_off=(0x0; 0x3ff),s32_max_value=1023,u32_max_value=1023) R3_w=inv(id=0,umax_value=9223372036854776608)
R3 unbounded memory access, use 'var &= const' or 'if (var < const)'

In the C code:
  usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
  if (usize < 0)
          return 0;

  ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
  if (ksize < 0)
          return 0;

We used to have problem with pointer arith in R2.
Now it's a problem with two integers in R3.
'if (usize < 0)' is comparing R1 and makes it [0,800], but R8 stays [-inf,800].
Both registers represent the same 'usize' variable.
Then R9 -= R8 is doing 800 - [-inf, 800]
so the result of "max_len - usize" looks unbounded to the verifier while
it's obvious in C code that "max_len - usize" should be [0, 800].

To workaround the problem convert ksize and usize variables from int to long.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
index 29817a703984..b6a6eb279e54 100644
--- a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
+++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
@@ -57,8 +57,9 @@ struct {
 SEC("raw_tracepoint/sys_enter")
 int bpf_prog1(void *ctx)
 {
-	int max_len, max_buildid_len, usize, ksize, total_size;
+	int max_len, max_buildid_len, total_size;
 	struct stack_trace_t *data;
+	long usize, ksize;
 	void *raw_data;
 	__u32 key = 0;
 
-- 
cgit v1.2.3


From f9bcf96837f158db6ea982d15cd2c8161ca6bc23 Mon Sep 17 00:00:00 2001
From: Dmitry Yakunin <zeil@yandex-team.ru>
Date: Sat, 20 Jun 2020 18:30:52 +0300
Subject: bpf: Add SO_KEEPALIVE and related options to bpf_setsockopt

This patch adds support of SO_KEEPALIVE flag and TCP related options
to bpf_setsockopt() routine. This is helpful if we want to enable or tune
TCP keepalive for applications which don't do it in the userspace code.

v3:
  - update kernel-doc in uapi (Nikita Vetoshkin <nekto0n@yandex-team.ru>)

v4:
  - update kernel-doc in tools too (Alexei Starovoitov)
  - add test to selftests (Alexei Starovoitov)

Signed-off-by: Dmitry Yakunin <zeil@yandex-team.ru>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200620153052.9439-3-zeil@yandex-team.ru
---
 tools/include/uapi/linux/bpf.h                    |  7 ++++--
 tools/testing/selftests/bpf/progs/connect4_prog.c | 27 +++++++++++++++++++++++
 2 files changed, 32 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 9d3923e6b860..d9737d51dd19 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1621,10 +1621,13 @@ union bpf_attr {
  *
  * 		* **SOL_SOCKET**, which supports the following *optname*\ s:
  * 		  **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
- * 		  **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
+ * 		  **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**,
+ * 		  **SO_BINDTODEVICE**, **SO_KEEPALIVE**.
  * 		* **IPPROTO_TCP**, which supports the following *optname*\ s:
  * 		  **TCP_CONGESTION**, **TCP_BPF_IW**,
- * 		  **TCP_BPF_SNDCWND_CLAMP**.
+ * 		  **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
+ * 		  **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
+ * 		  **TCP_SYNCNT**, **TCP_USER_TIMEOUT**.
  * 		* **IPPROTO_IP**, which supports *optname* **IP_TOS**.
  * 		* **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
  * 	Return
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index 1ab2c5eba86c..b1b2773c0b9d 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -104,6 +104,30 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx)
 	return 0;
 }
 
+static __inline int set_keepalive(struct bpf_sock_addr *ctx)
+{
+	int zero = 0, one = 1;
+
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one)))
+		return 1;
+	if (ctx->type == SOCK_STREAM) {
+		if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPIDLE, &one, sizeof(one)))
+			return 1;
+		if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPINTVL, &one, sizeof(one)))
+			return 1;
+		if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPCNT, &one, sizeof(one)))
+			return 1;
+		if (bpf_setsockopt(ctx, SOL_TCP, TCP_SYNCNT, &one, sizeof(one)))
+			return 1;
+		if (bpf_setsockopt(ctx, SOL_TCP, TCP_USER_TIMEOUT, &one, sizeof(one)))
+			return 1;
+	}
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &zero, sizeof(zero)))
+		return 1;
+
+	return 0;
+}
+
 SEC("cgroup/connect4")
 int connect_v4_prog(struct bpf_sock_addr *ctx)
 {
@@ -121,6 +145,9 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
 	if (bind_to_device(ctx))
 		return 0;
 
+	if (set_keepalive(ctx))
+		return 0;
+
 	if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
 		return 0;
 	else if (ctx->type == SOCK_STREAM)
-- 
cgit v1.2.3


From af7ec13833619e17f03aa73a785a2f871da6d66b Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 23 Jun 2020 16:08:09 -0700
Subject: bpf: Add bpf_skc_to_tcp6_sock() helper

The helper is used in tracing programs to cast a socket
pointer to a tcp6_sock pointer.
The return value could be NULL if the casting is illegal.

A new helper return type RET_PTR_TO_BTF_ID_OR_NULL is added
so the verifier is able to deduce proper return types for the helper.

Different from the previous BTF_ID based helpers,
the bpf_skc_to_tcp6_sock() argument can be several possible
btf_ids. More specifically, all possible socket data structures
with sock_common appearing in the first in the memory layout.
This patch only added socket types related to tcp and udp.

All possible argument btf_id and return value btf_id
for helper bpf_skc_to_tcp6_sock() are pre-calculcated and
cached. In the future, it is even possible to precompute
these btf_id's at kernel build time.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200623230809.3988195-1-yhs@fb.com
---
 tools/include/uapi/linux/bpf.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index d9737d51dd19..e90ad07b291a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3255,6 +3255,12 @@ union bpf_attr {
  * 		case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
  * 		is returned or the error code -EACCES in case the skb is not
  * 		subject to CHECKSUM_UNNECESSARY.
+ *
+ * struct tcp6_sock *bpf_skc_to_tcp6_sock(void *sk)
+ *	Description
+ *		Dynamically cast a *sk* pointer to a *tcp6_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or NULL otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3392,7 +3398,8 @@ union bpf_attr {
 	FN(ringbuf_submit),		\
 	FN(ringbuf_discard),		\
 	FN(ringbuf_query),		\
-	FN(csum_level),
+	FN(csum_level),			\
+	FN(skc_to_tcp6_sock),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
cgit v1.2.3


From 478cfbdf5f13dfe09cfd0b1cbac821f5e27f6108 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 23 Jun 2020 16:08:11 -0700
Subject: bpf: Add bpf_skc_to_{tcp, tcp_timewait, tcp_request}_sock() helpers

Three more helpers are added to cast a sock_common pointer to
an tcp_sock, tcp_timewait_sock or a tcp_request_sock for
tracing programs.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200623230811.3988277-1-yhs@fb.com
---
 tools/include/uapi/linux/bpf.h | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e90ad07b291a..b9412ab275f3 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3261,6 +3261,24 @@ union bpf_attr {
  *		Dynamically cast a *sk* pointer to a *tcp6_sock* pointer.
  *	Return
  *		*sk* if casting is valid, or NULL otherwise.
+ *
+ * struct tcp_sock *bpf_skc_to_tcp_sock(void *sk)
+ *	Description
+ *		Dynamically cast a *sk* pointer to a *tcp_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or NULL otherwise.
+ *
+ * struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk)
+ * 	Description
+ *		Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or NULL otherwise.
+ *
+ * struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk)
+ * 	Description
+ *		Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or NULL otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3399,7 +3417,10 @@ union bpf_attr {
 	FN(ringbuf_discard),		\
 	FN(ringbuf_query),		\
 	FN(csum_level),			\
-	FN(skc_to_tcp6_sock),
+	FN(skc_to_tcp6_sock),		\
+	FN(skc_to_tcp_sock),		\
+	FN(skc_to_tcp_timewait_sock),	\
+	FN(skc_to_tcp_request_sock),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
cgit v1.2.3


From 0d4fad3e57df2bf61e8ffc8d12a34b1caf9b8835 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 23 Jun 2020 16:08:15 -0700
Subject: bpf: Add bpf_skc_to_udp6_sock() helper

The helper is used in tracing programs to cast a socket
pointer to a udp6_sock pointer.
The return value could be NULL if the casting is illegal.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Cc: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/bpf/20200623230815.3988481-1-yhs@fb.com
---
 tools/include/uapi/linux/bpf.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b9412ab275f3..0cb8ec948816 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3279,6 +3279,12 @@ union bpf_attr {
  *		Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer.
  *	Return
  *		*sk* if casting is valid, or NULL otherwise.
+ *
+ * struct udp6_sock *bpf_skc_to_udp6_sock(void *sk)
+ * 	Description
+ *		Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or NULL otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3420,7 +3426,8 @@ union bpf_attr {
 	FN(skc_to_tcp6_sock),		\
 	FN(skc_to_tcp_sock),		\
 	FN(skc_to_tcp_timewait_sock),	\
-	FN(skc_to_tcp_request_sock),
+	FN(skc_to_tcp_request_sock),	\
+	FN(skc_to_udp6_sock),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
cgit v1.2.3


From 84544f5637ff3501876ba96bd48ca900317e08fb Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 23 Jun 2020 16:08:16 -0700
Subject: selftests/bpf: Move newer bpf_iter_* type redefining to a new header
 file

Commit b9f4c01f3e0b ("selftest/bpf: Make bpf_iter selftest
compilable against old vmlinux.h") and Commit dda18a5c0b75
("selftests/bpf: Convert bpf_iter_test_kern{3, 4}.c to define
own bpf_iter_meta") redefined newly introduced types
in bpf programs so the bpf program can still compile
properly with old kernels although loading may fail.

Since this patch set introduced new types and the same
workaround is needed, so let us move the workaround
to a separate header file so they do not clutter
bpf programs.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200623230816.3988656-1-yhs@fb.com
---
 tools/testing/selftests/bpf/progs/bpf_iter.h       | 49 ++++++++++++++++++++++
 .../testing/selftests/bpf/progs/bpf_iter_bpf_map.c | 18 +-------
 .../selftests/bpf/progs/bpf_iter_ipv6_route.c      | 18 +-------
 .../testing/selftests/bpf/progs/bpf_iter_netlink.c | 18 +-------
 tools/testing/selftests/bpf/progs/bpf_iter_task.c  | 18 +-------
 .../selftests/bpf/progs/bpf_iter_task_file.c       | 20 +--------
 .../selftests/bpf/progs/bpf_iter_test_kern3.c      | 17 +-------
 .../selftests/bpf/progs/bpf_iter_test_kern4.c      | 17 +-------
 .../bpf/progs/bpf_iter_test_kern_common.h          | 18 +-------
 9 files changed, 57 insertions(+), 136 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter.h

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
new file mode 100644
index 000000000000..3757e88c6406
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
+#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used
+#define bpf_iter__netlink bpf_iter__netlink___not_used
+#define bpf_iter__task bpf_iter__task___not_used
+#define bpf_iter__task_file bpf_iter__task_file___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__bpf_map
+#undef bpf_iter__ipv6_route
+#undef bpf_iter__netlink
+#undef bpf_iter__task
+#undef bpf_iter__task_file
+
+struct bpf_iter_meta {
+	struct seq_file *seq;
+	__u64 session_id;
+	__u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__ipv6_route {
+	struct bpf_iter_meta *meta;
+	struct fib6_info *rt;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__netlink {
+	struct bpf_iter_meta *meta;
+	struct netlink_sock *sk;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task {
+	struct bpf_iter_meta *meta;
+	struct task_struct *task;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task_file {
+	struct bpf_iter_meta *meta;
+	struct task_struct *task;
+	__u32 fd;
+	struct file *file;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__bpf_map {
+	struct bpf_iter_meta *meta;
+	struct bpf_map *map;
+} __attribute__((preserve_access_index));
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
index b57bd6fef208..08651b23edba 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
@@ -1,27 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__bpf_map
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__bpf_map {
-	struct bpf_iter_meta *meta;
-	struct bpf_map *map;
-} __attribute__((preserve_access_index));
-
 SEC("iter/bpf_map")
 int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
index c8e9ca74c87b..93a452d1d136 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
@@ -1,25 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__ipv6_route
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__ipv6_route {
-	struct bpf_iter_meta *meta;
-	struct fib6_info *rt;
-} __attribute__((preserve_access_index));
-
 char _license[] SEC("license") = "GPL";
 
 extern bool CONFIG_IPV6_SUBTREES __kconfig __weak;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
index e7b8753eac0b..fda5036fdf75 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -1,11 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__netlink bpf_iter__netlink___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__netlink
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
@@ -14,17 +9,6 @@ char _license[] SEC("license") = "GPL";
 #define sk_rmem_alloc	sk_backlog.rmem_alloc
 #define sk_refcnt	__sk_common.skc_refcnt
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__netlink {
-	struct bpf_iter_meta *meta;
-	struct netlink_sock *sk;
-} __attribute__((preserve_access_index));
-
 static inline struct inode *SOCK_INODE(struct socket *socket)
 {
 	return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index ee754021f98e..4983087852a0 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -1,27 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task bpf_iter__task___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task {
-	struct bpf_iter_meta *meta;
-	struct task_struct *task;
-} __attribute__((preserve_access_index));
-
 SEC("iter/task")
 int dump_task(struct bpf_iter__task *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
index 0f0ec3db20ba..8b787baa2654 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
@@ -1,29 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task_file bpf_iter__task_file___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task_file
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task_file {
-	struct bpf_iter_meta *meta;
-	struct task_struct *task;
-	__u32 fd;
-	struct file *file;
-} __attribute__((preserve_access_index));
-
 SEC("iter/task_file")
 int dump_task_file(struct bpf_iter__task_file *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
index 13c2c90c835f..2a4647f20c46 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
@@ -1,25 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task bpf_iter__task___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task {
-	struct bpf_iter_meta *meta;
-	struct task_struct *task;
-} __attribute__((preserve_access_index));
-
 SEC("iter/task")
 int dump_task(struct bpf_iter__task *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
index 0aa71b333cf3..ee49493dc125 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
@@ -1,25 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__bpf_map
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__bpf_map {
-	struct bpf_iter_meta *meta;
-	struct bpf_map *map;
-} __attribute__((preserve_access_index));
-
 __u32 map1_id = 0, map2_id = 0;
 __u32 map1_accessed = 0, map2_accessed = 0;
 __u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
index dee1339e6905..d5e3df66ad9a 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
@@ -1,27 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task bpf_iter__task___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task
+#include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
 int count = 0;
 
-struct bpf_iter_meta {
-	struct seq_file *seq;
-	__u64 session_id;
-	__u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task {
-	struct bpf_iter_meta *meta;
-	struct task_struct *task;
-} __attribute__((preserve_access_index));
-
 SEC("iter/task")
 int dump_task(struct bpf_iter__task *ctx)
 {
-- 
cgit v1.2.3


From 647b502e3d5456f5c240b1587112b163c69732e9 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 23 Jun 2020 16:08:17 -0700
Subject: selftests/bpf: Refactor some net macros to bpf_tracing_net.h

Refactor bpf_iter_ipv6_route.c and bpf_iter_netlink.c
so net macros, originally from various include/linux header
files, are moved to a new header file
bpf_tracing_net.h. The goal is to improve reuse so
networking tracing programs do not need to
copy these macros every time they use them.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200623230817.3988962-1-yhs@fb.com
---
 tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c |  7 +------
 tools/testing/selftests/bpf/progs/bpf_iter_netlink.c    |  4 +---
 tools/testing/selftests/bpf/progs/bpf_tracing_net.h     | 16 ++++++++++++++++
 3 files changed, 18 insertions(+), 9 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_tracing_net.h

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
index 93a452d1d136..d58d9f1642b5 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
+#include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
@@ -8,12 +9,6 @@ char _license[] SEC("license") = "GPL";
 
 extern bool CONFIG_IPV6_SUBTREES __kconfig __weak;
 
-#define RTF_GATEWAY		0x0002
-#define IFNAMSIZ		16
-#define fib_nh_gw_family	nh_common.nhc_gw_family
-#define fib_nh_gw6		nh_common.nhc_gw.ipv6
-#define fib_nh_dev		nh_common.nhc_dev
-
 SEC("iter/ipv6_route")
 int dump_ipv6_route(struct bpf_iter__ipv6_route *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
index fda5036fdf75..cec82a419800 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -1,14 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
+#include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
-#define sk_rmem_alloc	sk_backlog.rmem_alloc
-#define sk_refcnt	__sk_common.skc_refcnt
-
 static inline struct inode *SOCK_INODE(struct socket *socket)
 {
 	return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
new file mode 100644
index 000000000000..1f38a1098727
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_TRACING_NET_H__
+#define __BPF_TRACING_NET_H__
+
+#define IFNAMSIZ		16
+
+#define RTF_GATEWAY		0x0002
+
+#define fib_nh_dev		nh_common.nhc_dev
+#define fib_nh_gw_family	nh_common.nhc_gw_family
+#define fib_nh_gw6		nh_common.nhc_gw.ipv6
+
+#define sk_rmem_alloc		sk_backlog.rmem_alloc
+#define sk_refcnt		__sk_common.skc_refcnt
+
+#endif
-- 
cgit v1.2.3


From 3982bfaaef7c80ecf6a065cbf9422165a8e36f75 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 23 Jun 2020 16:08:19 -0700
Subject: selftests/bpf: Add more common macros to bpf_tracing_net.h

These newly added macros will be used in subsequent bpf iterator
tcp{4,6} and udp{4,6} programs.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200623230819.3989050-1-yhs@fb.com
---
 .../testing/selftests/bpf/progs/bpf_tracing_net.h  | 35 ++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index 1f38a1098727..01378911252b 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -2,15 +2,50 @@
 #ifndef __BPF_TRACING_NET_H__
 #define __BPF_TRACING_NET_H__
 
+#define AF_INET			2
+#define AF_INET6		10
+
+#define ICSK_TIME_RETRANS	1
+#define ICSK_TIME_PROBE0	3
+#define ICSK_TIME_LOSS_PROBE	5
+#define ICSK_TIME_REO_TIMEOUT	6
+
 #define IFNAMSIZ		16
 
 #define RTF_GATEWAY		0x0002
 
+#define TCP_INFINITE_SSTHRESH	0x7fffffff
+#define TCP_PINGPONG_THRESH	3
+
 #define fib_nh_dev		nh_common.nhc_dev
 #define fib_nh_gw_family	nh_common.nhc_gw_family
 #define fib_nh_gw6		nh_common.nhc_gw.ipv6
 
+#define inet_daddr		sk.__sk_common.skc_daddr
+#define inet_rcv_saddr		sk.__sk_common.skc_rcv_saddr
+#define inet_dport		sk.__sk_common.skc_dport
+
+#define ir_loc_addr		req.__req_common.skc_rcv_saddr
+#define ir_num			req.__req_common.skc_num
+#define ir_rmt_addr		req.__req_common.skc_daddr
+#define ir_rmt_port		req.__req_common.skc_dport
+#define ir_v6_rmt_addr		req.__req_common.skc_v6_daddr
+#define ir_v6_loc_addr		req.__req_common.skc_v6_rcv_saddr
+
+#define sk_family		__sk_common.skc_family
 #define sk_rmem_alloc		sk_backlog.rmem_alloc
 #define sk_refcnt		__sk_common.skc_refcnt
+#define sk_state		__sk_common.skc_state
+#define sk_v6_daddr		__sk_common.skc_v6_daddr
+#define sk_v6_rcv_saddr		__sk_common.skc_v6_rcv_saddr
+
+#define s6_addr32		in6_u.u6_addr32
+
+#define tw_daddr		__tw_common.skc_daddr
+#define tw_rcv_saddr		__tw_common.skc_rcv_saddr
+#define tw_dport		__tw_common.skc_dport
+#define tw_refcnt		__tw_common.skc_refcnt
+#define tw_v6_daddr		__tw_common.skc_v6_daddr
+#define tw_v6_rcv_saddr		__tw_common.skc_v6_rcv_saddr
 
 #endif
-- 
cgit v1.2.3


From 2767c97765cb3d9b54c8e62b468e55cc56854a66 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 23 Jun 2020 16:08:20 -0700
Subject: selftests/bpf: Implement sample tcp/tcp6 bpf_iter programs

In my VM, I got identical result compared to /proc/net/{tcp,tcp6}.
For tcp6:
  $ cat /proc/net/tcp6
    sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
     0: 00000000000000000000000000000000:0016 00000000000000000000000000000000:0000 0A 00000000:00000000 00:00000001 00000000     0        0 17955 1 000000003eb3102e 100 0 0 10 0

  $ cat /sys/fs/bpf/p1
    sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
     0: 00000000000000000000000000000000:0016 00000000000000000000000000000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 17955 1 000000003eb3102e 100 0 0 10 0

For tcp:
  $ cat /proc/net/tcp
  sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
   0: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 2666 1 000000007152e43f 100 0 0 10 0
  $ cat /sys/fs/bpf/p2
  sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode
   1: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000     0        0 2666 1 000000007152e43f 100 0 0 10 0

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200623230820.3989165-1-yhs@fb.com
---
 tools/testing/selftests/bpf/progs/bpf_iter.h      |  15 ++
 tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c | 234 ++++++++++++++++++++
 tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c | 250 ++++++++++++++++++++++
 3 files changed, 499 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
index 3757e88c6406..bde23e16e777 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -7,6 +7,8 @@
 #define bpf_iter__netlink bpf_iter__netlink___not_used
 #define bpf_iter__task bpf_iter__task___not_used
 #define bpf_iter__task_file bpf_iter__task_file___not_used
+#define bpf_iter__tcp bpf_iter__tcp___not_used
+#define tcp6_sock tcp6_sock___not_used
 #include "vmlinux.h"
 #undef bpf_iter_meta
 #undef bpf_iter__bpf_map
@@ -14,6 +16,8 @@
 #undef bpf_iter__netlink
 #undef bpf_iter__task
 #undef bpf_iter__task_file
+#undef bpf_iter__tcp
+#undef tcp6_sock
 
 struct bpf_iter_meta {
 	struct seq_file *seq;
@@ -47,3 +51,14 @@ struct bpf_iter__bpf_map {
 	struct bpf_iter_meta *meta;
 	struct bpf_map *map;
 } __attribute__((preserve_access_index));
+
+struct bpf_iter__tcp {
+	struct bpf_iter_meta *meta;
+	struct sock_common *sk_common;
+	uid_t uid;
+} __attribute__((preserve_access_index));
+
+struct tcp6_sock {
+	struct tcp_sock	tcp;
+	struct ipv6_pinfo inet6;
+} __attribute__((preserve_access_index));
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
new file mode 100644
index 000000000000..30fd587cb325
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static int hlist_unhashed_lockless(const struct hlist_node *h)
+{
+        return !(h->pprev);
+}
+
+static int timer_pending(const struct timer_list * timer)
+{
+	return !hlist_unhashed_lockless(&timer->entry);
+}
+
+extern unsigned CONFIG_HZ __kconfig;
+
+#define USER_HZ		100
+#define NSEC_PER_SEC	1000000000ULL
+static clock_t jiffies_to_clock_t(unsigned long x)
+{
+	/* The implementation here tailored to a particular
+	 * setting of USER_HZ.
+	 */
+	u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ;
+	u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ;
+
+	if ((tick_nsec % user_hz_nsec) == 0) {
+		if (CONFIG_HZ < USER_HZ)
+			return x * (USER_HZ / CONFIG_HZ);
+		else
+			return x / (CONFIG_HZ / USER_HZ);
+	}
+	return x * tick_nsec/user_hz_nsec;
+}
+
+static clock_t jiffies_delta_to_clock_t(long delta)
+{
+	if (delta <= 0)
+		return 0;
+
+	return jiffies_to_clock_t(delta);
+}
+
+static long sock_i_ino(const struct sock *sk)
+{
+	const struct socket *sk_socket = sk->sk_socket;
+	const struct inode *inode;
+	unsigned long ino;
+
+	if (!sk_socket)
+		return 0;
+
+	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+	bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+	return ino;
+}
+
+static bool
+inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk)
+{
+	return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+}
+
+static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp)
+{
+	return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
+}
+
+static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
+			 uid_t uid, __u32 seq_num)
+{
+	const struct inet_connection_sock *icsk;
+	const struct fastopen_queue *fastopenq;
+	const struct inet_sock *inet;
+	unsigned long timer_expires;
+	const struct sock *sp;
+	__u16 destp, srcp;
+	__be32 dest, src;
+	int timer_active;
+	int rx_queue;
+	int state;
+
+	icsk = &tp->inet_conn;
+	inet = &icsk->icsk_inet;
+	sp = &inet->sk;
+	fastopenq = &icsk->icsk_accept_queue.fastopenq;
+
+	dest = inet->inet_daddr;
+	src = inet->inet_rcv_saddr;
+	destp = bpf_ntohs(inet->inet_dport);
+	srcp = bpf_ntohs(inet->inet_sport);
+
+	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
+	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+		timer_active = 1;
+		timer_expires = icsk->icsk_timeout;
+	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+		timer_active = 4;
+		timer_expires = icsk->icsk_timeout;
+	} else if (timer_pending(&sp->sk_timer)) {
+		timer_active = 2;
+		timer_expires = sp->sk_timer.expires;
+	} else {
+		timer_active = 0;
+		timer_expires = bpf_jiffies64();
+	}
+
+	state = sp->sk_state;
+	if (state == TCP_LISTEN) {
+		rx_queue = sp->sk_ack_backlog;
+	} else {
+		rx_queue = tp->rcv_nxt - tp->copied_seq;
+		if (rx_queue < 0)
+			rx_queue = 0;
+	}
+
+	BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
+		       seq_num, src, srcp, destp, destp);
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
+		       state,
+		       tp->write_seq - tp->snd_una, rx_queue,
+		       timer_active,
+		       jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()),
+		       icsk->icsk_retransmits, uid,
+		       icsk->icsk_probes_out,
+		       sock_i_ino(sp),
+		       sp->sk_refcnt.refs.counter);
+	BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n",
+		       tp,
+		       jiffies_to_clock_t(icsk->icsk_rto),
+		       jiffies_to_clock_t(icsk->icsk_ack.ato),
+		       (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk),
+		       tp->snd_cwnd,
+		       state == TCP_LISTEN ? fastopenq->max_qlen
+				: (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
+		      );
+
+	return 0;
+}
+
+static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw,
+			uid_t uid, __u32 seq_num)
+{
+	struct inet_timewait_sock *tw = &ttw->tw_sk;
+	__u16 destp, srcp;
+	__be32 dest, src;
+	long delta;
+
+	delta = tw->tw_timer.expires - bpf_jiffies64();
+	dest = tw->tw_daddr;
+	src  = tw->tw_rcv_saddr;
+	destp = bpf_ntohs(tw->tw_dport);
+	srcp  = bpf_ntohs(tw->tw_sport);
+
+	BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
+		       seq_num, src, srcp, dest, destp);
+
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+		       tw->tw_substate, 0, 0,
+		       3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
+		       tw->tw_refcnt.refs.counter, tw);
+
+	return 0;
+}
+
+static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq,
+			 uid_t uid, __u32 seq_num)
+{
+	struct inet_request_sock *irsk = &treq->req;
+	struct request_sock *req = &irsk->req;
+	long ttd;
+
+	ttd = req->rsk_timer.expires - bpf_jiffies64();
+
+	if (ttd < 0)
+		ttd = 0;
+
+	BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
+		       seq_num, irsk->ir_loc_addr,
+		       irsk->ir_num, irsk->ir_rmt_addr,
+		       bpf_ntohs(irsk->ir_rmt_port));
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+		       TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd),
+		       req->num_timeout, uid, 0, 0, 0, req);
+
+	return 0;
+}
+
+SEC("iter/tcp")
+int dump_tcp4(struct bpf_iter__tcp *ctx)
+{
+	struct sock_common *sk_common = ctx->sk_common;
+	struct seq_file *seq = ctx->meta->seq;
+	struct tcp_timewait_sock *tw;
+	struct tcp_request_sock *req;
+	struct tcp_sock *tp;
+	uid_t uid = ctx->uid;
+	__u32 seq_num;
+
+	if (sk_common == (void *)0)
+		return 0;
+
+	seq_num = ctx->meta->seq_num;
+	if (seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "  sl  "
+				    "local_address "
+				    "rem_address   "
+				    "st tx_queue rx_queue tr tm->when retrnsmt"
+				    "   uid  timeout inode\n");
+
+	if (sk_common->skc_family != AF_INET)
+		return 0;
+
+	tp = bpf_skc_to_tcp_sock(sk_common);
+	if (tp)
+		return dump_tcp_sock(seq, tp, uid, seq_num);
+
+	tw = bpf_skc_to_tcp_timewait_sock(sk_common);
+	if (tw)
+		return dump_tw_sock(seq, tw, uid, seq_num);
+
+	req = bpf_skc_to_tcp_request_sock(sk_common);
+	if (req)
+		return dump_req_sock(seq, req, uid, seq_num);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
new file mode 100644
index 000000000000..10dec4392031
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static int hlist_unhashed_lockless(const struct hlist_node *h)
+{
+        return !(h->pprev);
+}
+
+static int timer_pending(const struct timer_list * timer)
+{
+	return !hlist_unhashed_lockless(&timer->entry);
+}
+
+extern unsigned CONFIG_HZ __kconfig;
+
+#define USER_HZ		100
+#define NSEC_PER_SEC	1000000000ULL
+static clock_t jiffies_to_clock_t(unsigned long x)
+{
+	/* The implementation here tailored to a particular
+	 * setting of USER_HZ.
+	 */
+	u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ;
+	u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ;
+
+	if ((tick_nsec % user_hz_nsec) == 0) {
+		if (CONFIG_HZ < USER_HZ)
+			return x * (USER_HZ / CONFIG_HZ);
+		else
+			return x / (CONFIG_HZ / USER_HZ);
+	}
+	return x * tick_nsec/user_hz_nsec;
+}
+
+static clock_t jiffies_delta_to_clock_t(long delta)
+{
+	if (delta <= 0)
+		return 0;
+
+	return jiffies_to_clock_t(delta);
+}
+
+static long sock_i_ino(const struct sock *sk)
+{
+	const struct socket *sk_socket = sk->sk_socket;
+	const struct inode *inode;
+	unsigned long ino;
+
+	if (!sk_socket)
+		return 0;
+
+	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+	bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+	return ino;
+}
+
+static bool
+inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk)
+{
+	return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+}
+
+static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp)
+{
+	return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
+}
+
+static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp,
+			 uid_t uid, __u32 seq_num)
+{
+	const struct inet_connection_sock *icsk;
+	const struct fastopen_queue *fastopenq;
+	const struct in6_addr *dest, *src;
+	const struct inet_sock *inet;
+	unsigned long timer_expires;
+	const struct sock *sp;
+	__u16 destp, srcp;
+	int timer_active;
+	int rx_queue;
+	int state;
+
+	icsk = &tp->tcp.inet_conn;
+	inet = &icsk->icsk_inet;
+	sp = &inet->sk;
+	fastopenq = &icsk->icsk_accept_queue.fastopenq;
+
+	dest = &sp->sk_v6_daddr;
+	src = &sp->sk_v6_rcv_saddr;
+	destp = bpf_ntohs(inet->inet_dport);
+	srcp = bpf_ntohs(inet->inet_sport);
+
+	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
+	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+		timer_active = 1;
+		timer_expires = icsk->icsk_timeout;
+	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+		timer_active = 4;
+		timer_expires = icsk->icsk_timeout;
+	} else if (timer_pending(&sp->sk_timer)) {
+		timer_active = 2;
+		timer_expires = sp->sk_timer.expires;
+	} else {
+		timer_active = 0;
+		timer_expires = bpf_jiffies64();
+	}
+
+	state = sp->sk_state;
+	if (state == TCP_LISTEN) {
+		rx_queue = sp->sk_ack_backlog;
+	} else {
+		rx_queue = tp->tcp.rcv_nxt - tp->tcp.copied_seq;
+		if (rx_queue < 0)
+			rx_queue = 0;
+	}
+
+	BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+		       seq_num,
+		       src->s6_addr32[0], src->s6_addr32[1],
+		       src->s6_addr32[2], src->s6_addr32[3], srcp,
+		       dest->s6_addr32[0], dest->s6_addr32[1],
+		       dest->s6_addr32[2], dest->s6_addr32[3], destp);
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
+		       state,
+		       tp->tcp.write_seq - tp->tcp.snd_una, rx_queue,
+		       timer_active,
+		       jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()),
+		       icsk->icsk_retransmits, uid,
+		       icsk->icsk_probes_out,
+		       sock_i_ino(sp),
+		       sp->sk_refcnt.refs.counter);
+	BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n",
+		       tp,
+		       jiffies_to_clock_t(icsk->icsk_rto),
+		       jiffies_to_clock_t(icsk->icsk_ack.ato),
+		       (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk),
+		       tp->tcp.snd_cwnd,
+		       state == TCP_LISTEN ? fastopenq->max_qlen
+				: (tcp_in_initial_slowstart(&tp->tcp) ? -1
+								      : tp->tcp.snd_ssthresh)
+		      );
+
+	return 0;
+}
+
+static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw,
+			uid_t uid, __u32 seq_num)
+{
+	struct inet_timewait_sock *tw = &ttw->tw_sk;
+	const struct in6_addr *dest, *src;
+	__u16 destp, srcp;
+	long delta;
+
+	delta = tw->tw_timer.expires - bpf_jiffies64();
+	dest = &tw->tw_v6_daddr;
+	src  = &tw->tw_v6_rcv_saddr;
+	destp = bpf_ntohs(tw->tw_dport);
+	srcp  = bpf_ntohs(tw->tw_sport);
+
+	BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+		       seq_num,
+		       src->s6_addr32[0], src->s6_addr32[1],
+		       src->s6_addr32[2], src->s6_addr32[3], srcp,
+		       dest->s6_addr32[0], dest->s6_addr32[1],
+		       dest->s6_addr32[2], dest->s6_addr32[3], destp);
+
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+		       tw->tw_substate, 0, 0,
+		       3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
+		       tw->tw_refcnt.refs.counter, tw);
+
+	return 0;
+}
+
+static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq,
+			 uid_t uid, __u32 seq_num)
+{
+	struct inet_request_sock *irsk = &treq->req;
+	struct request_sock *req = &irsk->req;
+	struct in6_addr *src, *dest;
+	long ttd;
+
+	ttd = req->rsk_timer.expires - bpf_jiffies64();
+	src = &irsk->ir_v6_loc_addr;
+	dest = &irsk->ir_v6_rmt_addr;
+
+	if (ttd < 0)
+		ttd = 0;
+
+	BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+		       seq_num,
+		       src->s6_addr32[0], src->s6_addr32[1],
+		       src->s6_addr32[2], src->s6_addr32[3],
+		       irsk->ir_num,
+		       dest->s6_addr32[0], dest->s6_addr32[1],
+		       dest->s6_addr32[2], dest->s6_addr32[3],
+		       bpf_ntohs(irsk->ir_rmt_port));
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+		       TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd),
+		       req->num_timeout, uid, 0, 0, 0, req);
+
+	return 0;
+}
+
+SEC("iter/tcp")
+int dump_tcp6(struct bpf_iter__tcp *ctx)
+{
+	struct sock_common *sk_common = ctx->sk_common;
+	struct seq_file *seq = ctx->meta->seq;
+	struct tcp_timewait_sock *tw;
+	struct tcp_request_sock *req;
+	struct tcp6_sock *tp;
+	uid_t uid = ctx->uid;
+	__u32 seq_num;
+
+	if (sk_common == (void *)0)
+		return 0;
+
+	seq_num = ctx->meta->seq_num;
+	if (seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "  sl  "
+				    "local_address                         "
+				    "remote_address                        "
+				    "st tx_queue rx_queue tr tm->when retrnsmt"
+				    "   uid  timeout inode\n");
+
+	if (sk_common->skc_family != AF_INET6)
+		return 0;
+
+	tp = bpf_skc_to_tcp6_sock(sk_common);
+	if (tp)
+		return dump_tcp6_sock(seq, tp, uid, seq_num);
+
+	tw = bpf_skc_to_tcp_timewait_sock(sk_common);
+	if (tw)
+		return dump_tw_sock(seq, tw, uid, seq_num);
+
+	req = bpf_skc_to_tcp_request_sock(sk_common);
+	if (req)
+		return dump_req_sock(seq, req, uid, seq_num);
+
+	return 0;
+}
-- 
cgit v1.2.3


From ace6d6ec9e9e167047b6c8ca462a0830220640c2 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 23 Jun 2020 16:08:22 -0700
Subject: selftests/bpf: Implement sample udp/udp6 bpf_iter programs

On my VM, I got identical results between /proc/net/udp[6] and
the udp{4,6} bpf iterator.

For udp6:
  $ cat /sys/fs/bpf/p1
    sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops
   1405: 000080FE00000000FF7CC4D0D9EFE4FE:0222 00000000000000000000000000000000:0000 07 00000000:00000000 00:00000000 00000000   193        0 19183 2 0000000029eab111 0
  $ cat /proc/net/udp6
    sl  local_address                         remote_address                        st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops
   1405: 000080FE00000000FF7CC4D0D9EFE4FE:0222 00000000000000000000000000000000:0000 07 00000000:00000000 00:00000000 00000000   193        0 19183 2 0000000029eab111 0

For udp4:
  $ cat /sys/fs/bpf/p4
    sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops
   2007: 00000000:1F90 00000000:0000 07 00000000:00000000 00:00000000 00000000     0        0 72540 2 000000004ede477a 0
  $ cat /proc/net/udp
    sl  local_address rem_address   st tx_queue rx_queue tr tm->when retrnsmt   uid  timeout inode ref pointer drops
   2007: 00000000:1F90 00000000:0000 07 00000000:00000000 00:00000000 00000000     0        0 72540 2 000000004ede477a 0

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200623230822.3989299-1-yhs@fb.com
---
 tools/testing/selftests/bpf/progs/bpf_iter.h      | 16 +++++
 tools/testing/selftests/bpf/progs/bpf_iter_udp4.c | 71 ++++++++++++++++++++
 tools/testing/selftests/bpf/progs/bpf_iter_udp6.c | 79 +++++++++++++++++++++++
 3 files changed, 166 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_udp6.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
index bde23e16e777..17db3bac518b 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -9,6 +9,8 @@
 #define bpf_iter__task_file bpf_iter__task_file___not_used
 #define bpf_iter__tcp bpf_iter__tcp___not_used
 #define tcp6_sock tcp6_sock___not_used
+#define bpf_iter__udp bpf_iter__udp___not_used
+#define udp6_sock udp6_sock___not_used
 #include "vmlinux.h"
 #undef bpf_iter_meta
 #undef bpf_iter__bpf_map
@@ -18,6 +20,8 @@
 #undef bpf_iter__task_file
 #undef bpf_iter__tcp
 #undef tcp6_sock
+#undef bpf_iter__udp
+#undef udp6_sock
 
 struct bpf_iter_meta {
 	struct seq_file *seq;
@@ -62,3 +66,15 @@ struct tcp6_sock {
 	struct tcp_sock	tcp;
 	struct ipv6_pinfo inet6;
 } __attribute__((preserve_access_index));
+
+struct bpf_iter__udp {
+	struct bpf_iter_meta *meta;
+	struct udp_sock *udp_sk;
+	uid_t uid __attribute__((aligned(8)));
+	int bucket __attribute__((aligned(8)));
+} __attribute__((preserve_access_index));
+
+struct udp6_sock {
+	struct udp_sock	udp;
+	struct ipv6_pinfo inet6;
+} __attribute__((preserve_access_index));
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
new file mode 100644
index 000000000000..7053784575e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static long sock_i_ino(const struct sock *sk)
+{
+	const struct socket *sk_socket = sk->sk_socket;
+	const struct inode *inode;
+	unsigned long ino;
+
+	if (!sk_socket)
+		return 0;
+
+	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+	bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+	return ino;
+}
+
+SEC("iter/udp")
+int dump_udp4(struct bpf_iter__udp *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct udp_sock *udp_sk = ctx->udp_sk;
+	struct inet_sock *inet;
+	__u16 srcp, destp;
+	__be32 dest, src;
+	__u32 seq_num;
+	int rqueue;
+
+	if (udp_sk == (void *)0)
+		return 0;
+
+	seq_num = ctx->meta->seq_num;
+	if (seq_num == 0)
+		BPF_SEQ_PRINTF(seq,
+			       "  sl  local_address rem_address   st tx_queue "
+			       "rx_queue tr tm->when retrnsmt   uid  timeout "
+			       "inode ref pointer drops\n");
+
+	/* filter out udp6 sockets */
+	inet = &udp_sk->inet;
+	if (inet->sk.sk_family == AF_INET6)
+		return 0;
+
+	inet = &udp_sk->inet;
+	dest = inet->inet_daddr;
+	src = inet->inet_rcv_saddr;
+	srcp = bpf_ntohs(inet->inet_sport);
+	destp = bpf_ntohs(inet->inet_dport);
+	rqueue = inet->sk.sk_rmem_alloc.counter - udp_sk->forward_deficit;
+
+	BPF_SEQ_PRINTF(seq, "%5d: %08X:%04X %08X:%04X ",
+		       ctx->bucket, src, srcp, dest, destp);
+
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n",
+		       inet->sk.sk_state,
+		       inet->sk.sk_wmem_alloc.refs.counter - 1,
+		       rqueue,
+		       0, 0L, 0, ctx->uid, 0,
+		       sock_i_ino(&inet->sk),
+		       inet->sk.sk_refcnt.refs.counter, udp_sk,
+		       inet->sk.sk_drops.counter);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
new file mode 100644
index 000000000000..c1175a6ecf43
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define IPV6_SEQ_DGRAM_HEADER				\
+	"  sl  "					\
+	"local_address                         "	\
+	"remote_address                        "	\
+	"st tx_queue rx_queue tr tm->when retrnsmt"	\
+	"   uid  timeout inode ref pointer drops\n"
+
+static long sock_i_ino(const struct sock *sk)
+{
+	const struct socket *sk_socket = sk->sk_socket;
+	const struct inode *inode;
+	unsigned long ino;
+
+	if (!sk_socket)
+		return 0;
+
+	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+	bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+	return ino;
+}
+
+SEC("iter/udp")
+int dump_udp6(struct bpf_iter__udp *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct udp_sock *udp_sk = ctx->udp_sk;
+	const struct in6_addr *dest, *src;
+	struct udp6_sock *udp6_sk;
+	struct inet_sock *inet;
+	__u16 srcp, destp;
+	__u32 seq_num;
+	int rqueue;
+
+	if (udp_sk == (void *)0)
+		return 0;
+
+	seq_num = ctx->meta->seq_num;
+	if (seq_num == 0)
+		BPF_SEQ_PRINTF(seq, IPV6_SEQ_DGRAM_HEADER);
+
+	udp6_sk = bpf_skc_to_udp6_sock(udp_sk);
+	if (udp6_sk == (void *)0)
+		return 0;
+
+	inet = &udp_sk->inet;
+	srcp = bpf_ntohs(inet->inet_sport);
+	destp = bpf_ntohs(inet->inet_dport);
+	rqueue = inet->sk.sk_rmem_alloc.counter - udp_sk->forward_deficit;
+	dest  = &inet->sk.sk_v6_daddr;
+	src   = &inet->sk.sk_v6_rcv_saddr;
+
+	BPF_SEQ_PRINTF(seq, "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+		       ctx->bucket,
+		       src->s6_addr32[0], src->s6_addr32[1],
+		       src->s6_addr32[2], src->s6_addr32[3], srcp,
+		       dest->s6_addr32[0], dest->s6_addr32[1],
+		       dest->s6_addr32[2], dest->s6_addr32[3], destp);
+
+	BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n",
+		       inet->sk.sk_state,
+		       inet->sk.sk_wmem_alloc.refs.counter - 1,
+		       rqueue,
+		       0, 0L, 0, ctx->uid, 0,
+		       sock_i_ino(&inet->sk),
+		       inet->sk.sk_refcnt.refs.counter, udp_sk,
+		       inet->sk.sk_drops.counter);
+
+	return 0;
+}
-- 
cgit v1.2.3


From cfcd75f9bf12301dfdcfe9ff6dfb240997e7745f Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 23 Jun 2020 16:08:23 -0700
Subject: selftests/bpf: Add tcp/udp iterator programs to selftests

Added tcp{4,6} and udp{4,6} bpf programs into test_progs
selftest so that they at least can load successfully.
  $ ./test_progs -n 3
  ...
  #3/7 tcp4:OK
  #3/8 tcp6:OK
  #3/9 udp4:OK
  #3/10 udp6:OK
  ...
  #3 bpf_iter:OK
  Summary: 1/16 PASSED, 0 SKIPPED, 0 FAILED

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200623230823.3989372-1-yhs@fb.com
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 68 +++++++++++++++++++++++
 1 file changed, 68 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 87c29dde1cf9..1e2e0fced6e8 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -6,6 +6,10 @@
 #include "bpf_iter_bpf_map.skel.h"
 #include "bpf_iter_task.skel.h"
 #include "bpf_iter_task_file.skel.h"
+#include "bpf_iter_tcp4.skel.h"
+#include "bpf_iter_tcp6.skel.h"
+#include "bpf_iter_udp4.skel.h"
+#include "bpf_iter_udp6.skel.h"
 #include "bpf_iter_test_kern1.skel.h"
 #include "bpf_iter_test_kern2.skel.h"
 #include "bpf_iter_test_kern3.skel.h"
@@ -120,6 +124,62 @@ static void test_task_file(void)
 	bpf_iter_task_file__destroy(skel);
 }
 
+static void test_tcp4(void)
+{
+	struct bpf_iter_tcp4 *skel;
+
+	skel = bpf_iter_tcp4__open_and_load();
+	if (CHECK(!skel, "bpf_iter_tcp4__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_tcp4);
+
+	bpf_iter_tcp4__destroy(skel);
+}
+
+static void test_tcp6(void)
+{
+	struct bpf_iter_tcp6 *skel;
+
+	skel = bpf_iter_tcp6__open_and_load();
+	if (CHECK(!skel, "bpf_iter_tcp6__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_tcp6);
+
+	bpf_iter_tcp6__destroy(skel);
+}
+
+static void test_udp4(void)
+{
+	struct bpf_iter_udp4 *skel;
+
+	skel = bpf_iter_udp4__open_and_load();
+	if (CHECK(!skel, "bpf_iter_udp4__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_udp4);
+
+	bpf_iter_udp4__destroy(skel);
+}
+
+static void test_udp6(void)
+{
+	struct bpf_iter_udp6 *skel;
+
+	skel = bpf_iter_udp6__open_and_load();
+	if (CHECK(!skel, "bpf_iter_udp6__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_udp6);
+
+	bpf_iter_udp6__destroy(skel);
+}
+
 /* The expected string is less than 16 bytes */
 static int do_read_with_fd(int iter_fd, const char *expected,
 			   bool read_one_char)
@@ -394,6 +454,14 @@ void test_bpf_iter(void)
 		test_task();
 	if (test__start_subtest("task_file"))
 		test_task_file();
+	if (test__start_subtest("tcp4"))
+		test_tcp4();
+	if (test__start_subtest("tcp6"))
+		test_tcp6();
+	if (test__start_subtest("udp4"))
+		test_udp4();
+	if (test__start_subtest("udp6"))
+		test_udp6();
 	if (test__start_subtest("anon"))
 		test_anon_iter(false);
 	if (test__start_subtest("anon-read-one-char"))
-- 
cgit v1.2.3


From 9023497d8746d355bac8ddbc65797a4f553726fd Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Wed, 24 Jun 2020 16:31:24 +0200
Subject: tools, bpftool: Define prog_type_name array only once

Define prog_type_name in prog.c instead of main.h so it is only defined
once. This leads to a slight decrease in the binary size of bpftool.

Before:

   text	   data	    bss	    dec	    hex	filename
 401032	  11936	1573160	1986128	 1e4e50	bpftool

After:

   text	   data	    bss	    dec	    hex	filename
 399024	  11168	1573160	1983352	 1e4378	bpftool

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20200624143124.12914-1-tklauser@distanz.ch
---
 tools/bpf/bpftool/feature.c |  4 ++--
 tools/bpf/bpftool/link.c    |  4 ++--
 tools/bpf/bpftool/main.h    | 33 ++-------------------------------
 tools/bpf/bpftool/map.c     |  4 ++--
 tools/bpf/bpftool/prog.c    | 34 ++++++++++++++++++++++++++++++++++
 5 files changed, 42 insertions(+), 37 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 768bf77df886..1cd75807673e 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -695,7 +695,7 @@ section_program_types(bool *supported_types, const char *define_prefix,
 			    "/*** eBPF program types ***/",
 			    define_prefix);
 
-	for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
+	for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++)
 		probe_prog_type(i, supported_types, define_prefix, ifindex);
 
 	print_end_section();
@@ -741,7 +741,7 @@ section_helpers(bool *supported_types, const char *define_prefix, __u32 ifindex)
 		       "	%sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n",
 		       define_prefix, define_prefix, define_prefix,
 		       define_prefix);
-	for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
+	for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++)
 		probe_helpers_for_progtype(i, supported_types[i], define_prefix,
 					   ifindex);
 
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 7329f3134283..326b8fdf0243 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -108,7 +108,7 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
 		if (err)
 			return err;
 
-		if (prog_info.type < ARRAY_SIZE(prog_type_name))
+		if (prog_info.type < prog_type_name_size)
 			jsonw_string_field(json_wtr, "prog_type",
 					   prog_type_name[prog_info.type]);
 		else
@@ -187,7 +187,7 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
 		if (err)
 			return err;
 
-		if (prog_info.type < ARRAY_SIZE(prog_type_name))
+		if (prog_info.type < prog_type_name_size)
 			printf("\n\tprog_type %s  ",
 			       prog_type_name[prog_info.type]);
 		else
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index ce26271e5f0c..269f1cb6aef5 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -56,37 +56,8 @@
 #define HELP_SPEC_LINK							\
 	"LINK := { id LINK_ID | pinned FILE }"
 
-static const char * const prog_type_name[] = {
-	[BPF_PROG_TYPE_UNSPEC]			= "unspec",
-	[BPF_PROG_TYPE_SOCKET_FILTER]		= "socket_filter",
-	[BPF_PROG_TYPE_KPROBE]			= "kprobe",
-	[BPF_PROG_TYPE_SCHED_CLS]		= "sched_cls",
-	[BPF_PROG_TYPE_SCHED_ACT]		= "sched_act",
-	[BPF_PROG_TYPE_TRACEPOINT]		= "tracepoint",
-	[BPF_PROG_TYPE_XDP]			= "xdp",
-	[BPF_PROG_TYPE_PERF_EVENT]		= "perf_event",
-	[BPF_PROG_TYPE_CGROUP_SKB]		= "cgroup_skb",
-	[BPF_PROG_TYPE_CGROUP_SOCK]		= "cgroup_sock",
-	[BPF_PROG_TYPE_LWT_IN]			= "lwt_in",
-	[BPF_PROG_TYPE_LWT_OUT]			= "lwt_out",
-	[BPF_PROG_TYPE_LWT_XMIT]		= "lwt_xmit",
-	[BPF_PROG_TYPE_SOCK_OPS]		= "sock_ops",
-	[BPF_PROG_TYPE_SK_SKB]			= "sk_skb",
-	[BPF_PROG_TYPE_CGROUP_DEVICE]		= "cgroup_device",
-	[BPF_PROG_TYPE_SK_MSG]			= "sk_msg",
-	[BPF_PROG_TYPE_RAW_TRACEPOINT]		= "raw_tracepoint",
-	[BPF_PROG_TYPE_CGROUP_SOCK_ADDR]	= "cgroup_sock_addr",
-	[BPF_PROG_TYPE_LWT_SEG6LOCAL]		= "lwt_seg6local",
-	[BPF_PROG_TYPE_LIRC_MODE2]		= "lirc_mode2",
-	[BPF_PROG_TYPE_SK_REUSEPORT]		= "sk_reuseport",
-	[BPF_PROG_TYPE_FLOW_DISSECTOR]		= "flow_dissector",
-	[BPF_PROG_TYPE_CGROUP_SYSCTL]		= "cgroup_sysctl",
-	[BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE]	= "raw_tracepoint_writable",
-	[BPF_PROG_TYPE_CGROUP_SOCKOPT]		= "cgroup_sockopt",
-	[BPF_PROG_TYPE_TRACING]			= "tracing",
-	[BPF_PROG_TYPE_STRUCT_OPS]		= "struct_ops",
-	[BPF_PROG_TYPE_EXT]			= "ext",
-};
+extern const char * const prog_type_name[];
+extern const size_t prog_type_name_size;
 
 static const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
 	[BPF_CGROUP_INET_INGRESS] = "ingress",
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index bbb74d387fb0..42c215b6eae6 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -473,7 +473,7 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
 		if (owner_prog_type) {
 			unsigned int prog_type = atoi(owner_prog_type);
 
-			if (prog_type < ARRAY_SIZE(prog_type_name))
+			if (prog_type < prog_type_name_size)
 				jsonw_string_field(json_wtr, "owner_prog_type",
 						   prog_type_name[prog_type]);
 			else
@@ -558,7 +558,7 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
 		if (owner_prog_type) {
 			unsigned int prog_type = atoi(owner_prog_type);
 
-			if (prog_type < ARRAY_SIZE(prog_type_name))
+			if (prog_type < prog_type_name_size)
 				printf("owner_prog_type %s  ",
 				       prog_type_name[prog_type]);
 			else
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index e21fa8ad2efa..6863c57effd0 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -29,6 +29,40 @@
 #include "main.h"
 #include "xlated_dumper.h"
 
+const char * const prog_type_name[] = {
+	[BPF_PROG_TYPE_UNSPEC]			= "unspec",
+	[BPF_PROG_TYPE_SOCKET_FILTER]		= "socket_filter",
+	[BPF_PROG_TYPE_KPROBE]			= "kprobe",
+	[BPF_PROG_TYPE_SCHED_CLS]		= "sched_cls",
+	[BPF_PROG_TYPE_SCHED_ACT]		= "sched_act",
+	[BPF_PROG_TYPE_TRACEPOINT]		= "tracepoint",
+	[BPF_PROG_TYPE_XDP]			= "xdp",
+	[BPF_PROG_TYPE_PERF_EVENT]		= "perf_event",
+	[BPF_PROG_TYPE_CGROUP_SKB]		= "cgroup_skb",
+	[BPF_PROG_TYPE_CGROUP_SOCK]		= "cgroup_sock",
+	[BPF_PROG_TYPE_LWT_IN]			= "lwt_in",
+	[BPF_PROG_TYPE_LWT_OUT]			= "lwt_out",
+	[BPF_PROG_TYPE_LWT_XMIT]		= "lwt_xmit",
+	[BPF_PROG_TYPE_SOCK_OPS]		= "sock_ops",
+	[BPF_PROG_TYPE_SK_SKB]			= "sk_skb",
+	[BPF_PROG_TYPE_CGROUP_DEVICE]		= "cgroup_device",
+	[BPF_PROG_TYPE_SK_MSG]			= "sk_msg",
+	[BPF_PROG_TYPE_RAW_TRACEPOINT]		= "raw_tracepoint",
+	[BPF_PROG_TYPE_CGROUP_SOCK_ADDR]	= "cgroup_sock_addr",
+	[BPF_PROG_TYPE_LWT_SEG6LOCAL]		= "lwt_seg6local",
+	[BPF_PROG_TYPE_LIRC_MODE2]		= "lirc_mode2",
+	[BPF_PROG_TYPE_SK_REUSEPORT]		= "sk_reuseport",
+	[BPF_PROG_TYPE_FLOW_DISSECTOR]		= "flow_dissector",
+	[BPF_PROG_TYPE_CGROUP_SYSCTL]		= "cgroup_sysctl",
+	[BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE]	= "raw_tracepoint_writable",
+	[BPF_PROG_TYPE_CGROUP_SOCKOPT]		= "cgroup_sockopt",
+	[BPF_PROG_TYPE_TRACING]			= "tracing",
+	[BPF_PROG_TYPE_STRUCT_OPS]		= "struct_ops",
+	[BPF_PROG_TYPE_EXT]			= "ext",
+};
+
+const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name);
+
 enum dump_mode {
 	DUMP_JITED,
 	DUMP_XLATED,
-- 
cgit v1.2.3


From 16d37ee3d2b1c30052ba5ebb69556040fc174061 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Wed, 24 Jun 2020 16:31:54 +0200
Subject: tools, bpftool: Define attach_type_name array only once

Define attach_type_name in common.c instead of main.h so it is only
defined once. This leads to a slight decrease in the binary size of
bpftool.

Before:

   text	   data	    bss	    dec	    hex	filename
 399024	  11168	1573160	1983352	 1e4378	bpftool

After:

   text	   data	    bss	    dec	    hex	filename
 398256	  10880	1573160	1982296	 1e3f58	bpftool

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20200624143154.13145-1-tklauser@distanz.ch
---
 tools/bpf/bpftool/common.c | 36 ++++++++++++++++++++++++++++++++++++
 tools/bpf/bpftool/main.h   | 36 +-----------------------------------
 2 files changed, 37 insertions(+), 35 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 6c864c3683fc..18e5604fe260 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -29,6 +29,42 @@
 #define BPF_FS_MAGIC		0xcafe4a11
 #endif
 
+const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
+	[BPF_CGROUP_INET_INGRESS]	= "ingress",
+	[BPF_CGROUP_INET_EGRESS]	= "egress",
+	[BPF_CGROUP_INET_SOCK_CREATE]	= "sock_create",
+	[BPF_CGROUP_SOCK_OPS]		= "sock_ops",
+	[BPF_CGROUP_DEVICE]		= "device",
+	[BPF_CGROUP_INET4_BIND]		= "bind4",
+	[BPF_CGROUP_INET6_BIND]		= "bind6",
+	[BPF_CGROUP_INET4_CONNECT]	= "connect4",
+	[BPF_CGROUP_INET6_CONNECT]	= "connect6",
+	[BPF_CGROUP_INET4_POST_BIND]	= "post_bind4",
+	[BPF_CGROUP_INET6_POST_BIND]	= "post_bind6",
+	[BPF_CGROUP_INET4_GETPEERNAME]	= "getpeername4",
+	[BPF_CGROUP_INET6_GETPEERNAME]	= "getpeername6",
+	[BPF_CGROUP_INET4_GETSOCKNAME]	= "getsockname4",
+	[BPF_CGROUP_INET6_GETSOCKNAME]	= "getsockname6",
+	[BPF_CGROUP_UDP4_SENDMSG]	= "sendmsg4",
+	[BPF_CGROUP_UDP6_SENDMSG]	= "sendmsg6",
+	[BPF_CGROUP_SYSCTL]		= "sysctl",
+	[BPF_CGROUP_UDP4_RECVMSG]	= "recvmsg4",
+	[BPF_CGROUP_UDP6_RECVMSG]	= "recvmsg6",
+	[BPF_CGROUP_GETSOCKOPT]		= "getsockopt",
+	[BPF_CGROUP_SETSOCKOPT]		= "setsockopt",
+
+	[BPF_SK_SKB_STREAM_PARSER]	= "sk_skb_stream_parser",
+	[BPF_SK_SKB_STREAM_VERDICT]	= "sk_skb_stream_verdict",
+	[BPF_SK_MSG_VERDICT]		= "sk_msg_verdict",
+	[BPF_LIRC_MODE2]		= "lirc_mode2",
+	[BPF_FLOW_DISSECTOR]		= "flow_dissector",
+	[BPF_TRACE_RAW_TP]		= "raw_tp",
+	[BPF_TRACE_FENTRY]		= "fentry",
+	[BPF_TRACE_FEXIT]		= "fexit",
+	[BPF_MODIFY_RETURN]		= "mod_ret",
+	[BPF_LSM_MAC]			= "lsm_mac",
+};
+
 void p_err(const char *fmt, ...)
 {
 	va_list ap;
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 269f1cb6aef5..78d34e860713 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -59,41 +59,7 @@
 extern const char * const prog_type_name[];
 extern const size_t prog_type_name_size;
 
-static const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
-	[BPF_CGROUP_INET_INGRESS] = "ingress",
-	[BPF_CGROUP_INET_EGRESS] = "egress",
-	[BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
-	[BPF_CGROUP_SOCK_OPS] = "sock_ops",
-	[BPF_CGROUP_DEVICE] = "device",
-	[BPF_CGROUP_INET4_BIND] = "bind4",
-	[BPF_CGROUP_INET6_BIND] = "bind6",
-	[BPF_CGROUP_INET4_CONNECT] = "connect4",
-	[BPF_CGROUP_INET6_CONNECT] = "connect6",
-	[BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
-	[BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
-	[BPF_CGROUP_INET4_GETPEERNAME] = "getpeername4",
-	[BPF_CGROUP_INET6_GETPEERNAME] = "getpeername6",
-	[BPF_CGROUP_INET4_GETSOCKNAME] = "getsockname4",
-	[BPF_CGROUP_INET6_GETSOCKNAME] = "getsockname6",
-	[BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
-	[BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
-	[BPF_CGROUP_SYSCTL] = "sysctl",
-	[BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4",
-	[BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6",
-	[BPF_CGROUP_GETSOCKOPT] = "getsockopt",
-	[BPF_CGROUP_SETSOCKOPT] = "setsockopt",
-
-	[BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
-	[BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
-	[BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
-	[BPF_LIRC_MODE2] = "lirc_mode2",
-	[BPF_FLOW_DISSECTOR] = "flow_dissector",
-	[BPF_TRACE_RAW_TP] = "raw_tp",
-	[BPF_TRACE_FENTRY] = "fentry",
-	[BPF_TRACE_FEXIT] = "fexit",
-	[BPF_MODIFY_RETURN] = "mod_ret",
-	[BPF_LSM_MAC] = "lsm_mac",
-};
+extern const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE];
 
 extern const char * const map_type_name[];
 extern const size_t map_type_name_size;
-- 
cgit v1.2.3


From d929758101fc0674008169dc1de963e3181c587b Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Thu, 25 Jun 2020 16:26:28 -0700
Subject: libbpf: Support disabling auto-loading BPF programs

Currently, bpf_object__load() (and by induction skeleton's load), will always
attempt to prepare, relocate, and load into kernel every single BPF program
found inside the BPF object file. This is often convenient and the right thing
to do and what users expect.

But there are plenty of cases (especially with BPF development constantly
picking up the pace), where BPF application is intended to work with old
kernels, with potentially reduced set of features. But on kernels supporting
extra features, it would like to take a full advantage of them, by employing
extra BPF program. This could be a choice of using fentry/fexit over
kprobe/kretprobe, if kernel is recent enough and is built with BTF. Or BPF
program might be providing optimized bpf_iter-based solution that user-space
might want to use, whenever available. And so on.

With libbpf and BPF CO-RE in particular, it's advantageous to not have to
maintain two separate BPF object files to achieve this. So to enable such use
cases, this patch adds ability to request not auto-loading chosen BPF
programs. In such case, libbpf won't attempt to perform relocations (which
might fail due to old kernel), won't try to resolve BTF types for
BTF-aware (tp_btf/fentry/fexit/etc) program types, because BTF might not be
present, and so on. Skeleton will also automatically skip auto-attachment step
for such not loaded BPF programs.

Overall, this feature allows to simplify development and deployment of
real-world BPF applications with complicated compatibility requirements.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200625232629.3444003-2-andriin@fb.com
---
 tools/lib/bpf/libbpf.c   | 48 ++++++++++++++++++++++++++++++++++++++++--------
 tools/lib/bpf/libbpf.h   |  2 ++
 tools/lib/bpf/libbpf.map |  2 ++
 3 files changed, 44 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 6b4955d170ff..4ea7f4f1a691 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -230,6 +230,7 @@ struct bpf_program {
 	struct bpf_insn *insns;
 	size_t insns_cnt, main_prog_cnt;
 	enum bpf_prog_type type;
+	bool load;
 
 	struct reloc_desc *reloc_desc;
 	int nr_reloc;
@@ -541,6 +542,7 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx,
 	prog->instances.fds = NULL;
 	prog->instances.nr = -1;
 	prog->type = BPF_PROG_TYPE_UNSPEC;
+	prog->load = true;
 
 	return 0;
 errout:
@@ -2513,6 +2515,8 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
 		need_vmlinux_btf = true;
 
 	bpf_object__for_each_program(prog, obj) {
+		if (!prog->load)
+			continue;
 		if (libbpf_prog_needs_vmlinux_btf(prog)) {
 			need_vmlinux_btf = true;
 			break;
@@ -5445,6 +5449,12 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
 {
 	int err = 0, fd, i, btf_id;
 
+	if (prog->obj->loaded) {
+		pr_warn("prog '%s'('%s'): can't load after object was loaded\n",
+			prog->name, prog->section_name);
+		return -EINVAL;
+	}
+
 	if ((prog->type == BPF_PROG_TYPE_TRACING ||
 	     prog->type == BPF_PROG_TYPE_LSM ||
 	     prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
@@ -5533,16 +5543,21 @@ static bool bpf_program__is_function_storage(const struct bpf_program *prog,
 static int
 bpf_object__load_progs(struct bpf_object *obj, int log_level)
 {
+	struct bpf_program *prog;
 	size_t i;
 	int err;
 
 	for (i = 0; i < obj->nr_programs; i++) {
-		if (bpf_program__is_function_storage(&obj->programs[i], obj))
+		prog = &obj->programs[i];
+		if (bpf_program__is_function_storage(prog, obj))
 			continue;
-		obj->programs[i].log_level |= log_level;
-		err = bpf_program__load(&obj->programs[i],
-					obj->license,
-					obj->kern_version);
+		if (!prog->load) {
+			pr_debug("prog '%s'('%s'): skipped loading\n",
+				 prog->name, prog->section_name);
+			continue;
+		}
+		prog->log_level |= log_level;
+		err = bpf_program__load(prog, obj->license, obj->kern_version);
 		if (err)
 			return err;
 	}
@@ -5869,12 +5884,10 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
 		return -EINVAL;
 
 	if (obj->loaded) {
-		pr_warn("object should not be loaded twice\n");
+		pr_warn("object '%s': load can't be attempted twice\n", obj->name);
 		return -EINVAL;
 	}
 
-	obj->loaded = true;
-
 	err = bpf_object__probe_loading(obj);
 	err = err ? : bpf_object__probe_caps(obj);
 	err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
@@ -5889,6 +5902,8 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
 	btf__free(obj->btf_vmlinux);
 	obj->btf_vmlinux = NULL;
 
+	obj->loaded = true; /* doesn't matter if successfully or not */
+
 	if (err)
 		goto out;
 
@@ -6661,6 +6676,20 @@ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
 	return title;
 }
 
+bool bpf_program__autoload(const struct bpf_program *prog)
+{
+	return prog->load;
+}
+
+int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
+{
+	if (prog->obj->loaded)
+		return -EINVAL;
+
+	prog->load = autoload;
+	return 0;
+}
+
 int bpf_program__fd(const struct bpf_program *prog)
 {
 	return bpf_program__nth_fd(prog, 0);
@@ -9283,6 +9312,9 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
 		const struct bpf_sec_def *sec_def;
 		const char *sec_name = bpf_program__title(prog, false);
 
+		if (!prog->load)
+			continue;
+
 		sec_def = find_sec_def(sec_name);
 		if (!sec_def || !sec_def->attach_fn)
 			continue;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index fdd279fb1866..2335971ed0bd 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -200,6 +200,8 @@ LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,
 LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog);
 LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog,
 					  bool needs_copy);
+LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload);
 
 /* returns program size in bytes */
 LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 9914e0db4859..6544d2cd1ed6 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -286,4 +286,6 @@ LIBBPF_0.1.0 {
 		bpf_map__set_value_size;
 		bpf_map__type;
 		bpf_map__value_size;
+		bpf_program__autoload;
+		bpf_program__set_autoload;
 } LIBBPF_0.0.9;
-- 
cgit v1.2.3


From 5712174c5c9e3d684ad05d4aaed1e14acda4bb74 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Thu, 25 Jun 2020 16:26:29 -0700
Subject: selftests/bpf: Test auto-load disabling logic for BPF programs

Validate that BPF object with broken (in multiple ways) BPF program can still
be successfully loaded, if that broken BPF program is disabled.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200625232629.3444003-3-andriin@fb.com
---
 tools/testing/selftests/bpf/prog_tests/autoload.c | 41 +++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/test_autoload.c | 40 ++++++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/autoload.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_autoload.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/autoload.c b/tools/testing/selftests/bpf/prog_tests/autoload.c
new file mode 100644
index 000000000000..3693f7d133eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/autoload.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <time.h>
+#include "test_autoload.skel.h"
+
+void test_autoload(void)
+{
+	int duration = 0, err;
+	struct test_autoload* skel;
+
+	skel = test_autoload__open_and_load();
+	/* prog3 should be broken */
+	if (CHECK(skel, "skel_open_and_load", "unexpected success\n"))
+		goto cleanup;
+
+	skel = test_autoload__open();
+	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+		goto cleanup;
+
+	/* don't load prog3 */
+	bpf_program__set_autoload(skel->progs.prog3, false);
+
+	err = test_autoload__load(skel);
+	if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
+		goto cleanup;
+
+	err = test_autoload__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	usleep(1);
+
+	CHECK(!skel->bss->prog1_called, "prog1", "not called\n");
+	CHECK(!skel->bss->prog2_called, "prog2", "not called\n");
+	CHECK(skel->bss->prog3_called, "prog3", "called?!\n");
+
+cleanup:
+	test_autoload__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_autoload.c b/tools/testing/selftests/bpf/progs/test_autoload.c
new file mode 100644
index 000000000000..62c8cdec6d5d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_autoload.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+bool prog1_called = false;
+bool prog2_called = false;
+bool prog3_called = false;
+
+SEC("raw_tp/sys_enter")
+int prog1(const void *ctx)
+{
+	prog1_called = true;
+	return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int prog2(const void *ctx)
+{
+	prog2_called = true;
+	return 0;
+}
+
+struct fake_kernel_struct {
+	int whatever;
+} __attribute__((preserve_access_index));
+
+SEC("fentry/unexisting-kprobe-will-fail-if-loaded")
+int prog3(const void *ctx)
+{
+	struct fake_kernel_struct *fake = (void *)ctx;
+	fake->whatever = 123;
+	prog3_called = true;
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From ec23eb705620234421fd48fc2382490fcfbafc37 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 29 Jun 2020 17:47:58 -0700
Subject: tools/bpftool: Allow substituting custom vmlinux.h for the build

In some build contexts (e.g., Travis CI build for outdated kernel), vmlinux.h,
generated from available kernel, doesn't contain all the types necessary for
BPF program compilation. For such set up, the most maintainable way to deal
with this problem is to keep pre-generated (almost up-to-date) vmlinux.h
checked in and use it for compilation purposes. bpftool after that can deal
with kernel missing some of the features in runtime with no problems.

To that effect, allow to specify path to custom vmlinux.h to bpftool's
Makefile with VMLINUX_H variable.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200630004759.521530-1-andriin@fb.com
---
 tools/bpf/bpftool/Makefile | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 8c6563e56ffc..273da1615503 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -122,20 +122,24 @@ BPFTOOL_BOOTSTRAP := $(if $(OUTPUT),$(OUTPUT)bpftool-bootstrap,./bpftool-bootstr
 BOOTSTRAP_OBJS = $(addprefix $(OUTPUT),main.o common.o json_writer.o gen.o btf.o)
 OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
 
-VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux)				\
+VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)				\
 		     $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)	\
 		     ../../../vmlinux					\
 		     /sys/kernel/btf/vmlinux				\
 		     /boot/vmlinux-$(shell uname -r)
-VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
 
-ifneq ($(VMLINUX_BTF),)
+ifneq ($(VMLINUX_BTF)$(VMLINUX_H),)
 ifeq ($(feature-clang-bpf-co-re),1)
 
 BUILD_BPF_SKELS := 1
 
 $(OUTPUT)vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL_BOOTSTRAP)
+ifeq ($(VMLINUX_H),)
 	$(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) btf dump file $< format c > $@
+else
+	$(Q)cp "$(VMLINUX_H)" $@
+endif
 
 $(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF)
 	$(QUIET_CLANG)$(CLANG) \
-- 
cgit v1.2.3


From ca4db6389d611eee2eb7c1dfe710b62d8ea06772 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 29 Jun 2020 17:47:59 -0700
Subject: selftests/bpf: Allow substituting custom vmlinux.h for selftests
 build

Similarly to bpftool Makefile, allow to specify custom location of vmlinux.h
to be used during the build. This allows simpler testing setups with
checked-in pre-generated vmlinux.h.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200630004759.521530-2-andriin@fb.com
---
 tools/testing/selftests/bpf/Makefile | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 22aaec74ea0a..1f9c696b3edf 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -134,12 +134,12 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ)
 	$(call msg,CC,,$@)
 	$(CC) -c $(CFLAGS) -o $@ $<
 
-VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux)				\
+VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)				\
 		     $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)	\
 		     ../../../../vmlinux				\
 		     /sys/kernel/btf/vmlinux				\
 		     /boot/vmlinux-$(shell uname -r)
-VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
 
 $(OUTPUT)/runqslower: $(BPFOBJ)
 	$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower	\
@@ -182,8 +182,13 @@ $(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR):
 	mkdir -p $@
 
 $(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR)
+ifeq ($(VMLINUX_H),)
 	$(call msg,GEN,,$@)
 	$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+else
+	$(call msg,CP,,$@)
+	cp "$(VMLINUX_H)" $@
+endif
 
 # Get Clang's default includes on this system, as opposed to those seen by
 # '-target bpf'. This fixes "missing" files on some architectures/distros,
-- 
cgit v1.2.3


From 30ad688094bcfe8721bfd4003f6a20c9b6ddf964 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 30 Jun 2020 08:21:24 -0700
Subject: libbpf: Make bpf_endian co-exist with vmlinux.h

Make bpf_endian.h compatible with vmlinux.h. It is a frequent request from
users wanting to use bpf_endian.h in their BPF applications using CO-RE and
vmlinux.h.

To achieve that, re-implement byte swap macros and drop all the header
includes. This way it can be used both with linux header includes, as well as
with a vmlinux.h.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200630152125.3631920-2-andriin@fb.com
---
 tools/lib/bpf/bpf_endian.h | 43 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 35 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf_endian.h b/tools/lib/bpf/bpf_endian.h
index fbe28008450f..ec9db4feca9f 100644
--- a/tools/lib/bpf/bpf_endian.h
+++ b/tools/lib/bpf/bpf_endian.h
@@ -2,8 +2,35 @@
 #ifndef __BPF_ENDIAN__
 #define __BPF_ENDIAN__
 
-#include <linux/stddef.h>
-#include <linux/swab.h>
+/*
+ * Isolate byte #n and put it into byte #m, for __u##b type.
+ * E.g., moving byte #6 (nnnnnnnn) into byte #1 (mmmmmmmm) for __u64:
+ * 1) xxxxxxxx nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx
+ * 2) nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx 00000000
+ * 3) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn
+ * 4) 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn 00000000
+ */
+#define ___bpf_mvb(x, b, n, m) ((__u##b)(x) << (b-(n+1)*8) >> (b-8) << (m*8))
+
+#define ___bpf_swab16(x) ((__u16)(			\
+			  ___bpf_mvb(x, 16, 0, 1) |	\
+			  ___bpf_mvb(x, 16, 1, 0)))
+
+#define ___bpf_swab32(x) ((__u32)(			\
+			  ___bpf_mvb(x, 32, 0, 3) |	\
+			  ___bpf_mvb(x, 32, 1, 2) |	\
+			  ___bpf_mvb(x, 32, 2, 1) |	\
+			  ___bpf_mvb(x, 32, 3, 0)))
+
+#define ___bpf_swab64(x) ((__u64)(			\
+			  ___bpf_mvb(x, 64, 0, 7) |	\
+			  ___bpf_mvb(x, 64, 1, 6) |	\
+			  ___bpf_mvb(x, 64, 2, 5) |	\
+			  ___bpf_mvb(x, 64, 3, 4) |	\
+			  ___bpf_mvb(x, 64, 4, 3) |	\
+			  ___bpf_mvb(x, 64, 5, 2) |	\
+			  ___bpf_mvb(x, 64, 6, 1) |	\
+			  ___bpf_mvb(x, 64, 7, 0)))
 
 /* LLVM's BPF target selects the endianness of the CPU
  * it compiles on, or the user specifies (bpfel/bpfeb),
@@ -23,16 +50,16 @@
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 # define __bpf_ntohs(x)			__builtin_bswap16(x)
 # define __bpf_htons(x)			__builtin_bswap16(x)
-# define __bpf_constant_ntohs(x)	___constant_swab16(x)
-# define __bpf_constant_htons(x)	___constant_swab16(x)
+# define __bpf_constant_ntohs(x)	___bpf_swab16(x)
+# define __bpf_constant_htons(x)	___bpf_swab16(x)
 # define __bpf_ntohl(x)			__builtin_bswap32(x)
 # define __bpf_htonl(x)			__builtin_bswap32(x)
-# define __bpf_constant_ntohl(x)	___constant_swab32(x)
-# define __bpf_constant_htonl(x)	___constant_swab32(x)
+# define __bpf_constant_ntohl(x)	___bpf_swab32(x)
+# define __bpf_constant_htonl(x)	___bpf_swab32(x)
 # define __bpf_be64_to_cpu(x)		__builtin_bswap64(x)
 # define __bpf_cpu_to_be64(x)		__builtin_bswap64(x)
-# define __bpf_constant_be64_to_cpu(x)	___constant_swab64(x)
-# define __bpf_constant_cpu_to_be64(x)	___constant_swab64(x)
+# define __bpf_constant_be64_to_cpu(x)	___bpf_swab64(x)
+# define __bpf_constant_cpu_to_be64(x)	___bpf_swab64(x)
 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 # define __bpf_ntohs(x)			(x)
 # define __bpf_htons(x)			(x)
-- 
cgit v1.2.3


From 8c18311067d0f0d5f332b9e1f3859eb15e23332d Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 30 Jun 2020 08:21:25 -0700
Subject: selftests/bpf: Add byte swapping selftest

Add simple selftest validating byte swap built-ins and compile-time macros.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200630152125.3631920-3-andriin@fb.com
---
 tools/testing/selftests/bpf/prog_tests/endian.c | 53 +++++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/test_endian.c | 37 +++++++++++++++++
 2 files changed, 90 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/endian.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_endian.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/endian.c b/tools/testing/selftests/bpf/prog_tests/endian.c
new file mode 100644
index 000000000000..1a11612ace6c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/endian.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include "test_endian.skel.h"
+
+static int duration;
+
+#define IN16 0x1234
+#define IN32 0x12345678U
+#define IN64 0x123456789abcdef0ULL
+
+#define OUT16 0x3412
+#define OUT32 0x78563412U
+#define OUT64 0xf0debc9a78563412ULL
+
+void test_endian(void)
+{
+	struct test_endian* skel;
+	struct test_endian__bss *bss;
+	int err;
+
+	skel = test_endian__open_and_load();
+	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+		return;
+	bss = skel->bss;
+
+	bss->in16 = IN16;
+	bss->in32 = IN32;
+	bss->in64 = IN64;
+
+	err = test_endian__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	usleep(1);
+
+	CHECK(bss->out16 != OUT16, "out16", "got 0x%llx != exp 0x%llx\n",
+	      (__u64)bss->out16, (__u64)OUT16);
+	CHECK(bss->out32 != OUT32, "out32", "got 0x%llx != exp 0x%llx\n",
+	      (__u64)bss->out32, (__u64)OUT32);
+	CHECK(bss->out64 != OUT64, "out16", "got 0x%llx != exp 0x%llx\n",
+	      (__u64)bss->out64, (__u64)OUT64);
+
+	CHECK(bss->const16 != OUT16, "const16", "got 0x%llx != exp 0x%llx\n",
+	      (__u64)bss->const16, (__u64)OUT16);
+	CHECK(bss->const32 != OUT32, "const32", "got 0x%llx != exp 0x%llx\n",
+	      (__u64)bss->const32, (__u64)OUT32);
+	CHECK(bss->const64 != OUT64, "const64", "got 0x%llx != exp 0x%llx\n",
+	      (__u64)bss->const64, (__u64)OUT64);
+cleanup:
+	test_endian__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_endian.c b/tools/testing/selftests/bpf/progs/test_endian.c
new file mode 100644
index 000000000000..ddb687c5d125
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_endian.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define IN16 0x1234
+#define IN32 0x12345678U
+#define IN64 0x123456789abcdef0ULL
+
+__u16 in16 = 0;
+__u32 in32 = 0;
+__u64 in64 = 0;
+
+__u16 out16 = 0;
+__u32 out32 = 0;
+__u64 out64 = 0;
+
+__u16 const16 = 0;
+__u32 const32 = 0;
+__u64 const64 = 0;
+
+SEC("raw_tp/sys_enter")
+int sys_enter(const void *ctx)
+{
+	out16 = __builtin_bswap16(in16);
+	out32 = __builtin_bswap32(in32);
+	out64 = __builtin_bswap64(in64);
+	const16 = ___bpf_swab16(IN16);
+	const32 = ___bpf_swab32(IN32);
+	const64 = ___bpf_swab64(IN64);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From fa28dcb82a38f8e3993b0fae9106b1a80b59e4f0 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Mon, 29 Jun 2020 23:28:44 -0700
Subject: bpf: Introduce helper bpf_get_task_stack()

Introduce helper bpf_get_task_stack(), which dumps stack trace of given
task. This is different to bpf_get_stack(), which gets stack track of
current task. One potential use case of bpf_get_task_stack() is to call
it from bpf_iter__task and dump all /proc/<pid>/stack to a seq_file.

bpf_get_task_stack() uses stack_trace_save_tsk() instead of
get_perf_callchain() for kernel stack. The benefit of this choice is that
stack_trace_save_tsk() doesn't require changes in arch/. The downside of
using stack_trace_save_tsk() is that stack_trace_save_tsk() dumps the
stack trace to unsigned long array. For 32-bit systems, we need to
translate it to u64 array.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200630062846.664389-3-songliubraving@fb.com
---
 tools/include/uapi/linux/bpf.h | 37 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 0cb8ec948816..da9bf35a26f8 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3285,6 +3285,39 @@ union bpf_attr {
  *		Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
  *	Return
  *		*sk* if casting is valid, or NULL otherwise.
+ *
+ * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags)
+ *	Description
+ *		Return a user or a kernel stack in bpf program provided buffer.
+ *		To achieve this, the helper needs *task*, which is a valid
+ *		pointer to struct task_struct. To store the stacktrace, the
+ *		bpf program provides *buf* with	a nonnegative *size*.
+ *
+ *		The last argument, *flags*, holds the number of stack frames to
+ *		skip (from 0 to 255), masked with
+ *		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ *		the following flags:
+ *
+ *		**BPF_F_USER_STACK**
+ *			Collect a user space stack instead of a kernel stack.
+ *		**BPF_F_USER_BUILD_ID**
+ *			Collect buildid+offset instead of ips for user stack,
+ *			only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ *		**bpf_get_task_stack**\ () can collect up to
+ *		**PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
+ *		to sufficient large buffer size. Note that
+ *		this limit can be controlled with the **sysctl** program, and
+ *		that it should be manually increased in order to profile long
+ *		user stacks (such as stacks for Java programs). To do so, use:
+ *
+ *		::
+ *
+ *			# sysctl kernel.perf_event_max_stack=<new value>
+ *	Return
+ *		A non-negative value equal to or less than *size* on success,
+ *		or a negative error in case of failure.
+ *
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3427,7 +3460,9 @@ union bpf_attr {
 	FN(skc_to_tcp_sock),		\
 	FN(skc_to_tcp_timewait_sock),	\
 	FN(skc_to_tcp_request_sock),	\
-	FN(skc_to_udp6_sock),
+	FN(skc_to_udp6_sock),		\
+	FN(get_task_stack),		\
+	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
cgit v1.2.3


From c7568114bc56cf3ec0bd9eb117bbe7cad3d30e11 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Mon, 29 Jun 2020 23:28:46 -0700
Subject: selftests/bpf: Add bpf_iter test with bpf_get_task_stack()

The new test is similar to other bpf_iter tests. It dumps all
/proc/<pid>/stack to a seq_file. Here is some example output:

pid:     2873 num_entries:        3
[<0>] worker_thread+0xc6/0x380
[<0>] kthread+0x135/0x150
[<0>] ret_from_fork+0x22/0x30

pid:     2874 num_entries:        9
[<0>] __bpf_get_stack+0x15e/0x250
[<0>] bpf_prog_22a400774977bb30_dump_task_stack+0x4a/0xb3c
[<0>] bpf_iter_run_prog+0x81/0x170
[<0>] __task_seq_show+0x58/0x80
[<0>] bpf_seq_read+0x1c3/0x3b0
[<0>] vfs_read+0x9e/0x170
[<0>] ksys_read+0xa7/0xe0
[<0>] do_syscall_64+0x4c/0xa0
[<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9

Note: bpf_iter test as-is doesn't print the contents of the seq_file. To
see the example above, it is necessary to add printf() to do_dummy_read.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200630062846.664389-5-songliubraving@fb.com
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c  | 17 ++++++++++
 .../selftests/bpf/progs/bpf_iter_task_stack.c      | 37 ++++++++++++++++++++++
 2 files changed, 54 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 1e2e0fced6e8..fed42755416d 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -5,6 +5,7 @@
 #include "bpf_iter_netlink.skel.h"
 #include "bpf_iter_bpf_map.skel.h"
 #include "bpf_iter_task.skel.h"
+#include "bpf_iter_task_stack.skel.h"
 #include "bpf_iter_task_file.skel.h"
 #include "bpf_iter_tcp4.skel.h"
 #include "bpf_iter_tcp6.skel.h"
@@ -110,6 +111,20 @@ static void test_task(void)
 	bpf_iter_task__destroy(skel);
 }
 
+static void test_task_stack(void)
+{
+	struct bpf_iter_task_stack *skel;
+
+	skel = bpf_iter_task_stack__open_and_load();
+	if (CHECK(!skel, "bpf_iter_task_stack__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_task_stack);
+
+	bpf_iter_task_stack__destroy(skel);
+}
+
 static void test_task_file(void)
 {
 	struct bpf_iter_task_file *skel;
@@ -452,6 +467,8 @@ void test_bpf_iter(void)
 		test_bpf_map();
 	if (test__start_subtest("task"))
 		test_task();
+	if (test__start_subtest("task_stack"))
+		test_task_stack();
 	if (test__start_subtest("task_file"))
 		test_task_file();
 	if (test__start_subtest("tcp4"))
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
new file mode 100644
index 000000000000..e40d32a2ed93
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define MAX_STACK_TRACE_DEPTH   64
+unsigned long entries[MAX_STACK_TRACE_DEPTH];
+#define SIZE_OF_ULONG (sizeof(unsigned long))
+
+SEC("iter/task")
+int dump_task_stack(struct bpf_iter__task *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct task_struct *task = ctx->task;
+	long i, retlen;
+
+	if (task == (void *)0)
+		return 0;
+
+	retlen = bpf_get_task_stack(task, entries,
+				    MAX_STACK_TRACE_DEPTH * SIZE_OF_ULONG, 0);
+	if (retlen < 0)
+		return 0;
+
+	BPF_SEQ_PRINTF(seq, "pid: %8u num_entries: %8u\n", task->pid,
+		       retlen / SIZE_OF_ULONG);
+	for (i = 0; i < MAX_STACK_TRACE_DEPTH; i++) {
+		if (retlen > i * SIZE_OF_ULONG)
+			BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)entries[i]);
+	}
+	BPF_SEQ_PRINTF(seq, "\n");
+
+	return 0;
+}
-- 
cgit v1.2.3


From 8d821b5db70723d27ee749b4870de90760606918 Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Wed, 1 Jul 2020 10:53:15 -0700
Subject: selftests/bpf: Switch test_vmlinux to use hrtimer_range_start_ns.

The test_vmlinux test uses hrtimer_nanosleep as hook to test tracing
programs. But in a kernel built by clang, which performs more aggresive
inlining, that function gets inlined into its caller SyS_nanosleep.
Therefore, even though fentry and kprobe do hook on the function,
they aren't triggered by the call to nanosleep in the test.

A possible fix is switching to use a function that is less likely to
be inlined, such as hrtimer_range_start_ns. The EXPORT_SYMBOL functions
shouldn't be inlined based on the description of [1], therefore safe
to use for this test. Also the arguments of this function include the
duration of sleep, therefore suitable for test verification.

[1] af3b56289be1 time: don't inline EXPORT_SYMBOL functions

Tested:
 In a clang build kernel, before this change, the test fails:

 test_vmlinux:PASS:skel_open 0 nsec
 test_vmlinux:PASS:skel_attach 0 nsec
 test_vmlinux:PASS:tp 0 nsec
 test_vmlinux:PASS:raw_tp 0 nsec
 test_vmlinux:PASS:tp_btf 0 nsec
 test_vmlinux:FAIL:kprobe not called
 test_vmlinux:FAIL:fentry not called

 After switching to hrtimer_range_start_ns, the test passes:

 test_vmlinux:PASS:skel_open 0 nsec
 test_vmlinux:PASS:skel_attach 0 nsec
 test_vmlinux:PASS:tp 0 nsec
 test_vmlinux:PASS:raw_tp 0 nsec
 test_vmlinux:PASS:tp_btf 0 nsec
 test_vmlinux:PASS:kprobe 0 nsec
 test_vmlinux:PASS:fentry 0 nsec

Signed-off-by: Hao Luo <haoluo@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200701175315.1161242-1-haoluo@google.com
---
 tools/testing/selftests/bpf/progs/test_vmlinux.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c
index 5611b564d3b1..29fa09d6a6c6 100644
--- a/tools/testing/selftests/bpf/progs/test_vmlinux.c
+++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c
@@ -63,20 +63,20 @@ int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id)
 	return 0;
 }
 
-SEC("kprobe/hrtimer_nanosleep")
-int BPF_KPROBE(handle__kprobe,
-	       ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid)
+SEC("kprobe/hrtimer_start_range_ns")
+int BPF_KPROBE(handle__kprobe, struct hrtimer *timer, ktime_t tim, u64 delta_ns,
+	       const enum hrtimer_mode mode)
 {
-	if (rqtp == MY_TV_NSEC)
+	if (tim == MY_TV_NSEC)
 		kprobe_called = true;
 	return 0;
 }
 
-SEC("fentry/hrtimer_nanosleep")
-int BPF_PROG(handle__fentry,
-	     ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid)
+SEC("fentry/hrtimer_start_range_ns")
+int BPF_PROG(handle__fentry, struct hrtimer *timer, ktime_t tim, u64 delta_ns,
+	     const enum hrtimer_mode mode)
 {
-	if (rqtp == MY_TV_NSEC)
+	if (tim == MY_TV_NSEC)
 		fentry_called = true;
 	return 0;
 }
-- 
cgit v1.2.3


From 17bbf925c6f86be8c08bc473cb5327c173154596 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Wed, 1 Jul 2020 14:28:16 -0700
Subject: tools/bpftool: Turn off -Wnested-externs warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Turn off -Wnested-externs to avoid annoying warnings in BUILD_BUG_ON macro when
compiling bpftool:

In file included from /data/users/andriin/linux/tools/include/linux/build_bug.h:5,
                 from /data/users/andriin/linux/tools/include/linux/kernel.h:8,
                 from /data/users/andriin/linux/kernel/bpf/disasm.h:10,
                 from /data/users/andriin/linux/kernel/bpf/disasm.c:8:
/data/users/andriin/linux/kernel/bpf/disasm.c: In function ‘__func_get_name’:
/data/users/andriin/linux/tools/include/linux/compiler.h:37:38: warning: nested extern declaration of ‘__compiletime_assert_0’ [-Wnested-externs]
  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
                                      ^~~~~~~~~~~~~~~~~~~~~
/data/users/andriin/linux/tools/include/linux/compiler.h:16:15: note: in definition of macro ‘__compiletime_assert’
   extern void prefix ## suffix(void) __compiletime_error(msg); \
               ^~~~~~
/data/users/andriin/linux/tools/include/linux/compiler.h:37:2: note: in expansion of macro ‘_compiletime_assert’
  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
  ^~~~~~~~~~~~~~~~~~~
/data/users/andriin/linux/tools/include/linux/build_bug.h:39:37: note: in expansion of macro ‘compiletime_assert’
 #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
                                     ^~~~~~~~~~~~~~~~~~
/data/users/andriin/linux/tools/include/linux/build_bug.h:50:2: note: in expansion of macro ‘BUILD_BUG_ON_MSG’
  BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
  ^~~~~~~~~~~~~~~~
/data/users/andriin/linux/kernel/bpf/disasm.c:20:2: note: in expansion of macro ‘BUILD_BUG_ON’
  BUILD_BUG_ON(ARRAY_SIZE(func_id_str) != __BPF_FUNC_MAX_ID);
  ^~~~~~~~~~~~

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200701212816.2072340-1-andriin@fb.com
---
 tools/bpf/bpftool/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 273da1615503..51bd520ed437 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -40,7 +40,7 @@ bash_compdir ?= /usr/share/bash-completion/completions
 
 CFLAGS += -O2
 CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers
-CFLAGS += $(filter-out -Wswitch-enum,$(EXTRA_WARNINGS))
+CFLAGS += $(filter-out -Wswitch-enum -Wnested-externs,$(EXTRA_WARNINGS))
 CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \
 	-I$(if $(OUTPUT),$(OUTPUT),.) \
 	-I$(srctree)/kernel/bpf/ \
-- 
cgit v1.2.3


From 6c92bd5cd4650c39dd929565ee172984c680fead Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Wed, 1 Jul 2020 23:44:07 +0200
Subject: selftests/bpf: Test_progs indicate to shell on non-actions

When a user selects a non-existing test the summary is printed with
indication 0 for all info types, and shell "success" (EXIT_SUCCESS) is
indicated. This can be understood by a human end-user, but for shell
scripting is it useful to indicate a shell failure (EXIT_FAILURE).

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/159363984736.930467.17956007131403952343.stgit@firesoul
---
 tools/testing/selftests/bpf/test_progs.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 54fa5fa688ce..da70a4f72f54 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -687,5 +687,8 @@ int main(int argc, char **argv)
 	free_str_set(&env.subtest_selector.whitelist);
 	free(env.subtest_selector.num_set);
 
+	if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0)
+		return EXIT_FAILURE;
+
 	return env.fail_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
 }
-- 
cgit v1.2.3


From 643e7233aa948901dce81d4573c91ed99fdd272e Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Wed, 1 Jul 2020 23:44:12 +0200
Subject: selftests/bpf: Test_progs option for getting number of tests

It can be practial to get the number of tests that test_progs contain.
This could for example be used to create a shell for-loop construct that
runs the individual tests.

Like:
 for N in $(seq 1 $(./test_progs -c)); do
   ./test_progs -n $N 2>&1 > result_test_${N}.log &
 done ; wait

V2: Add the ability to return the count for the selected tests. This is
useful for getting a count e.g. after excluding some tests with option -b.
The current beakers test script like to report the max test count upfront.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/159363985244.930467.12617117873058936829.stgit@firesoul
---
 tools/testing/selftests/bpf/test_progs.c | 18 ++++++++++++++++++
 tools/testing/selftests/bpf/test_progs.h |  1 +
 2 files changed, 19 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index da70a4f72f54..a5dba14b2025 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -366,6 +366,7 @@ enum ARG_KEYS {
 	ARG_TEST_NAME_BLACKLIST = 'b',
 	ARG_VERIFIER_STATS = 's',
 	ARG_VERBOSE = 'v',
+	ARG_GET_TEST_CNT = 'c',
 };
 
 static const struct argp_option opts[] = {
@@ -379,6 +380,8 @@ static const struct argp_option opts[] = {
 	  "Output verifier statistics", },
 	{ "verbose", ARG_VERBOSE, "LEVEL", OPTION_ARG_OPTIONAL,
 	  "Verbose output (use -vv or -vvv for progressively verbose output)" },
+	{ "count", ARG_GET_TEST_CNT, NULL, 0,
+	  "Get number of selected top-level tests " },
 	{},
 };
 
@@ -511,6 +514,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 			}
 		}
 		break;
+	case ARG_GET_TEST_CNT:
+		env->get_test_cnt = true;
+		break;
 	case ARGP_KEY_ARG:
 		argp_usage(state);
 		break;
@@ -654,6 +660,11 @@ int main(int argc, char **argv)
 				test->test_num, test->test_name))
 			continue;
 
+		if (env.get_test_cnt) {
+			env.succ_cnt++;
+			continue;
+		}
+
 		test->run_test();
 		/* ensure last sub-test is finalized properly */
 		if (test->subtest_name)
@@ -677,9 +688,16 @@ int main(int argc, char **argv)
 			cleanup_cgroup_environment();
 	}
 	stdio_restore();
+
+	if (env.get_test_cnt) {
+		printf("%d\n", env.succ_cnt);
+		goto out;
+	}
+
 	fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
 		env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
 
+out:
 	free_str_set(&env.test_selector.blacklist);
 	free_str_set(&env.test_selector.whitelist);
 	free(env.test_selector.num_set);
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index f4503c926aca..0030584619c3 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -66,6 +66,7 @@ struct test_env {
 	enum verbosity verbosity;
 
 	bool jit_enabled;
+	bool get_test_cnt;
 
 	struct prog_test_def *test;
 	FILE *stdout;
-- 
cgit v1.2.3


From c1f1f3656eee3a59a40e1805699041ec1c14ab83 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Wed, 1 Jul 2020 23:44:17 +0200
Subject: selftests/bpf: Test_progs option for listing test names

The program test_progs have some very useful ability to specify a list of
test name substrings for selecting which tests to run.

This patch add the ability to list the selected test names without running
them. This is practical for seeing which tests gets selected with given
select arguments (which can also contain a exclude list via --name-blacklist).

This output can also be used by shell-scripts in a for-loop:

 for N in $(./test_progs --list -t xdp); do \
   ./test_progs -t $N 2>&1 > result_test_${N}.log & \
 done ; wait

This features can also be used for looking up a test number and returning
a testname. If the selection was empty then a shell EXIT_FAILURE is
returned.  This is useful for scripting. e.g. like this:

 n=1;
 while [ $(./test_progs --list -n $n) ] ; do \
   ./test_progs -n $n ; n=$(( n+1 )); \
 done

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/159363985751.930467.9610992940793316982.stgit@firesoul
---
 tools/testing/selftests/bpf/test_progs.c | 15 +++++++++++++++
 tools/testing/selftests/bpf/test_progs.h |  1 +
 2 files changed, 16 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index a5dba14b2025..ef05d2f0e7bb 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -367,6 +367,7 @@ enum ARG_KEYS {
 	ARG_VERIFIER_STATS = 's',
 	ARG_VERBOSE = 'v',
 	ARG_GET_TEST_CNT = 'c',
+	ARG_LIST_TEST_NAMES = 'l',
 };
 
 static const struct argp_option opts[] = {
@@ -382,6 +383,8 @@ static const struct argp_option opts[] = {
 	  "Verbose output (use -vv or -vvv for progressively verbose output)" },
 	{ "count", ARG_GET_TEST_CNT, NULL, 0,
 	  "Get number of selected top-level tests " },
+	{ "list", ARG_LIST_TEST_NAMES, NULL, 0,
+	  "List test names that would run (without running them) " },
 	{},
 };
 
@@ -517,6 +520,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 	case ARG_GET_TEST_CNT:
 		env->get_test_cnt = true;
 		break;
+	case ARG_LIST_TEST_NAMES:
+		env->list_test_names = true;
+		break;
 	case ARGP_KEY_ARG:
 		argp_usage(state);
 		break;
@@ -665,6 +671,12 @@ int main(int argc, char **argv)
 			continue;
 		}
 
+		if (env.list_test_names) {
+			fprintf(env.stdout, "%s\n", test->test_name);
+			env.succ_cnt++;
+			continue;
+		}
+
 		test->run_test();
 		/* ensure last sub-test is finalized properly */
 		if (test->subtest_name)
@@ -694,6 +706,9 @@ int main(int argc, char **argv)
 		goto out;
 	}
 
+	if (env.list_test_names)
+		goto out;
+
 	fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
 		env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
 
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 0030584619c3..ec31f382e7fd 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -67,6 +67,7 @@ struct test_env {
 
 	bool jit_enabled;
 	bool get_test_cnt;
+	bool list_test_names;
 
 	struct prog_test_def *test;
 	FILE *stdout;
-- 
cgit v1.2.3


From 99126abec5e5778583b959cb164f1888374917d3 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 1 Jul 2020 17:48:52 -0700
Subject: bpf: selftests: A few improvements to network_helpers.c

This patch makes a few changes to the network_helpers.c

1) Enforce SO_RCVTIMEO and SO_SNDTIMEO
   This patch enforces timeout to the network fds through setsockopt
   SO_RCVTIMEO and SO_SNDTIMEO.

   It will remove the need for SOCK_NONBLOCK that requires a more demanding
   timeout logic with epoll/select, e.g. epoll_create, epoll_ctrl, and
   then epoll_wait for timeout.

   That removes the need for connect_wait() from the
   cgroup_skb_sk_lookup.c. The needed change is made in
   cgroup_skb_sk_lookup.c.

2) start_server():
   Add optional addr_str and port to start_server().
   That removes the need of the start_server_with_port().  The caller
   can pass addr_str==NULL and/or port==0.

   I have a future tcp-hdr-opt test that will pass a non-NULL addr_str
   and it is in general useful for other future tests.

   "int timeout_ms" is also added to control the timeout
   on the "accept(listen_fd)".

3) connect_to_fd(): Fully use the server_fd.
   The server sock address has already been obtained from
   getsockname(server_fd).  The sockaddr includes the family,
   so the "int family" arg is redundant.

   Since the server address is obtained from server_fd,  there
   is little reason not to get the server's socket type from the
   server_fd also.  getsockopt(server_fd) can be used to do that,
   so "int type" arg is also removed.

   "int timeout_ms" is added.

4) connect_fd_to_fd():
   "int timeout_ms" is added.
   Some code is also refactored to connect_fd_to_addr() which is
   shared with connect_to_fd().

5) Preserve errno:
   Some callers need to check errno, e.g. cgroup_skb_sk_lookup.c.
   Make changes to do it more consistently in save_errno_close()
   and log_err().

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200702004852.2103003-1-kafai@fb.com
---
 tools/testing/selftests/bpf/network_helpers.c      | 157 ++++++++++++---------
 tools/testing/selftests/bpf/network_helpers.h      |   9 +-
 .../bpf/prog_tests/cgroup_skb_sk_lookup.c          |  12 +-
 .../selftests/bpf/prog_tests/connect_force_port.c  |  10 +-
 .../selftests/bpf/prog_tests/load_bytes_relative.c |   4 +-
 tools/testing/selftests/bpf/prog_tests/tcp_rtt.c   |   4 +-
 6 files changed, 110 insertions(+), 86 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index e36dd1a1780d..acd08715be2e 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -7,8 +7,6 @@
 
 #include <arpa/inet.h>
 
-#include <sys/epoll.h>
-
 #include <linux/err.h>
 #include <linux/in.h>
 #include <linux/in6.h>
@@ -17,8 +15,13 @@
 #include "network_helpers.h"
 
 #define clean_errno() (errno == 0 ? "None" : strerror(errno))
-#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
-	__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+#define log_err(MSG, ...) ({						\
+			int __save = errno;				\
+			fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+				__FILE__, __LINE__, clean_errno(),	\
+				##__VA_ARGS__);				\
+			errno = __save;					\
+})
 
 struct ipv4_packet pkt_v4 = {
 	.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
@@ -37,7 +40,34 @@ struct ipv6_packet pkt_v6 = {
 	.tcp.doff = 5,
 };
 
-int start_server_with_port(int family, int type, __u16 port)
+static int settimeo(int fd, int timeout_ms)
+{
+	struct timeval timeout = { .tv_sec = 3 };
+
+	if (timeout_ms > 0) {
+		timeout.tv_sec = timeout_ms / 1000;
+		timeout.tv_usec = (timeout_ms % 1000) * 1000;
+	}
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeout,
+		       sizeof(timeout))) {
+		log_err("Failed to set SO_RCVTIMEO");
+		return -1;
+	}
+
+	if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeout,
+		       sizeof(timeout))) {
+		log_err("Failed to set SO_SNDTIMEO");
+		return -1;
+	}
+
+	return 0;
+}
+
+#define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; })
+
+int start_server(int family, int type, const char *addr_str, __u16 port,
+		 int timeout_ms)
 {
 	struct sockaddr_storage addr = {};
 	socklen_t len;
@@ -48,120 +78,119 @@ int start_server_with_port(int family, int type, __u16 port)
 
 		sin->sin_family = AF_INET;
 		sin->sin_port = htons(port);
+		if (addr_str &&
+		    inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) {
+			log_err("inet_pton(AF_INET, %s)", addr_str);
+			return -1;
+		}
 		len = sizeof(*sin);
 	} else {
 		struct sockaddr_in6 *sin6 = (void *)&addr;
 
 		sin6->sin6_family = AF_INET6;
 		sin6->sin6_port = htons(port);
+		if (addr_str &&
+		    inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) {
+			log_err("inet_pton(AF_INET6, %s)", addr_str);
+			return -1;
+		}
 		len = sizeof(*sin6);
 	}
 
-	fd = socket(family, type | SOCK_NONBLOCK, 0);
+	fd = socket(family, type, 0);
 	if (fd < 0) {
 		log_err("Failed to create server socket");
 		return -1;
 	}
 
+	if (settimeo(fd, timeout_ms))
+		goto error_close;
+
 	if (bind(fd, (const struct sockaddr *)&addr, len) < 0) {
 		log_err("Failed to bind socket");
-		close(fd);
-		return -1;
+		goto error_close;
 	}
 
 	if (type == SOCK_STREAM) {
 		if (listen(fd, 1) < 0) {
 			log_err("Failed to listed on socket");
-			close(fd);
-			return -1;
+			goto error_close;
 		}
 	}
 
 	return fd;
-}
 
-int start_server(int family, int type)
-{
-	return start_server_with_port(family, type, 0);
+error_close:
+	save_errno_close(fd);
+	return -1;
 }
 
-static const struct timeval timeo_sec = { .tv_sec = 3 };
-static const size_t timeo_optlen = sizeof(timeo_sec);
-
-int connect_to_fd(int family, int type, int server_fd)
+static int connect_fd_to_addr(int fd,
+			      const struct sockaddr_storage *addr,
+			      socklen_t addrlen)
 {
-	int fd, save_errno;
-
-	fd = socket(family, type, 0);
-	if (fd < 0) {
-		log_err("Failed to create client socket");
+	if (connect(fd, (const struct sockaddr *)addr, addrlen)) {
+		log_err("Failed to connect to server");
 		return -1;
 	}
 
-	if (connect_fd_to_fd(fd, server_fd) < 0 && errno != EINPROGRESS) {
-		save_errno = errno;
-		close(fd);
-		errno = save_errno;
-		return -1;
-	}
-
-	return fd;
+	return 0;
 }
 
-int connect_fd_to_fd(int client_fd, int server_fd)
+int connect_to_fd(int server_fd, int timeout_ms)
 {
 	struct sockaddr_storage addr;
-	socklen_t len = sizeof(addr);
-	int save_errno;
+	struct sockaddr_in *addr_in;
+	socklen_t addrlen, optlen;
+	int fd, type;
 
-	if (setsockopt(client_fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
-		       timeo_optlen)) {
-		log_err("Failed to set SO_RCVTIMEO");
+	optlen = sizeof(type);
+	if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) {
+		log_err("getsockopt(SOL_TYPE)");
 		return -1;
 	}
 
-	if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+	addrlen = sizeof(addr);
+	if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
 		log_err("Failed to get server addr");
 		return -1;
 	}
 
-	if (connect(client_fd, (const struct sockaddr *)&addr, len) < 0) {
-		if (errno != EINPROGRESS) {
-			save_errno = errno;
-			log_err("Failed to connect to server");
-			errno = save_errno;
-		}
+	addr_in = (struct sockaddr_in *)&addr;
+	fd = socket(addr_in->sin_family, type, 0);
+	if (fd < 0) {
+		log_err("Failed to create client socket");
 		return -1;
 	}
 
-	return 0;
+	if (settimeo(fd, timeout_ms))
+		goto error_close;
+
+	if (connect_fd_to_addr(fd, &addr, addrlen))
+		goto error_close;
+
+	return fd;
+
+error_close:
+	save_errno_close(fd);
+	return -1;
 }
 
-int connect_wait(int fd)
+int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
 {
-	struct epoll_event ev = {}, events[2];
-	int timeout_ms = 1000;
-	int efd, nfd;
+	struct sockaddr_storage addr;
+	socklen_t len = sizeof(addr);
 
-	efd = epoll_create1(EPOLL_CLOEXEC);
-	if (efd < 0) {
-		log_err("Failed to open epoll fd");
+	if (settimeo(client_fd, timeout_ms))
 		return -1;
-	}
-
-	ev.events = EPOLLRDHUP | EPOLLOUT;
-	ev.data.fd = fd;
 
-	if (epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev) < 0) {
-		log_err("Failed to register fd=%d on epoll fd=%d", fd, efd);
-		close(efd);
+	if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+		log_err("Failed to get server addr");
 		return -1;
 	}
 
-	nfd = epoll_wait(efd, events, ARRAY_SIZE(events), timeout_ms);
-	if (nfd < 0)
-		log_err("Failed to wait for I/O event on epoll fd=%d", efd);
+	if (connect_fd_to_addr(client_fd, &addr, len))
+		return -1;
 
-	close(efd);
-	return nfd;
+	return 0;
 }
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 6a8009605670..f580e82fda58 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -33,10 +33,9 @@ struct ipv6_packet {
 } __packed;
 extern struct ipv6_packet pkt_v6;
 
-int start_server(int family, int type);
-int start_server_with_port(int family, int type, __u16 port);
-int connect_to_fd(int family, int type, int server_fd);
-int connect_fd_to_fd(int client_fd, int server_fd);
-int connect_wait(int client_fd);
+int start_server(int family, int type, const char *addr, __u16 port,
+		 int timeout_ms);
+int connect_to_fd(int server_fd, int timeout_ms);
+int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
 
 #endif
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
index 059047af7df3..464edc1c1708 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
@@ -13,7 +13,7 @@ static void run_lookup_test(__u16 *g_serv_port, int out_sk)
 	socklen_t addr_len = sizeof(addr);
 	__u32 duration = 0;
 
-	serv_sk = start_server(AF_INET6, SOCK_STREAM);
+	serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
 	if (CHECK(serv_sk < 0, "start_server", "failed to start server\n"))
 		return;
 
@@ -24,17 +24,13 @@ static void run_lookup_test(__u16 *g_serv_port, int out_sk)
 	*g_serv_port = addr.sin6_port;
 
 	/* Client outside of test cgroup should fail to connect by timeout. */
-	err = connect_fd_to_fd(out_sk, serv_sk);
+	err = connect_fd_to_fd(out_sk, serv_sk, 1000);
 	if (CHECK(!err || errno != EINPROGRESS, "connect_fd_to_fd",
 		  "unexpected result err %d errno %d\n", err, errno))
 		goto cleanup;
 
-	err = connect_wait(out_sk);
-	if (CHECK(err, "connect_wait", "unexpected result %d\n", err))
-		goto cleanup;
-
 	/* Client inside test cgroup should connect just fine. */
-	in_sk = connect_to_fd(AF_INET6, SOCK_STREAM, serv_sk);
+	in_sk = connect_to_fd(serv_sk, 0);
 	if (CHECK(in_sk < 0, "connect_to_fd", "errno %d\n", errno))
 		goto cleanup;
 
@@ -85,7 +81,7 @@ void test_cgroup_skb_sk_lookup(void)
 	 * differs from that of testing cgroup. Moving selftests process to
 	 * testing cgroup won't change cgroup id of an already created socket.
 	 */
-	out_sk = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
+	out_sk = socket(AF_INET6, SOCK_STREAM, 0);
 	if (CHECK_FAIL(out_sk < 0))
 		return;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
index 17bbf76812ca..9229db2f5ca5 100644
--- a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
+++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
@@ -114,7 +114,7 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type)
 		goto close_bpf_object;
 	}
 
-	fd = connect_to_fd(family, type, server_fd);
+	fd = connect_to_fd(server_fd, 0);
 	if (fd < 0) {
 		err = -1;
 		goto close_bpf_object;
@@ -137,25 +137,25 @@ void test_connect_force_port(void)
 	if (CHECK_FAIL(cgroup_fd < 0))
 		return;
 
-	server_fd = start_server_with_port(AF_INET, SOCK_STREAM, 60123);
+	server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 60123, 0);
 	if (CHECK_FAIL(server_fd < 0))
 		goto close_cgroup_fd;
 	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_STREAM));
 	close(server_fd);
 
-	server_fd = start_server_with_port(AF_INET6, SOCK_STREAM, 60124);
+	server_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 60124, 0);
 	if (CHECK_FAIL(server_fd < 0))
 		goto close_cgroup_fd;
 	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_STREAM));
 	close(server_fd);
 
-	server_fd = start_server_with_port(AF_INET, SOCK_DGRAM, 60123);
+	server_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 60123, 0);
 	if (CHECK_FAIL(server_fd < 0))
 		goto close_cgroup_fd;
 	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_DGRAM));
 	close(server_fd);
 
-	server_fd = start_server_with_port(AF_INET6, SOCK_DGRAM, 60124);
+	server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 60124, 0);
 	if (CHECK_FAIL(server_fd < 0))
 		goto close_cgroup_fd;
 	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_DGRAM));
diff --git a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
index c1168e4a9036..5a2a689dbb68 100644
--- a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
+++ b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
@@ -23,7 +23,7 @@ void test_load_bytes_relative(void)
 	if (CHECK_FAIL(cgroup_fd < 0))
 		return;
 
-	server_fd = start_server(AF_INET, SOCK_STREAM);
+	server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
 	if (CHECK_FAIL(server_fd < 0))
 		goto close_cgroup_fd;
 
@@ -49,7 +49,7 @@ void test_load_bytes_relative(void)
 	if (CHECK_FAIL(err))
 		goto close_bpf_object;
 
-	client_fd = connect_to_fd(AF_INET, SOCK_STREAM, server_fd);
+	client_fd = connect_to_fd(server_fd, 0);
 	if (CHECK_FAIL(client_fd < 0))
 		goto close_bpf_object;
 	close(client_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
index 9013a0c01eed..d207e968e6b1 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
@@ -118,7 +118,7 @@ static int run_test(int cgroup_fd, int server_fd)
 		goto close_bpf_object;
 	}
 
-	client_fd = connect_to_fd(AF_INET, SOCK_STREAM, server_fd);
+	client_fd = connect_to_fd(server_fd, 0);
 	if (client_fd < 0) {
 		err = -1;
 		goto close_bpf_object;
@@ -161,7 +161,7 @@ void test_tcp_rtt(void)
 	if (CHECK_FAIL(cgroup_fd < 0))
 		return;
 
-	server_fd = start_server(AF_INET, SOCK_STREAM);
+	server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
 	if (CHECK_FAIL(server_fd < 0))
 		goto close_cgroup_fd;
 
-- 
cgit v1.2.3


From 811d7e375d08312dba23f3b6bf7e58ec14aa5dcb Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 1 Jul 2020 17:48:58 -0700
Subject: bpf: selftests: Restore netns after each test

It is common for networking tests creating its netns and making its own
setting under this new netns (e.g. changing tcp sysctl).  If the test
forgot to restore to the original netns, it would affect the
result of other tests.

This patch saves the original netns at the beginning and then restores it
after every test.  Since the restore "setns()" is not expensive, it does it
on all tests without tracking if a test has created a new netns or not.

The new restore_netns() could also be done in test__end_subtest() such
that each subtest will get an automatic netns reset.  However,
the individual test would lose flexibility to have total control
on netns for its own subtests.  In some cases, forcing a test to do
unnecessary netns re-configure for each subtest is time consuming.
e.g. In my vm, forcing netns re-configure on each subtest in sk_assign.c
increased the runtime from 1s to 8s.  On top of that,  test_progs.c
is also doing per-test (instead of per-subtest) cleanup for cgroup.
Thus, this patch also does per-test restore_netns().  The only existing
per-subtest cleanup is reset_affinity() and no test is depending on this.
Thus, it is removed from test__end_subtest() to give a consistent
expectation to the individual tests.  test_progs.c only ensures
any affinity/netns/cgroup change made by an earlier test does not
affect the following tests.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200702004858.2103728-1-kafai@fb.com
---
 tools/testing/selftests/bpf/test_progs.c | 23 +++++++++++++++++++++--
 tools/testing/selftests/bpf/test_progs.h |  2 ++
 2 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index ef05d2f0e7bb..104e833d0087 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -121,6 +121,24 @@ static void reset_affinity() {
 	}
 }
 
+static void save_netns(void)
+{
+	env.saved_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+	if (env.saved_netns_fd == -1) {
+		perror("open(/proc/self/ns/net)");
+		exit(-1);
+	}
+}
+
+static void restore_netns(void)
+{
+	if (setns(env.saved_netns_fd, CLONE_NEWNET) == -1) {
+		stdio_restore();
+		perror("setns(CLONE_NEWNS)");
+		exit(-1);
+	}
+}
+
 void test__end_subtest()
 {
 	struct prog_test_def *test = env.test;
@@ -138,8 +156,6 @@ void test__end_subtest()
 	       test->test_num, test->subtest_num,
 	       test->subtest_name, sub_error_cnt ? "FAIL" : "OK");
 
-	reset_affinity();
-
 	free(test->subtest_name);
 	test->subtest_name = NULL;
 }
@@ -655,6 +671,7 @@ int main(int argc, char **argv)
 		return -1;
 	}
 
+	save_netns();
 	stdio_hijack();
 	for (i = 0; i < prog_test_cnt; i++) {
 		struct prog_test_def *test = &prog_test_defs[i];
@@ -696,6 +713,7 @@ int main(int argc, char **argv)
 			test->error_cnt ? "FAIL" : "OK");
 
 		reset_affinity();
+		restore_netns();
 		if (test->need_cgroup_cleanup)
 			cleanup_cgroup_environment();
 	}
@@ -719,6 +737,7 @@ out:
 	free_str_set(&env.subtest_selector.blacklist);
 	free_str_set(&env.subtest_selector.whitelist);
 	free(env.subtest_selector.num_set);
+	close(env.saved_netns_fd);
 
 	if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0)
 		return EXIT_FAILURE;
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index ec31f382e7fd..6e09bf738473 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -80,6 +80,8 @@ struct test_env {
 	int sub_succ_cnt; /* successful sub-tests */
 	int fail_cnt; /* total failed tests + sub-tests */
 	int skip_cnt; /* skipped tests */
+
+	int saved_netns_fd;
 };
 
 extern struct test_env env;
-- 
cgit v1.2.3


From 8ae4121bd89e3dce27b519ed469efbc15423af18 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Thu, 2 Jul 2020 21:31:59 -0700
Subject: bpf: Fix bpftool without skeleton code enabled

Fix segfault from bpftool by adding emit_obj_refs_plain when skeleton
code is disabled.

Tested by deleting BUILD_BPF_SKELS in Makefile. We found this doing
backports for Cilium when a testing image pulled in latest bpf-next
bpftool, but kept using an older clang-7.

  # ./bpftool prog show
  Error: bpftool built without PID iterator support
  3: cgroup_skb  tag 7be49e3934a125ba  gpl
          loaded_at 2020-07-01T08:01:29-0700  uid 0
  Segmentation fault

Fixes: d53dee3fe013 ("tools/bpftool: Show info for processes holding BPF map/prog/link/btf FDs")
Reported-by: Joe Stringer <joe@wand.net.nz>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/159375071997.14984.17404504293832961401.stgit@john-XPS-13-9370
---
 tools/bpf/bpftool/pids.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c
index 2709be4de2b1..7d5416667c85 100644
--- a/tools/bpf/bpftool/pids.c
+++ b/tools/bpf/bpftool/pids.c
@@ -19,6 +19,7 @@ int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type)
 	return -ENOTSUP;
 }
 void delete_obj_refs_table(struct obj_refs_table *table) {}
+void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix) {}
 
 #else /* BPFTOOL_WITHOUT_SKELETONS */
 
-- 
cgit v1.2.3


From 9ff79af3331277c69ac61cc75b2392eb3284e305 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Fri, 3 Jul 2020 11:17:19 -0700
Subject: selftests/bpf: Fix compilation error of bpf_iter_task_stack.c

BPF selftests show a compilation error as follows:

  libbpf: invalid relo for 'entries' in special section 0xfff2; forgot to
  initialize global var?..

Fix it by initializing 'entries' to zeros.

Fixes: c7568114bc56 ("selftests/bpf: Add bpf_iter test with bpf_get_task_stack()")
Reported-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200703181719.3747072-1-songliubraving@fb.com
---
 tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
index e40d32a2ed93..50e59a2e142e 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
@@ -7,7 +7,7 @@
 char _license[] SEC("license") = "GPL";
 
 #define MAX_STACK_TRACE_DEPTH   64
-unsigned long entries[MAX_STACK_TRACE_DEPTH];
+unsigned long entries[MAX_STACK_TRACE_DEPTH] = {};
 #define SIZE_OF_ULONG (sizeof(unsigned long))
 
 SEC("iter/task")
-- 
cgit v1.2.3