diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-01-28 16:02:33 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-01-28 16:02:33 -0800 |
commit | bd2463ac7d7ec51d432f23bf0e893fb371a908cd (patch) | |
tree | 3da32c23be83adb9d9bda7e51b51fa39f69f2447 /tools/lib | |
parent | a78208e2436963d0b2c7d186277d6e1a9755029a (diff) | |
parent | f76e4c167ea2212e23c15ee7e601a865e822c291 (diff) | |
download | linux-bd2463ac7d7ec51d432f23bf0e893fb371a908cd.tar.gz linux-bd2463ac7d7ec51d432f23bf0e893fb371a908cd.tar.bz2 linux-bd2463ac7d7ec51d432f23bf0e893fb371a908cd.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from David Miller:
1) Add WireGuard
2) Add HE and TWT support to ath11k driver, from John Crispin.
3) Add ESP in TCP encapsulation support, from Sabrina Dubroca.
4) Add variable window congestion control to TIPC, from Jon Maloy.
5) Add BCM84881 PHY driver, from Russell King.
6) Start adding netlink support for ethtool operations, from Michal
Kubecek.
7) Add XDP drop and TX action support to ena driver, from Sameeh
Jubran.
8) Add new ipv4 route notifications so that mlxsw driver does not have
to handle identical routes itself. From Ido Schimmel.
9) Add BPF dynamic program extensions, from Alexei Starovoitov.
10) Support RX and TX timestamping in igc, from Vinicius Costa Gomes.
11) Add support for macsec HW offloading, from Antoine Tenart.
12) Add initial support for MPTCP protocol, from Christoph Paasch,
Matthieu Baerts, Florian Westphal, Peter Krystad, and many others.
13) Add Octeontx2 PF support, from Sunil Goutham, Geetha sowjanya, Linu
Cherian, and others.
* git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1469 commits)
net: phy: add default ARCH_BCM_IPROC for MDIO_BCM_IPROC
udp: segment looped gso packets correctly
netem: change mailing list
qed: FW 8.42.2.0 debug features
qed: rt init valid initialization changed
qed: Debug feature: ilt and mdump
qed: FW 8.42.2.0 Add fw overlay feature
qed: FW 8.42.2.0 HSI changes
qed: FW 8.42.2.0 iscsi/fcoe changes
qed: Add abstraction for different hsi values per chip
qed: FW 8.42.2.0 Additional ll2 type
qed: Use dmae to write to widebus registers in fw_funcs
qed: FW 8.42.2.0 Parser offsets modified
qed: FW 8.42.2.0 Queue Manager changes
qed: FW 8.42.2.0 Expose new registers and change windows
qed: FW 8.42.2.0 Internal ram offsets modifications
MAINTAINERS: Add entry for Marvell OcteonTX2 Physical Function driver
Documentation: net: octeontx2: Add RVU HW and drivers overview
octeontx2-pf: ethtool RSS config support
octeontx2-pf: Add basic ethtool support
...
Diffstat (limited to 'tools/lib')
-rw-r--r-- | tools/lib/bpf/Makefile | 29 | ||||
-rw-r--r-- | tools/lib/bpf/bpf.c | 89 | ||||
-rw-r--r-- | tools/lib/bpf/bpf.h | 44 | ||||
-rw-r--r-- | tools/lib/bpf/bpf_helpers.h | 11 | ||||
-rw-r--r-- | tools/lib/bpf/bpf_prog_linfo.c | 3 | ||||
-rw-r--r-- | tools/lib/bpf/btf.c | 151 | ||||
-rw-r--r-- | tools/lib/bpf/btf.h | 31 | ||||
-rw-r--r-- | tools/lib/bpf/btf_dump.c | 119 | ||||
-rw-r--r-- | tools/lib/bpf/hashmap.c | 3 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.c | 2746 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.h | 121 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.map | 27 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.pc.template | 2 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf_common.h | 40 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf_errno.c | 3 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf_internal.h | 21 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf_probes.c | 27 | ||||
-rw-r--r-- | tools/lib/bpf/netlink.c | 3 | ||||
-rw-r--r-- | tools/lib/bpf/nlattr.c | 3 | ||||
-rw-r--r-- | tools/lib/bpf/str_error.c | 3 | ||||
-rw-r--r-- | tools/lib/bpf/xsk.c | 3 |
21 files changed, 2868 insertions, 611 deletions
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 97830e46d1a0..aee7f1a83c77 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -56,8 +56,8 @@ ifndef VERBOSE endif FEATURE_USER = .libbpf -FEATURE_TESTS = libelf libelf-mmap bpf reallocarray -FEATURE_DISPLAY = libelf bpf +FEATURE_TESTS = libelf libelf-mmap zlib bpf reallocarray +FEATURE_DISPLAY = libelf zlib bpf INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES) @@ -148,6 +148,7 @@ TAGS_PROG := $(if $(shell which etags 2>/dev/null),etags,ctags) GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \ + sed 's/\[.*\]//' | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ sort -u | wc -l) VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \ @@ -160,7 +161,7 @@ all: fixdep all_cmd: $(CMD_TARGETS) check -$(BPF_IN_SHARED): force elfdep bpfdep $(BPF_HELPER_DEFS) +$(BPF_IN_SHARED): force elfdep zdep bpfdep $(BPF_HELPER_DEFS) @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ (diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true @@ -178,11 +179,11 @@ $(BPF_IN_SHARED): force elfdep bpfdep $(BPF_HELPER_DEFS) echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)" -$(BPF_IN_STATIC): force elfdep bpfdep $(BPF_HELPER_DEFS) +$(BPF_IN_STATIC): force elfdep zdep bpfdep $(BPF_HELPER_DEFS) $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR) $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h - $(Q)$(srctree)/scripts/bpf_helpers_doc.py --header \ + $(QUIET_GEN)$(srctree)/scripts/bpf_helpers_doc.py --header \ --file $(srctree)/tools/include/uapi/linux/bpf.h > $(BPF_HELPER_DEFS) $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) @@ -190,7 +191,7 @@ $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) $(QUIET_LINK)$(CC) $(LDFLAGS) \ --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \ - -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@ + -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -lz -o $@ @ln -sf $(@F) $(OUTPUT)libbpf.so @ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION) @@ -214,6 +215,7 @@ check_abi: $(OUTPUT)libbpf.so "versioned in $(VERSION_SCRIPT)." >&2; \ readelf -s --wide $(BPF_IN_SHARED) | \ cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \ + sed 's/\[.*\]//' | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \ sort -u > $(OUTPUT)libbpf_global_syms.tmp; \ readelf -s --wide $(OUTPUT)libbpf.so | \ @@ -250,6 +252,7 @@ install_headers: $(BPF_HELPER_DEFS) $(call do_install,libbpf.h,$(prefix)/include/bpf,644); \ $(call do_install,btf.h,$(prefix)/include/bpf,644); \ $(call do_install,libbpf_util.h,$(prefix)/include/bpf,644); \ + $(call do_install,libbpf_common.h,$(prefix)/include/bpf,644); \ $(call do_install,xsk.h,$(prefix)/include/bpf,644); \ $(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \ $(call do_install,$(BPF_HELPER_DEFS),$(prefix)/include/bpf,644); \ @@ -270,20 +273,24 @@ config-clean: $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null clean: - $(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \ - *.o *~ *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) .*.d .*.cmd \ - *.pc LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \ - $(SHARED_OBJDIR) $(STATIC_OBJDIR) + $(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \ + *~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \ + $(SHARED_OBJDIR) $(STATIC_OBJDIR) \ + $(addprefix $(OUTPUT), \ + *.o *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) *.pc) $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf -PHONY += force elfdep bpfdep cscope tags +PHONY += force elfdep zdep bpfdep cscope tags force: elfdep: @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit 1 ; fi +zdep: + @if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi + bpfdep: @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 98596e15390f..c6dafe563176 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -32,6 +32,9 @@ #include "libbpf.h" #include "libbpf_internal.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + /* * When building perf, unistd.h is overridden. __NR_bpf is * required to be defined explicitly. @@ -95,7 +98,11 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) attr.btf_key_type_id = create_attr->btf_key_type_id; attr.btf_value_type_id = create_attr->btf_value_type_id; attr.map_ifindex = create_attr->map_ifindex; - attr.inner_map_fd = create_attr->inner_map_fd; + if (attr.map_type == BPF_MAP_TYPE_STRUCT_OPS) + attr.btf_vmlinux_value_type_id = + create_attr->btf_vmlinux_value_type_id; + else + attr.inner_map_fd = create_attr->inner_map_fd; return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); } @@ -228,7 +235,10 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, memset(&attr, 0, sizeof(attr)); attr.prog_type = load_attr->prog_type; attr.expected_attach_type = load_attr->expected_attach_type; - if (attr.prog_type == BPF_PROG_TYPE_TRACING) { + if (attr.prog_type == BPF_PROG_TYPE_STRUCT_OPS) { + attr.attach_btf_id = load_attr->attach_btf_id; + } else if (attr.prog_type == BPF_PROG_TYPE_TRACING || + attr.prog_type == BPF_PROG_TYPE_EXT) { attr.attach_btf_id = load_attr->attach_btf_id; attr.attach_prog_fd = load_attr->attach_prog_fd; } else { @@ -443,6 +453,64 @@ int bpf_map_freeze(int fd) return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr)); } +static int bpf_map_batch_common(int cmd, int fd, void *in_batch, + void *out_batch, void *keys, void *values, + __u32 *count, + const struct bpf_map_batch_opts *opts) +{ + union bpf_attr attr; + int ret; + + if (!OPTS_VALID(opts, bpf_map_batch_opts)) + return -EINVAL; + + memset(&attr, 0, sizeof(attr)); + attr.batch.map_fd = fd; + attr.batch.in_batch = ptr_to_u64(in_batch); + attr.batch.out_batch = ptr_to_u64(out_batch); + attr.batch.keys = ptr_to_u64(keys); + attr.batch.values = ptr_to_u64(values); + attr.batch.count = *count; + attr.batch.elem_flags = OPTS_GET(opts, elem_flags, 0); + attr.batch.flags = OPTS_GET(opts, flags, 0); + + ret = sys_bpf(cmd, &attr, sizeof(attr)); + *count = attr.batch.count; + + return ret; +} + +int bpf_map_delete_batch(int fd, void *keys, __u32 *count, + const struct bpf_map_batch_opts *opts) +{ + return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL, + NULL, keys, NULL, count, opts); +} + +int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys, + void *values, __u32 *count, + const struct bpf_map_batch_opts *opts) +{ + return bpf_map_batch_common(BPF_MAP_LOOKUP_BATCH, fd, in_batch, + out_batch, keys, values, count, opts); +} + +int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch, + void *keys, void *values, __u32 *count, + const struct bpf_map_batch_opts *opts) +{ + return bpf_map_batch_common(BPF_MAP_LOOKUP_AND_DELETE_BATCH, + fd, in_batch, out_batch, keys, values, + count, opts); +} + +int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count, + const struct bpf_map_batch_opts *opts) +{ + return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL, + keys, values, count, opts); +} + int bpf_obj_pin(int fd, const char *pathname) { union bpf_attr attr; @@ -467,13 +535,28 @@ int bpf_obj_get(const char *pathname) int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, unsigned int flags) { + DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, opts, + .flags = flags, + ); + + return bpf_prog_attach_xattr(prog_fd, target_fd, type, &opts); +} + +int bpf_prog_attach_xattr(int prog_fd, int target_fd, + enum bpf_attach_type type, + const struct bpf_prog_attach_opts *opts) +{ union bpf_attr attr; + if (!OPTS_VALID(opts, bpf_prog_attach_opts)) + return -EINVAL; + memset(&attr, 0, sizeof(attr)); attr.target_fd = target_fd; attr.attach_bpf_fd = prog_fd; attr.attach_type = type; - attr.attach_flags = flags; + attr.attach_flags = OPTS_GET(opts, flags, 0); + attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0); return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 3c791fa8e68e..b976e77316cc 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -28,14 +28,12 @@ #include <stddef.h> #include <stdint.h> +#include "libbpf_common.h" + #ifdef __cplusplus extern "C" { #endif -#ifndef LIBBPF_API -#define LIBBPF_API __attribute__((visibility("default"))) -#endif - struct bpf_create_map_attr { const char *name; enum bpf_map_type map_type; @@ -48,7 +46,10 @@ struct bpf_create_map_attr { __u32 btf_key_type_id; __u32 btf_value_type_id; __u32 map_ifindex; - __u32 inner_map_fd; + union { + __u32 inner_map_fd; + __u32 btf_vmlinux_value_type_id; + }; }; LIBBPF_API int @@ -126,10 +127,43 @@ LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key, LIBBPF_API int bpf_map_delete_elem(int fd, const void *key); LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key); LIBBPF_API int bpf_map_freeze(int fd); + +struct bpf_map_batch_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + __u64 elem_flags; + __u64 flags; +}; +#define bpf_map_batch_opts__last_field flags + +LIBBPF_API int bpf_map_delete_batch(int fd, void *keys, + __u32 *count, + const struct bpf_map_batch_opts *opts); +LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, + void *keys, void *values, __u32 *count, + const struct bpf_map_batch_opts *opts); +LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, + void *out_batch, void *keys, + void *values, __u32 *count, + const struct bpf_map_batch_opts *opts); +LIBBPF_API int bpf_map_update_batch(int fd, void *keys, void *values, + __u32 *count, + const struct bpf_map_batch_opts *opts); + LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); LIBBPF_API int bpf_obj_get(const char *pathname); + +struct bpf_prog_attach_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + unsigned int flags; + int replace_prog_fd; +}; +#define bpf_prog_attach_opts__last_field replace_prog_fd + LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, unsigned int flags); +LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd, + enum bpf_attach_type type, + const struct bpf_prog_attach_opts *opts); LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 0c7d28292898..f69cc208778a 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -25,6 +25,9 @@ #ifndef __always_inline #define __always_inline __attribute__((always_inline)) #endif +#ifndef __weak +#define __weak __attribute__((weak)) +#endif /* * Helper structure used by eBPF C program @@ -44,4 +47,12 @@ enum libbpf_pin_type { LIBBPF_PIN_BY_NAME, }; +enum libbpf_tristate { + TRI_NO = 0, + TRI_YES = 1, + TRI_MODULE = 2, +}; + +#define __kconfig __attribute__((section(".kconfig"))) + #endif diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c index 3ed1a27b5f7c..bafca49cb1e6 100644 --- a/tools/lib/bpf/bpf_prog_linfo.c +++ b/tools/lib/bpf/bpf_prog_linfo.c @@ -8,6 +8,9 @@ #include "libbpf.h" #include "libbpf_internal.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + struct bpf_prog_linfo { void *raw_linfo; void *raw_jited_linfo; diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 88efa2bb7137..3d1c25fc97ae 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -8,6 +8,10 @@ #include <fcntl.h> #include <unistd.h> #include <errno.h> +#include <sys/utsname.h> +#include <sys/param.h> +#include <sys/stat.h> +#include <linux/kernel.h> #include <linux/err.h> #include <linux/btf.h> #include <gelf.h> @@ -17,8 +21,11 @@ #include "libbpf_internal.h" #include "hashmap.h" -#define BTF_MAX_NR_TYPES 0x7fffffff -#define BTF_MAX_STR_OFFSET 0x7fffffff +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + +#define BTF_MAX_NR_TYPES 0x7fffffffU +#define BTF_MAX_STR_OFFSET 0x7fffffffU static struct btf_type btf_void; @@ -50,7 +57,7 @@ static int btf_add_type(struct btf *btf, struct btf_type *t) if (btf->types_size == BTF_MAX_NR_TYPES) return -E2BIG; - expand_by = max(btf->types_size >> 2, 16); + expand_by = max(btf->types_size >> 2, 16U); new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by); new_types = realloc(btf->types, sizeof(*new_types) * new_size); @@ -278,6 +285,45 @@ done: return nelems * size; } +int btf__align_of(const struct btf *btf, __u32 id) +{ + const struct btf_type *t = btf__type_by_id(btf, id); + __u16 kind = btf_kind(t); + + switch (kind) { + case BTF_KIND_INT: + case BTF_KIND_ENUM: + return min(sizeof(void *), (size_t)t->size); + case BTF_KIND_PTR: + return sizeof(void *); + case BTF_KIND_TYPEDEF: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + return btf__align_of(btf, t->type); + case BTF_KIND_ARRAY: + return btf__align_of(btf, btf_array(t)->type); + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m = btf_members(t); + __u16 vlen = btf_vlen(t); + int i, max_align = 1, align; + + for (i = 0; i < vlen; i++, m++) { + align = btf__align_of(btf, m->type); + if (align <= 0) + return align; + max_align = max(max_align, align); + } + + return max_align; + } + default: + pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t)); + return 0; + } +} + int btf__resolve_type(const struct btf *btf, __u32 type_id) { const struct btf_type *t; @@ -539,6 +585,12 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, return -ENOENT; } + /* .extern datasec size and var offsets were set correctly during + * extern collection step, so just skip straight to sorting variables + */ + if (t->size) + goto sort_vars; + ret = bpf_object__section_size(obj, name, &size); if (ret || !size || (t->size && t->size != size)) { pr_debug("Invalid size for section %s: %u bytes\n", name, size); @@ -575,7 +627,8 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, vsi->offset = off; } - qsort(t + 1, vars, sizeof(*vsi), compare_vsi_off); +sort_vars: + qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off); return 0; } @@ -1352,7 +1405,7 @@ static int btf_dedup_hypot_map_add(struct btf_dedup *d, if (d->hypot_cnt == d->hypot_cap) { __u32 *new_list; - d->hypot_cap += max(16, d->hypot_cap / 2); + d->hypot_cap += max((size_t)16, d->hypot_cap / 2); new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap); if (!new_list) return -ENOMEM; @@ -1648,7 +1701,7 @@ static int btf_dedup_strings(struct btf_dedup *d) if (strs.cnt + 1 > strs.cap) { struct btf_str_ptr *new_ptrs; - strs.cap += max(strs.cnt / 2, 16); + strs.cap += max(strs.cnt / 2, 16U); new_ptrs = realloc(strs.ptrs, sizeof(strs.ptrs[0]) * strs.cap); if (!new_ptrs) { @@ -2882,3 +2935,89 @@ static int btf_dedup_remap_types(struct btf_dedup *d) } return 0; } + +static struct btf *btf_load_raw(const char *path) +{ + struct btf *btf; + size_t read_cnt; + struct stat st; + void *data; + FILE *f; + + if (stat(path, &st)) + return ERR_PTR(-errno); + + data = malloc(st.st_size); + if (!data) + return ERR_PTR(-ENOMEM); + + f = fopen(path, "rb"); + if (!f) { + btf = ERR_PTR(-errno); + goto cleanup; + } + + read_cnt = fread(data, 1, st.st_size, f); + fclose(f); + if (read_cnt < st.st_size) { + btf = ERR_PTR(-EBADF); + goto cleanup; + } + + btf = btf__new(data, read_cnt); + +cleanup: + free(data); + return btf; +} + +/* + * Probe few well-known locations for vmlinux kernel image and try to load BTF + * data out of it to use for target BTF. + */ +struct btf *libbpf_find_kernel_btf(void) +{ + struct { + const char *path_fmt; + bool raw_btf; + } locations[] = { + /* try canonical vmlinux BTF through sysfs first */ + { "/sys/kernel/btf/vmlinux", true /* raw BTF */ }, + /* fall back to trying to find vmlinux ELF on disk otherwise */ + { "/boot/vmlinux-%1$s" }, + { "/lib/modules/%1$s/vmlinux-%1$s" }, + { "/lib/modules/%1$s/build/vmlinux" }, + { "/usr/lib/modules/%1$s/kernel/vmlinux" }, + { "/usr/lib/debug/boot/vmlinux-%1$s" }, + { "/usr/lib/debug/boot/vmlinux-%1$s.debug" }, + { "/usr/lib/debug/lib/modules/%1$s/vmlinux" }, + }; + char path[PATH_MAX + 1]; + struct utsname buf; + struct btf *btf; + int i; + + uname(&buf); + + for (i = 0; i < ARRAY_SIZE(locations); i++) { + snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release); + + if (access(path, R_OK)) + continue; + + if (locations[i].raw_btf) + btf = btf_load_raw(path); + else + btf = btf__parse_elf(path, NULL); + + pr_debug("loading kernel BTF '%s': %ld\n", + path, IS_ERR(btf) ? PTR_ERR(btf) : 0); + if (IS_ERR(btf)) + continue; + + return btf; + } + + pr_warn("failed to find valid kernel BTF\n"); + return ERR_PTR(-ESRCH); +} diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index d9ac73a02cde..70c1b7ec2bd0 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -8,14 +8,12 @@ #include <linux/btf.h> #include <linux/types.h> +#include "libbpf_common.h" + #ifdef __cplusplus extern "C" { #endif -#ifndef LIBBPF_API -#define LIBBPF_API __attribute__((visibility("default"))) -#endif - #define BTF_ELF_SEC ".BTF" #define BTF_EXT_ELF_SEC ".BTF.ext" #define MAPS_ELF_SEC ".maps" @@ -79,6 +77,7 @@ LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id); LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); +LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id); LIBBPF_API int btf__fd(const struct btf *btf); LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); @@ -103,6 +102,8 @@ LIBBPF_API int btf_ext__reloc_line_info(const struct btf *btf, LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); +LIBBPF_API struct btf *libbpf_find_kernel_btf(void); + struct btf_dedup_opts { unsigned int dedup_table_size; bool dont_resolve_fwds; @@ -127,6 +128,28 @@ LIBBPF_API void btf_dump__free(struct btf_dump *d); LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id); +struct btf_dump_emit_type_decl_opts { + /* size of this struct, for forward/backward compatiblity */ + size_t sz; + /* optional field name for type declaration, e.g.: + * - struct my_struct <FNAME> + * - void (*<FNAME>)(int) + * - char (*<FNAME>)[123] + */ + const char *field_name; + /* extra indentation level (in number of tabs) to emit for multi-line + * type declarations (e.g., anonymous struct); applies for lines + * starting from the second one (first line is assumed to have + * necessary indentation already + */ + int indent_level; +}; +#define btf_dump_emit_type_decl_opts__last_field indent_level + +LIBBPF_API int +btf_dump__emit_type_decl(struct btf_dump *d, __u32 id, + const struct btf_dump_emit_type_decl_opts *opts); + /* * A set of helpers for easier BTF types handling */ diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index cb126d8fcf75..bd09ed1710f1 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -18,6 +18,9 @@ #include "libbpf.h" #include "libbpf_internal.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t"; static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1; @@ -116,6 +119,8 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...) va_end(args); } +static int btf_dump_mark_referenced(struct btf_dump *d); + struct btf_dump *btf_dump__new(const struct btf *btf, const struct btf_ext *btf_ext, const struct btf_dump_opts *opts, @@ -137,18 +142,40 @@ struct btf_dump *btf_dump__new(const struct btf *btf, if (IS_ERR(d->type_names)) { err = PTR_ERR(d->type_names); d->type_names = NULL; - btf_dump__free(d); - return ERR_PTR(err); + goto err; } d->ident_names = hashmap__new(str_hash_fn, str_equal_fn, NULL); if (IS_ERR(d->ident_names)) { err = PTR_ERR(d->ident_names); d->ident_names = NULL; - btf_dump__free(d); - return ERR_PTR(err); + goto err; + } + d->type_states = calloc(1 + btf__get_nr_types(d->btf), + sizeof(d->type_states[0])); + if (!d->type_states) { + err = -ENOMEM; + goto err; + } + d->cached_names = calloc(1 + btf__get_nr_types(d->btf), + sizeof(d->cached_names[0])); + if (!d->cached_names) { + err = -ENOMEM; + goto err; } + /* VOID is special */ + d->type_states[0].order_state = ORDERED; + d->type_states[0].emit_state = EMITTED; + + /* eagerly determine referenced types for anon enums */ + err = btf_dump_mark_referenced(d); + if (err) + goto err; + return d; +err: + btf_dump__free(d); + return ERR_PTR(err); } void btf_dump__free(struct btf_dump *d) @@ -175,7 +202,6 @@ void btf_dump__free(struct btf_dump *d) free(d); } -static int btf_dump_mark_referenced(struct btf_dump *d); static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr); static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id); @@ -202,27 +228,6 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id) if (id > btf__get_nr_types(d->btf)) return -EINVAL; - /* type states are lazily allocated, as they might not be needed */ - if (!d->type_states) { - d->type_states = calloc(1 + btf__get_nr_types(d->btf), - sizeof(d->type_states[0])); - if (!d->type_states) - return -ENOMEM; - d->cached_names = calloc(1 + btf__get_nr_types(d->btf), - sizeof(d->cached_names[0])); - if (!d->cached_names) - return -ENOMEM; - - /* VOID is special */ - d->type_states[0].order_state = ORDERED; - d->type_states[0].emit_state = EMITTED; - - /* eagerly determine referenced types for anon enums */ - err = btf_dump_mark_referenced(d); - if (err) - return err; - } - d->emit_queue_cnt = 0; err = btf_dump_order_type(d, id, false); if (err < 0) @@ -752,41 +757,6 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) } } -static int btf_align_of(const struct btf *btf, __u32 id) -{ - const struct btf_type *t = btf__type_by_id(btf, id); - __u16 kind = btf_kind(t); - - switch (kind) { - case BTF_KIND_INT: - case BTF_KIND_ENUM: - return min(sizeof(void *), t->size); - case BTF_KIND_PTR: - return sizeof(void *); - case BTF_KIND_TYPEDEF: - case BTF_KIND_VOLATILE: - case BTF_KIND_CONST: - case BTF_KIND_RESTRICT: - return btf_align_of(btf, t->type); - case BTF_KIND_ARRAY: - return btf_align_of(btf, btf_array(t)->type); - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: { - const struct btf_member *m = btf_members(t); - __u16 vlen = btf_vlen(t); - int i, align = 1; - - for (i = 0; i < vlen; i++, m++) - align = max(align, btf_align_of(btf, m->type)); - - return align; - } - default: - pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t)); - return 1; - } -} - static bool btf_is_struct_packed(const struct btf *btf, __u32 id, const struct btf_type *t) { @@ -794,18 +764,18 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id, int align, i, bit_sz; __u16 vlen; - align = btf_align_of(btf, id); + align = btf__align_of(btf, id); /* size of a non-packed struct has to be a multiple of its alignment*/ - if (t->size % align) + if (align && t->size % align) return true; m = btf_members(t); vlen = btf_vlen(t); /* all non-bitfield fields have to be naturally aligned */ for (i = 0; i < vlen; i++, m++) { - align = btf_align_of(btf, m->type); + align = btf__align_of(btf, m->type); bit_sz = btf_member_bitfield_size(t, i); - if (bit_sz == 0 && m->offset % (8 * align) != 0) + if (align && bit_sz == 0 && m->offset % (8 * align) != 0) return true; } @@ -889,7 +859,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, fname = btf_name_of(d, m->name_off); m_sz = btf_member_bitfield_size(t, i); m_off = btf_member_bit_offset(t, i); - align = packed ? 1 : btf_align_of(d->btf, m->type); + align = packed ? 1 : btf__align_of(d->btf, m->type); btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1); btf_dump_printf(d, "\n%s", pfx(lvl + 1)); @@ -907,7 +877,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, /* pad at the end, if necessary */ if (is_struct) { - align = packed ? 1 : btf_align_of(d->btf, id); + align = packed ? 1 : btf__align_of(d->btf, id); btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align, lvl + 1); } @@ -1051,6 +1021,21 @@ static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id) * of a stack frame. Some care is required to "pop" stack frames after * processing type declaration chain. */ +int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id, + const struct btf_dump_emit_type_decl_opts *opts) +{ + const char *fname; + int lvl; + + if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts)) + return -EINVAL; + + fname = OPTS_GET(opts, field_name, NULL); + lvl = OPTS_GET(opts, indent_level, 0); + btf_dump_emit_type_decl(d, id, fname, lvl); + return 0; +} + static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, const char *fname, int lvl) { diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c index 6122272943e6..54c30c802070 100644 --- a/tools/lib/bpf/hashmap.c +++ b/tools/lib/bpf/hashmap.c @@ -12,6 +12,9 @@ #include <linux/err.h> #include "hashmap.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + /* start with 4 buckets */ #define HASHMAP_MIN_CAP_BITS 2 diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 3f09772192f1..514b1a524abb 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -18,6 +18,7 @@ #include <stdarg.h> #include <libgen.h> #include <inttypes.h> +#include <limits.h> #include <string.h> #include <unistd.h> #include <endian.h> @@ -41,9 +42,11 @@ #include <sys/types.h> #include <sys/vfs.h> #include <sys/utsname.h> +#include <sys/resource.h> #include <tools/libc_compat.h> #include <libelf.h> #include <gelf.h> +#include <zlib.h> #include "libbpf.h" #include "bpf.h" @@ -52,6 +55,9 @@ #include "libbpf_internal.h" #include "hashmap.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + #ifndef EM_BPF #define EM_BPF 247 #endif @@ -67,6 +73,12 @@ #define __printf(a, b) __attribute__((format(printf, a, b))) +static struct bpf_map *bpf_object__add_map(struct bpf_object *obj); +static struct bpf_program *bpf_object__find_prog_by_idx(struct bpf_object *obj, + int idx); +static const struct btf_type * +skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id); + static int __base_pr(enum libbpf_print_level level, const char *format, va_list args) { @@ -99,14 +111,33 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...) va_end(args); } -#define STRERR_BUFSIZE 128 +static void pr_perm_msg(int err) +{ + struct rlimit limit; + char buf[100]; + + if (err != -EPERM || geteuid() != 0) + return; + + err = getrlimit(RLIMIT_MEMLOCK, &limit); + if (err) + return; + + if (limit.rlim_cur == RLIM_INFINITY) + return; -#define CHECK_ERR(action, err, out) do { \ - err = action; \ - if (err) \ - goto out; \ -} while (0) + if (limit.rlim_cur < 1024) + snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur); + else if (limit.rlim_cur < 1024*1024) + snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024); + else + snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024)); + + pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n", + buf); +} +#define STRERR_BUFSIZE 128 /* Copied from tools/perf/util/util.h */ #ifndef zfree @@ -144,6 +175,22 @@ struct bpf_capabilities { __u32 btf_datasec:1; /* BPF_F_MMAPABLE is supported for arrays */ __u32 array_mmap:1; + /* BTF_FUNC_GLOBAL is supported */ + __u32 btf_func_global:1; +}; + +enum reloc_type { + RELO_LD64, + RELO_CALL, + RELO_DATA, + RELO_EXTERN, +}; + +struct reloc_desc { + enum reloc_type type; + int insn_idx; + int map_idx; + int sym_off; }; /* @@ -164,16 +211,7 @@ struct bpf_program { size_t insns_cnt, main_prog_cnt; enum bpf_prog_type type; - struct reloc_desc { - enum { - RELO_LD64, - RELO_CALL, - RELO_DATA, - } type; - int insn_idx; - int map_idx; - int sym_off; - } *reloc_desc; + struct reloc_desc *reloc_desc; int nr_reloc; int log_level; @@ -202,22 +240,51 @@ struct bpf_program { __u32 prog_flags; }; +struct bpf_struct_ops { + const char *tname; + const struct btf_type *type; + struct bpf_program **progs; + __u32 *kern_func_off; + /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */ + void *data; + /* e.g. struct bpf_struct_ops_tcp_congestion_ops in + * btf_vmlinux's format. + * struct bpf_struct_ops_tcp_congestion_ops { + * [... some other kernel fields ...] + * struct tcp_congestion_ops data; + * } + * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops) + * bpf_map__init_kern_struct_ops() will populate the "kern_vdata" + * from "data". + */ + void *kern_vdata; + __u32 type_id; +}; + +#define DATA_SEC ".data" +#define BSS_SEC ".bss" +#define RODATA_SEC ".rodata" +#define KCONFIG_SEC ".kconfig" +#define STRUCT_OPS_SEC ".struct_ops" + enum libbpf_map_type { LIBBPF_MAP_UNSPEC, LIBBPF_MAP_DATA, LIBBPF_MAP_BSS, LIBBPF_MAP_RODATA, + LIBBPF_MAP_KCONFIG, }; static const char * const libbpf_type_to_btf_name[] = { - [LIBBPF_MAP_DATA] = ".data", - [LIBBPF_MAP_BSS] = ".bss", - [LIBBPF_MAP_RODATA] = ".rodata", + [LIBBPF_MAP_DATA] = DATA_SEC, + [LIBBPF_MAP_BSS] = BSS_SEC, + [LIBBPF_MAP_RODATA] = RODATA_SEC, + [LIBBPF_MAP_KCONFIG] = KCONFIG_SEC, }; struct bpf_map { - int fd; char *name; + int fd; int sec_idx; size_t sec_offset; int map_ifindex; @@ -225,17 +292,37 @@ struct bpf_map { struct bpf_map_def def; __u32 btf_key_type_id; __u32 btf_value_type_id; + __u32 btf_vmlinux_value_type_id; void *priv; bpf_map_clear_priv_t clear_priv; enum libbpf_map_type libbpf_type; + void *mmaped; + struct bpf_struct_ops *st_ops; char *pin_path; bool pinned; bool reused; }; -struct bpf_secdata { - void *rodata; - void *data; +enum extern_type { + EXT_UNKNOWN, + EXT_CHAR, + EXT_BOOL, + EXT_INT, + EXT_TRISTATE, + EXT_CHAR_ARR, +}; + +struct extern_desc { + const char *name; + int sym_idx; + int btf_id; + enum extern_type type; + int sz; + int align; + int data_off; + bool is_signed; + bool is_weak; + bool is_set; }; static LIST_HEAD(bpf_objects_list); @@ -250,11 +337,14 @@ struct bpf_object { struct bpf_map *maps; size_t nr_maps; size_t maps_cap; - struct bpf_secdata sections; + + char *kconfig; + struct extern_desc *externs; + int nr_extern; + int kconfig_map_idx; bool loaded; bool has_pseudo_calls; - bool relaxed_core_relocs; /* * Information when doing elf related work. Only valid if fd @@ -270,6 +360,7 @@ struct bpf_object { Elf_Data *data; Elf_Data *rodata; Elf_Data *bss; + Elf_Data *st_ops_data; size_t strtabidx; struct { GElf_Shdr shdr; @@ -279,9 +370,11 @@ struct bpf_object { int maps_shndx; int btf_maps_shndx; int text_shndx; + int symbols_shndx; int data_shndx; int rodata_shndx; int bss_shndx; + int st_ops_shndx; } efile; /* * All loaded bpf_object is linked in a list, which is @@ -291,6 +384,10 @@ struct bpf_object { struct list_head list; struct btf *btf; + /* Parse and load BTF vmlinux if any of the programs in the object need + * it at load time. + */ + struct btf *btf_vmlinux; struct btf_ext *btf_ext; void *priv; @@ -509,6 +606,348 @@ static __u32 get_kernel_version(void) return KERNEL_VERSION(major, minor, patch); } +static const struct btf_member * +find_member_by_offset(const struct btf_type *t, __u32 bit_offset) +{ + struct btf_member *m; + int i; + + for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { + if (btf_member_bit_offset(t, i) == bit_offset) + return m; + } + + return NULL; +} + +static const struct btf_member * +find_member_by_name(const struct btf *btf, const struct btf_type *t, + const char *name) +{ + struct btf_member *m; + int i; + + for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { + if (!strcmp(btf__name_by_offset(btf, m->name_off), name)) + return m; + } + + return NULL; +} + +#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_" +static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, + const char *name, __u32 kind); + +static int +find_struct_ops_kern_types(const struct btf *btf, const char *tname, + const struct btf_type **type, __u32 *type_id, + const struct btf_type **vtype, __u32 *vtype_id, + const struct btf_member **data_member) +{ + const struct btf_type *kern_type, *kern_vtype; + const struct btf_member *kern_data_member; + __s32 kern_vtype_id, kern_type_id; + __u32 i; + + kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT); + if (kern_type_id < 0) { + pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", + tname); + return kern_type_id; + } + kern_type = btf__type_by_id(btf, kern_type_id); + + /* Find the corresponding "map_value" type that will be used + * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example, + * find "struct bpf_struct_ops_tcp_congestion_ops" from the + * btf_vmlinux. + */ + kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX, + tname, BTF_KIND_STRUCT); + if (kern_vtype_id < 0) { + pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n", + STRUCT_OPS_VALUE_PREFIX, tname); + return kern_vtype_id; + } + kern_vtype = btf__type_by_id(btf, kern_vtype_id); + + /* Find "struct tcp_congestion_ops" from + * struct bpf_struct_ops_tcp_congestion_ops { + * [ ... ] + * struct tcp_congestion_ops data; + * } + */ + kern_data_member = btf_members(kern_vtype); + for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) { + if (kern_data_member->type == kern_type_id) + break; + } + if (i == btf_vlen(kern_vtype)) { + pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n", + tname, STRUCT_OPS_VALUE_PREFIX, tname); + return -EINVAL; + } + + *type = kern_type; + *type_id = kern_type_id; + *vtype = kern_vtype; + *vtype_id = kern_vtype_id; + *data_member = kern_data_member; + + return 0; +} + +static bool bpf_map__is_struct_ops(const struct bpf_map *map) +{ + return map->def.type == BPF_MAP_TYPE_STRUCT_OPS; +} + +/* Init the map's fields that depend on kern_btf */ +static int bpf_map__init_kern_struct_ops(struct bpf_map *map, + const struct btf *btf, + const struct btf *kern_btf) +{ + const struct btf_member *member, *kern_member, *kern_data_member; + const struct btf_type *type, *kern_type, *kern_vtype; + __u32 i, kern_type_id, kern_vtype_id, kern_data_off; + struct bpf_struct_ops *st_ops; + void *data, *kern_data; + const char *tname; + int err; + + st_ops = map->st_ops; + type = st_ops->type; + tname = st_ops->tname; + err = find_struct_ops_kern_types(kern_btf, tname, + &kern_type, &kern_type_id, + &kern_vtype, &kern_vtype_id, + &kern_data_member); + if (err) + return err; + + pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n", + map->name, st_ops->type_id, kern_type_id, kern_vtype_id); + + map->def.value_size = kern_vtype->size; + map->btf_vmlinux_value_type_id = kern_vtype_id; + + st_ops->kern_vdata = calloc(1, kern_vtype->size); + if (!st_ops->kern_vdata) + return -ENOMEM; + + data = st_ops->data; + kern_data_off = kern_data_member->offset / 8; + kern_data = st_ops->kern_vdata + kern_data_off; + + member = btf_members(type); + for (i = 0; i < btf_vlen(type); i++, member++) { + const struct btf_type *mtype, *kern_mtype; + __u32 mtype_id, kern_mtype_id; + void *mdata, *kern_mdata; + __s64 msize, kern_msize; + __u32 moff, kern_moff; + __u32 kern_member_idx; + const char *mname; + + mname = btf__name_by_offset(btf, member->name_off); + kern_member = find_member_by_name(kern_btf, kern_type, mname); + if (!kern_member) { + pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n", + map->name, mname); + return -ENOTSUP; + } + + kern_member_idx = kern_member - btf_members(kern_type); + if (btf_member_bitfield_size(type, i) || + btf_member_bitfield_size(kern_type, kern_member_idx)) { + pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n", + map->name, mname); + return -ENOTSUP; + } + + moff = member->offset / 8; + kern_moff = kern_member->offset / 8; + + mdata = data + moff; + kern_mdata = kern_data + kern_moff; + + mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id); + kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type, + &kern_mtype_id); + if (BTF_INFO_KIND(mtype->info) != + BTF_INFO_KIND(kern_mtype->info)) { + pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n", + map->name, mname, BTF_INFO_KIND(mtype->info), + BTF_INFO_KIND(kern_mtype->info)); + return -ENOTSUP; + } + + if (btf_is_ptr(mtype)) { + struct bpf_program *prog; + + mtype = skip_mods_and_typedefs(btf, mtype->type, &mtype_id); + kern_mtype = skip_mods_and_typedefs(kern_btf, + kern_mtype->type, + &kern_mtype_id); + if (!btf_is_func_proto(mtype) || + !btf_is_func_proto(kern_mtype)) { + pr_warn("struct_ops init_kern %s: non func ptr %s is not supported\n", + map->name, mname); + return -ENOTSUP; + } + + prog = st_ops->progs[i]; + if (!prog) { + pr_debug("struct_ops init_kern %s: func ptr %s is not set\n", + map->name, mname); + continue; + } + + prog->attach_btf_id = kern_type_id; + prog->expected_attach_type = kern_member_idx; + + st_ops->kern_func_off[i] = kern_data_off + kern_moff; + + pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n", + map->name, mname, prog->name, moff, + kern_moff); + + continue; + } + + msize = btf__resolve_size(btf, mtype_id); + kern_msize = btf__resolve_size(kern_btf, kern_mtype_id); + if (msize < 0 || kern_msize < 0 || msize != kern_msize) { + pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n", + map->name, mname, (ssize_t)msize, + (ssize_t)kern_msize); + return -ENOTSUP; + } + + pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n", + map->name, mname, (unsigned int)msize, + moff, kern_moff); + memcpy(kern_mdata, mdata, msize); + } + + return 0; +} + +static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj) +{ + struct bpf_map *map; + size_t i; + int err; + + for (i = 0; i < obj->nr_maps; i++) { + map = &obj->maps[i]; + + if (!bpf_map__is_struct_ops(map)) + continue; + + err = bpf_map__init_kern_struct_ops(map, obj->btf, + obj->btf_vmlinux); + if (err) + return err; + } + + return 0; +} + +static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) +{ + const struct btf_type *type, *datasec; + const struct btf_var_secinfo *vsi; + struct bpf_struct_ops *st_ops; + const char *tname, *var_name; + __s32 type_id, datasec_id; + const struct btf *btf; + struct bpf_map *map; + __u32 i; + + if (obj->efile.st_ops_shndx == -1) + return 0; + + btf = obj->btf; + datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC, + BTF_KIND_DATASEC); + if (datasec_id < 0) { + pr_warn("struct_ops init: DATASEC %s not found\n", + STRUCT_OPS_SEC); + return -EINVAL; + } + + datasec = btf__type_by_id(btf, datasec_id); + vsi = btf_var_secinfos(datasec); + for (i = 0; i < btf_vlen(datasec); i++, vsi++) { + type = btf__type_by_id(obj->btf, vsi->type); + var_name = btf__name_by_offset(obj->btf, type->name_off); + + type_id = btf__resolve_type(obj->btf, vsi->type); + if (type_id < 0) { + pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n", + vsi->type, STRUCT_OPS_SEC); + return -EINVAL; + } + + type = btf__type_by_id(obj->btf, type_id); + tname = btf__name_by_offset(obj->btf, type->name_off); + if (!tname[0]) { + pr_warn("struct_ops init: anonymous type is not supported\n"); + return -ENOTSUP; + } + if (!btf_is_struct(type)) { + pr_warn("struct_ops init: %s is not a struct\n", tname); + return -EINVAL; + } + + map = bpf_object__add_map(obj); + if (IS_ERR(map)) + return PTR_ERR(map); + + map->sec_idx = obj->efile.st_ops_shndx; + map->sec_offset = vsi->offset; + map->name = strdup(var_name); + if (!map->name) + return -ENOMEM; + + map->def.type = BPF_MAP_TYPE_STRUCT_OPS; + map->def.key_size = sizeof(int); + map->def.value_size = type->size; + map->def.max_entries = 1; + + map->st_ops = calloc(1, sizeof(*map->st_ops)); + if (!map->st_ops) + return -ENOMEM; + st_ops = map->st_ops; + st_ops->data = malloc(type->size); + st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs)); + st_ops->kern_func_off = malloc(btf_vlen(type) * + sizeof(*st_ops->kern_func_off)); + if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off) + return -ENOMEM; + + if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) { + pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n", + var_name, STRUCT_OPS_SEC); + return -EINVAL; + } + + memcpy(st_ops->data, + obj->efile.st_ops_data->d_buf + vsi->offset, + type->size); + st_ops->tname = tname; + st_ops->type = type; + st_ops->type_id = type_id; + + pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n", + tname, type_id, var_name, vsi->offset); + } + + return 0; +} + static struct bpf_object *bpf_object__new(const char *path, const void *obj_buf, size_t obj_buf_sz, @@ -550,6 +989,8 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.data_shndx = -1; obj->efile.rodata_shndx = -1; obj->efile.bss_shndx = -1; + obj->efile.st_ops_shndx = -1; + obj->kconfig_map_idx = -1; obj->kern_version = get_kernel_version(); obj->loaded = false; @@ -572,6 +1013,7 @@ static void bpf_object__elf_finish(struct bpf_object *obj) obj->efile.data = NULL; obj->efile.rodata = NULL; obj->efile.bss = NULL; + obj->efile.st_ops_data = NULL; zfree(&obj->efile.reloc_sects); obj->efile.nr_reloc_sects = 0; @@ -677,16 +1119,6 @@ bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size) return 0; } -static int compare_bpf_map(const void *_a, const void *_b) -{ - const struct bpf_map *a = _a; - const struct bpf_map *b = _b; - - if (a->sec_idx != b->sec_idx) - return a->sec_idx - b->sec_idx; - return a->sec_offset - b->sec_offset; -} - static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) { if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS || @@ -748,15 +1180,18 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name, *size = 0; if (!name) { return -EINVAL; - } else if (!strcmp(name, ".data")) { + } else if (!strcmp(name, DATA_SEC)) { if (obj->efile.data) *size = obj->efile.data->d_size; - } else if (!strcmp(name, ".bss")) { + } else if (!strcmp(name, BSS_SEC)) { if (obj->efile.bss) *size = obj->efile.bss->d_size; - } else if (!strcmp(name, ".rodata")) { + } else if (!strcmp(name, RODATA_SEC)) { if (obj->efile.rodata) *size = obj->efile.rodata->d_size; + } else if (!strcmp(name, STRUCT_OPS_SEC)) { + if (obj->efile.st_ops_data) + *size = obj->efile.st_ops_data->d_size; } else { ret = bpf_object_search_section_size(obj, name, &d_size); if (!ret) @@ -835,13 +1270,38 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) return &obj->maps[obj->nr_maps++]; } +static size_t bpf_map_mmap_sz(const struct bpf_map *map) +{ + long page_sz = sysconf(_SC_PAGE_SIZE); + size_t map_sz; + + map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries; + map_sz = roundup(map_sz, page_sz); + return map_sz; +} + +static char *internal_map_name(struct bpf_object *obj, + enum libbpf_map_type type) +{ + char map_name[BPF_OBJ_NAME_LEN]; + const char *sfx = libbpf_type_to_btf_name[type]; + int sfx_len = max((size_t)7, strlen(sfx)); + int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, + strlen(obj->name)); + + snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name, + sfx_len, libbpf_type_to_btf_name[type]); + + return strdup(map_name); +} + static int bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, - int sec_idx, Elf_Data *data, void **data_buff) + int sec_idx, void *data, size_t data_sz) { - char map_name[BPF_OBJ_NAME_LEN]; struct bpf_map_def *def; struct bpf_map *map; + int err; map = bpf_object__add_map(obj); if (IS_ERR(map)) @@ -850,9 +1310,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, map->libbpf_type = type; map->sec_idx = sec_idx; map->sec_offset = 0; - snprintf(map_name, sizeof(map_name), "%.8s%.7s", obj->name, - libbpf_type_to_btf_name[type]); - map->name = strdup(map_name); + map->name = internal_map_name(obj, type); if (!map->name) { pr_warn("failed to alloc map name\n"); return -ENOMEM; @@ -861,25 +1319,29 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, def = &map->def; def->type = BPF_MAP_TYPE_ARRAY; def->key_size = sizeof(int); - def->value_size = data->d_size; + def->value_size = data_sz; def->max_entries = 1; - def->map_flags = type == LIBBPF_MAP_RODATA ? BPF_F_RDONLY_PROG : 0; - if (obj->caps.array_mmap) - def->map_flags |= BPF_F_MMAPABLE; + def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG + ? BPF_F_RDONLY_PROG : 0; + def->map_flags |= BPF_F_MMAPABLE; pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", - map_name, map->sec_idx, map->sec_offset, def->map_flags); + map->name, map->sec_idx, map->sec_offset, def->map_flags); - if (data_buff) { - *data_buff = malloc(data->d_size); - if (!*data_buff) { - zfree(&map->name); - pr_warn("failed to alloc map content buffer\n"); - return -ENOMEM; - } - memcpy(*data_buff, data->d_buf, data->d_size); + map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (map->mmaped == MAP_FAILED) { + err = -errno; + map->mmaped = NULL; + pr_warn("failed to alloc map '%s' content buffer: %d\n", + map->name, err); + zfree(&map->name); + return err; } + if (data) + memcpy(map->mmaped, data, data_sz); + pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name); return 0; } @@ -888,37 +1350,332 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj) { int err; - if (!obj->caps.global_data) - return 0; /* * Populate obj->maps with libbpf internal maps. */ if (obj->efile.data_shndx >= 0) { err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA, obj->efile.data_shndx, - obj->efile.data, - &obj->sections.data); + obj->efile.data->d_buf, + obj->efile.data->d_size); if (err) return err; } if (obj->efile.rodata_shndx >= 0) { err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA, obj->efile.rodata_shndx, - obj->efile.rodata, - &obj->sections.rodata); + obj->efile.rodata->d_buf, + obj->efile.rodata->d_size); if (err) return err; } if (obj->efile.bss_shndx >= 0) { err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, obj->efile.bss_shndx, - obj->efile.bss, NULL); + NULL, + obj->efile.bss->d_size); if (err) return err; } return 0; } + +static struct extern_desc *find_extern_by_name(const struct bpf_object *obj, + const void *name) +{ + int i; + + for (i = 0; i < obj->nr_extern; i++) { + if (strcmp(obj->externs[i].name, name) == 0) + return &obj->externs[i]; + } + return NULL; +} + +static int set_ext_value_tri(struct extern_desc *ext, void *ext_val, + char value) +{ + switch (ext->type) { + case EXT_BOOL: + if (value == 'm') { + pr_warn("extern %s=%c should be tristate or char\n", + ext->name, value); + return -EINVAL; + } + *(bool *)ext_val = value == 'y' ? true : false; + break; + case EXT_TRISTATE: + if (value == 'y') + *(enum libbpf_tristate *)ext_val = TRI_YES; + else if (value == 'm') + *(enum libbpf_tristate *)ext_val = TRI_MODULE; + else /* value == 'n' */ + *(enum libbpf_tristate *)ext_val = TRI_NO; + break; + case EXT_CHAR: + *(char *)ext_val = value; + break; + case EXT_UNKNOWN: + case EXT_INT: + case EXT_CHAR_ARR: + default: + pr_warn("extern %s=%c should be bool, tristate, or char\n", + ext->name, value); + return -EINVAL; + } + ext->is_set = true; + return 0; +} + +static int set_ext_value_str(struct extern_desc *ext, char *ext_val, + const char *value) +{ + size_t len; + + if (ext->type != EXT_CHAR_ARR) { + pr_warn("extern %s=%s should char array\n", ext->name, value); + return -EINVAL; + } + + len = strlen(value); + if (value[len - 1] != '"') { + pr_warn("extern '%s': invalid string config '%s'\n", + ext->name, value); + return -EINVAL; + } + + /* strip quotes */ + len -= 2; + if (len >= ext->sz) { + pr_warn("extern '%s': long string config %s of (%zu bytes) truncated to %d bytes\n", + ext->name, value, len, ext->sz - 1); + len = ext->sz - 1; + } + memcpy(ext_val, value + 1, len); + ext_val[len] = '\0'; + ext->is_set = true; + return 0; +} + +static int parse_u64(const char *value, __u64 *res) +{ + char *value_end; + int err; + + errno = 0; + *res = strtoull(value, &value_end, 0); + if (errno) { + err = -errno; + pr_warn("failed to parse '%s' as integer: %d\n", value, err); + return err; + } + if (*value_end) { + pr_warn("failed to parse '%s' as integer completely\n", value); + return -EINVAL; + } + return 0; +} + +static bool is_ext_value_in_range(const struct extern_desc *ext, __u64 v) +{ + int bit_sz = ext->sz * 8; + + if (ext->sz == 8) + return true; + + /* Validate that value stored in u64 fits in integer of `ext->sz` + * bytes size without any loss of information. If the target integer + * is signed, we rely on the following limits of integer type of + * Y bits and subsequent transformation: + * + * -2^(Y-1) <= X <= 2^(Y-1) - 1 + * 0 <= X + 2^(Y-1) <= 2^Y - 1 + * 0 <= X + 2^(Y-1) < 2^Y + * + * For unsigned target integer, check that all the (64 - Y) bits are + * zero. + */ + if (ext->is_signed) + return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz); + else + return (v >> bit_sz) == 0; +} + +static int set_ext_value_num(struct extern_desc *ext, void *ext_val, + __u64 value) +{ + if (ext->type != EXT_INT && ext->type != EXT_CHAR) { + pr_warn("extern %s=%llu should be integer\n", + ext->name, (unsigned long long)value); + return -EINVAL; + } + if (!is_ext_value_in_range(ext, value)) { + pr_warn("extern %s=%llu value doesn't fit in %d bytes\n", + ext->name, (unsigned long long)value, ext->sz); + return -ERANGE; + } + switch (ext->sz) { + case 1: *(__u8 *)ext_val = value; break; + case 2: *(__u16 *)ext_val = value; break; + case 4: *(__u32 *)ext_val = value; break; + case 8: *(__u64 *)ext_val = value; break; + default: + return -EINVAL; + } + ext->is_set = true; + return 0; +} + +static int bpf_object__process_kconfig_line(struct bpf_object *obj, + char *buf, void *data) +{ + struct extern_desc *ext; + char *sep, *value; + int len, err = 0; + void *ext_val; + __u64 num; + + if (strncmp(buf, "CONFIG_", 7)) + return 0; + + sep = strchr(buf, '='); + if (!sep) { + pr_warn("failed to parse '%s': no separator\n", buf); + return -EINVAL; + } + + /* Trim ending '\n' */ + len = strlen(buf); + if (buf[len - 1] == '\n') + buf[len - 1] = '\0'; + /* Split on '=' and ensure that a value is present. */ + *sep = '\0'; + if (!sep[1]) { + *sep = '='; + pr_warn("failed to parse '%s': no value\n", buf); + return -EINVAL; + } + + ext = find_extern_by_name(obj, buf); + if (!ext || ext->is_set) + return 0; + + ext_val = data + ext->data_off; + value = sep + 1; + + switch (*value) { + case 'y': case 'n': case 'm': + err = set_ext_value_tri(ext, ext_val, *value); + break; + case '"': + err = set_ext_value_str(ext, ext_val, value); + break; + default: + /* assume integer */ + err = parse_u64(value, &num); + if (err) { + pr_warn("extern %s=%s should be integer\n", + ext->name, value); + return err; + } + err = set_ext_value_num(ext, ext_val, num); + break; + } + if (err) + return err; + pr_debug("extern %s=%s\n", ext->name, value); + return 0; +} + +static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data) +{ + char buf[PATH_MAX]; + struct utsname uts; + int len, err = 0; + gzFile file; + + uname(&uts); + len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release); + if (len < 0) + return -EINVAL; + else if (len >= PATH_MAX) + return -ENAMETOOLONG; + + /* gzopen also accepts uncompressed files. */ + file = gzopen(buf, "r"); + if (!file) + file = gzopen("/proc/config.gz", "r"); + + if (!file) { + pr_warn("failed to open system Kconfig\n"); + return -ENOENT; + } + + while (gzgets(file, buf, sizeof(buf))) { + err = bpf_object__process_kconfig_line(obj, buf, data); + if (err) { + pr_warn("error parsing system Kconfig line '%s': %d\n", + buf, err); + goto out; + } + } + +out: + gzclose(file); + return err; +} + +static int bpf_object__read_kconfig_mem(struct bpf_object *obj, + const char *config, void *data) +{ + char buf[PATH_MAX]; + int err = 0; + FILE *file; + + file = fmemopen((void *)config, strlen(config), "r"); + if (!file) { + err = -errno; + pr_warn("failed to open in-memory Kconfig: %d\n", err); + return err; + } + + while (fgets(buf, sizeof(buf), file)) { + err = bpf_object__process_kconfig_line(obj, buf, data); + if (err) { + pr_warn("error parsing in-memory Kconfig line '%s': %d\n", + buf, err); + break; + } + } + + fclose(file); + return err; +} + +static int bpf_object__init_kconfig_map(struct bpf_object *obj) +{ + struct extern_desc *last_ext; + size_t map_sz; + int err; + + if (obj->nr_extern == 0) + return 0; + + last_ext = &obj->externs[obj->nr_extern - 1]; + map_sz = last_ext->data_off + last_ext->sz; + + err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG, + obj->efile.symbols_shndx, + NULL, map_sz); + if (err) + return err; + + obj->kconfig_map_idx = obj->nr_maps - 1; + + return 0; +} + static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) { Elf_Data *symbols = obj->efile.symbols; @@ -1060,6 +1817,20 @@ skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id) return t; } +static const struct btf_type * +resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id) +{ + const struct btf_type *t; + + t = skip_mods_and_typedefs(btf, id, NULL); + if (!btf_is_ptr(t)) + return NULL; + + t = skip_mods_and_typedefs(btf, t->type, res_id); + + return btf_is_func_proto(t) ? t : NULL; +} + /* * Fetch integer attribute of BTF map definition. Such attributes are * represented using a pointer to an array, in which dimensionality of array @@ -1242,15 +2013,15 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, } sz = btf__resolve_size(obj->btf, t->type); if (sz < 0) { - pr_warn("map '%s': can't determine key size for type [%u]: %lld.\n", - map_name, t->type, sz); + pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n", + map_name, t->type, (ssize_t)sz); return sz; } - pr_debug("map '%s': found key [%u], sz = %lld.\n", - map_name, t->type, sz); + pr_debug("map '%s': found key [%u], sz = %zd.\n", + map_name, t->type, (ssize_t)sz); if (map->def.key_size && map->def.key_size != sz) { - pr_warn("map '%s': conflicting key size %u != %lld.\n", - map_name, map->def.key_size, sz); + pr_warn("map '%s': conflicting key size %u != %zd.\n", + map_name, map->def.key_size, (ssize_t)sz); return -EINVAL; } map->def.key_size = sz; @@ -1285,15 +2056,15 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, } sz = btf__resolve_size(obj->btf, t->type); if (sz < 0) { - pr_warn("map '%s': can't determine value size for type [%u]: %lld.\n", - map_name, t->type, sz); + pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n", + map_name, t->type, (ssize_t)sz); return sz; } - pr_debug("map '%s': found value [%u], sz = %lld.\n", - map_name, t->type, sz); + pr_debug("map '%s': found value [%u], sz = %zd.\n", + map_name, t->type, (ssize_t)sz); if (map->def.value_size && map->def.value_size != sz) { - pr_warn("map '%s': conflicting value size %u != %lld.\n", - map_name, map->def.value_size, sz); + pr_warn("map '%s': conflicting value size %u != %zd.\n", + map_name, map->def.value_size, (ssize_t)sz); return -EINVAL; } map->def.value_size = sz; @@ -1393,28 +2164,24 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, return 0; } -static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps, - const char *pin_root_path) +static int bpf_object__init_maps(struct bpf_object *obj, + const struct bpf_object_open_opts *opts) { - bool strict = !relaxed_maps; + const char *pin_root_path; + bool strict; int err; - err = bpf_object__init_user_maps(obj, strict); - if (err) - return err; - - err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path); - if (err) - return err; + strict = !OPTS_GET(opts, relaxed_maps, false); + pin_root_path = OPTS_GET(opts, pin_root_path, NULL); - err = bpf_object__init_global_data_maps(obj); + err = bpf_object__init_user_maps(obj, strict); + err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path); + err = err ?: bpf_object__init_global_data_maps(obj); + err = err ?: bpf_object__init_kconfig_map(obj); + err = err ?: bpf_object__init_struct_ops_maps(obj); if (err) return err; - if (obj->nr_maps) { - qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), - compare_bpf_map); - } return 0; } @@ -1438,13 +2205,14 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx) static void bpf_object__sanitize_btf(struct bpf_object *obj) { + bool has_func_global = obj->caps.btf_func_global; bool has_datasec = obj->caps.btf_datasec; bool has_func = obj->caps.btf_func; struct btf *btf = obj->btf; struct btf_type *t; int i, j, vlen; - if (!obj->btf || (has_func && has_datasec)) + if (!obj->btf || (has_func && has_datasec && has_func_global)) return; for (i = 1; i <= btf__get_nr_types(btf); i++) { @@ -1492,6 +2260,9 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj) } else if (!has_func && btf_is_func(t)) { /* replace FUNC with TYPEDEF */ t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0); + } else if (!has_func_global && btf_is_func(t)) { + /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */ + t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0); } } } @@ -1509,28 +2280,27 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj) static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj) { - return obj->efile.btf_maps_shndx >= 0; + return obj->efile.btf_maps_shndx >= 0 || + obj->efile.st_ops_shndx >= 0 || + obj->nr_extern > 0; } static int bpf_object__init_btf(struct bpf_object *obj, Elf_Data *btf_data, Elf_Data *btf_ext_data) { - bool btf_required = bpf_object__is_btf_mandatory(obj); - int err = 0; + int err = -ENOENT; if (btf_data) { obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); if (IS_ERR(obj->btf)) { + err = PTR_ERR(obj->btf); + obj->btf = NULL; pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err); goto out; } - err = btf__finalize_data(obj, obj->btf); - if (err) { - pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err); - goto out; - } + err = 0; } if (btf_ext_data) { if (!obj->btf) { @@ -1548,22 +2318,72 @@ static int bpf_object__init_btf(struct bpf_object *obj, } } out: - if (err || IS_ERR(obj->btf)) { - if (btf_required) - err = err ? : PTR_ERR(obj->btf); - else - err = 0; - if (!IS_ERR_OR_NULL(obj->btf)) - btf__free(obj->btf); - obj->btf = NULL; + if (err && bpf_object__is_btf_mandatory(obj)) { + pr_warn("BTF is required, but is missing or corrupted.\n"); + return err; } - if (btf_required && !obj->btf) { + return 0; +} + +static int bpf_object__finalize_btf(struct bpf_object *obj) +{ + int err; + + if (!obj->btf) + return 0; + + err = btf__finalize_data(obj, obj->btf); + if (!err) + return 0; + + pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err); + btf__free(obj->btf); + obj->btf = NULL; + btf_ext__free(obj->btf_ext); + obj->btf_ext = NULL; + + if (bpf_object__is_btf_mandatory(obj)) { pr_warn("BTF is required, but is missing or corrupted.\n"); - return err == 0 ? -ENOENT : err; + return -ENOENT; } return 0; } +static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog) +{ + if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) + return true; + + /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs + * also need vmlinux BTF + */ + if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd) + return true; + + return false; +} + +static int bpf_object__load_vmlinux_btf(struct bpf_object *obj) +{ + struct bpf_program *prog; + int err; + + bpf_object__for_each_program(prog, obj) { + if (libbpf_prog_needs_vmlinux_btf(prog)) { + obj->btf_vmlinux = libbpf_find_kernel_btf(); + if (IS_ERR(obj->btf_vmlinux)) { + err = PTR_ERR(obj->btf_vmlinux); + pr_warn("Error loading vmlinux BTF: %d\n", err); + obj->btf_vmlinux = NULL; + return err; + } + return 0; + } + } + + return 0; +} + static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) { int err = 0; @@ -1592,8 +2412,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) return 0; } -static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, - const char *pin_root_path) +static int bpf_object__elf_collect(struct bpf_object *obj) { Elf *elf = obj->efile.elf; GElf_Ehdr *ep = &obj->efile.ehdr; @@ -1665,6 +2484,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, return -LIBBPF_ERRNO__FORMAT; } obj->efile.symbols = data; + obj->efile.symbols_shndx = idx; obj->efile.strtabidx = sh.sh_link; } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) { if (sh.sh_flags & SHF_EXECINSTR) { @@ -1683,12 +2503,15 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, name, obj->path, cp); return err; } - } else if (strcmp(name, ".data") == 0) { + } else if (strcmp(name, DATA_SEC) == 0) { obj->efile.data = data; obj->efile.data_shndx = idx; - } else if (strcmp(name, ".rodata") == 0) { + } else if (strcmp(name, RODATA_SEC) == 0) { obj->efile.rodata = data; obj->efile.rodata_shndx = idx; + } else if (strcmp(name, STRUCT_OPS_SEC) == 0) { + obj->efile.st_ops_data = data; + obj->efile.st_ops_shndx = idx; } else { pr_debug("skip section(%d) %s\n", idx, name); } @@ -1698,7 +2521,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, int sec = sh.sh_info; /* points to other section */ /* Only do relo for section with exec instructions */ - if (!section_have_execinstr(obj, sec)) { + if (!section_have_execinstr(obj, sec) && + strcmp(name, ".rel" STRUCT_OPS_SEC)) { pr_debug("skip relo %s(%d) for section(%d)\n", name, idx, sec); continue; @@ -1716,7 +2540,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, obj->efile.reloc_sects[nr_sects].shdr = sh; obj->efile.reloc_sects[nr_sects].data = data; - } else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) { + } else if (sh.sh_type == SHT_NOBITS && + strcmp(name, BSS_SEC) == 0) { obj->efile.bss = data; obj->efile.bss_shndx = idx; } else { @@ -1728,14 +2553,217 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, pr_warn("Corrupted ELF file: index of strtab invalid\n"); return -LIBBPF_ERRNO__FORMAT; } - err = bpf_object__init_btf(obj, btf_data, btf_ext_data); - if (!err) - err = bpf_object__init_maps(obj, relaxed_maps, pin_root_path); - if (!err) - err = bpf_object__sanitize_and_load_btf(obj); - if (!err) - err = bpf_object__init_prog_names(obj); - return err; + return bpf_object__init_btf(obj, btf_data, btf_ext_data); +} + +static bool sym_is_extern(const GElf_Sym *sym) +{ + int bind = GELF_ST_BIND(sym->st_info); + /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */ + return sym->st_shndx == SHN_UNDEF && + (bind == STB_GLOBAL || bind == STB_WEAK) && + GELF_ST_TYPE(sym->st_info) == STT_NOTYPE; +} + +static int find_extern_btf_id(const struct btf *btf, const char *ext_name) +{ + const struct btf_type *t; + const char *var_name; + int i, n; + + if (!btf) + return -ESRCH; + + n = btf__get_nr_types(btf); + for (i = 1; i <= n; i++) { + t = btf__type_by_id(btf, i); + + if (!btf_is_var(t)) + continue; + + var_name = btf__name_by_offset(btf, t->name_off); + if (strcmp(var_name, ext_name)) + continue; + + if (btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN) + return -EINVAL; + + return i; + } + + return -ENOENT; +} + +static enum extern_type find_extern_type(const struct btf *btf, int id, + bool *is_signed) +{ + const struct btf_type *t; + const char *name; + + t = skip_mods_and_typedefs(btf, id, NULL); + name = btf__name_by_offset(btf, t->name_off); + + if (is_signed) + *is_signed = false; + switch (btf_kind(t)) { + case BTF_KIND_INT: { + int enc = btf_int_encoding(t); + + if (enc & BTF_INT_BOOL) + return t->size == 1 ? EXT_BOOL : EXT_UNKNOWN; + if (is_signed) + *is_signed = enc & BTF_INT_SIGNED; + if (t->size == 1) + return EXT_CHAR; + if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1))) + return EXT_UNKNOWN; + return EXT_INT; + } + case BTF_KIND_ENUM: + if (t->size != 4) + return EXT_UNKNOWN; + if (strcmp(name, "libbpf_tristate")) + return EXT_UNKNOWN; + return EXT_TRISTATE; + case BTF_KIND_ARRAY: + if (btf_array(t)->nelems == 0) + return EXT_UNKNOWN; + if (find_extern_type(btf, btf_array(t)->type, NULL) != EXT_CHAR) + return EXT_UNKNOWN; + return EXT_CHAR_ARR; + default: + return EXT_UNKNOWN; + } +} + +static int cmp_externs(const void *_a, const void *_b) +{ + const struct extern_desc *a = _a; + const struct extern_desc *b = _b; + + /* descending order by alignment requirements */ + if (a->align != b->align) + return a->align > b->align ? -1 : 1; + /* ascending order by size, within same alignment class */ + if (a->sz != b->sz) + return a->sz < b->sz ? -1 : 1; + /* resolve ties by name */ + return strcmp(a->name, b->name); +} + +static int bpf_object__collect_externs(struct bpf_object *obj) +{ + const struct btf_type *t; + struct extern_desc *ext; + int i, n, off, btf_id; + struct btf_type *sec; + const char *ext_name; + Elf_Scn *scn; + GElf_Shdr sh; + + if (!obj->efile.symbols) + return 0; + + scn = elf_getscn(obj->efile.elf, obj->efile.symbols_shndx); + if (!scn) + return -LIBBPF_ERRNO__FORMAT; + if (gelf_getshdr(scn, &sh) != &sh) + return -LIBBPF_ERRNO__FORMAT; + n = sh.sh_size / sh.sh_entsize; + + pr_debug("looking for externs among %d symbols...\n", n); + for (i = 0; i < n; i++) { + GElf_Sym sym; + + if (!gelf_getsym(obj->efile.symbols, i, &sym)) + return -LIBBPF_ERRNO__FORMAT; + if (!sym_is_extern(&sym)) + continue; + ext_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, + sym.st_name); + if (!ext_name || !ext_name[0]) + continue; + + ext = obj->externs; + ext = reallocarray(ext, obj->nr_extern + 1, sizeof(*ext)); + if (!ext) + return -ENOMEM; + obj->externs = ext; + ext = &ext[obj->nr_extern]; + memset(ext, 0, sizeof(*ext)); + obj->nr_extern++; + + ext->btf_id = find_extern_btf_id(obj->btf, ext_name); + if (ext->btf_id <= 0) { + pr_warn("failed to find BTF for extern '%s': %d\n", + ext_name, ext->btf_id); + return ext->btf_id; + } + t = btf__type_by_id(obj->btf, ext->btf_id); + ext->name = btf__name_by_offset(obj->btf, t->name_off); + ext->sym_idx = i; + ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK; + ext->sz = btf__resolve_size(obj->btf, t->type); + if (ext->sz <= 0) { + pr_warn("failed to resolve size of extern '%s': %d\n", + ext_name, ext->sz); + return ext->sz; + } + ext->align = btf__align_of(obj->btf, t->type); + if (ext->align <= 0) { + pr_warn("failed to determine alignment of extern '%s': %d\n", + ext_name, ext->align); + return -EINVAL; + } + ext->type = find_extern_type(obj->btf, t->type, + &ext->is_signed); + if (ext->type == EXT_UNKNOWN) { + pr_warn("extern '%s' type is unsupported\n", ext_name); + return -ENOTSUP; + } + } + pr_debug("collected %d externs total\n", obj->nr_extern); + + if (!obj->nr_extern) + return 0; + + /* sort externs by (alignment, size, name) and calculate their offsets + * within a map */ + qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs); + off = 0; + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + ext->data_off = roundup(off, ext->align); + off = ext->data_off + ext->sz; + pr_debug("extern #%d: symbol %d, off %u, name %s\n", + i, ext->sym_idx, ext->data_off, ext->name); + } + + btf_id = btf__find_by_name(obj->btf, KCONFIG_SEC); + if (btf_id <= 0) { + pr_warn("no BTF info found for '%s' datasec\n", KCONFIG_SEC); + return -ESRCH; + } + + sec = (struct btf_type *)btf__type_by_id(obj->btf, btf_id); + sec->size = off; + n = btf_vlen(sec); + for (i = 0; i < n; i++) { + struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; + + t = btf__type_by_id(obj->btf, vs->type); + ext_name = btf__name_by_offset(obj->btf, t->name_off); + ext = find_extern_by_name(obj, ext_name); + if (!ext) { + pr_warn("failed to find extern definition for BTF var '%s'\n", + ext_name); + return -ESRCH; + } + vs->offset = ext->data_off; + btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED; + } + + return 0; } static struct bpf_program * @@ -1765,6 +2793,19 @@ bpf_object__find_program_by_title(const struct bpf_object *obj, return NULL; } +struct bpf_program * +bpf_object__find_program_by_name(const struct bpf_object *obj, + const char *name) +{ + struct bpf_program *prog; + + bpf_object__for_each_program(prog, obj) { + if (!strcmp(prog->name, name)) + return prog; + } + return NULL; +} + static bool bpf_object__shndx_is_data(const struct bpf_object *obj, int shndx) { @@ -1789,6 +2830,8 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) return LIBBPF_MAP_BSS; else if (shndx == obj->efile.rodata_shndx) return LIBBPF_MAP_RODATA; + else if (shndx == obj->efile.symbols_shndx) + return LIBBPF_MAP_KCONFIG; else return LIBBPF_MAP_UNSPEC; } @@ -1817,7 +2860,8 @@ static int bpf_program__record_reloc(struct bpf_program *prog, return -LIBBPF_ERRNO__RELOC; } if (sym->st_value % 8) { - pr_warn("bad call relo offset: %llu\n", (__u64)sym->st_value); + pr_warn("bad call relo offset: %zu\n", + (size_t)sym->st_value); return -LIBBPF_ERRNO__RELOC; } reloc_desc->type = RELO_CALL; @@ -1832,6 +2876,30 @@ static int bpf_program__record_reloc(struct bpf_program *prog, insn_idx, insn->code); return -LIBBPF_ERRNO__RELOC; } + + if (sym_is_extern(sym)) { + int sym_idx = GELF_R_SYM(rel->r_info); + int i, n = obj->nr_extern; + struct extern_desc *ext; + + for (i = 0; i < n; i++) { + ext = &obj->externs[i]; + if (ext->sym_idx == sym_idx) + break; + } + if (i >= n) { + pr_warn("extern relo failed to find extern for sym %d\n", + sym_idx); + return -LIBBPF_ERRNO__RELOC; + } + pr_debug("found extern #%d '%s' (sym %d, off %u) for insn %u\n", + i, ext->name, ext->sym_idx, ext->data_off, insn_idx); + reloc_desc->type = RELO_EXTERN; + reloc_desc->insn_idx = insn_idx; + reloc_desc->sym_off = ext->data_off; + return 0; + } + if (!shdr_idx || shdr_idx >= SHN_LORESERVE) { pr_warn("invalid relo for \'%s\' in special section 0x%x; forgot to initialize global var?..\n", name, shdr_idx); @@ -1859,8 +2927,8 @@ static int bpf_program__record_reloc(struct bpf_program *prog, break; } if (map_idx >= nr_maps) { - pr_warn("map relo failed to find map for sec %u, off %llu\n", - shdr_idx, (__u64)sym->st_value); + pr_warn("map relo failed to find map for sec %u, off %zu\n", + shdr_idx, (size_t)sym->st_value); return -LIBBPF_ERRNO__RELOC; } reloc_desc->type = RELO_LD64; @@ -1875,11 +2943,6 @@ static int bpf_program__record_reloc(struct bpf_program *prog, pr_warn("bad data relo against section %u\n", shdr_idx); return -LIBBPF_ERRNO__RELOC; } - if (!obj->caps.global_data) { - pr_warn("relocation: kernel does not support global \'%s\' variable access in insns[%d]\n", - name, insn_idx); - return -LIBBPF_ERRNO__RELOC; - } for (map_idx = 0; map_idx < nr_maps; map_idx++) { map = &obj->maps[map_idx]; if (map->libbpf_type != type) @@ -1941,9 +3004,9 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, sym.st_name) ? : "<?>"; - pr_debug("relo for shdr %u, symb %llu, value %llu, type %d, bind %d, name %d (\'%s\'), insn %u\n", - (__u32)sym.st_shndx, (__u64)GELF_R_SYM(rel.r_info), - (__u64)sym.st_value, GELF_ST_TYPE(sym.st_info), + pr_debug("relo for shdr %u, symb %zu, value %zu, type %d, bind %d, name %d (\'%s\'), insn %u\n", + (__u32)sym.st_shndx, (size_t)GELF_R_SYM(rel.r_info), + (size_t)sym.st_value, GELF_ST_TYPE(sym.st_info), GELF_ST_BIND(sym.st_info), sym.st_name, name, insn_idx); @@ -1961,8 +3024,12 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map) __u32 key_type_id = 0, value_type_id = 0; int ret; - /* if it's BTF-defined map, we don't need to search for type IDs */ - if (map->sec_idx == obj->efile.btf_maps_shndx) + /* if it's BTF-defined map, we don't need to search for type IDs. + * For struct_ops map, it does not need btf_key_type_id and + * btf_value_type_id. + */ + if (map->sec_idx == obj->efile.btf_maps_shndx || + bpf_map__is_struct_ops(map)) return 0; if (!bpf_map__is_internal(map)) { @@ -2166,6 +3233,32 @@ static int bpf_object__probe_btf_func(struct bpf_object *obj) return 0; } +static int bpf_object__probe_btf_func_global(struct bpf_object *obj) +{ + static const char strs[] = "\0int\0x\0a"; + /* static void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), + }; + int btf_fd; + + btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs)); + if (btf_fd >= 0) { + obj->caps.btf_func_global = 1; + close(btf_fd); + return 1; + } + + return 0; +} + static int bpf_object__probe_btf_datasec(struct bpf_object *obj) { static const char strs[] = "\0x\0.data"; @@ -2221,6 +3314,7 @@ bpf_object__probe_caps(struct bpf_object *obj) bpf_object__probe_name, bpf_object__probe_global_data, bpf_object__probe_btf_func, + bpf_object__probe_btf_func_global, bpf_object__probe_btf_datasec, bpf_object__probe_array_mmap, }; @@ -2298,29 +3392,35 @@ bpf_object__reuse_map(struct bpf_map *map) static int bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) { + enum libbpf_map_type map_type = map->libbpf_type; char *cp, errmsg[STRERR_BUFSIZE]; int err, zero = 0; - __u8 *data; - /* Nothing to do here since kernel already zero-initializes .bss map. */ - if (map->libbpf_type == LIBBPF_MAP_BSS) + /* kernel already zero-initializes .bss map. */ + if (map_type == LIBBPF_MAP_BSS) return 0; - data = map->libbpf_type == LIBBPF_MAP_DATA ? - obj->sections.data : obj->sections.rodata; + err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0); + if (err) { + err = -errno; + cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); + pr_warn("Error setting initial map(%s) contents: %s\n", + map->name, cp); + return err; + } - err = bpf_map_update_elem(map->fd, &zero, data, 0); - /* Freeze .rodata map as read-only from syscall side. */ - if (!err && map->libbpf_type == LIBBPF_MAP_RODATA) { + /* Freeze .rodata and .kconfig map as read-only from syscall side. */ + if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) { err = bpf_map_freeze(map->fd); if (err) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + err = -errno; + cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); pr_warn("Error freezing map(%s) as read-only: %s\n", map->name, cp); - err = 0; + return err; } } - return err; + return 0; } static int @@ -2381,6 +3481,9 @@ bpf_object__create_maps(struct bpf_object *obj) if (bpf_map_type__is_map_in_map(def->type) && map->inner_map_fd >= 0) create_attr.inner_map_fd = map->inner_map_fd; + if (bpf_map__is_struct_ops(map)) + create_attr.btf_vmlinux_value_type_id = + map->btf_vmlinux_value_type_id; if (obj->btf && !bpf_map_find_btf_info(obj, map)) { create_attr.btf_fd = btf__fd(obj->btf); @@ -2411,6 +3514,7 @@ err_out: cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); pr_warn("failed to create map (name: '%s'): %s(%d)\n", map->name, cp, err); + pr_perm_msg(err); for (j = 0; j < i; j++) zclose(obj->maps[j].fd); return err; @@ -2536,6 +3640,21 @@ static bool str_is_empty(const char *s) return !s || !s[0]; } +static bool is_flex_arr(const struct btf *btf, + const struct bpf_core_accessor *acc, + const struct btf_array *arr) +{ + const struct btf_type *t; + + /* not a flexible array, if not inside a struct or has non-zero size */ + if (!acc->name || arr->nelems > 0) + return false; + + /* has to be the last member of enclosing struct */ + t = btf__type_by_id(btf, acc->type_id); + return acc->idx == btf_vlen(t) - 1; +} + /* * Turn bpf_field_reloc into a low- and high-level spec representation, * validating correctness along the way, as well as calculating resulting @@ -2573,6 +3692,7 @@ static int bpf_core_spec_parse(const struct btf *btf, struct bpf_core_spec *spec) { int access_idx, parsed_len, i; + struct bpf_core_accessor *acc; const struct btf_type *t; const char *name; __u32 id; @@ -2620,6 +3740,7 @@ static int bpf_core_spec_parse(const struct btf *btf, return -EINVAL; access_idx = spec->raw_spec[i]; + acc = &spec->spec[spec->len]; if (btf_is_composite(t)) { const struct btf_member *m; @@ -2637,18 +3758,23 @@ static int bpf_core_spec_parse(const struct btf *btf, if (str_is_empty(name)) return -EINVAL; - spec->spec[spec->len].type_id = id; - spec->spec[spec->len].idx = access_idx; - spec->spec[spec->len].name = name; + acc->type_id = id; + acc->idx = access_idx; + acc->name = name; spec->len++; } id = m->type; } else if (btf_is_array(t)) { const struct btf_array *a = btf_array(t); + bool flex; t = skip_mods_and_typedefs(btf, a->type, &id); - if (!t || access_idx >= a->nelems) + if (!t) + return -EINVAL; + + flex = is_flex_arr(btf, acc - 1, a); + if (!flex && access_idx >= a->nelems) return -EINVAL; spec->spec[spec->len].type_id = id; @@ -2743,7 +3869,9 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, if (strncmp(local_name, targ_name, local_essent_len) == 0) { pr_debug("[%d] %s: found candidate [%d] %s\n", local_type_id, local_name, i, targ_name); - new_ids = realloc(cand_ids->data, cand_ids->len + 1); + new_ids = reallocarray(cand_ids->data, + cand_ids->len + 1, + sizeof(*cand_ids->data)); if (!new_ids) { err = -ENOMEM; goto err_out; @@ -2953,12 +4081,14 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, */ if (i > 0) { const struct btf_array *a; + bool flex; if (!btf_is_array(targ_type)) return 0; a = btf_array(targ_type); - if (local_acc->idx >= a->nelems) + flex = is_flex_arr(targ_btf, targ_acc - 1, a); + if (!flex && local_acc->idx >= a->nelems) return 0; if (!skip_mods_and_typedefs(targ_btf, a->type, &targ_id)) @@ -3109,25 +4239,38 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog, */ static int bpf_core_reloc_insn(struct bpf_program *prog, const struct bpf_field_reloc *relo, + int relo_idx, const struct bpf_core_spec *local_spec, const struct bpf_core_spec *targ_spec) { - bool failed = false, validate = true; __u32 orig_val, new_val; struct bpf_insn *insn; + bool validate = true; int insn_idx, err; __u8 class; if (relo->insn_off % sizeof(struct bpf_insn)) return -EINVAL; insn_idx = relo->insn_off / sizeof(struct bpf_insn); + insn = &prog->insns[insn_idx]; + class = BPF_CLASS(insn->code); if (relo->kind == BPF_FIELD_EXISTS) { orig_val = 1; /* can't generate EXISTS relo w/o local field */ new_val = targ_spec ? 1 : 0; } else if (!targ_spec) { - failed = true; - new_val = (__u32)-1; + pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n", + bpf_program__title(prog, false), relo_idx, insn_idx); + insn->code = BPF_JMP | BPF_CALL; + insn->dst_reg = 0; + insn->src_reg = 0; + insn->off = 0; + /* if this instruction is reachable (not a dead code), + * verifier will complain with the following message: + * invalid func unknown#195896080 + */ + insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */ + return 0; } else { err = bpf_core_calc_field_relo(prog, relo, local_spec, &orig_val, &validate); @@ -3139,26 +4282,47 @@ static int bpf_core_reloc_insn(struct bpf_program *prog, return err; } - insn = &prog->insns[insn_idx]; - class = BPF_CLASS(insn->code); - - if (class == BPF_ALU || class == BPF_ALU64) { + switch (class) { + case BPF_ALU: + case BPF_ALU64: if (BPF_SRC(insn->code) != BPF_K) return -EINVAL; - if (!failed && validate && insn->imm != orig_val) { - pr_warn("prog '%s': unexpected insn #%d value: got %u, exp %u -> %u\n", - bpf_program__title(prog, false), insn_idx, - insn->imm, orig_val, new_val); + if (validate && insn->imm != orig_val) { + pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n", + bpf_program__title(prog, false), relo_idx, + insn_idx, insn->imm, orig_val, new_val); return -EINVAL; } orig_val = insn->imm; insn->imm = new_val; - pr_debug("prog '%s': patched insn #%d (ALU/ALU64)%s imm %u -> %u\n", - bpf_program__title(prog, false), insn_idx, - failed ? " w/ failed reloc" : "", orig_val, new_val); - } else { - pr_warn("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", - bpf_program__title(prog, false), + pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n", + bpf_program__title(prog, false), relo_idx, insn_idx, + orig_val, new_val); + break; + case BPF_LDX: + case BPF_ST: + case BPF_STX: + if (validate && insn->off != orig_val) { + pr_warn("prog '%s': relo #%d: unexpected insn #%d (LD/LDX/ST/STX) value: got %u, exp %u -> %u\n", + bpf_program__title(prog, false), relo_idx, + insn_idx, insn->off, orig_val, new_val); + return -EINVAL; + } + if (new_val > SHRT_MAX) { + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n", + bpf_program__title(prog, false), relo_idx, + insn_idx, new_val); + return -ERANGE; + } + orig_val = insn->off; + insn->off = new_val; + pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n", + bpf_program__title(prog, false), relo_idx, insn_idx, + orig_val, new_val); + break; + default: + pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", + bpf_program__title(prog, false), relo_idx, insn_idx, insn->code, insn->src_reg, insn->dst_reg, insn->off, insn->imm); return -EINVAL; @@ -3167,92 +4331,6 @@ static int bpf_core_reloc_insn(struct bpf_program *prog, return 0; } -static struct btf *btf_load_raw(const char *path) -{ - struct btf *btf; - size_t read_cnt; - struct stat st; - void *data; - FILE *f; - - if (stat(path, &st)) - return ERR_PTR(-errno); - - data = malloc(st.st_size); - if (!data) - return ERR_PTR(-ENOMEM); - - f = fopen(path, "rb"); - if (!f) { - btf = ERR_PTR(-errno); - goto cleanup; - } - - read_cnt = fread(data, 1, st.st_size, f); - fclose(f); - if (read_cnt < st.st_size) { - btf = ERR_PTR(-EBADF); - goto cleanup; - } - - btf = btf__new(data, read_cnt); - -cleanup: - free(data); - return btf; -} - -/* - * Probe few well-known locations for vmlinux kernel image and try to load BTF - * data out of it to use for target BTF. - */ -static struct btf *bpf_core_find_kernel_btf(void) -{ - struct { - const char *path_fmt; - bool raw_btf; - } locations[] = { - /* try canonical vmlinux BTF through sysfs first */ - { "/sys/kernel/btf/vmlinux", true /* raw BTF */ }, - /* fall back to trying to find vmlinux ELF on disk otherwise */ - { "/boot/vmlinux-%1$s" }, - { "/lib/modules/%1$s/vmlinux-%1$s" }, - { "/lib/modules/%1$s/build/vmlinux" }, - { "/usr/lib/modules/%1$s/kernel/vmlinux" }, - { "/usr/lib/debug/boot/vmlinux-%1$s" }, - { "/usr/lib/debug/boot/vmlinux-%1$s.debug" }, - { "/usr/lib/debug/lib/modules/%1$s/vmlinux" }, - }; - char path[PATH_MAX + 1]; - struct utsname buf; - struct btf *btf; - int i; - - uname(&buf); - - for (i = 0; i < ARRAY_SIZE(locations); i++) { - snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release); - - if (access(path, R_OK)) - continue; - - if (locations[i].raw_btf) - btf = btf_load_raw(path); - else - btf = btf__parse_elf(path, NULL); - - pr_debug("loading kernel BTF '%s': %ld\n", - path, IS_ERR(btf) ? PTR_ERR(btf) : 0); - if (IS_ERR(btf)) - continue; - - return btf; - } - - pr_warn("failed to find valid kernel BTF\n"); - return ERR_PTR(-ESRCH); -} - /* Output spec definition in the format: * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b @@ -3443,24 +4521,33 @@ static int bpf_core_reloc_field(struct bpf_program *prog, } /* - * For BPF_FIELD_EXISTS relo or when relaxed CO-RE reloc mode is - * requested, it's expected that we might not find any candidates. - * In this case, if field wasn't found in any candidate, the list of - * candidates shouldn't change at all, we'll just handle relocating - * appropriately, depending on relo's kind. + * For BPF_FIELD_EXISTS relo or when used BPF program has field + * existence checks or kernel version/config checks, it's expected + * that we might not find any candidates. In this case, if field + * wasn't found in any candidate, the list of candidates shouldn't + * change at all, we'll just handle relocating appropriately, + * depending on relo's kind. */ if (j > 0) cand_ids->len = j; - if (j == 0 && !prog->obj->relaxed_core_relocs && - relo->kind != BPF_FIELD_EXISTS) { - pr_warn("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n", - prog_name, relo_idx, local_id, local_name, spec_str); - return -ESRCH; - } + /* + * If no candidates were found, it might be both a programmer error, + * as well as expected case, depending whether instruction w/ + * relocation is guarded in some way that makes it unreachable (dead + * code) if relocation can't be resolved. This is handled in + * bpf_core_reloc_insn() uniformly by replacing that instruction with + * BPF helper call insn (using invalid helper ID). If that instruction + * is indeed unreachable, then it will be ignored and eliminated by + * verifier. If it was an error, then verifier will complain and point + * to a specific instruction number in its log. + */ + if (j == 0) + pr_debug("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n", + prog_name, relo_idx, local_id, local_name, spec_str); /* bpf_core_reloc_insn should know how to handle missing targ_spec */ - err = bpf_core_reloc_insn(prog, relo, &local_spec, + err = bpf_core_reloc_insn(prog, relo, relo_idx, &local_spec, j ? &targ_spec : NULL); if (err) { pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n", @@ -3487,7 +4574,7 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path) if (targ_btf_path) targ_btf = btf__parse_elf(targ_btf_path, NULL); else - targ_btf = bpf_core_find_kernel_btf(); + targ_btf = libbpf_find_kernel_btf(); if (IS_ERR(targ_btf)) { pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf)); return PTR_ERR(targ_btf); @@ -3559,16 +4646,7 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, size_t new_cnt; int err; - if (relo->type != RELO_CALL) - return -LIBBPF_ERRNO__RELOC; - - if (prog->idx == obj->efile.text_shndx) { - pr_warn("relo in .text insn %d into off %d (insn #%d)\n", - relo->insn_idx, relo->sym_off, relo->sym_off / 8); - return -LIBBPF_ERRNO__RELOC; - } - - if (prog->main_prog_cnt == 0) { + if (prog->idx != obj->efile.text_shndx && prog->main_prog_cnt == 0) { text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx); if (!text) { pr_warn("no .text section found yet relo into text exist\n"); @@ -3598,6 +4676,7 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, text->insns_cnt, text->section_name, prog->section_name); } + insn = &prog->insns[relo->insn_idx]; insn->imm += relo->sym_off / 8 + prog->main_prog_cnt - relo->insn_idx; return 0; @@ -3623,27 +4702,37 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) for (i = 0; i < prog->nr_reloc; i++) { struct reloc_desc *relo = &prog->reloc_desc[i]; + struct bpf_insn *insn = &prog->insns[relo->insn_idx]; - if (relo->type == RELO_LD64 || relo->type == RELO_DATA) { - struct bpf_insn *insn = &prog->insns[relo->insn_idx]; - - if (relo->insn_idx + 1 >= (int)prog->insns_cnt) { - pr_warn("relocation out of range: '%s'\n", - prog->section_name); - return -LIBBPF_ERRNO__RELOC; - } + if (relo->insn_idx + 1 >= (int)prog->insns_cnt) { + pr_warn("relocation out of range: '%s'\n", + prog->section_name); + return -LIBBPF_ERRNO__RELOC; + } - if (relo->type != RELO_DATA) { - insn[0].src_reg = BPF_PSEUDO_MAP_FD; - } else { - insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; - insn[1].imm = insn[0].imm + relo->sym_off; - } + switch (relo->type) { + case RELO_LD64: + insn[0].src_reg = BPF_PSEUDO_MAP_FD; insn[0].imm = obj->maps[relo->map_idx].fd; - } else if (relo->type == RELO_CALL) { + break; + case RELO_DATA: + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; + insn[1].imm = insn[0].imm + relo->sym_off; + insn[0].imm = obj->maps[relo->map_idx].fd; + break; + case RELO_EXTERN: + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; + insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; + insn[1].imm = relo->sym_off; + break; + case RELO_CALL: err = bpf_program__reloc_text(prog, obj, relo); if (err) return err; + break; + default: + pr_warn("relo #%d: bad relo type %d\n", i, relo->type); + return -EINVAL; } } @@ -3667,8 +4756,28 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) return err; } } + /* ensure .text is relocated first, as it's going to be copied as-is + * later for sub-program calls + */ + for (i = 0; i < obj->nr_programs; i++) { + prog = &obj->programs[i]; + if (prog->idx != obj->efile.text_shndx) + continue; + + err = bpf_program__relocate(prog, obj); + if (err) { + pr_warn("failed to relocate '%s'\n", prog->section_name); + return err; + } + break; + } + /* now relocate everything but .text, which by now is relocated + * properly, so we can copy raw sub-program instructions as is safely + */ for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; + if (prog->idx == obj->efile.text_shndx) + continue; err = bpf_program__relocate(prog, obj); if (err) { @@ -3679,6 +4788,10 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) return 0; } +static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj, + GElf_Shdr *shdr, + Elf_Data *data); + static int bpf_object__collect_reloc(struct bpf_object *obj) { int i, err; @@ -3699,6 +4812,15 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) return -LIBBPF_ERRNO__INTERNAL; } + if (idx == obj->efile.st_ops_shndx) { + err = bpf_object__collect_struct_ops_map_reloc(obj, + shdr, + data); + if (err) + return err; + continue; + } + prog = bpf_object__find_prog_by_idx(obj, idx); if (!prog) { pr_warn("relocation failed: no section(%d)\n", idx); @@ -3733,7 +4855,10 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.insns = insns; load_attr.insns_cnt = insns_cnt; load_attr.license = license; - if (prog->type == BPF_PROG_TYPE_TRACING) { + if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) { + load_attr.attach_btf_id = prog->attach_btf_id; + } else if (prog->type == BPF_PROG_TYPE_TRACING || + prog->type == BPF_PROG_TYPE_EXT) { load_attr.attach_prog_fd = prog->attach_prog_fd; load_attr.attach_btf_id = prog->attach_btf_id; } else { @@ -3778,6 +4903,7 @@ retry_load: ret = -errno; cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warn("load bpf program failed: %s\n", cp); + pr_perm_msg(ret); if (log_buf && log_buf[0] != '\0') { ret = -LIBBPF_ERRNO__VERIFY; @@ -3807,11 +4933,19 @@ out: return ret; } -int -bpf_program__load(struct bpf_program *prog, - char *license, __u32 kern_version) +static int libbpf_find_attach_btf_id(struct bpf_program *prog); + +int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) { - int err = 0, fd, i; + int err = 0, fd, i, btf_id; + + if (prog->type == BPF_PROG_TYPE_TRACING || + prog->type == BPF_PROG_TYPE_EXT) { + btf_id = libbpf_find_attach_btf_id(prog); + if (btf_id <= 0) + return btf_id; + prog->attach_btf_id = btf_id; + } if (prog->instances.nr < 0 || !prog->instances.fds) { if (prog->preprocessor) { @@ -3835,7 +4969,7 @@ bpf_program__load(struct bpf_program *prog, prog->section_name, prog->instances.nr); } err = load_program(prog, prog->insns, prog->insns_cnt, - license, kern_version, &fd); + license, kern_ver, &fd); if (!err) prog->instances.fds[0] = fd; goto out; @@ -3864,9 +4998,7 @@ bpf_program__load(struct bpf_program *prog, } err = load_program(prog, result.new_insn_ptr, - result.new_insn_cnt, - license, kern_version, &fd); - + result.new_insn_cnt, license, kern_ver, &fd); if (err) { pr_warn("Loading the %dth instance of program '%s' failed\n", i, prog->section_name); @@ -3910,20 +5042,14 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) return 0; } -static int libbpf_find_attach_btf_id(const char *name, - enum bpf_attach_type attach_type, - __u32 attach_prog_fd); static struct bpf_object * __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, - struct bpf_object_open_opts *opts) + const struct bpf_object_open_opts *opts) { - const char *pin_root_path; + const char *obj_name, *kconfig; struct bpf_program *prog; struct bpf_object *obj; - const char *obj_name; char tmp_name[64]; - bool relaxed_maps; - __u32 attach_prog_fd; int err; if (elf_version(EV_CURRENT) == EV_NONE) { @@ -3951,23 +5077,32 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, if (IS_ERR(obj)) return obj; - obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false); - relaxed_maps = OPTS_GET(opts, relaxed_maps, false); - pin_root_path = OPTS_GET(opts, pin_root_path, NULL); - attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); - - CHECK_ERR(bpf_object__elf_init(obj), err, out); - CHECK_ERR(bpf_object__check_endianness(obj), err, out); - CHECK_ERR(bpf_object__probe_caps(obj), err, out); - CHECK_ERR(bpf_object__elf_collect(obj, relaxed_maps, pin_root_path), - err, out); - CHECK_ERR(bpf_object__collect_reloc(obj), err, out); + kconfig = OPTS_GET(opts, kconfig, NULL); + if (kconfig) { + obj->kconfig = strdup(kconfig); + if (!obj->kconfig) + return ERR_PTR(-ENOMEM); + } + + err = bpf_object__elf_init(obj); + err = err ? : bpf_object__check_endianness(obj); + err = err ? : bpf_object__elf_collect(obj); + err = err ? : bpf_object__collect_externs(obj); + err = err ? : bpf_object__finalize_btf(obj); + err = err ? : bpf_object__init_maps(obj, opts); + err = err ? : bpf_object__init_prog_names(obj); + err = err ? : bpf_object__collect_reloc(obj); + if (err) + goto out; bpf_object__elf_finish(obj); bpf_object__for_each_program(prog, obj) { enum bpf_prog_type prog_type; enum bpf_attach_type attach_type; + if (prog->type != BPF_PROG_TYPE_UNSPEC) + continue; + err = libbpf_prog_type_by_name(prog->section_name, &prog_type, &attach_type); if (err == -ESRCH) @@ -3978,15 +5113,9 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, bpf_program__set_type(prog, prog_type); bpf_program__set_expected_attach_type(prog, attach_type); - if (prog_type == BPF_PROG_TYPE_TRACING) { - err = libbpf_find_attach_btf_id(prog->section_name, - attach_type, - attach_prog_fd); - if (err <= 0) - goto out; - prog->attach_btf_id = err; - prog->attach_prog_fd = attach_prog_fd; - } + if (prog_type == BPF_PROG_TYPE_TRACING || + prog_type == BPF_PROG_TYPE_EXT) + prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); } return obj; @@ -4026,7 +5155,7 @@ struct bpf_object *bpf_object__open(const char *path) } struct bpf_object * -bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts) +bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts) { if (!path) return ERR_PTR(-EINVAL); @@ -4038,7 +5167,7 @@ bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts) struct bpf_object * bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, - struct bpf_object_open_opts *opts) + const struct bpf_object_open_opts *opts) { if (!obj_buf || obj_buf_sz == 0) return ERR_PTR(-EINVAL); @@ -4070,8 +5199,11 @@ int bpf_object__unload(struct bpf_object *obj) if (!obj) return -EINVAL; - for (i = 0; i < obj->nr_maps; i++) + for (i = 0; i < obj->nr_maps; i++) { zclose(obj->maps[i].fd); + if (obj->maps[i].st_ops) + zfree(&obj->maps[i].st_ops->kern_vdata); + } for (i = 0; i < obj->nr_programs; i++) bpf_program__unload(&obj->programs[i]); @@ -4079,6 +5211,92 @@ int bpf_object__unload(struct bpf_object *obj) return 0; } +static int bpf_object__sanitize_maps(struct bpf_object *obj) +{ + struct bpf_map *m; + + bpf_object__for_each_map(m, obj) { + if (!bpf_map__is_internal(m)) + continue; + if (!obj->caps.global_data) { + pr_warn("kernel doesn't support global data\n"); + return -ENOTSUP; + } + if (!obj->caps.array_mmap) + m->def.map_flags ^= BPF_F_MMAPABLE; + } + + return 0; +} + +static int bpf_object__resolve_externs(struct bpf_object *obj, + const char *extra_kconfig) +{ + bool need_config = false; + struct extern_desc *ext; + int err, i; + void *data; + + if (obj->nr_extern == 0) + return 0; + + data = obj->maps[obj->kconfig_map_idx].mmaped; + + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + + if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { + void *ext_val = data + ext->data_off; + __u32 kver = get_kernel_version(); + + if (!kver) { + pr_warn("failed to get kernel version\n"); + return -EINVAL; + } + err = set_ext_value_num(ext, ext_val, kver); + if (err) + return err; + pr_debug("extern %s=0x%x\n", ext->name, kver); + } else if (strncmp(ext->name, "CONFIG_", 7) == 0) { + need_config = true; + } else { + pr_warn("unrecognized extern '%s'\n", ext->name); + return -EINVAL; + } + } + if (need_config && extra_kconfig) { + err = bpf_object__read_kconfig_mem(obj, extra_kconfig, data); + if (err) + return -EINVAL; + need_config = false; + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + if (!ext->is_set) { + need_config = true; + break; + } + } + } + if (need_config) { + err = bpf_object__read_kconfig_file(obj, data); + if (err) + return -EINVAL; + } + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + + if (!ext->is_set && !ext->is_weak) { + pr_warn("extern %s (strong) not resolved\n", ext->name); + return -ESRCH; + } else if (!ext->is_set) { + pr_debug("extern %s (weak) not resolved, defaulting to zero\n", + ext->name); + } + } + + return 0; +} + int bpf_object__load_xattr(struct bpf_object_load_attr *attr) { struct bpf_object *obj; @@ -4097,9 +5315,21 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) obj->loaded = true; - CHECK_ERR(bpf_object__create_maps(obj), err, out); - CHECK_ERR(bpf_object__relocate(obj, attr->target_btf_path), err, out); - CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out); + err = bpf_object__probe_caps(obj); + err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); + err = err ? : bpf_object__sanitize_and_load_btf(obj); + err = err ? : bpf_object__sanitize_maps(obj); + err = err ? : bpf_object__load_vmlinux_btf(obj); + err = err ? : bpf_object__init_kern_struct_ops_maps(obj); + err = err ? : bpf_object__create_maps(obj); + err = err ? : bpf_object__relocate(obj, attr->target_btf_path); + err = err ? : bpf_object__load_progs(obj, attr->log_level); + + btf__free(obj->btf_vmlinux); + obj->btf_vmlinux = NULL; + + if (err) + goto out; return 0; out: @@ -4670,17 +5900,33 @@ void bpf_object__close(struct bpf_object *obj) btf_ext__free(obj->btf_ext); for (i = 0; i < obj->nr_maps; i++) { - zfree(&obj->maps[i].name); - zfree(&obj->maps[i].pin_path); - if (obj->maps[i].clear_priv) - obj->maps[i].clear_priv(&obj->maps[i], - obj->maps[i].priv); - obj->maps[i].priv = NULL; - obj->maps[i].clear_priv = NULL; + struct bpf_map *map = &obj->maps[i]; + + if (map->clear_priv) + map->clear_priv(map, map->priv); + map->priv = NULL; + map->clear_priv = NULL; + + if (map->mmaped) { + munmap(map->mmaped, bpf_map_mmap_sz(map)); + map->mmaped = NULL; + } + + if (map->st_ops) { + zfree(&map->st_ops->data); + zfree(&map->st_ops->progs); + zfree(&map->st_ops->kern_func_off); + zfree(&map->st_ops); + } + + zfree(&map->name); + zfree(&map->pin_path); } - zfree(&obj->sections.rodata); - zfree(&obj->sections.data); + zfree(&obj->kconfig); + zfree(&obj->externs); + obj->nr_extern = 0; + zfree(&obj->maps); obj->nr_maps = 0; @@ -4820,6 +6066,11 @@ void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex) prog->prog_ifindex = ifindex; } +const char *bpf_program__name(const struct bpf_program *prog) +{ + return prog->name; +} + const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy) { const char *title; @@ -4936,6 +6187,8 @@ BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT); BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING); +BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS); +BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT); enum bpf_attach_type bpf_program__get_expected_attach_type(struct bpf_program *prog) @@ -4972,7 +6225,28 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog, */ #define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype) -static const struct { +#define SEC_DEF(sec_pfx, ptype, ...) { \ + .sec = sec_pfx, \ + .len = sizeof(sec_pfx) - 1, \ + .prog_type = BPF_PROG_TYPE_##ptype, \ + __VA_ARGS__ \ +} + +struct bpf_sec_def; + +typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec, + struct bpf_program *prog); + +static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, + struct bpf_program *prog); +static struct bpf_link *attach_tp(const struct bpf_sec_def *sec, + struct bpf_program *prog); +static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, + struct bpf_program *prog); +static struct bpf_link *attach_trace(const struct bpf_sec_def *sec, + struct bpf_program *prog); + +struct bpf_sec_def { const char *sec; size_t len; enum bpf_prog_type prog_type; @@ -4980,24 +6254,43 @@ static const struct { bool is_attachable; bool is_attach_btf; enum bpf_attach_type attach_type; -} section_names[] = { + attach_fn_t attach_fn; +}; + +static const struct bpf_sec_def section_defs[] = { BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), - BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("sk_reuseport", BPF_PROG_TYPE_SK_REUSEPORT), + SEC_DEF("kprobe/", KPROBE, + .attach_fn = attach_kprobe), BPF_PROG_SEC("uprobe/", BPF_PROG_TYPE_KPROBE), - BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE), + SEC_DEF("kretprobe/", KPROBE, + .attach_fn = attach_kprobe), BPF_PROG_SEC("uretprobe/", BPF_PROG_TYPE_KPROBE), BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS), BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT), - BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT), - BPF_PROG_SEC("tp/", BPF_PROG_TYPE_TRACEPOINT), - BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT), - BPF_PROG_SEC("raw_tp/", BPF_PROG_TYPE_RAW_TRACEPOINT), - BPF_PROG_BTF("tp_btf/", BPF_PROG_TYPE_TRACING, - BPF_TRACE_RAW_TP), - BPF_PROG_BTF("fentry/", BPF_PROG_TYPE_TRACING, - BPF_TRACE_FENTRY), - BPF_PROG_BTF("fexit/", BPF_PROG_TYPE_TRACING, - BPF_TRACE_FEXIT), + SEC_DEF("tracepoint/", TRACEPOINT, + .attach_fn = attach_tp), + SEC_DEF("tp/", TRACEPOINT, + .attach_fn = attach_tp), + SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT, + .attach_fn = attach_raw_tp), + SEC_DEF("raw_tp/", RAW_TRACEPOINT, + .attach_fn = attach_raw_tp), + SEC_DEF("tp_btf/", TRACING, + .expected_attach_type = BPF_TRACE_RAW_TP, + .is_attach_btf = true, + .attach_fn = attach_trace), + SEC_DEF("fentry/", TRACING, + .expected_attach_type = BPF_TRACE_FENTRY, + .is_attach_btf = true, + .attach_fn = attach_trace), + SEC_DEF("fexit/", TRACING, + .expected_attach_type = BPF_TRACE_FEXIT, + .is_attach_btf = true, + .attach_fn = attach_trace), + SEC_DEF("freplace/", EXT, + .is_attach_btf = true, + .attach_fn = attach_trace), BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), @@ -5052,6 +6345,7 @@ static const struct { BPF_CGROUP_GETSOCKOPT), BPF_EAPROG_SEC("cgroup/setsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT), + BPF_PROG_SEC("struct_ops", BPF_PROG_TYPE_STRUCT_OPS), }; #undef BPF_PROG_SEC_IMPL @@ -5059,12 +6353,26 @@ static const struct { #undef BPF_APROG_SEC #undef BPF_EAPROG_SEC #undef BPF_APROG_COMPAT +#undef SEC_DEF #define MAX_TYPE_NAME_SIZE 32 +static const struct bpf_sec_def *find_sec_def(const char *sec_name) +{ + int i, n = ARRAY_SIZE(section_defs); + + for (i = 0; i < n; i++) { + if (strncmp(sec_name, + section_defs[i].sec, section_defs[i].len)) + continue; + return §ion_defs[i]; + } + return NULL; +} + static char *libbpf_get_type_names(bool attach_type) { - int i, len = ARRAY_SIZE(section_names) * MAX_TYPE_NAME_SIZE; + int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE; char *buf; buf = malloc(len); @@ -5073,16 +6381,16 @@ static char *libbpf_get_type_names(bool attach_type) buf[0] = '\0'; /* Forge string buf with all available names */ - for (i = 0; i < ARRAY_SIZE(section_names); i++) { - if (attach_type && !section_names[i].is_attachable) + for (i = 0; i < ARRAY_SIZE(section_defs); i++) { + if (attach_type && !section_defs[i].is_attachable) continue; - if (strlen(buf) + strlen(section_names[i].sec) + 2 > len) { + if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) { free(buf); return NULL; } strcat(buf, " "); - strcat(buf, section_names[i].sec); + strcat(buf, section_defs[i].sec); } return buf; @@ -5091,57 +6399,205 @@ static char *libbpf_get_type_names(bool attach_type) int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, enum bpf_attach_type *expected_attach_type) { + const struct bpf_sec_def *sec_def; char *type_names; - int i; if (!name) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(section_names); i++) { - if (strncmp(name, section_names[i].sec, section_names[i].len)) - continue; - *prog_type = section_names[i].prog_type; - *expected_attach_type = section_names[i].expected_attach_type; + sec_def = find_sec_def(name); + if (sec_def) { + *prog_type = sec_def->prog_type; + *expected_attach_type = sec_def->expected_attach_type; return 0; } - pr_warn("failed to guess program type from ELF section '%s'\n", name); + + pr_debug("failed to guess program type from ELF section '%s'\n", name); type_names = libbpf_get_type_names(false); if (type_names != NULL) { - pr_info("supported section(type) names are:%s\n", type_names); + pr_debug("supported section(type) names are:%s\n", type_names); free(type_names); } return -ESRCH; } -#define BTF_PREFIX "btf_trace_" +static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, + size_t offset) +{ + struct bpf_map *map; + size_t i; + + for (i = 0; i < obj->nr_maps; i++) { + map = &obj->maps[i]; + if (!bpf_map__is_struct_ops(map)) + continue; + if (map->sec_offset <= offset && + offset - map->sec_offset < map->def.value_size) + return map; + } + + return NULL; +} + +/* Collect the reloc from ELF and populate the st_ops->progs[] */ +static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj, + GElf_Shdr *shdr, + Elf_Data *data) +{ + const struct btf_member *member; + struct bpf_struct_ops *st_ops; + struct bpf_program *prog; + unsigned int shdr_idx; + const struct btf *btf; + struct bpf_map *map; + Elf_Data *symbols; + unsigned int moff; + const char *name; + __u32 member_idx; + GElf_Sym sym; + GElf_Rel rel; + int i, nrels; + + symbols = obj->efile.symbols; + btf = obj->btf; + nrels = shdr->sh_size / shdr->sh_entsize; + for (i = 0; i < nrels; i++) { + if (!gelf_getrel(data, i, &rel)) { + pr_warn("struct_ops reloc: failed to get %d reloc\n", i); + return -LIBBPF_ERRNO__FORMAT; + } + + if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { + pr_warn("struct_ops reloc: symbol %zx not found\n", + (size_t)GELF_R_SYM(rel.r_info)); + return -LIBBPF_ERRNO__FORMAT; + } + + name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, + sym.st_name) ? : "<?>"; + map = find_struct_ops_map_by_offset(obj, rel.r_offset); + if (!map) { + pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n", + (size_t)rel.r_offset); + return -EINVAL; + } + + moff = rel.r_offset - map->sec_offset; + shdr_idx = sym.st_shndx; + st_ops = map->st_ops; + pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel.r_offset %zu map->sec_offset %zu name %d (\'%s\')\n", + map->name, + (long long)(rel.r_info >> 32), + (long long)sym.st_value, + shdr_idx, (size_t)rel.r_offset, + map->sec_offset, sym.st_name, name); + + if (shdr_idx >= SHN_LORESERVE) { + pr_warn("struct_ops reloc %s: rel.r_offset %zu shdr_idx %u unsupported non-static function\n", + map->name, (size_t)rel.r_offset, shdr_idx); + return -LIBBPF_ERRNO__RELOC; + } + + member = find_member_by_offset(st_ops->type, moff * 8); + if (!member) { + pr_warn("struct_ops reloc %s: cannot find member at moff %u\n", + map->name, moff); + return -EINVAL; + } + member_idx = member - btf_members(st_ops->type); + name = btf__name_by_offset(btf, member->name_off); + + if (!resolve_func_ptr(btf, member->type, NULL)) { + pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n", + map->name, name); + return -EINVAL; + } + + prog = bpf_object__find_prog_by_idx(obj, shdr_idx); + if (!prog) { + pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n", + map->name, shdr_idx, name); + return -EINVAL; + } + + if (prog->type == BPF_PROG_TYPE_UNSPEC) { + const struct bpf_sec_def *sec_def; + + sec_def = find_sec_def(prog->section_name); + if (sec_def && + sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) { + /* for pr_warn */ + prog->type = sec_def->prog_type; + goto invalid_prog; + } + + prog->type = BPF_PROG_TYPE_STRUCT_OPS; + prog->attach_btf_id = st_ops->type_id; + prog->expected_attach_type = member_idx; + } else if (prog->type != BPF_PROG_TYPE_STRUCT_OPS || + prog->attach_btf_id != st_ops->type_id || + prog->expected_attach_type != member_idx) { + goto invalid_prog; + } + st_ops->progs[member_idx] = prog; + } + + return 0; + +invalid_prog: + pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n", + map->name, prog->name, prog->section_name, prog->type, + prog->attach_btf_id, prog->expected_attach_type, name); + return -EINVAL; +} + +#define BTF_TRACE_PREFIX "btf_trace_" +#define BTF_MAX_NAME_SIZE 128 + +static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, + const char *name, __u32 kind) +{ + char btf_type_name[BTF_MAX_NAME_SIZE]; + int ret; + + ret = snprintf(btf_type_name, sizeof(btf_type_name), + "%s%s", prefix, name); + /* snprintf returns the number of characters written excluding the + * the terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it + * indicates truncation. + */ + if (ret < 0 || ret >= sizeof(btf_type_name)) + return -ENAMETOOLONG; + return btf__find_by_name_kind(btf, btf_type_name, kind); +} + +static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name, + enum bpf_attach_type attach_type) +{ + int err; + + if (attach_type == BPF_TRACE_RAW_TP) + err = find_btf_by_prefix_kind(btf, BTF_TRACE_PREFIX, name, + BTF_KIND_TYPEDEF); + else + err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); + + return err; +} + int libbpf_find_vmlinux_btf_id(const char *name, enum bpf_attach_type attach_type) { - struct btf *btf = bpf_core_find_kernel_btf(); - char raw_tp_btf[128] = BTF_PREFIX; - char *dst = raw_tp_btf + sizeof(BTF_PREFIX) - 1; - const char *btf_name; - int err = -EINVAL; - __u32 kind; + struct btf *btf; + btf = libbpf_find_kernel_btf(); if (IS_ERR(btf)) { pr_warn("vmlinux BTF is not found\n"); return -EINVAL; } - if (attach_type == BPF_TRACE_RAW_TP) { - /* prepend "btf_trace_" prefix per kernel convention */ - strncat(dst, name, sizeof(raw_tp_btf) - sizeof(BTF_PREFIX)); - btf_name = raw_tp_btf; - kind = BTF_KIND_TYPEDEF; - } else { - btf_name = name; - kind = BTF_KIND_FUNC; - } - err = btf__find_by_name_kind(btf, btf_name, kind); - btf__free(btf); - return err; + return __find_vmlinux_btf_id(btf, name, attach_type); } static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) @@ -5177,26 +6633,28 @@ out: return err; } -static int libbpf_find_attach_btf_id(const char *name, - enum bpf_attach_type attach_type, - __u32 attach_prog_fd) +static int libbpf_find_attach_btf_id(struct bpf_program *prog) { + enum bpf_attach_type attach_type = prog->expected_attach_type; + __u32 attach_prog_fd = prog->attach_prog_fd; + const char *name = prog->section_name; int i, err; if (!name) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(section_names); i++) { - if (!section_names[i].is_attach_btf) + for (i = 0; i < ARRAY_SIZE(section_defs); i++) { + if (!section_defs[i].is_attach_btf) continue; - if (strncmp(name, section_names[i].sec, section_names[i].len)) + if (strncmp(name, section_defs[i].sec, section_defs[i].len)) continue; if (attach_prog_fd) - err = libbpf_find_prog_btf_id(name + section_names[i].len, + err = libbpf_find_prog_btf_id(name + section_defs[i].len, attach_prog_fd); else - err = libbpf_find_vmlinux_btf_id(name + section_names[i].len, - attach_type); + err = __find_vmlinux_btf_id(prog->obj->btf_vmlinux, + name + section_defs[i].len, + attach_type); if (err <= 0) pr_warn("%s is not found in vmlinux BTF\n", name); return err; @@ -5214,18 +6672,18 @@ int libbpf_attach_type_by_name(const char *name, if (!name) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(section_names); i++) { - if (strncmp(name, section_names[i].sec, section_names[i].len)) + for (i = 0; i < ARRAY_SIZE(section_defs); i++) { + if (strncmp(name, section_defs[i].sec, section_defs[i].len)) continue; - if (!section_names[i].is_attachable) + if (!section_defs[i].is_attachable) return -EINVAL; - *attach_type = section_names[i].attach_type; + *attach_type = section_defs[i].attach_type; return 0; } - pr_warn("failed to guess attach type based on ELF section name '%s'\n", name); + pr_debug("failed to guess attach type based on ELF section name '%s'\n", name); type_names = libbpf_get_type_names(true); if (type_names != NULL) { - pr_info("attachable section(type) names are:%s\n", type_names); + pr_debug("attachable section(type) names are:%s\n", type_names); free(type_names); } @@ -5466,17 +6924,37 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, } struct bpf_link { + int (*detach)(struct bpf_link *link); int (*destroy)(struct bpf_link *link); + bool disconnected; }; +/* Release "ownership" of underlying BPF resource (typically, BPF program + * attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected + * link, when destructed through bpf_link__destroy() call won't attempt to + * detach/unregisted that BPF resource. This is useful in situations where, + * say, attached BPF program has to outlive userspace program that attached it + * in the system. Depending on type of BPF program, though, there might be + * additional steps (like pinning BPF program in BPF FS) necessary to ensure + * exit of userspace program doesn't trigger automatic detachment and clean up + * inside the kernel. + */ +void bpf_link__disconnect(struct bpf_link *link) +{ + link->disconnected = true; +} + int bpf_link__destroy(struct bpf_link *link) { - int err; + int err = 0; if (!link) return 0; - err = link->destroy(link); + if (!link->disconnected && link->detach) + err = link->detach(link); + if (link->destroy) + link->destroy(link); free(link); return err; @@ -5487,7 +6965,7 @@ struct bpf_link_fd { int fd; /* hook FD */ }; -static int bpf_link__destroy_perf_event(struct bpf_link *link) +static int bpf_link__detach_perf_event(struct bpf_link *link) { struct bpf_link_fd *l = (void *)link; int err; @@ -5519,10 +6997,10 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, return ERR_PTR(-EINVAL); } - link = malloc(sizeof(*link)); + link = calloc(1, sizeof(*link)); if (!link) return ERR_PTR(-ENOMEM); - link->link.destroy = &bpf_link__destroy_perf_event; + link->link.detach = &bpf_link__detach_perf_event; link->fd = pfd; if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { @@ -5679,6 +7157,18 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, return link; } +static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, + struct bpf_program *prog) +{ + const char *func_name; + bool retprobe; + + func_name = bpf_program__title(prog, false) + sec->len; + retprobe = strcmp(sec->sec, "kretprobe/") == 0; + + return bpf_program__attach_kprobe(prog, retprobe, func_name); +} + struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe, pid_t pid, const char *binary_path, @@ -5791,7 +7281,33 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog, return link; } -static int bpf_link__destroy_fd(struct bpf_link *link) +static struct bpf_link *attach_tp(const struct bpf_sec_def *sec, + struct bpf_program *prog) +{ + char *sec_name, *tp_cat, *tp_name; + struct bpf_link *link; + + sec_name = strdup(bpf_program__title(prog, false)); + if (!sec_name) + return ERR_PTR(-ENOMEM); + + /* extract "tp/<category>/<name>" */ + tp_cat = sec_name + sec->len; + tp_name = strchr(tp_cat, '/'); + if (!tp_name) { + link = ERR_PTR(-EINVAL); + goto out; + } + *tp_name = '\0'; + tp_name++; + + link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name); +out: + free(sec_name); + return link; +} + +static int bpf_link__detach_fd(struct bpf_link *link) { struct bpf_link_fd *l = (void *)link; @@ -5812,10 +7328,10 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, return ERR_PTR(-EINVAL); } - link = malloc(sizeof(*link)); + link = calloc(1, sizeof(*link)); if (!link) return ERR_PTR(-ENOMEM); - link->link.destroy = &bpf_link__destroy_fd; + link->link.detach = &bpf_link__detach_fd; pfd = bpf_raw_tracepoint_open(tp_name, prog_fd); if (pfd < 0) { @@ -5830,6 +7346,14 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, return (struct bpf_link *)link; } +static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, + struct bpf_program *prog) +{ + const char *tp_name = bpf_program__title(prog, false) + sec->len; + + return bpf_program__attach_raw_tracepoint(prog, tp_name); +} + struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) { char errmsg[STRERR_BUFSIZE]; @@ -5843,10 +7367,10 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) return ERR_PTR(-EINVAL); } - link = malloc(sizeof(*link)); + link = calloc(1, sizeof(*link)); if (!link) return ERR_PTR(-ENOMEM); - link->link.destroy = &bpf_link__destroy_fd; + link->link.detach = &bpf_link__detach_fd; pfd = bpf_raw_tracepoint_open(NULL, prog_fd); if (pfd < 0) { @@ -5861,6 +7385,75 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) return (struct bpf_link *)link; } +static struct bpf_link *attach_trace(const struct bpf_sec_def *sec, + struct bpf_program *prog) +{ + return bpf_program__attach_trace(prog); +} + +struct bpf_link *bpf_program__attach(struct bpf_program *prog) +{ + const struct bpf_sec_def *sec_def; + + sec_def = find_sec_def(bpf_program__title(prog, false)); + if (!sec_def || !sec_def->attach_fn) + return ERR_PTR(-ESRCH); + + return sec_def->attach_fn(sec_def, prog); +} + +static int bpf_link__detach_struct_ops(struct bpf_link *link) +{ + struct bpf_link_fd *l = (void *)link; + __u32 zero = 0; + + if (bpf_map_delete_elem(l->fd, &zero)) + return -errno; + + return 0; +} + +struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map) +{ + struct bpf_struct_ops *st_ops; + struct bpf_link_fd *link; + __u32 i, zero = 0; + int err; + + if (!bpf_map__is_struct_ops(map) || map->fd == -1) + return ERR_PTR(-EINVAL); + + link = calloc(1, sizeof(*link)); + if (!link) + return ERR_PTR(-EINVAL); + + st_ops = map->st_ops; + for (i = 0; i < btf_vlen(st_ops->type); i++) { + struct bpf_program *prog = st_ops->progs[i]; + void *kern_data; + int prog_fd; + + if (!prog) + continue; + + prog_fd = bpf_program__fd(prog); + kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i]; + *(unsigned long *)kern_data = prog_fd; + } + + err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0); + if (err) { + err = -errno; + free(link); + return ERR_PTR(err); + } + + link->link.detach = bpf_link__detach_struct_ops; + link->fd = map->fd; + + return (struct bpf_link *)link; +} + enum bpf_perf_event_ret bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, void **copy_mem, size_t *copy_size, @@ -5944,7 +7537,7 @@ struct perf_buffer { size_t mmap_size; struct perf_cpu_buf **cpu_bufs; struct epoll_event *events; - int cpu_cnt; + int cpu_cnt; /* number of allocated CPU buffers */ int epoll_fd; /* perf event FD */ int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */ }; @@ -6078,11 +7671,13 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt, static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, struct perf_buffer_params *p) { + const char *online_cpus_file = "/sys/devices/system/cpu/online"; struct bpf_map_info map = {}; char msg[STRERR_BUFSIZE]; struct perf_buffer *pb; + bool *online = NULL; __u32 map_info_len; - int err, i; + int err, i, j, n; if (page_cnt & (page_cnt - 1)) { pr_warn("page count should be power of two, but is %zu\n", @@ -6151,20 +7746,32 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, goto error; } - for (i = 0; i < pb->cpu_cnt; i++) { + err = parse_cpu_mask_file(online_cpus_file, &online, &n); + if (err) { + pr_warn("failed to get online CPU mask: %d\n", err); + goto error; + } + + for (i = 0, j = 0; i < pb->cpu_cnt; i++) { struct perf_cpu_buf *cpu_buf; int cpu, map_key; cpu = p->cpu_cnt > 0 ? p->cpus[i] : i; map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i; + /* in case user didn't explicitly requested particular CPUs to + * be attached to, skip offline/not present CPUs + */ + if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu])) + continue; + cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key); if (IS_ERR(cpu_buf)) { err = PTR_ERR(cpu_buf); goto error; } - pb->cpu_bufs[i] = cpu_buf; + pb->cpu_bufs[j] = cpu_buf; err = bpf_map_update_elem(pb->map_fd, &map_key, &cpu_buf->fd, 0); @@ -6176,21 +7783,25 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, goto error; } - pb->events[i].events = EPOLLIN; - pb->events[i].data.ptr = cpu_buf; + pb->events[j].events = EPOLLIN; + pb->events[j].data.ptr = cpu_buf; if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd, - &pb->events[i]) < 0) { + &pb->events[j]) < 0) { err = -errno; pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n", cpu, cpu_buf->fd, libbpf_strerror_r(err, msg, sizeof(msg))); goto error; } + j++; } + pb->cpu_cnt = j; + free(online); return pb; error: + free(online); if (pb) perf_buffer__free(pb); return ERR_PTR(err); @@ -6521,62 +8132,267 @@ void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear) } } -int libbpf_num_possible_cpus(void) +int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz) { - static const char *fcpu = "/sys/devices/system/cpu/possible"; - int len = 0, n = 0, il = 0, ir = 0; - unsigned int start = 0, end = 0; - int tmp_cpus = 0; - static int cpus; - char buf[128]; - int error = 0; - int fd = -1; + int err = 0, n, len, start, end = -1; + bool *tmp; - tmp_cpus = READ_ONCE(cpus); - if (tmp_cpus > 0) - return tmp_cpus; + *mask = NULL; + *mask_sz = 0; + + /* Each sub string separated by ',' has format \d+-\d+ or \d+ */ + while (*s) { + if (*s == ',' || *s == '\n') { + s++; + continue; + } + n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len); + if (n <= 0 || n > 2) { + pr_warn("Failed to get CPU range %s: %d\n", s, n); + err = -EINVAL; + goto cleanup; + } else if (n == 1) { + end = start; + } + if (start < 0 || start > end) { + pr_warn("Invalid CPU range [%d,%d] in %s\n", + start, end, s); + err = -EINVAL; + goto cleanup; + } + tmp = realloc(*mask, end + 1); + if (!tmp) { + err = -ENOMEM; + goto cleanup; + } + *mask = tmp; + memset(tmp + *mask_sz, 0, start - *mask_sz); + memset(tmp + start, 1, end - start + 1); + *mask_sz = end + 1; + s += len; + } + if (!*mask_sz) { + pr_warn("Empty CPU range\n"); + return -EINVAL; + } + return 0; +cleanup: + free(*mask); + *mask = NULL; + return err; +} + +int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz) +{ + int fd, err = 0, len; + char buf[128]; fd = open(fcpu, O_RDONLY); if (fd < 0) { - error = errno; - pr_warn("Failed to open file %s: %s\n", fcpu, strerror(error)); - return -error; + err = -errno; + pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err); + return err; } len = read(fd, buf, sizeof(buf)); close(fd); if (len <= 0) { - error = len ? errno : EINVAL; - pr_warn("Failed to read # of possible cpus from %s: %s\n", - fcpu, strerror(error)); - return -error; + err = len ? -errno : -EINVAL; + pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err); + return err; } - if (len == sizeof(buf)) { - pr_warn("File %s size overflow\n", fcpu); - return -EOVERFLOW; + if (len >= sizeof(buf)) { + pr_warn("CPU mask is too big in file %s\n", fcpu); + return -E2BIG; } buf[len] = '\0'; - for (ir = 0, tmp_cpus = 0; ir <= len; ir++) { - /* Each sub string separated by ',' has format \d+-\d+ or \d+ */ - if (buf[ir] == ',' || buf[ir] == '\0') { - buf[ir] = '\0'; - n = sscanf(&buf[il], "%u-%u", &start, &end); - if (n <= 0) { - pr_warn("Failed to get # CPUs from %s\n", - &buf[il]); - return -EINVAL; - } else if (n == 1) { - end = start; - } - tmp_cpus += end - start + 1; - il = ir + 1; - } - } - if (tmp_cpus <= 0) { - pr_warn("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu); - return -EINVAL; + return parse_cpu_mask_str(buf, mask, mask_sz); +} + +int libbpf_num_possible_cpus(void) +{ + static const char *fcpu = "/sys/devices/system/cpu/possible"; + static int cpus; + int err, n, i, tmp_cpus; + bool *mask; + + tmp_cpus = READ_ONCE(cpus); + if (tmp_cpus > 0) + return tmp_cpus; + + err = parse_cpu_mask_file(fcpu, &mask, &n); + if (err) + return err; + + tmp_cpus = 0; + for (i = 0; i < n; i++) { + if (mask[i]) + tmp_cpus++; } + free(mask); WRITE_ONCE(cpus, tmp_cpus); return tmp_cpus; } + +int bpf_object__open_skeleton(struct bpf_object_skeleton *s, + const struct bpf_object_open_opts *opts) +{ + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts, + .object_name = s->name, + ); + struct bpf_object *obj; + int i; + + /* Attempt to preserve opts->object_name, unless overriden by user + * explicitly. Overwriting object name for skeletons is discouraged, + * as it breaks global data maps, because they contain object name + * prefix as their own map name prefix. When skeleton is generated, + * bpftool is making an assumption that this name will stay the same. + */ + if (opts) { + memcpy(&skel_opts, opts, sizeof(*opts)); + if (!opts->object_name) + skel_opts.object_name = s->name; + } + + obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts); + if (IS_ERR(obj)) { + pr_warn("failed to initialize skeleton BPF object '%s': %ld\n", + s->name, PTR_ERR(obj)); + return PTR_ERR(obj); + } + + *s->obj = obj; + + for (i = 0; i < s->map_cnt; i++) { + struct bpf_map **map = s->maps[i].map; + const char *name = s->maps[i].name; + void **mmaped = s->maps[i].mmaped; + + *map = bpf_object__find_map_by_name(obj, name); + if (!*map) { + pr_warn("failed to find skeleton map '%s'\n", name); + return -ESRCH; + } + + /* externs shouldn't be pre-setup from user code */ + if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG) + *mmaped = (*map)->mmaped; + } + + for (i = 0; i < s->prog_cnt; i++) { + struct bpf_program **prog = s->progs[i].prog; + const char *name = s->progs[i].name; + + *prog = bpf_object__find_program_by_name(obj, name); + if (!*prog) { + pr_warn("failed to find skeleton program '%s'\n", name); + return -ESRCH; + } + } + + return 0; +} + +int bpf_object__load_skeleton(struct bpf_object_skeleton *s) +{ + int i, err; + + err = bpf_object__load(*s->obj); + if (err) { + pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err); + return err; + } + + for (i = 0; i < s->map_cnt; i++) { + struct bpf_map *map = *s->maps[i].map; + size_t mmap_sz = bpf_map_mmap_sz(map); + int prot, map_fd = bpf_map__fd(map); + void **mmaped = s->maps[i].mmaped; + + if (!mmaped) + continue; + + if (!(map->def.map_flags & BPF_F_MMAPABLE)) { + *mmaped = NULL; + continue; + } + + if (map->def.map_flags & BPF_F_RDONLY_PROG) + prot = PROT_READ; + else + prot = PROT_READ | PROT_WRITE; + + /* Remap anonymous mmap()-ed "map initialization image" as + * a BPF map-backed mmap()-ed memory, but preserving the same + * memory address. This will cause kernel to change process' + * page table to point to a different piece of kernel memory, + * but from userspace point of view memory address (and its + * contents, being identical at this point) will stay the + * same. This mapping will be released by bpf_object__close() + * as per normal clean up procedure, so we don't need to worry + * about it from skeleton's clean up perspective. + */ + *mmaped = mmap(map->mmaped, mmap_sz, prot, + MAP_SHARED | MAP_FIXED, map_fd, 0); + if (*mmaped == MAP_FAILED) { + err = -errno; + *mmaped = NULL; + pr_warn("failed to re-mmap() map '%s': %d\n", + bpf_map__name(map), err); + return err; + } + } + + return 0; +} + +int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) +{ + int i; + + for (i = 0; i < s->prog_cnt; i++) { + struct bpf_program *prog = *s->progs[i].prog; + struct bpf_link **link = s->progs[i].link; + const struct bpf_sec_def *sec_def; + const char *sec_name = bpf_program__title(prog, false); + + sec_def = find_sec_def(sec_name); + if (!sec_def || !sec_def->attach_fn) + continue; + + *link = sec_def->attach_fn(sec_def, prog); + if (IS_ERR(*link)) { + pr_warn("failed to auto-attach program '%s': %ld\n", + bpf_program__name(prog), PTR_ERR(*link)); + return PTR_ERR(*link); + } + } + + return 0; +} + +void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) +{ + int i; + + for (i = 0; i < s->prog_cnt; i++) { + struct bpf_link **link = s->progs[i].link; + + if (!IS_ERR_OR_NULL(*link)) + bpf_link__destroy(*link); + *link = NULL; + } +} + +void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) +{ + if (s->progs) + bpf_object__detach_skeleton(s); + if (s->obj) + bpf_object__close(*s->obj); + free(s->maps); + free(s->progs); + free(s); +} diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 0dbf4bfba0c4..3fe12c9d1f92 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -17,14 +17,12 @@ #include <sys/types.h> // for size_t #include <linux/bpf.h> +#include "libbpf_common.h" + #ifdef __cplusplus extern "C" { #endif -#ifndef LIBBPF_API -#define LIBBPF_API __attribute__((visibility("default"))) -#endif - enum libbpf_errno { __LIBBPF_ERRNO__START = 4000, @@ -67,28 +65,6 @@ struct bpf_object_open_attr { enum bpf_prog_type prog_type; }; -/* Helper macro to declare and initialize libbpf options struct - * - * This dance with uninitialized declaration, followed by memset to zero, - * followed by assignment using compound literal syntax is done to preserve - * ability to use a nice struct field initialization syntax and **hopefully** - * have all the padding bytes initialized to zero. It's not guaranteed though, - * when copying literal, that compiler won't copy garbage in literal's padding - * bytes, but that's the best way I've found and it seems to work in practice. - * - * Macro declares opts struct of given type and name, zero-initializes, - * including any extra padding, it with memset() and then assigns initial - * values provided by users in struct initializer-syntax as varargs. - */ -#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \ - struct TYPE NAME = ({ \ - memset(&NAME, 0, sizeof(struct TYPE)); \ - (struct TYPE) { \ - .sz = sizeof(struct TYPE), \ - __VA_ARGS__ \ - }; \ - }) - struct bpf_object_open_opts { /* size of this struct, for forward/backward compatiblity */ size_t sz; @@ -101,7 +77,11 @@ struct bpf_object_open_opts { const char *object_name; /* parse map definitions non-strictly, allowing extra attributes/data */ bool relaxed_maps; - /* process CO-RE relocations non-strictly, allowing them to fail */ + /* DEPRECATED: handle CO-RE relocations non-strictly, allowing failures. + * Value is ignored. Relocations always are processed non-strictly. + * Non-relocatable instructions are replaced with invalid ones to + * prevent accidental errors. + * */ bool relaxed_core_relocs; /* maps that set the 'pinning' attribute in their definition will have * their pin_path attribute set to a file in this directory, and be @@ -109,15 +89,19 @@ struct bpf_object_open_opts { */ const char *pin_root_path; __u32 attach_prog_fd; + /* Additional kernel config content that augments and overrides + * system Kconfig for CONFIG_xxx externs. + */ + const char *kconfig; }; -#define bpf_object_open_opts__last_field attach_prog_fd +#define bpf_object_open_opts__last_field kconfig LIBBPF_API struct bpf_object *bpf_object__open(const char *path); LIBBPF_API struct bpf_object * -bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts); +bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts); LIBBPF_API struct bpf_object * bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, - struct bpf_object_open_opts *opts); + const struct bpf_object_open_opts *opts); /* deprecated bpf_object__open variants */ LIBBPF_API struct bpf_object * @@ -126,11 +110,6 @@ bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, LIBBPF_API struct bpf_object * bpf_object__open_xattr(struct bpf_object_open_attr *attr); -int bpf_object__section_size(const struct bpf_object *obj, const char *name, - __u32 *size); -int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, - __u32 *off); - enum libbpf_pin_type { LIBBPF_PIN_NONE, /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ @@ -161,6 +140,7 @@ struct bpf_object_load_attr { LIBBPF_API int bpf_object__load(struct bpf_object *obj); LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr); LIBBPF_API int bpf_object__unload(struct bpf_object *obj); + LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj); LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj); @@ -171,6 +151,9 @@ LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj); LIBBPF_API struct bpf_program * bpf_object__find_program_by_title(const struct bpf_object *obj, const char *title); +LIBBPF_API struct bpf_program * +bpf_object__find_program_by_name(const struct bpf_object *obj, + const char *name); LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev); #define bpf_object__for_each_safe(pos, tmp) \ @@ -214,6 +197,7 @@ LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog); LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex); +LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog); LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy); @@ -235,9 +219,12 @@ LIBBPF_API void bpf_program__unload(struct bpf_program *prog); struct bpf_link; +LIBBPF_API void bpf_link__disconnect(struct bpf_link *link); LIBBPF_API int bpf_link__destroy(struct bpf_link *link); LIBBPF_API struct bpf_link * +bpf_program__attach(struct bpf_program *prog); +LIBBPF_API struct bpf_link * bpf_program__attach_perf_event(struct bpf_program *prog, int pfd); LIBBPF_API struct bpf_link * bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe, @@ -256,6 +243,8 @@ bpf_program__attach_raw_tracepoint(struct bpf_program *prog, LIBBPF_API struct bpf_link * bpf_program__attach_trace(struct bpf_program *prog); +struct bpf_map; +LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map); struct bpf_insn; /* @@ -332,6 +321,8 @@ LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog); LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog); LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog); LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog); LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog); LIBBPF_API void bpf_program__set_type(struct bpf_program *prog, @@ -352,6 +343,8 @@ LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog); LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog); LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog); LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog); /* * No need for __attribute__((packed)), all members of 'bpf_map_def' @@ -371,7 +364,6 @@ struct bpf_map_def { * The 'struct bpf_map' in include/linux/bpf.h is internal to the kernel, * so no need to worry about a name clash. */ -struct bpf_map; LIBBPF_API struct bpf_map * bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name); @@ -512,18 +504,6 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, void **copy_mem, size_t *copy_size, bpf_perf_event_print_t fn, void *private_data); -struct nlattr; -typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); -int libbpf_netlink_open(unsigned int *nl_pid); -int libbpf_nl_get_link(int sock, unsigned int nl_pid, - libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie); -int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex, - libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie); -int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, - libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie); -int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, - libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie); - struct bpf_prog_linfo; struct bpf_prog_info; @@ -550,6 +530,7 @@ LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex); LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, __u32 ifindex); +LIBBPF_API bool bpf_probe_large_insn_limit(__u32 ifindex); /* * Get bpf_prog_info in continuous memory @@ -630,6 +611,50 @@ bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear); */ LIBBPF_API int libbpf_num_possible_cpus(void); +struct bpf_map_skeleton { + const char *name; + struct bpf_map **map; + void **mmaped; +}; + +struct bpf_prog_skeleton { + const char *name; + struct bpf_program **prog; + struct bpf_link **link; +}; + +struct bpf_object_skeleton { + size_t sz; /* size of this struct, for forward/backward compatibility */ + + const char *name; + void *data; + size_t data_sz; + + struct bpf_object **obj; + + int map_cnt; + int map_skel_sz; /* sizeof(struct bpf_skeleton_map) */ + struct bpf_map_skeleton *maps; + + int prog_cnt; + int prog_skel_sz; /* sizeof(struct bpf_skeleton_prog) */ + struct bpf_prog_skeleton *progs; +}; + +LIBBPF_API int +bpf_object__open_skeleton(struct bpf_object_skeleton *s, + const struct bpf_object_open_opts *opts); +LIBBPF_API int bpf_object__load_skeleton(struct bpf_object_skeleton *s); +LIBBPF_API int bpf_object__attach_skeleton(struct bpf_object_skeleton *s); +LIBBPF_API void bpf_object__detach_skeleton(struct bpf_object_skeleton *s); +LIBBPF_API void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s); + +enum libbpf_tristate { + TRI_NO = 0, + TRI_YES = 1, + TRI_MODULE = 2, +}; + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 8ddc2c40e482..b035122142bb 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -208,3 +208,30 @@ LIBBPF_0.0.6 { btf__find_by_name_kind; libbpf_find_vmlinux_btf_id; } LIBBPF_0.0.5; + +LIBBPF_0.0.7 { + global: + btf_dump__emit_type_decl; + bpf_link__disconnect; + bpf_map__attach_struct_ops; + bpf_map_delete_batch; + bpf_map_lookup_and_delete_batch; + bpf_map_lookup_batch; + bpf_map_update_batch; + bpf_object__find_program_by_name; + bpf_object__attach_skeleton; + bpf_object__destroy_skeleton; + bpf_object__detach_skeleton; + bpf_object__load_skeleton; + bpf_object__open_skeleton; + bpf_probe_large_insn_limit; + bpf_prog_attach_xattr; + bpf_program__attach; + bpf_program__name; + bpf_program__is_extension; + bpf_program__is_struct_ops; + bpf_program__set_extension; + bpf_program__set_struct_ops; + btf__align_of; + libbpf_find_kernel_btf; +} LIBBPF_0.0.6; diff --git a/tools/lib/bpf/libbpf.pc.template b/tools/lib/bpf/libbpf.pc.template index ac17fcef2108..b45ed534bdfb 100644 --- a/tools/lib/bpf/libbpf.pc.template +++ b/tools/lib/bpf/libbpf.pc.template @@ -8,5 +8,5 @@ Name: libbpf Description: BPF library Version: @VERSION@ Libs: -L${libdir} -lbpf -Requires.private: libelf +Requires.private: libelf zlib Cflags: -I${includedir} diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h new file mode 100644 index 000000000000..a23ae1ac27eb --- /dev/null +++ b/tools/lib/bpf/libbpf_common.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +/* + * Common user-facing libbpf helpers. + * + * Copyright (c) 2019 Facebook + */ + +#ifndef __LIBBPF_LIBBPF_COMMON_H +#define __LIBBPF_LIBBPF_COMMON_H + +#include <string.h> + +#ifndef LIBBPF_API +#define LIBBPF_API __attribute__((visibility("default"))) +#endif + +/* Helper macro to declare and initialize libbpf options struct + * + * This dance with uninitialized declaration, followed by memset to zero, + * followed by assignment using compound literal syntax is done to preserve + * ability to use a nice struct field initialization syntax and **hopefully** + * have all the padding bytes initialized to zero. It's not guaranteed though, + * when copying literal, that compiler won't copy garbage in literal's padding + * bytes, but that's the best way I've found and it seems to work in practice. + * + * Macro declares opts struct of given type and name, zero-initializes, + * including any extra padding, it with memset() and then assigns initial + * values provided by users in struct initializer-syntax as varargs. + */ +#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \ + struct TYPE NAME = ({ \ + memset(&NAME, 0, sizeof(struct TYPE)); \ + (struct TYPE) { \ + .sz = sizeof(struct TYPE), \ + __VA_ARGS__ \ + }; \ + }) + +#endif /* __LIBBPF_LIBBPF_COMMON_H */ diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c index 4343e40588c6..0afb51f7a919 100644 --- a/tools/lib/bpf/libbpf_errno.c +++ b/tools/lib/bpf/libbpf_errno.c @@ -13,6 +13,9 @@ #include "libbpf.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + #define ERRNO_OFFSET(e) ((e) - __LIBBPF_ERRNO__START) #define ERRCODE_OFFSET(c) ERRNO_OFFSET(LIBBPF_ERRNO__##c) #define NR_ERRNO (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START) diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 97ac17a64a58..8c3afbd97747 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -76,7 +76,7 @@ static inline bool libbpf_validate_opts(const char *opts, for (i = opts_sz; i < user_sz; i++) { if (opts[i]) { - pr_warn("%s has non-zero extra bytes", + pr_warn("%s has non-zero extra bytes\n", type_name); return false; } @@ -95,9 +95,28 @@ static inline bool libbpf_validate_opts(const char *opts, #define OPTS_GET(opts, field, fallback_value) \ (OPTS_HAS(opts, field) ? (opts)->field : fallback_value) +int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); +int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, const char *str_sec, size_t str_len); +int bpf_object__section_size(const struct bpf_object *obj, const char *name, + __u32 *size); +int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, + __u32 *off); + +struct nlattr; +typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); +int libbpf_netlink_open(unsigned int *nl_pid); +int libbpf_nl_get_link(int sock, unsigned int nl_pid, + libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie); +int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex, + libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie); +int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, + libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie); +int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, + libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie); + struct btf_ext_info { /* * info points to the individual info section (e.g. func_info and diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index a9eb8b322671..b782ebef6ac9 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -17,6 +17,9 @@ #include "libbpf.h" #include "libbpf_internal.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + static bool grep(const char *buffer, const char *pattern) { return !!strstr(buffer, pattern); @@ -103,6 +106,8 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_TRACING: + case BPF_PROG_TYPE_STRUCT_OPS: + case BPF_PROG_TYPE_EXT: default: break; } @@ -251,6 +256,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) case BPF_MAP_TYPE_XSKMAP: case BPF_MAP_TYPE_SOCKHASH: case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: + case BPF_MAP_TYPE_STRUCT_OPS: default: break; } @@ -321,3 +327,24 @@ bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, return res; } + +/* + * Probe for availability of kernel commit (5.3): + * + * c04c0d2b968a ("bpf: increase complexity limit and maximum program size") + */ +bool bpf_probe_large_insn_limit(__u32 ifindex) +{ + struct bpf_insn insns[BPF_MAXINSNS + 1]; + int i; + + for (i = 0; i < BPF_MAXINSNS; i++) + insns[i] = BPF_MOV64_IMM(BPF_REG_0, 1); + insns[BPF_MAXINSNS] = BPF_EXIT_INSN(); + + errno = 0; + probe_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0, + ifindex); + + return errno != E2BIG && errno != EINVAL; +} diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 5065c1aa1061..431bd25c6cdb 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -15,6 +15,9 @@ #include "libbpf_internal.h" #include "nlattr.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + #ifndef SOL_NETLINK #define SOL_NETLINK 270 #endif diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c index 8db44bbfc66d..0ad41dfea8eb 100644 --- a/tools/lib/bpf/nlattr.c +++ b/tools/lib/bpf/nlattr.c @@ -13,6 +13,9 @@ #include <string.h> #include <stdio.h> +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + static uint16_t nla_attr_minlen[LIBBPF_NLA_TYPE_MAX+1] = { [LIBBPF_NLA_U8] = sizeof(uint8_t), [LIBBPF_NLA_U16] = sizeof(uint16_t), diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c index b8064eedc177..146da01979c7 100644 --- a/tools/lib/bpf/str_error.c +++ b/tools/lib/bpf/str_error.c @@ -4,6 +4,9 @@ #include <stdio.h> #include "str_error.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + /* * Wrapper to allow for building in non-GNU systems such as Alpine Linux's musl * libc, while checking strerror_r() return to avoid having to check this in diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 8e0ffa800a71..9807903f121e 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -32,6 +32,9 @@ #include "libbpf_internal.h" #include "xsk.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + #ifndef SOL_XDP #define SOL_XDP 283 #endif |