diff options
Diffstat (limited to 'tools/lib/bpf/libbpf.c')
-rw-r--r-- | tools/lib/bpf/libbpf.c | 1741 |
1 files changed, 1454 insertions, 287 deletions
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 3f09772192f1..7513165b104f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -18,6 +18,7 @@ #include <stdarg.h> #include <libgen.h> #include <inttypes.h> +#include <limits.h> #include <string.h> #include <unistd.h> #include <endian.h> @@ -41,9 +42,11 @@ #include <sys/types.h> #include <sys/vfs.h> #include <sys/utsname.h> +#include <sys/resource.h> #include <tools/libc_compat.h> #include <libelf.h> #include <gelf.h> +#include <zlib.h> #include "libbpf.h" #include "bpf.h" @@ -99,14 +102,33 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...) va_end(args); } -#define STRERR_BUFSIZE 128 +static void pr_perm_msg(int err) +{ + struct rlimit limit; + char buf[100]; + + if (err != -EPERM || geteuid() != 0) + return; + + err = getrlimit(RLIMIT_MEMLOCK, &limit); + if (err) + return; + + if (limit.rlim_cur == RLIM_INFINITY) + return; + + if (limit.rlim_cur < 1024) + snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur); + else if (limit.rlim_cur < 1024*1024) + snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024); + else + snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024)); -#define CHECK_ERR(action, err, out) do { \ - err = action; \ - if (err) \ - goto out; \ -} while (0) + pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n", + buf); +} +#define STRERR_BUFSIZE 128 /* Copied from tools/perf/util/util.h */ #ifndef zfree @@ -146,6 +168,20 @@ struct bpf_capabilities { __u32 array_mmap:1; }; +enum reloc_type { + RELO_LD64, + RELO_CALL, + RELO_DATA, + RELO_EXTERN, +}; + +struct reloc_desc { + enum reloc_type type; + int insn_idx; + int map_idx; + int sym_off; +}; + /* * bpf_prog should be a better name but it has been used in * linux/filter.h. @@ -164,16 +200,7 @@ struct bpf_program { size_t insns_cnt, main_prog_cnt; enum bpf_prog_type type; - struct reloc_desc { - enum { - RELO_LD64, - RELO_CALL, - RELO_DATA, - } type; - int insn_idx; - int map_idx; - int sym_off; - } *reloc_desc; + struct reloc_desc *reloc_desc; int nr_reloc; int log_level; @@ -202,22 +229,29 @@ struct bpf_program { __u32 prog_flags; }; +#define DATA_SEC ".data" +#define BSS_SEC ".bss" +#define RODATA_SEC ".rodata" +#define KCONFIG_SEC ".kconfig" + enum libbpf_map_type { LIBBPF_MAP_UNSPEC, LIBBPF_MAP_DATA, LIBBPF_MAP_BSS, LIBBPF_MAP_RODATA, + LIBBPF_MAP_KCONFIG, }; static const char * const libbpf_type_to_btf_name[] = { - [LIBBPF_MAP_DATA] = ".data", - [LIBBPF_MAP_BSS] = ".bss", - [LIBBPF_MAP_RODATA] = ".rodata", + [LIBBPF_MAP_DATA] = DATA_SEC, + [LIBBPF_MAP_BSS] = BSS_SEC, + [LIBBPF_MAP_RODATA] = RODATA_SEC, + [LIBBPF_MAP_KCONFIG] = KCONFIG_SEC, }; struct bpf_map { - int fd; char *name; + int fd; int sec_idx; size_t sec_offset; int map_ifindex; @@ -228,14 +262,32 @@ struct bpf_map { void *priv; bpf_map_clear_priv_t clear_priv; enum libbpf_map_type libbpf_type; + void *mmaped; char *pin_path; bool pinned; bool reused; }; -struct bpf_secdata { - void *rodata; - void *data; +enum extern_type { + EXT_UNKNOWN, + EXT_CHAR, + EXT_BOOL, + EXT_INT, + EXT_TRISTATE, + EXT_CHAR_ARR, +}; + +struct extern_desc { + const char *name; + int sym_idx; + int btf_id; + enum extern_type type; + int sz; + int align; + int data_off; + bool is_signed; + bool is_weak; + bool is_set; }; static LIST_HEAD(bpf_objects_list); @@ -250,7 +302,11 @@ struct bpf_object { struct bpf_map *maps; size_t nr_maps; size_t maps_cap; - struct bpf_secdata sections; + + char *kconfig; + struct extern_desc *externs; + int nr_extern; + int kconfig_map_idx; bool loaded; bool has_pseudo_calls; @@ -279,6 +335,7 @@ struct bpf_object { int maps_shndx; int btf_maps_shndx; int text_shndx; + int symbols_shndx; int data_shndx; int rodata_shndx; int bss_shndx; @@ -550,6 +607,7 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.data_shndx = -1; obj->efile.rodata_shndx = -1; obj->efile.bss_shndx = -1; + obj->kconfig_map_idx = -1; obj->kern_version = get_kernel_version(); obj->loaded = false; @@ -748,13 +806,13 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name, *size = 0; if (!name) { return -EINVAL; - } else if (!strcmp(name, ".data")) { + } else if (!strcmp(name, DATA_SEC)) { if (obj->efile.data) *size = obj->efile.data->d_size; - } else if (!strcmp(name, ".bss")) { + } else if (!strcmp(name, BSS_SEC)) { if (obj->efile.bss) *size = obj->efile.bss->d_size; - } else if (!strcmp(name, ".rodata")) { + } else if (!strcmp(name, RODATA_SEC)) { if (obj->efile.rodata) *size = obj->efile.rodata->d_size; } else { @@ -835,13 +893,38 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) return &obj->maps[obj->nr_maps++]; } +static size_t bpf_map_mmap_sz(const struct bpf_map *map) +{ + long page_sz = sysconf(_SC_PAGE_SIZE); + size_t map_sz; + + map_sz = roundup(map->def.value_size, 8) * map->def.max_entries; + map_sz = roundup(map_sz, page_sz); + return map_sz; +} + +static char *internal_map_name(struct bpf_object *obj, + enum libbpf_map_type type) +{ + char map_name[BPF_OBJ_NAME_LEN]; + const char *sfx = libbpf_type_to_btf_name[type]; + int sfx_len = max((size_t)7, strlen(sfx)); + int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, + strlen(obj->name)); + + snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name, + sfx_len, libbpf_type_to_btf_name[type]); + + return strdup(map_name); +} + static int bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, - int sec_idx, Elf_Data *data, void **data_buff) + int sec_idx, void *data, size_t data_sz) { - char map_name[BPF_OBJ_NAME_LEN]; struct bpf_map_def *def; struct bpf_map *map; + int err; map = bpf_object__add_map(obj); if (IS_ERR(map)) @@ -850,9 +933,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, map->libbpf_type = type; map->sec_idx = sec_idx; map->sec_offset = 0; - snprintf(map_name, sizeof(map_name), "%.8s%.7s", obj->name, - libbpf_type_to_btf_name[type]); - map->name = strdup(map_name); + map->name = internal_map_name(obj, type); if (!map->name) { pr_warn("failed to alloc map name\n"); return -ENOMEM; @@ -861,25 +942,29 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, def = &map->def; def->type = BPF_MAP_TYPE_ARRAY; def->key_size = sizeof(int); - def->value_size = data->d_size; + def->value_size = data_sz; def->max_entries = 1; - def->map_flags = type == LIBBPF_MAP_RODATA ? BPF_F_RDONLY_PROG : 0; - if (obj->caps.array_mmap) - def->map_flags |= BPF_F_MMAPABLE; + def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG + ? BPF_F_RDONLY_PROG : 0; + def->map_flags |= BPF_F_MMAPABLE; pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", - map_name, map->sec_idx, map->sec_offset, def->map_flags); + map->name, map->sec_idx, map->sec_offset, def->map_flags); - if (data_buff) { - *data_buff = malloc(data->d_size); - if (!*data_buff) { - zfree(&map->name); - pr_warn("failed to alloc map content buffer\n"); - return -ENOMEM; - } - memcpy(*data_buff, data->d_buf, data->d_size); + map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (map->mmaped == MAP_FAILED) { + err = -errno; + map->mmaped = NULL; + pr_warn("failed to alloc map '%s' content buffer: %d\n", + map->name, err); + zfree(&map->name); + return err; } + if (data) + memcpy(map->mmaped, data, data_sz); + pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name); return 0; } @@ -888,37 +973,332 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj) { int err; - if (!obj->caps.global_data) - return 0; /* * Populate obj->maps with libbpf internal maps. */ if (obj->efile.data_shndx >= 0) { err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA, obj->efile.data_shndx, - obj->efile.data, - &obj->sections.data); + obj->efile.data->d_buf, + obj->efile.data->d_size); if (err) return err; } if (obj->efile.rodata_shndx >= 0) { err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA, obj->efile.rodata_shndx, - obj->efile.rodata, - &obj->sections.rodata); + obj->efile.rodata->d_buf, + obj->efile.rodata->d_size); if (err) return err; } if (obj->efile.bss_shndx >= 0) { err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, obj->efile.bss_shndx, - obj->efile.bss, NULL); + NULL, + obj->efile.bss->d_size); if (err) return err; } return 0; } + +static struct extern_desc *find_extern_by_name(const struct bpf_object *obj, + const void *name) +{ + int i; + + for (i = 0; i < obj->nr_extern; i++) { + if (strcmp(obj->externs[i].name, name) == 0) + return &obj->externs[i]; + } + return NULL; +} + +static int set_ext_value_tri(struct extern_desc *ext, void *ext_val, + char value) +{ + switch (ext->type) { + case EXT_BOOL: + if (value == 'm') { + pr_warn("extern %s=%c should be tristate or char\n", + ext->name, value); + return -EINVAL; + } + *(bool *)ext_val = value == 'y' ? true : false; + break; + case EXT_TRISTATE: + if (value == 'y') + *(enum libbpf_tristate *)ext_val = TRI_YES; + else if (value == 'm') + *(enum libbpf_tristate *)ext_val = TRI_MODULE; + else /* value == 'n' */ + *(enum libbpf_tristate *)ext_val = TRI_NO; + break; + case EXT_CHAR: + *(char *)ext_val = value; + break; + case EXT_UNKNOWN: + case EXT_INT: + case EXT_CHAR_ARR: + default: + pr_warn("extern %s=%c should be bool, tristate, or char\n", + ext->name, value); + return -EINVAL; + } + ext->is_set = true; + return 0; +} + +static int set_ext_value_str(struct extern_desc *ext, char *ext_val, + const char *value) +{ + size_t len; + + if (ext->type != EXT_CHAR_ARR) { + pr_warn("extern %s=%s should char array\n", ext->name, value); + return -EINVAL; + } + + len = strlen(value); + if (value[len - 1] != '"') { + pr_warn("extern '%s': invalid string config '%s'\n", + ext->name, value); + return -EINVAL; + } + + /* strip quotes */ + len -= 2; + if (len >= ext->sz) { + pr_warn("extern '%s': long string config %s of (%zu bytes) truncated to %d bytes\n", + ext->name, value, len, ext->sz - 1); + len = ext->sz - 1; + } + memcpy(ext_val, value + 1, len); + ext_val[len] = '\0'; + ext->is_set = true; + return 0; +} + +static int parse_u64(const char *value, __u64 *res) +{ + char *value_end; + int err; + + errno = 0; + *res = strtoull(value, &value_end, 0); + if (errno) { + err = -errno; + pr_warn("failed to parse '%s' as integer: %d\n", value, err); + return err; + } + if (*value_end) { + pr_warn("failed to parse '%s' as integer completely\n", value); + return -EINVAL; + } + return 0; +} + +static bool is_ext_value_in_range(const struct extern_desc *ext, __u64 v) +{ + int bit_sz = ext->sz * 8; + + if (ext->sz == 8) + return true; + + /* Validate that value stored in u64 fits in integer of `ext->sz` + * bytes size without any loss of information. If the target integer + * is signed, we rely on the following limits of integer type of + * Y bits and subsequent transformation: + * + * -2^(Y-1) <= X <= 2^(Y-1) - 1 + * 0 <= X + 2^(Y-1) <= 2^Y - 1 + * 0 <= X + 2^(Y-1) < 2^Y + * + * For unsigned target integer, check that all the (64 - Y) bits are + * zero. + */ + if (ext->is_signed) + return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz); + else + return (v >> bit_sz) == 0; +} + +static int set_ext_value_num(struct extern_desc *ext, void *ext_val, + __u64 value) +{ + if (ext->type != EXT_INT && ext->type != EXT_CHAR) { + pr_warn("extern %s=%llu should be integer\n", + ext->name, (unsigned long long)value); + return -EINVAL; + } + if (!is_ext_value_in_range(ext, value)) { + pr_warn("extern %s=%llu value doesn't fit in %d bytes\n", + ext->name, (unsigned long long)value, ext->sz); + return -ERANGE; + } + switch (ext->sz) { + case 1: *(__u8 *)ext_val = value; break; + case 2: *(__u16 *)ext_val = value; break; + case 4: *(__u32 *)ext_val = value; break; + case 8: *(__u64 *)ext_val = value; break; + default: + return -EINVAL; + } + ext->is_set = true; + return 0; +} + +static int bpf_object__process_kconfig_line(struct bpf_object *obj, + char *buf, void *data) +{ + struct extern_desc *ext; + char *sep, *value; + int len, err = 0; + void *ext_val; + __u64 num; + + if (strncmp(buf, "CONFIG_", 7)) + return 0; + + sep = strchr(buf, '='); + if (!sep) { + pr_warn("failed to parse '%s': no separator\n", buf); + return -EINVAL; + } + + /* Trim ending '\n' */ + len = strlen(buf); + if (buf[len - 1] == '\n') + buf[len - 1] = '\0'; + /* Split on '=' and ensure that a value is present. */ + *sep = '\0'; + if (!sep[1]) { + *sep = '='; + pr_warn("failed to parse '%s': no value\n", buf); + return -EINVAL; + } + + ext = find_extern_by_name(obj, buf); + if (!ext || ext->is_set) + return 0; + + ext_val = data + ext->data_off; + value = sep + 1; + + switch (*value) { + case 'y': case 'n': case 'm': + err = set_ext_value_tri(ext, ext_val, *value); + break; + case '"': + err = set_ext_value_str(ext, ext_val, value); + break; + default: + /* assume integer */ + err = parse_u64(value, &num); + if (err) { + pr_warn("extern %s=%s should be integer\n", + ext->name, value); + return err; + } + err = set_ext_value_num(ext, ext_val, num); + break; + } + if (err) + return err; + pr_debug("extern %s=%s\n", ext->name, value); + return 0; +} + +static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data) +{ + char buf[PATH_MAX]; + struct utsname uts; + int len, err = 0; + gzFile file; + + uname(&uts); + len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release); + if (len < 0) + return -EINVAL; + else if (len >= PATH_MAX) + return -ENAMETOOLONG; + + /* gzopen also accepts uncompressed files. */ + file = gzopen(buf, "r"); + if (!file) + file = gzopen("/proc/config.gz", "r"); + + if (!file) { + pr_warn("failed to open system Kconfig\n"); + return -ENOENT; + } + + while (gzgets(file, buf, sizeof(buf))) { + err = bpf_object__process_kconfig_line(obj, buf, data); + if (err) { + pr_warn("error parsing system Kconfig line '%s': %d\n", + buf, err); + goto out; + } + } + +out: + gzclose(file); + return err; +} + +static int bpf_object__read_kconfig_mem(struct bpf_object *obj, + const char *config, void *data) +{ + char buf[PATH_MAX]; + int err = 0; + FILE *file; + + file = fmemopen((void *)config, strlen(config), "r"); + if (!file) { + err = -errno; + pr_warn("failed to open in-memory Kconfig: %d\n", err); + return err; + } + + while (fgets(buf, sizeof(buf), file)) { + err = bpf_object__process_kconfig_line(obj, buf, data); + if (err) { + pr_warn("error parsing in-memory Kconfig line '%s': %d\n", + buf, err); + break; + } + } + + fclose(file); + return err; +} + +static int bpf_object__init_kconfig_map(struct bpf_object *obj) +{ + struct extern_desc *last_ext; + size_t map_sz; + int err; + + if (obj->nr_extern == 0) + return 0; + + last_ext = &obj->externs[obj->nr_extern - 1]; + map_sz = last_ext->data_off + last_ext->sz; + + err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG, + obj->efile.symbols_shndx, + NULL, map_sz); + if (err) + return err; + + obj->kconfig_map_idx = obj->nr_maps - 1; + + return 0; +} + static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) { Elf_Data *symbols = obj->efile.symbols; @@ -1242,15 +1622,15 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, } sz = btf__resolve_size(obj->btf, t->type); if (sz < 0) { - pr_warn("map '%s': can't determine key size for type [%u]: %lld.\n", - map_name, t->type, sz); + pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n", + map_name, t->type, (ssize_t)sz); return sz; } - pr_debug("map '%s': found key [%u], sz = %lld.\n", - map_name, t->type, sz); + pr_debug("map '%s': found key [%u], sz = %zd.\n", + map_name, t->type, (ssize_t)sz); if (map->def.key_size && map->def.key_size != sz) { - pr_warn("map '%s': conflicting key size %u != %lld.\n", - map_name, map->def.key_size, sz); + pr_warn("map '%s': conflicting key size %u != %zd.\n", + map_name, map->def.key_size, (ssize_t)sz); return -EINVAL; } map->def.key_size = sz; @@ -1285,15 +1665,15 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, } sz = btf__resolve_size(obj->btf, t->type); if (sz < 0) { - pr_warn("map '%s': can't determine value size for type [%u]: %lld.\n", - map_name, t->type, sz); + pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n", + map_name, t->type, (ssize_t)sz); return sz; } - pr_debug("map '%s': found value [%u], sz = %lld.\n", - map_name, t->type, sz); + pr_debug("map '%s': found value [%u], sz = %zd.\n", + map_name, t->type, (ssize_t)sz); if (map->def.value_size && map->def.value_size != sz) { - pr_warn("map '%s': conflicting value size %u != %lld.\n", - map_name, map->def.value_size, sz); + pr_warn("map '%s': conflicting value size %u != %zd.\n", + map_name, map->def.value_size, (ssize_t)sz); return -EINVAL; } map->def.value_size = sz; @@ -1393,21 +1773,20 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, return 0; } -static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps, - const char *pin_root_path) +static int bpf_object__init_maps(struct bpf_object *obj, + const struct bpf_object_open_opts *opts) { - bool strict = !relaxed_maps; + const char *pin_root_path; + bool strict; int err; - err = bpf_object__init_user_maps(obj, strict); - if (err) - return err; - - err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path); - if (err) - return err; + strict = !OPTS_GET(opts, relaxed_maps, false); + pin_root_path = OPTS_GET(opts, pin_root_path, NULL); - err = bpf_object__init_global_data_maps(obj); + err = bpf_object__init_user_maps(obj, strict); + err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path); + err = err ?: bpf_object__init_global_data_maps(obj); + err = err ?: bpf_object__init_kconfig_map(obj); if (err) return err; @@ -1509,7 +1888,8 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj) static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj) { - return obj->efile.btf_maps_shndx >= 0; + return obj->efile.btf_maps_shndx >= 0 || + obj->nr_extern > 0; } static int bpf_object__init_btf(struct bpf_object *obj, @@ -1526,11 +1906,6 @@ static int bpf_object__init_btf(struct bpf_object *obj, BTF_ELF_SEC, err); goto out; } - err = btf__finalize_data(obj, obj->btf); - if (err) { - pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err); - goto out; - } } if (btf_ext_data) { if (!obj->btf) { @@ -1564,6 +1939,30 @@ out: return 0; } +static int bpf_object__finalize_btf(struct bpf_object *obj) +{ + int err; + + if (!obj->btf) + return 0; + + err = btf__finalize_data(obj, obj->btf); + if (!err) + return 0; + + pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err); + btf__free(obj->btf); + obj->btf = NULL; + btf_ext__free(obj->btf_ext); + obj->btf_ext = NULL; + + if (bpf_object__is_btf_mandatory(obj)) { + pr_warn("BTF is required, but is missing or corrupted.\n"); + return -ENOENT; + } + return 0; +} + static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) { int err = 0; @@ -1592,8 +1991,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) return 0; } -static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, - const char *pin_root_path) +static int bpf_object__elf_collect(struct bpf_object *obj) { Elf *elf = obj->efile.elf; GElf_Ehdr *ep = &obj->efile.ehdr; @@ -1665,6 +2063,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, return -LIBBPF_ERRNO__FORMAT; } obj->efile.symbols = data; + obj->efile.symbols_shndx = idx; obj->efile.strtabidx = sh.sh_link; } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) { if (sh.sh_flags & SHF_EXECINSTR) { @@ -1683,10 +2082,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, name, obj->path, cp); return err; } - } else if (strcmp(name, ".data") == 0) { + } else if (strcmp(name, DATA_SEC) == 0) { obj->efile.data = data; obj->efile.data_shndx = idx; - } else if (strcmp(name, ".rodata") == 0) { + } else if (strcmp(name, RODATA_SEC) == 0) { obj->efile.rodata = data; obj->efile.rodata_shndx = idx; } else { @@ -1716,7 +2115,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, obj->efile.reloc_sects[nr_sects].shdr = sh; obj->efile.reloc_sects[nr_sects].data = data; - } else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) { + } else if (sh.sh_type == SHT_NOBITS && + strcmp(name, BSS_SEC) == 0) { obj->efile.bss = data; obj->efile.bss_shndx = idx; } else { @@ -1728,14 +2128,217 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, pr_warn("Corrupted ELF file: index of strtab invalid\n"); return -LIBBPF_ERRNO__FORMAT; } - err = bpf_object__init_btf(obj, btf_data, btf_ext_data); - if (!err) - err = bpf_object__init_maps(obj, relaxed_maps, pin_root_path); - if (!err) - err = bpf_object__sanitize_and_load_btf(obj); - if (!err) - err = bpf_object__init_prog_names(obj); - return err; + return bpf_object__init_btf(obj, btf_data, btf_ext_data); +} + +static bool sym_is_extern(const GElf_Sym *sym) +{ + int bind = GELF_ST_BIND(sym->st_info); + /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */ + return sym->st_shndx == SHN_UNDEF && + (bind == STB_GLOBAL || bind == STB_WEAK) && + GELF_ST_TYPE(sym->st_info) == STT_NOTYPE; +} + +static int find_extern_btf_id(const struct btf *btf, const char *ext_name) +{ + const struct btf_type *t; + const char *var_name; + int i, n; + + if (!btf) + return -ESRCH; + + n = btf__get_nr_types(btf); + for (i = 1; i <= n; i++) { + t = btf__type_by_id(btf, i); + + if (!btf_is_var(t)) + continue; + + var_name = btf__name_by_offset(btf, t->name_off); + if (strcmp(var_name, ext_name)) + continue; + + if (btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN) + return -EINVAL; + + return i; + } + + return -ENOENT; +} + +static enum extern_type find_extern_type(const struct btf *btf, int id, + bool *is_signed) +{ + const struct btf_type *t; + const char *name; + + t = skip_mods_and_typedefs(btf, id, NULL); + name = btf__name_by_offset(btf, t->name_off); + + if (is_signed) + *is_signed = false; + switch (btf_kind(t)) { + case BTF_KIND_INT: { + int enc = btf_int_encoding(t); + + if (enc & BTF_INT_BOOL) + return t->size == 1 ? EXT_BOOL : EXT_UNKNOWN; + if (is_signed) + *is_signed = enc & BTF_INT_SIGNED; + if (t->size == 1) + return EXT_CHAR; + if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1))) + return EXT_UNKNOWN; + return EXT_INT; + } + case BTF_KIND_ENUM: + if (t->size != 4) + return EXT_UNKNOWN; + if (strcmp(name, "libbpf_tristate")) + return EXT_UNKNOWN; + return EXT_TRISTATE; + case BTF_KIND_ARRAY: + if (btf_array(t)->nelems == 0) + return EXT_UNKNOWN; + if (find_extern_type(btf, btf_array(t)->type, NULL) != EXT_CHAR) + return EXT_UNKNOWN; + return EXT_CHAR_ARR; + default: + return EXT_UNKNOWN; + } +} + +static int cmp_externs(const void *_a, const void *_b) +{ + const struct extern_desc *a = _a; + const struct extern_desc *b = _b; + + /* descending order by alignment requirements */ + if (a->align != b->align) + return a->align > b->align ? -1 : 1; + /* ascending order by size, within same alignment class */ + if (a->sz != b->sz) + return a->sz < b->sz ? -1 : 1; + /* resolve ties by name */ + return strcmp(a->name, b->name); +} + +static int bpf_object__collect_externs(struct bpf_object *obj) +{ + const struct btf_type *t; + struct extern_desc *ext; + int i, n, off, btf_id; + struct btf_type *sec; + const char *ext_name; + Elf_Scn *scn; + GElf_Shdr sh; + + if (!obj->efile.symbols) + return 0; + + scn = elf_getscn(obj->efile.elf, obj->efile.symbols_shndx); + if (!scn) + return -LIBBPF_ERRNO__FORMAT; + if (gelf_getshdr(scn, &sh) != &sh) + return -LIBBPF_ERRNO__FORMAT; + n = sh.sh_size / sh.sh_entsize; + + pr_debug("looking for externs among %d symbols...\n", n); + for (i = 0; i < n; i++) { + GElf_Sym sym; + + if (!gelf_getsym(obj->efile.symbols, i, &sym)) + return -LIBBPF_ERRNO__FORMAT; + if (!sym_is_extern(&sym)) + continue; + ext_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, + sym.st_name); + if (!ext_name || !ext_name[0]) + continue; + + ext = obj->externs; + ext = reallocarray(ext, obj->nr_extern + 1, sizeof(*ext)); + if (!ext) + return -ENOMEM; + obj->externs = ext; + ext = &ext[obj->nr_extern]; + memset(ext, 0, sizeof(*ext)); + obj->nr_extern++; + + ext->btf_id = find_extern_btf_id(obj->btf, ext_name); + if (ext->btf_id <= 0) { + pr_warn("failed to find BTF for extern '%s': %d\n", + ext_name, ext->btf_id); + return ext->btf_id; + } + t = btf__type_by_id(obj->btf, ext->btf_id); + ext->name = btf__name_by_offset(obj->btf, t->name_off); + ext->sym_idx = i; + ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK; + ext->sz = btf__resolve_size(obj->btf, t->type); + if (ext->sz <= 0) { + pr_warn("failed to resolve size of extern '%s': %d\n", + ext_name, ext->sz); + return ext->sz; + } + ext->align = btf__align_of(obj->btf, t->type); + if (ext->align <= 0) { + pr_warn("failed to determine alignment of extern '%s': %d\n", + ext_name, ext->align); + return -EINVAL; + } + ext->type = find_extern_type(obj->btf, t->type, + &ext->is_signed); + if (ext->type == EXT_UNKNOWN) { + pr_warn("extern '%s' type is unsupported\n", ext_name); + return -ENOTSUP; + } + } + pr_debug("collected %d externs total\n", obj->nr_extern); + + if (!obj->nr_extern) + return 0; + + /* sort externs by (alignment, size, name) and calculate their offsets + * within a map */ + qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs); + off = 0; + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + ext->data_off = roundup(off, ext->align); + off = ext->data_off + ext->sz; + pr_debug("extern #%d: symbol %d, off %u, name %s\n", + i, ext->sym_idx, ext->data_off, ext->name); + } + + btf_id = btf__find_by_name(obj->btf, KCONFIG_SEC); + if (btf_id <= 0) { + pr_warn("no BTF info found for '%s' datasec\n", KCONFIG_SEC); + return -ESRCH; + } + + sec = (struct btf_type *)btf__type_by_id(obj->btf, btf_id); + sec->size = off; + n = btf_vlen(sec); + for (i = 0; i < n; i++) { + struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; + + t = btf__type_by_id(obj->btf, vs->type); + ext_name = btf__name_by_offset(obj->btf, t->name_off); + ext = find_extern_by_name(obj, ext_name); + if (!ext) { + pr_warn("failed to find extern definition for BTF var '%s'\n", + ext_name); + return -ESRCH; + } + vs->offset = ext->data_off; + btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED; + } + + return 0; } static struct bpf_program * @@ -1765,6 +2368,19 @@ bpf_object__find_program_by_title(const struct bpf_object *obj, return NULL; } +struct bpf_program * +bpf_object__find_program_by_name(const struct bpf_object *obj, + const char *name) +{ + struct bpf_program *prog; + + bpf_object__for_each_program(prog, obj) { + if (!strcmp(prog->name, name)) + return prog; + } + return NULL; +} + static bool bpf_object__shndx_is_data(const struct bpf_object *obj, int shndx) { @@ -1789,6 +2405,8 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) return LIBBPF_MAP_BSS; else if (shndx == obj->efile.rodata_shndx) return LIBBPF_MAP_RODATA; + else if (shndx == obj->efile.symbols_shndx) + return LIBBPF_MAP_KCONFIG; else return LIBBPF_MAP_UNSPEC; } @@ -1817,7 +2435,8 @@ static int bpf_program__record_reloc(struct bpf_program *prog, return -LIBBPF_ERRNO__RELOC; } if (sym->st_value % 8) { - pr_warn("bad call relo offset: %llu\n", (__u64)sym->st_value); + pr_warn("bad call relo offset: %zu\n", + (size_t)sym->st_value); return -LIBBPF_ERRNO__RELOC; } reloc_desc->type = RELO_CALL; @@ -1832,6 +2451,30 @@ static int bpf_program__record_reloc(struct bpf_program *prog, insn_idx, insn->code); return -LIBBPF_ERRNO__RELOC; } + + if (sym_is_extern(sym)) { + int sym_idx = GELF_R_SYM(rel->r_info); + int i, n = obj->nr_extern; + struct extern_desc *ext; + + for (i = 0; i < n; i++) { + ext = &obj->externs[i]; + if (ext->sym_idx == sym_idx) + break; + } + if (i >= n) { + pr_warn("extern relo failed to find extern for sym %d\n", + sym_idx); + return -LIBBPF_ERRNO__RELOC; + } + pr_debug("found extern #%d '%s' (sym %d, off %u) for insn %u\n", + i, ext->name, ext->sym_idx, ext->data_off, insn_idx); + reloc_desc->type = RELO_EXTERN; + reloc_desc->insn_idx = insn_idx; + reloc_desc->sym_off = ext->data_off; + return 0; + } + if (!shdr_idx || shdr_idx >= SHN_LORESERVE) { pr_warn("invalid relo for \'%s\' in special section 0x%x; forgot to initialize global var?..\n", name, shdr_idx); @@ -1859,8 +2502,8 @@ static int bpf_program__record_reloc(struct bpf_program *prog, break; } if (map_idx >= nr_maps) { - pr_warn("map relo failed to find map for sec %u, off %llu\n", - shdr_idx, (__u64)sym->st_value); + pr_warn("map relo failed to find map for sec %u, off %zu\n", + shdr_idx, (size_t)sym->st_value); return -LIBBPF_ERRNO__RELOC; } reloc_desc->type = RELO_LD64; @@ -1875,11 +2518,6 @@ static int bpf_program__record_reloc(struct bpf_program *prog, pr_warn("bad data relo against section %u\n", shdr_idx); return -LIBBPF_ERRNO__RELOC; } - if (!obj->caps.global_data) { - pr_warn("relocation: kernel does not support global \'%s\' variable access in insns[%d]\n", - name, insn_idx); - return -LIBBPF_ERRNO__RELOC; - } for (map_idx = 0; map_idx < nr_maps; map_idx++) { map = &obj->maps[map_idx]; if (map->libbpf_type != type) @@ -1941,9 +2579,9 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, sym.st_name) ? : "<?>"; - pr_debug("relo for shdr %u, symb %llu, value %llu, type %d, bind %d, name %d (\'%s\'), insn %u\n", - (__u32)sym.st_shndx, (__u64)GELF_R_SYM(rel.r_info), - (__u64)sym.st_value, GELF_ST_TYPE(sym.st_info), + pr_debug("relo for shdr %u, symb %zu, value %zu, type %d, bind %d, name %d (\'%s\'), insn %u\n", + (__u32)sym.st_shndx, (size_t)GELF_R_SYM(rel.r_info), + (size_t)sym.st_value, GELF_ST_TYPE(sym.st_info), GELF_ST_BIND(sym.st_info), sym.st_name, name, insn_idx); @@ -2298,29 +2936,35 @@ bpf_object__reuse_map(struct bpf_map *map) static int bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) { + enum libbpf_map_type map_type = map->libbpf_type; char *cp, errmsg[STRERR_BUFSIZE]; int err, zero = 0; - __u8 *data; - /* Nothing to do here since kernel already zero-initializes .bss map. */ - if (map->libbpf_type == LIBBPF_MAP_BSS) + /* kernel already zero-initializes .bss map. */ + if (map_type == LIBBPF_MAP_BSS) return 0; - data = map->libbpf_type == LIBBPF_MAP_DATA ? - obj->sections.data : obj->sections.rodata; + err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0); + if (err) { + err = -errno; + cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); + pr_warn("Error setting initial map(%s) contents: %s\n", + map->name, cp); + return err; + } - err = bpf_map_update_elem(map->fd, &zero, data, 0); - /* Freeze .rodata map as read-only from syscall side. */ - if (!err && map->libbpf_type == LIBBPF_MAP_RODATA) { + /* Freeze .rodata and .kconfig map as read-only from syscall side. */ + if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) { err = bpf_map_freeze(map->fd); if (err) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + err = -errno; + cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); pr_warn("Error freezing map(%s) as read-only: %s\n", map->name, cp); - err = 0; + return err; } } - return err; + return 0; } static int @@ -2411,6 +3055,7 @@ err_out: cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); pr_warn("failed to create map (name: '%s'): %s(%d)\n", map->name, cp, err); + pr_perm_msg(err); for (j = 0; j < i; j++) zclose(obj->maps[j].fd); return err; @@ -2536,6 +3181,21 @@ static bool str_is_empty(const char *s) return !s || !s[0]; } +static bool is_flex_arr(const struct btf *btf, + const struct bpf_core_accessor *acc, + const struct btf_array *arr) +{ + const struct btf_type *t; + + /* not a flexible array, if not inside a struct or has non-zero size */ + if (!acc->name || arr->nelems > 0) + return false; + + /* has to be the last member of enclosing struct */ + t = btf__type_by_id(btf, acc->type_id); + return acc->idx == btf_vlen(t) - 1; +} + /* * Turn bpf_field_reloc into a low- and high-level spec representation, * validating correctness along the way, as well as calculating resulting @@ -2573,6 +3233,7 @@ static int bpf_core_spec_parse(const struct btf *btf, struct bpf_core_spec *spec) { int access_idx, parsed_len, i; + struct bpf_core_accessor *acc; const struct btf_type *t; const char *name; __u32 id; @@ -2620,6 +3281,7 @@ static int bpf_core_spec_parse(const struct btf *btf, return -EINVAL; access_idx = spec->raw_spec[i]; + acc = &spec->spec[spec->len]; if (btf_is_composite(t)) { const struct btf_member *m; @@ -2637,18 +3299,23 @@ static int bpf_core_spec_parse(const struct btf *btf, if (str_is_empty(name)) return -EINVAL; - spec->spec[spec->len].type_id = id; - spec->spec[spec->len].idx = access_idx; - spec->spec[spec->len].name = name; + acc->type_id = id; + acc->idx = access_idx; + acc->name = name; spec->len++; } id = m->type; } else if (btf_is_array(t)) { const struct btf_array *a = btf_array(t); + bool flex; t = skip_mods_and_typedefs(btf, a->type, &id); - if (!t || access_idx >= a->nelems) + if (!t) + return -EINVAL; + + flex = is_flex_arr(btf, acc - 1, a); + if (!flex && access_idx >= a->nelems) return -EINVAL; spec->spec[spec->len].type_id = id; @@ -2953,12 +3620,14 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, */ if (i > 0) { const struct btf_array *a; + bool flex; if (!btf_is_array(targ_type)) return 0; a = btf_array(targ_type); - if (local_acc->idx >= a->nelems) + flex = is_flex_arr(targ_btf, targ_acc - 1, a); + if (!flex && local_acc->idx >= a->nelems) return 0; if (!skip_mods_and_typedefs(targ_btf, a->type, &targ_id)) @@ -3142,11 +3811,13 @@ static int bpf_core_reloc_insn(struct bpf_program *prog, insn = &prog->insns[insn_idx]; class = BPF_CLASS(insn->code); - if (class == BPF_ALU || class == BPF_ALU64) { + switch (class) { + case BPF_ALU: + case BPF_ALU64: if (BPF_SRC(insn->code) != BPF_K) return -EINVAL; if (!failed && validate && insn->imm != orig_val) { - pr_warn("prog '%s': unexpected insn #%d value: got %u, exp %u -> %u\n", + pr_warn("prog '%s': unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n", bpf_program__title(prog, false), insn_idx, insn->imm, orig_val, new_val); return -EINVAL; @@ -3156,7 +3827,29 @@ static int bpf_core_reloc_insn(struct bpf_program *prog, pr_debug("prog '%s': patched insn #%d (ALU/ALU64)%s imm %u -> %u\n", bpf_program__title(prog, false), insn_idx, failed ? " w/ failed reloc" : "", orig_val, new_val); - } else { + break; + case BPF_LDX: + case BPF_ST: + case BPF_STX: + if (!failed && validate && insn->off != orig_val) { + pr_warn("prog '%s': unexpected insn #%d (LD/LDX/ST/STX) value: got %u, exp %u -> %u\n", + bpf_program__title(prog, false), insn_idx, + insn->off, orig_val, new_val); + return -EINVAL; + } + if (new_val > SHRT_MAX) { + pr_warn("prog '%s': insn #%d (LD/LDX/ST/STX) value too big: %u\n", + bpf_program__title(prog, false), insn_idx, + new_val); + return -ERANGE; + } + orig_val = insn->off; + insn->off = new_val; + pr_debug("prog '%s': patched insn #%d (LD/LDX/ST/STX)%s off %u -> %u\n", + bpf_program__title(prog, false), insn_idx, + failed ? " w/ failed reloc" : "", orig_val, new_val); + break; + default: pr_warn("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", bpf_program__title(prog, false), insn_idx, insn->code, insn->src_reg, insn->dst_reg, @@ -3559,9 +4252,6 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, size_t new_cnt; int err; - if (relo->type != RELO_CALL) - return -LIBBPF_ERRNO__RELOC; - if (prog->idx == obj->efile.text_shndx) { pr_warn("relo in .text insn %d into off %d (insn #%d)\n", relo->insn_idx, relo->sym_off, relo->sym_off / 8); @@ -3623,27 +4313,37 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) for (i = 0; i < prog->nr_reloc; i++) { struct reloc_desc *relo = &prog->reloc_desc[i]; + struct bpf_insn *insn = &prog->insns[relo->insn_idx]; - if (relo->type == RELO_LD64 || relo->type == RELO_DATA) { - struct bpf_insn *insn = &prog->insns[relo->insn_idx]; - - if (relo->insn_idx + 1 >= (int)prog->insns_cnt) { - pr_warn("relocation out of range: '%s'\n", - prog->section_name); - return -LIBBPF_ERRNO__RELOC; - } + if (relo->insn_idx + 1 >= (int)prog->insns_cnt) { + pr_warn("relocation out of range: '%s'\n", + prog->section_name); + return -LIBBPF_ERRNO__RELOC; + } - if (relo->type != RELO_DATA) { - insn[0].src_reg = BPF_PSEUDO_MAP_FD; - } else { - insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; - insn[1].imm = insn[0].imm + relo->sym_off; - } + switch (relo->type) { + case RELO_LD64: + insn[0].src_reg = BPF_PSEUDO_MAP_FD; + insn[0].imm = obj->maps[relo->map_idx].fd; + break; + case RELO_DATA: + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; + insn[1].imm = insn[0].imm + relo->sym_off; insn[0].imm = obj->maps[relo->map_idx].fd; - } else if (relo->type == RELO_CALL) { + break; + case RELO_EXTERN: + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; + insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; + insn[1].imm = relo->sym_off; + break; + case RELO_CALL: err = bpf_program__reloc_text(prog, obj, relo); if (err) return err; + break; + default: + pr_warn("relo #%d: bad relo type %d\n", i, relo->type); + return -EINVAL; } } @@ -3778,6 +4478,7 @@ retry_load: ret = -errno; cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warn("load bpf program failed: %s\n", cp); + pr_perm_msg(ret); if (log_buf && log_buf[0] != '\0') { ret = -LIBBPF_ERRNO__VERIFY; @@ -3807,11 +4508,22 @@ out: return ret; } -int -bpf_program__load(struct bpf_program *prog, - char *license, __u32 kern_version) +static int libbpf_find_attach_btf_id(const char *name, + enum bpf_attach_type attach_type, + __u32 attach_prog_fd); + +int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) { - int err = 0, fd, i; + int err = 0, fd, i, btf_id; + + if (prog->type == BPF_PROG_TYPE_TRACING) { + btf_id = libbpf_find_attach_btf_id(prog->section_name, + prog->expected_attach_type, + prog->attach_prog_fd); + if (btf_id <= 0) + return btf_id; + prog->attach_btf_id = btf_id; + } if (prog->instances.nr < 0 || !prog->instances.fds) { if (prog->preprocessor) { @@ -3835,7 +4547,7 @@ bpf_program__load(struct bpf_program *prog, prog->section_name, prog->instances.nr); } err = load_program(prog, prog->insns, prog->insns_cnt, - license, kern_version, &fd); + license, kern_ver, &fd); if (!err) prog->instances.fds[0] = fd; goto out; @@ -3864,9 +4576,7 @@ bpf_program__load(struct bpf_program *prog, } err = load_program(prog, result.new_insn_ptr, - result.new_insn_cnt, - license, kern_version, &fd); - + result.new_insn_cnt, license, kern_ver, &fd); if (err) { pr_warn("Loading the %dth instance of program '%s' failed\n", i, prog->section_name); @@ -3910,20 +4620,14 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) return 0; } -static int libbpf_find_attach_btf_id(const char *name, - enum bpf_attach_type attach_type, - __u32 attach_prog_fd); static struct bpf_object * __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, - struct bpf_object_open_opts *opts) + const struct bpf_object_open_opts *opts) { - const char *pin_root_path; + const char *obj_name, *kconfig; struct bpf_program *prog; struct bpf_object *obj; - const char *obj_name; char tmp_name[64]; - bool relaxed_maps; - __u32 attach_prog_fd; int err; if (elf_version(EV_CURRENT) == EV_NONE) { @@ -3952,16 +4656,23 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, return obj; obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false); - relaxed_maps = OPTS_GET(opts, relaxed_maps, false); - pin_root_path = OPTS_GET(opts, pin_root_path, NULL); - attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); - - CHECK_ERR(bpf_object__elf_init(obj), err, out); - CHECK_ERR(bpf_object__check_endianness(obj), err, out); - CHECK_ERR(bpf_object__probe_caps(obj), err, out); - CHECK_ERR(bpf_object__elf_collect(obj, relaxed_maps, pin_root_path), - err, out); - CHECK_ERR(bpf_object__collect_reloc(obj), err, out); + kconfig = OPTS_GET(opts, kconfig, NULL); + if (kconfig) { + obj->kconfig = strdup(kconfig); + if (!obj->kconfig) + return ERR_PTR(-ENOMEM); + } + + err = bpf_object__elf_init(obj); + err = err ? : bpf_object__check_endianness(obj); + err = err ? : bpf_object__elf_collect(obj); + err = err ? : bpf_object__collect_externs(obj); + err = err ? : bpf_object__finalize_btf(obj); + err = err ? : bpf_object__init_maps(obj, opts); + err = err ? : bpf_object__init_prog_names(obj); + err = err ? : bpf_object__collect_reloc(obj); + if (err) + goto out; bpf_object__elf_finish(obj); bpf_object__for_each_program(prog, obj) { @@ -3978,15 +4689,8 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, bpf_program__set_type(prog, prog_type); bpf_program__set_expected_attach_type(prog, attach_type); - if (prog_type == BPF_PROG_TYPE_TRACING) { - err = libbpf_find_attach_btf_id(prog->section_name, - attach_type, - attach_prog_fd); - if (err <= 0) - goto out; - prog->attach_btf_id = err; - prog->attach_prog_fd = attach_prog_fd; - } + if (prog_type == BPF_PROG_TYPE_TRACING) + prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); } return obj; @@ -4026,7 +4730,7 @@ struct bpf_object *bpf_object__open(const char *path) } struct bpf_object * -bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts) +bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts) { if (!path) return ERR_PTR(-EINVAL); @@ -4038,7 +4742,7 @@ bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts) struct bpf_object * bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, - struct bpf_object_open_opts *opts) + const struct bpf_object_open_opts *opts) { if (!obj_buf || obj_buf_sz == 0) return ERR_PTR(-EINVAL); @@ -4079,6 +4783,92 @@ int bpf_object__unload(struct bpf_object *obj) return 0; } +static int bpf_object__sanitize_maps(struct bpf_object *obj) +{ + struct bpf_map *m; + + bpf_object__for_each_map(m, obj) { + if (!bpf_map__is_internal(m)) + continue; + if (!obj->caps.global_data) { + pr_warn("kernel doesn't support global data\n"); + return -ENOTSUP; + } + if (!obj->caps.array_mmap) + m->def.map_flags ^= BPF_F_MMAPABLE; + } + + return 0; +} + +static int bpf_object__resolve_externs(struct bpf_object *obj, + const char *extra_kconfig) +{ + bool need_config = false; + struct extern_desc *ext; + int err, i; + void *data; + + if (obj->nr_extern == 0) + return 0; + + data = obj->maps[obj->kconfig_map_idx].mmaped; + + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + + if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { + void *ext_val = data + ext->data_off; + __u32 kver = get_kernel_version(); + + if (!kver) { + pr_warn("failed to get kernel version\n"); + return -EINVAL; + } + err = set_ext_value_num(ext, ext_val, kver); + if (err) + return err; + pr_debug("extern %s=0x%x\n", ext->name, kver); + } else if (strncmp(ext->name, "CONFIG_", 7) == 0) { + need_config = true; + } else { + pr_warn("unrecognized extern '%s'\n", ext->name); + return -EINVAL; + } + } + if (need_config && extra_kconfig) { + err = bpf_object__read_kconfig_mem(obj, extra_kconfig, data); + if (err) + return -EINVAL; + need_config = false; + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + if (!ext->is_set) { + need_config = true; + break; + } + } + } + if (need_config) { + err = bpf_object__read_kconfig_file(obj, data); + if (err) + return -EINVAL; + } + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + + if (!ext->is_set && !ext->is_weak) { + pr_warn("extern %s (strong) not resolved\n", ext->name); + return -ESRCH; + } else if (!ext->is_set) { + pr_debug("extern %s (weak) not resolved, defaulting to zero\n", + ext->name); + } + } + + return 0; +} + int bpf_object__load_xattr(struct bpf_object_load_attr *attr) { struct bpf_object *obj; @@ -4097,9 +4887,15 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) obj->loaded = true; - CHECK_ERR(bpf_object__create_maps(obj), err, out); - CHECK_ERR(bpf_object__relocate(obj, attr->target_btf_path), err, out); - CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out); + err = bpf_object__probe_caps(obj); + err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); + err = err ? : bpf_object__sanitize_and_load_btf(obj); + err = err ? : bpf_object__sanitize_maps(obj); + err = err ? : bpf_object__create_maps(obj); + err = err ? : bpf_object__relocate(obj, attr->target_btf_path); + err = err ? : bpf_object__load_progs(obj, attr->log_level); + if (err) + goto out; return 0; out: @@ -4670,17 +5466,26 @@ void bpf_object__close(struct bpf_object *obj) btf_ext__free(obj->btf_ext); for (i = 0; i < obj->nr_maps; i++) { - zfree(&obj->maps[i].name); - zfree(&obj->maps[i].pin_path); - if (obj->maps[i].clear_priv) - obj->maps[i].clear_priv(&obj->maps[i], - obj->maps[i].priv); - obj->maps[i].priv = NULL; - obj->maps[i].clear_priv = NULL; + struct bpf_map *map = &obj->maps[i]; + + if (map->clear_priv) + map->clear_priv(map, map->priv); + map->priv = NULL; + map->clear_priv = NULL; + + if (map->mmaped) { + munmap(map->mmaped, bpf_map_mmap_sz(map)); + map->mmaped = NULL; + } + + zfree(&map->name); + zfree(&map->pin_path); } - zfree(&obj->sections.rodata); - zfree(&obj->sections.data); + zfree(&obj->kconfig); + zfree(&obj->externs); + obj->nr_extern = 0; + zfree(&obj->maps); obj->nr_maps = 0; @@ -4820,6 +5625,11 @@ void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex) prog->prog_ifindex = ifindex; } +const char *bpf_program__name(const struct bpf_program *prog) +{ + return prog->name; +} + const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy) { const char *title; @@ -4972,7 +5782,28 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog, */ #define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype) -static const struct { +#define SEC_DEF(sec_pfx, ptype, ...) { \ + .sec = sec_pfx, \ + .len = sizeof(sec_pfx) - 1, \ + .prog_type = BPF_PROG_TYPE_##ptype, \ + __VA_ARGS__ \ +} + +struct bpf_sec_def; + +typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec, + struct bpf_program *prog); + +static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, + struct bpf_program *prog); +static struct bpf_link *attach_tp(const struct bpf_sec_def *sec, + struct bpf_program *prog); +static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, + struct bpf_program *prog); +static struct bpf_link *attach_trace(const struct bpf_sec_def *sec, + struct bpf_program *prog); + +struct bpf_sec_def { const char *sec; size_t len; enum bpf_prog_type prog_type; @@ -4980,24 +5811,40 @@ static const struct { bool is_attachable; bool is_attach_btf; enum bpf_attach_type attach_type; -} section_names[] = { + attach_fn_t attach_fn; +}; + +static const struct bpf_sec_def section_defs[] = { BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), - BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("sk_reuseport", BPF_PROG_TYPE_SK_REUSEPORT), + SEC_DEF("kprobe/", KPROBE, + .attach_fn = attach_kprobe), BPF_PROG_SEC("uprobe/", BPF_PROG_TYPE_KPROBE), - BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE), + SEC_DEF("kretprobe/", KPROBE, + .attach_fn = attach_kprobe), BPF_PROG_SEC("uretprobe/", BPF_PROG_TYPE_KPROBE), BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS), BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT), - BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT), - BPF_PROG_SEC("tp/", BPF_PROG_TYPE_TRACEPOINT), - BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT), - BPF_PROG_SEC("raw_tp/", BPF_PROG_TYPE_RAW_TRACEPOINT), - BPF_PROG_BTF("tp_btf/", BPF_PROG_TYPE_TRACING, - BPF_TRACE_RAW_TP), - BPF_PROG_BTF("fentry/", BPF_PROG_TYPE_TRACING, - BPF_TRACE_FENTRY), - BPF_PROG_BTF("fexit/", BPF_PROG_TYPE_TRACING, - BPF_TRACE_FEXIT), + SEC_DEF("tracepoint/", TRACEPOINT, + .attach_fn = attach_tp), + SEC_DEF("tp/", TRACEPOINT, + .attach_fn = attach_tp), + SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT, + .attach_fn = attach_raw_tp), + SEC_DEF("raw_tp/", RAW_TRACEPOINT, + .attach_fn = attach_raw_tp), + SEC_DEF("tp_btf/", TRACING, + .expected_attach_type = BPF_TRACE_RAW_TP, + .is_attach_btf = true, + .attach_fn = attach_trace), + SEC_DEF("fentry/", TRACING, + .expected_attach_type = BPF_TRACE_FENTRY, + .is_attach_btf = true, + .attach_fn = attach_trace), + SEC_DEF("fexit/", TRACING, + .expected_attach_type = BPF_TRACE_FEXIT, + .is_attach_btf = true, + .attach_fn = attach_trace), BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), @@ -5059,12 +5906,26 @@ static const struct { #undef BPF_APROG_SEC #undef BPF_EAPROG_SEC #undef BPF_APROG_COMPAT +#undef SEC_DEF #define MAX_TYPE_NAME_SIZE 32 +static const struct bpf_sec_def *find_sec_def(const char *sec_name) +{ + int i, n = ARRAY_SIZE(section_defs); + + for (i = 0; i < n; i++) { + if (strncmp(sec_name, + section_defs[i].sec, section_defs[i].len)) + continue; + return §ion_defs[i]; + } + return NULL; +} + static char *libbpf_get_type_names(bool attach_type) { - int i, len = ARRAY_SIZE(section_names) * MAX_TYPE_NAME_SIZE; + int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE; char *buf; buf = malloc(len); @@ -5073,16 +5934,16 @@ static char *libbpf_get_type_names(bool attach_type) buf[0] = '\0'; /* Forge string buf with all available names */ - for (i = 0; i < ARRAY_SIZE(section_names); i++) { - if (attach_type && !section_names[i].is_attachable) + for (i = 0; i < ARRAY_SIZE(section_defs); i++) { + if (attach_type && !section_defs[i].is_attachable) continue; - if (strlen(buf) + strlen(section_names[i].sec) + 2 > len) { + if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) { free(buf); return NULL; } strcat(buf, " "); - strcat(buf, section_names[i].sec); + strcat(buf, section_defs[i].sec); } return buf; @@ -5091,23 +5952,23 @@ static char *libbpf_get_type_names(bool attach_type) int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, enum bpf_attach_type *expected_attach_type) { + const struct bpf_sec_def *sec_def; char *type_names; - int i; if (!name) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(section_names); i++) { - if (strncmp(name, section_names[i].sec, section_names[i].len)) - continue; - *prog_type = section_names[i].prog_type; - *expected_attach_type = section_names[i].expected_attach_type; + sec_def = find_sec_def(name); + if (sec_def) { + *prog_type = sec_def->prog_type; + *expected_attach_type = sec_def->expected_attach_type; return 0; } - pr_warn("failed to guess program type from ELF section '%s'\n", name); + + pr_debug("failed to guess program type from ELF section '%s'\n", name); type_names = libbpf_get_type_names(false); if (type_names != NULL) { - pr_info("supported section(type) names are:%s\n", type_names); + pr_debug("supported section(type) names are:%s\n", type_names); free(type_names); } @@ -5186,16 +6047,16 @@ static int libbpf_find_attach_btf_id(const char *name, if (!name) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(section_names); i++) { - if (!section_names[i].is_attach_btf) + for (i = 0; i < ARRAY_SIZE(section_defs); i++) { + if (!section_defs[i].is_attach_btf) continue; - if (strncmp(name, section_names[i].sec, section_names[i].len)) + if (strncmp(name, section_defs[i].sec, section_defs[i].len)) continue; if (attach_prog_fd) - err = libbpf_find_prog_btf_id(name + section_names[i].len, + err = libbpf_find_prog_btf_id(name + section_defs[i].len, attach_prog_fd); else - err = libbpf_find_vmlinux_btf_id(name + section_names[i].len, + err = libbpf_find_vmlinux_btf_id(name + section_defs[i].len, attach_type); if (err <= 0) pr_warn("%s is not found in vmlinux BTF\n", name); @@ -5214,18 +6075,18 @@ int libbpf_attach_type_by_name(const char *name, if (!name) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(section_names); i++) { - if (strncmp(name, section_names[i].sec, section_names[i].len)) + for (i = 0; i < ARRAY_SIZE(section_defs); i++) { + if (strncmp(name, section_defs[i].sec, section_defs[i].len)) continue; - if (!section_names[i].is_attachable) + if (!section_defs[i].is_attachable) return -EINVAL; - *attach_type = section_names[i].attach_type; + *attach_type = section_defs[i].attach_type; return 0; } - pr_warn("failed to guess attach type based on ELF section name '%s'\n", name); + pr_debug("failed to guess attach type based on ELF section name '%s'\n", name); type_names = libbpf_get_type_names(true); if (type_names != NULL) { - pr_info("attachable section(type) names are:%s\n", type_names); + pr_debug("attachable section(type) names are:%s\n", type_names); free(type_names); } @@ -5466,17 +6327,37 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, } struct bpf_link { + int (*detach)(struct bpf_link *link); int (*destroy)(struct bpf_link *link); + bool disconnected; }; +/* Release "ownership" of underlying BPF resource (typically, BPF program + * attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected + * link, when destructed through bpf_link__destroy() call won't attempt to + * detach/unregisted that BPF resource. This is useful in situations where, + * say, attached BPF program has to outlive userspace program that attached it + * in the system. Depending on type of BPF program, though, there might be + * additional steps (like pinning BPF program in BPF FS) necessary to ensure + * exit of userspace program doesn't trigger automatic detachment and clean up + * inside the kernel. + */ +void bpf_link__disconnect(struct bpf_link *link) +{ + link->disconnected = true; +} + int bpf_link__destroy(struct bpf_link *link) { - int err; + int err = 0; if (!link) return 0; - err = link->destroy(link); + if (!link->disconnected && link->detach) + err = link->detach(link); + if (link->destroy) + link->destroy(link); free(link); return err; @@ -5487,7 +6368,7 @@ struct bpf_link_fd { int fd; /* hook FD */ }; -static int bpf_link__destroy_perf_event(struct bpf_link *link) +static int bpf_link__detach_perf_event(struct bpf_link *link) { struct bpf_link_fd *l = (void *)link; int err; @@ -5519,10 +6400,10 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, return ERR_PTR(-EINVAL); } - link = malloc(sizeof(*link)); + link = calloc(1, sizeof(*link)); if (!link) return ERR_PTR(-ENOMEM); - link->link.destroy = &bpf_link__destroy_perf_event; + link->link.detach = &bpf_link__detach_perf_event; link->fd = pfd; if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { @@ -5679,6 +6560,18 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, return link; } +static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, + struct bpf_program *prog) +{ + const char *func_name; + bool retprobe; + + func_name = bpf_program__title(prog, false) + sec->len; + retprobe = strcmp(sec->sec, "kretprobe/") == 0; + + return bpf_program__attach_kprobe(prog, retprobe, func_name); +} + struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe, pid_t pid, const char *binary_path, @@ -5791,7 +6684,33 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog, return link; } -static int bpf_link__destroy_fd(struct bpf_link *link) +static struct bpf_link *attach_tp(const struct bpf_sec_def *sec, + struct bpf_program *prog) +{ + char *sec_name, *tp_cat, *tp_name; + struct bpf_link *link; + + sec_name = strdup(bpf_program__title(prog, false)); + if (!sec_name) + return ERR_PTR(-ENOMEM); + + /* extract "tp/<category>/<name>" */ + tp_cat = sec_name + sec->len; + tp_name = strchr(tp_cat, '/'); + if (!tp_name) { + link = ERR_PTR(-EINVAL); + goto out; + } + *tp_name = '\0'; + tp_name++; + + link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name); +out: + free(sec_name); + return link; +} + +static int bpf_link__detach_fd(struct bpf_link *link) { struct bpf_link_fd *l = (void *)link; @@ -5812,10 +6731,10 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, return ERR_PTR(-EINVAL); } - link = malloc(sizeof(*link)); + link = calloc(1, sizeof(*link)); if (!link) return ERR_PTR(-ENOMEM); - link->link.destroy = &bpf_link__destroy_fd; + link->link.detach = &bpf_link__detach_fd; pfd = bpf_raw_tracepoint_open(tp_name, prog_fd); if (pfd < 0) { @@ -5830,6 +6749,14 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, return (struct bpf_link *)link; } +static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, + struct bpf_program *prog) +{ + const char *tp_name = bpf_program__title(prog, false) + sec->len; + + return bpf_program__attach_raw_tracepoint(prog, tp_name); +} + struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) { char errmsg[STRERR_BUFSIZE]; @@ -5843,10 +6770,10 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) return ERR_PTR(-EINVAL); } - link = malloc(sizeof(*link)); + link = calloc(1, sizeof(*link)); if (!link) return ERR_PTR(-ENOMEM); - link->link.destroy = &bpf_link__destroy_fd; + link->link.detach = &bpf_link__detach_fd; pfd = bpf_raw_tracepoint_open(NULL, prog_fd); if (pfd < 0) { @@ -5861,6 +6788,23 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) return (struct bpf_link *)link; } +static struct bpf_link *attach_trace(const struct bpf_sec_def *sec, + struct bpf_program *prog) +{ + return bpf_program__attach_trace(prog); +} + +struct bpf_link *bpf_program__attach(struct bpf_program *prog) +{ + const struct bpf_sec_def *sec_def; + + sec_def = find_sec_def(bpf_program__title(prog, false)); + if (!sec_def || !sec_def->attach_fn) + return ERR_PTR(-ESRCH); + + return sec_def->attach_fn(sec_def, prog); +} + enum bpf_perf_event_ret bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, void **copy_mem, size_t *copy_size, @@ -5944,7 +6888,7 @@ struct perf_buffer { size_t mmap_size; struct perf_cpu_buf **cpu_bufs; struct epoll_event *events; - int cpu_cnt; + int cpu_cnt; /* number of allocated CPU buffers */ int epoll_fd; /* perf event FD */ int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */ }; @@ -6078,11 +7022,13 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt, static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, struct perf_buffer_params *p) { + const char *online_cpus_file = "/sys/devices/system/cpu/online"; struct bpf_map_info map = {}; char msg[STRERR_BUFSIZE]; struct perf_buffer *pb; + bool *online = NULL; __u32 map_info_len; - int err, i; + int err, i, j, n; if (page_cnt & (page_cnt - 1)) { pr_warn("page count should be power of two, but is %zu\n", @@ -6151,20 +7097,32 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, goto error; } - for (i = 0; i < pb->cpu_cnt; i++) { + err = parse_cpu_mask_file(online_cpus_file, &online, &n); + if (err) { + pr_warn("failed to get online CPU mask: %d\n", err); + goto error; + } + + for (i = 0, j = 0; i < pb->cpu_cnt; i++) { struct perf_cpu_buf *cpu_buf; int cpu, map_key; cpu = p->cpu_cnt > 0 ? p->cpus[i] : i; map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i; + /* in case user didn't explicitly requested particular CPUs to + * be attached to, skip offline/not present CPUs + */ + if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu])) + continue; + cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key); if (IS_ERR(cpu_buf)) { err = PTR_ERR(cpu_buf); goto error; } - pb->cpu_bufs[i] = cpu_buf; + pb->cpu_bufs[j] = cpu_buf; err = bpf_map_update_elem(pb->map_fd, &map_key, &cpu_buf->fd, 0); @@ -6176,21 +7134,25 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, goto error; } - pb->events[i].events = EPOLLIN; - pb->events[i].data.ptr = cpu_buf; + pb->events[j].events = EPOLLIN; + pb->events[j].data.ptr = cpu_buf; if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd, - &pb->events[i]) < 0) { + &pb->events[j]) < 0) { err = -errno; pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n", cpu, cpu_buf->fd, libbpf_strerror_r(err, msg, sizeof(msg))); goto error; } + j++; } + pb->cpu_cnt = j; + free(online); return pb; error: + free(online); if (pb) perf_buffer__free(pb); return ERR_PTR(err); @@ -6521,62 +7483,267 @@ void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear) } } -int libbpf_num_possible_cpus(void) +int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz) { - static const char *fcpu = "/sys/devices/system/cpu/possible"; - int len = 0, n = 0, il = 0, ir = 0; - unsigned int start = 0, end = 0; - int tmp_cpus = 0; - static int cpus; - char buf[128]; - int error = 0; - int fd = -1; + int err = 0, n, len, start, end = -1; + bool *tmp; - tmp_cpus = READ_ONCE(cpus); - if (tmp_cpus > 0) - return tmp_cpus; + *mask = NULL; + *mask_sz = 0; + + /* Each sub string separated by ',' has format \d+-\d+ or \d+ */ + while (*s) { + if (*s == ',' || *s == '\n') { + s++; + continue; + } + n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len); + if (n <= 0 || n > 2) { + pr_warn("Failed to get CPU range %s: %d\n", s, n); + err = -EINVAL; + goto cleanup; + } else if (n == 1) { + end = start; + } + if (start < 0 || start > end) { + pr_warn("Invalid CPU range [%d,%d] in %s\n", + start, end, s); + err = -EINVAL; + goto cleanup; + } + tmp = realloc(*mask, end + 1); + if (!tmp) { + err = -ENOMEM; + goto cleanup; + } + *mask = tmp; + memset(tmp + *mask_sz, 0, start - *mask_sz); + memset(tmp + start, 1, end - start + 1); + *mask_sz = end + 1; + s += len; + } + if (!*mask_sz) { + pr_warn("Empty CPU range\n"); + return -EINVAL; + } + return 0; +cleanup: + free(*mask); + *mask = NULL; + return err; +} + +int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz) +{ + int fd, err = 0, len; + char buf[128]; fd = open(fcpu, O_RDONLY); if (fd < 0) { - error = errno; - pr_warn("Failed to open file %s: %s\n", fcpu, strerror(error)); - return -error; + err = -errno; + pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err); + return err; } len = read(fd, buf, sizeof(buf)); close(fd); if (len <= 0) { - error = len ? errno : EINVAL; - pr_warn("Failed to read # of possible cpus from %s: %s\n", - fcpu, strerror(error)); - return -error; + err = len ? -errno : -EINVAL; + pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err); + return err; } - if (len == sizeof(buf)) { - pr_warn("File %s size overflow\n", fcpu); - return -EOVERFLOW; + if (len >= sizeof(buf)) { + pr_warn("CPU mask is too big in file %s\n", fcpu); + return -E2BIG; } buf[len] = '\0'; - for (ir = 0, tmp_cpus = 0; ir <= len; ir++) { - /* Each sub string separated by ',' has format \d+-\d+ or \d+ */ - if (buf[ir] == ',' || buf[ir] == '\0') { - buf[ir] = '\0'; - n = sscanf(&buf[il], "%u-%u", &start, &end); - if (n <= 0) { - pr_warn("Failed to get # CPUs from %s\n", - &buf[il]); - return -EINVAL; - } else if (n == 1) { - end = start; - } - tmp_cpus += end - start + 1; - il = ir + 1; - } - } - if (tmp_cpus <= 0) { - pr_warn("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu); - return -EINVAL; + return parse_cpu_mask_str(buf, mask, mask_sz); +} + +int libbpf_num_possible_cpus(void) +{ + static const char *fcpu = "/sys/devices/system/cpu/possible"; + static int cpus; + int err, n, i, tmp_cpus; + bool *mask; + + tmp_cpus = READ_ONCE(cpus); + if (tmp_cpus > 0) + return tmp_cpus; + + err = parse_cpu_mask_file(fcpu, &mask, &n); + if (err) + return err; + + tmp_cpus = 0; + for (i = 0; i < n; i++) { + if (mask[i]) + tmp_cpus++; } + free(mask); WRITE_ONCE(cpus, tmp_cpus); return tmp_cpus; } + +int bpf_object__open_skeleton(struct bpf_object_skeleton *s, + const struct bpf_object_open_opts *opts) +{ + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts, + .object_name = s->name, + ); + struct bpf_object *obj; + int i; + + /* Attempt to preserve opts->object_name, unless overriden by user + * explicitly. Overwriting object name for skeletons is discouraged, + * as it breaks global data maps, because they contain object name + * prefix as their own map name prefix. When skeleton is generated, + * bpftool is making an assumption that this name will stay the same. + */ + if (opts) { + memcpy(&skel_opts, opts, sizeof(*opts)); + if (!opts->object_name) + skel_opts.object_name = s->name; + } + + obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts); + if (IS_ERR(obj)) { + pr_warn("failed to initialize skeleton BPF object '%s': %ld\n", + s->name, PTR_ERR(obj)); + return PTR_ERR(obj); + } + + *s->obj = obj; + + for (i = 0; i < s->map_cnt; i++) { + struct bpf_map **map = s->maps[i].map; + const char *name = s->maps[i].name; + void **mmaped = s->maps[i].mmaped; + + *map = bpf_object__find_map_by_name(obj, name); + if (!*map) { + pr_warn("failed to find skeleton map '%s'\n", name); + return -ESRCH; + } + + /* externs shouldn't be pre-setup from user code */ + if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG) + *mmaped = (*map)->mmaped; + } + + for (i = 0; i < s->prog_cnt; i++) { + struct bpf_program **prog = s->progs[i].prog; + const char *name = s->progs[i].name; + + *prog = bpf_object__find_program_by_name(obj, name); + if (!*prog) { + pr_warn("failed to find skeleton program '%s'\n", name); + return -ESRCH; + } + } + + return 0; +} + +int bpf_object__load_skeleton(struct bpf_object_skeleton *s) +{ + int i, err; + + err = bpf_object__load(*s->obj); + if (err) { + pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err); + return err; + } + + for (i = 0; i < s->map_cnt; i++) { + struct bpf_map *map = *s->maps[i].map; + size_t mmap_sz = bpf_map_mmap_sz(map); + int prot, map_fd = bpf_map__fd(map); + void **mmaped = s->maps[i].mmaped; + + if (!mmaped) + continue; + + if (!(map->def.map_flags & BPF_F_MMAPABLE)) { + *mmaped = NULL; + continue; + } + + if (map->def.map_flags & BPF_F_RDONLY_PROG) + prot = PROT_READ; + else + prot = PROT_READ | PROT_WRITE; + + /* Remap anonymous mmap()-ed "map initialization image" as + * a BPF map-backed mmap()-ed memory, but preserving the same + * memory address. This will cause kernel to change process' + * page table to point to a different piece of kernel memory, + * but from userspace point of view memory address (and its + * contents, being identical at this point) will stay the + * same. This mapping will be released by bpf_object__close() + * as per normal clean up procedure, so we don't need to worry + * about it from skeleton's clean up perspective. + */ + *mmaped = mmap(map->mmaped, mmap_sz, prot, + MAP_SHARED | MAP_FIXED, map_fd, 0); + if (*mmaped == MAP_FAILED) { + err = -errno; + *mmaped = NULL; + pr_warn("failed to re-mmap() map '%s': %d\n", + bpf_map__name(map), err); + return err; + } + } + + return 0; +} + +int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) +{ + int i; + + for (i = 0; i < s->prog_cnt; i++) { + struct bpf_program *prog = *s->progs[i].prog; + struct bpf_link **link = s->progs[i].link; + const struct bpf_sec_def *sec_def; + const char *sec_name = bpf_program__title(prog, false); + + sec_def = find_sec_def(sec_name); + if (!sec_def || !sec_def->attach_fn) + continue; + + *link = sec_def->attach_fn(sec_def, prog); + if (IS_ERR(*link)) { + pr_warn("failed to auto-attach program '%s': %ld\n", + bpf_program__name(prog), PTR_ERR(*link)); + return PTR_ERR(*link); + } + } + + return 0; +} + +void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) +{ + int i; + + for (i = 0; i < s->prog_cnt; i++) { + struct bpf_link **link = s->progs[i].link; + + if (!IS_ERR_OR_NULL(*link)) + bpf_link__destroy(*link); + *link = NULL; + } +} + +void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) +{ + if (s->progs) + bpf_object__detach_skeleton(s); + if (s->obj) + bpf_object__close(*s->obj); + free(s->maps); + free(s->progs); + free(s); +} |