diff options
author | David S. Miller <davem@davemloft.net> | 2019-05-31 21:21:18 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-05-31 21:21:18 -0700 |
commit | 0462eaacee493f7e2d87551a35d38be93ca723f8 (patch) | |
tree | c2d454ff64156281c9b4ce071194cb9a47e5dd1a /tools | |
parent | 33aae28285b73e013f7f697a61f569c5b48c6650 (diff) | |
parent | cd5385029f1d2e6879b78fff1a7b15514004af17 (diff) | |
download | linux-0462eaacee493f7e2d87551a35d38be93ca723f8.tar.gz linux-0462eaacee493f7e2d87551a35d38be93ca723f8.tar.bz2 linux-0462eaacee493f7e2d87551a35d38be93ca723f8.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:
====================
pull-request: bpf-next 2019-05-31
The following pull-request contains BPF updates for your *net-next* tree.
Lots of exciting new features in the first PR of this developement cycle!
The main changes are:
1) misc verifier improvements, from Alexei.
2) bpftool can now convert btf to valid C, from Andrii.
3) verifier can insert explicit ZEXT insn when requested by 32-bit JITs.
This feature greatly improves BPF speed on 32-bit architectures. From Jiong.
4) cgroups will now auto-detach bpf programs. This fixes issue of thousands
bpf programs got stuck in dying cgroups. From Roman.
5) new bpf_send_signal() helper, from Yonghong.
6) cgroup inet skb programs can signal CN to the stack, from Lawrence.
7) miscellaneous cleanups, from many developers.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'tools')
68 files changed, 5496 insertions, 490 deletions
diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst index 2dbc1413fabd..6694a0fc8f99 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst @@ -19,10 +19,11 @@ SYNOPSIS BTF COMMANDS ============= -| **bpftool** **btf dump** *BTF_SRC* +| **bpftool** **btf dump** *BTF_SRC* [**format** *FORMAT*] | **bpftool** **btf help** | | *BTF_SRC* := { **id** *BTF_ID* | **prog** *PROG* | **map** *MAP* [{**key** | **value** | **kv** | **all**}] | **file** *FILE* } +| *FORMAT* := { **raw** | **c** } | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } @@ -31,23 +32,27 @@ DESCRIPTION **bpftool btf dump** *BTF_SRC* Dump BTF entries from a given *BTF_SRC*. - When **id** is specified, BTF object with that ID will be - loaded and all its BTF types emitted. + When **id** is specified, BTF object with that ID will be + loaded and all its BTF types emitted. - When **map** is provided, it's expected that map has - associated BTF object with BTF types describing key and - value. It's possible to select whether to dump only BTF - type(s) associated with key (**key**), value (**value**), - both key and value (**kv**), or all BTF types present in - associated BTF object (**all**). If not specified, **kv** - is assumed. + When **map** is provided, it's expected that map has + associated BTF object with BTF types describing key and + value. It's possible to select whether to dump only BTF + type(s) associated with key (**key**), value (**value**), + both key and value (**kv**), or all BTF types present in + associated BTF object (**all**). If not specified, **kv** + is assumed. - When **prog** is provided, it's expected that program has - associated BTF object with BTF types. + When **prog** is provided, it's expected that program has + associated BTF object with BTF types. - When specifying *FILE*, an ELF file is expected, containing - .BTF section with well-defined BTF binary format data, - typically produced by clang or pahole. + When specifying *FILE*, an ELF file is expected, containing + .BTF section with well-defined BTF binary format data, + typically produced by clang or pahole. + + **format** option can be used to override default (raw) + output format. Raw (**raw**) or C-syntax (**c**) output + formats are supported. **bpftool btf help** Print short help message. @@ -67,6 +72,10 @@ OPTIONS -p, --pretty Generate human-readable JSON output. Implies **-j**. + -d, --debug + Print all logs available from libbpf, including debug-level + information. + EXAMPLES ======== **# bpftool btf dump id 1226** diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index ac26876389c2..36807735e2a5 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -113,6 +113,10 @@ OPTIONS -f, --bpffs Show file names of pinned programs. + -d, --debug + Print all logs available from libbpf, including debug-level + information. + EXAMPLES ======== | diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst index 14180e887082..4d08f35034a2 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst @@ -73,6 +73,10 @@ OPTIONS -p, --pretty Generate human-readable JSON output. Implies **-j**. + -d, --debug + Print all logs available from libbpf, including debug-level + information. + SEE ALSO ======== **bpf**\ (2), diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 13ef27b39f20..490b4501cb6e 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -152,6 +152,10 @@ OPTIONS Do not automatically attempt to mount any virtual file system (such as tracefs or BPF virtual file system) when necessary. + -d, --debug + Print all logs available from libbpf, including debug-level + information. + EXAMPLES ======== **# bpftool map show** diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst index 934580850f42..d8e5237a2085 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-net.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst @@ -65,6 +65,10 @@ OPTIONS -p, --pretty Generate human-readable JSON output. Implies **-j**. + -d, --debug + Print all logs available from libbpf, including debug-level + information. + EXAMPLES ======== diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst index 0c7576523a21..e252bd0bc434 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst @@ -53,6 +53,10 @@ OPTIONS -p, --pretty Generate human-readable JSON output. Implies **-j**. + -d, --debug + Print all logs available from libbpf, including debug-level + information. + EXAMPLES ======== diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index e8118544d118..228a5c863cc7 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -174,6 +174,11 @@ OPTIONS Do not automatically attempt to mount any virtual file system (such as tracefs or BPF virtual file system) when necessary. + -d, --debug + Print all logs available, even debug-level information. This + includes logs from libbpf as well as from the verifier, when + attempting to load programs. + EXAMPLES ======== **# bpftool prog show** diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 3e562d7fd56f..6a9c52ef84a9 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -66,6 +66,10 @@ OPTIONS Do not automatically attempt to mount any virtual file system (such as tracefs or BPF virtual file system) when necessary. + -d, --debug + Print all logs available, even debug-level information. This + includes logs from libbpf as well as from the verifier, when + attempting to load programs. SEE ALSO ======== diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 50e402a5a9c8..2725e27dfa42 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -71,6 +71,12 @@ _bpftool_get_prog_tags() command sed -n 's/.*"tag": "\(.*\)",$/\1/p' )" -- "$cur" ) ) } +_bpftool_get_btf_ids() +{ + COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \ + command sed -n 's/.*"btf_id": \(.*\),\?$/\1/p' )" -- "$cur" ) ) +} + _bpftool_get_obj_map_names() { local obj @@ -181,7 +187,7 @@ _bpftool() # Deal with options if [[ ${words[cword]} == -* ]]; then - local c='--version --json --pretty --bpffs --mapcompat' + local c='--version --json --pretty --bpffs --mapcompat --debug' COMPREPLY=( $( compgen -W "$c" -- "$cur" ) ) return 0 fi @@ -635,14 +641,30 @@ _bpftool() map) _bpftool_get_map_ids ;; + dump) + _bpftool_get_btf_ids + ;; esac return 0 ;; + format) + COMPREPLY=( $( compgen -W "c raw" -- "$cur" ) ) + ;; *) - if [[ $cword == 6 ]] && [[ ${words[3]} == "map" ]]; then - COMPREPLY+=( $( compgen -W 'key value kv all' -- \ - "$cur" ) ) - fi + # emit extra options + case ${words[3]} in + id|file) + _bpftool_once_attr 'format' + ;; + map|prog) + if [[ ${words[3]} == "map" ]] && [[ $cword == 6 ]]; then + COMPREPLY+=( $( compgen -W "key value kv all" -- "$cur" ) ) + fi + _bpftool_once_attr 'format' + ;; + *) + ;; + esac return 0 ;; esac diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 7317438ecd9e..1b8ec91899e6 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -8,8 +8,8 @@ #include <stdio.h> #include <string.h> #include <unistd.h> -#include <gelf.h> #include <bpf.h> +#include <libbpf.h> #include <linux/btf.h> #include "btf.h" @@ -340,109 +340,40 @@ static int dump_btf_raw(const struct btf *btf, return 0; } -static bool check_btf_endianness(GElf_Ehdr *ehdr) +static void __printf(2, 0) btf_dump_printf(void *ctx, + const char *fmt, va_list args) { - static unsigned int const endian = 1; - - switch (ehdr->e_ident[EI_DATA]) { - case ELFDATA2LSB: - return *(unsigned char const *)&endian == 1; - case ELFDATA2MSB: - return *(unsigned char const *)&endian == 0; - default: - return 0; - } + vfprintf(stdout, fmt, args); } -static int btf_load_from_elf(const char *path, struct btf **btf) +static int dump_btf_c(const struct btf *btf, + __u32 *root_type_ids, int root_type_cnt) { - int err = -1, fd = -1, idx = 0; - Elf_Data *btf_data = NULL; - Elf_Scn *scn = NULL; - Elf *elf = NULL; - GElf_Ehdr ehdr; - - if (elf_version(EV_CURRENT) == EV_NONE) { - p_err("failed to init libelf for %s", path); - return -1; - } - - fd = open(path, O_RDONLY); - if (fd < 0) { - p_err("failed to open %s: %s", path, strerror(errno)); - return -1; - } - - elf = elf_begin(fd, ELF_C_READ, NULL); - if (!elf) { - p_err("failed to open %s as ELF file", path); - goto done; - } - if (!gelf_getehdr(elf, &ehdr)) { - p_err("failed to get EHDR from %s", path); - goto done; - } - if (!check_btf_endianness(&ehdr)) { - p_err("non-native ELF endianness is not supported"); - goto done; - } - if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) { - p_err("failed to get e_shstrndx from %s\n", path); - goto done; - } + struct btf_dump *d; + int err = 0, i; - while ((scn = elf_nextscn(elf, scn)) != NULL) { - GElf_Shdr sh; - char *name; + d = btf_dump__new(btf, NULL, NULL, btf_dump_printf); + if (IS_ERR(d)) + return PTR_ERR(d); - idx++; - if (gelf_getshdr(scn, &sh) != &sh) { - p_err("failed to get section(%d) header from %s", - idx, path); - goto done; - } - name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name); - if (!name) { - p_err("failed to get section(%d) name from %s", - idx, path); - goto done; - } - if (strcmp(name, BTF_ELF_SEC) == 0) { - btf_data = elf_getdata(scn, 0); - if (!btf_data) { - p_err("failed to get section(%d, %s) data from %s", - idx, name, path); + if (root_type_cnt) { + for (i = 0; i < root_type_cnt; i++) { + err = btf_dump__dump_type(d, root_type_ids[i]); + if (err) goto done; - } - break; } - } - - if (!btf_data) { - p_err("%s ELF section not found in %s", BTF_ELF_SEC, path); - goto done; - } + } else { + int cnt = btf__get_nr_types(btf); - *btf = btf__new(btf_data->d_buf, btf_data->d_size); - if (IS_ERR(*btf)) { - err = PTR_ERR(*btf); - *btf = NULL; - p_err("failed to load BTF data from %s: %s", - path, strerror(err)); - goto done; + for (i = 1; i <= cnt; i++) { + err = btf_dump__dump_type(d, i); + if (err) + goto done; + } } - err = 0; done: - if (err) { - if (*btf) { - btf__free(*btf); - *btf = NULL; - } - } - if (elf) - elf_end(elf); - close(fd); + btf_dump__free(d); return err; } @@ -451,6 +382,7 @@ static int do_dump(int argc, char **argv) struct btf *btf = NULL; __u32 root_type_ids[2]; int root_type_cnt = 0; + bool dump_c = false; __u32 btf_id = -1; const char *src; int fd = -1; @@ -522,9 +454,14 @@ static int do_dump(int argc, char **argv) } NEXT_ARG(); } else if (is_prefix(src, "file")) { - err = btf_load_from_elf(*argv, &btf); - if (err) + btf = btf__parse_elf(*argv, NULL); + if (IS_ERR(btf)) { + err = PTR_ERR(btf); + btf = NULL; + p_err("failed to load BTF from %s: %s", + *argv, strerror(err)); goto done; + } NEXT_ARG(); } else { err = -1; @@ -532,6 +469,29 @@ static int do_dump(int argc, char **argv) goto done; } + while (argc) { + if (is_prefix(*argv, "format")) { + NEXT_ARG(); + if (argc < 1) { + p_err("expecting value for 'format' option\n"); + goto done; + } + if (strcmp(*argv, "c") == 0) { + dump_c = true; + } else if (strcmp(*argv, "raw") == 0) { + dump_c = false; + } else { + p_err("unrecognized format specifier: '%s', possible values: raw, c", + *argv); + goto done; + } + NEXT_ARG(); + } else { + p_err("unrecognized option: '%s'", *argv); + goto done; + } + } + if (!btf) { err = btf__get_from_id(btf_id, &btf); if (err) { @@ -545,7 +505,16 @@ static int do_dump(int argc, char **argv) } } - dump_btf_raw(btf, root_type_ids, root_type_cnt); + if (dump_c) { + if (json_output) { + p_err("JSON output for C-syntax dump is not supported"); + err = -ENOTSUP; + goto done; + } + err = dump_btf_c(btf, root_type_ids, root_type_cnt); + } else { + err = dump_btf_raw(btf, root_type_ids, root_type_cnt); + } done: close(fd); @@ -561,10 +530,11 @@ static int do_help(int argc, char **argv) } fprintf(stderr, - "Usage: %s btf dump BTF_SRC\n" + "Usage: %s btf dump BTF_SRC [format FORMAT]\n" " %s btf help\n" "\n" " BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n" + " FORMAT := { raw | c }\n" " " HELP_SPEC_MAP "\n" " " HELP_SPEC_PROGRAM "\n" " " HELP_SPEC_OPTIONS "\n" diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 1ac1fc520e6a..4879f6395c7e 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -10,6 +10,7 @@ #include <string.h> #include <bpf.h> +#include <libbpf.h> #include "main.h" @@ -25,6 +26,7 @@ bool pretty_output; bool json_output; bool show_pinned; bool block_mount; +bool verifier_logs; int bpf_flags; struct pinned_obj_table prog_table; struct pinned_obj_table map_table; @@ -77,6 +79,13 @@ static int do_version(int argc, char **argv) return 0; } +static int __printf(2, 0) +print_all_levels(__maybe_unused enum libbpf_print_level level, + const char *format, va_list args) +{ + return vfprintf(stderr, format, args); +} + int cmd_select(const struct cmd *cmds, int argc, char **argv, int (*help)(int argc, char **argv)) { @@ -317,6 +326,7 @@ int main(int argc, char **argv) { "bpffs", no_argument, NULL, 'f' }, { "mapcompat", no_argument, NULL, 'm' }, { "nomount", no_argument, NULL, 'n' }, + { "debug", no_argument, NULL, 'd' }, { 0 } }; int opt, ret; @@ -332,7 +342,7 @@ int main(int argc, char **argv) hash_init(map_table.table); opterr = 0; - while ((opt = getopt_long(argc, argv, "Vhpjfmn", + while ((opt = getopt_long(argc, argv, "Vhpjfmnd", options, NULL)) >= 0) { switch (opt) { case 'V': @@ -362,6 +372,10 @@ int main(int argc, char **argv) case 'n': block_mount = true; break; + case 'd': + libbpf_set_print(print_all_levels); + verifier_logs = true; + break; default: p_err("unrecognized option '%s'", argv[optind - 1]); if (json_output) diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 3d63feb7f852..28a2a5857e14 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -91,6 +91,7 @@ extern json_writer_t *json_wtr; extern bool json_output; extern bool show_pinned; extern bool block_mount; +extern bool verifier_logs; extern int bpf_flags; extern struct pinned_obj_table prog_table; extern struct pinned_obj_table map_table; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 26336bad0442..1f209c80d906 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -750,10 +750,11 @@ static int do_detach(int argc, char **argv) static int load_with_options(int argc, char **argv, bool first_prog_only) { - enum bpf_attach_type expected_attach_type; - struct bpf_object_open_attr attr = { - .prog_type = BPF_PROG_TYPE_UNSPEC, + struct bpf_object_load_attr load_attr = { 0 }; + struct bpf_object_open_attr open_attr = { + .prog_type = BPF_PROG_TYPE_UNSPEC, }; + enum bpf_attach_type expected_attach_type; struct map_replace *map_replace = NULL; struct bpf_program *prog = NULL, *pos; unsigned int old_map_fds = 0; @@ -767,7 +768,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) if (!REQ_ARGS(2)) return -1; - attr.file = GET_ARG(); + open_attr.file = GET_ARG(); pinfile = GET_ARG(); while (argc) { @@ -776,7 +777,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) NEXT_ARG(); - if (attr.prog_type != BPF_PROG_TYPE_UNSPEC) { + if (open_attr.prog_type != BPF_PROG_TYPE_UNSPEC) { p_err("program type already specified"); goto err_free_reuse_maps; } @@ -793,7 +794,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) strcat(type, *argv); strcat(type, "/"); - err = libbpf_prog_type_by_name(type, &attr.prog_type, + err = libbpf_prog_type_by_name(type, + &open_attr.prog_type, &expected_attach_type); free(type); if (err < 0) @@ -881,16 +883,16 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) set_max_rlimit(); - obj = __bpf_object__open_xattr(&attr, bpf_flags); + obj = __bpf_object__open_xattr(&open_attr, bpf_flags); if (IS_ERR_OR_NULL(obj)) { p_err("failed to open object file"); goto err_free_reuse_maps; } bpf_object__for_each_program(pos, obj) { - enum bpf_prog_type prog_type = attr.prog_type; + enum bpf_prog_type prog_type = open_attr.prog_type; - if (attr.prog_type == BPF_PROG_TYPE_UNSPEC) { + if (open_attr.prog_type == BPF_PROG_TYPE_UNSPEC) { const char *sec_name = bpf_program__title(pos, false); err = libbpf_prog_type_by_name(sec_name, &prog_type, @@ -960,7 +962,12 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) goto err_close_obj; } - err = bpf_object__load(obj); + load_attr.obj = obj; + if (verifier_logs) + /* log_level1 + log_level2 + stats, but not stable UAPI */ + load_attr.log_level = 1 + 2 + 4; + + err = bpf_object__load_xattr(&load_attr); if (err) { p_err("failed to load object file"); goto err_close_obj; diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index 0bb17bf88b18..494d7ae3614d 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -31,9 +31,7 @@ void kernel_syms_load(struct dump_data *dd) if (!fp) return; - while (!feof(fp)) { - if (!fgets(buff, sizeof(buff), fp)) - break; + while (fgets(buff, sizeof(buff), fp)) { tmp = reallocarray(dd->sym_mapping, dd->sym_count + 1, sizeof(*dd->sym_mapping)); if (!tmp) { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 63e0cf66f01a..7c6aef253173 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -260,6 +260,24 @@ enum bpf_attach_type { */ #define BPF_F_ANY_ALIGNMENT (1U << 1) +/* BPF_F_TEST_RND_HI32 is used in BPF_PROG_LOAD command for testing purpose. + * Verifier does sub-register def/use analysis and identifies instructions whose + * def only matters for low 32-bit, high 32-bit is never referenced later + * through implicit zero extension. Therefore verifier notifies JIT back-ends + * that it is safe to ignore clearing high 32-bit for these instructions. This + * saves some back-ends a lot of code-gen. However such optimization is not + * necessary on some arches, for example x86_64, arm64 etc, whose JIT back-ends + * hence hasn't used verifier's analysis result. But, we really want to have a + * way to be able to verify the correctness of the described optimization on + * x86_64 on which testsuites are frequently exercised. + * + * So, this flag is introduced. Once it is set, verifier will randomize high + * 32-bit for those instructions who has been identified as safe to ignore them. + * Then, if verifier is not doing correct analysis, such randomization will + * regress tests to expose bugs. + */ +#define BPF_F_TEST_RND_HI32 (1U << 2) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * two extensions: * @@ -2672,6 +2690,20 @@ union bpf_attr { * 0 on success. * * **-ENOENT** if the bpf-local-storage cannot be found. + * + * int bpf_send_signal(u32 sig) + * Description + * Send signal *sig* to the current task. + * Return + * 0 on success or successfully queued. + * + * **-EBUSY** if work queue under nmi is full. + * + * **-EINVAL** if *sig* is invalid. + * + * **-EPERM** if no permission to send the *sig*. + * + * **-EAGAIN** if bpf program can try again. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2782,7 +2814,8 @@ union bpf_attr { FN(strtol), \ FN(strtoul), \ FN(sk_storage_get), \ - FN(sk_storage_delete), + FN(sk_storage_delete), \ + FN(send_signal), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/include/uapi/linux/if_tun.h b/tools/include/uapi/linux/if_tun.h new file mode 100644 index 000000000000..454ae31b93c7 --- /dev/null +++ b/tools/include/uapi/linux/if_tun.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * Universal TUN/TAP device driver. + * Copyright (C) 1999-2000 Maxim Krasnyansky <max_mk@yahoo.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _UAPI__IF_TUN_H +#define _UAPI__IF_TUN_H + +#include <linux/types.h> +#include <linux/if_ether.h> +#include <linux/filter.h> + +/* Read queue size */ +#define TUN_READQ_SIZE 500 +/* TUN device type flags: deprecated. Use IFF_TUN/IFF_TAP instead. */ +#define TUN_TUN_DEV IFF_TUN +#define TUN_TAP_DEV IFF_TAP +#define TUN_TYPE_MASK 0x000f + +/* Ioctl defines */ +#define TUNSETNOCSUM _IOW('T', 200, int) +#define TUNSETDEBUG _IOW('T', 201, int) +#define TUNSETIFF _IOW('T', 202, int) +#define TUNSETPERSIST _IOW('T', 203, int) +#define TUNSETOWNER _IOW('T', 204, int) +#define TUNSETLINK _IOW('T', 205, int) +#define TUNSETGROUP _IOW('T', 206, int) +#define TUNGETFEATURES _IOR('T', 207, unsigned int) +#define TUNSETOFFLOAD _IOW('T', 208, unsigned int) +#define TUNSETTXFILTER _IOW('T', 209, unsigned int) +#define TUNGETIFF _IOR('T', 210, unsigned int) +#define TUNGETSNDBUF _IOR('T', 211, int) +#define TUNSETSNDBUF _IOW('T', 212, int) +#define TUNATTACHFILTER _IOW('T', 213, struct sock_fprog) +#define TUNDETACHFILTER _IOW('T', 214, struct sock_fprog) +#define TUNGETVNETHDRSZ _IOR('T', 215, int) +#define TUNSETVNETHDRSZ _IOW('T', 216, int) +#define TUNSETQUEUE _IOW('T', 217, int) +#define TUNSETIFINDEX _IOW('T', 218, unsigned int) +#define TUNGETFILTER _IOR('T', 219, struct sock_fprog) +#define TUNSETVNETLE _IOW('T', 220, int) +#define TUNGETVNETLE _IOR('T', 221, int) +/* The TUNSETVNETBE and TUNGETVNETBE ioctls are for cross-endian support on + * little-endian hosts. Not all kernel configurations support them, but all + * configurations that support SET also support GET. + */ +#define TUNSETVNETBE _IOW('T', 222, int) +#define TUNGETVNETBE _IOR('T', 223, int) +#define TUNSETSTEERINGEBPF _IOR('T', 224, int) +#define TUNSETFILTEREBPF _IOR('T', 225, int) +#define TUNSETCARRIER _IOW('T', 226, int) +#define TUNGETDEVNETNS _IO('T', 227) + +/* TUNSETIFF ifr flags */ +#define IFF_TUN 0x0001 +#define IFF_TAP 0x0002 +#define IFF_NAPI 0x0010 +#define IFF_NAPI_FRAGS 0x0020 +#define IFF_NO_PI 0x1000 +/* This flag has no real effect */ +#define IFF_ONE_QUEUE 0x2000 +#define IFF_VNET_HDR 0x4000 +#define IFF_TUN_EXCL 0x8000 +#define IFF_MULTI_QUEUE 0x0100 +#define IFF_ATTACH_QUEUE 0x0200 +#define IFF_DETACH_QUEUE 0x0400 +/* read-only flag */ +#define IFF_PERSIST 0x0800 +#define IFF_NOFILTER 0x1000 + +/* Socket options */ +#define TUN_TX_TIMESTAMP 1 + +/* Features for GSO (TUNSETOFFLOAD). */ +#define TUN_F_CSUM 0x01 /* You can hand me unchecksummed packets. */ +#define TUN_F_TSO4 0x02 /* I can handle TSO for IPv4 packets */ +#define TUN_F_TSO6 0x04 /* I can handle TSO for IPv6 packets */ +#define TUN_F_TSO_ECN 0x08 /* I can handle TSO with ECN bits. */ +#define TUN_F_UFO 0x10 /* I can handle UFO packets */ + +/* Protocol info prepended to the packets (when IFF_NO_PI is not set) */ +#define TUN_PKT_STRIP 0x0001 +struct tun_pi { + __u16 flags; + __be16 proto; +}; + +/* + * Filter spec (used for SETXXFILTER ioctls) + * This stuff is applicable only to the TAP (Ethernet) devices. + * If the count is zero the filter is disabled and the driver accepts + * all packets (promisc mode). + * If the filter is enabled in order to accept broadcast packets + * broadcast addr must be explicitly included in the addr list. + */ +#define TUN_FLT_ALLMULTI 0x0001 /* Accept all multicast packets */ +struct tun_filter { + __u16 flags; /* TUN_FLT_ flags see above */ + __u16 count; /* Number of addresses */ + __u8 addr[0][ETH_ALEN]; +}; + +#endif /* _UAPI__IF_TUN_H */ diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index ee9d5362f35b..e3962cfbc9a6 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1 +1,3 @@ -libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o +libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ + netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \ + btf_dump.o diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index f91639bf5650..9312066a1ae3 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -3,7 +3,7 @@ BPF_VERSION = 0 BPF_PATCHLEVEL = 0 -BPF_EXTRAVERSION = 3 +BPF_EXTRAVERSION = 4 MAKEFLAGS += --no-print-directory @@ -204,6 +204,16 @@ check_abi: $(OUTPUT)libbpf.so "versioned symbols in $^ ($(VERSIONED_SYM_COUNT))." \ "Please make sure all LIBBPF_API symbols are" \ "versioned in $(VERSION_SCRIPT)." >&2; \ + readelf -s --wide $(OUTPUT)libbpf-in.o | \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$8}'| \ + sort -u > $(OUTPUT)libbpf_global_syms.tmp; \ + readelf -s --wide $(OUTPUT)libbpf.so | \ + grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \ + sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \ + diff -u $(OUTPUT)libbpf_global_syms.tmp \ + $(OUTPUT)libbpf_versioned_syms.tmp; \ + rm $(OUTPUT)libbpf_global_syms.tmp \ + $(OUTPUT)libbpf_versioned_syms.tmp; \ exit 1; \ fi diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index c4a48086dc9a..0d4b4fe10a84 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -256,6 +256,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, if (load_attr->name) memcpy(attr.prog_name, load_attr->name, min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1)); + attr.prog_flags = load_attr->prog_flags; fd = sys_bpf_prog_load(&attr, sizeof(attr)); if (fd >= 0) diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 9593fec75652..ff42ca043dc8 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -87,6 +87,7 @@ struct bpf_load_program_attr { const void *line_info; __u32 line_info_cnt; __u32 log_level; + __u32 prog_flags; }; /* Flags to direct loading requirements */ diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 03348c4d6bd4..b2478e98c367 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -4,14 +4,17 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <fcntl.h> #include <unistd.h> #include <errno.h> #include <linux/err.h> #include <linux/btf.h> +#include <gelf.h> #include "btf.h" #include "bpf.h" #include "libbpf.h" #include "libbpf_internal.h" +#include "hashmap.h" #define max(a, b) ((a) > (b) ? (a) : (b)) #define min(a, b) ((a) < (b) ? (a) : (b)) @@ -417,6 +420,132 @@ done: return btf; } +static bool btf_check_endianness(const GElf_Ehdr *ehdr) +{ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return ehdr->e_ident[EI_DATA] == ELFDATA2LSB; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return ehdr->e_ident[EI_DATA] == ELFDATA2MSB; +#else +# error "Unrecognized __BYTE_ORDER__" +#endif +} + +struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext) +{ + Elf_Data *btf_data = NULL, *btf_ext_data = NULL; + int err = 0, fd = -1, idx = 0; + struct btf *btf = NULL; + Elf_Scn *scn = NULL; + Elf *elf = NULL; + GElf_Ehdr ehdr; + + if (elf_version(EV_CURRENT) == EV_NONE) { + pr_warning("failed to init libelf for %s\n", path); + return ERR_PTR(-LIBBPF_ERRNO__LIBELF); + } + + fd = open(path, O_RDONLY); + if (fd < 0) { + err = -errno; + pr_warning("failed to open %s: %s\n", path, strerror(errno)); + return ERR_PTR(err); + } + + err = -LIBBPF_ERRNO__FORMAT; + + elf = elf_begin(fd, ELF_C_READ, NULL); + if (!elf) { + pr_warning("failed to open %s as ELF file\n", path); + goto done; + } + if (!gelf_getehdr(elf, &ehdr)) { + pr_warning("failed to get EHDR from %s\n", path); + goto done; + } + if (!btf_check_endianness(&ehdr)) { + pr_warning("non-native ELF endianness is not supported\n"); + goto done; + } + if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) { + pr_warning("failed to get e_shstrndx from %s\n", path); + goto done; + } + + while ((scn = elf_nextscn(elf, scn)) != NULL) { + GElf_Shdr sh; + char *name; + + idx++; + if (gelf_getshdr(scn, &sh) != &sh) { + pr_warning("failed to get section(%d) header from %s\n", + idx, path); + goto done; + } + name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name); + if (!name) { + pr_warning("failed to get section(%d) name from %s\n", + idx, path); + goto done; + } + if (strcmp(name, BTF_ELF_SEC) == 0) { + btf_data = elf_getdata(scn, 0); + if (!btf_data) { + pr_warning("failed to get section(%d, %s) data from %s\n", + idx, name, path); + goto done; + } + continue; + } else if (btf_ext && strcmp(name, BTF_EXT_ELF_SEC) == 0) { + btf_ext_data = elf_getdata(scn, 0); + if (!btf_ext_data) { + pr_warning("failed to get section(%d, %s) data from %s\n", + idx, name, path); + goto done; + } + continue; + } + } + + err = 0; + + if (!btf_data) { + err = -ENOENT; + goto done; + } + btf = btf__new(btf_data->d_buf, btf_data->d_size); + if (IS_ERR(btf)) + goto done; + + if (btf_ext && btf_ext_data) { + *btf_ext = btf_ext__new(btf_ext_data->d_buf, + btf_ext_data->d_size); + if (IS_ERR(*btf_ext)) + goto done; + } else if (btf_ext) { + *btf_ext = NULL; + } +done: + if (elf) + elf_end(elf); + close(fd); + + if (err) + return ERR_PTR(err); + /* + * btf is always parsed before btf_ext, so no need to clean up + * btf_ext, if btf loading failed + */ + if (IS_ERR(btf)) + return btf; + if (btf_ext && IS_ERR(*btf_ext)) { + btf__free(btf); + err = PTR_ERR(*btf_ext); + return ERR_PTR(err); + } + return btf; +} + static int compare_vsi_off(const void *_a, const void *_b) { const struct btf_var_secinfo *a = _a; @@ -1165,16 +1294,9 @@ done: return err; } -#define BTF_DEDUP_TABLE_DEFAULT_SIZE (1 << 14) -#define BTF_DEDUP_TABLE_MAX_SIZE_LOG 31 #define BTF_UNPROCESSED_ID ((__u32)-1) #define BTF_IN_PROGRESS_ID ((__u32)-2) -struct btf_dedup_node { - struct btf_dedup_node *next; - __u32 type_id; -}; - struct btf_dedup { /* .BTF section to be deduped in-place */ struct btf *btf; @@ -1190,7 +1312,7 @@ struct btf_dedup { * candidates, which is fine because we rely on subsequent * btf_xxx_equal() checks to authoritatively verify type equality. */ - struct btf_dedup_node **dedup_table; + struct hashmap *dedup_table; /* Canonical types map */ __u32 *map; /* Hypothetical mapping, used during type graph equivalence checks */ @@ -1215,30 +1337,18 @@ struct btf_str_ptrs { __u32 cap; }; -static inline __u32 hash_combine(__u32 h, __u32 value) +static long hash_combine(long h, long value) { -/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ -#define GOLDEN_RATIO_PRIME 0x9e370001UL - return h * 37 + value * GOLDEN_RATIO_PRIME; -#undef GOLDEN_RATIO_PRIME + return h * 31 + value; } -#define for_each_dedup_cand(d, hash, node) \ - for (node = d->dedup_table[hash & (d->opts.dedup_table_size - 1)]; \ - node; \ - node = node->next) +#define for_each_dedup_cand(d, node, hash) \ + hashmap__for_each_key_entry(d->dedup_table, node, (void *)hash) -static int btf_dedup_table_add(struct btf_dedup *d, __u32 hash, __u32 type_id) +static int btf_dedup_table_add(struct btf_dedup *d, long hash, __u32 type_id) { - struct btf_dedup_node *node = malloc(sizeof(struct btf_dedup_node)); - int bucket = hash & (d->opts.dedup_table_size - 1); - - if (!node) - return -ENOMEM; - node->type_id = type_id; - node->next = d->dedup_table[bucket]; - d->dedup_table[bucket] = node; - return 0; + return hashmap__append(d->dedup_table, + (void *)hash, (void *)(long)type_id); } static int btf_dedup_hypot_map_add(struct btf_dedup *d, @@ -1267,36 +1377,10 @@ static void btf_dedup_clear_hypot_map(struct btf_dedup *d) d->hypot_cnt = 0; } -static void btf_dedup_table_free(struct btf_dedup *d) -{ - struct btf_dedup_node *head, *tmp; - int i; - - if (!d->dedup_table) - return; - - for (i = 0; i < d->opts.dedup_table_size; i++) { - while (d->dedup_table[i]) { - tmp = d->dedup_table[i]; - d->dedup_table[i] = tmp->next; - free(tmp); - } - - head = d->dedup_table[i]; - while (head) { - tmp = head; - head = head->next; - free(tmp); - } - } - - free(d->dedup_table); - d->dedup_table = NULL; -} - static void btf_dedup_free(struct btf_dedup *d) { - btf_dedup_table_free(d); + hashmap__free(d->dedup_table); + d->dedup_table = NULL; free(d->map); d->map = NULL; @@ -1310,40 +1394,43 @@ static void btf_dedup_free(struct btf_dedup *d) free(d); } -/* Find closest power of two >= to size, capped at 2^max_size_log */ -static __u32 roundup_pow2_max(__u32 size, int max_size_log) +static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx) { - int i; + return (size_t)key; +} - for (i = 0; i < max_size_log && (1U << i) < size; i++) - ; - return 1U << i; +static size_t btf_dedup_collision_hash_fn(const void *key, void *ctx) +{ + return 0; } +static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx) +{ + return k1 == k2; +} static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, const struct btf_dedup_opts *opts) { struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup)); + hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn; int i, err = 0; - __u32 sz; if (!d) return ERR_PTR(-ENOMEM); d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds; - sz = opts && opts->dedup_table_size ? opts->dedup_table_size - : BTF_DEDUP_TABLE_DEFAULT_SIZE; - sz = roundup_pow2_max(sz, BTF_DEDUP_TABLE_MAX_SIZE_LOG); - d->opts.dedup_table_size = sz; + /* dedup_table_size is now used only to force collisions in tests */ + if (opts && opts->dedup_table_size == 1) + hash_fn = btf_dedup_collision_hash_fn; d->btf = btf; d->btf_ext = btf_ext; - d->dedup_table = calloc(d->opts.dedup_table_size, - sizeof(struct btf_dedup_node *)); - if (!d->dedup_table) { - err = -ENOMEM; + d->dedup_table = hashmap__new(hash_fn, btf_dedup_equal_fn, NULL); + if (IS_ERR(d->dedup_table)) { + err = PTR_ERR(d->dedup_table); + d->dedup_table = NULL; goto done; } @@ -1662,9 +1749,9 @@ done: return err; } -static __u32 btf_hash_common(struct btf_type *t) +static long btf_hash_common(struct btf_type *t) { - __u32 h; + long h; h = hash_combine(0, t->name_off); h = hash_combine(h, t->info); @@ -1680,10 +1767,10 @@ static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2) } /* Calculate type signature hash of INT. */ -static __u32 btf_hash_int(struct btf_type *t) +static long btf_hash_int(struct btf_type *t) { __u32 info = *(__u32 *)(t + 1); - __u32 h; + long h; h = btf_hash_common(t); h = hash_combine(h, info); @@ -1703,9 +1790,9 @@ static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2) } /* Calculate type signature hash of ENUM. */ -static __u32 btf_hash_enum(struct btf_type *t) +static long btf_hash_enum(struct btf_type *t) { - __u32 h; + long h; /* don't hash vlen and enum members to support enum fwd resolving */ h = hash_combine(0, t->name_off); @@ -1757,11 +1844,11 @@ static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2) * as referenced type IDs equivalence is established separately during type * graph equivalence check algorithm. */ -static __u32 btf_hash_struct(struct btf_type *t) +static long btf_hash_struct(struct btf_type *t) { struct btf_member *member = (struct btf_member *)(t + 1); __u32 vlen = BTF_INFO_VLEN(t->info); - __u32 h = btf_hash_common(t); + long h = btf_hash_common(t); int i; for (i = 0; i < vlen; i++) { @@ -1804,10 +1891,10 @@ static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2) * under assumption that they were already resolved to canonical type IDs and * are not going to change. */ -static __u32 btf_hash_array(struct btf_type *t) +static long btf_hash_array(struct btf_type *t) { struct btf_array *info = (struct btf_array *)(t + 1); - __u32 h = btf_hash_common(t); + long h = btf_hash_common(t); h = hash_combine(h, info->type); h = hash_combine(h, info->index_type); @@ -1858,11 +1945,11 @@ static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2) * under assumption that they were already resolved to canonical type IDs and * are not going to change. */ -static inline __u32 btf_hash_fnproto(struct btf_type *t) +static long btf_hash_fnproto(struct btf_type *t) { struct btf_param *member = (struct btf_param *)(t + 1); __u16 vlen = BTF_INFO_VLEN(t->info); - __u32 h = btf_hash_common(t); + long h = btf_hash_common(t); int i; for (i = 0; i < vlen; i++) { @@ -1880,7 +1967,7 @@ static inline __u32 btf_hash_fnproto(struct btf_type *t) * This function is called during reference types deduplication to compare * FUNC_PROTO to potential canonical representative. */ -static inline bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2) +static bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2) { struct btf_param *m1, *m2; __u16 vlen; @@ -1906,7 +1993,7 @@ static inline bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2) * IDs. This check is performed during type graph equivalence check and * referenced types equivalence is checked separately. */ -static inline bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2) +static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2) { struct btf_param *m1, *m2; __u16 vlen; @@ -1937,11 +2024,12 @@ static inline bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2) static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) { struct btf_type *t = d->btf->types[type_id]; + struct hashmap_entry *hash_entry; struct btf_type *cand; - struct btf_dedup_node *cand_node; /* if we don't find equivalent type, then we are canonical */ __u32 new_id = type_id; - __u32 h; + __u32 cand_id; + long h; switch (BTF_INFO_KIND(t->info)) { case BTF_KIND_CONST: @@ -1960,10 +2048,11 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_INT: h = btf_hash_int(t); - for_each_dedup_cand(d, h, cand_node) { - cand = d->btf->types[cand_node->type_id]; + for_each_dedup_cand(d, hash_entry, h) { + cand_id = (__u32)(long)hash_entry->value; + cand = d->btf->types[cand_id]; if (btf_equal_int(t, cand)) { - new_id = cand_node->type_id; + new_id = cand_id; break; } } @@ -1971,10 +2060,11 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_ENUM: h = btf_hash_enum(t); - for_each_dedup_cand(d, h, cand_node) { - cand = d->btf->types[cand_node->type_id]; + for_each_dedup_cand(d, hash_entry, h) { + cand_id = (__u32)(long)hash_entry->value; + cand = d->btf->types[cand_id]; if (btf_equal_enum(t, cand)) { - new_id = cand_node->type_id; + new_id = cand_id; break; } if (d->opts.dont_resolve_fwds) @@ -1982,21 +2072,22 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) if (btf_compat_enum(t, cand)) { if (btf_is_enum_fwd(t)) { /* resolve fwd to full enum */ - new_id = cand_node->type_id; + new_id = cand_id; break; } /* resolve canonical enum fwd to full enum */ - d->map[cand_node->type_id] = type_id; + d->map[cand_id] = type_id; } } break; case BTF_KIND_FWD: h = btf_hash_common(t); - for_each_dedup_cand(d, h, cand_node) { - cand = d->btf->types[cand_node->type_id]; + for_each_dedup_cand(d, hash_entry, h) { + cand_id = (__u32)(long)hash_entry->value; + cand = d->btf->types[cand_id]; if (btf_equal_common(t, cand)) { - new_id = cand_node->type_id; + new_id = cand_id; break; } } @@ -2397,12 +2488,12 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d) */ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) { - struct btf_dedup_node *cand_node; struct btf_type *cand_type, *t; + struct hashmap_entry *hash_entry; /* if we don't find equivalent type, then we are canonical */ __u32 new_id = type_id; __u16 kind; - __u32 h; + long h; /* already deduped or is in process of deduping (loop detected) */ if (d->map[type_id] <= BTF_MAX_NR_TYPES) @@ -2415,7 +2506,8 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) return 0; h = btf_hash_struct(t); - for_each_dedup_cand(d, h, cand_node) { + for_each_dedup_cand(d, hash_entry, h) { + __u32 cand_id = (__u32)(long)hash_entry->value; int eq; /* @@ -2428,17 +2520,17 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) * creating a loop (FWD -> STRUCT and STRUCT -> FWD), because * FWD and compatible STRUCT/UNION are considered equivalent. */ - cand_type = d->btf->types[cand_node->type_id]; + cand_type = d->btf->types[cand_id]; if (!btf_shallow_equal_struct(t, cand_type)) continue; btf_dedup_clear_hypot_map(d); - eq = btf_dedup_is_equiv(d, type_id, cand_node->type_id); + eq = btf_dedup_is_equiv(d, type_id, cand_id); if (eq < 0) return eq; if (!eq) continue; - new_id = cand_node->type_id; + new_id = cand_id; btf_dedup_merge_hypot_map(d); break; } @@ -2488,12 +2580,12 @@ static int btf_dedup_struct_types(struct btf_dedup *d) */ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) { - struct btf_dedup_node *cand_node; + struct hashmap_entry *hash_entry; + __u32 new_id = type_id, cand_id; struct btf_type *t, *cand; /* if we don't find equivalent type, then we are representative type */ - __u32 new_id = type_id; int ref_type_id; - __u32 h; + long h; if (d->map[type_id] == BTF_IN_PROGRESS_ID) return -ELOOP; @@ -2516,10 +2608,11 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) t->type = ref_type_id; h = btf_hash_common(t); - for_each_dedup_cand(d, h, cand_node) { - cand = d->btf->types[cand_node->type_id]; + for_each_dedup_cand(d, hash_entry, h) { + cand_id = (__u32)(long)hash_entry->value; + cand = d->btf->types[cand_id]; if (btf_equal_common(t, cand)) { - new_id = cand_node->type_id; + new_id = cand_id; break; } } @@ -2539,10 +2632,11 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) info->index_type = ref_type_id; h = btf_hash_array(t); - for_each_dedup_cand(d, h, cand_node) { - cand = d->btf->types[cand_node->type_id]; + for_each_dedup_cand(d, hash_entry, h) { + cand_id = (__u32)(long)hash_entry->value; + cand = d->btf->types[cand_id]; if (btf_equal_array(t, cand)) { - new_id = cand_node->type_id; + new_id = cand_id; break; } } @@ -2570,10 +2664,11 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) } h = btf_hash_fnproto(t); - for_each_dedup_cand(d, h, cand_node) { - cand = d->btf->types[cand_node->type_id]; + for_each_dedup_cand(d, hash_entry, h) { + cand_id = (__u32)(long)hash_entry->value; + cand = d->btf->types[cand_id]; if (btf_equal_fnproto(t, cand)) { - new_id = cand_node->type_id; + new_id = cand_id; break; } } @@ -2600,7 +2695,9 @@ static int btf_dedup_ref_types(struct btf_dedup *d) if (err < 0) return err; } - btf_dedup_table_free(d); + /* we won't need d->dedup_table anymore */ + hashmap__free(d->dedup_table); + d->dedup_table = NULL; return 0; } diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index c7b399e81fce..ba4ffa831aa4 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -4,6 +4,7 @@ #ifndef __LIBBPF_BTF_H #define __LIBBPF_BTF_H +#include <stdarg.h> #include <linux/types.h> #ifdef __cplusplus @@ -59,6 +60,8 @@ struct btf_ext_header { LIBBPF_API void btf__free(struct btf *btf); LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size); +LIBBPF_API struct btf *btf__parse_elf(const char *path, + struct btf_ext **btf_ext); LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); LIBBPF_API int btf__load(struct btf *btf); LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, @@ -100,6 +103,22 @@ struct btf_dedup_opts { LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, const struct btf_dedup_opts *opts); +struct btf_dump; + +struct btf_dump_opts { + void *ctx; +}; + +typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args); + +LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf, + const struct btf_ext *btf_ext, + const struct btf_dump_opts *opts, + btf_dump_printf_fn_t printf_fn); +LIBBPF_API void btf_dump__free(struct btf_dump *d); + +LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c new file mode 100644 index 000000000000..4b22db77e2cc --- /dev/null +++ b/tools/lib/bpf/btf_dump.c @@ -0,0 +1,1336 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * BTF-to-C type converter. + * + * Copyright (c) 2019 Facebook + */ + +#include <stdbool.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <linux/err.h> +#include <linux/btf.h> +#include "btf.h" +#include "hashmap.h" +#include "libbpf.h" +#include "libbpf_internal.h" + +#define min(x, y) ((x) < (y) ? (x) : (y)) +#define max(x, y) ((x) < (y) ? (y) : (x)) + +static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t"; +static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1; + +static const char *pfx(int lvl) +{ + return lvl >= PREFIX_CNT ? PREFIXES : &PREFIXES[PREFIX_CNT - lvl]; +} + +enum btf_dump_type_order_state { + NOT_ORDERED, + ORDERING, + ORDERED, +}; + +enum btf_dump_type_emit_state { + NOT_EMITTED, + EMITTING, + EMITTED, +}; + +/* per-type auxiliary state */ +struct btf_dump_type_aux_state { + /* topological sorting state */ + enum btf_dump_type_order_state order_state: 2; + /* emitting state used to determine the need for forward declaration */ + enum btf_dump_type_emit_state emit_state: 2; + /* whether forward declaration was already emitted */ + __u8 fwd_emitted: 1; + /* whether unique non-duplicate name was already assigned */ + __u8 name_resolved: 1; +}; + +struct btf_dump { + const struct btf *btf; + const struct btf_ext *btf_ext; + btf_dump_printf_fn_t printf_fn; + struct btf_dump_opts opts; + + /* per-type auxiliary state */ + struct btf_dump_type_aux_state *type_states; + /* per-type optional cached unique name, must be freed, if present */ + const char **cached_names; + + /* topo-sorted list of dependent type definitions */ + __u32 *emit_queue; + int emit_queue_cap; + int emit_queue_cnt; + + /* + * stack of type declarations (e.g., chain of modifiers, arrays, + * funcs, etc) + */ + __u32 *decl_stack; + int decl_stack_cap; + int decl_stack_cnt; + + /* maps struct/union/enum name to a number of name occurrences */ + struct hashmap *type_names; + /* + * maps typedef identifiers and enum value names to a number of such + * name occurrences + */ + struct hashmap *ident_names; +}; + +static size_t str_hash_fn(const void *key, void *ctx) +{ + const char *s = key; + size_t h = 0; + + while (*s) { + h = h * 31 + *s; + s++; + } + return h; +} + +static bool str_equal_fn(const void *a, const void *b, void *ctx) +{ + return strcmp(a, b) == 0; +} + +static __u16 btf_kind_of(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info); +} + +static __u16 btf_vlen_of(const struct btf_type *t) +{ + return BTF_INFO_VLEN(t->info); +} + +static bool btf_kflag_of(const struct btf_type *t) +{ + return BTF_INFO_KFLAG(t->info); +} + +static const char *btf_name_of(const struct btf_dump *d, __u32 name_off) +{ + return btf__name_by_offset(d->btf, name_off); +} + +static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + d->printf_fn(d->opts.ctx, fmt, args); + va_end(args); +} + +struct btf_dump *btf_dump__new(const struct btf *btf, + const struct btf_ext *btf_ext, + const struct btf_dump_opts *opts, + btf_dump_printf_fn_t printf_fn) +{ + struct btf_dump *d; + int err; + + d = calloc(1, sizeof(struct btf_dump)); + if (!d) + return ERR_PTR(-ENOMEM); + + d->btf = btf; + d->btf_ext = btf_ext; + d->printf_fn = printf_fn; + d->opts.ctx = opts ? opts->ctx : NULL; + + d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL); + if (IS_ERR(d->type_names)) { + err = PTR_ERR(d->type_names); + d->type_names = NULL; + btf_dump__free(d); + return ERR_PTR(err); + } + d->ident_names = hashmap__new(str_hash_fn, str_equal_fn, NULL); + if (IS_ERR(d->ident_names)) { + err = PTR_ERR(d->ident_names); + d->ident_names = NULL; + btf_dump__free(d); + return ERR_PTR(err); + } + + return d; +} + +void btf_dump__free(struct btf_dump *d) +{ + int i, cnt; + + if (!d) + return; + + free(d->type_states); + if (d->cached_names) { + /* any set cached name is owned by us and should be freed */ + for (i = 0, cnt = btf__get_nr_types(d->btf); i <= cnt; i++) { + if (d->cached_names[i]) + free((void *)d->cached_names[i]); + } + } + free(d->cached_names); + free(d->emit_queue); + free(d->decl_stack); + hashmap__free(d->type_names); + hashmap__free(d->ident_names); + + free(d); +} + +static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr); +static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id); + +/* + * Dump BTF type in a compilable C syntax, including all the necessary + * dependent types, necessary for compilation. If some of the dependent types + * were already emitted as part of previous btf_dump__dump_type() invocation + * for another type, they won't be emitted again. This API allows callers to + * filter out BTF types according to user-defined criterias and emitted only + * minimal subset of types, necessary to compile everything. Full struct/union + * definitions will still be emitted, even if the only usage is through + * pointer and could be satisfied with just a forward declaration. + * + * Dumping is done in two high-level passes: + * 1. Topologically sort type definitions to satisfy C rules of compilation. + * 2. Emit type definitions in C syntax. + * + * Returns 0 on success; <0, otherwise. + */ +int btf_dump__dump_type(struct btf_dump *d, __u32 id) +{ + int err, i; + + if (id > btf__get_nr_types(d->btf)) + return -EINVAL; + + /* type states are lazily allocated, as they might not be needed */ + if (!d->type_states) { + d->type_states = calloc(1 + btf__get_nr_types(d->btf), + sizeof(d->type_states[0])); + if (!d->type_states) + return -ENOMEM; + d->cached_names = calloc(1 + btf__get_nr_types(d->btf), + sizeof(d->cached_names[0])); + if (!d->cached_names) + return -ENOMEM; + + /* VOID is special */ + d->type_states[0].order_state = ORDERED; + d->type_states[0].emit_state = EMITTED; + } + + d->emit_queue_cnt = 0; + err = btf_dump_order_type(d, id, false); + if (err < 0) + return err; + + for (i = 0; i < d->emit_queue_cnt; i++) + btf_dump_emit_type(d, d->emit_queue[i], 0 /*top-level*/); + + return 0; +} + +static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id) +{ + __u32 *new_queue; + size_t new_cap; + + if (d->emit_queue_cnt >= d->emit_queue_cap) { + new_cap = max(16, d->emit_queue_cap * 3 / 2); + new_queue = realloc(d->emit_queue, + new_cap * sizeof(new_queue[0])); + if (!new_queue) + return -ENOMEM; + d->emit_queue = new_queue; + d->emit_queue_cap = new_cap; + } + + d->emit_queue[d->emit_queue_cnt++] = id; + return 0; +} + +/* + * Determine order of emitting dependent types and specified type to satisfy + * C compilation rules. This is done through topological sorting with an + * additional complication which comes from C rules. The main idea for C is + * that if some type is "embedded" into a struct/union, it's size needs to be + * known at the time of definition of containing type. E.g., for: + * + * struct A {}; + * struct B { struct A x; } + * + * struct A *HAS* to be defined before struct B, because it's "embedded", + * i.e., it is part of struct B layout. But in the following case: + * + * struct A; + * struct B { struct A *x; } + * struct A {}; + * + * it's enough to just have a forward declaration of struct A at the time of + * struct B definition, as struct B has a pointer to struct A, so the size of + * field x is known without knowing struct A size: it's sizeof(void *). + * + * Unfortunately, there are some trickier cases we need to handle, e.g.: + * + * struct A {}; // if this was forward-declaration: compilation error + * struct B { + * struct { // anonymous struct + * struct A y; + * } *x; + * }; + * + * In this case, struct B's field x is a pointer, so it's size is known + * regardless of the size of (anonymous) struct it points to. But because this + * struct is anonymous and thus defined inline inside struct B, *and* it + * embeds struct A, compiler requires full definition of struct A to be known + * before struct B can be defined. This creates a transitive dependency + * between struct A and struct B. If struct A was forward-declared before + * struct B definition and fully defined after struct B definition, that would + * trigger compilation error. + * + * All this means that while we are doing topological sorting on BTF type + * graph, we need to determine relationships between different types (graph + * nodes): + * - weak link (relationship) between X and Y, if Y *CAN* be + * forward-declared at the point of X definition; + * - strong link, if Y *HAS* to be fully-defined before X can be defined. + * + * The rule is as follows. Given a chain of BTF types from X to Y, if there is + * BTF_KIND_PTR type in the chain and at least one non-anonymous type + * Z (excluding X, including Y), then link is weak. Otherwise, it's strong. + * Weak/strong relationship is determined recursively during DFS traversal and + * is returned as a result from btf_dump_order_type(). + * + * btf_dump_order_type() is trying to avoid unnecessary forward declarations, + * but it is not guaranteeing that no extraneous forward declarations will be + * emitted. + * + * To avoid extra work, algorithm marks some of BTF types as ORDERED, when + * it's done with them, but not for all (e.g., VOLATILE, CONST, RESTRICT, + * ARRAY, FUNC_PROTO), as weak/strong semantics for those depends on the + * entire graph path, so depending where from one came to that BTF type, it + * might cause weak or strong ordering. For types like STRUCT/UNION/INT/ENUM, + * once they are processed, there is no need to do it again, so they are + * marked as ORDERED. We can mark PTR as ORDERED as well, as it semi-forces + * weak link, unless subsequent referenced STRUCT/UNION/ENUM is anonymous. But + * in any case, once those are processed, no need to do it again, as the + * result won't change. + * + * Returns: + * - 1, if type is part of strong link (so there is strong topological + * ordering requirements); + * - 0, if type is part of weak link (so can be satisfied through forward + * declaration); + * - <0, on error (e.g., unsatisfiable type loop detected). + */ +static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) +{ + /* + * Order state is used to detect strong link cycles, but only for BTF + * kinds that are or could be an independent definition (i.e., + * stand-alone fwd decl, enum, typedef, struct, union). Ptrs, arrays, + * func_protos, modifiers are just means to get to these definitions. + * Int/void don't need definitions, they are assumed to be always + * properly defined. We also ignore datasec, var, and funcs for now. + * So for all non-defining kinds, we never even set ordering state, + * for defining kinds we set ORDERING and subsequently ORDERED if it + * forms a strong link. + */ + struct btf_dump_type_aux_state *tstate = &d->type_states[id]; + const struct btf_type *t; + __u16 kind, vlen; + int err, i; + + /* return true, letting typedefs know that it's ok to be emitted */ + if (tstate->order_state == ORDERED) + return 1; + + t = btf__type_by_id(d->btf, id); + kind = btf_kind_of(t); + + if (tstate->order_state == ORDERING) { + /* type loop, but resolvable through fwd declaration */ + if ((kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION) && + through_ptr && t->name_off != 0) + return 0; + pr_warning("unsatisfiable type cycle, id:[%u]\n", id); + return -ELOOP; + } + + switch (kind) { + case BTF_KIND_INT: + tstate->order_state = ORDERED; + return 0; + + case BTF_KIND_PTR: + err = btf_dump_order_type(d, t->type, true); + tstate->order_state = ORDERED; + return err; + + case BTF_KIND_ARRAY: { + const struct btf_array *a = (void *)(t + 1); + + return btf_dump_order_type(d, a->type, through_ptr); + } + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m = (void *)(t + 1); + /* + * struct/union is part of strong link, only if it's embedded + * (so no ptr in a path) or it's anonymous (so has to be + * defined inline, even if declared through ptr) + */ + if (through_ptr && t->name_off != 0) + return 0; + + tstate->order_state = ORDERING; + + vlen = btf_vlen_of(t); + for (i = 0; i < vlen; i++, m++) { + err = btf_dump_order_type(d, m->type, false); + if (err < 0) + return err; + } + + if (t->name_off != 0) { + err = btf_dump_add_emit_queue_id(d, id); + if (err < 0) + return err; + } + + tstate->order_state = ORDERED; + return 1; + } + case BTF_KIND_ENUM: + case BTF_KIND_FWD: + if (t->name_off != 0) { + err = btf_dump_add_emit_queue_id(d, id); + if (err) + return err; + } + tstate->order_state = ORDERED; + return 1; + + case BTF_KIND_TYPEDEF: { + int is_strong; + + is_strong = btf_dump_order_type(d, t->type, through_ptr); + if (is_strong < 0) + return is_strong; + + /* typedef is similar to struct/union w.r.t. fwd-decls */ + if (through_ptr && !is_strong) + return 0; + + /* typedef is always a named definition */ + err = btf_dump_add_emit_queue_id(d, id); + if (err) + return err; + + d->type_states[id].order_state = ORDERED; + return 1; + } + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + return btf_dump_order_type(d, t->type, through_ptr); + + case BTF_KIND_FUNC_PROTO: { + const struct btf_param *p = (void *)(t + 1); + bool is_strong; + + err = btf_dump_order_type(d, t->type, through_ptr); + if (err < 0) + return err; + is_strong = err > 0; + + vlen = btf_vlen_of(t); + for (i = 0; i < vlen; i++, p++) { + err = btf_dump_order_type(d, p->type, through_ptr); + if (err < 0) + return err; + if (err > 0) + is_strong = true; + } + return is_strong; + } + case BTF_KIND_FUNC: + case BTF_KIND_VAR: + case BTF_KIND_DATASEC: + d->type_states[id].order_state = ORDERED; + return 0; + + default: + return -EINVAL; + } +} + +static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id, + const struct btf_type *t); +static void btf_dump_emit_struct_def(struct btf_dump *d, __u32 id, + const struct btf_type *t, int lvl); + +static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id, + const struct btf_type *t); +static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id, + const struct btf_type *t, int lvl); + +static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id, + const struct btf_type *t); + +static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id, + const struct btf_type *t, int lvl); + +/* a local view into a shared stack */ +struct id_stack { + const __u32 *ids; + int cnt; +}; + +static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, + const char *fname, int lvl); +static void btf_dump_emit_type_chain(struct btf_dump *d, + struct id_stack *decl_stack, + const char *fname, int lvl); + +static const char *btf_dump_type_name(struct btf_dump *d, __u32 id); +static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id); +static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map, + const char *orig_name); + +static bool btf_dump_is_blacklisted(struct btf_dump *d, __u32 id) +{ + const struct btf_type *t = btf__type_by_id(d->btf, id); + + /* __builtin_va_list is a compiler built-in, which causes compilation + * errors, when compiling w/ different compiler, then used to compile + * original code (e.g., GCC to compile kernel, Clang to use generated + * C header from BTF). As it is built-in, it should be already defined + * properly internally in compiler. + */ + if (t->name_off == 0) + return false; + return strcmp(btf_name_of(d, t->name_off), "__builtin_va_list") == 0; +} + +/* + * Emit C-syntax definitions of types from chains of BTF types. + * + * High-level handling of determining necessary forward declarations are handled + * by btf_dump_emit_type() itself, but all nitty-gritty details of emitting type + * declarations/definitions in C syntax are handled by a combo of + * btf_dump_emit_type_decl()/btf_dump_emit_type_chain() w/ delegation to + * corresponding btf_dump_emit_*_{def,fwd}() functions. + * + * We also keep track of "containing struct/union type ID" to determine when + * we reference it from inside and thus can avoid emitting unnecessary forward + * declaration. + * + * This algorithm is designed in such a way, that even if some error occurs + * (either technical, e.g., out of memory, or logical, i.e., malformed BTF + * that doesn't comply to C rules completely), algorithm will try to proceed + * and produce as much meaningful output as possible. + */ +static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) +{ + struct btf_dump_type_aux_state *tstate = &d->type_states[id]; + bool top_level_def = cont_id == 0; + const struct btf_type *t; + __u16 kind; + + if (tstate->emit_state == EMITTED) + return; + + t = btf__type_by_id(d->btf, id); + kind = btf_kind_of(t); + + if (top_level_def && t->name_off == 0) { + pr_warning("unexpected nameless definition, id:[%u]\n", id); + return; + } + + if (tstate->emit_state == EMITTING) { + if (tstate->fwd_emitted) + return; + + switch (kind) { + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + /* + * if we are referencing a struct/union that we are + * part of - then no need for fwd declaration + */ + if (id == cont_id) + return; + if (t->name_off == 0) { + pr_warning("anonymous struct/union loop, id:[%u]\n", + id); + return; + } + btf_dump_emit_struct_fwd(d, id, t); + btf_dump_printf(d, ";\n\n"); + tstate->fwd_emitted = 1; + break; + case BTF_KIND_TYPEDEF: + /* + * for typedef fwd_emitted means typedef definition + * was emitted, but it can be used only for "weak" + * references through pointer only, not for embedding + */ + if (!btf_dump_is_blacklisted(d, id)) { + btf_dump_emit_typedef_def(d, id, t, 0); + btf_dump_printf(d, ";\n\n"); + }; + tstate->fwd_emitted = 1; + break; + default: + break; + } + + return; + } + + switch (kind) { + case BTF_KIND_INT: + tstate->emit_state = EMITTED; + break; + case BTF_KIND_ENUM: + if (top_level_def) { + btf_dump_emit_enum_def(d, id, t, 0); + btf_dump_printf(d, ";\n\n"); + } + tstate->emit_state = EMITTED; + break; + case BTF_KIND_PTR: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + btf_dump_emit_type(d, t->type, cont_id); + break; + case BTF_KIND_ARRAY: { + const struct btf_array *a = (void *)(t + 1); + + btf_dump_emit_type(d, a->type, cont_id); + break; + } + case BTF_KIND_FWD: + btf_dump_emit_fwd_def(d, id, t); + btf_dump_printf(d, ";\n\n"); + tstate->emit_state = EMITTED; + break; + case BTF_KIND_TYPEDEF: + tstate->emit_state = EMITTING; + btf_dump_emit_type(d, t->type, id); + /* + * typedef can server as both definition and forward + * declaration; at this stage someone depends on + * typedef as a forward declaration (refers to it + * through pointer), so unless we already did it, + * emit typedef as a forward declaration + */ + if (!tstate->fwd_emitted && !btf_dump_is_blacklisted(d, id)) { + btf_dump_emit_typedef_def(d, id, t, 0); + btf_dump_printf(d, ";\n\n"); + } + tstate->emit_state = EMITTED; + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + tstate->emit_state = EMITTING; + /* if it's a top-level struct/union definition or struct/union + * is anonymous, then in C we'll be emitting all fields and + * their types (as opposed to just `struct X`), so we need to + * make sure that all types, referenced from struct/union + * members have necessary forward-declarations, where + * applicable + */ + if (top_level_def || t->name_off == 0) { + const struct btf_member *m = (void *)(t + 1); + __u16 vlen = btf_vlen_of(t); + int i, new_cont_id; + + new_cont_id = t->name_off == 0 ? cont_id : id; + for (i = 0; i < vlen; i++, m++) + btf_dump_emit_type(d, m->type, new_cont_id); + } else if (!tstate->fwd_emitted && id != cont_id) { + btf_dump_emit_struct_fwd(d, id, t); + btf_dump_printf(d, ";\n\n"); + tstate->fwd_emitted = 1; + } + + if (top_level_def) { + btf_dump_emit_struct_def(d, id, t, 0); + btf_dump_printf(d, ";\n\n"); + tstate->emit_state = EMITTED; + } else { + tstate->emit_state = NOT_EMITTED; + } + break; + case BTF_KIND_FUNC_PROTO: { + const struct btf_param *p = (void *)(t + 1); + __u16 vlen = btf_vlen_of(t); + int i; + + btf_dump_emit_type(d, t->type, cont_id); + for (i = 0; i < vlen; i++, p++) + btf_dump_emit_type(d, p->type, cont_id); + + break; + } + default: + break; + } +} + +static int btf_align_of(const struct btf *btf, __u32 id) +{ + const struct btf_type *t = btf__type_by_id(btf, id); + __u16 kind = btf_kind_of(t); + + switch (kind) { + case BTF_KIND_INT: + case BTF_KIND_ENUM: + return min(sizeof(void *), t->size); + case BTF_KIND_PTR: + return sizeof(void *); + case BTF_KIND_TYPEDEF: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + return btf_align_of(btf, t->type); + case BTF_KIND_ARRAY: { + const struct btf_array *a = (void *)(t + 1); + + return btf_align_of(btf, a->type); + } + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m = (void *)(t + 1); + __u16 vlen = btf_vlen_of(t); + int i, align = 1; + + for (i = 0; i < vlen; i++, m++) + align = max(align, btf_align_of(btf, m->type)); + + return align; + } + default: + pr_warning("unsupported BTF_KIND:%u\n", btf_kind_of(t)); + return 1; + } +} + +static bool btf_is_struct_packed(const struct btf *btf, __u32 id, + const struct btf_type *t) +{ + const struct btf_member *m; + int align, i, bit_sz; + __u16 vlen; + bool kflag; + + align = btf_align_of(btf, id); + /* size of a non-packed struct has to be a multiple of its alignment*/ + if (t->size % align) + return true; + + m = (void *)(t + 1); + kflag = btf_kflag_of(t); + vlen = btf_vlen_of(t); + /* all non-bitfield fields have to be naturally aligned */ + for (i = 0; i < vlen; i++, m++) { + align = btf_align_of(btf, m->type); + bit_sz = kflag ? BTF_MEMBER_BITFIELD_SIZE(m->offset) : 0; + if (bit_sz == 0 && m->offset % (8 * align) != 0) + return true; + } + + /* + * if original struct was marked as packed, but its layout is + * naturally aligned, we'll detect that it's not packed + */ + return false; +} + +static int chip_away_bits(int total, int at_most) +{ + return total % at_most ? : at_most; +} + +static void btf_dump_emit_bit_padding(const struct btf_dump *d, + int cur_off, int m_off, int m_bit_sz, + int align, int lvl) +{ + int off_diff = m_off - cur_off; + int ptr_bits = sizeof(void *) * 8; + + if (off_diff <= 0) + /* no gap */ + return; + if (m_bit_sz == 0 && off_diff < align * 8) + /* natural padding will take care of a gap */ + return; + + while (off_diff > 0) { + const char *pad_type; + int pad_bits; + + if (ptr_bits > 32 && off_diff > 32) { + pad_type = "long"; + pad_bits = chip_away_bits(off_diff, ptr_bits); + } else if (off_diff > 16) { + pad_type = "int"; + pad_bits = chip_away_bits(off_diff, 32); + } else if (off_diff > 8) { + pad_type = "short"; + pad_bits = chip_away_bits(off_diff, 16); + } else { + pad_type = "char"; + pad_bits = chip_away_bits(off_diff, 8); + } + btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits); + off_diff -= pad_bits; + } +} + +static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id, + const struct btf_type *t) +{ + btf_dump_printf(d, "%s %s", + btf_kind_of(t) == BTF_KIND_STRUCT ? "struct" : "union", + btf_dump_type_name(d, id)); +} + +static void btf_dump_emit_struct_def(struct btf_dump *d, + __u32 id, + const struct btf_type *t, + int lvl) +{ + const struct btf_member *m = (void *)(t + 1); + bool kflag = btf_kflag_of(t), is_struct; + int align, i, packed, off = 0; + __u16 vlen = btf_vlen_of(t); + + is_struct = btf_kind_of(t) == BTF_KIND_STRUCT; + packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0; + align = packed ? 1 : btf_align_of(d->btf, id); + + btf_dump_printf(d, "%s%s%s {", + is_struct ? "struct" : "union", + t->name_off ? " " : "", + btf_dump_type_name(d, id)); + + for (i = 0; i < vlen; i++, m++) { + const char *fname; + int m_off, m_sz; + + fname = btf_name_of(d, m->name_off); + m_sz = kflag ? BTF_MEMBER_BITFIELD_SIZE(m->offset) : 0; + m_off = kflag ? BTF_MEMBER_BIT_OFFSET(m->offset) : m->offset; + align = packed ? 1 : btf_align_of(d->btf, m->type); + + btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1); + btf_dump_printf(d, "\n%s", pfx(lvl + 1)); + btf_dump_emit_type_decl(d, m->type, fname, lvl + 1); + + if (m_sz) { + btf_dump_printf(d, ": %d", m_sz); + off = m_off + m_sz; + } else { + m_sz = max(0, btf__resolve_size(d->btf, m->type)); + off = m_off + m_sz * 8; + } + btf_dump_printf(d, ";"); + } + + if (vlen) + btf_dump_printf(d, "\n"); + btf_dump_printf(d, "%s}", pfx(lvl)); + if (packed) + btf_dump_printf(d, " __attribute__((packed))"); +} + +static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id, + const struct btf_type *t) +{ + btf_dump_printf(d, "enum %s", btf_dump_type_name(d, id)); +} + +static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id, + const struct btf_type *t, + int lvl) +{ + const struct btf_enum *v = (void *)(t+1); + __u16 vlen = btf_vlen_of(t); + const char *name; + size_t dup_cnt; + int i; + + btf_dump_printf(d, "enum%s%s", + t->name_off ? " " : "", + btf_dump_type_name(d, id)); + + if (vlen) { + btf_dump_printf(d, " {"); + for (i = 0; i < vlen; i++, v++) { + name = btf_name_of(d, v->name_off); + /* enumerators share namespace with typedef idents */ + dup_cnt = btf_dump_name_dups(d, d->ident_names, name); + if (dup_cnt > 1) { + btf_dump_printf(d, "\n%s%s___%zu = %d,", + pfx(lvl + 1), name, dup_cnt, + (__s32)v->val); + } else { + btf_dump_printf(d, "\n%s%s = %d,", + pfx(lvl + 1), name, + (__s32)v->val); + } + } + btf_dump_printf(d, "\n%s}", pfx(lvl)); + } +} + +static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id, + const struct btf_type *t) +{ + const char *name = btf_dump_type_name(d, id); + + if (btf_kflag_of(t)) + btf_dump_printf(d, "union %s", name); + else + btf_dump_printf(d, "struct %s", name); +} + +static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id, + const struct btf_type *t, int lvl) +{ + const char *name = btf_dump_ident_name(d, id); + + btf_dump_printf(d, "typedef "); + btf_dump_emit_type_decl(d, t->type, name, lvl); +} + +static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id) +{ + __u32 *new_stack; + size_t new_cap; + + if (d->decl_stack_cnt >= d->decl_stack_cap) { + new_cap = max(16, d->decl_stack_cap * 3 / 2); + new_stack = realloc(d->decl_stack, + new_cap * sizeof(new_stack[0])); + if (!new_stack) + return -ENOMEM; + d->decl_stack = new_stack; + d->decl_stack_cap = new_cap; + } + + d->decl_stack[d->decl_stack_cnt++] = id; + + return 0; +} + +/* + * Emit type declaration (e.g., field type declaration in a struct or argument + * declaration in function prototype) in correct C syntax. + * + * For most types it's trivial, but there are few quirky type declaration + * cases worth mentioning: + * - function prototypes (especially nesting of function prototypes); + * - arrays; + * - const/volatile/restrict for pointers vs other types. + * + * For a good discussion of *PARSING* C syntax (as a human), see + * Peter van der Linden's "Expert C Programming: Deep C Secrets", + * Ch.3 "Unscrambling Declarations in C". + * + * It won't help with BTF to C conversion much, though, as it's an opposite + * problem. So we came up with this algorithm in reverse to van der Linden's + * parsing algorithm. It goes from structured BTF representation of type + * declaration to a valid compilable C syntax. + * + * For instance, consider this C typedef: + * typedef const int * const * arr[10] arr_t; + * It will be represented in BTF with this chain of BTF types: + * [typedef] -> [array] -> [ptr] -> [const] -> [ptr] -> [const] -> [int] + * + * Notice how [const] modifier always goes before type it modifies in BTF type + * graph, but in C syntax, const/volatile/restrict modifiers are written to + * the right of pointers, but to the left of other types. There are also other + * quirks, like function pointers, arrays of them, functions returning other + * functions, etc. + * + * We handle that by pushing all the types to a stack, until we hit "terminal" + * type (int/enum/struct/union/fwd). Then depending on the kind of a type on + * top of a stack, modifiers are handled differently. Array/function pointers + * have also wildly different syntax and how nesting of them are done. See + * code for authoritative definition. + * + * To avoid allocating new stack for each independent chain of BTF types, we + * share one bigger stack, with each chain working only on its own local view + * of a stack frame. Some care is required to "pop" stack frames after + * processing type declaration chain. + */ +static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, + const char *fname, int lvl) +{ + struct id_stack decl_stack; + const struct btf_type *t; + int err, stack_start; + __u16 kind; + + stack_start = d->decl_stack_cnt; + for (;;) { + err = btf_dump_push_decl_stack_id(d, id); + if (err < 0) { + /* + * if we don't have enough memory for entire type decl + * chain, restore stack, emit warning, and try to + * proceed nevertheless + */ + pr_warning("not enough memory for decl stack:%d", err); + d->decl_stack_cnt = stack_start; + return; + } + + /* VOID */ + if (id == 0) + break; + + t = btf__type_by_id(d->btf, id); + kind = btf_kind_of(t); + switch (kind) { + case BTF_KIND_PTR: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_FUNC_PROTO: + id = t->type; + break; + case BTF_KIND_ARRAY: { + const struct btf_array *a = (void *)(t + 1); + + id = a->type; + break; + } + case BTF_KIND_INT: + case BTF_KIND_ENUM: + case BTF_KIND_FWD: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_TYPEDEF: + goto done; + default: + pr_warning("unexpected type in decl chain, kind:%u, id:[%u]\n", + kind, id); + goto done; + } + } +done: + /* + * We might be inside a chain of declarations (e.g., array of function + * pointers returning anonymous (so inlined) structs, having another + * array field). Each of those needs its own "stack frame" to handle + * emitting of declarations. Those stack frames are non-overlapping + * portions of shared btf_dump->decl_stack. To make it a bit nicer to + * handle this set of nested stacks, we create a view corresponding to + * our own "stack frame" and work with it as an independent stack. + * We'll need to clean up after emit_type_chain() returns, though. + */ + decl_stack.ids = d->decl_stack + stack_start; + decl_stack.cnt = d->decl_stack_cnt - stack_start; + btf_dump_emit_type_chain(d, &decl_stack, fname, lvl); + /* + * emit_type_chain() guarantees that it will pop its entire decl_stack + * frame before returning. But it works with a read-only view into + * decl_stack, so it doesn't actually pop anything from the + * perspective of shared btf_dump->decl_stack, per se. We need to + * reset decl_stack state to how it was before us to avoid it growing + * all the time. + */ + d->decl_stack_cnt = stack_start; +} + +static void btf_dump_emit_mods(struct btf_dump *d, struct id_stack *decl_stack) +{ + const struct btf_type *t; + __u32 id; + + while (decl_stack->cnt) { + id = decl_stack->ids[decl_stack->cnt - 1]; + t = btf__type_by_id(d->btf, id); + + switch (btf_kind_of(t)) { + case BTF_KIND_VOLATILE: + btf_dump_printf(d, "volatile "); + break; + case BTF_KIND_CONST: + btf_dump_printf(d, "const "); + break; + case BTF_KIND_RESTRICT: + btf_dump_printf(d, "restrict "); + break; + default: + return; + } + decl_stack->cnt--; + } +} + +static bool btf_is_mod_kind(const struct btf *btf, __u32 id) +{ + const struct btf_type *t = btf__type_by_id(btf, id); + + switch (btf_kind_of(t)) { + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + return true; + default: + return false; + } +} + +static void btf_dump_emit_name(const struct btf_dump *d, + const char *name, bool last_was_ptr) +{ + bool separate = name[0] && !last_was_ptr; + + btf_dump_printf(d, "%s%s", separate ? " " : "", name); +} + +static void btf_dump_emit_type_chain(struct btf_dump *d, + struct id_stack *decls, + const char *fname, int lvl) +{ + /* + * last_was_ptr is used to determine if we need to separate pointer + * asterisk (*) from previous part of type signature with space, so + * that we get `int ***`, instead of `int * * *`. We default to true + * for cases where we have single pointer in a chain. E.g., in ptr -> + * func_proto case. func_proto will start a new emit_type_chain call + * with just ptr, which should be emitted as (*) or (*<fname>), so we + * don't want to prepend space for that last pointer. + */ + bool last_was_ptr = true; + const struct btf_type *t; + const char *name; + __u16 kind; + __u32 id; + + while (decls->cnt) { + id = decls->ids[--decls->cnt]; + if (id == 0) { + /* VOID is a special snowflake */ + btf_dump_emit_mods(d, decls); + btf_dump_printf(d, "void"); + last_was_ptr = false; + continue; + } + + t = btf__type_by_id(d->btf, id); + kind = btf_kind_of(t); + + switch (kind) { + case BTF_KIND_INT: + btf_dump_emit_mods(d, decls); + name = btf_name_of(d, t->name_off); + btf_dump_printf(d, "%s", name); + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + btf_dump_emit_mods(d, decls); + /* inline anonymous struct/union */ + if (t->name_off == 0) + btf_dump_emit_struct_def(d, id, t, lvl); + else + btf_dump_emit_struct_fwd(d, id, t); + break; + case BTF_KIND_ENUM: + btf_dump_emit_mods(d, decls); + /* inline anonymous enum */ + if (t->name_off == 0) + btf_dump_emit_enum_def(d, id, t, lvl); + else + btf_dump_emit_enum_fwd(d, id, t); + break; + case BTF_KIND_FWD: + btf_dump_emit_mods(d, decls); + btf_dump_emit_fwd_def(d, id, t); + break; + case BTF_KIND_TYPEDEF: + btf_dump_emit_mods(d, decls); + btf_dump_printf(d, "%s", btf_dump_ident_name(d, id)); + break; + case BTF_KIND_PTR: + btf_dump_printf(d, "%s", last_was_ptr ? "*" : " *"); + break; + case BTF_KIND_VOLATILE: + btf_dump_printf(d, " volatile"); + break; + case BTF_KIND_CONST: + btf_dump_printf(d, " const"); + break; + case BTF_KIND_RESTRICT: + btf_dump_printf(d, " restrict"); + break; + case BTF_KIND_ARRAY: { + const struct btf_array *a = (void *)(t + 1); + const struct btf_type *next_t; + __u32 next_id; + bool multidim; + /* + * GCC has a bug + * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=8354) + * which causes it to emit extra const/volatile + * modifiers for an array, if array's element type has + * const/volatile modifiers. Clang doesn't do that. + * In general, it doesn't seem very meaningful to have + * a const/volatile modifier for array, so we are + * going to silently skip them here. + */ + while (decls->cnt) { + next_id = decls->ids[decls->cnt - 1]; + if (btf_is_mod_kind(d->btf, next_id)) + decls->cnt--; + else + break; + } + + if (decls->cnt == 0) { + btf_dump_emit_name(d, fname, last_was_ptr); + btf_dump_printf(d, "[%u]", a->nelems); + return; + } + + next_t = btf__type_by_id(d->btf, next_id); + multidim = btf_kind_of(next_t) == BTF_KIND_ARRAY; + /* we need space if we have named non-pointer */ + if (fname[0] && !last_was_ptr) + btf_dump_printf(d, " "); + /* no parentheses for multi-dimensional array */ + if (!multidim) + btf_dump_printf(d, "("); + btf_dump_emit_type_chain(d, decls, fname, lvl); + if (!multidim) + btf_dump_printf(d, ")"); + btf_dump_printf(d, "[%u]", a->nelems); + return; + } + case BTF_KIND_FUNC_PROTO: { + const struct btf_param *p = (void *)(t + 1); + __u16 vlen = btf_vlen_of(t); + int i; + + btf_dump_emit_mods(d, decls); + if (decls->cnt) { + btf_dump_printf(d, " ("); + btf_dump_emit_type_chain(d, decls, fname, lvl); + btf_dump_printf(d, ")"); + } else { + btf_dump_emit_name(d, fname, last_was_ptr); + } + btf_dump_printf(d, "("); + /* + * Clang for BPF target generates func_proto with no + * args as a func_proto with a single void arg (e.g., + * `int (*f)(void)` vs just `int (*f)()`). We are + * going to pretend there are no args for such case. + */ + if (vlen == 1 && p->type == 0) { + btf_dump_printf(d, ")"); + return; + } + + for (i = 0; i < vlen; i++, p++) { + if (i > 0) + btf_dump_printf(d, ", "); + + /* last arg of type void is vararg */ + if (i == vlen - 1 && p->type == 0) { + btf_dump_printf(d, "..."); + break; + } + + name = btf_name_of(d, p->name_off); + btf_dump_emit_type_decl(d, p->type, name, lvl); + } + + btf_dump_printf(d, ")"); + return; + } + default: + pr_warning("unexpected type in decl chain, kind:%u, id:[%u]\n", + kind, id); + return; + } + + last_was_ptr = kind == BTF_KIND_PTR; + } + + btf_dump_emit_name(d, fname, last_was_ptr); +} + +/* return number of duplicates (occurrences) of a given name */ +static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map, + const char *orig_name) +{ + size_t dup_cnt = 0; + + hashmap__find(name_map, orig_name, (void **)&dup_cnt); + dup_cnt++; + hashmap__set(name_map, orig_name, (void *)dup_cnt, NULL, NULL); + + return dup_cnt; +} + +static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id, + struct hashmap *name_map) +{ + struct btf_dump_type_aux_state *s = &d->type_states[id]; + const struct btf_type *t = btf__type_by_id(d->btf, id); + const char *orig_name = btf_name_of(d, t->name_off); + const char **cached_name = &d->cached_names[id]; + size_t dup_cnt; + + if (t->name_off == 0) + return ""; + + if (s->name_resolved) + return *cached_name ? *cached_name : orig_name; + + dup_cnt = btf_dump_name_dups(d, name_map, orig_name); + if (dup_cnt > 1) { + const size_t max_len = 256; + char new_name[max_len]; + + snprintf(new_name, max_len, "%s___%zu", orig_name, dup_cnt); + *cached_name = strdup(new_name); + } + + s->name_resolved = 1; + return *cached_name ? *cached_name : orig_name; +} + +static const char *btf_dump_type_name(struct btf_dump *d, __u32 id) +{ + return btf_dump_resolve_name(d, id, d->type_names); +} + +static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id) +{ + return btf_dump_resolve_name(d, id, d->ident_names); +} diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c new file mode 100644 index 000000000000..6122272943e6 --- /dev/null +++ b/tools/lib/bpf/hashmap.c @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * Generic non-thread safe hash map implementation. + * + * Copyright (c) 2019 Facebook + */ +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <linux/err.h> +#include "hashmap.h" + +/* start with 4 buckets */ +#define HASHMAP_MIN_CAP_BITS 2 + +static void hashmap_add_entry(struct hashmap_entry **pprev, + struct hashmap_entry *entry) +{ + entry->next = *pprev; + *pprev = entry; +} + +static void hashmap_del_entry(struct hashmap_entry **pprev, + struct hashmap_entry *entry) +{ + *pprev = entry->next; + entry->next = NULL; +} + +void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn, + hashmap_equal_fn equal_fn, void *ctx) +{ + map->hash_fn = hash_fn; + map->equal_fn = equal_fn; + map->ctx = ctx; + + map->buckets = NULL; + map->cap = 0; + map->cap_bits = 0; + map->sz = 0; +} + +struct hashmap *hashmap__new(hashmap_hash_fn hash_fn, + hashmap_equal_fn equal_fn, + void *ctx) +{ + struct hashmap *map = malloc(sizeof(struct hashmap)); + + if (!map) + return ERR_PTR(-ENOMEM); + hashmap__init(map, hash_fn, equal_fn, ctx); + return map; +} + +void hashmap__clear(struct hashmap *map) +{ + free(map->buckets); + map->cap = map->cap_bits = map->sz = 0; +} + +void hashmap__free(struct hashmap *map) +{ + if (!map) + return; + + hashmap__clear(map); + free(map); +} + +size_t hashmap__size(const struct hashmap *map) +{ + return map->sz; +} + +size_t hashmap__capacity(const struct hashmap *map) +{ + return map->cap; +} + +static bool hashmap_needs_to_grow(struct hashmap *map) +{ + /* grow if empty or more than 75% filled */ + return (map->cap == 0) || ((map->sz + 1) * 4 / 3 > map->cap); +} + +static int hashmap_grow(struct hashmap *map) +{ + struct hashmap_entry **new_buckets; + struct hashmap_entry *cur, *tmp; + size_t new_cap_bits, new_cap; + size_t h; + int bkt; + + new_cap_bits = map->cap_bits + 1; + if (new_cap_bits < HASHMAP_MIN_CAP_BITS) + new_cap_bits = HASHMAP_MIN_CAP_BITS; + + new_cap = 1UL << new_cap_bits; + new_buckets = calloc(new_cap, sizeof(new_buckets[0])); + if (!new_buckets) + return -ENOMEM; + + hashmap__for_each_entry_safe(map, cur, tmp, bkt) { + h = hash_bits(map->hash_fn(cur->key, map->ctx), new_cap_bits); + hashmap_add_entry(&new_buckets[h], cur); + } + + map->cap = new_cap; + map->cap_bits = new_cap_bits; + free(map->buckets); + map->buckets = new_buckets; + + return 0; +} + +static bool hashmap_find_entry(const struct hashmap *map, + const void *key, size_t hash, + struct hashmap_entry ***pprev, + struct hashmap_entry **entry) +{ + struct hashmap_entry *cur, **prev_ptr; + + if (!map->buckets) + return false; + + for (prev_ptr = &map->buckets[hash], cur = *prev_ptr; + cur; + prev_ptr = &cur->next, cur = cur->next) { + if (map->equal_fn(cur->key, key, map->ctx)) { + if (pprev) + *pprev = prev_ptr; + *entry = cur; + return true; + } + } + + return false; +} + +int hashmap__insert(struct hashmap *map, const void *key, void *value, + enum hashmap_insert_strategy strategy, + const void **old_key, void **old_value) +{ + struct hashmap_entry *entry; + size_t h; + int err; + + if (old_key) + *old_key = NULL; + if (old_value) + *old_value = NULL; + + h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); + if (strategy != HASHMAP_APPEND && + hashmap_find_entry(map, key, h, NULL, &entry)) { + if (old_key) + *old_key = entry->key; + if (old_value) + *old_value = entry->value; + + if (strategy == HASHMAP_SET || strategy == HASHMAP_UPDATE) { + entry->key = key; + entry->value = value; + return 0; + } else if (strategy == HASHMAP_ADD) { + return -EEXIST; + } + } + + if (strategy == HASHMAP_UPDATE) + return -ENOENT; + + if (hashmap_needs_to_grow(map)) { + err = hashmap_grow(map); + if (err) + return err; + h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); + } + + entry = malloc(sizeof(struct hashmap_entry)); + if (!entry) + return -ENOMEM; + + entry->key = key; + entry->value = value; + hashmap_add_entry(&map->buckets[h], entry); + map->sz++; + + return 0; +} + +bool hashmap__find(const struct hashmap *map, const void *key, void **value) +{ + struct hashmap_entry *entry; + size_t h; + + h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); + if (!hashmap_find_entry(map, key, h, NULL, &entry)) + return false; + + if (value) + *value = entry->value; + return true; +} + +bool hashmap__delete(struct hashmap *map, const void *key, + const void **old_key, void **old_value) +{ + struct hashmap_entry **pprev, *entry; + size_t h; + + h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits); + if (!hashmap_find_entry(map, key, h, &pprev, &entry)) + return false; + + if (old_key) + *old_key = entry->key; + if (old_value) + *old_value = entry->value; + + hashmap_del_entry(pprev, entry); + free(entry); + map->sz--; + + return true; +} + diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h new file mode 100644 index 000000000000..03748a742146 --- /dev/null +++ b/tools/lib/bpf/hashmap.h @@ -0,0 +1,173 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +/* + * Generic non-thread safe hash map implementation. + * + * Copyright (c) 2019 Facebook + */ +#ifndef __LIBBPF_HASHMAP_H +#define __LIBBPF_HASHMAP_H + +#include <stdbool.h> +#include <stddef.h> +#include "libbpf_internal.h" + +static inline size_t hash_bits(size_t h, int bits) +{ + /* shuffle bits and return requested number of upper bits */ + return (h * 11400714819323198485llu) >> (__WORDSIZE - bits); +} + +typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx); +typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx); + +struct hashmap_entry { + const void *key; + void *value; + struct hashmap_entry *next; +}; + +struct hashmap { + hashmap_hash_fn hash_fn; + hashmap_equal_fn equal_fn; + void *ctx; + + struct hashmap_entry **buckets; + size_t cap; + size_t cap_bits; + size_t sz; +}; + +#define HASHMAP_INIT(hash_fn, equal_fn, ctx) { \ + .hash_fn = (hash_fn), \ + .equal_fn = (equal_fn), \ + .ctx = (ctx), \ + .buckets = NULL, \ + .cap = 0, \ + .cap_bits = 0, \ + .sz = 0, \ +} + +void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn, + hashmap_equal_fn equal_fn, void *ctx); +struct hashmap *hashmap__new(hashmap_hash_fn hash_fn, + hashmap_equal_fn equal_fn, + void *ctx); +void hashmap__clear(struct hashmap *map); +void hashmap__free(struct hashmap *map); + +size_t hashmap__size(const struct hashmap *map); +size_t hashmap__capacity(const struct hashmap *map); + +/* + * Hashmap insertion strategy: + * - HASHMAP_ADD - only add key/value if key doesn't exist yet; + * - HASHMAP_SET - add key/value pair if key doesn't exist yet; otherwise, + * update value; + * - HASHMAP_UPDATE - update value, if key already exists; otherwise, do + * nothing and return -ENOENT; + * - HASHMAP_APPEND - always add key/value pair, even if key already exists. + * This turns hashmap into a multimap by allowing multiple values to be + * associated with the same key. Most useful read API for such hashmap is + * hashmap__for_each_key_entry() iteration. If hashmap__find() is still + * used, it will return last inserted key/value entry (first in a bucket + * chain). + */ +enum hashmap_insert_strategy { + HASHMAP_ADD, + HASHMAP_SET, + HASHMAP_UPDATE, + HASHMAP_APPEND, +}; + +/* + * hashmap__insert() adds key/value entry w/ various semantics, depending on + * provided strategy value. If a given key/value pair replaced already + * existing key/value pair, both old key and old value will be returned + * through old_key and old_value to allow calling code do proper memory + * management. + */ +int hashmap__insert(struct hashmap *map, const void *key, void *value, + enum hashmap_insert_strategy strategy, + const void **old_key, void **old_value); + +static inline int hashmap__add(struct hashmap *map, + const void *key, void *value) +{ + return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL); +} + +static inline int hashmap__set(struct hashmap *map, + const void *key, void *value, + const void **old_key, void **old_value) +{ + return hashmap__insert(map, key, value, HASHMAP_SET, + old_key, old_value); +} + +static inline int hashmap__update(struct hashmap *map, + const void *key, void *value, + const void **old_key, void **old_value) +{ + return hashmap__insert(map, key, value, HASHMAP_UPDATE, + old_key, old_value); +} + +static inline int hashmap__append(struct hashmap *map, + const void *key, void *value) +{ + return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL); +} + +bool hashmap__delete(struct hashmap *map, const void *key, + const void **old_key, void **old_value); + +bool hashmap__find(const struct hashmap *map, const void *key, void **value); + +/* + * hashmap__for_each_entry - iterate over all entries in hashmap + * @map: hashmap to iterate + * @cur: struct hashmap_entry * used as a loop cursor + * @bkt: integer used as a bucket loop cursor + */ +#define hashmap__for_each_entry(map, cur, bkt) \ + for (bkt = 0; bkt < map->cap; bkt++) \ + for (cur = map->buckets[bkt]; cur; cur = cur->next) + +/* + * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe + * against removals + * @map: hashmap to iterate + * @cur: struct hashmap_entry * used as a loop cursor + * @tmp: struct hashmap_entry * used as a temporary next cursor storage + * @bkt: integer used as a bucket loop cursor + */ +#define hashmap__for_each_entry_safe(map, cur, tmp, bkt) \ + for (bkt = 0; bkt < map->cap; bkt++) \ + for (cur = map->buckets[bkt]; \ + cur && ({tmp = cur->next; true; }); \ + cur = tmp) + +/* + * hashmap__for_each_key_entry - iterate over entries associated with given key + * @map: hashmap to iterate + * @cur: struct hashmap_entry * used as a loop cursor + * @key: key to iterate entries for + */ +#define hashmap__for_each_key_entry(map, cur, _key) \ + for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ + map->cap_bits); \ + map->buckets ? map->buckets[bkt] : NULL; }); \ + cur; \ + cur = cur->next) \ + if (map->equal_fn(cur->key, (_key), map->ctx)) + +#define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \ + for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ + map->cap_bits); \ + cur = map->buckets ? map->buckets[bkt] : NULL; }); \ + cur && ({ tmp = cur->next; true; }); \ + cur = tmp) \ + if (map->equal_fn(cur->key, (_key), map->ctx)) + +#endif /* __LIBBPF_HASHMAP_H */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 197b574406b3..ba89d9727137 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -188,6 +188,7 @@ struct bpf_program { void *line_info; __u32 line_info_rec_size; __u32 line_info_cnt; + __u32 prog_flags; }; enum libbpf_map_type { @@ -348,8 +349,11 @@ static int bpf_program__init(void *data, size_t size, char *section_name, int idx, struct bpf_program *prog) { - if (size < sizeof(struct bpf_insn)) { - pr_warning("corrupted section '%s'\n", section_name); + const size_t bpf_insn_sz = sizeof(struct bpf_insn); + + if (size == 0 || size % bpf_insn_sz) { + pr_warning("corrupted section '%s', size: %zu\n", + section_name, size); return -EINVAL; } @@ -375,9 +379,8 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, section_name); goto errout; } - prog->insns_cnt = size / sizeof(struct bpf_insn); - memcpy(prog->insns, data, - prog->insns_cnt * sizeof(struct bpf_insn)); + prog->insns_cnt = size / bpf_insn_sz; + memcpy(prog->insns, data, size); prog->idx = idx; prog->instances.fds = NULL; prog->instances.nr = -1; @@ -494,15 +497,14 @@ static struct bpf_object *bpf_object__new(const char *path, strcpy(obj->path, path); /* Using basename() GNU version which doesn't modify arg. */ - strncpy(obj->name, basename((void *)path), - sizeof(obj->name) - 1); + strncpy(obj->name, basename((void *)path), sizeof(obj->name) - 1); end = strchr(obj->name, '.'); if (end) *end = 0; obj->efile.fd = -1; /* - * Caller of this function should also calls + * Caller of this function should also call * bpf_object__elf_finish() after data collection to return * obj_buf to user. If not, we should duplicate the buffer to * avoid user freeing them before elf finish. @@ -562,38 +564,35 @@ static int bpf_object__elf_init(struct bpf_object *obj) } else { obj->efile.fd = open(obj->path, O_RDONLY); if (obj->efile.fd < 0) { - char errmsg[STRERR_BUFSIZE]; - char *cp = libbpf_strerror_r(errno, errmsg, - sizeof(errmsg)); + char errmsg[STRERR_BUFSIZE], *cp; + err = -errno; + cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); pr_warning("failed to open %s: %s\n", obj->path, cp); - return -errno; + return err; } obj->efile.elf = elf_begin(obj->efile.fd, - LIBBPF_ELF_C_READ_MMAP, - NULL); + LIBBPF_ELF_C_READ_MMAP, NULL); } if (!obj->efile.elf) { - pr_warning("failed to open %s as ELF file\n", - obj->path); + pr_warning("failed to open %s as ELF file\n", obj->path); err = -LIBBPF_ERRNO__LIBELF; goto errout; } if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) { - pr_warning("failed to get EHDR from %s\n", - obj->path); + pr_warning("failed to get EHDR from %s\n", obj->path); err = -LIBBPF_ERRNO__FORMAT; goto errout; } ep = &obj->efile.ehdr; /* Old LLVM set e_machine to EM_NONE */ - if ((ep->e_type != ET_REL) || (ep->e_machine && (ep->e_machine != EM_BPF))) { - pr_warning("%s is not an eBPF object file\n", - obj->path); + if (ep->e_type != ET_REL || + (ep->e_machine && ep->e_machine != EM_BPF)) { + pr_warning("%s is not an eBPF object file\n", obj->path); err = -LIBBPF_ERRNO__FORMAT; goto errout; } @@ -604,47 +603,31 @@ errout: return err; } -static int -bpf_object__check_endianness(struct bpf_object *obj) -{ - static unsigned int const endian = 1; - - switch (obj->efile.ehdr.e_ident[EI_DATA]) { - case ELFDATA2LSB: - /* We are big endian, BPF obj is little endian. */ - if (*(unsigned char const *)&endian != 1) - goto mismatch; - break; - - case ELFDATA2MSB: - /* We are little endian, BPF obj is big endian. */ - if (*(unsigned char const *)&endian != 0) - goto mismatch; - break; - default: - return -LIBBPF_ERRNO__ENDIAN; - } - - return 0; - -mismatch: - pr_warning("Error: endianness mismatch.\n"); +static int bpf_object__check_endianness(struct bpf_object *obj) +{ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB) + return 0; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB) + return 0; +#else +# error "Unrecognized __BYTE_ORDER__" +#endif + pr_warning("endianness mismatch.\n"); return -LIBBPF_ERRNO__ENDIAN; } static int -bpf_object__init_license(struct bpf_object *obj, - void *data, size_t size) +bpf_object__init_license(struct bpf_object *obj, void *data, size_t size) { - memcpy(obj->license, data, - min(size, sizeof(obj->license) - 1)); + memcpy(obj->license, data, min(size, sizeof(obj->license) - 1)); pr_debug("license of %s is %s\n", obj->path, obj->license); return 0; } static int -bpf_object__init_kversion(struct bpf_object *obj, - void *data, size_t size) +bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size) { __u32 kver; @@ -654,8 +637,7 @@ bpf_object__init_kversion(struct bpf_object *obj, } memcpy(&kver, data, sizeof(kver)); obj->kern_version = kver; - pr_debug("kernel version of %s is %x\n", obj->path, - obj->kern_version); + pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version); return 0; } @@ -811,8 +793,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map, def->key_size = sizeof(int); def->value_size = data->d_size; def->max_entries = 1; - def->map_flags = type == LIBBPF_MAP_RODATA ? - BPF_F_RDONLY_PROG : 0; + def->map_flags = type == LIBBPF_MAP_RODATA ? BPF_F_RDONLY_PROG : 0; if (data_buff) { *data_buff = malloc(data->d_size); if (!*data_buff) { @@ -827,8 +808,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map, return 0; } -static int -bpf_object__init_maps(struct bpf_object *obj, int flags) +static int bpf_object__init_maps(struct bpf_object *obj, int flags) { int i, map_idx, map_def_sz = 0, nr_syms, nr_maps = 0, nr_maps_glob = 0; bool strict = !(flags & MAPS_RELAX_COMPAT); @@ -930,6 +910,11 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, sym.st_name); + if (!map_name) { + pr_warning("failed to get map #%d name sym string for obj %s\n", + map_idx, obj->path); + return -LIBBPF_ERRNO__FORMAT; + } obj->maps[map_idx].libbpf_type = LIBBPF_MAP_UNSPEC; obj->maps[map_idx].offset = sym.st_value; @@ -1104,8 +1089,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) /* Elf is corrupted/truncated, avoid calling elf_strptr. */ if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) { - pr_warning("failed to get e_shstrndx from %s\n", - obj->path); + pr_warning("failed to get e_shstrndx from %s\n", obj->path); return -LIBBPF_ERRNO__FORMAT; } @@ -1226,7 +1210,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) { pr_warning("Corrupted ELF file: index of strtab invalid\n"); - return LIBBPF_ERRNO__FORMAT; + return -LIBBPF_ERRNO__FORMAT; } if (btf_data) { obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); @@ -1346,8 +1330,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, size_t nr_maps = obj->nr_maps; int i, nrels; - pr_debug("collecting relocating info for: '%s'\n", - prog->section_name); + pr_debug("collecting relocating info for: '%s'\n", prog->section_name); nrels = shdr->sh_size / shdr->sh_entsize; prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels); @@ -1372,9 +1355,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, return -LIBBPF_ERRNO__FORMAT; } - if (!gelf_getsym(symbols, - GELF_R_SYM(rel.r_info), - &sym)) { + if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { pr_warning("relocation: symbol %"PRIx64" not found\n", GELF_R_SYM(rel.r_info)); return -LIBBPF_ERRNO__FORMAT; @@ -1435,8 +1416,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, if (maps[map_idx].libbpf_type != type) continue; if (type != LIBBPF_MAP_UNSPEC || - (type == LIBBPF_MAP_UNSPEC && - maps[map_idx].offset == sym.st_value)) { + maps[map_idx].offset == sym.st_value) { pr_debug("relocation: find map %zd (%s) for insn %u\n", map_idx, maps[map_idx].name, insn_idx); break; @@ -1444,7 +1424,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, } if (map_idx >= nr_maps) { - pr_warning("bpf relocation: map_idx %d large than %d\n", + pr_warning("bpf relocation: map_idx %d larger than %d\n", (int)map_idx, (int)nr_maps - 1); return -LIBBPF_ERRNO__RELOC; } @@ -1756,7 +1736,7 @@ bpf_object__create_maps(struct bpf_object *obj) create_attr.key_size = def->key_size; create_attr.value_size = def->value_size; create_attr.max_entries = def->max_entries; - create_attr.btf_fd = 0; + create_attr.btf_fd = -1; create_attr.btf_key_type_id = 0; create_attr.btf_value_type_id = 0; if (bpf_map_type__is_map_in_map(def->type) && @@ -1770,11 +1750,11 @@ bpf_object__create_maps(struct bpf_object *obj) } *pfd = bpf_create_map_xattr(&create_attr); - if (*pfd < 0 && create_attr.btf_key_type_id) { + if (*pfd < 0 && create_attr.btf_fd >= 0) { cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", map->name, cp, errno); - create_attr.btf_fd = 0; + create_attr.btf_fd = -1; create_attr.btf_key_type_id = 0; create_attr.btf_value_type_id = 0; map->btf_key_type_id = 0; @@ -1803,7 +1783,7 @@ err_out: } } - pr_debug("create map %s: fd=%d\n", map->name, *pfd); + pr_debug("created map %s: fd=%d\n", map->name, *pfd); } return 0; @@ -1824,18 +1804,14 @@ check_btf_ext_reloc_err(struct bpf_program *prog, int err, if (btf_prog_info) { /* * Some info has already been found but has problem - * in the last btf_ext reloc. Must have to error - * out. + * in the last btf_ext reloc. Must have to error out. */ pr_warning("Error in relocating %s for sec %s.\n", info_name, prog->section_name); return err; } - /* - * Have problem loading the very first info. Ignore - * the rest. - */ + /* Have problem loading the very first info. Ignore the rest. */ pr_warning("Cannot find %s for main program sec %s. Ignore all %s.\n", info_name, prog->section_name, info_name); return 0; @@ -2039,9 +2015,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) return -LIBBPF_ERRNO__RELOC; } - err = bpf_program__collect_reloc(prog, - shdr, data, - obj); + err = bpf_program__collect_reloc(prog, shdr, data, obj); if (err) return err; } @@ -2058,6 +2032,9 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, char *log_buf; int ret; + if (!insns || !insns_cnt) + return -EINVAL; + memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); load_attr.prog_type = prog->type; load_attr.expected_attach_type = prog->expected_attach_type; @@ -2068,7 +2045,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.license = license; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; - load_attr.prog_btf_fd = prog->btf_fd >= 0 ? prog->btf_fd : 0; + load_attr.prog_btf_fd = prog->btf_fd; load_attr.func_info = prog->func_info; load_attr.func_info_rec_size = prog->func_info_rec_size; load_attr.func_info_cnt = prog->func_info_cnt; @@ -2076,8 +2053,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.line_info_rec_size = prog->line_info_rec_size; load_attr.line_info_cnt = prog->line_info_cnt; load_attr.log_level = prog->log_level; - if (!load_attr.insns || !load_attr.insns_cnt) - return -EINVAL; + load_attr.prog_flags = prog->prog_flags; retry_load: log_buf = malloc(log_buf_size); @@ -2222,7 +2198,7 @@ static bool bpf_program__is_function_storage(struct bpf_program *prog, } static int -bpf_object__load_progs(struct bpf_object *obj) +bpf_object__load_progs(struct bpf_object *obj, int log_level) { size_t i; int err; @@ -2230,6 +2206,7 @@ bpf_object__load_progs(struct bpf_object *obj) for (i = 0; i < obj->nr_programs; i++) { if (bpf_program__is_function_storage(&obj->programs[i], obj)) continue; + obj->programs[i].log_level |= log_level; err = bpf_program__load(&obj->programs[i], obj->license, obj->kern_version); @@ -2356,11 +2333,9 @@ struct bpf_object *bpf_object__open_buffer(void *obj_buf, snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx", (unsigned long)obj_buf, (unsigned long)obj_buf_sz); - tmp_name[sizeof(tmp_name) - 1] = '\0'; name = tmp_name; } - pr_debug("loading object '%s' from buffer\n", - name); + pr_debug("loading object '%s' from buffer\n", name); return __bpf_object__open(name, obj_buf, obj_buf_sz, true, true); } @@ -2381,10 +2356,14 @@ int bpf_object__unload(struct bpf_object *obj) return 0; } -int bpf_object__load(struct bpf_object *obj) +int bpf_object__load_xattr(struct bpf_object_load_attr *attr) { + struct bpf_object *obj; int err; + if (!attr) + return -EINVAL; + obj = attr->obj; if (!obj) return -EINVAL; @@ -2397,7 +2376,7 @@ int bpf_object__load(struct bpf_object *obj) CHECK_ERR(bpf_object__create_maps(obj), err, out); CHECK_ERR(bpf_object__relocate(obj), err, out); - CHECK_ERR(bpf_object__load_progs(obj), err, out); + CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out); return 0; out: @@ -2406,6 +2385,15 @@ out: return err; } +int bpf_object__load(struct bpf_object *obj) +{ + struct bpf_object_load_attr attr = { + .obj = obj, + }; + + return bpf_object__load_xattr(&attr); +} + static int check_path(const char *path) { char *cp, errmsg[STRERR_BUFSIZE]; @@ -3458,9 +3446,7 @@ bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset) long libbpf_get_error(const void *ptr) { - if (IS_ERR(ptr)) - return PTR_ERR(ptr); - return 0; + return PTR_ERR_OR_ZERO(ptr); } int bpf_prog_load(const char *file, enum bpf_prog_type type, @@ -3521,6 +3507,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, expected_attach_type); prog->log_level = attr->log_level; + prog->prog_flags = attr->prog_flags; if (!first_prog) first_prog = prog; } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index c5ff00515ce7..1af0d48178c8 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -89,8 +89,14 @@ LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj, LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path); LIBBPF_API void bpf_object__close(struct bpf_object *object); +struct bpf_object_load_attr { + struct bpf_object *obj; + int log_level; +}; + /* Load/unload object into/from kernel */ LIBBPF_API int bpf_object__load(struct bpf_object *obj); +LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr); LIBBPF_API int bpf_object__unload(struct bpf_object *obj); LIBBPF_API const char *bpf_object__name(struct bpf_object *obj); LIBBPF_API unsigned int bpf_object__kversion(struct bpf_object *obj); @@ -320,6 +326,7 @@ struct bpf_prog_load_attr { enum bpf_attach_type expected_attach_type; int ifindex; int log_level; + int prog_flags; }; LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 673001787cba..46dcda89df21 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -164,3 +164,12 @@ LIBBPF_0.0.3 { bpf_map_freeze; btf__finalize_data; } LIBBPF_0.0.2; + +LIBBPF_0.0.4 { + global: + btf_dump__dump_type; + btf_dump__free; + btf_dump__new; + btf__parse_elf; + bpf_object__load_xattr; +} LIBBPF_0.0.3; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index f3025b4d90e1..850f7bdec5cb 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -9,6 +9,8 @@ #ifndef __LIBBPF_LIBBPF_INTERNAL_H #define __LIBBPF_LIBBPF_INTERNAL_H +#include "libbpf.h" + #define BTF_INFO_ENC(kind, kind_flag, vlen) \ ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) #define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type) diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index dd5d69529382..7470327edcfe 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -22,6 +22,7 @@ test_lirc_mode2_user get_cgroup_id_user test_skb_cgroup_id_user test_socket_cookie +test_cgroup_attach test_cgroup_storage test_select_reuseport test_flow_dissector @@ -35,3 +36,6 @@ test_sysctl alu32 libbpf.pc libbpf.so.* +test_hashmap +test_btf_dump +xdping diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 66f2dca1dee1..2b426ae1cdc9 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -15,7 +15,9 @@ LLC ?= llc LLVM_OBJCOPY ?= llvm-objcopy LLVM_READELF ?= llvm-readelf BTF_PAHOLE ?= pahole -CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include +CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include \ + -Dbpf_prog_load=bpf_prog_test_load \ + -Dbpf_load_program=bpf_test_load_program LDLIBS += -lcap -lelf -lrt -lpthread # Order correspond to 'make run_tests' order @@ -23,7 +25,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \ test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \ - test_netcnt test_tcpnotify_user test_sock_fields test_sysctl + test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \ + test_btf_dump test_cgroup_attach xdping BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c))) TEST_GEN_FILES = $(BPF_OBJ_FILES) @@ -54,7 +57,8 @@ TEST_PROGS := test_kmod.sh \ test_lwt_ip_encap.sh \ test_tcp_check_syncookie.sh \ test_tc_tunnel.sh \ - test_tc_edt.sh + test_tc_edt.sh \ + test_xdping.sh TEST_PROGS_EXTENDED := with_addr.sh \ with_tunnels.sh \ @@ -78,9 +82,9 @@ $(OUTPUT)/test_maps: map_tests/*.c BPFOBJ := $(OUTPUT)/libbpf.a -$(TEST_GEN_PROGS): $(BPFOBJ) +$(TEST_GEN_PROGS): test_stub.o $(BPFOBJ) -$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a +$(TEST_GEN_PROGS_EXTENDED): test_stub.o $(OUTPUT)/libbpf.a $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c $(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c @@ -96,6 +100,7 @@ $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c $(OUTPUT)/test_netcnt: cgroup_helpers.c $(OUTPUT)/test_sock_fields: cgroup_helpers.c $(OUTPUT)/test_sysctl: cgroup_helpers.c +$(OUTPUT)/test_cgroup_attach: cgroup_helpers.c .PHONY: force @@ -176,7 +181,7 @@ $(ALU32_BUILD_DIR)/test_progs_32: test_progs.c $(OUTPUT)/libbpf.a\ $(ALU32_BUILD_DIR)/urandom_read $(CC) $(TEST_PROGS_CFLAGS) $(CFLAGS) \ -o $(ALU32_BUILD_DIR)/test_progs_32 \ - test_progs.c trace_helpers.c prog_tests/*.c \ + test_progs.c test_stub.c trace_helpers.c prog_tests/*.c \ $(OUTPUT)/libbpf.a $(LDLIBS) $(ALU32_BUILD_DIR)/test_progs_32: $(PROG_TESTS_H) diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 5f6f9e7aba2a..e6d243b7cd74 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -8,6 +8,14 @@ */ #define SEC(NAME) __attribute__((section(NAME), used)) +/* helper macro to print out debug messages */ +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + /* helper functions called from eBPF programs written in C */ static void *(*bpf_map_lookup_elem)(void *map, const void *key) = (void *) BPF_FUNC_map_lookup_elem; @@ -216,6 +224,7 @@ static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk, (void *) BPF_FUNC_sk_storage_get; static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) = (void *)BPF_FUNC_sk_storage_delete; +static int (*bpf_send_signal)(unsigned sig) = (void *)BPF_FUNC_send_signal; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 6692a40a6979..0d89f0396be4 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -34,6 +34,60 @@ CGROUP_WORK_DIR, path) /** + * enable_all_controllers() - Enable all available cgroup v2 controllers + * + * Enable all available cgroup v2 controllers in order to increase + * the code coverage. + * + * If successful, 0 is returned. + */ +int enable_all_controllers(char *cgroup_path) +{ + char path[PATH_MAX + 1]; + char buf[PATH_MAX]; + char *c, *c2; + int fd, cfd; + size_t len; + + snprintf(path, sizeof(path), "%s/cgroup.controllers", cgroup_path); + fd = open(path, O_RDONLY); + if (fd < 0) { + log_err("Opening cgroup.controllers: %s", path); + return 1; + } + + len = read(fd, buf, sizeof(buf) - 1); + if (len < 0) { + close(fd); + log_err("Reading cgroup.controllers: %s", path); + return 1; + } + buf[len] = 0; + close(fd); + + /* No controllers available? We're probably on cgroup v1. */ + if (len == 0) + return 0; + + snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path); + cfd = open(path, O_RDWR); + if (cfd < 0) { + log_err("Opening cgroup.subtree_control: %s", path); + return 1; + } + + for (c = strtok_r(buf, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) { + if (dprintf(cfd, "+%s\n", c) <= 0) { + log_err("Enabling controller %s: %s", c, path); + close(cfd); + return 1; + } + } + close(cfd); + return 0; +} + +/** * setup_cgroup_environment() - Setup the cgroup environment * * After calling this function, cleanup_cgroup_environment should be called @@ -71,6 +125,9 @@ int setup_cgroup_environment(void) return 1; } + if (enable_all_controllers(cgroup_workdir)) + return 1; + return 0; } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index b74e2f6e96d0..c0091137074b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -12,7 +12,7 @@ static int libbpf_debug_print(enum libbpf_print_level level, return vfprintf(stderr, "%s", args); } -static int check_load(const char *file) +static int check_load(const char *file, enum bpf_prog_type type) { struct bpf_prog_load_attr attr; struct bpf_object *obj = NULL; @@ -20,8 +20,9 @@ static int check_load(const char *file) memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); attr.file = file; - attr.prog_type = BPF_PROG_TYPE_SCHED_CLS; + attr.prog_type = type; attr.log_level = 4; + attr.prog_flags = BPF_F_TEST_RND_HI32; err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); bpf_object__close(obj); if (err) @@ -31,19 +32,24 @@ static int check_load(const char *file) void test_bpf_verif_scale(void) { - const char *file1 = "./test_verif_scale1.o"; - const char *file2 = "./test_verif_scale2.o"; - const char *file3 = "./test_verif_scale3.o"; - int err; + const char *scale[] = { + "./test_verif_scale1.o", "./test_verif_scale2.o", "./test_verif_scale3.o" + }; + const char *pyperf[] = { + "./pyperf50.o", "./pyperf100.o", "./pyperf180.o" + }; + int err, i; if (verifier_stats) libbpf_set_print(libbpf_debug_print); - err = check_load(file1); - err |= check_load(file2); - err |= check_load(file3); - if (!err) - printf("test_verif_scale:OK\n"); - else - printf("test_verif_scale:FAIL\n"); + for (i = 0; i < ARRAY_SIZE(scale); i++) { + err = check_load(scale[i], BPF_PROG_TYPE_SCHED_CLS); + printf("test_scale:%s:%s\n", scale[i], err ? "FAIL" : "OK"); + } + + for (i = 0; i < ARRAY_SIZE(pyperf); i++) { + err = check_load(pyperf[i], BPF_PROG_TYPE_RAW_TRACEPOINT); + printf("test_scale:%s:%s\n", pyperf[i], err ? "FAIL" : "OK"); + } } diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c new file mode 100644 index 000000000000..67cea1686305 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> + +static volatile int sigusr1_received = 0; + +static void sigusr1_handler(int signum) +{ + sigusr1_received++; +} + +static int test_send_signal_common(struct perf_event_attr *attr, + int prog_type, + const char *test_name) +{ + int err = -1, pmu_fd, prog_fd, info_map_fd, status_map_fd; + const char *file = "./test_send_signal_kern.o"; + struct bpf_object *obj = NULL; + int pipe_c2p[2], pipe_p2c[2]; + __u32 key = 0, duration = 0; + char buf[256]; + pid_t pid; + __u64 val; + + if (CHECK(pipe(pipe_c2p), test_name, + "pipe pipe_c2p error: %s\n", strerror(errno))) + goto no_fork_done; + + if (CHECK(pipe(pipe_p2c), test_name, + "pipe pipe_p2c error: %s\n", strerror(errno))) { + close(pipe_c2p[0]); + close(pipe_c2p[1]); + goto no_fork_done; + } + + pid = fork(); + if (CHECK(pid < 0, test_name, "fork error: %s\n", strerror(errno))) { + close(pipe_c2p[0]); + close(pipe_c2p[1]); + close(pipe_p2c[0]); + close(pipe_p2c[1]); + goto no_fork_done; + } + + if (pid == 0) { + /* install signal handler and notify parent */ + signal(SIGUSR1, sigusr1_handler); + + close(pipe_c2p[0]); /* close read */ + close(pipe_p2c[1]); /* close write */ + + /* notify parent signal handler is installed */ + write(pipe_c2p[1], buf, 1); + + /* make sure parent enabled bpf program to send_signal */ + read(pipe_p2c[0], buf, 1); + + /* wait a little for signal handler */ + sleep(1); + + if (sigusr1_received) + write(pipe_c2p[1], "2", 1); + else + write(pipe_c2p[1], "0", 1); + + /* wait for parent notification and exit */ + read(pipe_p2c[0], buf, 1); + + close(pipe_c2p[1]); + close(pipe_p2c[0]); + exit(0); + } + + close(pipe_c2p[1]); /* close write */ + close(pipe_p2c[0]); /* close read */ + + err = bpf_prog_load(file, prog_type, &obj, &prog_fd); + if (CHECK(err < 0, test_name, "bpf_prog_load error: %s\n", + strerror(errno))) + goto prog_load_failure; + + pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1, + -1 /* group id */, 0 /* flags */); + if (CHECK(pmu_fd < 0, test_name, "perf_event_open error: %s\n", + strerror(errno))) { + err = -1; + goto close_prog; + } + + err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); + if (CHECK(err < 0, test_name, "ioctl perf_event_ioc_enable error: %s\n", + strerror(errno))) + goto disable_pmu; + + err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd); + if (CHECK(err < 0, test_name, "ioctl perf_event_ioc_set_bpf error: %s\n", + strerror(errno))) + goto disable_pmu; + + err = -1; + info_map_fd = bpf_object__find_map_fd_by_name(obj, "info_map"); + if (CHECK(info_map_fd < 0, test_name, "find map %s error\n", "info_map")) + goto disable_pmu; + + status_map_fd = bpf_object__find_map_fd_by_name(obj, "status_map"); + if (CHECK(status_map_fd < 0, test_name, "find map %s error\n", "status_map")) + goto disable_pmu; + + /* wait until child signal handler installed */ + read(pipe_c2p[0], buf, 1); + + /* trigger the bpf send_signal */ + key = 0; + val = (((__u64)(SIGUSR1)) << 32) | pid; + bpf_map_update_elem(info_map_fd, &key, &val, 0); + + /* notify child that bpf program can send_signal now */ + write(pipe_p2c[1], buf, 1); + + /* wait for result */ + err = read(pipe_c2p[0], buf, 1); + if (CHECK(err < 0, test_name, "reading pipe error: %s\n", strerror(errno))) + goto disable_pmu; + if (CHECK(err == 0, test_name, "reading pipe error: size 0\n")) { + err = -1; + goto disable_pmu; + } + + err = CHECK(buf[0] != '2', test_name, "incorrect result\n"); + + /* notify child safe to exit */ + write(pipe_p2c[1], buf, 1); + +disable_pmu: + close(pmu_fd); +close_prog: + bpf_object__close(obj); +prog_load_failure: + close(pipe_c2p[0]); + close(pipe_p2c[1]); + wait(NULL); +no_fork_done: + return err; +} + +static int test_send_signal_tracepoint(void) +{ + const char *id_path = "/sys/kernel/debug/tracing/events/syscalls/sys_enter_nanosleep/id"; + struct perf_event_attr attr = { + .type = PERF_TYPE_TRACEPOINT, + .sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN, + .sample_period = 1, + .wakeup_events = 1, + }; + __u32 duration = 0; + int bytes, efd; + char buf[256]; + + efd = open(id_path, O_RDONLY, 0); + if (CHECK(efd < 0, "tracepoint", + "open syscalls/sys_enter_nanosleep/id failure: %s\n", + strerror(errno))) + return -1; + + bytes = read(efd, buf, sizeof(buf)); + close(efd); + if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "tracepoint", + "read syscalls/sys_enter_nanosleep/id failure: %s\n", + strerror(errno))) + return -1; + + attr.config = strtol(buf, NULL, 0); + + return test_send_signal_common(&attr, BPF_PROG_TYPE_TRACEPOINT, "tracepoint"); +} + +static int test_send_signal_nmi(void) +{ + struct perf_event_attr attr = { + .sample_freq = 50, + .freq = 1, + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + }; + + return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT, "perf_event"); +} + +void test_send_signal(void) +{ + int ret = 0; + + ret |= test_send_signal_tracepoint(); + ret |= test_send_signal_nmi(); + if (!ret) + printf("test_send_signal:OK\n"); + else + printf("test_send_signal:FAIL\n"); +} diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c new file mode 100644 index 000000000000..8f44767a75fa --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * BTF-to-C dumper tests for bitfield. + * + * Copyright (c) 2019 Facebook + */ +#include <stdbool.h> + +/* ----- START-EXPECTED-OUTPUT ----- */ +/* + *struct bitfields_only_mixed_types { + * int a: 3; + * long int b: 2; + * _Bool c: 1; + * enum { + * A = 0, + * B = 1, + * } d: 1; + * short e: 5; + * int: 20; + * unsigned int f: 30; + *}; + * + */ +/* ------ END-EXPECTED-OUTPUT ------ */ + +struct bitfields_only_mixed_types { + int a: 3; + long int b: 2; + bool c: 1; /* it's really a _Bool type */ + enum { + A, /* A = 0, dumper is very explicit */ + B, /* B = 1, same */ + } d: 1; + short e: 5; + /* 20-bit padding here */ + unsigned f: 30; /* this gets aligned on 4-byte boundary */ +}; + +/* ----- START-EXPECTED-OUTPUT ----- */ +/* + *struct bitfield_mixed_with_others { + * char: 4; + * int a: 4; + * short b; + * long int c; + * long int d: 8; + * int e; + * int f; + *}; + * + */ +/* ------ END-EXPECTED-OUTPUT ------ */ +struct bitfield_mixed_with_others { + long: 4; /* char is enough as a backing field */ + int a: 4; + /* 8-bit implicit padding */ + short b; /* combined with previous bitfield */ + /* 4 more bytes of implicit padding */ + long c; + long d: 8; + /* 24 bits implicit padding */ + int e; /* combined with previous bitfield */ + int f; + /* 4 bytes of padding */ +}; + +/* ----- START-EXPECTED-OUTPUT ----- */ +/* + *struct bitfield_flushed { + * int a: 4; + * long: 60; + * long int b: 16; + *}; + * + */ +/* ------ END-EXPECTED-OUTPUT ------ */ +struct bitfield_flushed { + int a: 4; + long: 0; /* flush until next natural alignment boundary */ + long b: 16; +}; + +int f(struct { + struct bitfields_only_mixed_types _1; + struct bitfield_mixed_with_others _2; + struct bitfield_flushed _3; +} *_) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c new file mode 100644 index 000000000000..ba97165bdb28 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * BTF-to-C dumper test for multi-dimensional array output. + * + * Copyright (c) 2019 Facebook + */ +/* ----- START-EXPECTED-OUTPUT ----- */ +typedef int arr_t[2]; + +typedef int multiarr_t[3][4][5]; + +typedef int *ptr_arr_t[6]; + +typedef int *ptr_multiarr_t[7][8][9][10]; + +typedef int * (*fn_ptr_arr_t[11])(); + +typedef int * (*fn_ptr_multiarr_t[12][13])(); + +struct root_struct { + arr_t _1; + multiarr_t _2; + ptr_arr_t _3; + ptr_multiarr_t _4; + fn_ptr_arr_t _5; + fn_ptr_multiarr_t _6; +}; + +/* ------ END-EXPECTED-OUTPUT ------ */ + +int f(struct root_struct *s) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_namespacing.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_namespacing.c new file mode 100644 index 000000000000..92a4ad428710 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_namespacing.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * BTF-to-C dumper test validating no name versioning happens between + * independent C namespaces (struct/union/enum vs typedef/enum values). + * + * Copyright (c) 2019 Facebook + */ +/* ----- START-EXPECTED-OUTPUT ----- */ +struct S { + int S; + int U; +}; + +typedef struct S S; + +union U { + int S; + int U; +}; + +typedef union U U; + +enum E { + V = 0, +}; + +typedef enum E E; + +struct A {}; + +union B {}; + +enum C { + A = 1, + B = 2, + C = 3, +}; + +struct X {}; + +union Y {}; + +enum Z; + +typedef int X; + +typedef int Y; + +typedef int Z; + +/*------ END-EXPECTED-OUTPUT ------ */ + +int f(struct { + struct S _1; + S _2; + union U _3; + U _4; + enum E _5; + E _6; + struct A a; + union B b; + enum C c; + struct X x; + union Y y; + enum Z *z; + X xx; + Y yy; + Z zz; +} *_) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_ordering.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_ordering.c new file mode 100644 index 000000000000..7c95702ee4cb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_ordering.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * BTF-to-C dumper test for topological sorting of dependent structs. + * + * Copyright (c) 2019 Facebook + */ +/* ----- START-EXPECTED-OUTPUT ----- */ +struct s1 {}; + +struct s3; + +struct s4; + +struct s2 { + struct s2 *s2; + struct s3 *s3; + struct s4 *s4; +}; + +struct s3 { + struct s1 s1; + struct s2 s2; +}; + +struct s4 { + struct s1 s1; + struct s3 s3; +}; + +struct list_head { + struct list_head *next; + struct list_head *prev; +}; + +struct hlist_node { + struct hlist_node *next; + struct hlist_node **pprev; +}; + +struct hlist_head { + struct hlist_node *first; +}; + +struct callback_head { + struct callback_head *next; + void (*func)(struct callback_head *); +}; + +struct root_struct { + struct s4 s4; + struct list_head l; + struct hlist_node n; + struct hlist_head h; + struct callback_head cb; +}; + +/*------ END-EXPECTED-OUTPUT ------ */ + +int f(struct root_struct *root) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c new file mode 100644 index 000000000000..1cef3bec1dc7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * BTF-to-C dumper tests for struct packing determination. + * + * Copyright (c) 2019 Facebook + */ +/* ----- START-EXPECTED-OUTPUT ----- */ +struct packed_trailing_space { + int a; + short b; +} __attribute__((packed)); + +struct non_packed_trailing_space { + int a; + short b; +}; + +struct packed_fields { + short a; + int b; +} __attribute__((packed)); + +struct non_packed_fields { + short a; + int b; +}; + +struct nested_packed { + char: 4; + int a: 4; + long int b; + struct { + char c; + int d; + } __attribute__((packed)) e; +} __attribute__((packed)); + +union union_is_never_packed { + int a: 4; + char b; + char c: 1; +}; + +union union_does_not_need_packing { + struct { + long int a; + int b; + } __attribute__((packed)); + int c; +}; + +union jump_code_union { + char code[5]; + struct { + char jump; + int offset; + } __attribute__((packed)); +}; + +/*------ END-EXPECTED-OUTPUT ------ */ + +int f(struct { + struct packed_trailing_space _1; + struct non_packed_trailing_space _2; + struct packed_fields _3; + struct non_packed_fields _4; + struct nested_packed _5; + union union_is_never_packed _6; + union union_does_not_need_packing _7; + union jump_code_union _8; +} *_) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c new file mode 100644 index 000000000000..3a62119c7498 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * BTF-to-C dumper tests for implicit and explicit padding between fields and + * at the end of a struct. + * + * Copyright (c) 2019 Facebook + */ +/* ----- START-EXPECTED-OUTPUT ----- */ +struct padded_implicitly { + int a; + long int b; + char c; +}; + +/* ------ END-EXPECTED-OUTPUT ------ */ + +/* ----- START-EXPECTED-OUTPUT ----- */ +/* + *struct padded_explicitly { + * int a; + * int: 32; + * int b; + *}; + * + */ +/* ------ END-EXPECTED-OUTPUT ------ */ + +struct padded_explicitly { + int a; + int: 1; /* algo will explicitly pad with full 32 bits here */ + int b; +}; + +/* ----- START-EXPECTED-OUTPUT ----- */ +/* + *struct padded_a_lot { + * int a; + * long: 32; + * long: 64; + * long: 64; + * int b; + *}; + * + */ +/* ------ END-EXPECTED-OUTPUT ------ */ + +struct padded_a_lot { + int a; + /* 32 bit of implicit padding here, which algo will make explicit */ + long: 64; + long: 64; + int b; +}; + +/* ----- START-EXPECTED-OUTPUT ----- */ +/* + *struct padded_cache_line { + * int a; + * long: 32; + * long: 64; + * long: 64; + * long: 64; + * int b; + *}; + * + */ +/* ------ END-EXPECTED-OUTPUT ------ */ + +struct padded_cache_line { + int a; + int b __attribute__((aligned(32))); +}; + +/* ----- START-EXPECTED-OUTPUT ----- */ +/* + *struct zone_padding { + * char x[0]; + *}; + * + *struct zone { + * int a; + * short b; + * short: 16; + * struct zone_padding __pad__; + *}; + * + */ +/* ------ END-EXPECTED-OUTPUT ------ */ + +struct zone_padding { + char x[0]; +} __attribute__((__aligned__(8))); + +struct zone { + int a; + short b; + short: 16; + struct zone_padding __pad__; +}; + +int f(struct { + struct padded_implicitly _1; + struct padded_explicitly _2; + struct padded_a_lot _3; + struct padded_cache_line _4; + struct zone _5; +} *_) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c new file mode 100644 index 000000000000..d4a02fe44a12 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * BTF-to-C dumper test for majority of C syntax quirks. + * + * Copyright (c) 2019 Facebook + */ +/* ----- START-EXPECTED-OUTPUT ----- */ +enum e1 { + A = 0, + B = 1, +}; + +enum e2 { + C = 100, + D = -100, + E = 0, +}; + +typedef enum e2 e2_t; + +typedef enum { + F = 0, + G = 1, + H = 2, +} e3_t; + +typedef int int_t; + +typedef volatile const int * volatile const crazy_ptr_t; + +typedef int *****we_need_to_go_deeper_ptr_t; + +typedef volatile const we_need_to_go_deeper_ptr_t * restrict * volatile * const * restrict volatile * restrict const * volatile const * restrict volatile const how_about_this_ptr_t; + +typedef int *ptr_arr_t[10]; + +typedef void (*fn_ptr1_t)(int); + +typedef void (*printf_fn_t)(const char *, ...); + +/* ------ END-EXPECTED-OUTPUT ------ */ +/* + * While previous function pointers are pretty trivial (C-syntax-level + * trivial), the following are deciphered here for future generations: + * + * - `fn_ptr2_t`: function, taking anonymous struct as a first arg and pointer + * to a function, that takes int and returns int, as a second arg; returning + * a pointer to a const pointer to a char. Equivalent to: + * typedef struct { int a; } s_t; + * typedef int (*fn_t)(int); + * typedef char * const * (*fn_ptr2_t)(s_t, fn_t); + * + * - `fn_complext_t`: pointer to a function returning struct and accepting + * union and struct. All structs and enum are anonymous and defined inline. + * + * - `signal_t: pointer to a function accepting a pointer to a function as an + * argument and returning pointer to a function as a result. Sane equivalent: + * typedef void (*signal_handler_t)(int); + * typedef signal_handler_t (*signal_ptr_t)(int, signal_handler_t); + * + * - fn_ptr_arr1_t: array of pointers to a function accepting pointer to + * a pointer to an int and returning pointer to a char. Easy. + * + * - fn_ptr_arr2_t: array of const pointers to a function taking no arguments + * and returning a const pointer to a function, that takes pointer to a + * `int -> char *` function and returns pointer to a char. Equivalent: + * typedef char * (*fn_input_t)(int); + * typedef char * (*fn_output_outer_t)(fn_input_t); + * typedef const fn_output_outer_t (* fn_output_inner_t)(); + * typedef const fn_output_inner_t fn_ptr_arr2_t[5]; + */ +/* ----- START-EXPECTED-OUTPUT ----- */ +typedef char * const * (*fn_ptr2_t)(struct { + int a; +}, int (*)(int)); + +typedef struct { + int a; + void (*b)(int, struct { + int c; + }, union { + char d; + int e[5]; + }); +} (*fn_complex_t)(union { + void *f; + char g[16]; +}, struct { + int h; +}); + +typedef void (* (*signal_t)(int, void (*)(int)))(int); + +typedef char * (*fn_ptr_arr1_t[10])(int **); + +typedef char * (* const (* const fn_ptr_arr2_t[5])())(char * (*)(int)); + +struct struct_w_typedefs { + int_t a; + crazy_ptr_t b; + we_need_to_go_deeper_ptr_t c; + how_about_this_ptr_t d; + ptr_arr_t e; + fn_ptr1_t f; + printf_fn_t g; + fn_ptr2_t h; + fn_complex_t i; + signal_t j; + fn_ptr_arr1_t k; + fn_ptr_arr2_t l; +}; + +typedef struct { + int x; + int y; + int z; +} anon_struct_t; + +struct struct_fwd; + +typedef struct struct_fwd struct_fwd_t; + +typedef struct struct_fwd *struct_fwd_ptr_t; + +union union_fwd; + +typedef union union_fwd union_fwd_t; + +typedef union union_fwd *union_fwd_ptr_t; + +struct struct_empty {}; + +struct struct_simple { + int a; + char b; + const int_t *p; + struct struct_empty s; + enum e2 e; + enum { + ANON_VAL1 = 1, + ANON_VAL2 = 2, + } f; + int arr1[13]; + enum e2 arr2[5]; +}; + +union union_empty {}; + +union union_simple { + void *ptr; + int num; + int_t num2; + union union_empty u; +}; + +struct struct_in_struct { + struct struct_simple simple; + union union_simple also_simple; + struct { + int a; + } not_so_hard_as_well; + union { + int b; + int c; + } anon_union_is_good; + struct { + int d; + int e; + }; + union { + int f; + int g; + }; +}; + +struct struct_with_embedded_stuff { + int a; + struct { + int b; + struct { + struct struct_with_embedded_stuff *c; + const char *d; + } e; + union { + volatile long int f; + void * restrict g; + }; + }; + union { + const int_t *h; + void (*i)(char, int, void *); + } j; + enum { + K = 100, + L = 200, + } m; + char n[16]; + struct { + char o; + int p; + void (*q)(int); + } r[5]; + struct struct_in_struct s[10]; + int t[11]; +}; + +struct root_struct { + enum e1 _1; + enum e2 _2; + e2_t _2_1; + e3_t _2_2; + struct struct_w_typedefs _3; + anon_struct_t _7; + struct struct_fwd *_8; + struct_fwd_t *_9; + struct_fwd_ptr_t _10; + union union_fwd *_11; + union_fwd_t *_12; + union_fwd_ptr_t _13; + struct struct_with_embedded_stuff _14; +}; + +/* ------ END-EXPECTED-OUTPUT ------ */ + +int f(struct root_struct *s) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h new file mode 100644 index 000000000000..0cc5e4ee90bd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -0,0 +1,268 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include <linux/sched.h> +#include <linux/ptrace.h> +#include <stdint.h> +#include <stddef.h> +#include <stdbool.h> +#include <linux/bpf.h> +#include "bpf_helpers.h" + +#define FUNCTION_NAME_LEN 64 +#define FILE_NAME_LEN 128 +#define TASK_COMM_LEN 16 + +typedef struct { + int PyThreadState_frame; + int PyThreadState_thread; + int PyFrameObject_back; + int PyFrameObject_code; + int PyFrameObject_lineno; + int PyCodeObject_filename; + int PyCodeObject_name; + int String_data; + int String_size; +} OffsetConfig; + +typedef struct { + uintptr_t current_state_addr; + uintptr_t tls_key_addr; + OffsetConfig offsets; + bool use_tls; +} PidData; + +typedef struct { + uint32_t success; +} Stats; + +typedef struct { + char name[FUNCTION_NAME_LEN]; + char file[FILE_NAME_LEN]; +} Symbol; + +typedef struct { + uint32_t pid; + uint32_t tid; + char comm[TASK_COMM_LEN]; + int32_t kernel_stack_id; + int32_t user_stack_id; + bool thread_current; + bool pthread_match; + bool stack_complete; + int16_t stack_len; + int32_t stack[STACK_MAX_LEN]; + + int has_meta; + int metadata; + char dummy_safeguard; +} Event; + + +struct bpf_elf_map { + __u32 type; + __u32 size_key; + __u32 size_value; + __u32 max_elem; + __u32 flags; +}; + +typedef int pid_t; + +typedef struct { + void* f_back; // PyFrameObject.f_back, previous frame + void* f_code; // PyFrameObject.f_code, pointer to PyCodeObject + void* co_filename; // PyCodeObject.co_filename + void* co_name; // PyCodeObject.co_name +} FrameData; + +static inline __attribute__((__always_inline__)) void* +get_thread_state(void* tls_base, PidData* pidData) +{ + void* thread_state; + int key; + + bpf_probe_read(&key, sizeof(key), (void*)(long)pidData->tls_key_addr); + bpf_probe_read(&thread_state, sizeof(thread_state), + tls_base + 0x310 + key * 0x10 + 0x08); + return thread_state; +} + +static inline __attribute__((__always_inline__)) bool +get_frame_data(void* frame_ptr, PidData* pidData, FrameData* frame, Symbol* symbol) +{ + // read data from PyFrameObject + bpf_probe_read(&frame->f_back, + sizeof(frame->f_back), + frame_ptr + pidData->offsets.PyFrameObject_back); + bpf_probe_read(&frame->f_code, + sizeof(frame->f_code), + frame_ptr + pidData->offsets.PyFrameObject_code); + + // read data from PyCodeObject + if (!frame->f_code) + return false; + bpf_probe_read(&frame->co_filename, + sizeof(frame->co_filename), + frame->f_code + pidData->offsets.PyCodeObject_filename); + bpf_probe_read(&frame->co_name, + sizeof(frame->co_name), + frame->f_code + pidData->offsets.PyCodeObject_name); + // read actual names into symbol + if (frame->co_filename) + bpf_probe_read_str(&symbol->file, + sizeof(symbol->file), + frame->co_filename + pidData->offsets.String_data); + if (frame->co_name) + bpf_probe_read_str(&symbol->name, + sizeof(symbol->name), + frame->co_name + pidData->offsets.String_data); + return true; +} + +struct bpf_elf_map SEC("maps") pidmap = { + .type = BPF_MAP_TYPE_HASH, + .size_key = sizeof(int), + .size_value = sizeof(PidData), + .max_elem = 1, +}; + +struct bpf_elf_map SEC("maps") eventmap = { + .type = BPF_MAP_TYPE_HASH, + .size_key = sizeof(int), + .size_value = sizeof(Event), + .max_elem = 1, +}; + +struct bpf_elf_map SEC("maps") symbolmap = { + .type = BPF_MAP_TYPE_HASH, + .size_key = sizeof(Symbol), + .size_value = sizeof(int), + .max_elem = 1, +}; + +struct bpf_elf_map SEC("maps") statsmap = { + .type = BPF_MAP_TYPE_ARRAY, + .size_key = sizeof(Stats), + .size_value = sizeof(int), + .max_elem = 1, +}; + +struct bpf_elf_map SEC("maps") perfmap = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .size_key = sizeof(int), + .size_value = sizeof(int), + .max_elem = 32, +}; + +struct bpf_elf_map SEC("maps") stackmap = { + .type = BPF_MAP_TYPE_STACK_TRACE, + .size_key = sizeof(int), + .size_value = sizeof(long long) * 127, + .max_elem = 1000, +}; + +static inline __attribute__((__always_inline__)) int __on_event(struct pt_regs *ctx) +{ + uint64_t pid_tgid = bpf_get_current_pid_tgid(); + pid_t pid = (pid_t)(pid_tgid >> 32); + PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid); + if (!pidData) + return 0; + + int zero = 0; + Event* event = bpf_map_lookup_elem(&eventmap, &zero); + if (!event) + return 0; + + event->pid = pid; + + event->tid = (pid_t)pid_tgid; + bpf_get_current_comm(&event->comm, sizeof(event->comm)); + + event->user_stack_id = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK); + event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0); + + void* thread_state_current = (void*)0; + bpf_probe_read(&thread_state_current, + sizeof(thread_state_current), + (void*)(long)pidData->current_state_addr); + + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + void* tls_base = (void*)task; + + void* thread_state = pidData->use_tls ? get_thread_state(tls_base, pidData) + : thread_state_current; + event->thread_current = thread_state == thread_state_current; + + if (pidData->use_tls) { + uint64_t pthread_created; + uint64_t pthread_self; + bpf_probe_read(&pthread_self, sizeof(pthread_self), tls_base + 0x10); + + bpf_probe_read(&pthread_created, + sizeof(pthread_created), + thread_state + pidData->offsets.PyThreadState_thread); + event->pthread_match = pthread_created == pthread_self; + } else { + event->pthread_match = 1; + } + + if (event->pthread_match || !pidData->use_tls) { + void* frame_ptr; + FrameData frame; + Symbol sym = {}; + int cur_cpu = bpf_get_smp_processor_id(); + + bpf_probe_read(&frame_ptr, + sizeof(frame_ptr), + thread_state + pidData->offsets.PyThreadState_frame); + + int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym); + if (symbol_counter == NULL) + return 0; +#pragma unroll + /* Unwind python stack */ + for (int i = 0; i < STACK_MAX_LEN; ++i) { + if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) { + int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu; + int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym); + if (!symbol_id) { + bpf_map_update_elem(&symbolmap, &sym, &zero, 0); + symbol_id = bpf_map_lookup_elem(&symbolmap, &sym); + if (!symbol_id) + return 0; + } + if (*symbol_id == new_symbol_id) + (*symbol_counter)++; + event->stack[i] = *symbol_id; + event->stack_len = i + 1; + frame_ptr = frame.f_back; + } + } + event->stack_complete = frame_ptr == NULL; + } else { + event->stack_complete = 1; + } + + Stats* stats = bpf_map_lookup_elem(&statsmap, &zero); + if (stats) + stats->success++; + + event->has_meta = 0; + bpf_perf_event_output(ctx, &perfmap, 0, event, offsetof(Event, metadata)); + return 0; +} + +SEC("raw_tracepoint/kfree_skb") +int on_event(struct pt_regs* ctx) +{ + int i, ret = 0; + ret |= __on_event(ctx); + ret |= __on_event(ctx); + ret |= __on_event(ctx); + ret |= __on_event(ctx); + ret |= __on_event(ctx); + return ret; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/pyperf100.c b/tools/testing/selftests/bpf/progs/pyperf100.c new file mode 100644 index 000000000000..29786325db54 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/pyperf100.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#define STACK_MAX_LEN 100 +#include "pyperf.h" diff --git a/tools/testing/selftests/bpf/progs/pyperf180.c b/tools/testing/selftests/bpf/progs/pyperf180.c new file mode 100644 index 000000000000..c39f559d3100 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/pyperf180.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#define STACK_MAX_LEN 180 +#include "pyperf.h" diff --git a/tools/testing/selftests/bpf/progs/pyperf50.c b/tools/testing/selftests/bpf/progs/pyperf50.c new file mode 100644 index 000000000000..ef7ce340a292 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/pyperf50.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#define STACK_MAX_LEN 50 +#include "pyperf.h" diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c index 0f92858f6226..ed3e4a551c57 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c @@ -5,13 +5,6 @@ int _version SEC("version") = 1; -#define bpf_printk(fmt, ...) \ -({ \ - char ____fmt[] = fmt; \ - bpf_trace_printk(____fmt, sizeof(____fmt), \ - ##__VA_ARGS__); \ -}) - SEC("sk_skb1") int bpf_prog1(struct __sk_buff *skb) { diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c index 12a7b5c82ed6..65fbfdb6cd3a 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c @@ -5,13 +5,6 @@ int _version SEC("version") = 1; -#define bpf_printk(fmt, ...) \ -({ \ - char ____fmt[] = fmt; \ - bpf_trace_printk(____fmt, sizeof(____fmt), \ - ##__VA_ARGS__); \ -}) - SEC("sk_msg1") int bpf_prog1(struct sk_msg_md *msg) { diff --git a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c index 2ce7634a4012..bdc22be46f2e 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c @@ -5,13 +5,6 @@ int _version SEC("version") = 1; -#define bpf_printk(fmt, ...) \ -({ \ - char ____fmt[] = fmt; \ - bpf_trace_printk(____fmt, sizeof(____fmt), \ - ##__VA_ARGS__); \ -}) - struct bpf_map_def SEC("maps") sock_map_rx = { .type = BPF_MAP_TYPE_SOCKMAP, .key_size = sizeof(int), diff --git a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c index 0575751bc1bc..7c7cb3177463 100644 --- a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c +++ b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c @@ -6,13 +6,6 @@ #include "bpf_helpers.h" #include "bpf_endian.h" -#define bpf_printk(fmt, ...) \ -({ \ - char ____fmt[] = fmt; \ - bpf_trace_printk(____fmt, sizeof(____fmt), \ - ##__VA_ARGS__); \ -}) - /* Packet parsing state machine helpers. */ #define cursor_advance(_cursor, _len) \ ({ void *_tmp = _cursor; _cursor += _len; _tmp; }) diff --git a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c new file mode 100644 index 000000000000..45a1a1a2c345 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include <linux/bpf.h> +#include <linux/version.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") info_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64), + .max_entries = 1, +}; + +BPF_ANNOTATE_KV_PAIR(info_map, __u32, __u64); + +struct bpf_map_def SEC("maps") status_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64), + .max_entries = 1, +}; + +BPF_ANNOTATE_KV_PAIR(status_map, __u32, __u64); + +SEC("send_signal_demo") +int bpf_send_signal_test(void *ctx) +{ + __u64 *info_val, *status_val; + __u32 key = 0, pid, sig; + int ret; + + status_val = bpf_map_lookup_elem(&status_map, &key); + if (!status_val || *status_val != 0) + return 0; + + info_val = bpf_map_lookup_elem(&info_map, &key); + if (!info_val || *info_val == 0) + return 0; + + sig = *info_val >> 32; + pid = *info_val & 0xffffFFFF; + + if ((bpf_get_current_pid_tgid() >> 32) == pid) { + ret = bpf_send_signal(sig); + if (ret == 0) + *status_val = 1; + } + + return 0; +} +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index 5e4aac74f9d0..4fe6aaad22a4 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -15,13 +15,6 @@ #include <linux/udp.h> #include "bpf_helpers.h" -#define bpf_printk(fmt, ...) \ -({ \ - char ____fmt[] = fmt; \ - bpf_trace_printk(____fmt, sizeof(____fmt), \ - ##__VA_ARGS__); \ -}) - static __u32 rol32(__u32 word, unsigned int shift) { return (word << shift) | (word >> ((-shift) & 31)); diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c b/tools/testing/selftests/bpf/progs/xdping_kern.c new file mode 100644 index 000000000000..87393e7c667c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xdping_kern.c @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. */ + +#define KBUILD_MODNAME "foo" +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/icmp.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> + +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#include "xdping.h" + +struct bpf_map_def SEC("maps") ping_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__u32), + .value_size = sizeof(struct pinginfo), + .max_entries = 256, +}; + +static __always_inline void swap_src_dst_mac(void *data) +{ + unsigned short *p = data; + unsigned short dst[3]; + + dst[0] = p[0]; + dst[1] = p[1]; + dst[2] = p[2]; + p[0] = p[3]; + p[1] = p[4]; + p[2] = p[5]; + p[3] = dst[0]; + p[4] = dst[1]; + p[5] = dst[2]; +} + +static __always_inline __u16 csum_fold_helper(__wsum sum) +{ + sum = (sum & 0xffff) + (sum >> 16); + return ~((sum & 0xffff) + (sum >> 16)); +} + +static __always_inline __u16 ipv4_csum(void *data_start, int data_size) +{ + __wsum sum; + + sum = bpf_csum_diff(0, 0, data_start, data_size, 0); + return csum_fold_helper(sum); +} + +#define ICMP_ECHO_LEN 64 + +static __always_inline int icmp_check(struct xdp_md *ctx, int type) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + struct icmphdr *icmph; + struct iphdr *iph; + + if (data + sizeof(*eth) + sizeof(*iph) + ICMP_ECHO_LEN > data_end) + return XDP_PASS; + + if (eth->h_proto != bpf_htons(ETH_P_IP)) + return XDP_PASS; + + iph = data + sizeof(*eth); + + if (iph->protocol != IPPROTO_ICMP) + return XDP_PASS; + + if (bpf_ntohs(iph->tot_len) - sizeof(*iph) != ICMP_ECHO_LEN) + return XDP_PASS; + + icmph = data + sizeof(*eth) + sizeof(*iph); + + if (icmph->type != type) + return XDP_PASS; + + return XDP_TX; +} + +SEC("xdpclient") +int xdping_client(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct pinginfo *pinginfo = NULL; + struct ethhdr *eth = data; + struct icmphdr *icmph; + struct iphdr *iph; + __u64 recvtime; + __be32 raddr; + __be16 seq; + int ret; + __u8 i; + + ret = icmp_check(ctx, ICMP_ECHOREPLY); + + if (ret != XDP_TX) + return ret; + + iph = data + sizeof(*eth); + icmph = data + sizeof(*eth) + sizeof(*iph); + raddr = iph->saddr; + + /* Record time reply received. */ + recvtime = bpf_ktime_get_ns(); + pinginfo = bpf_map_lookup_elem(&ping_map, &raddr); + if (!pinginfo || pinginfo->seq != icmph->un.echo.sequence) + return XDP_PASS; + + if (pinginfo->start) { +#pragma clang loop unroll(full) + for (i = 0; i < XDPING_MAX_COUNT; i++) { + if (pinginfo->times[i] == 0) + break; + } + /* verifier is fussy here... */ + if (i < XDPING_MAX_COUNT) { + pinginfo->times[i] = recvtime - + pinginfo->start; + pinginfo->start = 0; + i++; + } + /* No more space for values? */ + if (i == pinginfo->count || i == XDPING_MAX_COUNT) + return XDP_PASS; + } + + /* Now convert reply back into echo request. */ + swap_src_dst_mac(data); + iph->saddr = iph->daddr; + iph->daddr = raddr; + icmph->type = ICMP_ECHO; + seq = bpf_htons(bpf_ntohs(icmph->un.echo.sequence) + 1); + icmph->un.echo.sequence = seq; + icmph->checksum = 0; + icmph->checksum = ipv4_csum(icmph, ICMP_ECHO_LEN); + + pinginfo->seq = seq; + pinginfo->start = bpf_ktime_get_ns(); + + return XDP_TX; +} + +SEC("xdpserver") +int xdping_server(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + struct icmphdr *icmph; + struct iphdr *iph; + __be32 raddr; + int ret; + + ret = icmp_check(ctx, ICMP_ECHO); + + if (ret != XDP_TX) + return ret; + + iph = data + sizeof(*eth); + icmph = data + sizeof(*eth) + sizeof(*iph); + raddr = iph->saddr; + + /* Now convert request into echo reply. */ + swap_src_dst_mac(data); + iph->saddr = iph->daddr; + iph->daddr = raddr; + icmph->type = ICMP_ECHOREPLY; + icmph->checksum = 0; + icmph->checksum = ipv4_csum(icmph, ICMP_ECHO_LEN); + + return XDP_TX; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 42c1ce988945..289daf54dec4 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -4025,62 +4025,13 @@ static struct btf_file_test file_tests[] = { }, }; -static int file_has_btf_elf(const char *fn, bool *has_btf_ext) -{ - Elf_Scn *scn = NULL; - GElf_Ehdr ehdr; - int ret = 0; - int elf_fd; - Elf *elf; - - if (CHECK(elf_version(EV_CURRENT) == EV_NONE, - "elf_version(EV_CURRENT) == EV_NONE")) - return -1; - - elf_fd = open(fn, O_RDONLY); - if (CHECK(elf_fd == -1, "open(%s): errno:%d", fn, errno)) - return -1; - - elf = elf_begin(elf_fd, ELF_C_READ, NULL); - if (CHECK(!elf, "elf_begin(%s): %s", fn, elf_errmsg(elf_errno()))) { - ret = -1; - goto done; - } - - if (CHECK(!gelf_getehdr(elf, &ehdr), "!gelf_getehdr(%s)", fn)) { - ret = -1; - goto done; - } - - while ((scn = elf_nextscn(elf, scn))) { - const char *sh_name; - GElf_Shdr sh; - - if (CHECK(gelf_getshdr(scn, &sh) != &sh, - "file:%s gelf_getshdr != &sh", fn)) { - ret = -1; - goto done; - } - - sh_name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name); - if (!strcmp(sh_name, BTF_ELF_SEC)) - ret = 1; - if (!strcmp(sh_name, BTF_EXT_ELF_SEC)) - *has_btf_ext = true; - } - -done: - close(elf_fd); - elf_end(elf); - return ret; -} - static int do_test_file(unsigned int test_num) { const struct btf_file_test *test = &file_tests[test_num - 1]; const char *expected_fnames[] = {"_dummy_tracepoint", "test_long_fname_1", "test_long_fname_2"}; + struct btf_ext *btf_ext = NULL; struct bpf_prog_info info = {}; struct bpf_object *obj = NULL; struct bpf_func_info *finfo; @@ -4095,15 +4046,19 @@ static int do_test_file(unsigned int test_num) fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num, test->file); - err = file_has_btf_elf(test->file, &has_btf_ext); - if (err == -1) - return err; - - if (err == 0) { - fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC); - skip_cnt++; - return 0; + btf = btf__parse_elf(test->file, &btf_ext); + if (IS_ERR(btf)) { + if (PTR_ERR(btf) == -ENOENT) { + fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC); + skip_cnt++; + return 0; + } + return PTR_ERR(btf); } + btf__free(btf); + + has_btf_ext = btf_ext != NULL; + btf_ext__free(btf_ext); obj = bpf_object__open(test->file); if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj))) diff --git a/tools/testing/selftests/bpf/test_btf_dump.c b/tools/testing/selftests/bpf/test_btf_dump.c new file mode 100644 index 000000000000..8f850823d35f --- /dev/null +++ b/tools/testing/selftests/bpf/test_btf_dump.c @@ -0,0 +1,143 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <linux/err.h> +#include <btf.h> + +#define CHECK(condition, format...) ({ \ + int __ret = !!(condition); \ + if (__ret) { \ + fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__); \ + fprintf(stderr, format); \ + } \ + __ret; \ +}) + +void btf_dump_printf(void *ctx, const char *fmt, va_list args) +{ + vfprintf(ctx, fmt, args); +} + +struct btf_dump_test_case { + const char *name; + struct btf_dump_opts opts; +} btf_dump_test_cases[] = { + {.name = "btf_dump_test_case_syntax", .opts = {}}, + {.name = "btf_dump_test_case_ordering", .opts = {}}, + {.name = "btf_dump_test_case_padding", .opts = {}}, + {.name = "btf_dump_test_case_packing", .opts = {}}, + {.name = "btf_dump_test_case_bitfields", .opts = {}}, + {.name = "btf_dump_test_case_multidim", .opts = {}}, + {.name = "btf_dump_test_case_namespacing", .opts = {}}, +}; + +static int btf_dump_all_types(const struct btf *btf, + const struct btf_dump_opts *opts) +{ + size_t type_cnt = btf__get_nr_types(btf); + struct btf_dump *d; + int err = 0, id; + + d = btf_dump__new(btf, NULL, opts, btf_dump_printf); + if (IS_ERR(d)) + return PTR_ERR(d); + + for (id = 1; id <= type_cnt; id++) { + err = btf_dump__dump_type(d, id); + if (err) + goto done; + } + +done: + btf_dump__free(d); + return err; +} + +int test_btf_dump_case(int n, struct btf_dump_test_case *test_case) +{ + char test_file[256], out_file[256], diff_cmd[1024]; + struct btf *btf = NULL; + int err = 0, fd = -1; + FILE *f = NULL; + + fprintf(stderr, "Test case #%d (%s): ", n, test_case->name); + + snprintf(test_file, sizeof(test_file), "%s.o", test_case->name); + + btf = btf__parse_elf(test_file, NULL); + if (CHECK(IS_ERR(btf), + "failed to load test BTF: %ld\n", PTR_ERR(btf))) { + err = -PTR_ERR(btf); + btf = NULL; + goto done; + } + + snprintf(out_file, sizeof(out_file), + "/tmp/%s.output.XXXXXX", test_case->name); + fd = mkstemp(out_file); + if (CHECK(fd < 0, "failed to create temp output file: %d\n", fd)) { + err = fd; + goto done; + } + f = fdopen(fd, "w"); + if (CHECK(f == NULL, "failed to open temp output file: %s(%d)\n", + strerror(errno), errno)) { + close(fd); + goto done; + } + + test_case->opts.ctx = f; + err = btf_dump_all_types(btf, &test_case->opts); + fclose(f); + close(fd); + if (CHECK(err, "failure during C dumping: %d\n", err)) { + goto done; + } + + snprintf(test_file, sizeof(test_file), "progs/%s.c", test_case->name); + /* + * Diff test output and expected test output, contained between + * START-EXPECTED-OUTPUT and END-EXPECTED-OUTPUT lines in test case. + * For expected output lines, everything before '*' is stripped out. + * Also lines containing comment start and comment end markers are + * ignored. + */ + snprintf(diff_cmd, sizeof(diff_cmd), + "awk '/START-EXPECTED-OUTPUT/{out=1;next} " + "/END-EXPECTED-OUTPUT/{out=0} " + "/\\/\\*|\\*\\//{next} " /* ignore comment start/end lines */ + "out {sub(/^[ \\t]*\\*/, \"\"); print}' '%s' | diff -u - '%s'", + test_file, out_file); + err = system(diff_cmd); + if (CHECK(err, + "differing test output, output=%s, err=%d, diff cmd:\n%s\n", + out_file, err, diff_cmd)) + goto done; + + remove(out_file); + fprintf(stderr, "OK\n"); + +done: + btf__free(btf); + return err; +} + +int main() { + int test_case_cnt, i, err, failed = 0; + + test_case_cnt = sizeof(btf_dump_test_cases) / + sizeof(btf_dump_test_cases[0]); + + for (i = 0; i < test_case_cnt; i++) { + err = test_btf_dump_case(i, &btf_dump_test_cases[i]); + if (err) + failed++; + } + + fprintf(stderr, "%d tests succeeded, %d tests failed.\n", + test_case_cnt - failed, failed); + + return failed; +} diff --git a/tools/testing/selftests/bpf/test_cgroup_attach.c b/tools/testing/selftests/bpf/test_cgroup_attach.c new file mode 100644 index 000000000000..7671909ee1cb --- /dev/null +++ b/tools/testing/selftests/bpf/test_cgroup_attach.c @@ -0,0 +1,571 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* eBPF example program: + * + * - Creates arraymap in kernel with 4 bytes keys and 8 byte values + * + * - Loads eBPF program + * + * The eBPF program accesses the map passed in to store two pieces of + * information. The number of invocations of the program, which maps + * to the number of packets received, is stored to key 0. Key 1 is + * incremented on each iteration by the number of bytes stored in + * the skb. The program also stores the number of received bytes + * in the cgroup storage. + * + * - Attaches the new program to a cgroup using BPF_PROG_ATTACH + * + * - Every second, reads map[0] and map[1] to see how many bytes and + * packets were seen on any socket of tasks in the given cgroup. + */ + +#define _GNU_SOURCE + +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include <sys/resource.h> +#include <sys/time.h> +#include <unistd.h> +#include <linux/filter.h> + +#include <linux/bpf.h> +#include <bpf/bpf.h> + +#include "bpf_util.h" +#include "bpf_rlimit.h" +#include "cgroup_helpers.h" + +#define FOO "/foo" +#define BAR "/foo/bar/" +#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null" + +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + +#ifdef DEBUG +#define debug(args...) printf(args) +#else +#define debug(args...) +#endif + +static int prog_load(int verdict) +{ + int ret; + struct bpf_insn prog[] = { + BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */ + BPF_EXIT_INSN(), + }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); + + ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, + prog, insns_cnt, "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); + + if (ret < 0) { + log_err("Loading program"); + printf("Output from verifier:\n%s\n-------\n", bpf_log_buf); + return 0; + } + return ret; +} + +static int test_foo_bar(void) +{ + int drop_prog, allow_prog, foo = 0, bar = 0, rc = 0; + + allow_prog = prog_load(1); + if (!allow_prog) + goto err; + + drop_prog = prog_load(0); + if (!drop_prog) + goto err; + + if (setup_cgroup_environment()) + goto err; + + /* Create cgroup /foo, get fd, and join it */ + foo = create_and_get_cgroup(FOO); + if (foo < 0) + goto err; + + if (join_cgroup(FOO)) + goto err; + + if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE)) { + log_err("Attaching prog to /foo"); + goto err; + } + + debug("Attached DROP prog. This ping in cgroup /foo should fail...\n"); + assert(system(PING_CMD) != 0); + + /* Create cgroup /foo/bar, get fd, and join it */ + bar = create_and_get_cgroup(BAR); + if (bar < 0) + goto err; + + if (join_cgroup(BAR)) + goto err; + + debug("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n"); + assert(system(PING_CMD) != 0); + + if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE)) { + log_err("Attaching prog to /foo/bar"); + goto err; + } + + debug("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n"); + assert(system(PING_CMD) == 0); + + if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) { + log_err("Detaching program from /foo/bar"); + goto err; + } + + debug("Detached PASS from /foo/bar while DROP is attached to /foo.\n" + "This ping in cgroup /foo/bar should fail...\n"); + assert(system(PING_CMD) != 0); + + if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE)) { + log_err("Attaching prog to /foo/bar"); + goto err; + } + + if (bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) { + log_err("Detaching program from /foo"); + goto err; + } + + debug("Attached PASS from /foo/bar and detached DROP from /foo.\n" + "This ping in cgroup /foo/bar should pass...\n"); + assert(system(PING_CMD) == 0); + + if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE)) { + log_err("Attaching prog to /foo/bar"); + goto err; + } + + if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) { + errno = 0; + log_err("Unexpected success attaching prog to /foo/bar"); + goto err; + } + + if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) { + log_err("Detaching program from /foo/bar"); + goto err; + } + + if (!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) { + errno = 0; + log_err("Unexpected success in double detach from /foo"); + goto err; + } + + if (bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) { + log_err("Attaching non-overridable prog to /foo"); + goto err; + } + + if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) { + errno = 0; + log_err("Unexpected success attaching non-overridable prog to /foo/bar"); + goto err; + } + + if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE)) { + errno = 0; + log_err("Unexpected success attaching overridable prog to /foo/bar"); + goto err; + } + + if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE)) { + errno = 0; + log_err("Unexpected success attaching overridable prog to /foo"); + goto err; + } + + if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) { + log_err("Attaching different non-overridable prog to /foo"); + goto err; + } + + goto out; + +err: + rc = 1; + +out: + close(foo); + close(bar); + cleanup_cgroup_environment(); + if (!rc) + printf("#override:PASS\n"); + else + printf("#override:FAIL\n"); + return rc; +} + +static int map_fd = -1; + +static int prog_load_cnt(int verdict, int val) +{ + int cgroup_storage_fd, percpu_cgroup_storage_fd; + + if (map_fd < 0) + map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0); + if (map_fd < 0) { + printf("failed to create map '%s'\n", strerror(errno)); + return -1; + } + + cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, + sizeof(struct bpf_cgroup_storage_key), 8, 0, 0); + if (cgroup_storage_fd < 0) { + printf("failed to create map '%s'\n", strerror(errno)); + return -1; + } + + percpu_cgroup_storage_fd = bpf_create_map( + BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + sizeof(struct bpf_cgroup_storage_key), 8, 0, 0); + if (percpu_cgroup_storage_fd < 0) { + printf("failed to create map '%s'\n", strerror(errno)); + return -1; + } + + struct bpf_insn prog[] = { + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */ + BPF_LD_MAP_FD(BPF_REG_1, map_fd), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */ + BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */ + + BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), + BPF_MOV64_IMM(BPF_REG_1, val), + BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0), + + BPF_LD_MAP_FD(BPF_REG_1, percpu_cgroup_storage_fd), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1), + BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0), + + BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */ + BPF_EXIT_INSN(), + }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); + int ret; + + ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, + prog, insns_cnt, "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); + + if (ret < 0) { + log_err("Loading program"); + printf("Output from verifier:\n%s\n-------\n", bpf_log_buf); + return 0; + } + close(cgroup_storage_fd); + return ret; +} + + +static int test_multiprog(void) +{ + __u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id; + int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0; + int drop_prog, allow_prog[6] = {}, rc = 0; + unsigned long long value; + int i = 0; + + for (i = 0; i < 6; i++) { + allow_prog[i] = prog_load_cnt(1, 1 << i); + if (!allow_prog[i]) + goto err; + } + drop_prog = prog_load_cnt(0, 1); + if (!drop_prog) + goto err; + + if (setup_cgroup_environment()) + goto err; + + cg1 = create_and_get_cgroup("/cg1"); + if (cg1 < 0) + goto err; + cg2 = create_and_get_cgroup("/cg1/cg2"); + if (cg2 < 0) + goto err; + cg3 = create_and_get_cgroup("/cg1/cg2/cg3"); + if (cg3 < 0) + goto err; + cg4 = create_and_get_cgroup("/cg1/cg2/cg3/cg4"); + if (cg4 < 0) + goto err; + cg5 = create_and_get_cgroup("/cg1/cg2/cg3/cg4/cg5"); + if (cg5 < 0) + goto err; + + if (join_cgroup("/cg1/cg2/cg3/cg4/cg5")) + goto err; + + if (bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_MULTI)) { + log_err("Attaching prog to cg1"); + goto err; + } + if (!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_MULTI)) { + log_err("Unexpected success attaching the same prog to cg1"); + goto err; + } + if (bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_MULTI)) { + log_err("Attaching prog2 to cg1"); + goto err; + } + if (bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE)) { + log_err("Attaching prog to cg2"); + goto err; + } + if (bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_MULTI)) { + log_err("Attaching prog to cg3"); + goto err; + } + if (bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE)) { + log_err("Attaching prog to cg4"); + goto err; + } + if (bpf_prog_attach(allow_prog[5], cg5, BPF_CGROUP_INET_EGRESS, 0)) { + log_err("Attaching prog to cg5"); + goto err; + } + assert(system(PING_CMD) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0); + assert(value == 1 + 2 + 8 + 32); + + /* query the number of effective progs in cg5 */ + assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE, + NULL, NULL, &prog_cnt) == 0); + assert(prog_cnt == 4); + /* retrieve prog_ids of effective progs in cg5 */ + assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE, + &attach_flags, prog_ids, &prog_cnt) == 0); + assert(prog_cnt == 4); + assert(attach_flags == 0); + saved_prog_id = prog_ids[0]; + /* check enospc handling */ + prog_ids[0] = 0; + prog_cnt = 2; + assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE, + &attach_flags, prog_ids, &prog_cnt) == -1 && + errno == ENOSPC); + assert(prog_cnt == 4); + /* check that prog_ids are returned even when buffer is too small */ + assert(prog_ids[0] == saved_prog_id); + /* retrieve prog_id of single attached prog in cg5 */ + prog_ids[0] = 0; + assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0, + NULL, prog_ids, &prog_cnt) == 0); + assert(prog_cnt == 1); + assert(prog_ids[0] == saved_prog_id); + + /* detach bottom program and ping again */ + if (bpf_prog_detach2(-1, cg5, BPF_CGROUP_INET_EGRESS)) { + log_err("Detaching prog from cg5"); + goto err; + } + value = 0; + assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0); + assert(system(PING_CMD) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0); + assert(value == 1 + 2 + 8 + 16); + + /* detach 3rd from bottom program and ping again */ + errno = 0; + if (!bpf_prog_detach2(0, cg3, BPF_CGROUP_INET_EGRESS)) { + log_err("Unexpected success on detach from cg3"); + goto err; + } + if (bpf_prog_detach2(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS)) { + log_err("Detaching from cg3"); + goto err; + } + value = 0; + assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0); + assert(system(PING_CMD) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0); + assert(value == 1 + 2 + 16); + + /* detach 2nd from bottom program and ping again */ + if (bpf_prog_detach2(-1, cg4, BPF_CGROUP_INET_EGRESS)) { + log_err("Detaching prog from cg4"); + goto err; + } + value = 0; + assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0); + assert(system(PING_CMD) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0); + assert(value == 1 + 2 + 4); + + prog_cnt = 4; + assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE, + &attach_flags, prog_ids, &prog_cnt) == 0); + assert(prog_cnt == 3); + assert(attach_flags == 0); + assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0, + NULL, prog_ids, &prog_cnt) == 0); + assert(prog_cnt == 0); + goto out; +err: + rc = 1; + +out: + for (i = 0; i < 6; i++) + if (allow_prog[i] > 0) + close(allow_prog[i]); + close(cg1); + close(cg2); + close(cg3); + close(cg4); + close(cg5); + cleanup_cgroup_environment(); + if (!rc) + printf("#multi:PASS\n"); + else + printf("#multi:FAIL\n"); + return rc; +} + +static int test_autodetach(void) +{ + __u32 prog_cnt = 4, attach_flags; + int allow_prog[2] = {0}; + __u32 prog_ids[2] = {0}; + int cg = 0, i, rc = -1; + void *ptr = NULL; + int attempts; + + for (i = 0; i < ARRAY_SIZE(allow_prog); i++) { + allow_prog[i] = prog_load_cnt(1, 1 << i); + if (!allow_prog[i]) + goto err; + } + + if (setup_cgroup_environment()) + goto err; + + /* create a cgroup, attach two programs and remember their ids */ + cg = create_and_get_cgroup("/cg_autodetach"); + if (cg < 0) + goto err; + + if (join_cgroup("/cg_autodetach")) + goto err; + + for (i = 0; i < ARRAY_SIZE(allow_prog); i++) { + if (bpf_prog_attach(allow_prog[i], cg, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_MULTI)) { + log_err("Attaching prog[%d] to cg:egress", i); + goto err; + } + } + + /* make sure that programs are attached and run some traffic */ + assert(bpf_prog_query(cg, BPF_CGROUP_INET_EGRESS, 0, &attach_flags, + prog_ids, &prog_cnt) == 0); + assert(system(PING_CMD) == 0); + + /* allocate some memory (4Mb) to pin the original cgroup */ + ptr = malloc(4 * (1 << 20)); + if (!ptr) + goto err; + + /* close programs and cgroup fd */ + for (i = 0; i < ARRAY_SIZE(allow_prog); i++) { + close(allow_prog[i]); + allow_prog[i] = 0; + } + + close(cg); + cg = 0; + + /* leave the cgroup and remove it. don't detach programs */ + cleanup_cgroup_environment(); + + /* wait for the asynchronous auto-detachment. + * wait for no more than 5 sec and give up. + */ + for (i = 0; i < ARRAY_SIZE(prog_ids); i++) { + for (attempts = 5; attempts >= 0; attempts--) { + int fd = bpf_prog_get_fd_by_id(prog_ids[i]); + + if (fd < 0) + break; + + /* don't leave the fd open */ + close(fd); + + if (!attempts) + goto err; + + sleep(1); + } + } + + rc = 0; +err: + for (i = 0; i < ARRAY_SIZE(allow_prog); i++) + if (allow_prog[i] > 0) + close(allow_prog[i]); + if (cg) + close(cg); + free(ptr); + cleanup_cgroup_environment(); + if (!rc) + printf("#autodetach:PASS\n"); + else + printf("#autodetach:FAIL\n"); + return rc; +} + +int main(void) +{ + int (*tests[])(void) = { + test_foo_bar, + test_multiprog, + test_autodetach, + }; + int errors = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(tests); i++) + if (tests[i]()) + errors++; + + if (errors) + printf("test_cgroup_attach:FAIL\n"); + else + printf("test_cgroup_attach:PASS\n"); + + return errors ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/tools/testing/selftests/bpf/test_hashmap.c b/tools/testing/selftests/bpf/test_hashmap.c new file mode 100644 index 000000000000..b64094c981e3 --- /dev/null +++ b/tools/testing/selftests/bpf/test_hashmap.c @@ -0,0 +1,382 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * Tests for libbpf's hashmap. + * + * Copyright (c) 2019 Facebook + */ +#include <stdio.h> +#include <errno.h> +#include <linux/err.h> +#include "hashmap.h" + +#define CHECK(condition, format...) ({ \ + int __ret = !!(condition); \ + if (__ret) { \ + fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__); \ + fprintf(stderr, format); \ + } \ + __ret; \ +}) + +size_t hash_fn(const void *k, void *ctx) +{ + return (long)k; +} + +bool equal_fn(const void *a, const void *b, void *ctx) +{ + return (long)a == (long)b; +} + +static inline size_t next_pow_2(size_t n) +{ + size_t r = 1; + + while (r < n) + r <<= 1; + return r; +} + +static inline size_t exp_cap(size_t sz) +{ + size_t r = next_pow_2(sz); + + if (sz * 4 / 3 > r) + r <<= 1; + return r; +} + +#define ELEM_CNT 62 + +int test_hashmap_generic(void) +{ + struct hashmap_entry *entry, *tmp; + int err, bkt, found_cnt, i; + long long found_msk; + struct hashmap *map; + + fprintf(stderr, "%s: ", __func__); + + map = hashmap__new(hash_fn, equal_fn, NULL); + if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map))) + return 1; + + for (i = 0; i < ELEM_CNT; i++) { + const void *oldk, *k = (const void *)(long)i; + void *oldv, *v = (void *)(long)(1024 + i); + + err = hashmap__update(map, k, v, &oldk, &oldv); + if (CHECK(err != -ENOENT, "unexpected result: %d\n", err)) + return 1; + + if (i % 2) { + err = hashmap__add(map, k, v); + } else { + err = hashmap__set(map, k, v, &oldk, &oldv); + if (CHECK(oldk != NULL || oldv != NULL, + "unexpected k/v: %p=%p\n", oldk, oldv)) + return 1; + } + + if (CHECK(err, "failed to add k/v %ld = %ld: %d\n", + (long)k, (long)v, err)) + return 1; + + if (CHECK(!hashmap__find(map, k, &oldv), + "failed to find key %ld\n", (long)k)) + return 1; + if (CHECK(oldv != v, "found value is wrong: %ld\n", (long)oldv)) + return 1; + } + + if (CHECK(hashmap__size(map) != ELEM_CNT, + "invalid map size: %zu\n", hashmap__size(map))) + return 1; + if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)), + "unexpected map capacity: %zu\n", hashmap__capacity(map))) + return 1; + + found_msk = 0; + hashmap__for_each_entry(map, entry, bkt) { + long k = (long)entry->key; + long v = (long)entry->value; + + found_msk |= 1ULL << k; + if (CHECK(v - k != 1024, "invalid k/v pair: %ld = %ld\n", k, v)) + return 1; + } + if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1, + "not all keys iterated: %llx\n", found_msk)) + return 1; + + for (i = 0; i < ELEM_CNT; i++) { + const void *oldk, *k = (const void *)(long)i; + void *oldv, *v = (void *)(long)(256 + i); + + err = hashmap__add(map, k, v); + if (CHECK(err != -EEXIST, "unexpected add result: %d\n", err)) + return 1; + + if (i % 2) + err = hashmap__update(map, k, v, &oldk, &oldv); + else + err = hashmap__set(map, k, v, &oldk, &oldv); + + if (CHECK(err, "failed to update k/v %ld = %ld: %d\n", + (long)k, (long)v, err)) + return 1; + if (CHECK(!hashmap__find(map, k, &oldv), + "failed to find key %ld\n", (long)k)) + return 1; + if (CHECK(oldv != v, "found value is wrong: %ld\n", (long)oldv)) + return 1; + } + + if (CHECK(hashmap__size(map) != ELEM_CNT, + "invalid updated map size: %zu\n", hashmap__size(map))) + return 1; + if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)), + "unexpected map capacity: %zu\n", hashmap__capacity(map))) + return 1; + + found_msk = 0; + hashmap__for_each_entry_safe(map, entry, tmp, bkt) { + long k = (long)entry->key; + long v = (long)entry->value; + + found_msk |= 1ULL << k; + if (CHECK(v - k != 256, + "invalid updated k/v pair: %ld = %ld\n", k, v)) + return 1; + } + if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1, + "not all keys iterated after update: %llx\n", found_msk)) + return 1; + + found_cnt = 0; + hashmap__for_each_key_entry(map, entry, (void *)0) { + found_cnt++; + } + if (CHECK(!found_cnt, "didn't find any entries for key 0\n")) + return 1; + + found_msk = 0; + found_cnt = 0; + hashmap__for_each_key_entry_safe(map, entry, tmp, (void *)0) { + const void *oldk, *k; + void *oldv, *v; + + k = entry->key; + v = entry->value; + + found_cnt++; + found_msk |= 1ULL << (long)k; + + if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), + "failed to delete k/v %ld = %ld\n", + (long)k, (long)v)) + return 1; + if (CHECK(oldk != k || oldv != v, + "invalid deleted k/v: expected %ld = %ld, got %ld = %ld\n", + (long)k, (long)v, (long)oldk, (long)oldv)) + return 1; + if (CHECK(hashmap__delete(map, k, &oldk, &oldv), + "unexpectedly deleted k/v %ld = %ld\n", + (long)oldk, (long)oldv)) + return 1; + } + + if (CHECK(!found_cnt || !found_msk, + "didn't delete any key entries\n")) + return 1; + if (CHECK(hashmap__size(map) != ELEM_CNT - found_cnt, + "invalid updated map size (already deleted: %d): %zu\n", + found_cnt, hashmap__size(map))) + return 1; + if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)), + "unexpected map capacity: %zu\n", hashmap__capacity(map))) + return 1; + + hashmap__for_each_entry_safe(map, entry, tmp, bkt) { + const void *oldk, *k; + void *oldv, *v; + + k = entry->key; + v = entry->value; + + found_cnt++; + found_msk |= 1ULL << (long)k; + + if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), + "failed to delete k/v %ld = %ld\n", + (long)k, (long)v)) + return 1; + if (CHECK(oldk != k || oldv != v, + "invalid old k/v: expect %ld = %ld, got %ld = %ld\n", + (long)k, (long)v, (long)oldk, (long)oldv)) + return 1; + if (CHECK(hashmap__delete(map, k, &oldk, &oldv), + "unexpectedly deleted k/v %ld = %ld\n", + (long)k, (long)v)) + return 1; + } + + if (CHECK(found_cnt != ELEM_CNT || found_msk != (1ULL << ELEM_CNT) - 1, + "not all keys were deleted: found_cnt:%d, found_msk:%llx\n", + found_cnt, found_msk)) + return 1; + if (CHECK(hashmap__size(map) != 0, + "invalid updated map size (already deleted: %d): %zu\n", + found_cnt, hashmap__size(map))) + return 1; + + found_cnt = 0; + hashmap__for_each_entry(map, entry, bkt) { + CHECK(false, "unexpected map entries left: %ld = %ld\n", + (long)entry->key, (long)entry->value); + return 1; + } + + hashmap__free(map); + hashmap__for_each_entry(map, entry, bkt) { + CHECK(false, "unexpected map entries left: %ld = %ld\n", + (long)entry->key, (long)entry->value); + return 1; + } + + fprintf(stderr, "OK\n"); + return 0; +} + +size_t collision_hash_fn(const void *k, void *ctx) +{ + return 0; +} + +int test_hashmap_multimap(void) +{ + void *k1 = (void *)0, *k2 = (void *)1; + struct hashmap_entry *entry; + struct hashmap *map; + long found_msk; + int err, bkt; + + fprintf(stderr, "%s: ", __func__); + + /* force collisions */ + map = hashmap__new(collision_hash_fn, equal_fn, NULL); + if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map))) + return 1; + + + /* set up multimap: + * [0] -> 1, 2, 4; + * [1] -> 8, 16, 32; + */ + err = hashmap__append(map, k1, (void *)1); + if (CHECK(err, "failed to add k/v: %d\n", err)) + return 1; + err = hashmap__append(map, k1, (void *)2); + if (CHECK(err, "failed to add k/v: %d\n", err)) + return 1; + err = hashmap__append(map, k1, (void *)4); + if (CHECK(err, "failed to add k/v: %d\n", err)) + return 1; + + err = hashmap__append(map, k2, (void *)8); + if (CHECK(err, "failed to add k/v: %d\n", err)) + return 1; + err = hashmap__append(map, k2, (void *)16); + if (CHECK(err, "failed to add k/v: %d\n", err)) + return 1; + err = hashmap__append(map, k2, (void *)32); + if (CHECK(err, "failed to add k/v: %d\n", err)) + return 1; + + if (CHECK(hashmap__size(map) != 6, + "invalid map size: %zu\n", hashmap__size(map))) + return 1; + + /* verify global iteration still works and sees all values */ + found_msk = 0; + hashmap__for_each_entry(map, entry, bkt) { + found_msk |= (long)entry->value; + } + if (CHECK(found_msk != (1 << 6) - 1, + "not all keys iterated: %lx\n", found_msk)) + return 1; + + /* iterate values for key 1 */ + found_msk = 0; + hashmap__for_each_key_entry(map, entry, k1) { + found_msk |= (long)entry->value; + } + if (CHECK(found_msk != (1 | 2 | 4), + "invalid k1 values: %lx\n", found_msk)) + return 1; + + /* iterate values for key 2 */ + found_msk = 0; + hashmap__for_each_key_entry(map, entry, k2) { + found_msk |= (long)entry->value; + } + if (CHECK(found_msk != (8 | 16 | 32), + "invalid k2 values: %lx\n", found_msk)) + return 1; + + fprintf(stderr, "OK\n"); + return 0; +} + +int test_hashmap_empty() +{ + struct hashmap_entry *entry; + int bkt; + struct hashmap *map; + void *k = (void *)0; + + fprintf(stderr, "%s: ", __func__); + + /* force collisions */ + map = hashmap__new(hash_fn, equal_fn, NULL); + if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map))) + return 1; + + if (CHECK(hashmap__size(map) != 0, + "invalid map size: %zu\n", hashmap__size(map))) + return 1; + if (CHECK(hashmap__capacity(map) != 0, + "invalid map capacity: %zu\n", hashmap__capacity(map))) + return 1; + if (CHECK(hashmap__find(map, k, NULL), "unexpected find\n")) + return 1; + if (CHECK(hashmap__delete(map, k, NULL, NULL), "unexpected delete\n")) + return 1; + + hashmap__for_each_entry(map, entry, bkt) { + CHECK(false, "unexpected iterated entry\n"); + return 1; + } + hashmap__for_each_key_entry(map, entry, k) { + CHECK(false, "unexpected key entry\n"); + return 1; + } + + fprintf(stderr, "OK\n"); + return 0; +} + +int main(int argc, char **argv) +{ + bool failed = false; + + if (test_hashmap_generic()) + failed = true; + if (test_hashmap_multimap()) + failed = true; + if (test_hashmap_empty()) + failed = true; + + return failed; +} diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index 3f110eaaf29c..5d0c4f0baeff 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -745,6 +745,7 @@ static int load_path(const struct sock_addr_test *test, const char *path) attr.file = path; attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; attr.expected_attach_type = test->expected_attach_type; + attr.prog_flags = BPF_F_TEST_RND_HI32; if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) { if (test->expected_result != LOAD_REJECT) diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c index e089477fa0a3..f0fc103261a4 100644 --- a/tools/testing/selftests/bpf/test_sock_fields.c +++ b/tools/testing/selftests/bpf/test_sock_fields.c @@ -414,6 +414,7 @@ int main(int argc, char **argv) struct bpf_prog_load_attr attr = { .file = "test_sock_fields_kern.o", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .prog_flags = BPF_F_TEST_RND_HI32, }; int cgroup_fd, egress_fd, ingress_fd, err; struct bpf_program *ingress_prog; diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c index e51d63786ff8..cac8ee57a013 100644 --- a/tools/testing/selftests/bpf/test_socket_cookie.c +++ b/tools/testing/selftests/bpf/test_socket_cookie.c @@ -148,6 +148,7 @@ static int run_test(int cgfd) memset(&attr, 0, sizeof(attr)); attr.file = SOCKET_COOKIE_PROG; attr.prog_type = BPF_PROG_TYPE_UNSPEC; + attr.prog_flags = BPF_F_TEST_RND_HI32; err = bpf_prog_load_xattr(&attr, &pobj, &prog_fd); if (err) { diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h index e7639f66a941..4e7d3da21357 100644 --- a/tools/testing/selftests/bpf/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/test_sockmap_kern.h @@ -28,13 +28,6 @@ * are established and verdicts are decided. */ -#define bpf_printk(fmt, ...) \ -({ \ - char ____fmt[] = fmt; \ - bpf_trace_printk(____fmt, sizeof(____fmt), \ - ##__VA_ARGS__); \ -}) - struct bpf_map_def SEC("maps") sock_map = { .type = TEST_MAP_TYPE, .key_size = sizeof(int), diff --git a/tools/testing/selftests/bpf/test_stub.c b/tools/testing/selftests/bpf/test_stub.c new file mode 100644 index 000000000000..84e81a89e2f9 --- /dev/null +++ b/tools/testing/selftests/bpf/test_stub.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> +#include <string.h> + +int bpf_prog_test_load(const char *file, enum bpf_prog_type type, + struct bpf_object **pobj, int *prog_fd) +{ + struct bpf_prog_load_attr attr; + + memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); + attr.file = file; + attr.prog_type = type; + attr.expected_attach_type = 0; + attr.prog_flags = BPF_F_TEST_RND_HI32; + + return bpf_prog_load_xattr(&attr, pobj, prog_fd); +} + +int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t insns_cnt, const char *license, + __u32 kern_version, char *log_buf, + size_t log_buf_sz) +{ + struct bpf_load_program_attr load_attr; + + memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); + load_attr.prog_type = type; + load_attr.expected_attach_type = 0; + load_attr.name = NULL; + load_attr.insns = insns; + load_attr.insns_cnt = insns_cnt; + load_attr.license = license; + load_attr.kern_version = kern_version; + load_attr.prog_flags = BPF_F_TEST_RND_HI32; + + return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz); +} diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh index 546aee3e9fb4..bd12ec97a44d 100755 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -696,30 +696,57 @@ check_err() bpf_tunnel_test() { + local errors=0 + echo "Testing GRE tunnel..." test_gre + errors=$(( $errors + $? )) + echo "Testing IP6GRE tunnel..." test_ip6gre + errors=$(( $errors + $? )) + echo "Testing IP6GRETAP tunnel..." test_ip6gretap + errors=$(( $errors + $? )) + echo "Testing ERSPAN tunnel..." test_erspan v2 + errors=$(( $errors + $? )) + echo "Testing IP6ERSPAN tunnel..." test_ip6erspan v2 + errors=$(( $errors + $? )) + echo "Testing VXLAN tunnel..." test_vxlan + errors=$(( $errors + $? )) + echo "Testing IP6VXLAN tunnel..." test_ip6vxlan + errors=$(( $errors + $? )) + echo "Testing GENEVE tunnel..." test_geneve + errors=$(( $errors + $? )) + echo "Testing IP6GENEVE tunnel..." test_ip6geneve + errors=$(( $errors + $? )) + echo "Testing IPIP tunnel..." test_ipip + errors=$(( $errors + $? )) + echo "Testing IPIP6 tunnel..." test_ipip6 + errors=$(( $errors + $? )) + echo "Testing IPSec tunnel..." test_xfrm_tunnel + errors=$(( $errors + $? )) + + return $errors } trap cleanup 0 3 6 @@ -728,4 +755,9 @@ trap cleanup_exit 2 9 cleanup bpf_tunnel_test +if [ $? -ne 0 ]; then + echo -e "$(basename $0): ${RED}FAIL${NC}" + exit 1 +fi +echo -e "$(basename $0): ${GREEN}PASS${NC}" exit 0 diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index ccd896b98cac..cd0248c54e25 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -138,32 +138,36 @@ static void bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self) loop: for (j = 0; j < PUSH_CNT; j++) { insn[i++] = BPF_LD_ABS(BPF_B, 0); - insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2); + /* jump to error label */ + insn[i] = BPF_JMP32_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 3); i++; insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6); insn[i++] = BPF_MOV64_IMM(BPF_REG_2, 1); insn[i++] = BPF_MOV64_IMM(BPF_REG_3, 2); insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_vlan_push), - insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2); + insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 3); i++; } for (j = 0; j < PUSH_CNT; j++) { insn[i++] = BPF_LD_ABS(BPF_B, 0); - insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2); + insn[i] = BPF_JMP32_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 3); i++; insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6); insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_vlan_pop), - insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2); + insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 3); i++; } if (++k < 5) goto loop; - for (; i < len - 1; i++) - insn[i] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 0xbef); + for (; i < len - 3; i++) + insn[i] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0xbef); + insn[len - 3] = BPF_JMP_A(1); + /* error label */ + insn[len - 2] = BPF_MOV32_IMM(BPF_REG_0, 0); insn[len - 1] = BPF_EXIT_INSN(); self->prog_len = len; } @@ -171,8 +175,13 @@ loop: static void bpf_fill_jump_around_ld_abs(struct bpf_test *self) { struct bpf_insn *insn = self->fill_insns; - /* jump range is limited to 16 bit. every ld_abs is replaced by 6 insns */ - unsigned int len = (1 << 15) / 6; + /* jump range is limited to 16 bit. every ld_abs is replaced by 6 insns, + * but on arches like arm, ppc etc, there will be one BPF_ZEXT inserted + * to extend the error value of the inlined ld_abs sequence which then + * contains 7 insns. so, set the dividend to 7 so the testcase could + * work on all arches. + */ + unsigned int len = (1 << 15) / 7; int i = 0; insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); @@ -210,33 +219,35 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self) self->retval = (uint32_t)res; } -/* test the sequence of 1k jumps */ +#define MAX_JMP_SEQ 8192 + +/* test the sequence of 8k jumps */ static void bpf_fill_scale1(struct bpf_test *self) { struct bpf_insn *insn = self->fill_insns; int i = 0, k = 0; insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); - /* test to check that the sequence of 1024 jumps is acceptable */ - while (k++ < 1024) { + /* test to check that the long sequence of jumps is acceptable */ + while (k++ < MAX_JMP_SEQ) { insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32); - insn[i++] = BPF_JMP_IMM(BPF_JGT, BPF_REG_0, bpf_semi_rand_get(), 2); + insn[i++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, bpf_semi_rand_get(), 2); insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_10); insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, -8 * (k % 64 + 1)); } - /* every jump adds 1024 steps to insn_processed, so to stay exactly - * within 1m limit add MAX_TEST_INSNS - 1025 MOVs and 1 EXIT + /* every jump adds 1 step to insn_processed, so to stay exactly + * within 1m limit add MAX_TEST_INSNS - MAX_JMP_SEQ - 1 MOVs and 1 EXIT */ - while (i < MAX_TEST_INSNS - 1025) - insn[i++] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 42); + while (i < MAX_TEST_INSNS - MAX_JMP_SEQ - 1) + insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 42); insn[i] = BPF_EXIT_INSN(); self->prog_len = i + 1; self->retval = 42; } -/* test the sequence of 1k jumps in inner most function (function depth 8)*/ +/* test the sequence of 8k jumps in inner most function (function depth 8)*/ static void bpf_fill_scale2(struct bpf_test *self) { struct bpf_insn *insn = self->fill_insns; @@ -248,20 +259,21 @@ static void bpf_fill_scale2(struct bpf_test *self) insn[i++] = BPF_EXIT_INSN(); } insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); - /* test to check that the sequence of 1024 jumps is acceptable */ - while (k++ < 1024) { + /* test to check that the long sequence of jumps is acceptable */ + k = 0; + while (k++ < MAX_JMP_SEQ) { insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32); - insn[i++] = BPF_JMP_IMM(BPF_JGT, BPF_REG_0, bpf_semi_rand_get(), 2); + insn[i++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, bpf_semi_rand_get(), 2); insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_10); insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, -8 * (k % (64 - 4 * FUNC_NEST) + 1)); } - /* every jump adds 1024 steps to insn_processed, so to stay exactly - * within 1m limit add MAX_TEST_INSNS - 1025 MOVs and 1 EXIT + /* every jump adds 1 step to insn_processed, so to stay exactly + * within 1m limit add MAX_TEST_INSNS - MAX_JMP_SEQ - 1 MOVs and 1 EXIT */ - while (i < MAX_TEST_INSNS - 1025) - insn[i++] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 42); + while (i < MAX_TEST_INSNS - MAX_JMP_SEQ - 1) + insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 42); insn[i] = BPF_EXIT_INSN(); self->prog_len = i + 1; self->retval = 42; @@ -870,7 +882,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (fixup_skips != skips) return; - pflags = 0; + pflags = BPF_F_TEST_RND_HI32; if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT) pflags |= BPF_F_STRICT_ALIGNMENT; if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) diff --git a/tools/testing/selftests/bpf/test_xdping.sh b/tools/testing/selftests/bpf/test_xdping.sh new file mode 100755 index 000000000000..c2f0ddb45531 --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdping.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# xdping tests +# Here we setup and teardown configuration required to run +# xdping, exercising its options. +# +# Setup is similar to test_tunnel tests but without the tunnel. +# +# Topology: +# --------- +# root namespace | tc_ns0 namespace +# | +# ---------- | ---------- +# | veth1 | --------- | veth0 | +# ---------- peer ---------- +# +# Device Configuration +# -------------------- +# Root namespace with BPF +# Device names and addresses: +# veth1 IP: 10.1.1.200 +# xdp added to veth1, xdpings originate from here. +# +# Namespace tc_ns0 with BPF +# Device names and addresses: +# veth0 IPv4: 10.1.1.100 +# For some tests xdping run in server mode here. +# + +readonly TARGET_IP="10.1.1.100" +readonly TARGET_NS="xdp_ns0" + +readonly LOCAL_IP="10.1.1.200" + +setup() +{ + ip netns add $TARGET_NS + ip link add veth0 type veth peer name veth1 + ip link set veth0 netns $TARGET_NS + ip netns exec $TARGET_NS ip addr add ${TARGET_IP}/24 dev veth0 + ip addr add ${LOCAL_IP}/24 dev veth1 + ip netns exec $TARGET_NS ip link set veth0 up + ip link set veth1 up +} + +cleanup() +{ + set +e + ip netns delete $TARGET_NS 2>/dev/null + ip link del veth1 2>/dev/null + if [[ $server_pid -ne 0 ]]; then + kill -TERM $server_pid + fi +} + +test() +{ + client_args="$1" + server_args="$2" + + echo "Test client args '$client_args'; server args '$server_args'" + + server_pid=0 + if [[ -n "$server_args" ]]; then + ip netns exec $TARGET_NS ./xdping $server_args & + server_pid=$! + sleep 10 + fi + ./xdping $client_args $TARGET_IP + + if [[ $server_pid -ne 0 ]]; then + kill -TERM $server_pid + server_pid=0 + fi + + echo "Test client args '$client_args'; server args '$server_args': PASS" +} + +set -e + +server_pid=0 + +trap cleanup EXIT + +setup + +for server_args in "" "-I veth0 -s -S" ; do + # client in skb mode + client_args="-I veth1 -S" + test "$client_args" "$server_args" + + # client with count of 10 RTT measurements. + client_args="-I veth1 -S -c 10" + test "$client_args" "$server_args" +done + +echo "OK. All tests passed" +exit 0 diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 9a9fc6c9b70b..b47f205f0310 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -30,9 +30,7 @@ int load_kallsyms(void) if (!f) return -ENOENT; - while (!feof(f)) { - if (!fgets(buf, sizeof(buf), f)) - break; + while (fgets(buf, sizeof(buf), f)) { if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3) break; if (!addr) diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c new file mode 100644 index 000000000000..d60a343b1371 --- /dev/null +++ b/tools/testing/selftests/bpf/xdping.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. */ + +#include <linux/bpf.h> +#include <linux/if_link.h> +#include <arpa/inet.h> +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <libgen.h> +#include <sys/resource.h> +#include <net/if.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netdb.h> + +#include "bpf/bpf.h" +#include "bpf/libbpf.h" + +#include "xdping.h" + +static int ifindex; +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; + +static void cleanup(int sig) +{ + bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + if (sig) + exit(1); +} + +static int get_stats(int fd, __u16 count, __u32 raddr) +{ + struct pinginfo pinginfo = { 0 }; + char inaddrbuf[INET_ADDRSTRLEN]; + struct in_addr inaddr; + __u16 i; + + inaddr.s_addr = raddr; + + printf("\nXDP RTT data:\n"); + + if (bpf_map_lookup_elem(fd, &raddr, &pinginfo)) { + perror("bpf_map_lookup elem: "); + return 1; + } + + for (i = 0; i < count; i++) { + if (pinginfo.times[i] == 0) + break; + + printf("64 bytes from %s: icmp_seq=%d ttl=64 time=%#.5f ms\n", + inet_ntop(AF_INET, &inaddr, inaddrbuf, + sizeof(inaddrbuf)), + count + i + 1, + (double)pinginfo.times[i]/1000000); + } + + if (i < count) { + fprintf(stderr, "Expected %d samples, got %d.\n", count, i); + return 1; + } + + bpf_map_delete_elem(fd, &raddr); + + return 0; +} + +static void show_usage(const char *prog) +{ + fprintf(stderr, + "usage: %s [OPTS] -I interface destination\n\n" + "OPTS:\n" + " -c count Stop after sending count requests\n" + " (default %d, max %d)\n" + " -I interface interface name\n" + " -N Run in driver mode\n" + " -s Server mode\n" + " -S Run in skb mode\n", + prog, XDPING_DEFAULT_COUNT, XDPING_MAX_COUNT); +} + +int main(int argc, char **argv) +{ + __u32 mode_flags = XDP_FLAGS_DRV_MODE | XDP_FLAGS_SKB_MODE; + struct addrinfo *a, hints = { .ai_family = AF_INET }; + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + __u16 count = XDPING_DEFAULT_COUNT; + struct pinginfo pinginfo = { 0 }; + const char *optstr = "c:I:NsS"; + struct bpf_program *main_prog; + int prog_fd = -1, map_fd = -1; + struct sockaddr_in rin; + struct bpf_object *obj; + struct bpf_map *map; + char *ifname = NULL; + char filename[256]; + int opt, ret = 1; + __u32 raddr = 0; + int server = 0; + char cmd[256]; + + while ((opt = getopt(argc, argv, optstr)) != -1) { + switch (opt) { + case 'c': + count = atoi(optarg); + if (count < 1 || count > XDPING_MAX_COUNT) { + fprintf(stderr, + "min count is 1, max count is %d\n", + XDPING_MAX_COUNT); + return 1; + } + break; + case 'I': + ifname = optarg; + ifindex = if_nametoindex(ifname); + if (!ifindex) { + fprintf(stderr, "Could not get interface %s\n", + ifname); + return 1; + } + break; + case 'N': + xdp_flags |= XDP_FLAGS_DRV_MODE; + break; + case 's': + /* use server program */ + server = 1; + break; + case 'S': + xdp_flags |= XDP_FLAGS_SKB_MODE; + break; + default: + show_usage(basename(argv[0])); + return 1; + } + } + + if (!ifname) { + show_usage(basename(argv[0])); + return 1; + } + if (!server && optind == argc) { + show_usage(basename(argv[0])); + return 1; + } + + if ((xdp_flags & mode_flags) == mode_flags) { + fprintf(stderr, "-N or -S can be specified, not both.\n"); + show_usage(basename(argv[0])); + return 1; + } + + if (!server) { + /* Only supports IPv4; see hints initiailization above. */ + if (getaddrinfo(argv[optind], NULL, &hints, &a) || !a) { + fprintf(stderr, "Could not resolve %s\n", argv[optind]); + return 1; + } + memcpy(&rin, a->ai_addr, sizeof(rin)); + raddr = rin.sin_addr.s_addr; + freeaddrinfo(a); + } + + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return 1; + } + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (bpf_prog_load(filename, BPF_PROG_TYPE_XDP, &obj, &prog_fd)) { + fprintf(stderr, "load of %s failed\n", filename); + return 1; + } + + main_prog = bpf_object__find_program_by_title(obj, + server ? "xdpserver" : + "xdpclient"); + if (main_prog) + prog_fd = bpf_program__fd(main_prog); + if (!main_prog || prog_fd < 0) { + fprintf(stderr, "could not find xdping program"); + return 1; + } + + map = bpf_map__next(NULL, obj); + if (map) + map_fd = bpf_map__fd(map); + if (!map || map_fd < 0) { + fprintf(stderr, "Could not find ping map"); + goto done; + } + + signal(SIGINT, cleanup); + signal(SIGTERM, cleanup); + + printf("Setting up XDP for %s, please wait...\n", ifname); + + printf("XDP setup disrupts network connectivity, hit Ctrl+C to quit\n"); + + if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { + fprintf(stderr, "Link set xdp fd failed for %s\n", ifname); + goto done; + } + + if (server) { + close(prog_fd); + close(map_fd); + printf("Running server on %s; press Ctrl+C to exit...\n", + ifname); + do { } while (1); + } + + /* Start xdping-ing from last regular ping reply, e.g. for a count + * of 10 ICMP requests, we start xdping-ing using reply with seq number + * 10. The reason the last "real" ping RTT is much higher is that + * the ping program sees the ICMP reply associated with the last + * XDP-generated packet, so ping doesn't get a reply until XDP is done. + */ + pinginfo.seq = htons(count); + pinginfo.count = count; + + if (bpf_map_update_elem(map_fd, &raddr, &pinginfo, BPF_ANY)) { + fprintf(stderr, "could not communicate with BPF map: %s\n", + strerror(errno)); + cleanup(0); + goto done; + } + + /* We need to wait for XDP setup to complete. */ + sleep(10); + + snprintf(cmd, sizeof(cmd), "ping -c %d -I %s %s", + count, ifname, argv[optind]); + + printf("\nNormal ping RTT data\n"); + printf("[Ignore final RTT; it is distorted by XDP using the reply]\n"); + + ret = system(cmd); + + if (!ret) + ret = get_stats(map_fd, count, raddr); + + cleanup(0); + +done: + if (prog_fd > 0) + close(prog_fd); + if (map_fd > 0) + close(map_fd); + + return ret; +} diff --git a/tools/testing/selftests/bpf/xdping.h b/tools/testing/selftests/bpf/xdping.h new file mode 100644 index 000000000000..afc578df77be --- /dev/null +++ b/tools/testing/selftests/bpf/xdping.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. */ + +#define XDPING_MAX_COUNT 10 +#define XDPING_DEFAULT_COUNT 4 + +struct pinginfo { + __u64 start; + __be16 seq; + __u16 count; + __u32 pad; + __u64 times[XDPING_MAX_COUNT]; +}; |