diff options
author | David S. Miller <davem@davemloft.net> | 2018-07-04 08:53:53 +0900 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-07-04 08:53:53 +0900 |
commit | b68034087a6d0b16e0fc5265b98107274084ce93 (patch) | |
tree | 823750a590920514a5e4c161a11a594abb68a451 /samples | |
parent | 44a4c4698ee86daa3f44ad4b370f5a9f58831d29 (diff) | |
parent | 0b9e3d543f9fa6a8abdac04f974176ee02312860 (diff) | |
download | linux-b68034087a6d0b16e0fc5265b98107274084ce93.tar.gz linux-b68034087a6d0b16e0fc5265b98107274084ce93.tar.bz2 linux-b68034087a6d0b16e0fc5265b98107274084ce93.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:
====================
pull-request: bpf-next 2018-07-03
The following pull-request contains BPF updates for your *net-next* tree.
The main changes are:
1) Various improvements to bpftool and libbpf, that is, bpftool build
speed improvements, missing BPF program types added for detection
by section name, ability to load programs from '.text' section is
made to work again, and better bash completion handling, from Jakub.
2) Improvements to nfp JIT's map read handling which allows for optimizing
memcpy from map to packet, from Jiong.
3) New BPF sample is added which demonstrates XDP in combination with
bpf_perf_event_output() helper to sample packets on all CPUs, from Toke.
4) Add a new BPF kselftest case for tracking connect(2) BPF hooks
infrastructure in combination with TFO, from Andrey.
5) Extend the XDP/BPF xdp_rxq_info sample code with a cmdline option to
read payload from packet data in order to use it for benchmarking.
Also for '--action XDP_TX' option implement swapping of MAC addresses
to avoid drops on some hardware seen during testing, from Jesper.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'samples')
-rw-r--r-- | samples/bpf/Makefile | 4 | ||||
-rw-r--r-- | samples/bpf/xdp_rxq_info_kern.c | 43 | ||||
-rw-r--r-- | samples/bpf/xdp_rxq_info_user.c | 45 | ||||
-rw-r--r-- | samples/bpf/xdp_sample_pkts_kern.c | 66 | ||||
-rw-r--r-- | samples/bpf/xdp_sample_pkts_user.c | 169 |
5 files changed, 321 insertions, 6 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 1303af10e54d..9ea2f7b64869 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -52,6 +52,7 @@ hostprogs-y += xdp_adjust_tail hostprogs-y += xdpsock hostprogs-y += xdp_fwd hostprogs-y += task_fd_query +hostprogs-y += xdp_sample_pkts # Libbpf dependencies LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a @@ -107,6 +108,7 @@ xdp_adjust_tail-objs := xdp_adjust_tail_user.o xdpsock-objs := bpf_load.o xdpsock_user.o xdp_fwd-objs := bpf_load.o xdp_fwd_user.o task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS) +xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS) # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -163,6 +165,7 @@ always += xdp_adjust_tail_kern.o always += xdpsock_kern.o always += xdp_fwd_kern.o always += task_fd_query_kern.o +always += xdp_sample_pkts_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ @@ -179,6 +182,7 @@ HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/ HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/ HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/ HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/ +HOSTCFLAGS_xdp_sample_pkts_user.o += -I$(srctree)/tools/lib/bpf/ HOST_LOADLIBES += $(LIBBPF) -lelf HOSTLOADLIBES_tracex4 += -lrt diff --git a/samples/bpf/xdp_rxq_info_kern.c b/samples/bpf/xdp_rxq_info_kern.c index 3fd209291653..222a83eed1cb 100644 --- a/samples/bpf/xdp_rxq_info_kern.c +++ b/samples/bpf/xdp_rxq_info_kern.c @@ -4,6 +4,8 @@ * Example howto extract XDP RX-queue info */ #include <uapi/linux/bpf.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/in.h> #include "bpf_helpers.h" /* Config setup from with userspace @@ -14,6 +16,12 @@ struct config { __u32 action; int ifindex; + __u32 options; +}; +enum cfg_options_flags { + NO_TOUCH = 0x0U, + READ_MEM = 0x1U, + SWAP_MAC = 0x2U, }; struct bpf_map_def SEC("maps") config_map = { .type = BPF_MAP_TYPE_ARRAY, @@ -45,6 +53,23 @@ struct bpf_map_def SEC("maps") rx_queue_index_map = { .max_entries = MAX_RXQs + 1, }; +static __always_inline +void swap_src_dst_mac(void *data) +{ + unsigned short *p = data; + unsigned short dst[3]; + + dst[0] = p[0]; + dst[1] = p[1]; + dst[2] = p[2]; + p[0] = p[3]; + p[1] = p[4]; + p[2] = p[5]; + p[3] = dst[0]; + p[4] = dst[1]; + p[5] = dst[2]; +} + SEC("xdp_prog0") int xdp_prognum0(struct xdp_md *ctx) { @@ -90,6 +115,24 @@ int xdp_prognum0(struct xdp_md *ctx) if (key == MAX_RXQs) rxq_rec->issue++; + /* Default: Don't touch packet data, only count packets */ + if (unlikely(config->options & (READ_MEM|SWAP_MAC))) { + struct ethhdr *eth = data; + + if (eth + 1 > data_end) + return XDP_ABORTED; + + /* Avoid compiler removing this: Drop non 802.3 Ethertypes */ + if (ntohs(eth->h_proto) < ETH_P_802_3_MIN) + return XDP_ABORTED; + + /* XDP_TX requires changing MAC-addrs, else HW may drop. + * Can also be enabled with --swapmac (for test purposes) + */ + if (unlikely(config->options & SWAP_MAC)) + swap_src_dst_mac(data); + } + return config->action; } diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c index e4e9ba52bff0..248a7eab9531 100644 --- a/samples/bpf/xdp_rxq_info_user.c +++ b/samples/bpf/xdp_rxq_info_user.c @@ -50,6 +50,8 @@ static const struct option long_options[] = { {"sec", required_argument, NULL, 's' }, {"no-separators", no_argument, NULL, 'z' }, {"action", required_argument, NULL, 'a' }, + {"readmem", no_argument, NULL, 'r' }, + {"swapmac", no_argument, NULL, 'm' }, {0, 0, NULL, 0 } }; @@ -66,6 +68,12 @@ static void int_exit(int sig) struct config { __u32 action; int ifindex; + __u32 options; +}; +enum cfg_options_flags { + NO_TOUCH = 0x0U, + READ_MEM = 0x1U, + SWAP_MAC = 0x2U, }; #define XDP_ACTION_MAX (XDP_TX + 1) #define XDP_ACTION_MAX_STRLEN 11 @@ -109,6 +117,18 @@ static void list_xdp_actions(void) printf("\n"); } +static char* options2str(enum cfg_options_flags flag) +{ + if (flag == NO_TOUCH) + return "no_touch"; + if (flag & SWAP_MAC) + return "swapmac"; + if (flag & READ_MEM) + return "read"; + fprintf(stderr, "ERR: Unknown config option flags"); + exit(EXIT_FAIL); +} + static void usage(char *argv[]) { int i; @@ -305,7 +325,7 @@ static __u64 calc_errs_pps(struct datarec *r, static void stats_print(struct stats_record *stats_rec, struct stats_record *stats_prev, - int action) + int action, __u32 cfg_opt) { unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; unsigned int nr_cpus = bpf_num_possible_cpus(); @@ -316,8 +336,8 @@ static void stats_print(struct stats_record *stats_rec, int i; /* Header */ - printf("\nRunning XDP on dev:%s (ifindex:%d) action:%s\n", - ifname, ifindex, action2str(action)); + printf("\nRunning XDP on dev:%s (ifindex:%d) action:%s options:%s\n", + ifname, ifindex, action2str(action), options2str(cfg_opt)); /* stats_global_map */ { @@ -399,7 +419,7 @@ static inline void swap(struct stats_record **a, struct stats_record **b) *b = tmp; } -static void stats_poll(int interval, int action) +static void stats_poll(int interval, int action, __u32 cfg_opt) { struct stats_record *record, *prev; @@ -410,7 +430,7 @@ static void stats_poll(int interval, int action) while (1) { swap(&prev, &record); stats_collect(record); - stats_print(record, prev, action); + stats_print(record, prev, action, cfg_opt); sleep(interval); } @@ -421,6 +441,7 @@ static void stats_poll(int interval, int action) int main(int argc, char **argv) { + __u32 cfg_options= NO_TOUCH ; /* Default: Don't touch packet memory */ struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, @@ -435,6 +456,7 @@ int main(int argc, char **argv) int interval = 2; __u32 key = 0; + char action_str_buf[XDP_ACTION_MAX_STRLEN + 1 /* for \0 */] = { 0 }; int action = XDP_PASS; /* Default action */ char *action_str = NULL; @@ -496,6 +518,12 @@ int main(int argc, char **argv) action_str = (char *)&action_str_buf; strncpy(action_str, optarg, XDP_ACTION_MAX_STRLEN); break; + case 'r': + cfg_options |= READ_MEM; + break; + case 'm': + cfg_options |= SWAP_MAC; + break; case 'h': error: default: @@ -523,6 +551,11 @@ int main(int argc, char **argv) } cfg.action = action; + /* XDP_TX requires changing MAC-addrs, else HW may drop */ + if (action == XDP_TX) + cfg_options |= SWAP_MAC; + cfg.options = cfg_options; + /* Trick to pretty printf with thousands separators use %' */ if (use_separators) setlocale(LC_NUMERIC, "en_US"); @@ -542,6 +575,6 @@ int main(int argc, char **argv) return EXIT_FAIL_XDP; } - stats_poll(interval, action); + stats_poll(interval, action, cfg_options); return EXIT_OK; } diff --git a/samples/bpf/xdp_sample_pkts_kern.c b/samples/bpf/xdp_sample_pkts_kern.c new file mode 100644 index 000000000000..f7ca8b850978 --- /dev/null +++ b/samples/bpf/xdp_sample_pkts_kern.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/ptrace.h> +#include <linux/version.h> +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" + +#define SAMPLE_SIZE 64ul +#define MAX_CPUS 128 + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +struct bpf_map_def SEC("maps") my_map = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(u32), + .max_entries = MAX_CPUS, +}; + +SEC("xdp_sample") +int xdp_sample_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + + /* Metadata will be in the perf event before the packet data. */ + struct S { + u16 cookie; + u16 pkt_len; + } __packed metadata; + + if (data < data_end) { + /* The XDP perf_event_output handler will use the upper 32 bits + * of the flags argument as a number of bytes to include of the + * packet payload in the event data. If the size is too big, the + * call to bpf_perf_event_output will fail and return -EFAULT. + * + * See bpf_xdp_event_output in net/core/filter.c. + * + * The BPF_F_CURRENT_CPU flag means that the event output fd + * will be indexed by the CPU number in the event map. + */ + u64 flags = BPF_F_CURRENT_CPU; + u16 sample_size; + int ret; + + metadata.cookie = 0xdead; + metadata.pkt_len = (u16)(data_end - data); + sample_size = min(metadata.pkt_len, SAMPLE_SIZE); + flags |= (u64)sample_size << 32; + + ret = bpf_perf_event_output(ctx, &my_map, flags, + &metadata, sizeof(metadata)); + if (ret) + bpf_printk("perf_event_output failed: %d\n", ret); + } + + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c new file mode 100644 index 000000000000..8dd87c1eb560 --- /dev/null +++ b/samples/bpf/xdp_sample_pkts_user.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <linux/perf_event.h> +#include <linux/bpf.h> +#include <net/if.h> +#include <errno.h> +#include <assert.h> +#include <sys/sysinfo.h> +#include <sys/ioctl.h> +#include <signal.h> +#include <libbpf.h> +#include <bpf/bpf.h> + +#include "perf-sys.h" +#include "trace_helpers.h" + +#define MAX_CPUS 128 +static int pmu_fds[MAX_CPUS], if_idx; +static struct perf_event_mmap_page *headers[MAX_CPUS]; +static char *if_name; + +static int do_attach(int idx, int fd, const char *name) +{ + int err; + + err = bpf_set_link_xdp_fd(idx, fd, 0); + if (err < 0) + printf("ERROR: failed to attach program to %s\n", name); + + return err; +} + +static int do_detach(int idx, const char *name) +{ + int err; + + err = bpf_set_link_xdp_fd(idx, -1, 0); + if (err < 0) + printf("ERROR: failed to detach program from %s\n", name); + + return err; +} + +#define SAMPLE_SIZE 64 + +static int print_bpf_output(void *data, int size) +{ + struct { + __u16 cookie; + __u16 pkt_len; + __u8 pkt_data[SAMPLE_SIZE]; + } __packed *e = data; + int i; + + if (e->cookie != 0xdead) { + printf("BUG cookie %x sized %d\n", + e->cookie, size); + return LIBBPF_PERF_EVENT_ERROR; + } + + printf("Pkt len: %-5d bytes. Ethernet hdr: ", e->pkt_len); + for (i = 0; i < 14 && i < e->pkt_len; i++) + printf("%02x ", e->pkt_data[i]); + printf("\n"); + + return LIBBPF_PERF_EVENT_CONT; +} + +static void test_bpf_perf_event(int map_fd, int num) +{ + struct perf_event_attr attr = { + .sample_type = PERF_SAMPLE_RAW, + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_BPF_OUTPUT, + .wakeup_events = 1, /* get an fd notification for every event */ + }; + int i; + + for (i = 0; i < num; i++) { + int key = i; + + pmu_fds[i] = sys_perf_event_open(&attr, -1/*pid*/, i/*cpu*/, + -1/*group_fd*/, 0); + + assert(pmu_fds[i] >= 0); + assert(bpf_map_update_elem(map_fd, &key, + &pmu_fds[i], BPF_ANY) == 0); + ioctl(pmu_fds[i], PERF_EVENT_IOC_ENABLE, 0); + } +} + +static void sig_handler(int signo) +{ + do_detach(if_idx, if_name); + exit(0); +} + +int main(int argc, char **argv) +{ + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + }; + struct bpf_object *obj; + struct bpf_map *map; + int prog_fd, map_fd; + char filename[256]; + int ret, err, i; + int numcpus; + + if (argc < 2) { + printf("Usage: %s <ifname>\n", argv[0]); + return 1; + } + + numcpus = get_nprocs(); + if (numcpus > MAX_CPUS) + numcpus = MAX_CPUS; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + prog_load_attr.file = filename; + + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + return 1; + + if (!prog_fd) { + printf("load_bpf_file: %s\n", strerror(errno)); + return 1; + } + + map = bpf_map__next(NULL, obj); + if (!map) { + printf("finding a map in obj file failed\n"); + return 1; + } + map_fd = bpf_map__fd(map); + + if_idx = if_nametoindex(argv[1]); + if (!if_idx) + if_idx = strtoul(argv[1], NULL, 0); + + if (!if_idx) { + fprintf(stderr, "Invalid ifname\n"); + return 1; + } + if_name = argv[1]; + err = do_attach(if_idx, prog_fd, argv[1]); + if (err) + return err; + + if (signal(SIGINT, sig_handler) || + signal(SIGHUP, sig_handler) || + signal(SIGTERM, sig_handler)) { + perror("signal"); + return 1; + } + + test_bpf_perf_event(map_fd, numcpus); + + for (i = 0; i < numcpus; i++) + if (perf_event_mmap_header(pmu_fds[i], &headers[i]) < 0) + return 1; + + ret = perf_event_poller_multi(pmu_fds, headers, numcpus, + print_bpf_output); + kill(0, SIGINT); + return ret; +} |