From 90da4b3208d32bdb5489ca08b91af16ed4a68d00 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Mon, 16 Nov 2020 12:12:43 +0100 Subject: samples/bpf: Increment Tx stats at sending Increment the statistics over how many Tx packets have been sent at the time of sending instead of at the time of completion. This as a completion event means that the buffer has been sent AND returned to user space. The packet always gets sent shortly after sendto() is called. The kernel might, for performance reasons, decide to not return every single buffer to user space immediately after sending, for example, only after a batch of packets have been transmitted. Incrementing the number of packets sent at completion, will in that case be confusing as if you send a single packet, the counter might show zero for a while even though the packet has been transmitted. Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/1605525167-14450-2-git-send-email-magnus.karlsson@gmail.com --- samples/bpf/xdpsock_user.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'samples') diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 1149e94ca32f..2567f0db5aca 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1146,7 +1146,6 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk, xsk_ring_prod__submit(&xsk->umem->fq, rcvd); xsk_ring_cons__release(&xsk->umem->cq, rcvd); xsk->outstanding_tx -= rcvd; - xsk->ring_stats.tx_npkts += rcvd; } } @@ -1168,7 +1167,6 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk, if (rcvd > 0) { xsk_ring_cons__release(&xsk->umem->cq, rcvd); xsk->outstanding_tx -= rcvd; - xsk->ring_stats.tx_npkts += rcvd; } } @@ -1260,6 +1258,7 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) } xsk_ring_prod__submit(&xsk->tx, batch_size); + xsk->ring_stats.tx_npkts += batch_size; xsk->outstanding_tx += batch_size; *frame_nb += batch_size; *frame_nb %= NUM_FRAMES; @@ -1348,6 +1347,7 @@ static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds) } return; } + xsk->ring_stats.rx_npkts += rcvd; ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx); while (ret != rcvd) { @@ -1379,7 +1379,7 @@ static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds) xsk_ring_prod__submit(&xsk->tx, rcvd); xsk_ring_cons__release(&xsk->rx, rcvd); - xsk->ring_stats.rx_npkts += rcvd; + xsk->ring_stats.tx_npkts += rcvd; xsk->outstanding_tx += rcvd; } -- cgit v1.2.3 From c5815ac7e2aaff4f00b2b9e21d84b9f2fddddb48 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Tue, 24 Nov 2020 09:03:04 +0000 Subject: samples: bpf: Refactor hbm program with libbpf This commit refactors the existing cgroup programs with libbpf bpf loader. Since bpf_program__attach doesn't support cgroup program attachment, this explicitly attaches cgroup bpf program with bpf_program__attach_cgroup(bpf_prog, cg1). Also, to change attach_type of bpf program, this uses libbpf's bpf_program__set_expected_attach_type helper to switch EGRESS to INGRESS. To keep bpf program attached to the cgroup hierarchy even after the exit, this commit uses the BPF_LINK_PINNING to pin the link attachment even after it is closed. Besides, this program was broken due to the typo of BPF MAP definition. But this commit solves the problem by fixing this from 'queue_stats' map struct hvm_queue_stats -> hbm_queue_stats. Fixes: 36b5d471135c ("selftests/bpf: samples/bpf: Split off legacy stuff from bpf_helpers.h") Signed-off-by: Daniel T. Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201124090310.24374-2-danieltimlee@gmail.com --- samples/bpf/.gitignore | 3 ++ samples/bpf/Makefile | 2 +- samples/bpf/do_hbm_test.sh | 32 ++++++------- samples/bpf/hbm.c | 111 ++++++++++++++++++++++++--------------------- samples/bpf/hbm_kern.h | 2 +- 5 files changed, 78 insertions(+), 72 deletions(-) (limited to 'samples') diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore index b2f29bc8dc43..0b9548ea8477 100644 --- a/samples/bpf/.gitignore +++ b/samples/bpf/.gitignore @@ -52,3 +52,6 @@ xdp_tx_iptunnel xdpsock xsk_fwd testfile.img +hbm_out.log +iperf.* +*.out diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index aeebf5d12f32..7c61118525f7 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -110,7 +110,7 @@ xdp_fwd-objs := xdp_fwd_user.o task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS) xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS) ibumad-objs := bpf_load.o ibumad_user.o $(TRACE_HELPERS) -hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS) +hbm-objs := hbm.o $(CGROUP_HELPERS) # Tell kbuild to always build the programs always-y := $(tprogs-y) diff --git a/samples/bpf/do_hbm_test.sh b/samples/bpf/do_hbm_test.sh index ffe4c0607341..21790ea5c460 100755 --- a/samples/bpf/do_hbm_test.sh +++ b/samples/bpf/do_hbm_test.sh @@ -91,6 +91,16 @@ qdisc="" flags="" do_stats=0 +BPFFS=/sys/fs/bpf +function config_bpffs () { + if mount | grep $BPFFS > /dev/null; then + echo "bpffs already mounted" + else + echo "bpffs not mounted. Mounting..." + mount -t bpf none $BPFFS + fi +} + function start_hbm () { rm -f hbm.out echo "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog" > hbm.out @@ -192,6 +202,7 @@ processArgs () { } processArgs +config_bpffs if [ $debug_flag -eq 1 ] ; then rm -f hbm_out.log @@ -201,7 +212,7 @@ hbm_pid=$(start_hbm) usleep 100000 host=`hostname` -cg_base_dir=/sys/fs/cgroup +cg_base_dir=/sys/fs/cgroup/unified cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id" echo $$ >> $cg_dir/cgroup.procs @@ -411,23 +422,8 @@ fi sleep 1 -# Detach any BPF programs that may have lingered -ttx=`bpftool cgroup tree | grep hbm` -v=2 -for x in $ttx ; do - if [ "${x:0:36}" == "/sys/fs/cgroup/cgroup-test-work-dir/" ] ; then - cg=$x ; v=0 - else - if [ $v -eq 0 ] ; then - id=$x ; v=1 - else - if [ $v -eq 1 ] ; then - type=$x ; bpftool cgroup detach $cg $type id $id - v=0 - fi - fi - fi -done +# Detach any pinned BPF programs that may have lingered +rm -rf $BPFFS/hbm* if [ $use_netperf -ne 0 ] ; then if [ "$server" == "" ] ; then diff --git a/samples/bpf/hbm.c b/samples/bpf/hbm.c index 400e741a56eb..b0c18efe7928 100644 --- a/samples/bpf/hbm.c +++ b/samples/bpf/hbm.c @@ -46,7 +46,6 @@ #include #include -#include "bpf_load.h" #include "bpf_rlimit.h" #include "cgroup_helpers.h" #include "hbm.h" @@ -70,9 +69,9 @@ static void do_error(char *msg, bool errno_flag); #define DEBUGFS "/sys/kernel/debug/tracing/" -struct bpf_object *obj; -int bpfprog_fd; -int cgroup_storage_fd; +static struct bpf_program *bpf_prog; +static struct bpf_object *obj; +static int queue_stats_fd; static void read_trace_pipe2(void) { @@ -121,56 +120,50 @@ static void do_error(char *msg, bool errno_flag) static int prog_load(char *prog) { - struct bpf_prog_load_attr prog_load_attr = { - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - .file = prog, - .expected_attach_type = BPF_CGROUP_INET_EGRESS, - }; - int map_fd; - struct bpf_map *map; - - int ret = 0; - - if (access(prog, O_RDONLY) < 0) { - printf("Error accessing file %s: %s\n", prog, strerror(errno)); + obj = bpf_object__open_file(prog, NULL); + if (libbpf_get_error(obj)) { + printf("ERROR: opening BPF object file failed\n"); return 1; } - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &bpfprog_fd)) - ret = 1; - if (!ret) { - map = bpf_object__find_map_by_name(obj, "queue_stats"); - map_fd = bpf_map__fd(map); - if (map_fd < 0) { - printf("Map not found: %s\n", strerror(map_fd)); - ret = 1; - } + + /* load BPF program */ + if (bpf_object__load(obj)) { + printf("ERROR: loading BPF object file failed\n"); + goto err; } - if (ret) { - printf("ERROR: bpf_prog_load_xattr failed for: %s\n", prog); - printf(" Output from verifier:\n%s\n------\n", bpf_log_buf); - ret = -1; - } else { - ret = map_fd; + bpf_prog = bpf_object__find_program_by_title(obj, "cgroup_skb/egress"); + if (!bpf_prog) { + printf("ERROR: finding a prog in obj file failed\n"); + goto err; + } + + queue_stats_fd = bpf_object__find_map_fd_by_name(obj, "queue_stats"); + if (queue_stats_fd < 0) { + printf("ERROR: finding a map in obj file failed\n"); + goto err; } - return ret; + return 0; + +err: + bpf_object__close(obj); + return 1; } static int run_bpf_prog(char *prog, int cg_id) { - int map_fd; - int rc = 0; + struct hbm_queue_stats qstats = {0}; + char cg_dir[100], cg_pin_path[100]; + struct bpf_link *link = NULL; int key = 0; int cg1 = 0; - int type = BPF_CGROUP_INET_EGRESS; - char cg_dir[100]; - struct hbm_queue_stats qstats = {0}; + int rc = 0; sprintf(cg_dir, "/hbm%d", cg_id); - map_fd = prog_load(prog); - if (map_fd == -1) - return 1; + rc = prog_load(prog); + if (rc != 0) + return rc; if (setup_cgroup_environment()) { printf("ERROR: setting cgroup environment\n"); @@ -190,16 +183,24 @@ static int run_bpf_prog(char *prog, int cg_id) qstats.stats = stats_flag ? 1 : 0; qstats.loopback = loopback_flag ? 1 : 0; qstats.no_cn = no_cn_flag ? 1 : 0; - if (bpf_map_update_elem(map_fd, &key, &qstats, BPF_ANY)) { + if (bpf_map_update_elem(queue_stats_fd, &key, &qstats, BPF_ANY)) { printf("ERROR: Could not update map element\n"); goto err; } if (!outFlag) - type = BPF_CGROUP_INET_INGRESS; - if (bpf_prog_attach(bpfprog_fd, cg1, type, 0)) { - printf("ERROR: bpf_prog_attach fails!\n"); - log_err("Attaching prog"); + bpf_program__set_expected_attach_type(bpf_prog, BPF_CGROUP_INET_INGRESS); + + link = bpf_program__attach_cgroup(bpf_prog, cg1); + if (libbpf_get_error(link)) { + fprintf(stderr, "ERROR: bpf_program__attach_cgroup failed\n"); + goto err; + } + + sprintf(cg_pin_path, "/sys/fs/bpf/hbm%d", cg_id); + rc = bpf_link__pin(link, cg_pin_path); + if (rc < 0) { + printf("ERROR: bpf_link__pin failed: %d\n", rc); goto err; } @@ -213,7 +214,7 @@ static int run_bpf_prog(char *prog, int cg_id) #define DELTA_RATE_CHECK 10000 /* in us */ #define RATE_THRESHOLD 9500000000 /* 9.5 Gbps */ - bpf_map_lookup_elem(map_fd, &key, &qstats); + bpf_map_lookup_elem(queue_stats_fd, &key, &qstats); if (gettimeofday(&t0, NULL) < 0) do_error("gettimeofday failed", true); t_last = t0; @@ -242,7 +243,7 @@ static int run_bpf_prog(char *prog, int cg_id) fclose(fin); printf(" new_eth_tx_bytes:%llu\n", new_eth_tx_bytes); - bpf_map_lookup_elem(map_fd, &key, &qstats); + bpf_map_lookup_elem(queue_stats_fd, &key, &qstats); new_cg_tx_bytes = qstats.bytes_total; delta_bytes = new_eth_tx_bytes - last_eth_tx_bytes; last_eth_tx_bytes = new_eth_tx_bytes; @@ -289,14 +290,14 @@ static int run_bpf_prog(char *prog, int cg_id) rate = minRate; qstats.rate = rate; } - if (bpf_map_update_elem(map_fd, &key, &qstats, BPF_ANY)) + if (bpf_map_update_elem(queue_stats_fd, &key, &qstats, BPF_ANY)) do_error("update map element fails", false); } } else { sleep(dur); } // Get stats! - if (stats_flag && bpf_map_lookup_elem(map_fd, &key, &qstats)) { + if (stats_flag && bpf_map_lookup_elem(queue_stats_fd, &key, &qstats)) { char fname[100]; FILE *fout; @@ -394,14 +395,20 @@ static int run_bpf_prog(char *prog, int cg_id) if (debugFlag) read_trace_pipe2(); - return rc; + goto cleanup; + err: rc = 1; - if (cg1) +cleanup: + bpf_link__destroy(link); + bpf_object__close(obj); + + if (cg1 != -1) close(cg1); - cleanup_cgroup_environment(); + if (rc != 0) + cleanup_cgroup_environment(); return rc; } diff --git a/samples/bpf/hbm_kern.h b/samples/bpf/hbm_kern.h index e00f26f6afba..722b3fadb467 100644 --- a/samples/bpf/hbm_kern.h +++ b/samples/bpf/hbm_kern.h @@ -69,7 +69,7 @@ struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 1); __type(key, u32); - __type(value, struct hvm_queue_stats); + __type(value, struct hbm_queue_stats); } queue_stats SEC(".maps"); struct hbm_pkt_info { -- cgit v1.2.3 From d89af13c92056c46dfc4bcb3d90efe88937c3381 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Tue, 24 Nov 2020 09:03:05 +0000 Subject: samples: bpf: Refactor test_cgrp2_sock2 program with libbpf This commit refactors the existing cgroup program with libbpf bpf loader. The original test_cgrp2_sock2 has keeped the bpf program attached to the cgroup hierarchy even after the exit of user program. To implement the same functionality with libbpf, this commit uses the BPF_LINK_PINNING to pin the link attachment even after it is closed. Since this uses LINK instead of ATTACH, detach of bpf program from cgroup with 'test_cgrp2_sock' is not used anymore. The code to mount the bpf was added to the .sh file in case the bpff was not mounted on /sys/fs/bpf. Additionally, to fix the problem that shell script cannot find the binary object from the current path, relative path './' has been added in front of binary. Fixes: 554ae6e792ef3 ("samples/bpf: add userspace example for prohibiting sockets") Signed-off-by: Daniel T. Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201124090310.24374-3-danieltimlee@gmail.com --- samples/bpf/Makefile | 2 +- samples/bpf/test_cgrp2_sock2.c | 61 +++++++++++++++++++++++++++++------------ samples/bpf/test_cgrp2_sock2.sh | 21 +++++++++++--- 3 files changed, 62 insertions(+), 22 deletions(-) (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 7c61118525f7..d31e082c369e 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -82,7 +82,7 @@ test_overhead-objs := bpf_load.o test_overhead_user.o test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o test_cgrp2_attach-objs := test_cgrp2_attach.o test_cgrp2_sock-objs := test_cgrp2_sock.o -test_cgrp2_sock2-objs := bpf_load.o test_cgrp2_sock2.o +test_cgrp2_sock2-objs := test_cgrp2_sock2.o xdp1-objs := xdp1_user.o # reuse xdp1 source intentionally xdp2-objs := xdp1_user.o diff --git a/samples/bpf/test_cgrp2_sock2.c b/samples/bpf/test_cgrp2_sock2.c index a9277b118c33..e7060aaa2f5a 100644 --- a/samples/bpf/test_cgrp2_sock2.c +++ b/samples/bpf/test_cgrp2_sock2.c @@ -20,9 +20,9 @@ #include #include #include +#include #include "bpf_insn.h" -#include "bpf_load.h" static int usage(const char *argv0) { @@ -32,37 +32,64 @@ static int usage(const char *argv0) int main(int argc, char **argv) { - int cg_fd, ret, filter_id = 0; + int cg_fd, err, ret = EXIT_FAILURE, filter_id = 0, prog_cnt = 0; + const char *link_pin_path = "/sys/fs/bpf/test_cgrp2_sock2"; + struct bpf_link *link = NULL; + struct bpf_program *progs[2]; + struct bpf_program *prog; + struct bpf_object *obj; if (argc < 3) return usage(argv[0]); + if (argc > 3) + filter_id = atoi(argv[3]); + cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY); if (cg_fd < 0) { printf("Failed to open cgroup path: '%s'\n", strerror(errno)); - return EXIT_FAILURE; + return ret; } - if (load_bpf_file(argv[2])) - return EXIT_FAILURE; - - printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf); + obj = bpf_object__open_file(argv[2], NULL); + if (libbpf_get_error(obj)) { + printf("ERROR: opening BPF object file failed\n"); + return ret; + } - if (argc > 3) - filter_id = atoi(argv[3]); + bpf_object__for_each_program(prog, obj) { + progs[prog_cnt] = prog; + prog_cnt++; + } if (filter_id >= prog_cnt) { printf("Invalid program id; program not found in file\n"); - return EXIT_FAILURE; + goto cleanup; + } + + /* load BPF program */ + if (bpf_object__load(obj)) { + printf("ERROR: loading BPF object file failed\n"); + goto cleanup; } - ret = bpf_prog_attach(prog_fd[filter_id], cg_fd, - BPF_CGROUP_INET_SOCK_CREATE, 0); - if (ret < 0) { - printf("Failed to attach prog to cgroup: '%s'\n", - strerror(errno)); - return EXIT_FAILURE; + link = bpf_program__attach_cgroup(progs[filter_id], cg_fd); + if (libbpf_get_error(link)) { + printf("ERROR: bpf_program__attach failed\n"); + link = NULL; + goto cleanup; } - return EXIT_SUCCESS; + err = bpf_link__pin(link, link_pin_path); + if (err < 0) { + printf("ERROR: bpf_link__pin failed: %d\n", err); + goto cleanup; + } + + ret = EXIT_SUCCESS; + +cleanup: + bpf_link__destroy(link); + bpf_object__close(obj); + return ret; } diff --git a/samples/bpf/test_cgrp2_sock2.sh b/samples/bpf/test_cgrp2_sock2.sh index 0f396a86e0cb..6a3dbe642b2b 100755 --- a/samples/bpf/test_cgrp2_sock2.sh +++ b/samples/bpf/test_cgrp2_sock2.sh @@ -1,6 +1,9 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +BPFFS=/sys/fs/bpf +LINK_PIN=$BPFFS/test_cgrp2_sock2 + function config_device { ip netns add at_ns0 ip link add veth0 type veth peer name veth0b @@ -21,16 +24,22 @@ function config_cgroup { echo $$ >> /tmp/cgroupv2/foo/cgroup.procs } +function config_bpffs { + if mount | grep $BPFFS > /dev/null; then + echo "bpffs already mounted" + else + echo "bpffs not mounted. Mounting..." + mount -t bpf none $BPFFS + fi +} function attach_bpf { - test_cgrp2_sock2 /tmp/cgroupv2/foo sock_flags_kern.o $1 + ./test_cgrp2_sock2 /tmp/cgroupv2/foo sock_flags_kern.o $1 [ $? -ne 0 ] && exit 1 } function cleanup { - if [ -d /tmp/cgroupv2/foo ]; then - test_cgrp2_sock -d /tmp/cgroupv2/foo - fi + rm -rf $LINK_PIN ip link del veth0b ip netns delete at_ns0 umount /tmp/cgroupv2 @@ -42,6 +51,7 @@ cleanup 2>/dev/null set -e config_device config_cgroup +config_bpffs set +e # @@ -62,6 +72,9 @@ if [ $? -eq 0 ]; then exit 1 fi +rm -rf $LINK_PIN +sleep 1 # Wait for link detach + # # Test 2 - fail ping # -- cgit v1.2.3 From 4fe6641526dbee115d9d037e94a344f4f448aaa4 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Tue, 24 Nov 2020 09:03:06 +0000 Subject: samples: bpf: Refactor task_fd_query program with libbpf This commit refactors the existing kprobe program with libbpf bpf loader. To attach bpf program, this uses generic bpf_program__attach() approach rather than using bpf_load's load_bpf_file(). To attach bpf to perf_event, instead of using previous ioctl method, this commit uses bpf_program__attach_perf_event since it manages the enable of perf_event and attach of BPF programs to it, which is much more intuitive way to achieve. Also, explicit close(fd) has been removed since event will be closed inside bpf_link__destroy() automatically. Furthermore, to prevent conflict of same named uprobe events, O_TRUNC flag has been used to clear 'uprobe_events' interface. Signed-off-by: Daniel T. Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201124090310.24374-4-danieltimlee@gmail.com --- samples/bpf/Makefile | 2 +- samples/bpf/task_fd_query_user.c | 101 ++++++++++++++++++++++++++++----------- 2 files changed, 75 insertions(+), 28 deletions(-) (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index d31e082c369e..3bffd42e1482 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -107,7 +107,7 @@ xdp_adjust_tail-objs := xdp_adjust_tail_user.o xdpsock-objs := xdpsock_user.o xsk_fwd-objs := xsk_fwd.o xdp_fwd-objs := xdp_fwd_user.o -task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS) +task_fd_query-objs := task_fd_query_user.o $(TRACE_HELPERS) xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS) ibumad-objs := bpf_load.o ibumad_user.o $(TRACE_HELPERS) hbm-objs := hbm.o $(CGROUP_HELPERS) diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c index b68bd2f8fdc9..f6b772faa348 100644 --- a/samples/bpf/task_fd_query_user.c +++ b/samples/bpf/task_fd_query_user.c @@ -15,12 +15,15 @@ #include #include +#include #include -#include "bpf_load.h" #include "bpf_util.h" #include "perf-sys.h" #include "trace_helpers.h" +static struct bpf_program *progs[2]; +static struct bpf_link *links[2]; + #define CHECK_PERROR_RET(condition) ({ \ int __ret = !!(condition); \ if (__ret) { \ @@ -86,21 +89,22 @@ static int bpf_get_retprobe_bit(const char *event_type) return ret; } -static int test_debug_fs_kprobe(int prog_fd_idx, const char *fn_name, +static int test_debug_fs_kprobe(int link_idx, const char *fn_name, __u32 expected_fd_type) { __u64 probe_offset, probe_addr; __u32 len, prog_id, fd_type; + int err, event_fd; char buf[256]; - int err; len = sizeof(buf); - err = bpf_task_fd_query(getpid(), event_fd[prog_fd_idx], 0, buf, &len, + event_fd = bpf_link__fd(links[link_idx]); + err = bpf_task_fd_query(getpid(), event_fd, 0, buf, &len, &prog_id, &fd_type, &probe_offset, &probe_addr); if (err < 0) { printf("FAIL: %s, for event_fd idx %d, fn_name %s\n", - __func__, prog_fd_idx, fn_name); + __func__, link_idx, fn_name); perror(" :"); return -1; } @@ -108,7 +112,7 @@ static int test_debug_fs_kprobe(int prog_fd_idx, const char *fn_name, fd_type != expected_fd_type || probe_offset != 0x0 || probe_addr != 0x0) { printf("FAIL: bpf_trace_event_query(event_fd[%d]):\n", - prog_fd_idx); + link_idx); printf("buf: %s, fd_type: %u, probe_offset: 0x%llx," " probe_addr: 0x%llx\n", buf, fd_type, probe_offset, probe_addr); @@ -125,12 +129,13 @@ static int test_nondebug_fs_kuprobe_common(const char *event_type, int is_return_bit = bpf_get_retprobe_bit(event_type); int type = bpf_find_probe_type(event_type); struct perf_event_attr attr = {}; - int fd; + struct bpf_link *link; + int fd, err = -1; if (type < 0 || is_return_bit < 0) { printf("FAIL: %s incorrect type (%d) or is_return_bit (%d)\n", __func__, type, is_return_bit); - return -1; + return err; } attr.sample_period = 1; @@ -149,14 +154,21 @@ static int test_nondebug_fs_kuprobe_common(const char *event_type, attr.type = type; fd = sys_perf_event_open(&attr, -1, 0, -1, 0); - CHECK_PERROR_RET(fd < 0); + link = bpf_program__attach_perf_event(progs[0], fd); + if (libbpf_get_error(link)) { + printf("ERROR: bpf_program__attach_perf_event failed\n"); + link = NULL; + close(fd); + goto cleanup; + } - CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0); - CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0); CHECK_PERROR_RET(bpf_task_fd_query(getpid(), fd, 0, buf, buf_len, prog_id, fd_type, probe_offset, probe_addr) < 0); + err = 0; - return 0; +cleanup: + bpf_link__destroy(link); + return err; } static int test_nondebug_fs_probe(const char *event_type, const char *name, @@ -215,17 +227,18 @@ static int test_nondebug_fs_probe(const char *event_type, const char *name, static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return) { + char buf[256], event_alias[sizeof("test_1234567890")]; const char *event_type = "uprobe"; struct perf_event_attr attr = {}; - char buf[256], event_alias[sizeof("test_1234567890")]; __u64 probe_offset, probe_addr; __u32 len, prog_id, fd_type; - int err, res, kfd, efd; + int err = -1, res, kfd, efd; + struct bpf_link *link; ssize_t bytes; snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type); - kfd = open(buf, O_WRONLY | O_APPEND, 0); + kfd = open(buf, O_WRONLY | O_TRUNC, 0); CHECK_PERROR_RET(kfd < 0); res = snprintf(event_alias, sizeof(event_alias), "test_%d", getpid()); @@ -254,10 +267,15 @@ static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return) attr.type = PERF_TYPE_TRACEPOINT; attr.sample_period = 1; attr.wakeup_events = 1; + kfd = sys_perf_event_open(&attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); - CHECK_PERROR_RET(kfd < 0); - CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0); - CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_ENABLE, 0) < 0); + link = bpf_program__attach_perf_event(progs[0], kfd); + if (libbpf_get_error(link)) { + printf("ERROR: bpf_program__attach_perf_event failed\n"); + link = NULL; + close(kfd); + goto cleanup; + } len = sizeof(buf); err = bpf_task_fd_query(getpid(), kfd, 0, buf, &len, @@ -283,9 +301,11 @@ static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return) probe_offset); return -1; } + err = 0; - close(kfd); - return 0; +cleanup: + bpf_link__destroy(link); + return err; } int main(int argc, char **argv) @@ -294,21 +314,42 @@ int main(int argc, char **argv) extern char __executable_start; char filename[256], buf[256]; __u64 uprobe_file_offset; + struct bpf_program *prog; + struct bpf_object *obj; + int i = 0, err = -1; - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); if (setrlimit(RLIMIT_MEMLOCK, &r)) { perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; + return err; } if (load_kallsyms()) { printf("failed to process /proc/kallsyms\n"); - return 1; + return err; + } + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + return err; } - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; + /* load BPF program */ + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } + + bpf_object__for_each_program(prog, obj) { + progs[i] = prog; + links[i] = bpf_program__attach(progs[i]); + if (libbpf_get_error(links[i])) { + fprintf(stderr, "ERROR: bpf_program__attach failed\n"); + links[i] = NULL; + goto cleanup; + } + i++; } /* test two functions in the corresponding *_kern.c file */ @@ -378,6 +419,12 @@ int main(int argc, char **argv) false)); CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset, true)); + err = 0; - return 0; +cleanup: + for (i--; i >= 0; i--) + bpf_link__destroy(links[i]); + + bpf_object__close(obj); + return err; } -- cgit v1.2.3 From 763af200d6160b2f79f45cbf9a85b8dc6e20f2c7 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Tue, 24 Nov 2020 09:03:07 +0000 Subject: samples: bpf: Refactor ibumad program with libbpf This commit refactors the existing ibumad program with libbpf bpf loader. Attach/detach of Tracepoint bpf programs has been managed with the generic bpf_program__attach() and bpf_link__destroy() from the libbpf. Also, instead of using the previous BPF MAP definition, this commit refactors ibumad MAP definition with the new BTF-defined MAP format. To verify that this bpf program works without an infiniband device, try loading ib_umad kernel module and test the program as follows: # modprobe ib_umad # ./ibumad Moreover, TRACE_HELPERS has been removed from the Makefile since it is not used on this program. Signed-off-by: Daniel T. Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201124090310.24374-5-danieltimlee@gmail.com --- samples/bpf/Makefile | 2 +- samples/bpf/ibumad_kern.c | 26 ++++++++--------- samples/bpf/ibumad_user.c | 71 +++++++++++++++++++++++++++++++++++------------ 3 files changed, 68 insertions(+), 31 deletions(-) (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 3bffd42e1482..09a249477554 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -109,7 +109,7 @@ xsk_fwd-objs := xsk_fwd.o xdp_fwd-objs := xdp_fwd_user.o task_fd_query-objs := task_fd_query_user.o $(TRACE_HELPERS) xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS) -ibumad-objs := bpf_load.o ibumad_user.o $(TRACE_HELPERS) +ibumad-objs := ibumad_user.o hbm-objs := hbm.o $(CGROUP_HELPERS) # Tell kbuild to always build the programs diff --git a/samples/bpf/ibumad_kern.c b/samples/bpf/ibumad_kern.c index 3a91b4c1989a..26dcd4dde946 100644 --- a/samples/bpf/ibumad_kern.c +++ b/samples/bpf/ibumad_kern.c @@ -16,19 +16,19 @@ #include -struct bpf_map_def SEC("maps") read_count = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(u32), /* class; u32 required */ - .value_size = sizeof(u64), /* count of mads read */ - .max_entries = 256, /* Room for all Classes */ -}; - -struct bpf_map_def SEC("maps") write_count = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(u32), /* class; u32 required */ - .value_size = sizeof(u64), /* count of mads written */ - .max_entries = 256, /* Room for all Classes */ -}; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); /* class; u32 required */ + __type(value, u64); /* count of mads read */ + __uint(max_entries, 256); /* Room for all Classes */ +} read_count SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); /* class; u32 required */ + __type(value, u64); /* count of mads written */ + __uint(max_entries, 256); /* Room for all Classes */ +} write_count SEC(".maps"); #undef DEBUG #ifndef DEBUG diff --git a/samples/bpf/ibumad_user.c b/samples/bpf/ibumad_user.c index fa06eef31a84..d83d8102f489 100644 --- a/samples/bpf/ibumad_user.c +++ b/samples/bpf/ibumad_user.c @@ -23,10 +23,15 @@ #include #include -#include "bpf_load.h" +#include #include "bpf_util.h" #include +static struct bpf_link *tp_links[3]; +static struct bpf_object *obj; +static int map_fd[2]; +static int tp_cnt; + static void dump_counts(int fd) { __u32 key; @@ -53,6 +58,11 @@ static void dump_all_counts(void) static void dump_exit(int sig) { dump_all_counts(); + /* Detach tracepoints */ + while (tp_cnt) + bpf_link__destroy(tp_links[--tp_cnt]); + + bpf_object__close(obj); exit(0); } @@ -73,19 +83,11 @@ static void usage(char *cmd) int main(int argc, char **argv) { + struct bpf_program *prog; unsigned long delay = 5; + char filename[256]; int longindex = 0; - int opt; - char bpf_file[256]; - - /* Create the eBPF kernel code path name. - * This follows the pattern of all of the other bpf samples - */ - snprintf(bpf_file, sizeof(bpf_file), "%s_kern.o", argv[0]); - - /* Do one final dump when exiting */ - signal(SIGINT, dump_exit); - signal(SIGTERM, dump_exit); + int opt, err = -1; while ((opt = getopt_long(argc, argv, "hd:rSw", long_options, &longindex)) != -1) { @@ -107,16 +109,51 @@ int main(int argc, char **argv) } } - if (load_bpf_file(bpf_file)) { - fprintf(stderr, "ERROR: failed to load eBPF from file : %s\n", - bpf_file); - return 1; + /* Do one final dump when exiting */ + signal(SIGINT, dump_exit); + signal(SIGTERM, dump_exit); + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + return err; + } + + /* load BPF program */ + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } + + map_fd[0] = bpf_object__find_map_fd_by_name(obj, "read_count"); + map_fd[1] = bpf_object__find_map_fd_by_name(obj, "write_count"); + if (map_fd[0] < 0 || map_fd[1] < 0) { + fprintf(stderr, "ERROR: finding a map in obj file failed\n"); + goto cleanup; + } + + bpf_object__for_each_program(prog, obj) { + tp_links[tp_cnt] = bpf_program__attach(prog); + if (libbpf_get_error(tp_links[tp_cnt])) { + fprintf(stderr, "ERROR: bpf_program__attach failed\n"); + tp_links[tp_cnt] = NULL; + goto cleanup; + } + tp_cnt++; } while (1) { sleep(delay); dump_all_counts(); } + err = 0; + +cleanup: + /* Detach tracepoints */ + while (tp_cnt) + bpf_link__destroy(tp_links[--tp_cnt]); - return 0; + bpf_object__close(obj); + return err; } -- cgit v1.2.3 From c6497df0ddc3363c2c940d92c3af0b2620477003 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Tue, 24 Nov 2020 09:03:08 +0000 Subject: samples: bpf: Refactor test_overhead program with libbpf This commit refactors the existing program with libbpf bpf loader. Since the kprobe, tracepoint and raw_tracepoint bpf program can be attached with single bpf_program__attach() interface, so the corresponding function of libbpf is used here. Rather than specifying the number of cpus inside the code, this commit uses the number of available cpus with _SC_NPROCESSORS_ONLN. Signed-off-by: Daniel T. Lee Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201124090310.24374-6-danieltimlee@gmail.com --- samples/bpf/Makefile | 2 +- samples/bpf/test_overhead_user.c | 82 +++++++++++++++++++++++++++++----------- 2 files changed, 60 insertions(+), 24 deletions(-) (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 09a249477554..25380e04897e 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -78,7 +78,7 @@ lathist-objs := lathist_user.o offwaketime-objs := offwaketime_user.o $(TRACE_HELPERS) spintest-objs := spintest_user.o $(TRACE_HELPERS) map_perf_test-objs := map_perf_test_user.o -test_overhead-objs := bpf_load.o test_overhead_user.o +test_overhead-objs := test_overhead_user.o test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o test_cgrp2_attach-objs := test_cgrp2_attach.o test_cgrp2_sock-objs := test_cgrp2_sock.o diff --git a/samples/bpf/test_overhead_user.c b/samples/bpf/test_overhead_user.c index 94f74112a20e..819a6fe86f89 100644 --- a/samples/bpf/test_overhead_user.c +++ b/samples/bpf/test_overhead_user.c @@ -18,10 +18,14 @@ #include #include #include -#include "bpf_load.h" +#include #define MAX_CNT 1000000 +static struct bpf_link *links[2]; +static struct bpf_object *obj; +static int cnt; + static __u64 time_get_ns(void) { struct timespec ts; @@ -115,20 +119,54 @@ static void run_perf_test(int tasks, int flags) } } +static int load_progs(char *filename) +{ + struct bpf_program *prog; + int err = 0; + + obj = bpf_object__open_file(filename, NULL); + err = libbpf_get_error(obj); + if (err < 0) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + return err; + } + + /* load BPF program */ + err = bpf_object__load(obj); + if (err < 0) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + return err; + } + + bpf_object__for_each_program(prog, obj) { + links[cnt] = bpf_program__attach(prog); + err = libbpf_get_error(links[cnt]); + if (err < 0) { + fprintf(stderr, "ERROR: bpf_program__attach failed\n"); + links[cnt] = NULL; + return err; + } + cnt++; + } + + return err; +} + static void unload_progs(void) { - close(prog_fd[0]); - close(prog_fd[1]); - close(event_fd[0]); - close(event_fd[1]); + while (cnt) + bpf_link__destroy(links[--cnt]); + + bpf_object__close(obj); } int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; - char filename[256]; - int num_cpu = 8; + int num_cpu = sysconf(_SC_NPROCESSORS_ONLN); int test_flags = ~0; + char filename[256]; + int err = 0; setrlimit(RLIMIT_MEMLOCK, &r); @@ -145,38 +183,36 @@ int main(int argc, char **argv) if (test_flags & 0xC) { snprintf(filename, sizeof(filename), "%s_kprobe_kern.o", argv[0]); - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; - } + printf("w/KPROBE\n"); - run_perf_test(num_cpu, test_flags >> 2); + err = load_progs(filename); + if (!err) + run_perf_test(num_cpu, test_flags >> 2); + unload_progs(); } if (test_flags & 0x30) { snprintf(filename, sizeof(filename), "%s_tp_kern.o", argv[0]); - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; - } printf("w/TRACEPOINT\n"); - run_perf_test(num_cpu, test_flags >> 4); + err = load_progs(filename); + if (!err) + run_perf_test(num_cpu, test_flags >> 4); + unload_progs(); } if (test_flags & 0xC0) { snprintf(filename, sizeof(filename), "%s_raw_tp_kern.o", argv[0]); - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); - return 1; - } printf("w/RAW_TRACEPOINT\n"); - run_perf_test(num_cpu, test_flags >> 6); + err = load_progs(filename); + if (!err) + run_perf_test(num_cpu, test_flags >> 6); + unload_progs(); } - return 0; + return err; } -- cgit v1.2.3 From 0afe0a998c40085a6342e1aeb4c510cccba46caf Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Tue, 24 Nov 2020 09:03:09 +0000 Subject: samples: bpf: Fix lwt_len_hist reusing previous BPF map Currently, lwt_len_hist's map lwt_len_hist_map is uses pinning, and the map isn't cleared on test end. This leds to reuse of that map for each test, which prevents the results of the test from being accurate. This commit fixes the problem by removing of pinned map from bpffs. Also, this commit add the executable permission to shell script files. Fixes: f74599f7c5309 ("bpf: Add tests and samples for LWT-BPF") Signed-off-by: Daniel T. Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201124090310.24374-7-danieltimlee@gmail.com --- samples/bpf/lwt_len_hist.sh | 2 ++ samples/bpf/test_lwt_bpf.sh | 0 2 files changed, 2 insertions(+) mode change 100644 => 100755 samples/bpf/lwt_len_hist.sh mode change 100644 => 100755 samples/bpf/test_lwt_bpf.sh (limited to 'samples') diff --git a/samples/bpf/lwt_len_hist.sh b/samples/bpf/lwt_len_hist.sh old mode 100644 new mode 100755 index 090b96eaf7f7..0eda9754f50b --- a/samples/bpf/lwt_len_hist.sh +++ b/samples/bpf/lwt_len_hist.sh @@ -8,6 +8,8 @@ VETH1=tst_lwt1b TRACE_ROOT=/sys/kernel/debug/tracing function cleanup { + # To reset saved histogram, remove pinned map + rm /sys/fs/bpf/tc/globals/lwt_len_hist_map ip route del 192.168.253.2/32 dev $VETH0 2> /dev/null ip link del $VETH0 2> /dev/null ip link del $VETH1 2> /dev/null diff --git a/samples/bpf/test_lwt_bpf.sh b/samples/bpf/test_lwt_bpf.sh old mode 100644 new mode 100755 -- cgit v1.2.3 From ceb5dea5654354fb4e6e393c99f1d0bf4debab0e Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Tue, 24 Nov 2020 09:03:10 +0000 Subject: samples: bpf: Remove bpf_load loader completely Numerous refactoring that rewrites BPF programs written with bpf_load to use the libbpf loader was finally completed, resulting in BPF programs using bpf_load within the kernel being completely no longer present. This commit removes bpf_load, an outdated bpf loader that is difficult to keep up with the latest kernel BPF and causes confusion. Also, this commit removes the unused trace_helper and bpf_load from samples/bpf target objects from Makefile. Signed-off-by: Daniel T. Lee Signed-off-by: Andrii Nakryiko Acked-by: Jesper Dangaard Brouer Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201124090310.24374-8-danieltimlee@gmail.com --- samples/bpf/Makefile | 10 +- samples/bpf/bpf_load.c | 667 ---------------------------------------- samples/bpf/bpf_load.h | 57 ---- samples/bpf/xdp2skb_meta_kern.c | 2 +- 4 files changed, 5 insertions(+), 731 deletions(-) delete mode 100644 samples/bpf/bpf_load.c delete mode 100644 samples/bpf/bpf_load.h (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 25380e04897e..05db041f8b18 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -73,7 +73,7 @@ tracex5-objs := tracex5_user.o $(TRACE_HELPERS) tracex6-objs := tracex6_user.o tracex7-objs := tracex7_user.o test_probe_write_user-objs := test_probe_write_user_user.o -trace_output-objs := trace_output_user.o $(TRACE_HELPERS) +trace_output-objs := trace_output_user.o lathist-objs := lathist_user.o offwaketime-objs := offwaketime_user.o $(TRACE_HELPERS) spintest-objs := spintest_user.o $(TRACE_HELPERS) @@ -91,8 +91,8 @@ test_current_task_under_cgroup-objs := $(CGROUP_HELPERS) \ test_current_task_under_cgroup_user.o trace_event-objs := trace_event_user.o $(TRACE_HELPERS) sampleip-objs := sampleip_user.o $(TRACE_HELPERS) -tc_l2_redirect-objs := bpf_load.o tc_l2_redirect_user.o -lwt_len_hist-objs := bpf_load.o lwt_len_hist_user.o +tc_l2_redirect-objs := tc_l2_redirect_user.o +lwt_len_hist-objs := lwt_len_hist_user.o xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o test_map_in_map-objs := test_map_in_map_user.o per_socket_stats_example-objs := cookie_uid_helper_example.o @@ -108,7 +108,7 @@ xdpsock-objs := xdpsock_user.o xsk_fwd-objs := xsk_fwd.o xdp_fwd-objs := xdp_fwd_user.o task_fd_query-objs := task_fd_query_user.o $(TRACE_HELPERS) -xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS) +xdp_sample_pkts-objs := xdp_sample_pkts_user.o ibumad-objs := ibumad_user.o hbm-objs := hbm.o $(CGROUP_HELPERS) @@ -197,8 +197,6 @@ TPROGS_CFLAGS += --sysroot=$(SYSROOT) TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib endif -TPROGCFLAGS_bpf_load.o += -Wno-unused-variable - TPROGS_LDLIBS += $(LIBBPF) -lelf -lz TPROGLDLIBS_tracex4 += -lrt TPROGLDLIBS_trace_output += -lrt diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c deleted file mode 100644 index c5ad528f046e..000000000000 --- a/samples/bpf/bpf_load.c +++ /dev/null @@ -1,667 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "bpf_load.h" -#include "perf-sys.h" - -#define DEBUGFS "/sys/kernel/debug/tracing/" - -static char license[128]; -static int kern_version; -static bool processed_sec[128]; -char bpf_log_buf[BPF_LOG_BUF_SIZE]; -int map_fd[MAX_MAPS]; -int prog_fd[MAX_PROGS]; -int event_fd[MAX_PROGS]; -int prog_cnt; -int prog_array_fd = -1; - -struct bpf_map_data map_data[MAX_MAPS]; -int map_data_count; - -static int populate_prog_array(const char *event, int prog_fd) -{ - int ind = atoi(event), err; - - err = bpf_map_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY); - if (err < 0) { - printf("failed to store prog_fd in prog_array\n"); - return -1; - } - return 0; -} - -static int write_kprobe_events(const char *val) -{ - int fd, ret, flags; - - if (val == NULL) - return -1; - else if (val[0] == '\0') - flags = O_WRONLY | O_TRUNC; - else - flags = O_WRONLY | O_APPEND; - - fd = open(DEBUGFS "kprobe_events", flags); - - ret = write(fd, val, strlen(val)); - close(fd); - - return ret; -} - -static int load_and_attach(const char *event, struct bpf_insn *prog, int size) -{ - bool is_socket = strncmp(event, "socket", 6) == 0; - bool is_kprobe = strncmp(event, "kprobe/", 7) == 0; - bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0; - bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0; - bool is_raw_tracepoint = strncmp(event, "raw_tracepoint/", 15) == 0; - bool is_xdp = strncmp(event, "xdp", 3) == 0; - bool is_perf_event = strncmp(event, "perf_event", 10) == 0; - bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0; - bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0; - bool is_sockops = strncmp(event, "sockops", 7) == 0; - bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0; - bool is_sk_msg = strncmp(event, "sk_msg", 6) == 0; - size_t insns_cnt = size / sizeof(struct bpf_insn); - enum bpf_prog_type prog_type; - char buf[256]; - int fd, efd, err, id; - struct perf_event_attr attr = {}; - - attr.type = PERF_TYPE_TRACEPOINT; - attr.sample_type = PERF_SAMPLE_RAW; - attr.sample_period = 1; - attr.wakeup_events = 1; - - if (is_socket) { - prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - } else if (is_kprobe || is_kretprobe) { - prog_type = BPF_PROG_TYPE_KPROBE; - } else if (is_tracepoint) { - prog_type = BPF_PROG_TYPE_TRACEPOINT; - } else if (is_raw_tracepoint) { - prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT; - } else if (is_xdp) { - prog_type = BPF_PROG_TYPE_XDP; - } else if (is_perf_event) { - prog_type = BPF_PROG_TYPE_PERF_EVENT; - } else if (is_cgroup_skb) { - prog_type = BPF_PROG_TYPE_CGROUP_SKB; - } else if (is_cgroup_sk) { - prog_type = BPF_PROG_TYPE_CGROUP_SOCK; - } else if (is_sockops) { - prog_type = BPF_PROG_TYPE_SOCK_OPS; - } else if (is_sk_skb) { - prog_type = BPF_PROG_TYPE_SK_SKB; - } else if (is_sk_msg) { - prog_type = BPF_PROG_TYPE_SK_MSG; - } else { - printf("Unknown event '%s'\n", event); - return -1; - } - - if (prog_cnt == MAX_PROGS) - return -1; - - fd = bpf_load_program(prog_type, prog, insns_cnt, license, kern_version, - bpf_log_buf, BPF_LOG_BUF_SIZE); - if (fd < 0) { - printf("bpf_load_program() err=%d\n%s", errno, bpf_log_buf); - return -1; - } - - prog_fd[prog_cnt++] = fd; - - if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk) - return 0; - - if (is_socket || is_sockops || is_sk_skb || is_sk_msg) { - if (is_socket) - event += 6; - else - event += 7; - if (*event != '/') - return 0; - event++; - if (!isdigit(*event)) { - printf("invalid prog number\n"); - return -1; - } - return populate_prog_array(event, fd); - } - - if (is_raw_tracepoint) { - efd = bpf_raw_tracepoint_open(event + 15, fd); - if (efd < 0) { - printf("tracepoint %s %s\n", event + 15, strerror(errno)); - return -1; - } - event_fd[prog_cnt - 1] = efd; - return 0; - } - - if (is_kprobe || is_kretprobe) { - bool need_normal_check = true; - const char *event_prefix = ""; - - if (is_kprobe) - event += 7; - else - event += 10; - - if (*event == 0) { - printf("event name cannot be empty\n"); - return -1; - } - - if (isdigit(*event)) - return populate_prog_array(event, fd); - -#ifdef __x86_64__ - if (strncmp(event, "sys_", 4) == 0) { - snprintf(buf, sizeof(buf), "%c:__x64_%s __x64_%s", - is_kprobe ? 'p' : 'r', event, event); - err = write_kprobe_events(buf); - if (err >= 0) { - need_normal_check = false; - event_prefix = "__x64_"; - } - } -#endif - if (need_normal_check) { - snprintf(buf, sizeof(buf), "%c:%s %s", - is_kprobe ? 'p' : 'r', event, event); - err = write_kprobe_events(buf); - if (err < 0) { - printf("failed to create kprobe '%s' error '%s'\n", - event, strerror(errno)); - return -1; - } - } - - strcpy(buf, DEBUGFS); - strcat(buf, "events/kprobes/"); - strcat(buf, event_prefix); - strcat(buf, event); - strcat(buf, "/id"); - } else if (is_tracepoint) { - event += 11; - - if (*event == 0) { - printf("event name cannot be empty\n"); - return -1; - } - strcpy(buf, DEBUGFS); - strcat(buf, "events/"); - strcat(buf, event); - strcat(buf, "/id"); - } - - efd = open(buf, O_RDONLY, 0); - if (efd < 0) { - printf("failed to open event %s\n", event); - return -1; - } - - err = read(efd, buf, sizeof(buf)); - if (err < 0 || err >= sizeof(buf)) { - printf("read from '%s' failed '%s'\n", event, strerror(errno)); - return -1; - } - - close(efd); - - buf[err] = 0; - id = atoi(buf); - attr.config = id; - - efd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0); - if (efd < 0) { - printf("event %d fd %d err %s\n", id, efd, strerror(errno)); - return -1; - } - event_fd[prog_cnt - 1] = efd; - err = ioctl(efd, PERF_EVENT_IOC_ENABLE, 0); - if (err < 0) { - printf("ioctl PERF_EVENT_IOC_ENABLE failed err %s\n", - strerror(errno)); - return -1; - } - err = ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd); - if (err < 0) { - printf("ioctl PERF_EVENT_IOC_SET_BPF failed err %s\n", - strerror(errno)); - return -1; - } - - return 0; -} - -static int load_maps(struct bpf_map_data *maps, int nr_maps, - fixup_map_cb fixup_map) -{ - int i, numa_node; - - for (i = 0; i < nr_maps; i++) { - if (fixup_map) { - fixup_map(&maps[i], i); - /* Allow userspace to assign map FD prior to creation */ - if (maps[i].fd != -1) { - map_fd[i] = maps[i].fd; - continue; - } - } - - numa_node = maps[i].def.map_flags & BPF_F_NUMA_NODE ? - maps[i].def.numa_node : -1; - - if (maps[i].def.type == BPF_MAP_TYPE_ARRAY_OF_MAPS || - maps[i].def.type == BPF_MAP_TYPE_HASH_OF_MAPS) { - int inner_map_fd = map_fd[maps[i].def.inner_map_idx]; - - map_fd[i] = bpf_create_map_in_map_node(maps[i].def.type, - maps[i].name, - maps[i].def.key_size, - inner_map_fd, - maps[i].def.max_entries, - maps[i].def.map_flags, - numa_node); - } else { - map_fd[i] = bpf_create_map_node(maps[i].def.type, - maps[i].name, - maps[i].def.key_size, - maps[i].def.value_size, - maps[i].def.max_entries, - maps[i].def.map_flags, - numa_node); - } - if (map_fd[i] < 0) { - printf("failed to create map %d (%s): %d %s\n", - i, maps[i].name, errno, strerror(errno)); - return 1; - } - maps[i].fd = map_fd[i]; - - if (maps[i].def.type == BPF_MAP_TYPE_PROG_ARRAY) - prog_array_fd = map_fd[i]; - } - return 0; -} - -static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname, - GElf_Shdr *shdr, Elf_Data **data) -{ - Elf_Scn *scn; - - scn = elf_getscn(elf, i); - if (!scn) - return 1; - - if (gelf_getshdr(scn, shdr) != shdr) - return 2; - - *shname = elf_strptr(elf, ehdr->e_shstrndx, shdr->sh_name); - if (!*shname || !shdr->sh_size) - return 3; - - *data = elf_getdata(scn, 0); - if (!*data || elf_getdata(scn, *data) != NULL) - return 4; - - return 0; -} - -static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols, - GElf_Shdr *shdr, struct bpf_insn *insn, - struct bpf_map_data *maps, int nr_maps) -{ - int i, nrels; - - nrels = shdr->sh_size / shdr->sh_entsize; - - for (i = 0; i < nrels; i++) { - GElf_Sym sym; - GElf_Rel rel; - unsigned int insn_idx; - bool match = false; - int j, map_idx; - - gelf_getrel(data, i, &rel); - - insn_idx = rel.r_offset / sizeof(struct bpf_insn); - - gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym); - - if (insn[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) { - printf("invalid relo for insn[%d].code 0x%x\n", - insn_idx, insn[insn_idx].code); - return 1; - } - insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; - - /* Match FD relocation against recorded map_data[] offset */ - for (map_idx = 0; map_idx < nr_maps; map_idx++) { - if (maps[map_idx].elf_offset == sym.st_value) { - match = true; - break; - } - } - if (match) { - insn[insn_idx].imm = maps[map_idx].fd; - } else { - printf("invalid relo for insn[%d] no map_data match\n", - insn_idx); - return 1; - } - } - - return 0; -} - -static int cmp_symbols(const void *l, const void *r) -{ - const GElf_Sym *lsym = (const GElf_Sym *)l; - const GElf_Sym *rsym = (const GElf_Sym *)r; - - if (lsym->st_value < rsym->st_value) - return -1; - else if (lsym->st_value > rsym->st_value) - return 1; - else - return 0; -} - -static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx, - Elf *elf, Elf_Data *symbols, int strtabidx) -{ - int map_sz_elf, map_sz_copy; - bool validate_zero = false; - Elf_Data *data_maps; - int i, nr_maps; - GElf_Sym *sym; - Elf_Scn *scn; - int copy_sz; - - if (maps_shndx < 0) - return -EINVAL; - if (!symbols) - return -EINVAL; - - /* Get data for maps section via elf index */ - scn = elf_getscn(elf, maps_shndx); - if (scn) - data_maps = elf_getdata(scn, NULL); - if (!scn || !data_maps) { - printf("Failed to get Elf_Data from maps section %d\n", - maps_shndx); - return -EINVAL; - } - - /* For each map get corrosponding symbol table entry */ - sym = calloc(MAX_MAPS+1, sizeof(GElf_Sym)); - for (i = 0, nr_maps = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) { - assert(nr_maps < MAX_MAPS+1); - if (!gelf_getsym(symbols, i, &sym[nr_maps])) - continue; - if (sym[nr_maps].st_shndx != maps_shndx) - continue; - /* Only increment iif maps section */ - nr_maps++; - } - - /* Align to map_fd[] order, via sort on offset in sym.st_value */ - qsort(sym, nr_maps, sizeof(GElf_Sym), cmp_symbols); - - /* Keeping compatible with ELF maps section changes - * ------------------------------------------------ - * The program size of struct bpf_load_map_def is known by loader - * code, but struct stored in ELF file can be different. - * - * Unfortunately sym[i].st_size is zero. To calculate the - * struct size stored in the ELF file, assume all struct have - * the same size, and simply divide with number of map - * symbols. - */ - map_sz_elf = data_maps->d_size / nr_maps; - map_sz_copy = sizeof(struct bpf_load_map_def); - if (map_sz_elf < map_sz_copy) { - /* - * Backward compat, loading older ELF file with - * smaller struct, keeping remaining bytes zero. - */ - map_sz_copy = map_sz_elf; - } else if (map_sz_elf > map_sz_copy) { - /* - * Forward compat, loading newer ELF file with larger - * struct with unknown features. Assume zero means - * feature not used. Thus, validate rest of struct - * data is zero. - */ - validate_zero = true; - } - - /* Memcpy relevant part of ELF maps data to loader maps */ - for (i = 0; i < nr_maps; i++) { - struct bpf_load_map_def *def; - unsigned char *addr, *end; - const char *map_name; - size_t offset; - - map_name = elf_strptr(elf, strtabidx, sym[i].st_name); - maps[i].name = strdup(map_name); - if (!maps[i].name) { - printf("strdup(%s): %s(%d)\n", map_name, - strerror(errno), errno); - free(sym); - return -errno; - } - - /* Symbol value is offset into ELF maps section data area */ - offset = sym[i].st_value; - def = (struct bpf_load_map_def *)(data_maps->d_buf + offset); - maps[i].elf_offset = offset; - memset(&maps[i].def, 0, sizeof(struct bpf_load_map_def)); - memcpy(&maps[i].def, def, map_sz_copy); - - /* Verify no newer features were requested */ - if (validate_zero) { - addr = (unsigned char *) def + map_sz_copy; - end = (unsigned char *) def + map_sz_elf; - for (; addr < end; addr++) { - if (*addr != 0) { - free(sym); - return -EFBIG; - } - } - } - } - - free(sym); - return nr_maps; -} - -static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) -{ - int fd, i, ret, maps_shndx = -1, strtabidx = -1; - Elf *elf; - GElf_Ehdr ehdr; - GElf_Shdr shdr, shdr_prog; - Elf_Data *data, *data_prog, *data_maps = NULL, *symbols = NULL; - char *shname, *shname_prog; - int nr_maps = 0; - - /* reset global variables */ - kern_version = 0; - memset(license, 0, sizeof(license)); - memset(processed_sec, 0, sizeof(processed_sec)); - - if (elf_version(EV_CURRENT) == EV_NONE) - return 1; - - fd = open(path, O_RDONLY, 0); - if (fd < 0) - return 1; - - elf = elf_begin(fd, ELF_C_READ, NULL); - - if (!elf) - return 1; - - if (gelf_getehdr(elf, &ehdr) != &ehdr) - return 1; - - /* clear all kprobes */ - i = write_kprobe_events(""); - - /* scan over all elf sections to get license and map info */ - for (i = 1; i < ehdr.e_shnum; i++) { - - if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) - continue; - - if (0) /* helpful for llvm debugging */ - printf("section %d:%s data %p size %zd link %d flags %d\n", - i, shname, data->d_buf, data->d_size, - shdr.sh_link, (int) shdr.sh_flags); - - if (strcmp(shname, "license") == 0) { - processed_sec[i] = true; - memcpy(license, data->d_buf, data->d_size); - } else if (strcmp(shname, "version") == 0) { - processed_sec[i] = true; - if (data->d_size != sizeof(int)) { - printf("invalid size of version section %zd\n", - data->d_size); - return 1; - } - memcpy(&kern_version, data->d_buf, sizeof(int)); - } else if (strcmp(shname, "maps") == 0) { - int j; - - maps_shndx = i; - data_maps = data; - for (j = 0; j < MAX_MAPS; j++) - map_data[j].fd = -1; - } else if (shdr.sh_type == SHT_SYMTAB) { - strtabidx = shdr.sh_link; - symbols = data; - } - } - - ret = 1; - - if (!symbols) { - printf("missing SHT_SYMTAB section\n"); - goto done; - } - - if (data_maps) { - nr_maps = load_elf_maps_section(map_data, maps_shndx, - elf, symbols, strtabidx); - if (nr_maps < 0) { - printf("Error: Failed loading ELF maps (errno:%d):%s\n", - nr_maps, strerror(-nr_maps)); - goto done; - } - if (load_maps(map_data, nr_maps, fixup_map)) - goto done; - map_data_count = nr_maps; - - processed_sec[maps_shndx] = true; - } - - /* process all relo sections, and rewrite bpf insns for maps */ - for (i = 1; i < ehdr.e_shnum; i++) { - if (processed_sec[i]) - continue; - - if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) - continue; - - if (shdr.sh_type == SHT_REL) { - struct bpf_insn *insns; - - /* locate prog sec that need map fixup (relocations) */ - if (get_sec(elf, shdr.sh_info, &ehdr, &shname_prog, - &shdr_prog, &data_prog)) - continue; - - if (shdr_prog.sh_type != SHT_PROGBITS || - !(shdr_prog.sh_flags & SHF_EXECINSTR)) - continue; - - insns = (struct bpf_insn *) data_prog->d_buf; - processed_sec[i] = true; /* relo section */ - - if (parse_relo_and_apply(data, symbols, &shdr, insns, - map_data, nr_maps)) - continue; - } - } - - /* load programs */ - for (i = 1; i < ehdr.e_shnum; i++) { - - if (processed_sec[i]) - continue; - - if (get_sec(elf, i, &ehdr, &shname, &shdr, &data)) - continue; - - if (memcmp(shname, "kprobe/", 7) == 0 || - memcmp(shname, "kretprobe/", 10) == 0 || - memcmp(shname, "tracepoint/", 11) == 0 || - memcmp(shname, "raw_tracepoint/", 15) == 0 || - memcmp(shname, "xdp", 3) == 0 || - memcmp(shname, "perf_event", 10) == 0 || - memcmp(shname, "socket", 6) == 0 || - memcmp(shname, "cgroup/", 7) == 0 || - memcmp(shname, "sockops", 7) == 0 || - memcmp(shname, "sk_skb", 6) == 0 || - memcmp(shname, "sk_msg", 6) == 0) { - ret = load_and_attach(shname, data->d_buf, - data->d_size); - if (ret != 0) - goto done; - } - } - -done: - close(fd); - return ret; -} - -int load_bpf_file(char *path) -{ - return do_load_bpf_file(path, NULL); -} - -int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map) -{ - return do_load_bpf_file(path, fixup_map); -} diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h deleted file mode 100644 index 4fcd258c616f..000000000000 --- a/samples/bpf/bpf_load.h +++ /dev/null @@ -1,57 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __BPF_LOAD_H -#define __BPF_LOAD_H - -#include - -#define MAX_MAPS 32 -#define MAX_PROGS 32 - -struct bpf_load_map_def { - unsigned int type; - unsigned int key_size; - unsigned int value_size; - unsigned int max_entries; - unsigned int map_flags; - unsigned int inner_map_idx; - unsigned int numa_node; -}; - -struct bpf_map_data { - int fd; - char *name; - size_t elf_offset; - struct bpf_load_map_def def; -}; - -typedef void (*fixup_map_cb)(struct bpf_map_data *map, int idx); - -extern int prog_fd[MAX_PROGS]; -extern int event_fd[MAX_PROGS]; -extern char bpf_log_buf[BPF_LOG_BUF_SIZE]; -extern int prog_cnt; - -/* There is a one-to-one mapping between map_fd[] and map_data[]. - * The map_data[] just contains more rich info on the given map. - */ -extern int map_fd[MAX_MAPS]; -extern struct bpf_map_data map_data[MAX_MAPS]; -extern int map_data_count; - -/* parses elf file compiled by llvm .c->.o - * . parses 'maps' section and creates maps via BPF syscall - * . parses 'license' section and passes it to syscall - * . parses elf relocations for BPF maps and adjusts BPF_LD_IMM64 insns by - * storing map_fd into insn->imm and marking such insns as BPF_PSEUDO_MAP_FD - * . loads eBPF programs via BPF syscall - * - * One ELF file can contain multiple BPF programs which will be loaded - * and their FDs stored stored in prog_fd array - * - * returns zero on success - */ -int load_bpf_file(char *path); -int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map); - -int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); -#endif diff --git a/samples/bpf/xdp2skb_meta_kern.c b/samples/bpf/xdp2skb_meta_kern.c index 9b783316e860..d5631014a176 100644 --- a/samples/bpf/xdp2skb_meta_kern.c +++ b/samples/bpf/xdp2skb_meta_kern.c @@ -6,7 +6,7 @@ * This uses the XDP data_meta infrastructure, and is a cooperation * between two bpf-programs (1) XDP and (2) clsact at TC-ingress hook. * - * Notice: This example does not use the BPF C-loader (bpf_load.c), + * Notice: This example does not use the BPF C-loader, * but instead rely on the iproute2 TC tool for loading BPF-objects. */ #include -- cgit v1.2.3 From f2d2728220ac6482c69c5f018ec09bafd688e7d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Mon, 30 Nov 2020 19:52:02 +0100 Subject: samples/bpf: Use recvfrom() in xdpsock/rxdrop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Start using recvfrom() the rxdrop scenario. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20201130185205.196029-8-bjorn.topel@gmail.com --- samples/bpf/xdpsock_user.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'samples') diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 2567f0db5aca..f90111b95b2e 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1170,7 +1170,7 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk, } } -static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds) +static void rx_drop(struct xsk_socket_info *xsk) { unsigned int rcvd, i; u32 idx_rx = 0, idx_fq = 0; @@ -1180,7 +1180,7 @@ static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds) if (!rcvd) { if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { xsk->app_stats.rx_empty_polls++; - ret = poll(fds, num_socks, opt_timeout); + recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL); } return; } @@ -1191,7 +1191,7 @@ static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds) exit_with_error(-ret); if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { xsk->app_stats.fill_fail_polls++; - ret = poll(fds, num_socks, opt_timeout); + recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL); } ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); } @@ -1233,7 +1233,7 @@ static void rx_drop_all(void) } for (i = 0; i < num_socks; i++) - rx_drop(xsks[i], fds); + rx_drop(xsks[i]); if (benchmark_done) break; -- cgit v1.2.3 From 284cbc61f851bf86326b28acfe6d161691d4a4ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Mon, 30 Nov 2020 19:52:03 +0100 Subject: samples/bpf: Use recvfrom() in xdpsock/l2fwd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Start using recvfrom() the l2fwd scenario, instead of poll() which is more expensive and need additional knobs for busy-polling. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20201130185205.196029-9-bjorn.topel@gmail.com --- samples/bpf/xdpsock_user.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'samples') diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index f90111b95b2e..a1a3d6f02ba9 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1098,8 +1098,7 @@ static void kick_tx(struct xsk_socket_info *xsk) exit_with_error(errno); } -static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk, - struct pollfd *fds) +static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk) { struct xsk_umem_info *umem = xsk->umem; u32 idx_cq = 0, idx_fq = 0; @@ -1134,7 +1133,8 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk, exit_with_error(-ret); if (xsk_ring_prod__needs_wakeup(&umem->fq)) { xsk->app_stats.fill_fail_polls++; - ret = poll(fds, num_socks, opt_timeout); + recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, + NULL); } ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); } @@ -1331,19 +1331,19 @@ static void tx_only_all(void) complete_tx_only_all(); } -static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds) +static void l2fwd(struct xsk_socket_info *xsk) { unsigned int rcvd, i; u32 idx_rx = 0, idx_tx = 0; int ret; - complete_tx_l2fwd(xsk, fds); + complete_tx_l2fwd(xsk); rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx); if (!rcvd) { if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { xsk->app_stats.rx_empty_polls++; - ret = poll(fds, num_socks, opt_timeout); + recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL); } return; } @@ -1353,7 +1353,7 @@ static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds) while (ret != rcvd) { if (ret < 0) exit_with_error(-ret); - complete_tx_l2fwd(xsk, fds); + complete_tx_l2fwd(xsk); if (xsk_ring_prod__needs_wakeup(&xsk->tx)) { xsk->app_stats.tx_wakeup_sendtos++; kick_tx(xsk); @@ -1388,22 +1388,20 @@ static void l2fwd_all(void) struct pollfd fds[MAX_SOCKS] = {}; int i, ret; - for (i = 0; i < num_socks; i++) { - fds[i].fd = xsk_socket__fd(xsks[i]->xsk); - fds[i].events = POLLOUT | POLLIN; - } - for (;;) { if (opt_poll) { - for (i = 0; i < num_socks; i++) + for (i = 0; i < num_socks; i++) { + fds[i].fd = xsk_socket__fd(xsks[i]->xsk); + fds[i].events = POLLOUT | POLLIN; xsks[i]->app_stats.opt_polls++; + } ret = poll(fds, num_socks, opt_timeout); if (ret <= 0) continue; } for (i = 0; i < num_socks; i++) - l2fwd(xsks[i], fds); + l2fwd(xsks[i]); if (benchmark_done) break; -- cgit v1.2.3 From b35fc1482ceb2f36bedd1587c3cfea3167baa2f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Mon, 30 Nov 2020 19:52:04 +0100 Subject: samples/bpf: Add busy-poll support to xdpsock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new option to xdpsock, 'B', for busy-polling. This option will also set the batching size, 'b' option, to the busy-poll budget. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20201130185205.196029-10-bjorn.topel@gmail.com --- samples/bpf/xdpsock_user.c | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) (limited to 'samples') diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index a1a3d6f02ba9..4622a17fafe1 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -95,6 +95,7 @@ static int opt_timeout = 1000; static bool opt_need_wakeup = true; static u32 opt_num_xsks = 1; static u32 prog_id; +static bool opt_busy_poll; struct xsk_ring_stats { unsigned long rx_npkts; @@ -911,6 +912,7 @@ static struct option long_options[] = { {"quiet", no_argument, 0, 'Q'}, {"app-stats", no_argument, 0, 'a'}, {"irq-string", no_argument, 0, 'I'}, + {"busy-poll", no_argument, 0, 'B'}, {0, 0, 0, 0} }; @@ -949,6 +951,7 @@ static void usage(const char *prog) " -Q, --quiet Do not display any stats.\n" " -a, --app-stats Display application (syscall) statistics.\n" " -I, --irq-string Display driver interrupt statistics for interface associated with irq-string.\n" + " -B, --busy-poll Busy poll.\n" "\n"; fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE, opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE, @@ -964,7 +967,7 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQaI:", + c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQaI:B", long_options, &option_index); if (c == -1) break; @@ -1062,7 +1065,9 @@ static void parse_command_line(int argc, char **argv) fprintf(stderr, "ERROR: Failed to get irqs for %s\n", opt_irq_str); usage(basename(argv[0])); } - + break; + case 'B': + opt_busy_poll = 1; break; default: usage(basename(argv[0])); @@ -1131,7 +1136,7 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk) while (ret != rcvd) { if (ret < 0) exit_with_error(-ret); - if (xsk_ring_prod__needs_wakeup(&umem->fq)) { + if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&umem->fq)) { xsk->app_stats.fill_fail_polls++; recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL); @@ -1178,7 +1183,7 @@ static void rx_drop(struct xsk_socket_info *xsk) rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx); if (!rcvd) { - if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { + if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { xsk->app_stats.rx_empty_polls++; recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL); } @@ -1189,7 +1194,7 @@ static void rx_drop(struct xsk_socket_info *xsk) while (ret != rcvd) { if (ret < 0) exit_with_error(-ret); - if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { + if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { xsk->app_stats.fill_fail_polls++; recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL); } @@ -1341,7 +1346,7 @@ static void l2fwd(struct xsk_socket_info *xsk) rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx); if (!rcvd) { - if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { + if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { xsk->app_stats.rx_empty_polls++; recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL); } @@ -1354,7 +1359,7 @@ static void l2fwd(struct xsk_socket_info *xsk) if (ret < 0) exit_with_error(-ret); complete_tx_l2fwd(xsk); - if (xsk_ring_prod__needs_wakeup(&xsk->tx)) { + if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&xsk->tx)) { xsk->app_stats.tx_wakeup_sendtos++; kick_tx(xsk); } @@ -1459,6 +1464,24 @@ static void enter_xsks_into_map(struct bpf_object *obj) } } +static void apply_setsockopt(struct xsk_socket_info *xsk) +{ + int sock_opt; + + if (!opt_busy_poll) + return; + + sock_opt = 1; + if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL, + (void *)&sock_opt, sizeof(sock_opt)) < 0) + exit_with_error(errno); + + sock_opt = 20; + if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL, + (void *)&sock_opt, sizeof(sock_opt)) < 0) + exit_with_error(errno); +} + int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; @@ -1500,6 +1523,9 @@ int main(int argc, char **argv) for (i = 0; i < opt_num_xsks; i++) xsks[num_socks++] = xsk_configure_socket(umem, rx, tx); + for (i = 0; i < opt_num_xsks; i++) + apply_setsockopt(xsks[i]); + if (opt_bench == BENCH_TXONLY) { gen_eth_hdr_data(); -- cgit v1.2.3 From 41bf900fe2a0cd56bdc3639ac73d509d52826149 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Mon, 30 Nov 2020 19:52:05 +0100 Subject: samples/bpf: Add option to set the busy-poll budget MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Support for the SO_BUSY_POLL_BUDGET setsockopt, via the batching option ('b'). Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20201130185205.196029-11-bjorn.topel@gmail.com --- samples/bpf/xdpsock_user.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'samples') diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 4622a17fafe1..036bd019e400 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1480,6 +1480,11 @@ static void apply_setsockopt(struct xsk_socket_info *xsk) if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL, (void *)&sock_opt, sizeof(sock_opt)) < 0) exit_with_error(errno); + + sock_opt = opt_batch_size; + if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET, + (void *)&sock_opt, sizeof(sock_opt)) < 0) + exit_with_error(errno); } int main(int argc, char **argv) -- cgit v1.2.3 From 5b0764b2d34510bc87d33a580da98f77789ac36f Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 1 Dec 2020 13:59:00 -0800 Subject: bpf: samples: Do not touch RLIMIT_MEMLOCK Since bpf is not using rlimit memlock for the memory accounting and control, do not change the limit in sample applications. Signed-off-by: Roman Gushchin Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201201215900.3569844-35-guro@fb.com --- samples/bpf/map_perf_test_user.c | 6 ------ samples/bpf/offwaketime_user.c | 6 ------ samples/bpf/sockex2_user.c | 2 -- samples/bpf/sockex3_user.c | 2 -- samples/bpf/spintest_user.c | 6 ------ samples/bpf/syscall_tp_user.c | 2 -- samples/bpf/task_fd_query_user.c | 6 ------ samples/bpf/test_lru_dist.c | 3 --- samples/bpf/test_map_in_map_user.c | 6 ------ samples/bpf/test_overhead_user.c | 2 -- samples/bpf/trace_event_user.c | 2 -- samples/bpf/tracex2_user.c | 6 ------ samples/bpf/tracex3_user.c | 6 ------ samples/bpf/tracex4_user.c | 6 ------ samples/bpf/tracex5_user.c | 3 --- samples/bpf/tracex6_user.c | 3 --- samples/bpf/xdp1_user.c | 6 ------ samples/bpf/xdp_adjust_tail_user.c | 6 ------ samples/bpf/xdp_monitor_user.c | 5 ----- samples/bpf/xdp_redirect_cpu_user.c | 6 ------ samples/bpf/xdp_redirect_map_user.c | 6 ------ samples/bpf/xdp_redirect_user.c | 6 ------ samples/bpf/xdp_router_ipv4_user.c | 6 ------ samples/bpf/xdp_rxq_info_user.c | 6 ------ samples/bpf/xdp_sample_pkts_user.c | 6 ------ samples/bpf/xdp_tx_iptunnel_user.c | 6 ------ samples/bpf/xdpsock_user.c | 7 ------- 27 files changed, 133 deletions(-) (limited to 'samples') diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index 8b13230b4c46..9db949290a78 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -421,7 +421,6 @@ static void fixup_map(struct bpf_object *obj) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; int nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); struct bpf_link *links[8]; struct bpf_program *prog; @@ -430,11 +429,6 @@ int main(int argc, char **argv) char filename[256]; int i = 0; - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - if (argc > 1) test_flags = atoi(argv[1]) ? : test_flags; diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c index 5734cfdaaacb..73a986876c1a 100644 --- a/samples/bpf/offwaketime_user.c +++ b/samples/bpf/offwaketime_user.c @@ -95,18 +95,12 @@ static void int_exit(int sig) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_object *obj = NULL; struct bpf_link *links[2]; struct bpf_program *prog; int delay = 1, i = 0; char filename[256]; - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - if (load_kallsyms()) { printf("failed to process /proc/kallsyms\n"); return 2; diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c index af925a5afd1d..bafa567b840c 100644 --- a/samples/bpf/sockex2_user.c +++ b/samples/bpf/sockex2_user.c @@ -16,7 +16,6 @@ struct pair { int main(int ac, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_object *obj; int map_fd, prog_fd; char filename[256]; @@ -24,7 +23,6 @@ int main(int ac, char **argv) FILE *f; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - setrlimit(RLIMIT_MEMLOCK, &r); if (bpf_prog_load(filename, BPF_PROG_TYPE_SOCKET_FILTER, &obj, &prog_fd)) diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c index 7793f6a6ae7e..6ae99ecc766c 100644 --- a/samples/bpf/sockex3_user.c +++ b/samples/bpf/sockex3_user.c @@ -26,7 +26,6 @@ struct pair { int main(int argc, char **argv) { int i, sock, key, fd, main_prog_fd, jmp_table_fd, hash_map_fd; - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_program *prog; struct bpf_object *obj; const char *section; @@ -34,7 +33,6 @@ int main(int argc, char **argv) FILE *f; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - setrlimit(RLIMIT_MEMLOCK, &r); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c index f090d0dc60d6..0d7e1e5a8658 100644 --- a/samples/bpf/spintest_user.c +++ b/samples/bpf/spintest_user.c @@ -10,7 +10,6 @@ int main(int ac, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; char filename[256], symbol[256]; struct bpf_object *obj = NULL; struct bpf_link *links[20]; @@ -20,11 +19,6 @@ int main(int ac, char **argv) const char *section; struct ksym *sym; - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - if (load_kallsyms()) { printf("failed to process /proc/kallsyms\n"); return 2; diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c index 76a1d00128fb..a0ebf1833ed3 100644 --- a/samples/bpf/syscall_tp_user.c +++ b/samples/bpf/syscall_tp_user.c @@ -115,7 +115,6 @@ cleanup: int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; int opt, num_progs = 1; char filename[256]; @@ -131,7 +130,6 @@ int main(int argc, char **argv) } } - setrlimit(RLIMIT_MEMLOCK, &r); snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); return test(filename, num_progs); diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c index f6b772faa348..a78025b0026b 100644 --- a/samples/bpf/task_fd_query_user.c +++ b/samples/bpf/task_fd_query_user.c @@ -310,7 +310,6 @@ cleanup: int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; extern char __executable_start; char filename[256], buf[256]; __u64 uprobe_file_offset; @@ -318,11 +317,6 @@ int main(int argc, char **argv) struct bpf_object *obj; int i = 0, err = -1; - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return err; - } - if (load_kallsyms()) { printf("failed to process /proc/kallsyms\n"); return err; diff --git a/samples/bpf/test_lru_dist.c b/samples/bpf/test_lru_dist.c index b313dba4111b..c92c5c06b965 100644 --- a/samples/bpf/test_lru_dist.c +++ b/samples/bpf/test_lru_dist.c @@ -489,7 +489,6 @@ static void test_parallel_lru_loss(int map_type, int map_flags, int nr_tasks) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; int map_flags[] = {0, BPF_F_NO_COMMON_LRU}; const char *dist_file; int nr_tasks = 1; @@ -508,8 +507,6 @@ int main(int argc, char **argv) setbuf(stdout, NULL); - assert(!setrlimit(RLIMIT_MEMLOCK, &r)); - srand(time(NULL)); nr_cpus = bpf_num_possible_cpus(); diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c index 98656de56b83..472d65c70354 100644 --- a/samples/bpf/test_map_in_map_user.c +++ b/samples/bpf/test_map_in_map_user.c @@ -114,17 +114,11 @@ static void test_map_in_map(void) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_link *link = NULL; struct bpf_program *prog; struct bpf_object *obj; char filename[256]; - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { diff --git a/samples/bpf/test_overhead_user.c b/samples/bpf/test_overhead_user.c index 819a6fe86f89..4821f9d99c1f 100644 --- a/samples/bpf/test_overhead_user.c +++ b/samples/bpf/test_overhead_user.c @@ -162,13 +162,11 @@ static void unload_progs(void) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; int num_cpu = sysconf(_SC_NPROCESSORS_ONLN); int test_flags = ~0; char filename[256]; int err = 0; - setrlimit(RLIMIT_MEMLOCK, &r); if (argc > 1) test_flags = atoi(argv[1]) ? : test_flags; diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c index ac1ba368195c..9664749bf618 100644 --- a/samples/bpf/trace_event_user.c +++ b/samples/bpf/trace_event_user.c @@ -294,13 +294,11 @@ static void test_bpf_perf_event(void) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_object *obj = NULL; char filename[256]; int error = 1; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - setrlimit(RLIMIT_MEMLOCK, &r); signal(SIGINT, err_exit); signal(SIGTERM, err_exit); diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c index 3d6eab711d23..1626d51dfffd 100644 --- a/samples/bpf/tracex2_user.c +++ b/samples/bpf/tracex2_user.c @@ -116,7 +116,6 @@ static void int_exit(int sig) int main(int ac, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; long key, next_key, value; struct bpf_link *links[2]; struct bpf_program *prog; @@ -125,11 +124,6 @@ int main(int ac, char **argv) int i, j = 0; FILE *f; - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c index 83e0fecbb01a..33e16ba39f25 100644 --- a/samples/bpf/tracex3_user.c +++ b/samples/bpf/tracex3_user.c @@ -107,7 +107,6 @@ static void print_hist(int fd) int main(int ac, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_link *links[2]; struct bpf_program *prog; struct bpf_object *obj; @@ -127,11 +126,6 @@ int main(int ac, char **argv) } } - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c index e8faf8f184ae..cea399424bca 100644 --- a/samples/bpf/tracex4_user.c +++ b/samples/bpf/tracex4_user.c @@ -48,18 +48,12 @@ static void print_old_objects(int fd) int main(int ac, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_link *links[2]; struct bpf_program *prog; struct bpf_object *obj; char filename[256]; int map_fd, i, j = 0; - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)"); - return 1; - } - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c index c17d3fb5fd64..08dfdc77ad2a 100644 --- a/samples/bpf/tracex5_user.c +++ b/samples/bpf/tracex5_user.c @@ -34,7 +34,6 @@ static void install_accept_all_seccomp(void) int main(int ac, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_link *link = NULL; struct bpf_program *prog; struct bpf_object *obj; @@ -43,8 +42,6 @@ int main(int ac, char **argv) char filename[256]; FILE *f; - setrlimit(RLIMIT_MEMLOCK, &r); - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c index 33df9784775d..28296f40c133 100644 --- a/samples/bpf/tracex6_user.c +++ b/samples/bpf/tracex6_user.c @@ -175,15 +175,12 @@ static void test_bpf_perf_event(void) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_link *links[2]; struct bpf_program *prog; struct bpf_object *obj; char filename[256]; int i = 0; - setrlimit(RLIMIT_MEMLOCK, &r); - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index c447ad9e3a1d..116e39f6b666 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -79,7 +79,6 @@ static void usage(const char *prog) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; @@ -117,11 +116,6 @@ int main(int argc, char **argv) return 1; } - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - ifindex = if_nametoindex(argv[optind]); if (!ifindex) { perror("if_nametoindex"); diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c index ba482dc3da33..a70b094c8ec5 100644 --- a/samples/bpf/xdp_adjust_tail_user.c +++ b/samples/bpf/xdp_adjust_tail_user.c @@ -82,7 +82,6 @@ static void usage(const char *cmd) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; @@ -143,11 +142,6 @@ int main(int argc, char **argv) } } - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)"); - return 1; - } - if (!ifindex) { fprintf(stderr, "Invalid ifname\n"); return 1; diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c index 03d0a182913f..49ebc49aefc3 100644 --- a/samples/bpf/xdp_monitor_user.c +++ b/samples/bpf/xdp_monitor_user.c @@ -687,7 +687,6 @@ static void print_bpf_prog_info(void) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_program *prog; int longindex = 0, opt; int ret = EXIT_FAILURE; @@ -719,10 +718,6 @@ int main(int argc, char **argv) } snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return ret; - } /* Remove tracepoint program when program is interrupted or killed */ signal(SIGINT, int_exit); diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index f78cb18319aa..576411612523 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -765,7 +765,6 @@ static int load_cpumap_prog(char *file_name, char *prog_name, int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs"; char *mprog_filename = "xdp_redirect_kern.o"; char *redir_interface = NULL, *redir_map = NULL; @@ -804,11 +803,6 @@ int main(int argc, char **argv) snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); prog_load_attr.file = filename; - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) return err; diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c index 35e16dee613e..31131b6e7782 100644 --- a/samples/bpf/xdp_redirect_map_user.c +++ b/samples/bpf/xdp_redirect_map_user.c @@ -96,7 +96,6 @@ static void usage(const char *prog) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; @@ -135,11 +134,6 @@ int main(int argc, char **argv) return 1; } - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - ifindex_in = if_nametoindex(argv[optind]); if (!ifindex_in) ifindex_in = strtoul(argv[optind], NULL, 0); diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c index 9ca2bf457cda..41d705c3a1f7 100644 --- a/samples/bpf/xdp_redirect_user.c +++ b/samples/bpf/xdp_redirect_user.c @@ -97,7 +97,6 @@ static void usage(const char *prog) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; @@ -136,11 +135,6 @@ int main(int argc, char **argv) return 1; } - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - ifindex_in = if_nametoindex(argv[optind]); if (!ifindex_in) ifindex_in = strtoul(argv[optind], NULL, 0); diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index c2da1b51ff95..b5f03cb17a3c 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -625,7 +625,6 @@ static void usage(const char *prog) int main(int ac, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; @@ -670,11 +669,6 @@ int main(int ac, char **argv) return 1; } - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) return 1; diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c index 93fa1bc54f13..74a2926eba08 100644 --- a/samples/bpf/xdp_rxq_info_user.c +++ b/samples/bpf/xdp_rxq_info_user.c @@ -450,7 +450,6 @@ static void stats_poll(int interval, int action, __u32 cfg_opt) int main(int argc, char **argv) { __u32 cfg_options= NO_TOUCH ; /* Default: Don't touch packet memory */ - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; @@ -474,11 +473,6 @@ int main(int argc, char **argv) snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); prog_load_attr.file = filename; - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) return EXIT_FAIL; diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c index 4b2a300c750c..706475e004cb 100644 --- a/samples/bpf/xdp_sample_pkts_user.c +++ b/samples/bpf/xdp_sample_pkts_user.c @@ -109,7 +109,6 @@ static void usage(const char *prog) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; @@ -143,11 +142,6 @@ int main(int argc, char **argv) return 1; } - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); prog_load_attr.file = filename; diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index a419bee151a8..1d4f305d02aa 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -155,7 +155,6 @@ int main(int argc, char **argv) struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; int min_port = 0, max_port = 0, vip2tnl_map_fd; const char *optstr = "i:a:p:s:d:m:T:P:FSNh"; unsigned char opt_flags[256] = {}; @@ -254,11 +253,6 @@ int main(int argc, char **argv) } } - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)"); - return 1; - } - if (!ifindex) { fprintf(stderr, "Invalid ifname\n"); return 1; diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 036bd019e400..909f77647deb 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1489,7 +1489,6 @@ static void apply_setsockopt(struct xsk_socket_info *xsk) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; bool rx = false, tx = false; struct xsk_umem_info *umem; struct bpf_object *obj; @@ -1499,12 +1498,6 @@ int main(int argc, char **argv) parse_command_line(argc, argv); - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n", - strerror(errno)); - exit(EXIT_FAILURE); - } - if (opt_num_xsks > 1) load_xdp_program(argv, &obj); -- cgit v1.2.3 From 3627d9702d789804a1f4c5a52eabdae810cd9def Mon Sep 17 00:00:00 2001 From: Mariusz Dudek Date: Thu, 3 Dec 2020 10:05:46 +0100 Subject: samples/bpf: Sample application for eBPF load and socket creation split Introduce a sample program to demonstrate the control and data plane split. For the control plane part a new program called xdpsock_ctrl_proc is introduced. For the data plane part, some code was added to xdpsock_user.c to act as the data plane entity. Application xdpsock_ctrl_proc works as control entity with sudo privileges (CAP_SYS_ADMIN and CAP_NET_ADMIN are sufficient) and the extended xdpsock as data plane entity with CAP_NET_RAW capability only. Usage example: sudo ./samples/bpf/xdpsock_ctrl_proc -i sudo ./samples/bpf/xdpsock -i -q -n -N -l -R Signed-off-by: Mariusz Dudek Signed-off-by: Alexei Starovoitov Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20201203090546.11976-3-mariuszx.dudek@intel.com --- samples/bpf/Makefile | 4 +- samples/bpf/xdpsock.h | 8 ++ samples/bpf/xdpsock_ctrl_proc.c | 187 ++++++++++++++++++++++++++++++++++++++++ samples/bpf/xdpsock_user.c | 144 +++++++++++++++++++++++++++++-- 4 files changed, 337 insertions(+), 6 deletions(-) create mode 100644 samples/bpf/xdpsock_ctrl_proc.c (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 05db041f8b18..26fc96ca619e 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -48,6 +48,7 @@ tprogs-y += syscall_tp tprogs-y += cpustat tprogs-y += xdp_adjust_tail tprogs-y += xdpsock +tprogs-y += xdpsock_ctrl_proc tprogs-y += xsk_fwd tprogs-y += xdp_fwd tprogs-y += task_fd_query @@ -105,6 +106,7 @@ syscall_tp-objs := syscall_tp_user.o cpustat-objs := cpustat_user.o xdp_adjust_tail-objs := xdp_adjust_tail_user.o xdpsock-objs := xdpsock_user.o +xdpsock_ctrl_proc-objs := xdpsock_ctrl_proc.o xsk_fwd-objs := xsk_fwd.o xdp_fwd-objs := xdp_fwd_user.o task_fd_query-objs := task_fd_query_user.o $(TRACE_HELPERS) @@ -202,7 +204,7 @@ TPROGLDLIBS_tracex4 += -lrt TPROGLDLIBS_trace_output += -lrt TPROGLDLIBS_map_perf_test += -lrt TPROGLDLIBS_test_overhead += -lrt -TPROGLDLIBS_xdpsock += -pthread +TPROGLDLIBS_xdpsock += -pthread -lcap TPROGLDLIBS_xsk_fwd += -pthread # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: diff --git a/samples/bpf/xdpsock.h b/samples/bpf/xdpsock.h index b7eca15c78cc..fd70cce60712 100644 --- a/samples/bpf/xdpsock.h +++ b/samples/bpf/xdpsock.h @@ -8,4 +8,12 @@ #define MAX_SOCKS 4 +#define SOCKET_NAME "sock_cal_bpf_fd" +#define MAX_NUM_OF_CLIENTS 10 + +#define CLOSE_CONN 1 + +typedef __u64 u64; +typedef __u32 u32; + #endif /* XDPSOCK_H */ diff --git a/samples/bpf/xdpsock_ctrl_proc.c b/samples/bpf/xdpsock_ctrl_proc.c new file mode 100644 index 000000000000..384e62e3c6d6 --- /dev/null +++ b/samples/bpf/xdpsock_ctrl_proc.c @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2017 - 2018 Intel Corporation. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "xdpsock.h" + +static const char *opt_if = ""; + +static struct option long_options[] = { + {"interface", required_argument, 0, 'i'}, + {0, 0, 0, 0} +}; + +static void usage(const char *prog) +{ + const char *str = + " Usage: %s [OPTIONS]\n" + " Options:\n" + " -i, --interface=n Run on interface n\n" + "\n"; + fprintf(stderr, "%s\n", str); + + exit(0); +} + +static void parse_command_line(int argc, char **argv) +{ + int option_index, c; + + opterr = 0; + + for (;;) { + c = getopt_long(argc, argv, "i:", + long_options, &option_index); + if (c == -1) + break; + + switch (c) { + case 'i': + opt_if = optarg; + break; + default: + usage(basename(argv[0])); + } + } +} + +static int send_xsks_map_fd(int sock, int fd) +{ + char cmsgbuf[CMSG_SPACE(sizeof(int))]; + struct msghdr msg; + struct iovec iov; + int value = 0; + + if (fd == -1) { + fprintf(stderr, "Incorrect fd = %d\n", fd); + return -1; + } + iov.iov_base = &value; + iov.iov_len = sizeof(int); + + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_flags = 0; + msg.msg_control = cmsgbuf; + msg.msg_controllen = CMSG_LEN(sizeof(int)); + + struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); + + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + + *(int *)CMSG_DATA(cmsg) = fd; + int ret = sendmsg(sock, &msg, 0); + + if (ret == -1) { + fprintf(stderr, "Sendmsg failed with %s", strerror(errno)); + return -errno; + } + + return ret; +} + +int +main(int argc, char **argv) +{ + struct sockaddr_un server; + int listening = 1; + int rval, msgsock; + int ifindex = 0; + int flag = 1; + int cmd = 0; + int sock; + int err; + int xsks_map_fd; + + parse_command_line(argc, argv); + + ifindex = if_nametoindex(opt_if); + if (ifindex == 0) { + fprintf(stderr, "Unable to get ifindex for Interface %s. Reason:%s", + opt_if, strerror(errno)); + return -errno; + } + + sock = socket(AF_UNIX, SOCK_STREAM, 0); + if (sock < 0) { + fprintf(stderr, "Opening socket stream failed: %s", strerror(errno)); + return -errno; + } + + server.sun_family = AF_UNIX; + strcpy(server.sun_path, SOCKET_NAME); + + setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &flag, sizeof(int)); + + if (bind(sock, (struct sockaddr *)&server, sizeof(struct sockaddr_un))) { + fprintf(stderr, "Binding to socket stream failed: %s", strerror(errno)); + return -errno; + } + + listen(sock, MAX_NUM_OF_CLIENTS); + + err = xsk_setup_xdp_prog(ifindex, &xsks_map_fd); + if (err) { + fprintf(stderr, "Setup of xdp program failed\n"); + goto close_sock; + } + + while (listening) { + msgsock = accept(sock, 0, 0); + if (msgsock == -1) { + fprintf(stderr, "Error accepting connection: %s", strerror(errno)); + err = -errno; + goto close_sock; + } + err = send_xsks_map_fd(msgsock, xsks_map_fd); + if (err <= 0) { + fprintf(stderr, "Error %d sending xsks_map_fd\n", err); + goto cleanup; + } + do { + rval = read(msgsock, &cmd, sizeof(int)); + if (rval < 0) { + fprintf(stderr, "Error reading stream message"); + } else { + if (cmd != CLOSE_CONN) + fprintf(stderr, "Recv unknown cmd = %d\n", cmd); + listening = 0; + break; + } + } while (rval > 0); + } + close(msgsock); + close(sock); + unlink(SOCKET_NAME); + + /* Unset fd for given ifindex */ + err = bpf_set_link_xdp_fd(ifindex, -1, 0); + if (err) { + fprintf(stderr, "Error when unsetting bpf prog_fd for ifindex(%d)\n", ifindex); + return err; + } + + return 0; + +cleanup: + close(msgsock); +close_sock: + close(sock); + unlink(SOCKET_NAME); + return err; +} diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 909f77647deb..74a578f880b5 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -24,10 +24,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include @@ -96,6 +98,7 @@ static bool opt_need_wakeup = true; static u32 opt_num_xsks = 1; static u32 prog_id; static bool opt_busy_poll; +static bool opt_reduced_cap; struct xsk_ring_stats { unsigned long rx_npkts; @@ -154,6 +157,7 @@ struct xsk_socket_info { static int num_socks; struct xsk_socket_info *xsks[MAX_SOCKS]; +int sock; static unsigned long get_nsecs(void) { @@ -461,6 +465,7 @@ static void *poller(void *arg) static void remove_xdp_program(void) { u32 curr_prog_id = 0; + int cmd = CLOSE_CONN; if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) { printf("bpf_get_link_xdp_id failed\n"); @@ -472,6 +477,13 @@ static void remove_xdp_program(void) printf("couldn't find a prog id on a given interface\n"); else printf("program on interface changed, not removing\n"); + + if (opt_reduced_cap) { + if (write(sock, &cmd, sizeof(int)) < 0) { + fprintf(stderr, "Error writing into stream socket: %s", strerror(errno)); + exit(EXIT_FAILURE); + } + } } static void int_exit(int sig) @@ -854,7 +866,7 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem, xsk->umem = umem; cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; - if (opt_num_xsks > 1) + if (opt_num_xsks > 1 || opt_reduced_cap) cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD; else cfg.libbpf_flags = 0; @@ -913,6 +925,7 @@ static struct option long_options[] = { {"app-stats", no_argument, 0, 'a'}, {"irq-string", no_argument, 0, 'I'}, {"busy-poll", no_argument, 0, 'B'}, + {"reduce-cap", no_argument, 0, 'R'}, {0, 0, 0, 0} }; @@ -935,7 +948,7 @@ static void usage(const char *prog) " -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n" " -f, --frame-size=n Set the frame size (must be a power of two in aligned mode, default is %d).\n" " -u, --unaligned Enable unaligned chunk placement\n" - " -M, --shared-umem Enable XDP_SHARED_UMEM\n" + " -M, --shared-umem Enable XDP_SHARED_UMEM (cannot be used with -R)\n" " -F, --force Force loading the XDP prog\n" " -d, --duration=n Duration in secs to run command.\n" " Default: forever.\n" @@ -952,6 +965,7 @@ static void usage(const char *prog) " -a, --app-stats Display application (syscall) statistics.\n" " -I, --irq-string Display driver interrupt statistics for interface associated with irq-string.\n" " -B, --busy-poll Busy poll.\n" + " -R, --reduce-cap Use reduced capabilities (cannot be used with -M)\n" "\n"; fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE, opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE, @@ -967,7 +981,7 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQaI:B", + c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQaI:BR", long_options, &option_index); if (c == -1) break; @@ -1069,6 +1083,9 @@ static void parse_command_line(int argc, char **argv) case 'B': opt_busy_poll = 1; break; + case 'R': + opt_reduced_cap = true; + break; default: usage(basename(argv[0])); } @@ -1090,6 +1107,11 @@ static void parse_command_line(int argc, char **argv) opt_xsk_frame_size); usage(basename(argv[0])); } + + if (opt_reduced_cap && opt_num_xsks > 1) { + fprintf(stderr, "ERROR: -M and -R cannot be used together\n"); + usage(basename(argv[0])); + } } static void kick_tx(struct xsk_socket_info *xsk) @@ -1487,19 +1509,116 @@ static void apply_setsockopt(struct xsk_socket_info *xsk) exit_with_error(errno); } +static int recv_xsks_map_fd_from_ctrl_node(int sock, int *_fd) +{ + char cms[CMSG_SPACE(sizeof(int))]; + struct cmsghdr *cmsg; + struct msghdr msg; + struct iovec iov; + int value; + int len; + + iov.iov_base = &value; + iov.iov_len = sizeof(int); + + msg.msg_name = 0; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_flags = 0; + msg.msg_control = (caddr_t)cms; + msg.msg_controllen = sizeof(cms); + + len = recvmsg(sock, &msg, 0); + + if (len < 0) { + fprintf(stderr, "Recvmsg failed length incorrect.\n"); + return -EINVAL; + } + + if (len == 0) { + fprintf(stderr, "Recvmsg failed no data\n"); + return -EINVAL; + } + + cmsg = CMSG_FIRSTHDR(&msg); + *_fd = *(int *)CMSG_DATA(cmsg); + + return 0; +} + +static int +recv_xsks_map_fd(int *xsks_map_fd) +{ + struct sockaddr_un server; + int err; + + sock = socket(AF_UNIX, SOCK_STREAM, 0); + if (sock < 0) { + fprintf(stderr, "Error opening socket stream: %s", strerror(errno)); + return errno; + } + + server.sun_family = AF_UNIX; + strcpy(server.sun_path, SOCKET_NAME); + + if (connect(sock, (struct sockaddr *)&server, sizeof(struct sockaddr_un)) < 0) { + close(sock); + fprintf(stderr, "Error connecting stream socket: %s", strerror(errno)); + return errno; + } + + err = recv_xsks_map_fd_from_ctrl_node(sock, xsks_map_fd); + if (err) { + fprintf(stderr, "Error %d recieving fd\n", err); + return err; + } + return 0; +} + int main(int argc, char **argv) { + struct __user_cap_header_struct hdr = { _LINUX_CAPABILITY_VERSION_3, 0 }; + struct __user_cap_data_struct data[2] = { { 0 } }; + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; bool rx = false, tx = false; struct xsk_umem_info *umem; struct bpf_object *obj; + int xsks_map_fd = 0; pthread_t pt; int i, ret; void *bufs; parse_command_line(argc, argv); - if (opt_num_xsks > 1) - load_xdp_program(argv, &obj); + if (opt_reduced_cap) { + if (capget(&hdr, data) < 0) + fprintf(stderr, "Error getting capabilities\n"); + + data->effective &= CAP_TO_MASK(CAP_NET_RAW); + data->permitted &= CAP_TO_MASK(CAP_NET_RAW); + + if (capset(&hdr, data) < 0) + fprintf(stderr, "Setting capabilities failed\n"); + + if (capget(&hdr, data) < 0) { + fprintf(stderr, "Error getting capabilities\n"); + } else { + fprintf(stderr, "Capabilities EFF %x Caps INH %x Caps Per %x\n", + data[0].effective, data[0].inheritable, data[0].permitted); + fprintf(stderr, "Capabilities EFF %x Caps INH %x Caps Per %x\n", + data[1].effective, data[1].inheritable, data[1].permitted); + } + } else { + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n", + strerror(errno)); + exit(EXIT_FAILURE); + } + + if (opt_num_xsks > 1) + load_xdp_program(argv, &obj); + } /* Reserve memory for the umem. Use hugepages if unaligned chunk mode */ bufs = mmap(NULL, NUM_FRAMES * opt_xsk_frame_size, @@ -1534,6 +1653,21 @@ int main(int argc, char **argv) if (opt_num_xsks > 1 && opt_bench != BENCH_TXONLY) enter_xsks_into_map(obj); + if (opt_reduced_cap) { + ret = recv_xsks_map_fd(&xsks_map_fd); + if (ret) { + fprintf(stderr, "Error %d receiving xsks_map_fd\n", ret); + exit_with_error(ret); + } + if (xsks[0]->xsk) { + ret = xsk_socket__update_xskmap(xsks[0]->xsk, xsks_map_fd); + if (ret) { + fprintf(stderr, "Update of BPF map failed(%d)\n", ret); + exit_with_error(ret); + } + } + } + signal(SIGINT, int_exit); signal(SIGTERM, int_exit); signal(SIGABRT, int_exit); -- cgit v1.2.3 From 2faa7328f53b36b2b171501154bba3fd66d8f5da Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 3 Dec 2020 11:44:52 +0000 Subject: samples/bpf: Fix spelling mistake "recieving" -> "receiving" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a spelling mistake in an error message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Andrii Nakryiko Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20201203114452.1060017-1-colin.king@canonical.com --- samples/bpf/xdpsock_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'samples') diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 74a578f880b5..568f9815bb1b 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1570,7 +1570,7 @@ recv_xsks_map_fd(int *xsks_map_fd) err = recv_xsks_map_fd_from_ctrl_node(sock, xsks_map_fd); if (err) { - fprintf(stderr, "Error %d recieving fd\n", err); + fprintf(stderr, "Error %d receiving fd\n", err); return err; } return 0; -- cgit v1.2.3