diff options
author | David S. Miller <davem@davemloft.net> | 2017-12-18 10:51:06 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-12-18 10:51:06 -0500 |
commit | 59436c9ee18d7faad0cd1875c9d8322668f98611 (patch) | |
tree | 64543535fdefc11589a24aa9c3e2bab1bd98f894 /tools | |
parent | c30abd5e40dd863f88e26be09b6ce949145a630a (diff) | |
parent | 46df3d209db080395a98fc0875bd05e45e8f44e0 (diff) | |
download | linux-59436c9ee18d7faad0cd1875c9d8322668f98611.tar.gz linux-59436c9ee18d7faad0cd1875c9d8322668f98611.tar.bz2 linux-59436c9ee18d7faad0cd1875c9d8322668f98611.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:
====================
pull-request: bpf-next 2017-12-18
The following pull-request contains BPF updates for your *net-next* tree.
The main changes are:
1) Allow arbitrary function calls from one BPF function to another BPF function.
As of today when writing BPF programs, __always_inline had to be used in
the BPF C programs for all functions, unnecessarily causing LLVM to inflate
code size. Handle this more naturally with support for BPF to BPF calls
such that this __always_inline restriction can be overcome. As a result,
it allows for better optimized code and finally enables to introduce core
BPF libraries in the future that can be reused out of different projects.
x86 and arm64 JIT support was added as well, from Alexei.
2) Add infrastructure for tagging functions as error injectable and allow for
BPF to return arbitrary error values when BPF is attached via kprobes on
those. This way of injecting errors generically eases testing and debugging
without having to recompile or restart the kernel. Tags for opting-in for
this facility are added with BPF_ALLOW_ERROR_INJECTION(), from Josef.
3) For BPF offload via nfp JIT, add support for bpf_xdp_adjust_head() helper
call for XDP programs. First part of this work adds handling of BPF
capabilities included in the firmware, and the later patches add support
to the nfp verifier part and JIT as well as some small optimizations,
from Jakub.
4) The bpftool now also gets support for basic cgroup BPF operations such
as attaching, detaching and listing current BPF programs. As a requirement
for the attach part, bpftool can now also load object files through
'bpftool prog load'. This reuses libbpf which we have in the kernel tree
as well. bpftool-cgroup man page is added along with it, from Roman.
5) Back then commit e87c6bc3852b ("bpf: permit multiple bpf attachments for
a single perf event") added support for attaching multiple BPF programs
to a single perf event. Given they are configured through perf's ioctl()
interface, the interface has been extended with a PERF_EVENT_IOC_QUERY_BPF
command in this work in order to return an array of one or multiple BPF
prog ids that are currently attached, from Yonghong.
6) Various minor fixes and cleanups to the bpftool's Makefile as well
as a new 'uninstall' and 'doc-uninstall' target for removing bpftool
itself or prior installed documentation related to it, from Quentin.
7) Add CONFIG_CGROUP_BPF=y to the BPF kernel selftest config file which is
required for the test_dev_cgroup test case to run, from Naresh.
8) Fix reporting of XDP prog_flags for nfp driver, from Jakub.
9) Fix libbpf's exit code from the Makefile when libelf was not found in
the system, also from Jakub.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'tools')
25 files changed, 3960 insertions, 124 deletions
diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index 37292bb5ce60..c462a928e03d 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -3,12 +3,16 @@ include ../../../scripts/utilities.mak INSTALL ?= install RM ?= rm -f +RMDIR ?= rmdir --ignore-fail-on-non-empty -# Make the path relative to DESTDIR, not prefix -ifndef DESTDIR -prefix ?= /usr/local +ifeq ($(V),1) + Q = +else + Q = @ endif -mandir ?= $(prefix)/share/man + +prefix ?= /usr/local +mandir ?= $(prefix)/man man8dir = $(mandir)/man8 MAN8_RST = $(wildcard *.rst) @@ -20,15 +24,21 @@ man: man8 man8: $(DOC_MAN8) $(OUTPUT)%.8: %.rst - rst2man $< > $@ + $(QUIET_GEN)rst2man $< > $@ clean: - $(call QUIET_CLEAN, Documentation) $(RM) $(DOC_MAN8) + $(call QUIET_CLEAN, Documentation) + $(Q)$(RM) $(DOC_MAN8) install: man - $(call QUIET_INSTALL, Documentation-man) \ - $(INSTALL) -d -m 755 $(DESTDIR)$(man8dir); \ - $(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir); + $(call QUIET_INSTALL, Documentation-man) + $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man8dir) + $(Q)$(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir) + +uninstall: + $(call QUIET_UNINST, Documentation-man) + $(Q)$(RM) $(addprefix $(DESTDIR)$(man8dir)/,$(_DOC_MAN8)) + $(Q)$(RMDIR) $(DESTDIR)$(man8dir) -.PHONY: man man8 clean install +.PHONY: man man8 clean install uninstall .DEFAULT_GOAL := man diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst new file mode 100644 index 000000000000..45c71b1f682b --- /dev/null +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -0,0 +1,118 @@ +================ +bpftool-cgroup +================ +------------------------------------------------------------------------------- +tool for inspection and simple manipulation of eBPF progs +------------------------------------------------------------------------------- + +:Manual section: 8 + +SYNOPSIS +======== + + **bpftool** [*OPTIONS*] **cgroup** *COMMAND* + + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } + + *COMMANDS* := + { **list** | **attach** | **detach** | **help** } + +MAP COMMANDS +============= + +| **bpftool** **cgroup list** *CGROUP* +| **bpftool** **cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*] +| **bpftool** **cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* +| **bpftool** **cgroup help** +| +| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } +| *ATTACH_TYPE* := { *ingress* | *egress* | *sock_create* | *sock_ops* | *device* } +| *ATTACH_FLAGS* := { *multi* | *override* } + +DESCRIPTION +=========== + **bpftool cgroup list** *CGROUP* + List all programs attached to the cgroup *CGROUP*. + + Output will start with program ID followed by attach type, + attach flags and program name. + + **bpftool cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*] + Attach program *PROG* to the cgroup *CGROUP* with attach type + *ATTACH_TYPE* and optional *ATTACH_FLAGS*. + + *ATTACH_FLAGS* can be one of: **override** if a sub-cgroup installs + some bpf program, the program in this cgroup yields to sub-cgroup + program; **multi** if a sub-cgroup installs some bpf program, + that cgroup program gets run in addition to the program in this + cgroup. + + Only one program is allowed to be attached to a cgroup with + no attach flags or the **override** flag. Attaching another + program will release old program and attach the new one. + + Multiple programs are allowed to be attached to a cgroup with + **multi**. They are executed in FIFO order (those that were + attached first, run first). + + Non-default *ATTACH_FLAGS* are supported by kernel version 4.14 + and later. + + *ATTACH_TYPE* can be on of: + **ingress** ingress path of the inet socket (since 4.10); + **egress** egress path of the inet socket (since 4.10); + **sock_create** opening of an inet socket (since 4.10); + **sock_ops** various socket operations (since 4.12); + **device** device access (since 4.15). + + **bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* + Detach *PROG* from the cgroup *CGROUP* and attach type + *ATTACH_TYPE*. + + **bpftool prog help** + Print short help message. + +OPTIONS +======= + -h, --help + Print short generic help message (similar to **bpftool help**). + + -v, --version + Print version number (similar to **bpftool version**). + + -j, --json + Generate JSON output. For commands that cannot produce JSON, this + option has no effect. + + -p, --pretty + Generate human-readable JSON output. Implies **-j**. + + -f, --bpffs + Show file names of pinned programs. + +EXAMPLES +======== +| +| **# mount -t bpf none /sys/fs/bpf/** +| **# mkdir /sys/fs/cgroup/test.slice** +| **# bpftool prog load ./device_cgroup.o /sys/fs/bpf/prog** +| **# bpftool cgroup attach /sys/fs/cgroup/test.slice/ device id 1 allow_multi** + +**# bpftool cgroup list /sys/fs/cgroup/test.slice/** + +:: + + ID AttachType AttachFlags Name + 1 device allow_multi bpf_prog1 + +| +| **# bpftool cgroup detach /sys/fs/cgroup/test.slice/ device id 1** +| **# bpftool cgroup list /sys/fs/cgroup/test.slice/** + +:: + + ID AttachType AttachFlags Name + +SEE ALSO +======== + **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 9f51a268eb06..421cabc417e6 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -128,4 +128,4 @@ EXAMPLES SEE ALSO ======== - **bpftool**\ (8), **bpftool-prog**\ (8) + **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 36e8d1c3c40d..81c97c0e9b67 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -15,7 +15,7 @@ SYNOPSIS *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } *COMMANDS* := - { **show** | **dump xlated** | **dump jited** | **pin** | **help** } + { **show** | **dump xlated** | **dump jited** | **pin** | **load** | **help** } MAP COMMANDS ============= @@ -24,6 +24,7 @@ MAP COMMANDS | **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes**}] | **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}] | **bpftool** **prog pin** *PROG* *FILE* +| **bpftool** **prog load** *OBJ* *FILE* | **bpftool** **prog help** | | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } @@ -57,6 +58,11 @@ DESCRIPTION Note: *FILE* must be located in *bpffs* mount. + **bpftool prog load** *OBJ* *FILE* + Load bpf program from binary *OBJ* and pin as *FILE*. + + Note: *FILE* must be located in *bpffs* mount. + **bpftool prog help** Print short help message. @@ -126,8 +132,10 @@ EXAMPLES | | **# mount -t bpf none /sys/fs/bpf/** | **# bpftool prog pin id 10 /sys/fs/bpf/prog** +| **# bpftool prog load ./my_prog.o /sys/fs/bpf/prog2** | **# ls -l /sys/fs/bpf/** | -rw------- 1 root root 0 Jul 22 01:43 prog +| -rw------- 1 root root 0 Jul 22 01:44 prog2 **# bpftool prog dum jited pinned /sys/fs/bpf/prog opcodes** @@ -147,4 +155,4 @@ EXAMPLES SEE ALSO ======== - **bpftool**\ (8), **bpftool-map**\ (8) + **bpftool**\ (8), **bpftool-map**\ (8), **bpftool-cgroup**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 926c03d5a8da..6732a5a617e4 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -16,7 +16,7 @@ SYNOPSIS **bpftool** **version** - *OBJECT* := { **map** | **program** } + *OBJECT* := { **map** | **program** | **cgroup** } *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** } | { **-j** | **--json** } [{ **-p** | **--pretty** }] } @@ -26,7 +26,9 @@ SYNOPSIS | **pin** | **help** } *PROG-COMMANDS* := { **show** | **dump jited** | **dump xlated** | **pin** - | **help** } + | **load** | **help** } + + *CGROUP-COMMANDS* := { **list** | **attach** | **detach** | **help** } DESCRIPTION =========== @@ -53,4 +55,4 @@ OPTIONS SEE ALSO ======== - **bpftool-map**\ (8), **bpftool-prog**\ (8) + **bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index ec3052c0b004..3f17ad317512 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -1,25 +1,10 @@ include ../../scripts/Makefile.include - include ../../scripts/utilities.mak ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(CURDIR))) srctree := $(patsubst %/,%,$(dir $(srctree))) srctree := $(patsubst %/,%,$(dir $(srctree))) -#$(info Determined 'srctree' to be $(srctree)) -endif - -ifneq ($(objtree),) -#$(info Determined 'objtree' to be $(objtree)) -endif - -ifneq ($(OUTPUT),) -#$(info Determined 'OUTPUT' to be $(OUTPUT)) -# Adding $(OUTPUT) as a directory to look for source files, -# because use generated output files as sources dependency -# for flex/bison parsers. -VPATH += $(OUTPUT) -export VPATH endif ifeq ($(V),1) @@ -28,12 +13,12 @@ else Q = @ endif -BPF_DIR = $(srctree)/tools/lib/bpf/ +BPF_DIR = $(srctree)/tools/lib/bpf/ ifneq ($(OUTPUT),) - BPF_PATH=$(OUTPUT) + BPF_PATH = $(OUTPUT) else - BPF_PATH=$(BPF_DIR) + BPF_PATH = $(BPF_DIR) endif LIBBPF = $(BPF_PATH)libbpf.a @@ -45,7 +30,7 @@ $(LIBBPF)-clean: $(call QUIET_CLEAN, libbpf) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) clean >/dev/null -prefix = /usr/local +prefix ?= /usr/local bash_compdir ?= /usr/share/bash-completion/completions CC = gcc @@ -55,12 +40,15 @@ CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/ LIBS = -lelf -lbfd -lopcodes $(LIBBPF) +INSTALL ?= install +RM ?= rm -f + include $(wildcard *.d) all: $(OUTPUT)bpftool -SRCS=$(wildcard *.c) -OBJS=$(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o +SRCS = $(wildcard *.c) +OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $< @@ -73,21 +61,34 @@ $(OUTPUT)%.o: %.c clean: $(LIBBPF)-clean $(call QUIET_CLEAN, bpftool) - $(Q)rm -rf $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d + $(Q)$(RM) $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d + +install: $(OUTPUT)bpftool + $(call QUIET_INSTALL, bpftool) + $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/sbin + $(Q)$(INSTALL) $(OUTPUT)bpftool $(DESTDIR)$(prefix)/sbin/bpftool + $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(bash_compdir) + $(Q)$(INSTALL) -m 0644 bash-completion/bpftool $(DESTDIR)$(bash_compdir) -install: - install -m 0755 -d $(prefix)/sbin - install $(OUTPUT)bpftool $(prefix)/sbin/bpftool - install -m 0755 -d $(bash_compdir) - install -m 0644 bash-completion/bpftool $(bash_compdir) +uninstall: + $(call QUIET_UNINST, bpftool) + $(Q)$(RM) $(DESTDIR)$(prefix)/sbin/bpftool + $(Q)$(RM) $(DESTDIR)$(bash_compdir)/bpftool doc: - $(Q)$(MAKE) -C Documentation/ + $(call descend,Documentation) + +doc-clean: + $(call descend,Documentation,clean) doc-install: - $(Q)$(MAKE) -C Documentation/ install + $(call descend,Documentation,install) + +doc-uninstall: + $(call descend,Documentation,uninstall) FORCE: -.PHONY: all clean FORCE install doc doc-install +.PHONY: all FORCE clean install uninstall +.PHONY: doc doc-clean doc-install doc-uninstall .DEFAULT_GOAL := all diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c new file mode 100644 index 000000000000..34ca303d72bc --- /dev/null +++ b/tools/bpf/bpftool/cgroup.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright (C) 2017 Facebook +// Author: Roman Gushchin <guro@fb.com> + +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include <bpf.h> + +#include "main.h" + +#define HELP_SPEC_ATTACH_FLAGS \ + "ATTACH_FLAGS := { multi | override }" + +#define HELP_SPEC_ATTACH_TYPES \ + "ATTACH_TYPE := { ingress | egress | sock_create | sock_ops | device }" + +static const char * const attach_type_strings[] = { + [BPF_CGROUP_INET_INGRESS] = "ingress", + [BPF_CGROUP_INET_EGRESS] = "egress", + [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create", + [BPF_CGROUP_SOCK_OPS] = "sock_ops", + [BPF_CGROUP_DEVICE] = "device", + [__MAX_BPF_ATTACH_TYPE] = NULL, +}; + +static enum bpf_attach_type parse_attach_type(const char *str) +{ + enum bpf_attach_type type; + + for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { + if (attach_type_strings[type] && + is_prefix(str, attach_type_strings[type])) + return type; + } + + return __MAX_BPF_ATTACH_TYPE; +} + +static int list_bpf_prog(int id, const char *attach_type_str, + const char *attach_flags_str) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + int prog_fd; + + prog_fd = bpf_prog_get_fd_by_id(id); + if (prog_fd < 0) + return -1; + + if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) { + close(prog_fd); + return -1; + } + + if (json_output) { + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "id", info.id); + jsonw_string_field(json_wtr, "attach_type", + attach_type_str); + jsonw_string_field(json_wtr, "attach_flags", + attach_flags_str); + jsonw_string_field(json_wtr, "name", info.name); + jsonw_end_object(json_wtr); + } else { + printf("%-8u %-15s %-15s %-15s\n", info.id, + attach_type_str, + attach_flags_str, + info.name); + } + + close(prog_fd); + return 0; +} + +static int list_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type) +{ + __u32 prog_ids[1024] = {0}; + char *attach_flags_str; + __u32 prog_cnt, iter; + __u32 attach_flags; + char buf[32]; + int ret; + + prog_cnt = ARRAY_SIZE(prog_ids); + ret = bpf_prog_query(cgroup_fd, type, 0, &attach_flags, prog_ids, + &prog_cnt); + if (ret) + return ret; + + if (prog_cnt == 0) + return 0; + + switch (attach_flags) { + case BPF_F_ALLOW_MULTI: + attach_flags_str = "multi"; + break; + case BPF_F_ALLOW_OVERRIDE: + attach_flags_str = "override"; + break; + case 0: + attach_flags_str = ""; + break; + default: + snprintf(buf, sizeof(buf), "unknown(%x)", attach_flags); + attach_flags_str = buf; + } + + for (iter = 0; iter < prog_cnt; iter++) + list_bpf_prog(prog_ids[iter], attach_type_strings[type], + attach_flags_str); + + return 0; +} + +static int do_list(int argc, char **argv) +{ + enum bpf_attach_type type; + int cgroup_fd; + int ret = -1; + + if (argc < 1) { + p_err("too few parameters for cgroup list\n"); + goto exit; + } else if (argc > 1) { + p_err("too many parameters for cgroup list\n"); + goto exit; + } + + cgroup_fd = open(argv[0], O_RDONLY); + if (cgroup_fd < 0) { + p_err("can't open cgroup %s\n", argv[1]); + goto exit; + } + + if (json_output) + jsonw_start_array(json_wtr); + else + printf("%-8s %-15s %-15s %-15s\n", "ID", "AttachType", + "AttachFlags", "Name"); + + for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { + /* + * Not all attach types may be supported, so it's expected, + * that some requests will fail. + * If we were able to get the list for at least one + * attach type, let's return 0. + */ + if (list_attached_bpf_progs(cgroup_fd, type) == 0) + ret = 0; + } + + if (json_output) + jsonw_end_array(json_wtr); + + close(cgroup_fd); +exit: + return ret; +} + +static int do_attach(int argc, char **argv) +{ + enum bpf_attach_type attach_type; + int cgroup_fd, prog_fd; + int attach_flags = 0; + int ret = -1; + int i; + + if (argc < 4) { + p_err("too few parameters for cgroup attach\n"); + goto exit; + } + + cgroup_fd = open(argv[0], O_RDONLY); + if (cgroup_fd < 0) { + p_err("can't open cgroup %s\n", argv[1]); + goto exit; + } + + attach_type = parse_attach_type(argv[1]); + if (attach_type == __MAX_BPF_ATTACH_TYPE) { + p_err("invalid attach type\n"); + goto exit_cgroup; + } + + argc -= 2; + argv = &argv[2]; + prog_fd = prog_parse_fd(&argc, &argv); + if (prog_fd < 0) + goto exit_cgroup; + + for (i = 0; i < argc; i++) { + if (is_prefix(argv[i], "multi")) { + attach_flags |= BPF_F_ALLOW_MULTI; + } else if (is_prefix(argv[i], "override")) { + attach_flags |= BPF_F_ALLOW_OVERRIDE; + } else { + p_err("unknown option: %s\n", argv[i]); + goto exit_cgroup; + } + } + + if (bpf_prog_attach(prog_fd, cgroup_fd, attach_type, attach_flags)) { + p_err("failed to attach program"); + goto exit_prog; + } + + if (json_output) + jsonw_null(json_wtr); + + ret = 0; + +exit_prog: + close(prog_fd); +exit_cgroup: + close(cgroup_fd); +exit: + return ret; +} + +static int do_detach(int argc, char **argv) +{ + enum bpf_attach_type attach_type; + int prog_fd, cgroup_fd; + int ret = -1; + + if (argc < 4) { + p_err("too few parameters for cgroup detach\n"); + goto exit; + } + + cgroup_fd = open(argv[0], O_RDONLY); + if (cgroup_fd < 0) { + p_err("can't open cgroup %s\n", argv[1]); + goto exit; + } + + attach_type = parse_attach_type(argv[1]); + if (attach_type == __MAX_BPF_ATTACH_TYPE) { + p_err("invalid attach type"); + goto exit_cgroup; + } + + argc -= 2; + argv = &argv[2]; + prog_fd = prog_parse_fd(&argc, &argv); + if (prog_fd < 0) + goto exit_cgroup; + + if (bpf_prog_detach2(prog_fd, cgroup_fd, attach_type)) { + p_err("failed to detach program"); + goto exit_prog; + } + + if (json_output) + jsonw_null(json_wtr); + + ret = 0; + +exit_prog: + close(prog_fd); +exit_cgroup: + close(cgroup_fd); +exit: + return ret; +} + +static int do_help(int argc, char **argv) +{ + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + + fprintf(stderr, + "Usage: %s %s list CGROUP\n" + " %s %s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n" + " %s %s detach CGROUP ATTACH_TYPE PROG\n" + " %s %s help\n" + "\n" + " " HELP_SPEC_ATTACH_TYPES "\n" + " " HELP_SPEC_ATTACH_FLAGS "\n" + " " HELP_SPEC_PROGRAM "\n" + " " HELP_SPEC_OPTIONS "\n" + "", + bin_name, argv[-2], bin_name, argv[-2], + bin_name, argv[-2], bin_name, argv[-2]); + + return 0; +} + +static const struct cmd cmds[] = { + { "list", do_list }, + { "attach", do_attach }, + { "detach", do_detach }, + { "help", do_help }, + { 0 } +}; + +int do_cgroup(int argc, char **argv) +{ + return cmd_select(cmds, argc, argv, do_help); +} diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 2bd3b280e6dd..b62c94e3997a 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -163,13 +163,49 @@ int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type) return fd; } -int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) +int do_pin_fd(int fd, const char *name) { char err_str[ERR_MAX_LEN]; - unsigned int id; - char *endptr; char *file; char *dir; + int err = 0; + + err = bpf_obj_pin(fd, name); + if (!err) + goto out; + + file = malloc(strlen(name) + 1); + strcpy(file, name); + dir = dirname(file); + + if (errno != EPERM || is_bpffs(dir)) { + p_err("can't pin the object (%s): %s", name, strerror(errno)); + goto out_free; + } + + /* Attempt to mount bpffs, then retry pinning. */ + err = mnt_bpffs(dir, err_str, ERR_MAX_LEN); + if (!err) { + err = bpf_obj_pin(fd, name); + if (err) + p_err("can't pin the object (%s): %s", name, + strerror(errno)); + } else { + err_str[ERR_MAX_LEN - 1] = '\0'; + p_err("can't mount BPF file system to pin the object (%s): %s", + name, err_str); + } + +out_free: + free(file); +out: + return err; +} + +int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) +{ + unsigned int id; + char *endptr; int err; int fd; @@ -195,35 +231,8 @@ int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) return -1; } - err = bpf_obj_pin(fd, *argv); - if (!err) - goto out_close; - - file = malloc(strlen(*argv) + 1); - strcpy(file, *argv); - dir = dirname(file); - - if (errno != EPERM || is_bpffs(dir)) { - p_err("can't pin the object (%s): %s", *argv, strerror(errno)); - goto out_free; - } + err = do_pin_fd(fd, *argv); - /* Attempt to mount bpffs, then retry pinning. */ - err = mnt_bpffs(dir, err_str, ERR_MAX_LEN); - if (!err) { - err = bpf_obj_pin(fd, *argv); - if (err) - p_err("can't pin the object (%s): %s", *argv, - strerror(errno)); - } else { - err_str[ERR_MAX_LEN - 1] = '\0'; - p_err("can't mount BPF file system to pin the object (%s): %s", - *argv, err_str); - } - -out_free: - free(file); -out_close: close(fd); return err; } diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index d294bc8168be..ecd53ccf1239 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -85,7 +85,7 @@ static int do_help(int argc, char **argv) " %s batch file FILE\n" " %s version\n" "\n" - " OBJECT := { prog | map }\n" + " OBJECT := { prog | map | cgroup }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, bin_name, bin_name); @@ -173,6 +173,7 @@ static const struct cmd cmds[] = { { "batch", do_batch }, { "prog", do_prog }, { "map", do_map }, + { "cgroup", do_cgroup }, { "version", do_version }, { 0 } }; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index bff330b49791..8f6d3cac0347 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -111,9 +111,11 @@ char *get_fdinfo(int fd, const char *key); int open_obj_pinned(char *path); int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type); int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)); +int do_pin_fd(int fd, const char *name); int do_prog(int argc, char **arg); int do_map(int argc, char **arg); +int do_cgroup(int argc, char **arg); int prog_parse_fd(int *argc, char ***argv); diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index ad619b96c276..037484ceaeaf 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -45,6 +45,7 @@ #include <sys/stat.h> #include <bpf.h> +#include <libbpf.h> #include "main.h" #include "disasm.h" @@ -635,6 +636,30 @@ static int do_pin(int argc, char **argv) return err; } +static int do_load(int argc, char **argv) +{ + struct bpf_object *obj; + int prog_fd; + + if (argc != 2) + usage(); + + if (bpf_prog_load(argv[0], BPF_PROG_TYPE_UNSPEC, &obj, &prog_fd)) { + p_err("failed to load program\n"); + return -1; + } + + if (do_pin_fd(prog_fd, argv[1])) { + p_err("failed to pin program\n"); + return -1; + } + + if (json_output) + jsonw_null(json_wtr); + + return 0; +} + static int do_help(int argc, char **argv) { if (json_output) { @@ -647,13 +672,14 @@ static int do_help(int argc, char **argv) " %s %s dump xlated PROG [{ file FILE | opcodes }]\n" " %s %s dump jited PROG [{ file FILE | opcodes }]\n" " %s %s pin PROG FILE\n" + " %s %s load OBJ FILE\n" " %s %s help\n" "\n" " " HELP_SPEC_PROGRAM "\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], - bin_name, argv[-2], bin_name, argv[-2]); + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]); return 0; } @@ -663,6 +689,7 @@ static const struct cmd cmds[] = { { "help", do_help }, { "dump", do_dump }, { "pin", do_pin }, + { "load", do_load }, { 0 } }; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4c223ab30293..db1b0923a308 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -197,8 +197,14 @@ enum bpf_attach_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) +/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ #define BPF_PSEUDO_MAP_FD 1 +/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative + * offset to another bpf function + */ +#define BPF_PSEUDO_CALL 1 + /* flags for BPF_MAP_UPDATE_ELEM command */ #define BPF_ANY 0 /* create new element or update existing */ #define BPF_NOEXIST 1 /* create new element if it didn't exist */ @@ -677,6 +683,10 @@ union bpf_attr { * @buf: buf to fill * @buf_size: size of the buf * Return : 0 on success or negative error code + * + * int bpf_override_return(pt_regs, rc) + * @pt_regs: pointer to struct pt_regs + * @rc: the return value to set */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -736,7 +746,8 @@ union bpf_attr { FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ FN(perf_prog_read_value), \ - FN(getsockopt), + FN(getsockopt), \ + FN(override_return), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index b9a4953018ed..769533696483 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -418,6 +418,27 @@ struct perf_event_attr { __u16 __reserved_2; /* align to __u64 */ }; +/* + * Structure used by below PERF_EVENT_IOC_QUERY_BPF command + * to query bpf programs attached to the same perf tracepoint + * as the given perf event. + */ +struct perf_event_query_bpf { + /* + * The below ids array length + */ + __u32 ids_len; + /* + * Set by the kernel to indicate the number of + * available programs + */ + __u32 prog_cnt; + /* + * User provided buffer to store program ids + */ + __u32 ids[0]; +}; + #define perf_flags(attr) (*(&(attr)->read_format + 1)) /* @@ -433,6 +454,7 @@ struct perf_event_attr { #define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) #define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) #define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) +#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *) enum perf_event_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 4555304dc18e..8ed43ae9db9b 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -213,10 +213,10 @@ PHONY += force elfdep bpfdep force: elfdep: - @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit -1 ; fi + @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit 1 ; fi bpfdep: - @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit -1 ; fi + @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi # Declare the contents of the .PHONY variable as phony. We keep that # information in a variable so we can use it in if_changed and friends. diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 6534889e2b2f..9f44c196931e 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -40,7 +40,7 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, __u32 map_flags); /* Recommend log buffer size */ -#define BPF_LOG_BUF_SIZE 65536 +#define BPF_LOG_BUF_SIZE (256 * 1024) int bpf_load_program_name(enum bpf_prog_type type, const char *name, const struct bpf_insn *insns, size_t insns_cnt, const char *license, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5aa45f89da93..5b83875b3594 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -174,12 +174,19 @@ struct bpf_program { char *name; char *section_name; struct bpf_insn *insns; - size_t insns_cnt; + size_t insns_cnt, main_prog_cnt; enum bpf_prog_type type; - struct { + struct reloc_desc { + enum { + RELO_LD64, + RELO_CALL, + } type; int insn_idx; - int map_idx; + union { + int map_idx; + int text_off; + }; } *reloc_desc; int nr_reloc; @@ -234,6 +241,7 @@ struct bpf_object { } *reloc; int nr_reloc; int maps_shndx; + int text_shndx; } efile; /* * All loaded bpf_object is linked in a list, which is @@ -375,9 +383,13 @@ bpf_object__init_prog_names(struct bpf_object *obj) size_t pi, si; for (pi = 0; pi < obj->nr_programs; pi++) { - char *name = NULL; + const char *name = NULL; prog = &obj->programs[pi]; + if (prog->idx == obj->efile.text_shndx) { + name = ".text"; + goto skip_search; + } for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name; si++) { @@ -387,6 +399,8 @@ bpf_object__init_prog_names(struct bpf_object *obj) continue; if (sym.st_shndx != prog->idx) continue; + if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL) + continue; name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, @@ -403,7 +417,7 @@ bpf_object__init_prog_names(struct bpf_object *obj) prog->section_name); return -EINVAL; } - +skip_search: prog->name = strdup(name); if (!prog->name) { pr_warning("failed to allocate memory for prog sym %s\n", @@ -793,6 +807,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) } else if ((sh.sh_type == SHT_PROGBITS) && (sh.sh_flags & SHF_EXECINSTR) && (data->d_size > 0)) { + if (strcmp(name, ".text") == 0) + obj->efile.text_shndx = idx; err = bpf_object__add_program(obj, data->d_buf, data->d_size, name, idx); if (err) { @@ -854,11 +870,14 @@ bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx) } static int -bpf_program__collect_reloc(struct bpf_program *prog, - size_t nr_maps, GElf_Shdr *shdr, - Elf_Data *data, Elf_Data *symbols, - int maps_shndx, struct bpf_map *maps) +bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, + Elf_Data *data, struct bpf_object *obj) { + Elf_Data *symbols = obj->efile.symbols; + int text_shndx = obj->efile.text_shndx; + int maps_shndx = obj->efile.maps_shndx; + struct bpf_map *maps = obj->maps; + size_t nr_maps = obj->nr_maps; int i, nrels; pr_debug("collecting relocating info for: '%s'\n", @@ -891,8 +910,10 @@ bpf_program__collect_reloc(struct bpf_program *prog, GELF_R_SYM(rel.r_info)); return -LIBBPF_ERRNO__FORMAT; } + pr_debug("relo for %ld value %ld name %d\n", + rel.r_info >> 32, sym.st_value, sym.st_name); - if (sym.st_shndx != maps_shndx) { + if (sym.st_shndx != maps_shndx && sym.st_shndx != text_shndx) { pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n", prog->section_name, sym.st_shndx); return -LIBBPF_ERRNO__RELOC; @@ -901,6 +922,17 @@ bpf_program__collect_reloc(struct bpf_program *prog, insn_idx = rel.r_offset / sizeof(struct bpf_insn); pr_debug("relocation: insn_idx=%u\n", insn_idx); + if (insns[insn_idx].code == (BPF_JMP | BPF_CALL)) { + if (insns[insn_idx].src_reg != BPF_PSEUDO_CALL) { + pr_warning("incorrect bpf_call opcode\n"); + return -LIBBPF_ERRNO__RELOC; + } + prog->reloc_desc[i].type = RELO_CALL; + prog->reloc_desc[i].insn_idx = insn_idx; + prog->reloc_desc[i].text_off = sym.st_value; + continue; + } + if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) { pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n", insn_idx, insns[insn_idx].code); @@ -922,6 +954,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, return -LIBBPF_ERRNO__RELOC; } + prog->reloc_desc[i].type = RELO_LD64; prog->reloc_desc[i].insn_idx = insn_idx; prog->reloc_desc[i].map_idx = map_idx; } @@ -961,27 +994,76 @@ bpf_object__create_maps(struct bpf_object *obj) } static int +bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, + struct reloc_desc *relo) +{ + struct bpf_insn *insn, *new_insn; + struct bpf_program *text; + size_t new_cnt; + + if (relo->type != RELO_CALL) + return -LIBBPF_ERRNO__RELOC; + + if (prog->idx == obj->efile.text_shndx) { + pr_warning("relo in .text insn %d into off %d\n", + relo->insn_idx, relo->text_off); + return -LIBBPF_ERRNO__RELOC; + } + + if (prog->main_prog_cnt == 0) { + text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx); + if (!text) { + pr_warning("no .text section found yet relo into text exist\n"); + return -LIBBPF_ERRNO__RELOC; + } + new_cnt = prog->insns_cnt + text->insns_cnt; + new_insn = realloc(prog->insns, new_cnt * sizeof(*insn)); + if (!new_insn) { + pr_warning("oom in prog realloc\n"); + return -ENOMEM; + } + memcpy(new_insn + prog->insns_cnt, text->insns, + text->insns_cnt * sizeof(*insn)); + prog->insns = new_insn; + prog->main_prog_cnt = prog->insns_cnt; + prog->insns_cnt = new_cnt; + } + insn = &prog->insns[relo->insn_idx]; + insn->imm += prog->main_prog_cnt - relo->insn_idx; + pr_debug("added %zd insn from %s to prog %s\n", + text->insns_cnt, text->section_name, prog->section_name); + return 0; +} + +static int bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) { - int i; + int i, err; if (!prog || !prog->reloc_desc) return 0; for (i = 0; i < prog->nr_reloc; i++) { - int insn_idx, map_idx; - struct bpf_insn *insns = prog->insns; + if (prog->reloc_desc[i].type == RELO_LD64) { + struct bpf_insn *insns = prog->insns; + int insn_idx, map_idx; - insn_idx = prog->reloc_desc[i].insn_idx; - map_idx = prog->reloc_desc[i].map_idx; + insn_idx = prog->reloc_desc[i].insn_idx; + map_idx = prog->reloc_desc[i].map_idx; - if (insn_idx >= (int)prog->insns_cnt) { - pr_warning("relocation out of range: '%s'\n", - prog->section_name); - return -LIBBPF_ERRNO__RELOC; + if (insn_idx >= (int)prog->insns_cnt) { + pr_warning("relocation out of range: '%s'\n", + prog->section_name); + return -LIBBPF_ERRNO__RELOC; + } + insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; + insns[insn_idx].imm = obj->maps[map_idx].fd; + } else { + err = bpf_program__reloc_text(prog, obj, + &prog->reloc_desc[i]); + if (err) + return err; } - insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; - insns[insn_idx].imm = obj->maps[map_idx].fd; } zfree(&prog->reloc_desc); @@ -1024,7 +1106,6 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) Elf_Data *data = obj->efile.reloc[i].data; int idx = shdr->sh_info; struct bpf_program *prog; - size_t nr_maps = obj->nr_maps; if (shdr->sh_type != SHT_REL) { pr_warning("internal error at %d\n", __LINE__); @@ -1038,11 +1119,9 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) return -LIBBPF_ERRNO__RELOC; } - err = bpf_program__collect_reloc(prog, nr_maps, + err = bpf_program__collect_reloc(prog, shdr, data, - obj->efile.symbols, - obj->efile.maps_shndx, - obj->maps); + obj); if (err) return err; } @@ -1195,6 +1274,8 @@ bpf_object__load_progs(struct bpf_object *obj) int err; for (i = 0; i < obj->nr_programs; i++) { + if (obj->programs[i].idx == obj->efile.text_shndx) + continue; err = bpf_program__load(&obj->programs[i], obj->license, obj->kern_version); @@ -1721,6 +1802,45 @@ BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT); BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); +#define BPF_PROG_SEC(string, type) { string, sizeof(string), type } +static const struct { + const char *sec; + size_t len; + enum bpf_prog_type prog_type; +} section_names[] = { + BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), + BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT), + BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), + BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), + BPF_PROG_SEC("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB), + BPF_PROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK), + BPF_PROG_SEC("cgroup/dev", BPF_PROG_TYPE_CGROUP_DEVICE), + BPF_PROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS), + BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB), +}; +#undef BPF_PROG_SEC + +static enum bpf_prog_type bpf_program__guess_type(struct bpf_program *prog) +{ + int i; + + if (!prog->section_name) + goto err; + + for (i = 0; i < ARRAY_SIZE(section_names); i++) + if (strncmp(prog->section_name, section_names[i].sec, + section_names[i].len) == 0) + return section_names[i].prog_type; + +err: + pr_warning("failed to guess program type based on section name %s\n", + prog->section_name); + + return BPF_PROG_TYPE_UNSPEC; +} + int bpf_map__fd(struct bpf_map *map) { return map ? map->fd : -EINVAL; @@ -1818,7 +1938,7 @@ long libbpf_get_error(const void *ptr) int bpf_prog_load(const char *file, enum bpf_prog_type type, struct bpf_object **pobj, int *prog_fd) { - struct bpf_program *prog; + struct bpf_program *prog, *first_prog = NULL; struct bpf_object *obj; int err; @@ -1826,13 +1946,30 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type, if (IS_ERR(obj)) return -ENOENT; - prog = bpf_program__next(NULL, obj); - if (!prog) { + bpf_object__for_each_program(prog, obj) { + /* + * If type is not specified, try to guess it based on + * section name. + */ + if (type == BPF_PROG_TYPE_UNSPEC) { + type = bpf_program__guess_type(prog); + if (type == BPF_PROG_TYPE_UNSPEC) { + bpf_object__close(obj); + return -EINVAL; + } + } + + bpf_program__set_type(prog, type); + if (prog->idx != obj->efile.text_shndx && !first_prog) + first_prog = prog; + } + + if (!first_prog) { + pr_warning("object file doesn't contain bpf program\n"); bpf_object__close(obj); return -ENOENT; } - bpf_program__set_type(prog, type); err = bpf_object__load(obj); if (err) { bpf_object__close(obj); @@ -1840,6 +1977,6 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type, } *pobj = obj; - *prog_fd = bpf_program__fd(prog); + *prog_fd = bpf_program__fd(first_prog); return 0; } diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 3fab179b1aba..fcb3ed0be5f8 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -99,5 +99,6 @@ ifneq ($(silent),1) QUIET_CLEAN = @printf ' CLEAN %s\n' $1; QUIET_INSTALL = @printf ' INSTALL %s\n' $1; + QUIET_UNINST = @printf ' UNINST %s\n' $1; endif endif diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 4a9fb8fb445f..a1fcb0c31d02 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -18,7 +18,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ - sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o + sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ + test_l4lb_noinline.o test_xdp_noinline.o TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ test_offload.py @@ -50,8 +51,13 @@ else CPU ?= generic endif +CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \ + -Wno-compare-distinct-pointer-types + +$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline +$(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline + %.o: %.c - $(CLANG) -I. -I./include/uapi -I../../../include/uapi \ - -Wno-compare-distinct-pointer-types \ + $(CLANG) $(CLANG_FLAGS) \ -O2 -target bpf -emit-llvm -c $< -o - | \ $(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@ diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index fd9a17fa8a8b..33cb00e46c49 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -82,7 +82,8 @@ static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, static int (*bpf_perf_prog_read_value)(void *ctx, void *buf, unsigned int buf_size) = (void *) BPF_FUNC_perf_prog_read_value; - +static int (*bpf_override_return)(void *ctx, unsigned long rc) = + (void *) BPF_FUNC_override_return; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 52d53ed08769..9d4897317c77 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -3,3 +3,4 @@ CONFIG_BPF_SYSCALL=y CONFIG_NET_CLS_BPF=m CONFIG_BPF_EVENTS=y CONFIG_TEST_BPF=m +CONFIG_CGROUP_BPF=y diff --git a/tools/testing/selftests/bpf/test_l4lb_noinline.c b/tools/testing/selftests/bpf/test_l4lb_noinline.c new file mode 100644 index 000000000000..ba44a14e6dc4 --- /dev/null +++ b/tools/testing/selftests/bpf/test_l4lb_noinline.c @@ -0,0 +1,473 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/pkt_cls.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include "bpf_helpers.h" +#include "test_iptunnel_common.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; + +static __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << shift) | (word >> ((-shift) & 31)); +} + +/* copy paste of jhash from kernel sources to make sure llvm + * can compile it into valid sequence of bpf instructions + */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +#define JHASH_INITVAL 0xdeadbeef + +typedef unsigned int u32; + +static u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c; + const unsigned char *k = key; + + a = b = c = JHASH_INITVAL + length + initval; + + while (length > 12) { + a += *(u32 *)(k); + b += *(u32 *)(k + 4); + c += *(u32 *)(k + 8); + __jhash_mix(a, b, c); + length -= 12; + k += 12; + } + switch (length) { + case 12: c += (u32)k[11]<<24; + case 11: c += (u32)k[10]<<16; + case 10: c += (u32)k[9]<<8; + case 9: c += k[8]; + case 8: b += (u32)k[7]<<24; + case 7: b += (u32)k[6]<<16; + case 6: b += (u32)k[5]<<8; + case 5: b += k[4]; + case 4: a += (u32)k[3]<<24; + case 3: a += (u32)k[2]<<16; + case 2: a += (u32)k[1]<<8; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + +static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +{ + a += initval; + b += initval; + c += initval; + __jhash_final(a, b, c); + return c; +} + +static u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); +} + +#define PCKT_FRAGMENTED 65343 +#define IPV4_HDR_LEN_NO_OPT 20 +#define IPV4_PLUS_ICMP_HDR 28 +#define IPV6_PLUS_ICMP_HDR 48 +#define RING_SIZE 2 +#define MAX_VIPS 12 +#define MAX_REALS 5 +#define CTL_MAP_SIZE 16 +#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE) +#define F_IPV6 (1 << 0) +#define F_HASH_NO_SRC_PORT (1 << 0) +#define F_ICMP (1 << 0) +#define F_SYN_SET (1 << 1) + +struct packet_description { + union { + __be32 src; + __be32 srcv6[4]; + }; + union { + __be32 dst; + __be32 dstv6[4]; + }; + union { + __u32 ports; + __u16 port16[2]; + }; + __u8 proto; + __u8 flags; +}; + +struct ctl_value { + union { + __u64 value; + __u32 ifindex; + __u8 mac[6]; + }; +}; + +struct vip_meta { + __u32 flags; + __u32 vip_num; +}; + +struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; +}; + +struct vip_stats { + __u64 bytes; + __u64 pkts; +}; + +struct eth_hdr { + unsigned char eth_dest[ETH_ALEN]; + unsigned char eth_source[ETH_ALEN]; + unsigned short eth_proto; +}; + +struct bpf_map_def SEC("maps") vip_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct vip), + .value_size = sizeof(struct vip_meta), + .max_entries = MAX_VIPS, +}; + +struct bpf_map_def SEC("maps") ch_rings = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = CH_RINGS_SIZE, +}; + +struct bpf_map_def SEC("maps") reals = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct real_definition), + .max_entries = MAX_REALS, +}; + +struct bpf_map_def SEC("maps") stats = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct vip_stats), + .max_entries = MAX_VIPS, +}; + +struct bpf_map_def SEC("maps") ctl_array = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct ctl_value), + .max_entries = CTL_MAP_SIZE, +}; + +static __u32 get_packet_hash(struct packet_description *pckt, + bool ipv6) +{ + if (ipv6) + return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS), + pckt->ports, CH_RINGS_SIZE); + else + return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE); +} + +static bool get_packet_dst(struct real_definition **real, + struct packet_description *pckt, + struct vip_meta *vip_info, + bool is_ipv6) +{ + __u32 hash = get_packet_hash(pckt, is_ipv6); + __u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE; + __u32 *real_pos; + + if (hash != 0x358459b7 /* jhash of ipv4 packet */ && + hash != 0x2f4bc6bb /* jhash of ipv6 packet */) + return 0; + + real_pos = bpf_map_lookup_elem(&ch_rings, &key); + if (!real_pos) + return false; + key = *real_pos; + *real = bpf_map_lookup_elem(&reals, &key); + if (!(*real)) + return false; + return true; +} + +static int parse_icmpv6(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return TC_ACT_SHOT; + if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG) + return TC_ACT_OK; + off += sizeof(struct icmp6hdr); + ip6h = data + off; + if (ip6h + 1 > data_end) + return TC_ACT_SHOT; + pckt->proto = ip6h->nexthdr; + pckt->flags |= F_ICMP; + memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16); + memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16); + return TC_ACT_UNSPEC; +} + +static int parse_icmp(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmphdr *icmp_hdr; + struct iphdr *iph; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return TC_ACT_SHOT; + if (icmp_hdr->type != ICMP_DEST_UNREACH || + icmp_hdr->code != ICMP_FRAG_NEEDED) + return TC_ACT_OK; + off += sizeof(struct icmphdr); + iph = data + off; + if (iph + 1 > data_end) + return TC_ACT_SHOT; + if (iph->ihl != 5) + return TC_ACT_SHOT; + pckt->proto = iph->protocol; + pckt->flags |= F_ICMP; + pckt->src = iph->daddr; + pckt->dst = iph->saddr; + return TC_ACT_UNSPEC; +} + +static bool parse_udp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) +{ + struct udphdr *udp; + udp = data + off; + + if (udp + 1 > data_end) + return false; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = udp->source; + pckt->port16[1] = udp->dest; + } else { + pckt->port16[0] = udp->dest; + pckt->port16[1] = udp->source; + } + return true; +} + +static bool parse_tcp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) +{ + struct tcphdr *tcp; + + tcp = data + off; + if (tcp + 1 > data_end) + return false; + + if (tcp->syn) + pckt->flags |= F_SYN_SET; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = tcp->source; + pckt->port16[1] = tcp->dest; + } else { + pckt->port16[0] = tcp->dest; + pckt->port16[1] = tcp->source; + } + return true; +} + +static int process_packet(void *data, __u64 off, void *data_end, + bool is_ipv6, struct __sk_buff *skb) +{ + void *pkt_start = (void *)(long)skb->data; + struct packet_description pckt = {}; + struct eth_hdr *eth = pkt_start; + struct bpf_tunnel_key tkey = {}; + struct vip_stats *data_stats; + struct real_definition *dst; + struct vip_meta *vip_info; + struct ctl_value *cval; + __u32 v4_intf_pos = 1; + __u32 v6_intf_pos = 2; + struct ipv6hdr *ip6h; + struct vip vip = {}; + struct iphdr *iph; + int tun_flag = 0; + __u16 pkt_bytes; + __u64 iph_len; + __u32 ifindex; + __u8 protocol; + __u32 vip_num; + int action; + + tkey.tunnel_ttl = 64; + if (is_ipv6) { + ip6h = data + off; + if (ip6h + 1 > data_end) + return TC_ACT_SHOT; + + iph_len = sizeof(struct ipv6hdr); + protocol = ip6h->nexthdr; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(ip6h->payload_len); + off += iph_len; + if (protocol == IPPROTO_FRAGMENT) { + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_ICMPV6) { + action = parse_icmpv6(data, data_end, off, &pckt); + if (action >= 0) + return action; + off += IPV6_PLUS_ICMP_HDR; + } else { + memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16); + memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16); + } + } else { + iph = data + off; + if (iph + 1 > data_end) + return TC_ACT_SHOT; + if (iph->ihl != 5) + return TC_ACT_SHOT; + + protocol = iph->protocol; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(iph->tot_len); + off += IPV4_HDR_LEN_NO_OPT; + + if (iph->frag_off & PCKT_FRAGMENTED) + return TC_ACT_SHOT; + if (protocol == IPPROTO_ICMP) { + action = parse_icmp(data, data_end, off, &pckt); + if (action >= 0) + return action; + off += IPV4_PLUS_ICMP_HDR; + } else { + pckt.src = iph->saddr; + pckt.dst = iph->daddr; + } + } + protocol = pckt.proto; + + if (protocol == IPPROTO_TCP) { + if (!parse_tcp(data, off, data_end, &pckt)) + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_UDP) { + if (!parse_udp(data, off, data_end, &pckt)) + return TC_ACT_SHOT; + } else { + return TC_ACT_SHOT; + } + + if (is_ipv6) + memcpy(vip.daddr.v6, pckt.dstv6, 16); + else + vip.daddr.v4 = pckt.dst; + + vip.dport = pckt.port16[1]; + vip.protocol = pckt.proto; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) { + vip.dport = 0; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) + return TC_ACT_SHOT; + pckt.port16[1] = 0; + } + + if (vip_info->flags & F_HASH_NO_SRC_PORT) + pckt.port16[0] = 0; + + if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6)) + return TC_ACT_SHOT; + + if (dst->flags & F_IPV6) { + cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + memcpy(tkey.remote_ipv6, dst->dstv6, 16); + tun_flag = BPF_F_TUNINFO_IPV6; + } else { + cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + tkey.remote_ipv4 = dst->dst; + } + vip_num = vip_info->vip_num; + data_stats = bpf_map_lookup_elem(&stats, &vip_num); + if (!data_stats) + return TC_ACT_SHOT; + data_stats->pkts++; + data_stats->bytes += pkt_bytes; + bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag); + *(u32 *)eth->eth_dest = tkey.remote_ipv4; + return bpf_redirect(ifindex, 0); +} + +SEC("l4lb-demo") +int balancer_ingress(struct __sk_buff *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct eth_hdr *eth = data; + __u32 eth_proto; + __u32 nh_off; + + nh_off = sizeof(struct eth_hdr); + if (data + nh_off > data_end) + return TC_ACT_SHOT; + eth_proto = eth->eth_proto; + if (eth_proto == bpf_htons(ETH_P_IP)) + return process_packet(data, nh_off, data_end, false, ctx); + else if (eth_proto == bpf_htons(ETH_P_IPV6)) + return process_packet(data, nh_off, data_end, true, ctx); + else + return TC_ACT_SHOT; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 69427531408d..6472ca98690e 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -21,8 +21,10 @@ typedef __u16 __sum16; #include <linux/ipv6.h> #include <linux/tcp.h> #include <linux/filter.h> +#include <linux/perf_event.h> #include <linux/unistd.h> +#include <sys/ioctl.h> #include <sys/wait.h> #include <sys/resource.h> #include <sys/types.h> @@ -167,10 +169,9 @@ out: #define NUM_ITER 100000 #define VIP_NUM 5 -static void test_l4lb(void) +static void test_l4lb(const char *file) { unsigned int nr_cpus = bpf_num_possible_cpus(); - const char *file = "./test_l4lb.o"; struct vip key = {.protocol = 6}; struct vip_meta { __u32 flags; @@ -247,6 +248,95 @@ out: bpf_object__close(obj); } +static void test_l4lb_all(void) +{ + const char *file1 = "./test_l4lb.o"; + const char *file2 = "./test_l4lb_noinline.o"; + + test_l4lb(file1); + test_l4lb(file2); +} + +static void test_xdp_noinline(void) +{ + const char *file = "./test_xdp_noinline.o"; + unsigned int nr_cpus = bpf_num_possible_cpus(); + struct vip key = {.protocol = 6}; + struct vip_meta { + __u32 flags; + __u32 vip_num; + } value = {.vip_num = VIP_NUM}; + __u32 stats_key = VIP_NUM; + struct vip_stats { + __u64 bytes; + __u64 pkts; + } stats[nr_cpus]; + struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; + } real_def = {.dst = MAGIC_VAL}; + __u32 ch_key = 11, real_num = 3; + __u32 duration, retval, size; + int err, i, prog_fd, map_fd; + __u64 bytes = 0, pkts = 0; + struct bpf_object *obj; + char buf[128]; + u32 *magic = (u32 *)buf; + + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + if (err) { + error_cnt++; + return; + } + + map_fd = bpf_find_map(__func__, obj, "vip_map"); + if (map_fd < 0) + goto out; + bpf_map_update_elem(map_fd, &key, &value, 0); + + map_fd = bpf_find_map(__func__, obj, "ch_rings"); + if (map_fd < 0) + goto out; + bpf_map_update_elem(map_fd, &ch_key, &real_num, 0); + + map_fd = bpf_find_map(__func__, obj, "reals"); + if (map_fd < 0) + goto out; + bpf_map_update_elem(map_fd, &real_num, &real_def, 0); + + err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); + CHECK(err || errno || retval != 1 || size != 54 || + *magic != MAGIC_VAL, "ipv4", + "err %d errno %d retval %d size %d magic %x\n", + err, errno, retval, size, *magic); + + err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6), + buf, &size, &retval, &duration); + CHECK(err || errno || retval != 1 || size != 74 || + *magic != MAGIC_VAL, "ipv6", + "err %d errno %d retval %d size %d magic %x\n", + err, errno, retval, size, *magic); + + map_fd = bpf_find_map(__func__, obj, "stats"); + if (map_fd < 0) + goto out; + bpf_map_lookup_elem(map_fd, &stats_key, stats); + for (i = 0; i < nr_cpus; i++) { + bytes += stats[i].bytes; + pkts += stats[i].pkts; + } + if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) { + error_cnt++; + printf("test_xdp_noinline:FAIL:stats %lld %lld\n", bytes, pkts); + } +out: + bpf_object__close(obj); +} + static void test_tcp_estats(void) { const char *file = "./test_tcp_estats.o"; @@ -617,6 +707,136 @@ static void test_obj_name(void) } } +static void test_tp_attach_query(void) +{ + const int num_progs = 3; + int i, j, bytes, efd, err, prog_fd[num_progs], pmu_fd[num_progs]; + __u32 duration = 0, info_len, saved_prog_ids[num_progs]; + const char *file = "./test_tracepoint.o"; + struct perf_event_query_bpf *query; + struct perf_event_attr attr = {}; + struct bpf_object *obj[num_progs]; + struct bpf_prog_info prog_info; + char buf[256]; + + snprintf(buf, sizeof(buf), + "/sys/kernel/debug/tracing/events/sched/sched_switch/id"); + efd = open(buf, O_RDONLY, 0); + if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno)) + return; + bytes = read(efd, buf, sizeof(buf)); + close(efd); + if (CHECK(bytes <= 0 || bytes >= sizeof(buf), + "read", "bytes %d errno %d\n", bytes, errno)) + return; + + attr.config = strtol(buf, NULL, 0); + attr.type = PERF_TYPE_TRACEPOINT; + attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN; + attr.sample_period = 1; + attr.wakeup_events = 1; + + query = malloc(sizeof(*query) + sizeof(__u32) * num_progs); + for (i = 0; i < num_progs; i++) { + err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i], + &prog_fd[i]); + if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) + goto cleanup1; + + bzero(&prog_info, sizeof(prog_info)); + prog_info.jited_prog_len = 0; + prog_info.xlated_prog_len = 0; + prog_info.nr_map_ids = 0; + info_len = sizeof(prog_info); + err = bpf_obj_get_info_by_fd(prog_fd[i], &prog_info, &info_len); + if (CHECK(err, "bpf_obj_get_info_by_fd", "err %d errno %d\n", + err, errno)) + goto cleanup1; + saved_prog_ids[i] = prog_info.id; + + pmu_fd[i] = syscall(__NR_perf_event_open, &attr, -1 /* pid */, + 0 /* cpu 0 */, -1 /* group id */, + 0 /* flags */); + if (CHECK(pmu_fd[i] < 0, "perf_event_open", "err %d errno %d\n", + pmu_fd[i], errno)) + goto cleanup2; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0); + if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", + err, errno)) + goto cleanup3; + + if (i == 0) { + /* check NULL prog array query */ + query->ids_len = num_progs; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query); + if (CHECK(err || query->prog_cnt != 0, + "perf_event_ioc_query_bpf", + "err %d errno %d query->prog_cnt %u\n", + err, errno, query->prog_cnt)) + goto cleanup3; + } + + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[i]); + if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", + err, errno)) + goto cleanup3; + + if (i == 1) { + /* try to get # of programs only */ + query->ids_len = 0; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query); + if (CHECK(err || query->prog_cnt != 2, + "perf_event_ioc_query_bpf", + "err %d errno %d query->prog_cnt %u\n", + err, errno, query->prog_cnt)) + goto cleanup3; + + /* try a few negative tests */ + /* invalid query pointer */ + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, + (struct perf_event_query_bpf *)0x1); + if (CHECK(!err || errno != EFAULT, + "perf_event_ioc_query_bpf", + "err %d errno %d\n", err, errno)) + goto cleanup3; + + /* no enough space */ + query->ids_len = 1; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query); + if (CHECK(!err || errno != ENOSPC || query->prog_cnt != 2, + "perf_event_ioc_query_bpf", + "err %d errno %d query->prog_cnt %u\n", + err, errno, query->prog_cnt)) + goto cleanup3; + } + + query->ids_len = num_progs; + err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query); + if (CHECK(err || query->prog_cnt != (i + 1), + "perf_event_ioc_query_bpf", + "err %d errno %d query->prog_cnt %u\n", + err, errno, query->prog_cnt)) + goto cleanup3; + for (j = 0; j < i + 1; j++) + if (CHECK(saved_prog_ids[j] != query->ids[j], + "perf_event_ioc_query_bpf", + "#%d saved_prog_id %x query prog_id %x\n", + j, saved_prog_ids[j], query->ids[j])) + goto cleanup3; + } + + i = num_progs - 1; + for (; i >= 0; i--) { + cleanup3: + ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE); + cleanup2: + close(pmu_fd[i]); + cleanup1: + bpf_object__close(obj[i]); + } + free(query); +} + int main(void) { struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; @@ -625,11 +845,13 @@ int main(void) test_pkt_access(); test_xdp(); - test_l4lb(); + test_l4lb_all(); + test_xdp_noinline(); test_tcp_estats(); test_bpf_obj_id(); test_pkt_md_access(); test_obj_name(); + test_tp_attach_query(); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/tools/testing/selftests/bpf/test_tracepoint.c b/tools/testing/selftests/bpf/test_tracepoint.c new file mode 100644 index 000000000000..04bf084517e0 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tracepoint.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook + +#include <linux/bpf.h> +#include "bpf_helpers.h" + +/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ +struct sched_switch_args { + unsigned long long pad; + char prev_comm[16]; + int prev_pid; + int prev_prio; + long long prev_state; + char next_comm[16]; + int next_pid; + int next_prio; +}; + +SEC("tracepoint/sched/sched_switch") +int oncpu(struct sched_switch_args *ctx) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3c64f30cf63c..3bacff0d6f91 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -2,6 +2,7 @@ * Testsuite for eBPF verifier * * Copyright (c) 2014 PLUMgrid, http://plumgrid.com + * Copyright (c) 2017 Facebook * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -277,7 +278,7 @@ static struct bpf_test tests[] = { .insns = { BPF_ALU64_REG(BPF_MOV, BPF_REG_0, BPF_REG_2), }, - .errstr = "jump out of range", + .errstr = "not an exit", .result = REJECT, }, { @@ -5648,7 +5649,7 @@ static struct bpf_test tests[] = { "helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)", .insns = { BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_2, 1), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), @@ -5883,7 +5884,7 @@ static struct bpf_test tests[] = { BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_2, 1), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63), @@ -8097,6 +8098,1623 @@ static struct bpf_test tests[] = { .result = REJECT, .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, }, + { + "calls: basic sanity", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: not on unpriviledged", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "calls: overlapping caller/callee", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "last insn is not an exit or jmp", + .result = REJECT, + }, + { + "calls: wrong recursive calls", + .insns = { + BPF_JMP_IMM(BPF_JA, 0, 0, 4), + BPF_JMP_IMM(BPF_JA, 0, 0, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: wrong src reg", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 2, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "BPF_CALL uses reserved fields", + .result = REJECT, + }, + { + "calls: wrong off value", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, -1, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "BPF_CALL uses reserved fields", + .result = REJECT, + }, + { + "calls: jump back loop", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn 0 to 0", + .result = REJECT, + }, + { + "calls: conditional call", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: conditional call 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: conditional call 3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -6), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, -6), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn", + .result = REJECT, + }, + { + "calls: conditional call 4", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -5), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: conditional call 5", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -6), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn", + .result = REJECT, + }, + { + "calls: conditional call 6", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -2), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn", + .result = REJECT, + }, + { + "calls: using r0 returned by callee", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: using uninit r0 from callee", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "!read_ok", + .result = REJECT, + }, + { + "calls: callee is using r1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_ACT, + .result = ACCEPT, + }, + { + "calls: callee using args1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "calls: callee using wrong args2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "R2 !read_ok", + .result = REJECT, + }, + { + "calls: callee using two args", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, len)), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6, + offsetof(struct __sk_buff, len)), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "calls: callee changing pkt pointers", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_8, BPF_REG_7, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + /* clear_all_pkt_pointers() has to walk all frames + * to make sure that pkt pointers in the caller + * are cleared when callee is calling a helper that + * adjusts packet size + */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_xdp_adjust_head), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R6 invalid mem access 'inv'", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "calls: two calls with args", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "calls: calls with stack arith", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "calls: calls with misaligned stack access", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -61), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, + .errstr = "misaligned stack access", + .result = REJECT, + }, + { + "calls: calls control flow, jump test", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 43), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "calls: calls control flow, jump test 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 43), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "jump out of range from insn 1 to 4", + .result = REJECT, + }, + { + "calls: two calls with bad jump", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range from insn 11 to 9", + .result = REJECT, + }, + { + "calls: recursive call. test1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge", + .result = REJECT, + }, + { + "calls: recursive call. test2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge", + .result = REJECT, + }, + { + "calls: unreachable code", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "unreachable insn 6", + .result = REJECT, + }, + { + "calls: invalid call", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -4), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "invalid destination", + .result = REJECT, + }, + { + "calls: invalid call 2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0x7fffffff), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "invalid destination", + .result = REJECT, + }, + { + "calls: jumping across function bodies. test1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: jumping across function bodies. test2", + .insns = { + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: call without exit", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -2), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "not an exit", + .result = REJECT, + }, + { + "calls: call into middle of ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "last insn", + .result = REJECT, + }, + { + "calls: call into middle of other call", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "last insn", + .result = REJECT, + }, + { + "calls: ld_abs with changing ctx data in callee", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_vlan_push), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "BPF_LD_[ABS|IND] instructions cannot be mixed", + .result = REJECT, + }, + { + "calls: two calls with bad fallthrough", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "not an exit", + .result = REJECT, + }, + { + "calls: two calls with stack read", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + }, + { + "calls: two calls with stack write", + .insns = { + /* main prog */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 7), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_8), + /* write into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* read from stack frame of main prog */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + }, + { + "calls: spill into caller stack frame", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "cannot spill", + .result = REJECT, + }, + { + "calls: write into caller stack frame", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + }, + { + "calls: write into callee stack frame", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, -8), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "cannot return stack pointer", + .result = REJECT, + }, + { + "calls: two calls with stack write and void return", + .insns = { + /* main prog */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* write into stack frame of main prog */ + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0), + BPF_EXIT_INSN(), /* void return */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + }, + { + "calls: ambiguous return value", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "allowed for root only", + .result_unpriv = REJECT, + .errstr = "R0 !read_ok", + .result = REJECT, + }, + { + "calls: two calls that return map_value", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + /* fetch secound map_value_ptr from the stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* call 3rd function twice */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* first time with fp-8 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + /* second time with fp-16 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), /* return 0 */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map1 = { 23 }, + .result = ACCEPT, + }, + { + "calls: two calls that return map_value with bool condition", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* call 3rd function twice */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* first time with fp-8 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + /* second time with fp-16 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + /* fetch secound map_value_ptr from the stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), /* return 0 */ + /* write map_value_ptr into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), /* return 1 */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map1 = { 23 }, + .result = ACCEPT, + }, + { + "calls: two calls that return map_value with incorrect bool check", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* call 3rd function twice */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* first time with fp-8 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + /* second time with fp-16 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + /* fetch secound map_value_ptr from the stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), /* return 0 */ + /* write map_value_ptr into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), /* return 1 */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map1 = { 23 }, + .result = REJECT, + .errstr = "invalid read from stack off -16+0 size 8", + }, + { + "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */ + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */ + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=8 off=2 size=8", + }, + { + "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test2", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */ + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */ + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = ACCEPT, + }, + { + "calls: two jumps that receive map_value via arg=ptr_stack_of_jumper. test3", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -24, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), // 26 + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), // 30 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), // 34 + BPF_JMP_IMM(BPF_JA, 0, 0, -30), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, -8), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=8 off=2 size=8", + }, + { + "calls: two calls that receive map_value_ptr_or_null via arg. test1", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = ACCEPT, + }, + { + "calls: two calls that receive map_value_ptr_or_null via arg. test2", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 0 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 0, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = REJECT, + .errstr = "R0 invalid mem access 'inv'", + }, + { + "calls: pkt_ptr spill into caller stack", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + /* now the pkt range is verified, read pkt_ptr from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "calls: pkt_ptr spill into caller stack 2", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + /* Marking is still kept, but not in all cases safe. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + /* now the pkt range is verified, read pkt_ptr from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid access to packet", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 3", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* Marking is still kept and safe here. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* now the pkt range is verified, read pkt_ptr from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "calls: pkt_ptr spill into caller stack 4", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* Check marking propagated. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "calls: pkt_ptr spill into caller stack 5", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "same insn cannot be used with different", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 6", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "R4 invalid mem access", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 7", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "R4 invalid mem access", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 8", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "calls: pkt_ptr spill into caller stack 9", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid access to packet", + .result = REJECT, + }, + { + "calls: caller stack init to zero or map_value_or_null", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + /* fetch map_value_or_null or const_zero from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + /* store into map_value */ + BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* if (ctx == 0) return; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8), + /* else bpf_map_lookup() and *(fp - 8) = r0 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 13 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "calls: stack init to zero and pruning", + .insns = { + /* first make allocated_stack 16 byte */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), + /* now fork the execution such that the false branch + * of JGT insn will be verified second and it skisp zero + * init of fp-8 stack slot. If stack liveness marking + * is missing live_read marks from call map_lookup + * processing then pruning will incorrectly assume + * that fp-8 stack slot was unused in the fall-through + * branch and will accept the program incorrectly + */ + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 6 }, + .errstr = "invalid indirect read from stack off -8+0 size 8", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, }; static int probe_filter_length(const struct bpf_insn *fp) diff --git a/tools/testing/selftests/bpf/test_xdp_noinline.c b/tools/testing/selftests/bpf/test_xdp_noinline.c new file mode 100644 index 000000000000..5e4aac74f9d0 --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_noinline.c @@ -0,0 +1,833 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/pkt_cls.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include "bpf_helpers.h" + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +static __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << shift) | (word >> ((-shift) & 31)); +} + +/* copy paste of jhash from kernel sources to make sure llvm + * can compile it into valid sequence of bpf instructions + */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +#define JHASH_INITVAL 0xdeadbeef + +typedef unsigned int u32; + +static __attribute__ ((noinline)) +u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c; + const unsigned char *k = key; + + a = b = c = JHASH_INITVAL + length + initval; + + while (length > 12) { + a += *(u32 *)(k); + b += *(u32 *)(k + 4); + c += *(u32 *)(k + 8); + __jhash_mix(a, b, c); + length -= 12; + k += 12; + } + switch (length) { + case 12: c += (u32)k[11]<<24; + case 11: c += (u32)k[10]<<16; + case 10: c += (u32)k[9]<<8; + case 9: c += k[8]; + case 8: b += (u32)k[7]<<24; + case 7: b += (u32)k[6]<<16; + case 6: b += (u32)k[5]<<8; + case 5: b += k[4]; + case 4: a += (u32)k[3]<<24; + case 3: a += (u32)k[2]<<16; + case 2: a += (u32)k[1]<<8; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + +static __attribute__ ((noinline)) +u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +{ + a += initval; + b += initval; + c += initval; + __jhash_final(a, b, c); + return c; +} + +static __attribute__ ((noinline)) +u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); +} + +struct flow_key { + union { + __be32 src; + __be32 srcv6[4]; + }; + union { + __be32 dst; + __be32 dstv6[4]; + }; + union { + __u32 ports; + __u16 port16[2]; + }; + __u8 proto; +}; + +struct packet_description { + struct flow_key flow; + __u8 flags; +}; + +struct ctl_value { + union { + __u64 value; + __u32 ifindex; + __u8 mac[6]; + }; +}; + +struct vip_definition { + union { + __be32 vip; + __be32 vipv6[4]; + }; + __u16 port; + __u16 family; + __u8 proto; +}; + +struct vip_meta { + __u32 flags; + __u32 vip_num; +}; + +struct real_pos_lru { + __u32 pos; + __u64 atime; +}; + +struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; +}; + +struct lb_stats { + __u64 v2; + __u64 v1; +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) vip_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct vip_definition), + .value_size = sizeof(struct vip_meta), + .max_entries = 512, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) lru_cache = { + .type = BPF_MAP_TYPE_LRU_HASH, + .key_size = sizeof(struct flow_key), + .value_size = sizeof(struct real_pos_lru), + .max_entries = 300, + .map_flags = 1U << 1, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) ch_rings = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 12 * 655, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) reals = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct real_definition), + .max_entries = 40, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) stats = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct lb_stats), + .max_entries = 515, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct ctl_value), + .max_entries = 16, + .map_flags = 0, +}; + +struct eth_hdr { + unsigned char eth_dest[6]; + unsigned char eth_source[6]; + unsigned short eth_proto; +}; + +static inline __u64 calc_offset(bool is_ipv6, bool is_icmp) +{ + __u64 off = sizeof(struct eth_hdr); + if (is_ipv6) { + off += sizeof(struct ipv6hdr); + if (is_icmp) + off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr); + } else { + off += sizeof(struct iphdr); + if (is_icmp) + off += sizeof(struct icmphdr) + sizeof(struct iphdr); + } + return off; +} + +static __attribute__ ((noinline)) +bool parse_udp(void *data, void *data_end, + bool is_ipv6, struct packet_description *pckt) +{ + + bool is_icmp = !((pckt->flags & (1 << 0)) == 0); + __u64 off = calc_offset(is_ipv6, is_icmp); + struct udphdr *udp; + udp = data + off; + + if (udp + 1 > data_end) + return 0; + if (!is_icmp) { + pckt->flow.port16[0] = udp->source; + pckt->flow.port16[1] = udp->dest; + } else { + pckt->flow.port16[0] = udp->dest; + pckt->flow.port16[1] = udp->source; + } + return 1; +} + +static __attribute__ ((noinline)) +bool parse_tcp(void *data, void *data_end, + bool is_ipv6, struct packet_description *pckt) +{ + + bool is_icmp = !((pckt->flags & (1 << 0)) == 0); + __u64 off = calc_offset(is_ipv6, is_icmp); + struct tcphdr *tcp; + + tcp = data + off; + if (tcp + 1 > data_end) + return 0; + if (tcp->syn) + pckt->flags |= (1 << 1); + if (!is_icmp) { + pckt->flow.port16[0] = tcp->source; + pckt->flow.port16[1] = tcp->dest; + } else { + pckt->flow.port16[0] = tcp->dest; + pckt->flow.port16[1] = tcp->source; + } + return 1; +} + +static __attribute__ ((noinline)) +bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, + struct packet_description *pckt, + struct real_definition *dst, __u32 pkt_bytes) +{ + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + struct ipv6hdr *ip6h; + __u32 ip_suffix; + void *data_end; + void *data; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr))) + return 0; + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + new_eth = data; + ip6h = data + sizeof(struct eth_hdr); + old_eth = data + sizeof(struct ipv6hdr); + if (new_eth + 1 > data_end || + old_eth + 1 > data_end || ip6h + 1 > data_end) + return 0; + memcpy(new_eth->eth_dest, cval->mac, 6); + memcpy(new_eth->eth_source, old_eth->eth_dest, 6); + new_eth->eth_proto = 56710; + ip6h->version = 6; + ip6h->priority = 0; + memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl)); + + ip6h->nexthdr = IPPROTO_IPV6; + ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0]; + ip6h->payload_len = + __builtin_bswap16(pkt_bytes + sizeof(struct ipv6hdr)); + ip6h->hop_limit = 4; + + ip6h->saddr.in6_u.u6_addr32[0] = 1; + ip6h->saddr.in6_u.u6_addr32[1] = 2; + ip6h->saddr.in6_u.u6_addr32[2] = 3; + ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix; + memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16); + return 1; +} + +static __attribute__ ((noinline)) +bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, + struct packet_description *pckt, + struct real_definition *dst, __u32 pkt_bytes) +{ + + __u32 ip_suffix = __builtin_bswap16(pckt->flow.port16[0]); + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + __u16 *next_iph_u16; + struct iphdr *iph; + __u32 csum = 0; + void *data_end; + void *data; + + ip_suffix <<= 15; + ip_suffix ^= pckt->flow.src; + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr))) + return 0; + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + new_eth = data; + iph = data + sizeof(struct eth_hdr); + old_eth = data + sizeof(struct iphdr); + if (new_eth + 1 > data_end || + old_eth + 1 > data_end || iph + 1 > data_end) + return 0; + memcpy(new_eth->eth_dest, cval->mac, 6); + memcpy(new_eth->eth_source, old_eth->eth_dest, 6); + new_eth->eth_proto = 8; + iph->version = 4; + iph->ihl = 5; + iph->frag_off = 0; + iph->protocol = IPPROTO_IPIP; + iph->check = 0; + iph->tos = 1; + iph->tot_len = __builtin_bswap16(pkt_bytes + sizeof(struct iphdr)); + /* don't update iph->daddr, since it will overwrite old eth_proto + * and multiple iterations of bpf_prog_run() will fail + */ + + iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst; + iph->ttl = 4; + + next_iph_u16 = (__u16 *) iph; +#pragma clang loop unroll(full) + for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) + csum += *next_iph_u16++; + iph->check = ~((csum & 0xffff) + (csum >> 16)); + if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) + return 0; + return 1; +} + +static __attribute__ ((noinline)) +bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4) +{ + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + + old_eth = *data; + new_eth = *data + sizeof(struct ipv6hdr); + memcpy(new_eth->eth_source, old_eth->eth_source, 6); + memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); + if (inner_v4) + new_eth->eth_proto = 8; + else + new_eth->eth_proto = 56710; + if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr))) + return 0; + *data = (void *)(long)xdp->data; + *data_end = (void *)(long)xdp->data_end; + return 1; +} + +static __attribute__ ((noinline)) +bool decap_v4(struct xdp_md *xdp, void **data, void **data_end) +{ + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + + old_eth = *data; + new_eth = *data + sizeof(struct iphdr); + memcpy(new_eth->eth_source, old_eth->eth_source, 6); + memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); + new_eth->eth_proto = 8; + if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) + return 0; + *data = (void *)(long)xdp->data; + *data_end = (void *)(long)xdp->data_end; + return 1; +} + +static __attribute__ ((noinline)) +int swap_mac_and_send(void *data, void *data_end) +{ + unsigned char tmp_mac[6]; + struct eth_hdr *eth; + + eth = data; + memcpy(tmp_mac, eth->eth_source, 6); + memcpy(eth->eth_source, eth->eth_dest, 6); + memcpy(eth->eth_dest, tmp_mac, 6); + return XDP_TX; +} + +static __attribute__ ((noinline)) +int send_icmp_reply(void *data, void *data_end) +{ + struct icmphdr *icmp_hdr; + __u16 *next_iph_u16; + __u32 tmp_addr = 0; + struct iphdr *iph; + __u32 csum1 = 0; + __u32 csum = 0; + __u64 off = 0; + + if (data + sizeof(struct eth_hdr) + + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end) + return XDP_DROP; + off += sizeof(struct eth_hdr); + iph = data + off; + off += sizeof(struct iphdr); + icmp_hdr = data + off; + icmp_hdr->type = 0; + icmp_hdr->checksum += 0x0007; + iph->ttl = 4; + tmp_addr = iph->daddr; + iph->daddr = iph->saddr; + iph->saddr = tmp_addr; + iph->check = 0; + next_iph_u16 = (__u16 *) iph; +#pragma clang loop unroll(full) + for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) + csum += *next_iph_u16++; + iph->check = ~((csum & 0xffff) + (csum >> 16)); + return swap_mac_and_send(data, data_end); +} + +static __attribute__ ((noinline)) +int send_icmp6_reply(void *data, void *data_end) +{ + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + __be32 tmp_addr[4]; + __u64 off = 0; + + if (data + sizeof(struct eth_hdr) + + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end) + return XDP_DROP; + off += sizeof(struct eth_hdr); + ip6h = data + off; + off += sizeof(struct ipv6hdr); + icmp_hdr = data + off; + icmp_hdr->icmp6_type = 129; + icmp_hdr->icmp6_cksum -= 0x0001; + ip6h->hop_limit = 4; + memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16); + memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16); + memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16); + return swap_mac_and_send(data, data_end); +} + +static __attribute__ ((noinline)) +int parse_icmpv6(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return XDP_DROP; + if (icmp_hdr->icmp6_type == 128) + return send_icmp6_reply(data, data_end); + if (icmp_hdr->icmp6_type != 3) + return XDP_PASS; + off += sizeof(struct icmp6hdr); + ip6h = data + off; + if (ip6h + 1 > data_end) + return XDP_DROP; + pckt->flow.proto = ip6h->nexthdr; + pckt->flags |= (1 << 0); + memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16); + memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16); + return -1; +} + +static __attribute__ ((noinline)) +int parse_icmp(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmphdr *icmp_hdr; + struct iphdr *iph; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return XDP_DROP; + if (icmp_hdr->type == 8) + return send_icmp_reply(data, data_end); + if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4)) + return XDP_PASS; + off += sizeof(struct icmphdr); + iph = data + off; + if (iph + 1 > data_end) + return XDP_DROP; + if (iph->ihl != 5) + return XDP_DROP; + pckt->flow.proto = iph->protocol; + pckt->flags |= (1 << 0); + pckt->flow.src = iph->daddr; + pckt->flow.dst = iph->saddr; + return -1; +} + +static __attribute__ ((noinline)) +__u32 get_packet_hash(struct packet_description *pckt, + bool hash_16bytes) +{ + if (hash_16bytes) + return jhash_2words(jhash(pckt->flow.srcv6, 16, 12), + pckt->flow.ports, 24); + else + return jhash_2words(pckt->flow.src, pckt->flow.ports, + 24); +} + +__attribute__ ((noinline)) +static bool get_packet_dst(struct real_definition **real, + struct packet_description *pckt, + struct vip_meta *vip_info, + bool is_ipv6, void *lru_map) +{ + struct real_pos_lru new_dst_lru = { }; + bool hash_16bytes = is_ipv6; + __u32 *real_pos, hash, key; + __u64 cur_time; + + if (vip_info->flags & (1 << 2)) + hash_16bytes = 1; + if (vip_info->flags & (1 << 3)) { + pckt->flow.port16[0] = pckt->flow.port16[1]; + memset(pckt->flow.srcv6, 0, 16); + } + hash = get_packet_hash(pckt, hash_16bytes); + if (hash != 0x358459b7 /* jhash of ipv4 packet */ && + hash != 0x2f4bc6bb /* jhash of ipv6 packet */) + return 0; + key = 2 * vip_info->vip_num + hash % 2; + real_pos = bpf_map_lookup_elem(&ch_rings, &key); + if (!real_pos) + return 0; + key = *real_pos; + *real = bpf_map_lookup_elem(&reals, &key); + if (!(*real)) + return 0; + if (!(vip_info->flags & (1 << 1))) { + __u32 conn_rate_key = 512 + 2; + struct lb_stats *conn_rate_stats = + bpf_map_lookup_elem(&stats, &conn_rate_key); + + if (!conn_rate_stats) + return 1; + cur_time = bpf_ktime_get_ns(); + if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) { + conn_rate_stats->v1 = 1; + conn_rate_stats->v2 = cur_time; + } else { + conn_rate_stats->v1 += 1; + if (conn_rate_stats->v1 >= 1) + return 1; + } + if (pckt->flow.proto == IPPROTO_UDP) + new_dst_lru.atime = cur_time; + new_dst_lru.pos = key; + bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0); + } + return 1; +} + +__attribute__ ((noinline)) +static void connection_table_lookup(struct real_definition **real, + struct packet_description *pckt, + void *lru_map) +{ + + struct real_pos_lru *dst_lru; + __u64 cur_time; + __u32 key; + + dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow); + if (!dst_lru) + return; + if (pckt->flow.proto == IPPROTO_UDP) { + cur_time = bpf_ktime_get_ns(); + if (cur_time - dst_lru->atime > 300000) + return; + dst_lru->atime = cur_time; + } + key = dst_lru->pos; + *real = bpf_map_lookup_elem(&reals, &key); +} + +/* don't believe your eyes! + * below function has 6 arguments whereas bpf and llvm allow maximum of 5 + * but since it's _static_ llvm can optimize one argument away + */ +__attribute__ ((noinline)) +static int process_l3_headers_v6(struct packet_description *pckt, + __u8 *protocol, __u64 off, + __u16 *pkt_bytes, void *data, + void *data_end) +{ + struct ipv6hdr *ip6h; + __u64 iph_len; + int action; + + ip6h = data + off; + if (ip6h + 1 > data_end) + return XDP_DROP; + iph_len = sizeof(struct ipv6hdr); + *protocol = ip6h->nexthdr; + pckt->flow.proto = *protocol; + *pkt_bytes = __builtin_bswap16(ip6h->payload_len); + off += iph_len; + if (*protocol == 45) { + return XDP_DROP; + } else if (*protocol == 59) { + action = parse_icmpv6(data, data_end, off, pckt); + if (action >= 0) + return action; + } else { + memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16); + memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16); + } + return -1; +} + +__attribute__ ((noinline)) +static int process_l3_headers_v4(struct packet_description *pckt, + __u8 *protocol, __u64 off, + __u16 *pkt_bytes, void *data, + void *data_end) +{ + struct iphdr *iph; + __u64 iph_len; + int action; + + iph = data + off; + if (iph + 1 > data_end) + return XDP_DROP; + if (iph->ihl != 5) + return XDP_DROP; + *protocol = iph->protocol; + pckt->flow.proto = *protocol; + *pkt_bytes = __builtin_bswap16(iph->tot_len); + off += 20; + if (iph->frag_off & 65343) + return XDP_DROP; + if (*protocol == IPPROTO_ICMP) { + action = parse_icmp(data, data_end, off, pckt); + if (action >= 0) + return action; + } else { + pckt->flow.src = iph->saddr; + pckt->flow.dst = iph->daddr; + } + return -1; +} + +__attribute__ ((noinline)) +static int process_packet(void *data, __u64 off, void *data_end, + bool is_ipv6, struct xdp_md *xdp) +{ + + struct real_definition *dst = NULL; + struct packet_description pckt = { }; + struct vip_definition vip = { }; + struct lb_stats *data_stats; + struct eth_hdr *eth = data; + void *lru_map = &lru_cache; + struct vip_meta *vip_info; + __u32 lru_stats_key = 513; + __u32 mac_addr_pos = 0; + __u32 stats_key = 512; + struct ctl_value *cval; + __u16 pkt_bytes; + __u64 iph_len; + __u8 protocol; + __u32 vip_num; + int action; + + if (is_ipv6) + action = process_l3_headers_v6(&pckt, &protocol, off, + &pkt_bytes, data, data_end); + else + action = process_l3_headers_v4(&pckt, &protocol, off, + &pkt_bytes, data, data_end); + if (action >= 0) + return action; + protocol = pckt.flow.proto; + if (protocol == IPPROTO_TCP) { + if (!parse_tcp(data, data_end, is_ipv6, &pckt)) + return XDP_DROP; + } else if (protocol == IPPROTO_UDP) { + if (!parse_udp(data, data_end, is_ipv6, &pckt)) + return XDP_DROP; + } else { + return XDP_TX; + } + + if (is_ipv6) + memcpy(vip.vipv6, pckt.flow.dstv6, 16); + else + vip.vip = pckt.flow.dst; + vip.port = pckt.flow.port16[1]; + vip.proto = pckt.flow.proto; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) { + vip.port = 0; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) + return XDP_PASS; + if (!(vip_info->flags & (1 << 4))) + pckt.flow.port16[1] = 0; + } + if (data_end - data > 1400) + return XDP_DROP; + data_stats = bpf_map_lookup_elem(&stats, &stats_key); + if (!data_stats) + return XDP_DROP; + data_stats->v1 += 1; + if (!dst) { + if (vip_info->flags & (1 << 0)) + pckt.flow.port16[0] = 0; + if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1))) + connection_table_lookup(&dst, &pckt, lru_map); + if (dst) + goto out; + if (pckt.flow.proto == IPPROTO_TCP) { + struct lb_stats *lru_stats = + bpf_map_lookup_elem(&stats, &lru_stats_key); + + if (!lru_stats) + return XDP_DROP; + if (pckt.flags & (1 << 1)) + lru_stats->v1 += 1; + else + lru_stats->v2 += 1; + } + if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map)) + return XDP_DROP; + data_stats->v2 += 1; + } +out: + cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos); + if (!cval) + return XDP_DROP; + if (dst->flags & (1 << 0)) { + if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes)) + return XDP_DROP; + } else { + if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes)) + return XDP_DROP; + } + vip_num = vip_info->vip_num; + data_stats = bpf_map_lookup_elem(&stats, &vip_num); + if (!data_stats) + return XDP_DROP; + data_stats->v1 += 1; + data_stats->v2 += pkt_bytes; + + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + if (data + 4 > data_end) + return XDP_DROP; + *(u32 *)data = dst->dst; + return XDP_DROP; +} + +__attribute__ ((section("xdp-test"), used)) +int balancer_ingress(struct xdp_md *ctx) +{ + void *data = (void *)(long)ctx->data; + void *data_end = (void *)(long)ctx->data_end; + struct eth_hdr *eth = data; + __u32 eth_proto; + __u32 nh_off; + + nh_off = sizeof(struct eth_hdr); + if (data + nh_off > data_end) + return XDP_DROP; + eth_proto = eth->eth_proto; + if (eth_proto == 8) + return process_packet(data, nh_off, data_end, 0, ctx); + else if (eth_proto == 56710) + return process_packet(data, nh_off, data_end, 1, ctx); + else + return XDP_DROP; +} + +char _license[] __attribute__ ((section("license"), used)) = "GPL"; +int _version __attribute__ ((section("version"), used)) = 1; |