diff options
Diffstat (limited to 'tools')
92 files changed, 9120 insertions, 1080 deletions
diff --git a/tools/arch/arm64/include/asm/barrier.h b/tools/arch/arm64/include/asm/barrier.h index 40bde6b23501..12835ea0e417 100644 --- a/tools/arch/arm64/include/asm/barrier.h +++ b/tools/arch/arm64/include/asm/barrier.h @@ -14,4 +14,74 @@ #define wmb() asm volatile("dmb ishst" ::: "memory") #define rmb() asm volatile("dmb ishld" ::: "memory") +#define smp_store_release(p, v) \ +do { \ + union { typeof(*p) __val; char __c[1]; } __u = \ + { .__val = (__force typeof(*p)) (v) }; \ + \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile ("stlrb %w1, %0" \ + : "=Q" (*p) \ + : "r" (*(__u8 *)__u.__c) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile ("stlrh %w1, %0" \ + : "=Q" (*p) \ + : "r" (*(__u16 *)__u.__c) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile ("stlr %w1, %0" \ + : "=Q" (*p) \ + : "r" (*(__u32 *)__u.__c) \ + : "memory"); \ + break; \ + case 8: \ + asm volatile ("stlr %1, %0" \ + : "=Q" (*p) \ + : "r" (*(__u64 *)__u.__c) \ + : "memory"); \ + break; \ + default: \ + /* Only to shut up gcc ... */ \ + mb(); \ + break; \ + } \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + union { typeof(*p) __val; char __c[1]; } __u; \ + \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile ("ldarb %w0, %1" \ + : "=r" (*(__u8 *)__u.__c) \ + : "Q" (*p) : "memory"); \ + break; \ + case 2: \ + asm volatile ("ldarh %w0, %1" \ + : "=r" (*(__u16 *)__u.__c) \ + : "Q" (*p) : "memory"); \ + break; \ + case 4: \ + asm volatile ("ldar %w0, %1" \ + : "=r" (*(__u32 *)__u.__c) \ + : "Q" (*p) : "memory"); \ + break; \ + case 8: \ + asm volatile ("ldar %0, %1" \ + : "=r" (*(__u64 *)__u.__c) \ + : "Q" (*p) : "memory"); \ + break; \ + default: \ + /* Only to shut up gcc ... */ \ + mb(); \ + break; \ + } \ + __u.__val; \ +}) + #endif /* _TOOLS_LINUX_ASM_AARCH64_BARRIER_H */ diff --git a/tools/arch/ia64/include/asm/barrier.h b/tools/arch/ia64/include/asm/barrier.h index d808ee0e77b5..4d471d9511a5 100644 --- a/tools/arch/ia64/include/asm/barrier.h +++ b/tools/arch/ia64/include/asm/barrier.h @@ -46,4 +46,17 @@ #define rmb() mb() #define wmb() mb() +#define smp_store_release(p, v) \ +do { \ + barrier(); \ + WRITE_ONCE(*p, v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + barrier(); \ + ___p1; \ +}) + #endif /* _TOOLS_LINUX_ASM_IA64_BARRIER_H */ diff --git a/tools/arch/powerpc/include/asm/barrier.h b/tools/arch/powerpc/include/asm/barrier.h index a634da05bc97..905a2c66d96d 100644 --- a/tools/arch/powerpc/include/asm/barrier.h +++ b/tools/arch/powerpc/include/asm/barrier.h @@ -27,4 +27,20 @@ #define rmb() __asm__ __volatile__ ("sync" : : : "memory") #define wmb() __asm__ __volatile__ ("sync" : : : "memory") +#if defined(__powerpc64__) +#define smp_lwsync() __asm__ __volatile__ ("lwsync" : : : "memory") + +#define smp_store_release(p, v) \ +do { \ + smp_lwsync(); \ + WRITE_ONCE(*p, v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + smp_lwsync(); \ + ___p1; \ +}) +#endif /* defined(__powerpc64__) */ #endif /* _TOOLS_LINUX_ASM_POWERPC_BARRIER_H */ diff --git a/tools/arch/s390/include/asm/barrier.h b/tools/arch/s390/include/asm/barrier.h index 5030c99f47d2..de362fa664d4 100644 --- a/tools/arch/s390/include/asm/barrier.h +++ b/tools/arch/s390/include/asm/barrier.h @@ -28,4 +28,17 @@ #define rmb() mb() #define wmb() mb() +#define smp_store_release(p, v) \ +do { \ + barrier(); \ + WRITE_ONCE(*p, v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + barrier(); \ + ___p1; \ +}) + #endif /* __TOOLS_LIB_ASM_BARRIER_H */ diff --git a/tools/arch/sparc/include/asm/barrier_64.h b/tools/arch/sparc/include/asm/barrier_64.h index ba61344287d5..cfb0fdc8ccf0 100644 --- a/tools/arch/sparc/include/asm/barrier_64.h +++ b/tools/arch/sparc/include/asm/barrier_64.h @@ -40,4 +40,17 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ #define rmb() __asm__ __volatile__("":::"memory") #define wmb() __asm__ __volatile__("":::"memory") +#define smp_store_release(p, v) \ +do { \ + barrier(); \ + WRITE_ONCE(*p, v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + barrier(); \ + ___p1; \ +}) + #endif /* !(__TOOLS_LINUX_SPARC64_BARRIER_H) */ diff --git a/tools/arch/x86/include/asm/barrier.h b/tools/arch/x86/include/asm/barrier.h index 8774dee27471..58919868473c 100644 --- a/tools/arch/x86/include/asm/barrier.h +++ b/tools/arch/x86/include/asm/barrier.h @@ -26,4 +26,18 @@ #define wmb() asm volatile("sfence" ::: "memory") #endif +#if defined(__x86_64__) +#define smp_store_release(p, v) \ +do { \ + barrier(); \ + WRITE_ONCE(*p, v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + barrier(); \ + ___p1; \ +}) +#endif /* defined(__x86_64__) */ #endif /* _TOOLS_LINUX_ASM_X86_BARRIER_H */ diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index a6258bc8ec4f..f55a2daed59b 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -15,13 +15,15 @@ SYNOPSIS *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } *COMMANDS* := - { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete** - | **pin** | **help** } + { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** + | **delete** | **pin** | **help** } MAP COMMANDS ============= | **bpftool** **map { show | list }** [*MAP*] +| **bpftool** **map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* \ +| **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*] | **bpftool** **map dump** *MAP* | **bpftool** **map update** *MAP* **key** *DATA* **value** *VALUE* [*UPDATE_FLAGS*] | **bpftool** **map lookup** *MAP* **key** *DATA* @@ -36,6 +38,11 @@ MAP COMMANDS | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } | *VALUE* := { *DATA* | *MAP* | *PROG* } | *UPDATE_FLAGS* := { **any** | **exist** | **noexist** } +| *TYPE* := { **hash** | **array** | **prog_array** | **perf_event_array** | **percpu_hash** +| | **percpu_array** | **stack_trace** | **cgroup_array** | **lru_hash** +| | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps** +| | **devmap** | **sockmap** | **cpumap** | **xskmap** | **sockhash** +| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** } DESCRIPTION =========== @@ -47,6 +54,10 @@ DESCRIPTION Output will start with map ID followed by map type and zero or more named attributes (depending on kernel version). + **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*] + Create a new map with given parameters and pin it to *bpffs* + as *FILE*. + **bpftool map dump** *MAP* Dump all entries in a given *MAP*. @@ -75,7 +86,9 @@ DESCRIPTION **bpftool map pin** *MAP* *FILE* Pin map *MAP* as *FILE*. - Note: *FILE* must be located in *bpffs* mount. + Note: *FILE* must be located in *bpffs* mount. It must not + contain a dot character ('.'), which is reserved for future + extensions of *bpffs*. **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*] Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map. diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst new file mode 100644 index 000000000000..408ec30d8872 --- /dev/null +++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst @@ -0,0 +1,139 @@ +================ +bpftool-net +================ +------------------------------------------------------------------------------- +tool for inspection of netdev/tc related bpf prog attachments +------------------------------------------------------------------------------- + +:Manual section: 8 + +SYNOPSIS +======== + + **bpftool** [*OPTIONS*] **net** *COMMAND* + + *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] } + + *COMMANDS* := + { **show** | **list** } [ **dev** name ] | **help** + +NET COMMANDS +============ + +| **bpftool** **net { show | list } [ dev name ]** +| **bpftool** **net help** + +DESCRIPTION +=========== + **bpftool net { show | list } [ dev name ]** + List bpf program attachments in the kernel networking subsystem. + + Currently, only device driver xdp attachments and tc filter + classification/action attachments are implemented, i.e., for + program types **BPF_PROG_TYPE_SCHED_CLS**, + **BPF_PROG_TYPE_SCHED_ACT** and **BPF_PROG_TYPE_XDP**. + For programs attached to a particular cgroup, e.g., + **BPF_PROG_TYPE_CGROUP_SKB**, **BPF_PROG_TYPE_CGROUP_SOCK**, + **BPF_PROG_TYPE_SOCK_OPS** and **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, + users can use **bpftool cgroup** to dump cgroup attachments. + For sk_{filter, skb, msg, reuseport} and lwt/seg6 + bpf programs, users should consult other tools, e.g., iproute2. + + The current output will start with all xdp program attachments, followed by + all tc class/qdisc bpf program attachments. Both xdp programs and + tc programs are ordered based on ifindex number. If multiple bpf + programs attached to the same networking device through **tc filter**, + the order will be first all bpf programs attached to tc classes, then + all bpf programs attached to non clsact qdiscs, and finally all + bpf programs attached to root and clsact qdisc. + + **bpftool net help** + Print short help message. + +OPTIONS +======= + -h, --help + Print short generic help message (similar to **bpftool help**). + + -v, --version + Print version number (similar to **bpftool version**). + + -j, --json + Generate JSON output. For commands that cannot produce JSON, this + option has no effect. + + -p, --pretty + Generate human-readable JSON output. Implies **-j**. + +EXAMPLES +======== + +| **# bpftool net** + +:: + + xdp: + eth0(2) driver id 198 + + tc: + eth0(2) htb name prefix_matcher.o:[cls_prefix_matcher_htb] id 111727 act [] + eth0(2) clsact/ingress fbflow_icmp id 130246 act [] + eth0(2) clsact/egress prefix_matcher.o:[cls_prefix_matcher_clsact] id 111726 + eth0(2) clsact/egress cls_fg_dscp id 108619 act [] + eth0(2) clsact/egress fbflow_egress id 130245 + +| +| **# bpftool -jp net** + +:: + + [{ + "xdp": [{ + "devname": "eth0", + "ifindex": 2, + "mode": "driver", + "id": 198 + } + ], + "tc": [{ + "devname": "eth0", + "ifindex": 2, + "kind": "htb", + "name": "prefix_matcher.o:[cls_prefix_matcher_htb]", + "id": 111727, + "act": [] + },{ + "devname": "eth0", + "ifindex": 2, + "kind": "clsact/ingress", + "name": "fbflow_icmp", + "id": 130246, + "act": [] + },{ + "devname": "eth0", + "ifindex": 2, + "kind": "clsact/egress", + "name": "prefix_matcher.o:[cls_prefix_matcher_clsact]", + "id": 111726, + },{ + "devname": "eth0", + "ifindex": 2, + "kind": "clsact/egress", + "name": "cls_fg_dscp", + "id": 108619, + "act": [] + },{ + "devname": "eth0", + "ifindex": 2, + "kind": "clsact/egress", + "name": "fbflow_egress", + "id": 130245, + } + ] + } + ] + + +SEE ALSO +======== + **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 64156a16d530..ac4e904b10fb 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -25,6 +25,8 @@ MAP COMMANDS | **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}] | **bpftool** **prog pin** *PROG* *FILE* | **bpftool** **prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] +| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* *MAP* +| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* *MAP* | **bpftool** **prog help** | | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } @@ -37,6 +39,7 @@ MAP COMMANDS | **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** | | **cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** | } +| *ATTACH_TYPE* := { **msg_verdict** | **skb_verdict** | **skb_parse** } DESCRIPTION @@ -72,7 +75,9 @@ DESCRIPTION **bpftool prog pin** *PROG* *FILE* Pin program *PROG* as *FILE*. - Note: *FILE* must be located in *bpffs* mount. + Note: *FILE* must be located in *bpffs* mount. It must not + contain a dot character ('.'), which is reserved for future + extensions of *bpffs*. **bpftool prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] Load bpf program from binary *OBJ* and pin as *FILE*. @@ -88,7 +93,17 @@ DESCRIPTION If **dev** *NAME* is specified program will be loaded onto given networking device (offload). - Note: *FILE* must be located in *bpffs* mount. + Note: *FILE* must be located in *bpffs* mount. It must not + contain a dot character ('.'), which is reserved for future + extensions of *bpffs*. + + **bpftool prog attach** *PROG* *ATTACH_TYPE* *MAP* + Attach bpf program *PROG* (with type specified by *ATTACH_TYPE*) + to the map *MAP*. + + **bpftool prog detach** *PROG* *ATTACH_TYPE* *MAP* + Detach bpf program *PROG* (with type specified by *ATTACH_TYPE*) + from the map *MAP*. **bpftool prog help** Print short help message. diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index b6f5d560460d..04cd4f92ab89 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -16,22 +16,24 @@ SYNOPSIS **bpftool** **version** - *OBJECT* := { **map** | **program** | **cgroup** | **perf** } + *OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** } *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** } | { **-j** | **--json** } [{ **-p** | **--pretty** }] } *MAP-COMMANDS* := - { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete** - | **pin** | **event_pipe** | **help** } + { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** + | **delete** | **pin** | **event_pipe** | **help** } *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** - | **load** | **help** } + | **load** | **attach** | **detach** | **help** } *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** } *PERF-COMMANDS* := { **show** | **list** | **help** } + *NET-COMMANDS* := { **show** | **list** | **help** } + DESCRIPTION =========== *bpftool* allows for inspection and simple modification of BPF objects @@ -55,7 +57,11 @@ OPTIONS -p, --pretty Generate human-readable JSON output. Implies **-j**. + -m, --mapcompat + Allow loading maps with unknown map definitions. + + SEE ALSO ======== **bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8) - **bpftool-perf**\ (8) + **bpftool-perf**\ (8), **bpftool-net**\ (8) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 74288a2197ab..dac7eff4c7e5 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -46,6 +46,13 @@ CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \ -I$(srctree)/tools/lib/bpf \ -I$(srctree)/tools/perf CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' +ifneq ($(EXTRA_CFLAGS),) +CFLAGS += $(EXTRA_CFLAGS) +endif +ifneq ($(EXTRA_LDFLAGS),) +LDFLAGS += $(EXTRA_LDFLAGS) +endif + LIBS = -lelf -lbfd -lopcodes $(LIBBPF) INSTALL ?= install @@ -90,7 +97,7 @@ $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $< $(OUTPUT)bpftool: $(OBJS) $(LIBBPF) - $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ $(LIBS) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS) $(OUTPUT)%.o: %.c $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $< diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 598066c40191..3f78e6404589 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -143,7 +143,7 @@ _bpftool_map_update_map_type() local type type=$(bpftool -jp map show $keyword $ref | \ command sed -n 's/.*"type": "\(.*\)",$/\1/p') - printf $type + [[ -n $type ]] && printf $type } _bpftool_map_update_get_id() @@ -184,7 +184,7 @@ _bpftool() # Deal with options if [[ ${words[cword]} == -* ]]; then - local c='--version --json --pretty --bpffs' + local c='--version --json --pretty --bpffs --mapcompat' COMPREPLY=( $( compgen -W "$c" -- "$cur" ) ) return 0 fi @@ -292,6 +292,23 @@ _bpftool() fi return 0 ;; + attach|detach) + if [[ ${#words[@]} == 7 ]]; then + COMPREPLY=( $( compgen -W "id pinned" -- "$cur" ) ) + return 0 + fi + + if [[ ${#words[@]} == 6 ]]; then + COMPREPLY=( $( compgen -W "msg_verdict skb_verdict skb_parse" -- "$cur" ) ) + return 0 + fi + + if [[ $prev == "$command" ]]; then + COMPREPLY=( $( compgen -W "id pinned" -- "$cur" ) ) + return 0 + fi + return 0 + ;; load) local obj @@ -347,7 +364,7 @@ _bpftool() ;; *) [[ $prev == $object ]] && \ - COMPREPLY=( $( compgen -W 'dump help pin load \ + COMPREPLY=( $( compgen -W 'dump help pin attach detach load \ show list' -- "$cur" ) ) ;; esac @@ -370,6 +387,42 @@ _bpftool() ;; esac ;; + create) + case $prev in + $command) + _filedir + return 0 + ;; + type) + COMPREPLY=( $( compgen -W 'hash array prog_array \ + perf_event_array percpu_hash percpu_array \ + stack_trace cgroup_array lru_hash \ + lru_percpu_hash lpm_trie array_of_maps \ + hash_of_maps devmap sockmap cpumap xskmap \ + sockhash cgroup_storage reuseport_sockarray \ + percpu_cgroup_storage' -- \ + "$cur" ) ) + return 0 + ;; + key|value|flags|name|entries) + return 0 + ;; + dev) + _sysfs_get_netdevs + return 0 + ;; + *) + _bpftool_once_attr 'type' + _bpftool_once_attr 'key' + _bpftool_once_attr 'value' + _bpftool_once_attr 'entries' + _bpftool_once_attr 'name' + _bpftool_once_attr 'flags' + _bpftool_once_attr 'dev' + return 0 + ;; + esac + ;; lookup|getnext|delete) case $prev in $command) @@ -483,7 +536,7 @@ _bpftool() *) [[ $prev == $object ]] && \ COMPREPLY=( $( compgen -W 'delete dump getnext help \ - lookup pin event_pipe show list update' -- \ + lookup pin event_pipe show list update create' -- \ "$cur" ) ) ;; esac @@ -494,10 +547,10 @@ _bpftool() _filedir return 0 ;; - tree) - _filedir - return 0 - ;; + tree) + _filedir + return 0 + ;; attach|detach) local ATTACH_TYPES='ingress egress sock_create sock_ops \ device bind4 bind6 post_bind4 post_bind6 connect4 \ @@ -552,6 +605,15 @@ _bpftool() ;; esac ;; + net) + case $command in + *) + [[ $prev == $object ]] && \ + COMPREPLY=( $( compgen -W 'help \ + show list' -- "$cur" ) ) + ;; + esac + ;; esac } && complete -F _bpftool bpftool diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index b3a0709ea7ed..25af85304ebe 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -554,7 +554,9 @@ static int read_sysfs_netdev_hex_int(char *devname, const char *entry_name) return read_sysfs_hex_int(full_path); } -const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino) +const char * +ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, + const char **opt) { char devname[IF_NAMESIZE]; int vendor_id; @@ -579,6 +581,7 @@ const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino) device_id != 0x6000 && device_id != 0x6003) p_info("Unknown NFP device ID, assuming it is NFP-6xxx arch"); + *opt = "ctx4"; return "NFP-6xxx"; default: p_err("Can't get bfd arch name for device vendor id 0x%04x", @@ -618,3 +621,24 @@ void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode) jsonw_string_field(json_wtr, "ifname", name); jsonw_end_object(json_wtr); } + +int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what) +{ + char *endptr; + + NEXT_ARGP(); + + if (*val) { + p_err("%s already specified", what); + return -1; + } + + *val = strtoul(**argv, &endptr, 0); + if (*endptr) { + p_err("can't parse %s as %s", **argv, what); + return -1; + } + NEXT_ARGP(); + + return 0; +} diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index 87439320ef70..c75ffd9ce2bb 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -77,7 +77,7 @@ static int fprintf_json(void *out, const char *fmt, ...) } void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, - const char *arch) + const char *arch, const char *disassembler_options) { disassembler_ftype disassemble; struct disassemble_info info; @@ -116,6 +116,8 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, info.arch = bfd_get_arch(bfdf); info.mach = bfd_get_mach(bfdf); + if (disassembler_options) + info.disassembler_options = disassembler_options; info.buffer = image; info.buffer_length = len; diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index d15a62be6cf0..75a3296dc0bc 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -55,6 +55,7 @@ json_writer_t *json_wtr; bool pretty_output; bool json_output; bool show_pinned; +int bpf_flags; struct pinned_obj_table prog_table; struct pinned_obj_table map_table; @@ -85,7 +86,7 @@ static int do_help(int argc, char **argv) " %s batch file FILE\n" " %s version\n" "\n" - " OBJECT := { prog | map | cgroup | perf }\n" + " OBJECT := { prog | map | cgroup | perf | net }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, bin_name, bin_name); @@ -215,6 +216,7 @@ static const struct cmd cmds[] = { { "map", do_map }, { "cgroup", do_cgroup }, { "perf", do_perf }, + { "net", do_net }, { "version", do_version }, { 0 } }; @@ -319,7 +321,8 @@ static int do_batch(int argc, char **argv) p_err("reading batch file failed: %s", strerror(errno)); err = -1; } else { - p_info("processed %d commands", lines); + if (!json_output) + printf("processed %d commands\n", lines); err = 0; } err_close: @@ -340,6 +343,7 @@ int main(int argc, char **argv) { "pretty", no_argument, NULL, 'p' }, { "version", no_argument, NULL, 'V' }, { "bpffs", no_argument, NULL, 'f' }, + { "mapcompat", no_argument, NULL, 'm' }, { 0 } }; int opt, ret; @@ -354,7 +358,7 @@ int main(int argc, char **argv) hash_init(map_table.table); opterr = 0; - while ((opt = getopt_long(argc, argv, "Vhpjf", + while ((opt = getopt_long(argc, argv, "Vhpjfm", options, NULL)) >= 0) { switch (opt) { case 'V': @@ -378,6 +382,9 @@ int main(int argc, char **argv) case 'f': show_pinned = true; break; + case 'm': + bpf_flags = MAPS_RELAX_COMPAT; + break; default: p_err("unrecognized option '%s'", argv[optind - 1]); if (json_output) diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 238e734d75b3..28322ace2856 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -74,7 +74,7 @@ #define HELP_SPEC_PROGRAM \ "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG }" #define HELP_SPEC_OPTIONS \ - "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} }" + "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} | {-m|--mapcompat}" #define HELP_SPEC_MAP \ "MAP := { id MAP_ID | pinned FILE }" @@ -89,6 +89,7 @@ extern const char *bin_name; extern json_writer_t *json_wtr; extern bool json_output; extern bool show_pinned; +extern int bpf_flags; extern struct pinned_obj_table prog_table; extern struct pinned_obj_table map_table; @@ -136,19 +137,23 @@ int do_map(int argc, char **arg); int do_event_pipe(int argc, char **argv); int do_cgroup(int argc, char **arg); int do_perf(int argc, char **arg); +int do_net(int argc, char **arg); +int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what); int prog_parse_fd(int *argc, char ***argv); int map_parse_fd(int *argc, char ***argv); int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len); void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, - const char *arch); + const char *arch, const char *disassembler_options); void print_data_json(uint8_t *data, size_t len); void print_hex_data_json(uint8_t *data, size_t len); unsigned int get_page_size(void); unsigned int get_possible_cpus(void); -const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino); +const char * +ifindex_to_bfd_params(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, + const char **opt); struct btf_dumper { const struct btf *btf; @@ -165,4 +170,11 @@ struct btf_dumper { */ int btf_dumper_type(const struct btf_dumper *d, __u32 type_id, const void *data); + +struct nlattr; +struct ifinfomsg; +struct tcmsg; +int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb); +int do_filter_dump(struct tcmsg *ifinfo, struct nlattr **tb, const char *kind, + const char *devname, int ifindex); #endif diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index b455930a3eaf..7bf38f0e152e 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -36,6 +36,7 @@ #include <fcntl.h> #include <linux/err.h> #include <linux/kernel.h> +#include <net/if.h> #include <stdbool.h> #include <stdio.h> #include <stdlib.h> @@ -71,13 +72,16 @@ static const char * const map_type_name[] = { [BPF_MAP_TYPE_XSKMAP] = "xskmap", [BPF_MAP_TYPE_SOCKHASH] = "sockhash", [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage", + [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray", + [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage", }; static bool map_is_per_cpu(__u32 type) { return type == BPF_MAP_TYPE_PERCPU_HASH || type == BPF_MAP_TYPE_PERCPU_ARRAY || - type == BPF_MAP_TYPE_LRU_PERCPU_HASH; + type == BPF_MAP_TYPE_LRU_PERCPU_HASH || + type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE; } static bool map_is_map_of_maps(__u32 type) @@ -91,6 +95,17 @@ static bool map_is_map_of_progs(__u32 type) return type == BPF_MAP_TYPE_PROG_ARRAY; } +static int map_type_from_str(const char *type) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(map_type_name); i++) + /* Don't allow prefixing in case of possible future shadowing */ + if (map_type_name[i] && !strcmp(map_type_name[i], type)) + return i; + return -1; +} + static void *alloc_value(struct bpf_map_info *info) { if (map_is_per_cpu(info->type)) @@ -170,9 +185,28 @@ static int do_dump_btf(const struct btf_dumper *d, if (ret) goto err_end_obj; - jsonw_name(d->jw, "value"); + if (!map_is_per_cpu(map_info->type)) { + jsonw_name(d->jw, "value"); + ret = btf_dumper_type(d, map_info->btf_value_type_id, value); + } else { + unsigned int i, n, step; - ret = btf_dumper_type(d, map_info->btf_value_type_id, value); + jsonw_name(d->jw, "values"); + jsonw_start_array(d->jw); + n = get_possible_cpus(); + step = round_up(map_info->value_size, 8); + for (i = 0; i < n; i++) { + jsonw_start_object(d->jw); + jsonw_int_field(d->jw, "cpu", i); + jsonw_name(d->jw, "value"); + ret = btf_dumper_type(d, map_info->btf_value_type_id, + value + i * step); + jsonw_end_object(d->jw); + if (ret) + break; + } + jsonw_end_array(d->jw); + } err_end_obj: /* end of key-value pair */ @@ -299,11 +333,40 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key, jsonw_end_object(json_wtr); } jsonw_end_array(json_wtr); + if (btf) { + struct btf_dumper d = { + .btf = btf, + .jw = json_wtr, + .is_plain_text = false, + }; + + jsonw_name(json_wtr, "formatted"); + do_dump_btf(&d, info, key, value); + } } jsonw_end_object(json_wtr); } +static void print_entry_error(struct bpf_map_info *info, unsigned char *key, + const char *value) +{ + int value_size = strlen(value); + bool single_line, break_names; + + break_names = info->key_size > 16 || value_size > 16; + single_line = info->key_size + value_size <= 24 && !break_names; + + printf("key:%c", break_names ? '\n' : ' '); + fprint_hex(stdout, key, info->key_size, " "); + + printf(single_line ? " " : "\n"); + + printf("value:%c%s", break_names ? '\n' : ' ', value); + + printf("\n"); +} + static void print_entry_plain(struct bpf_map_info *info, unsigned char *key, unsigned char *value) { @@ -626,6 +689,54 @@ static int do_show(int argc, char **argv) return errno == ENOENT ? 0 : -1; } +static int dump_map_elem(int fd, void *key, void *value, + struct bpf_map_info *map_info, struct btf *btf, + json_writer_t *btf_wtr) +{ + int num_elems = 0; + int lookup_errno; + + if (!bpf_map_lookup_elem(fd, key, value)) { + if (json_output) { + print_entry_json(map_info, key, value, btf); + } else { + if (btf) { + struct btf_dumper d = { + .btf = btf, + .jw = btf_wtr, + .is_plain_text = true, + }; + + do_dump_btf(&d, map_info, key, value); + } else { + print_entry_plain(map_info, key, value); + } + num_elems++; + } + return num_elems; + } + + /* lookup error handling */ + lookup_errno = errno; + + if (map_is_map_of_maps(map_info->type) || + map_is_map_of_progs(map_info->type)) + return 0; + + if (json_output) { + jsonw_name(json_wtr, "key"); + print_hex_data_json(key, map_info->key_size); + jsonw_name(json_wtr, "value"); + jsonw_start_object(json_wtr); + jsonw_string_field(json_wtr, "error", strerror(lookup_errno)); + jsonw_end_object(json_wtr); + } else { + print_entry_error(map_info, key, strerror(lookup_errno)); + } + + return 0; +} + static int do_dump(int argc, char **argv) { struct bpf_map_info info = {}; @@ -644,12 +755,6 @@ static int do_dump(int argc, char **argv) if (fd < 0) return -1; - if (map_is_map_of_maps(info.type) || map_is_map_of_progs(info.type)) { - p_err("Dumping maps of maps and program maps not supported"); - close(fd); - return -1; - } - key = malloc(info.key_size); value = alloc_value(&info); if (!key || !value) { @@ -687,40 +792,8 @@ static int do_dump(int argc, char **argv) err = 0; break; } - - if (!bpf_map_lookup_elem(fd, key, value)) { - if (json_output) - print_entry_json(&info, key, value, btf); - else - if (btf) { - struct btf_dumper d = { - .btf = btf, - .jw = btf_wtr, - .is_plain_text = true, - }; - - do_dump_btf(&d, &info, key, value); - } else { - print_entry_plain(&info, key, value); - } - } else { - if (json_output) { - jsonw_name(json_wtr, "key"); - print_hex_data_json(key, info.key_size); - jsonw_name(json_wtr, "value"); - jsonw_start_object(json_wtr); - jsonw_string_field(json_wtr, "error", - "can't lookup element"); - jsonw_end_object(json_wtr); - } else { - p_info("can't lookup element with key: "); - fprint_hex(stderr, key, info.key_size, " "); - fprintf(stderr, "\n"); - } - } - + num_elems += dump_map_elem(fd, key, value, &info, btf, btf_wtr); prev_key = key; - num_elems++; } if (json_output) @@ -997,6 +1070,92 @@ static int do_pin(int argc, char **argv) return err; } +static int do_create(int argc, char **argv) +{ + struct bpf_create_map_attr attr = { NULL, }; + const char *pinfile; + int err, fd; + + if (!REQ_ARGS(7)) + return -1; + pinfile = GET_ARG(); + + while (argc) { + if (!REQ_ARGS(2)) + return -1; + + if (is_prefix(*argv, "type")) { + NEXT_ARG(); + + if (attr.map_type) { + p_err("map type already specified"); + return -1; + } + + attr.map_type = map_type_from_str(*argv); + if ((int)attr.map_type < 0) { + p_err("unrecognized map type: %s", *argv); + return -1; + } + NEXT_ARG(); + } else if (is_prefix(*argv, "name")) { + NEXT_ARG(); + attr.name = GET_ARG(); + } else if (is_prefix(*argv, "key")) { + if (parse_u32_arg(&argc, &argv, &attr.key_size, + "key size")) + return -1; + } else if (is_prefix(*argv, "value")) { + if (parse_u32_arg(&argc, &argv, &attr.value_size, + "value size")) + return -1; + } else if (is_prefix(*argv, "entries")) { + if (parse_u32_arg(&argc, &argv, &attr.max_entries, + "max entries")) + return -1; + } else if (is_prefix(*argv, "flags")) { + if (parse_u32_arg(&argc, &argv, &attr.map_flags, + "flags")) + return -1; + } else if (is_prefix(*argv, "dev")) { + NEXT_ARG(); + + if (attr.map_ifindex) { + p_err("offload device already specified"); + return -1; + } + + attr.map_ifindex = if_nametoindex(*argv); + if (!attr.map_ifindex) { + p_err("unrecognized netdevice '%s': %s", + *argv, strerror(errno)); + return -1; + } + NEXT_ARG(); + } + } + + if (!attr.name) { + p_err("map name not specified"); + return -1; + } + + fd = bpf_create_map_xattr(&attr); + if (fd < 0) { + p_err("map create failed: %s", strerror(errno)); + return -1; + } + + err = do_pin_fd(fd, pinfile); + close(fd); + if (err) + return err; + + if (json_output) + jsonw_null(json_wtr); + return 0; +} + static int do_help(int argc, char **argv) { if (json_output) { @@ -1006,6 +1165,9 @@ static int do_help(int argc, char **argv) fprintf(stderr, "Usage: %s %s { show | list } [MAP]\n" + " %s %s create FILE type TYPE key KEY_SIZE value VALUE_SIZE \\\n" + " entries MAX_ENTRIES name NAME [flags FLAGS] \\\n" + " [dev NAME]\n" " %s %s dump MAP\n" " %s %s update MAP key DATA value VALUE [UPDATE_FLAGS]\n" " %s %s lookup MAP key DATA\n" @@ -1020,11 +1182,17 @@ static int do_help(int argc, char **argv) " " HELP_SPEC_PROGRAM "\n" " VALUE := { DATA | MAP | PROG }\n" " UPDATE_FLAGS := { any | exist | noexist }\n" + " TYPE := { hash | array | prog_array | perf_event_array | percpu_hash |\n" + " percpu_array | stack_trace | cgroup_array | lru_hash |\n" + " lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n" + " devmap | sockmap | cpumap | xskmap | sockhash |\n" + " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], - bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]); + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], + bin_name, argv[-2]); return 0; } @@ -1040,6 +1208,7 @@ static const struct cmd cmds[] = { { "delete", do_delete }, { "pin", do_pin }, { "event_pipe", do_event_pipe }, + { "create", do_create }, { 0 } }; diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c index 6d41323be291..bdaf4062e26e 100644 --- a/tools/bpf/bpftool/map_perf_ring.c +++ b/tools/bpf/bpftool/map_perf_ring.c @@ -50,15 +50,17 @@ static void int_exit(int signo) stop = true; } -static enum bpf_perf_event_ret print_bpf_output(void *event, void *priv) +static enum bpf_perf_event_ret +print_bpf_output(struct perf_event_header *event, void *private_data) { - struct event_ring_info *ring = priv; - struct perf_event_sample *e = event; + struct perf_event_sample *e = container_of(event, struct perf_event_sample, + header); + struct event_ring_info *ring = private_data; struct { struct perf_event_header header; __u64 id; __u64 lost; - } *lost = event; + } *lost = (typeof(lost))event; if (json_output) { jsonw_start_object(json_wtr); diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c new file mode 100644 index 000000000000..d441bb7035ca --- /dev/null +++ b/tools/bpf/bpftool/net.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright (C) 2018 Facebook + +#define _GNU_SOURCE +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <libbpf.h> +#include <net/if.h> +#include <linux/if.h> +#include <linux/rtnetlink.h> +#include <linux/tc_act/tc_bpf.h> +#include <sys/socket.h> + +#include <bpf.h> +#include <nlattr.h> +#include "main.h" +#include "netlink_dumper.h" + +struct ip_devname_ifindex { + char devname[64]; + int ifindex; +}; + +struct bpf_netdev_t { + struct ip_devname_ifindex *devices; + int used_len; + int array_len; + int filter_idx; +}; + +struct tc_kind_handle { + char kind[64]; + int handle; +}; + +struct bpf_tcinfo_t { + struct tc_kind_handle *handle_array; + int used_len; + int array_len; + bool is_qdisc; +}; + +struct bpf_filter_t { + const char *kind; + const char *devname; + int ifindex; +}; + +static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb) +{ + struct bpf_netdev_t *netinfo = cookie; + struct ifinfomsg *ifinfo = msg; + + if (netinfo->filter_idx > 0 && netinfo->filter_idx != ifinfo->ifi_index) + return 0; + + if (netinfo->used_len == netinfo->array_len) { + netinfo->devices = realloc(netinfo->devices, + (netinfo->array_len + 16) * + sizeof(struct ip_devname_ifindex)); + if (!netinfo->devices) + return -ENOMEM; + + netinfo->array_len += 16; + } + netinfo->devices[netinfo->used_len].ifindex = ifinfo->ifi_index; + snprintf(netinfo->devices[netinfo->used_len].devname, + sizeof(netinfo->devices[netinfo->used_len].devname), + "%s", + tb[IFLA_IFNAME] + ? libbpf_nla_getattr_str(tb[IFLA_IFNAME]) + : ""); + netinfo->used_len++; + + return do_xdp_dump(ifinfo, tb); +} + +static int dump_class_qdisc_nlmsg(void *cookie, void *msg, struct nlattr **tb) +{ + struct bpf_tcinfo_t *tcinfo = cookie; + struct tcmsg *info = msg; + + if (tcinfo->is_qdisc) { + /* skip clsact qdisc */ + if (tb[TCA_KIND] && + strcmp(libbpf_nla_data(tb[TCA_KIND]), "clsact") == 0) + return 0; + if (info->tcm_handle == 0) + return 0; + } + + if (tcinfo->used_len == tcinfo->array_len) { + tcinfo->handle_array = realloc(tcinfo->handle_array, + (tcinfo->array_len + 16) * sizeof(struct tc_kind_handle)); + if (!tcinfo->handle_array) + return -ENOMEM; + + tcinfo->array_len += 16; + } + tcinfo->handle_array[tcinfo->used_len].handle = info->tcm_handle; + snprintf(tcinfo->handle_array[tcinfo->used_len].kind, + sizeof(tcinfo->handle_array[tcinfo->used_len].kind), + "%s", + tb[TCA_KIND] + ? libbpf_nla_getattr_str(tb[TCA_KIND]) + : "unknown"); + tcinfo->used_len++; + + return 0; +} + +static int dump_filter_nlmsg(void *cookie, void *msg, struct nlattr **tb) +{ + const struct bpf_filter_t *filter_info = cookie; + + return do_filter_dump((struct tcmsg *)msg, tb, filter_info->kind, + filter_info->devname, filter_info->ifindex); +} + +static int show_dev_tc_bpf(int sock, unsigned int nl_pid, + struct ip_devname_ifindex *dev) +{ + struct bpf_filter_t filter_info; + struct bpf_tcinfo_t tcinfo; + int i, handle, ret = 0; + + tcinfo.handle_array = NULL; + tcinfo.used_len = 0; + tcinfo.array_len = 0; + + tcinfo.is_qdisc = false; + ret = libbpf_nl_get_class(sock, nl_pid, dev->ifindex, + dump_class_qdisc_nlmsg, &tcinfo); + if (ret) + goto out; + + tcinfo.is_qdisc = true; + ret = libbpf_nl_get_qdisc(sock, nl_pid, dev->ifindex, + dump_class_qdisc_nlmsg, &tcinfo); + if (ret) + goto out; + + filter_info.devname = dev->devname; + filter_info.ifindex = dev->ifindex; + for (i = 0; i < tcinfo.used_len; i++) { + filter_info.kind = tcinfo.handle_array[i].kind; + ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, + tcinfo.handle_array[i].handle, + dump_filter_nlmsg, &filter_info); + if (ret) + goto out; + } + + /* root, ingress and egress handle */ + handle = TC_H_ROOT; + filter_info.kind = "root"; + ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle, + dump_filter_nlmsg, &filter_info); + if (ret) + goto out; + + handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS); + filter_info.kind = "clsact/ingress"; + ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle, + dump_filter_nlmsg, &filter_info); + if (ret) + goto out; + + handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS); + filter_info.kind = "clsact/egress"; + ret = libbpf_nl_get_filter(sock, nl_pid, dev->ifindex, handle, + dump_filter_nlmsg, &filter_info); + if (ret) + goto out; + +out: + free(tcinfo.handle_array); + return 0; +} + +static int do_show(int argc, char **argv) +{ + int i, sock, ret, filter_idx = -1; + struct bpf_netdev_t dev_array; + unsigned int nl_pid; + char err_buf[256]; + + if (argc == 2) { + if (strcmp(argv[0], "dev") != 0) + usage(); + filter_idx = if_nametoindex(argv[1]); + if (filter_idx == 0) { + fprintf(stderr, "invalid dev name %s\n", argv[1]); + return -1; + } + } else if (argc != 0) { + usage(); + } + + sock = libbpf_netlink_open(&nl_pid); + if (sock < 0) { + fprintf(stderr, "failed to open netlink sock\n"); + return -1; + } + + dev_array.devices = NULL; + dev_array.used_len = 0; + dev_array.array_len = 0; + dev_array.filter_idx = filter_idx; + + if (json_output) + jsonw_start_array(json_wtr); + NET_START_OBJECT; + NET_START_ARRAY("xdp", "%s:\n"); + ret = libbpf_nl_get_link(sock, nl_pid, dump_link_nlmsg, &dev_array); + NET_END_ARRAY("\n"); + + if (!ret) { + NET_START_ARRAY("tc", "%s:\n"); + for (i = 0; i < dev_array.used_len; i++) { + ret = show_dev_tc_bpf(sock, nl_pid, + &dev_array.devices[i]); + if (ret) + break; + } + NET_END_ARRAY("\n"); + } + NET_END_OBJECT; + if (json_output) + jsonw_end_array(json_wtr); + + if (ret) { + if (json_output) + jsonw_null(json_wtr); + libbpf_strerror(ret, err_buf, sizeof(err_buf)); + fprintf(stderr, "Error: %s\n", err_buf); + } + free(dev_array.devices); + close(sock); + return ret; +} + +static int do_help(int argc, char **argv) +{ + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + + fprintf(stderr, + "Usage: %s %s { show | list } [dev <devname>]\n" + " %s %s help\n" + "Note: Only xdp and tc attachments are supported now.\n" + " For progs attached to cgroups, use \"bpftool cgroup\"\n" + " to dump program attachments. For program types\n" + " sk_{filter,skb,msg,reuseport} and lwt/seg6, please\n" + " consult iproute2.\n", + bin_name, argv[-2], bin_name, argv[-2]); + + return 0; +} + +static const struct cmd cmds[] = { + { "show", do_show }, + { "list", do_show }, + { "help", do_help }, + { 0 } +}; + +int do_net(int argc, char **argv) +{ + return cmd_select(cmds, argc, argv, do_help); +} diff --git a/tools/bpf/bpftool/netlink_dumper.c b/tools/bpf/bpftool/netlink_dumper.c new file mode 100644 index 000000000000..4e9f4531269f --- /dev/null +++ b/tools/bpf/bpftool/netlink_dumper.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright (C) 2018 Facebook + +#include <stdlib.h> +#include <string.h> +#include <libbpf.h> +#include <linux/rtnetlink.h> +#include <linux/tc_act/tc_bpf.h> + +#include <nlattr.h> +#include "main.h" +#include "netlink_dumper.h" + +static void xdp_dump_prog_id(struct nlattr **tb, int attr, + const char *mode, + bool new_json_object) +{ + if (!tb[attr]) + return; + + if (new_json_object) + NET_START_OBJECT + NET_DUMP_STR("mode", " %s", mode); + NET_DUMP_UINT("id", " id %u", libbpf_nla_getattr_u32(tb[attr])) + if (new_json_object) + NET_END_OBJECT +} + +static int do_xdp_dump_one(struct nlattr *attr, unsigned int ifindex, + const char *name) +{ + struct nlattr *tb[IFLA_XDP_MAX + 1]; + unsigned char mode; + + if (libbpf_nla_parse_nested(tb, IFLA_XDP_MAX, attr, NULL) < 0) + return -1; + + if (!tb[IFLA_XDP_ATTACHED]) + return 0; + + mode = libbpf_nla_getattr_u8(tb[IFLA_XDP_ATTACHED]); + if (mode == XDP_ATTACHED_NONE) + return 0; + + NET_START_OBJECT; + if (name) + NET_DUMP_STR("devname", "%s", name); + NET_DUMP_UINT("ifindex", "(%d)", ifindex); + + if (mode == XDP_ATTACHED_MULTI) { + if (json_output) { + jsonw_name(json_wtr, "multi_attachments"); + jsonw_start_array(json_wtr); + } + xdp_dump_prog_id(tb, IFLA_XDP_SKB_PROG_ID, "generic", true); + xdp_dump_prog_id(tb, IFLA_XDP_DRV_PROG_ID, "driver", true); + xdp_dump_prog_id(tb, IFLA_XDP_HW_PROG_ID, "offload", true); + if (json_output) + jsonw_end_array(json_wtr); + } else if (mode == XDP_ATTACHED_DRV) { + xdp_dump_prog_id(tb, IFLA_XDP_PROG_ID, "driver", false); + } else if (mode == XDP_ATTACHED_SKB) { + xdp_dump_prog_id(tb, IFLA_XDP_PROG_ID, "generic", false); + } else if (mode == XDP_ATTACHED_HW) { + xdp_dump_prog_id(tb, IFLA_XDP_PROG_ID, "offload", false); + } + + NET_END_OBJECT_FINAL; + return 0; +} + +int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb) +{ + if (!tb[IFLA_XDP]) + return 0; + + return do_xdp_dump_one(tb[IFLA_XDP], ifinfo->ifi_index, + libbpf_nla_getattr_str(tb[IFLA_IFNAME])); +} + +static int do_bpf_dump_one_act(struct nlattr *attr) +{ + struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; + + if (libbpf_nla_parse_nested(tb, TCA_ACT_BPF_MAX, attr, NULL) < 0) + return -LIBBPF_ERRNO__NLPARSE; + + if (!tb[TCA_ACT_BPF_PARMS]) + return -LIBBPF_ERRNO__NLPARSE; + + NET_START_OBJECT_NESTED2; + if (tb[TCA_ACT_BPF_NAME]) + NET_DUMP_STR("name", "%s", + libbpf_nla_getattr_str(tb[TCA_ACT_BPF_NAME])); + if (tb[TCA_ACT_BPF_ID]) + NET_DUMP_UINT("id", " id %u", + libbpf_nla_getattr_u32(tb[TCA_ACT_BPF_ID])); + NET_END_OBJECT_NESTED; + return 0; +} + +static int do_dump_one_act(struct nlattr *attr) +{ + struct nlattr *tb[TCA_ACT_MAX + 1]; + + if (!attr) + return 0; + + if (libbpf_nla_parse_nested(tb, TCA_ACT_MAX, attr, NULL) < 0) + return -LIBBPF_ERRNO__NLPARSE; + + if (tb[TCA_ACT_KIND] && + strcmp(libbpf_nla_data(tb[TCA_ACT_KIND]), "bpf") == 0) + return do_bpf_dump_one_act(tb[TCA_ACT_OPTIONS]); + + return 0; +} + +static int do_bpf_act_dump(struct nlattr *attr) +{ + struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; + int act, ret; + + if (libbpf_nla_parse_nested(tb, TCA_ACT_MAX_PRIO, attr, NULL) < 0) + return -LIBBPF_ERRNO__NLPARSE; + + NET_START_ARRAY("act", " %s ["); + for (act = 0; act <= TCA_ACT_MAX_PRIO; act++) { + ret = do_dump_one_act(tb[act]); + if (ret) + break; + } + NET_END_ARRAY("] "); + + return ret; +} + +static int do_bpf_filter_dump(struct nlattr *attr) +{ + struct nlattr *tb[TCA_BPF_MAX + 1]; + int ret; + + if (libbpf_nla_parse_nested(tb, TCA_BPF_MAX, attr, NULL) < 0) + return -LIBBPF_ERRNO__NLPARSE; + + if (tb[TCA_BPF_NAME]) + NET_DUMP_STR("name", " %s", + libbpf_nla_getattr_str(tb[TCA_BPF_NAME])); + if (tb[TCA_BPF_ID]) + NET_DUMP_UINT("id", " id %u", + libbpf_nla_getattr_u32(tb[TCA_BPF_ID])); + if (tb[TCA_BPF_ACT]) { + ret = do_bpf_act_dump(tb[TCA_BPF_ACT]); + if (ret) + return ret; + } + + return 0; +} + +int do_filter_dump(struct tcmsg *info, struct nlattr **tb, const char *kind, + const char *devname, int ifindex) +{ + int ret = 0; + + if (tb[TCA_OPTIONS] && + strcmp(libbpf_nla_data(tb[TCA_KIND]), "bpf") == 0) { + NET_START_OBJECT; + if (devname[0] != '\0') + NET_DUMP_STR("devname", "%s", devname); + NET_DUMP_UINT("ifindex", "(%u)", ifindex); + NET_DUMP_STR("kind", " %s", kind); + ret = do_bpf_filter_dump(tb[TCA_OPTIONS]); + NET_END_OBJECT_FINAL; + } + + return ret; +} diff --git a/tools/bpf/bpftool/netlink_dumper.h b/tools/bpf/bpftool/netlink_dumper.h new file mode 100644 index 000000000000..e3516b586a34 --- /dev/null +++ b/tools/bpf/bpftool/netlink_dumper.h @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright (C) 2018 Facebook + +#ifndef _NETLINK_DUMPER_H_ +#define _NETLINK_DUMPER_H_ + +#define NET_START_OBJECT \ +{ \ + if (json_output) \ + jsonw_start_object(json_wtr); \ +} + +#define NET_START_OBJECT_NESTED(name) \ +{ \ + if (json_output) { \ + jsonw_name(json_wtr, name); \ + jsonw_start_object(json_wtr); \ + } else { \ + fprintf(stdout, "%s {", name); \ + } \ +} + +#define NET_START_OBJECT_NESTED2 \ +{ \ + if (json_output) \ + jsonw_start_object(json_wtr); \ + else \ + fprintf(stdout, "{"); \ +} + +#define NET_END_OBJECT_NESTED \ +{ \ + if (json_output) \ + jsonw_end_object(json_wtr); \ + else \ + fprintf(stdout, "}"); \ +} + +#define NET_END_OBJECT \ +{ \ + if (json_output) \ + jsonw_end_object(json_wtr); \ +} + +#define NET_END_OBJECT_FINAL \ +{ \ + if (json_output) \ + jsonw_end_object(json_wtr); \ + else \ + fprintf(stdout, "\n"); \ +} + +#define NET_START_ARRAY(name, fmt_str) \ +{ \ + if (json_output) { \ + jsonw_name(json_wtr, name); \ + jsonw_start_array(json_wtr); \ + } else { \ + fprintf(stdout, fmt_str, name); \ + } \ +} + +#define NET_END_ARRAY(endstr) \ +{ \ + if (json_output) \ + jsonw_end_array(json_wtr); \ + else \ + fprintf(stdout, "%s", endstr); \ +} + +#define NET_DUMP_UINT(name, fmt_str, val) \ +{ \ + if (json_output) \ + jsonw_uint_field(json_wtr, name, val); \ + else \ + fprintf(stdout, fmt_str, val); \ +} + +#define NET_DUMP_STR(name, fmt_str, str) \ +{ \ + if (json_output) \ + jsonw_string_field(json_wtr, name, str);\ + else \ + fprintf(stdout, fmt_str, str); \ +} + +#define NET_DUMP_STR_ONLY(str) \ +{ \ + if (json_output) \ + jsonw_string(json_wtr, str); \ + else \ + fprintf(stdout, "%s ", str); \ +} + +#endif diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index dce960d22106..5302ee282409 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -74,8 +74,29 @@ static const char * const prog_type_name[] = { [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", + [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", }; +static const char * const attach_type_strings[] = { + [BPF_SK_SKB_STREAM_PARSER] = "stream_parser", + [BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict", + [BPF_SK_MSG_VERDICT] = "msg_verdict", + [__MAX_BPF_ATTACH_TYPE] = NULL, +}; + +enum bpf_attach_type parse_attach_type(const char *str) +{ + enum bpf_attach_type type; + + for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { + if (attach_type_strings[type] && + is_prefix(str, attach_type_strings[type])) + return type; + } + + return __MAX_BPF_ATTACH_TYPE; +} + static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) { struct timespec real_time_ts, boot_time_ts; @@ -428,6 +449,7 @@ static int do_dump(int argc, char **argv) unsigned long *func_ksyms = NULL; struct bpf_prog_info info = {}; unsigned int *func_lens = NULL; + const char *disasm_opt = NULL; unsigned int nr_func_ksyms; unsigned int nr_func_lens; struct dump_data dd = {}; @@ -586,9 +608,10 @@ static int do_dump(int argc, char **argv) const char *name = NULL; if (info.ifindex) { - name = ifindex_to_bfd_name_ns(info.ifindex, - info.netns_dev, - info.netns_ino); + name = ifindex_to_bfd_params(info.ifindex, + info.netns_dev, + info.netns_ino, + &disasm_opt); if (!name) goto err_free; } @@ -630,7 +653,8 @@ static int do_dump(int argc, char **argv) printf("%s:\n", sym_name); } - disasm_print_insn(img, lens[i], opcodes, name); + disasm_print_insn(img, lens[i], opcodes, name, + disasm_opt); img += lens[i]; if (json_output) @@ -642,7 +666,8 @@ static int do_dump(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); } else { - disasm_print_insn(buf, *member_len, opcodes, name); + disasm_print_insn(buf, *member_len, opcodes, name, + disasm_opt); } } else if (visual) { if (json_output) @@ -696,6 +721,77 @@ int map_replace_compar(const void *p1, const void *p2) return a->idx - b->idx; } +static int do_attach(int argc, char **argv) +{ + enum bpf_attach_type attach_type; + int err, mapfd, progfd; + + if (!REQ_ARGS(5)) { + p_err("too few parameters for map attach"); + return -EINVAL; + } + + progfd = prog_parse_fd(&argc, &argv); + if (progfd < 0) + return progfd; + + attach_type = parse_attach_type(*argv); + if (attach_type == __MAX_BPF_ATTACH_TYPE) { + p_err("invalid attach type"); + return -EINVAL; + } + NEXT_ARG(); + + mapfd = map_parse_fd(&argc, &argv); + if (mapfd < 0) + return mapfd; + + err = bpf_prog_attach(progfd, mapfd, attach_type, 0); + if (err) { + p_err("failed prog attach to map"); + return -EINVAL; + } + + if (json_output) + jsonw_null(json_wtr); + return 0; +} + +static int do_detach(int argc, char **argv) +{ + enum bpf_attach_type attach_type; + int err, mapfd, progfd; + + if (!REQ_ARGS(5)) { + p_err("too few parameters for map detach"); + return -EINVAL; + } + + progfd = prog_parse_fd(&argc, &argv); + if (progfd < 0) + return progfd; + + attach_type = parse_attach_type(*argv); + if (attach_type == __MAX_BPF_ATTACH_TYPE) { + p_err("invalid attach type"); + return -EINVAL; + } + NEXT_ARG(); + + mapfd = map_parse_fd(&argc, &argv); + if (mapfd < 0) + return mapfd; + + err = bpf_prog_detach2(progfd, mapfd, attach_type); + if (err) { + p_err("failed prog detach from map"); + return -EINVAL; + } + + if (json_output) + jsonw_null(json_wtr); + return 0; +} static int do_load(int argc, char **argv) { enum bpf_attach_type expected_attach_type; @@ -816,7 +912,7 @@ static int do_load(int argc, char **argv) } } - obj = bpf_object__open_xattr(&attr); + obj = __bpf_object__open_xattr(&attr, bpf_flags); if (IS_ERR_OR_NULL(obj)) { p_err("failed to open object file"); goto err_free_reuse_maps; @@ -941,6 +1037,8 @@ static int do_help(int argc, char **argv) " %s %s pin PROG FILE\n" " %s %s load OBJ FILE [type TYPE] [dev NAME] \\\n" " [map { idx IDX | name NAME } MAP]\n" + " %s %s attach PROG ATTACH_TYPE MAP\n" + " %s %s detach PROG ATTACH_TYPE MAP\n" " %s %s help\n" "\n" " " HELP_SPEC_MAP "\n" @@ -952,10 +1050,12 @@ static int do_help(int argc, char **argv) " cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n" " cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n" " cgroup/sendmsg4 | cgroup/sendmsg6 }\n" + " ATTACH_TYPE := { msg_verdict | skb_verdict | skb_parse }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], - bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]); + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], + bin_name, argv[-2], bin_name, argv[-2]); return 0; } @@ -967,6 +1067,8 @@ static const struct cmd cmds[] = { { "dump", do_dump }, { "pin", do_pin }, { "load", do_load }, + { "attach", do_attach }, + { "detach", do_detach }, { 0 } }; diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h index 391d942536e5..8d378c57cb01 100644 --- a/tools/include/asm/barrier.h +++ b/tools/include/asm/barrier.h @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/compiler.h> #if defined(__i386__) || defined(__x86_64__) #include "../../arch/x86/include/asm/barrier.h" #elif defined(__arm__) @@ -26,3 +27,37 @@ #else #include <asm-generic/barrier.h> #endif + +/* + * Generic fallback smp_*() definitions for archs that haven't + * been updated yet. + */ + +#ifndef smp_rmb +# define smp_rmb() rmb() +#endif + +#ifndef smp_wmb +# define smp_wmb() wmb() +#endif + +#ifndef smp_mb +# define smp_mb() mb() +#endif + +#ifndef smp_store_release +# define smp_store_release(p, v) \ +do { \ + smp_mb(); \ + WRITE_ONCE(*p, v); \ +} while (0) +#endif + +#ifndef smp_load_acquire +# define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + smp_mb(); \ + ___p1; \ +}) +#endif diff --git a/tools/include/linux/ring_buffer.h b/tools/include/linux/ring_buffer.h new file mode 100644 index 000000000000..9a083ae60473 --- /dev/null +++ b/tools/include/linux/ring_buffer.h @@ -0,0 +1,73 @@ +#ifndef _TOOLS_LINUX_RING_BUFFER_H_ +#define _TOOLS_LINUX_RING_BUFFER_H_ + +#include <asm/barrier.h> + +/* + * Contract with kernel for walking the perf ring buffer from + * user space requires the following barrier pairing (quote + * from kernel/events/ring_buffer.c): + * + * Since the mmap() consumer (userspace) can run on a + * different CPU: + * + * kernel user + * + * if (LOAD ->data_tail) { LOAD ->data_head + * (A) smp_rmb() (C) + * STORE $data LOAD $data + * smp_wmb() (B) smp_mb() (D) + * STORE ->data_head STORE ->data_tail + * } + * + * Where A pairs with D, and B pairs with C. + * + * In our case A is a control dependency that separates the + * load of the ->data_tail and the stores of $data. In case + * ->data_tail indicates there is no room in the buffer to + * store $data we do not. + * + * D needs to be a full barrier since it separates the data + * READ from the tail WRITE. + * + * For B a WMB is sufficient since it separates two WRITEs, + * and for C an RMB is sufficient since it separates two READs. + * + * Note, instead of B, C, D we could also use smp_store_release() + * in B and D as well as smp_load_acquire() in C. + * + * However, this optimization does not make sense for all kernel + * supported architectures since for a fair number it would + * resolve into READ_ONCE() + smp_mb() pair for smp_load_acquire(), + * and smp_mb() + WRITE_ONCE() pair for smp_store_release(). + * + * Thus for those smp_wmb() in B and smp_rmb() in C would still + * be less expensive. For the case of D this has either the same + * cost or is less expensive, for example, due to TSO x86 can + * avoid the CPU barrier entirely. + */ + +static inline u64 ring_buffer_read_head(struct perf_event_mmap_page *base) +{ +/* + * Architectures where smp_load_acquire() does not fallback to + * READ_ONCE() + smp_mb() pair. + */ +#if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \ + defined(__ia64__) || defined(__sparc__) && defined(__arch64__) + return smp_load_acquire(&base->data_head); +#else + u64 head = READ_ONCE(base->data_head); + + smp_rmb(); + return head; +#endif +} + +static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base, + u64 tail) +{ + smp_store_release(&base->data_tail, tail); +} + +#endif /* _TOOLS_LINUX_RING_BUFFER_H_ */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 66917a4eba27..852dc17ab47a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -103,6 +103,7 @@ enum bpf_cmd { BPF_BTF_LOAD, BPF_BTF_GET_FD_BY_ID, BPF_TASK_FD_QUERY, + BPF_MAP_LOOKUP_AND_DELETE_ELEM, }; enum bpf_map_type { @@ -127,6 +128,9 @@ enum bpf_map_type { BPF_MAP_TYPE_SOCKHASH, BPF_MAP_TYPE_CGROUP_STORAGE, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, + BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + BPF_MAP_TYPE_QUEUE, + BPF_MAP_TYPE_STACK, }; enum bpf_prog_type { @@ -152,6 +156,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_LWT_SEG6LOCAL, BPF_PROG_TYPE_LIRC_MODE2, BPF_PROG_TYPE_SK_REUSEPORT, + BPF_PROG_TYPE_FLOW_DISSECTOR, }; enum bpf_attach_type { @@ -172,6 +177,7 @@ enum bpf_attach_type { BPF_CGROUP_UDP4_SENDMSG, BPF_CGROUP_UDP6_SENDMSG, BPF_LIRC_MODE2, + BPF_FLOW_DISSECTOR, __MAX_BPF_ATTACH_TYPE }; @@ -459,6 +465,28 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * Description + * Push an element *value* in *map*. *flags* is one of: + * + * **BPF_EXIST** + * If the queue/stack is full, the oldest element is removed to + * make room for this. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * Description + * Pop an element from *map*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * Description + * Get an element from *map* without removing it. + * Return + * 0 on success, or a negative error in case of failure. + * * int bpf_probe_read(void *dst, u32 size, const void *src) * Description * For tracing programs, safely attempt to read *size* bytes from @@ -1430,7 +1458,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags) + * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) * Description * Grow or shrink the room for data in the packet associated to * *skb* by *len_diff*, and according to the selected *mode*. @@ -2141,6 +2169,94 @@ union bpf_attr { * request in the skb. * Return * 0 on success, or a negative error in case of failure. + * + * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * Description + * Look for TCP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-NULL, released via **bpf_sk_release**\ (). + * + * The *ctx* should point to the context of the program, such as + * the skb or socket (depending on the hook in use). This is used + * to determine the base network namespace for the lookup. + * + * *tuple_size* must be one of: + * + * **sizeof**\ (*tuple*\ **->ipv4**) + * Look for an IPv4 socket. + * **sizeof**\ (*tuple*\ **->ipv6**) + * Look for an IPv6 socket. + * + * If the *netns* is zero, then the socket lookup table in the + * netns associated with the *ctx* will be used. For the TC hooks, + * this in the netns of the device in the skb. For socket hooks, + * this in the netns of the socket. If *netns* is non-zero, then + * it specifies the ID of the netns relative to the netns + * associated with the *ctx*. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return + * Pointer to *struct bpf_sock*, or NULL in case of failure. + * + * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * Description + * Look for UDP socket matching *tuple*, optionally in a child + * network namespace *netns*. The return value must be checked, + * and if non-NULL, released via **bpf_sk_release**\ (). + * + * The *ctx* should point to the context of the program, such as + * the skb or socket (depending on the hook in use). This is used + * to determine the base network namespace for the lookup. + * + * *tuple_size* must be one of: + * + * **sizeof**\ (*tuple*\ **->ipv4**) + * Look for an IPv4 socket. + * **sizeof**\ (*tuple*\ **->ipv6**) + * Look for an IPv6 socket. + * + * If the *netns* is zero, then the socket lookup table in the + * netns associated with the *ctx* will be used. For the TC hooks, + * this in the netns of the device in the skb. For socket hooks, + * this in the netns of the socket. If *netns* is non-zero, then + * it specifies the ID of the netns relative to the netns + * associated with the *ctx*. + * + * All values for *flags* are reserved for future usage, and must + * be left at zero. + * + * This helper is available only if the kernel was compiled with + * **CONFIG_NET** configuration option. + * Return + * Pointer to *struct bpf_sock*, or NULL in case of failure. + * + * int bpf_sk_release(struct bpf_sock *sk) + * Description + * Release the reference held by *sock*. *sock* must be a non-NULL + * pointer that was returned from bpf_sk_lookup_xxx\ (). + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags) + * Description + * For socket policies, insert *len* bytes into msg at offset + * *start*. + * + * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a + * *msg* it may want to insert metadata or options into the msg. + * This can later be read and used by any of the lower layer BPF + * hooks. + * + * This helper may fail if under memory pressure (a malloc + * fails) in these cases BPF programs will get an appropriate + * error and BPF programs will need to handle them. + * + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2226,7 +2342,14 @@ union bpf_attr { FN(get_current_cgroup_id), \ FN(get_local_storage), \ FN(sk_select_reuseport), \ - FN(skb_ancestor_cgroup_id), + FN(skb_ancestor_cgroup_id), \ + FN(sk_lookup_tcp), \ + FN(sk_lookup_udp), \ + FN(sk_release), \ + FN(map_push_elem), \ + FN(map_pop_elem), \ + FN(map_peek_elem), \ + FN(msg_push_data), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2333,6 +2456,7 @@ struct __sk_buff { /* ... here. */ __u32 data_meta; + struct bpf_flow_keys *flow_keys; }; struct bpf_tunnel_key { @@ -2395,6 +2519,23 @@ struct bpf_sock { */ }; +struct bpf_sock_tuple { + union { + struct { + __be32 saddr; + __be32 daddr; + __be16 sport; + __be16 dport; + } ipv4; + struct { + __be32 saddr[4]; + __be32 daddr[4]; + __be16 sport; + __be16 dport; + } ipv6; + }; +}; + #define XDP_PACKET_HEADROOM 256 /* User return codes for XDP prog type. @@ -2778,4 +2919,27 @@ enum bpf_task_fd_type { BPF_FD_TYPE_URETPROBE, /* filename + offset */ }; +struct bpf_flow_keys { + __u16 nhoff; + __u16 thoff; + __u16 addr_proto; /* ETH_P_* of valid addrs */ + __u8 is_frag; + __u8 is_first_frag; + __u8 is_encap; + __u8 ip_proto; + __be16 n_proto; + __be16 sport; + __be16 dport; + union { + struct { + __be32 ipv4_src; + __be32 ipv4_dst; + }; + struct { + __u32 ipv6_src[4]; /* in6_addr; network order */ + __u32 ipv6_dst[4]; /* in6_addr; network order */ + }; + }; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 43391e2d1153..58faab897201 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -161,6 +161,7 @@ enum { IFLA_EVENT, IFLA_NEW_NETNSID, IFLA_IF_NETNSID, + IFLA_TARGET_NETNSID = IFLA_IF_NETNSID, /* new alias */ IFLA_CARRIER_UP_COUNT, IFLA_CARRIER_DOWN_COUNT, IFLA_NEW_IFINDEX, @@ -554,6 +555,7 @@ enum { IFLA_GENEVE_UDP_ZERO_CSUM6_TX, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, IFLA_GENEVE_LABEL, + IFLA_GENEVE_TTL_INHERIT, __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) diff --git a/tools/include/uapi/linux/tls.h b/tools/include/uapi/linux/tls.h new file mode 100644 index 000000000000..ff02287495ac --- /dev/null +++ b/tools/include/uapi/linux/tls.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ +/* + * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _UAPI_LINUX_TLS_H +#define _UAPI_LINUX_TLS_H + +#include <linux/types.h> + +/* TLS socket options */ +#define TLS_TX 1 /* Set transmit parameters */ +#define TLS_RX 2 /* Set receive parameters */ + +/* Supported versions */ +#define TLS_VERSION_MINOR(ver) ((ver) & 0xFF) +#define TLS_VERSION_MAJOR(ver) (((ver) >> 8) & 0xFF) + +#define TLS_VERSION_NUMBER(id) ((((id##_VERSION_MAJOR) & 0xFF) << 8) | \ + ((id##_VERSION_MINOR) & 0xFF)) + +#define TLS_1_2_VERSION_MAJOR 0x3 +#define TLS_1_2_VERSION_MINOR 0x3 +#define TLS_1_2_VERSION TLS_VERSION_NUMBER(TLS_1_2) + +/* Supported ciphers */ +#define TLS_CIPHER_AES_GCM_128 51 +#define TLS_CIPHER_AES_GCM_128_IV_SIZE 8 +#define TLS_CIPHER_AES_GCM_128_KEY_SIZE 16 +#define TLS_CIPHER_AES_GCM_128_SALT_SIZE 4 +#define TLS_CIPHER_AES_GCM_128_TAG_SIZE 16 +#define TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE 8 + +#define TLS_SET_RECORD_TYPE 1 +#define TLS_GET_RECORD_TYPE 2 + +struct tls_crypto_info { + __u16 version; + __u16 cipher_type; +}; + +struct tls12_crypto_info_aes_gcm_128 { + struct tls_crypto_info info; + unsigned char iv[TLS_CIPHER_AES_GCM_128_IV_SIZE]; + unsigned char key[TLS_CIPHER_AES_GCM_128_KEY_SIZE]; + unsigned char salt[TLS_CIPHER_AES_GCM_128_SALT_SIZE]; + unsigned char rec_seq[TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE]; +}; + +#endif /* _UAPI_LINUX_TLS_H */ diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 6eb9bacd1948..7bc31c905018 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1 +1 @@ -libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o +libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index d49902e818b5..425b480bda75 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0 +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) # Most of this file is copied from tools/lib/traceevent/Makefile BPF_VERSION = 0 @@ -69,7 +69,7 @@ FEATURE_USER = .libbpf FEATURE_TESTS = libelf libelf-mmap bpf reallocarray FEATURE_DISPLAY = libelf bpf -INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi -I$(srctree)/tools/perf +INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES) check_feat := 1 @@ -125,6 +125,7 @@ override CFLAGS += $(EXTRA_WARNINGS) override CFLAGS += -Werror -Wall override CFLAGS += -fPIC override CFLAGS += $(INCLUDES) +override CFLAGS += -fvisibility=hidden ifeq ($(VERBOSE),1) Q = diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 60aa4ca8b2c5..03f9bcc4ef50 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: LGPL-2.1 +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) /* * common eBPF ELF operations. @@ -28,16 +28,8 @@ #include <linux/bpf.h> #include "bpf.h" #include "libbpf.h" -#include "nlattr.h" -#include <linux/rtnetlink.h> -#include <linux/if_link.h> -#include <sys/socket.h> #include <errno.h> -#ifndef SOL_NETLINK -#define SOL_NETLINK 270 -#endif - /* * When building perf, unistd.h is overridden. __NR_bpf is * required to be defined explicitly. @@ -286,6 +278,18 @@ int bpf_map_lookup_elem(int fd, const void *key, void *value) return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); } +int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.map_fd = fd; + attr.key = ptr_to_u64(key); + attr.value = ptr_to_u64(value); + + return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr)); +} + int bpf_map_delete_elem(int fd, const void *key) { union bpf_attr attr; @@ -499,127 +503,6 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd) return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr)); } -int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) -{ - struct sockaddr_nl sa; - int sock, seq = 0, len, ret = -1; - char buf[4096]; - struct nlattr *nla, *nla_xdp; - struct { - struct nlmsghdr nh; - struct ifinfomsg ifinfo; - char attrbuf[64]; - } req; - struct nlmsghdr *nh; - struct nlmsgerr *err; - socklen_t addrlen; - int one = 1; - - memset(&sa, 0, sizeof(sa)); - sa.nl_family = AF_NETLINK; - - sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - if (sock < 0) { - return -errno; - } - - if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK, - &one, sizeof(one)) < 0) { - fprintf(stderr, "Netlink error reporting not supported\n"); - } - - if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { - ret = -errno; - goto cleanup; - } - - addrlen = sizeof(sa); - if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) { - ret = -errno; - goto cleanup; - } - - if (addrlen != sizeof(sa)) { - ret = -LIBBPF_ERRNO__INTERNAL; - goto cleanup; - } - - memset(&req, 0, sizeof(req)); - req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); - req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; - req.nh.nlmsg_type = RTM_SETLINK; - req.nh.nlmsg_pid = 0; - req.nh.nlmsg_seq = ++seq; - req.ifinfo.ifi_family = AF_UNSPEC; - req.ifinfo.ifi_index = ifindex; - - /* started nested attribute for XDP */ - nla = (struct nlattr *)(((char *)&req) - + NLMSG_ALIGN(req.nh.nlmsg_len)); - nla->nla_type = NLA_F_NESTED | IFLA_XDP; - nla->nla_len = NLA_HDRLEN; - - /* add XDP fd */ - nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); - nla_xdp->nla_type = IFLA_XDP_FD; - nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); - memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); - nla->nla_len += nla_xdp->nla_len; - - /* if user passed in any flags, add those too */ - if (flags) { - nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); - nla_xdp->nla_type = IFLA_XDP_FLAGS; - nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags); - memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags)); - nla->nla_len += nla_xdp->nla_len; - } - - req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); - - if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { - ret = -errno; - goto cleanup; - } - - len = recv(sock, buf, sizeof(buf), 0); - if (len < 0) { - ret = -errno; - goto cleanup; - } - - for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); - nh = NLMSG_NEXT(nh, len)) { - if (nh->nlmsg_pid != sa.nl_pid) { - ret = -LIBBPF_ERRNO__WRNGPID; - goto cleanup; - } - if (nh->nlmsg_seq != seq) { - ret = -LIBBPF_ERRNO__INVSEQ; - goto cleanup; - } - switch (nh->nlmsg_type) { - case NLMSG_ERROR: - err = (struct nlmsgerr *)NLMSG_DATA(nh); - if (!err->error) - continue; - ret = err->error; - nla_dump_errormsg(nh); - goto cleanup; - case NLMSG_DONE: - break; - default: - break; - } - } - - ret = 0; - -cleanup: - close(sock); - return ret; -} - int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, bool do_log) { diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 6f38164b2618..26a51538213c 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: LGPL-2.1 */ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ /* * common eBPF ELF operations. @@ -20,13 +20,17 @@ * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see <http://www.gnu.org/licenses> */ -#ifndef __BPF_BPF_H -#define __BPF_BPF_H +#ifndef __LIBBPF_BPF_H +#define __LIBBPF_BPF_H #include <linux/bpf.h> #include <stdbool.h> #include <stddef.h> +#ifndef LIBBPF_API +#define LIBBPF_API __attribute__((visibility("default"))) +#endif + struct bpf_create_map_attr { const char *name; enum bpf_map_type map_type; @@ -42,21 +46,24 @@ struct bpf_create_map_attr { __u32 inner_map_fd; }; -int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); -int bpf_create_map_node(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, int max_entries, - __u32 map_flags, int node); -int bpf_create_map_name(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, int max_entries, - __u32 map_flags); -int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, - int max_entries, __u32 map_flags); -int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, - int key_size, int inner_map_fd, int max_entries, - __u32 map_flags, int node); -int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, - int key_size, int inner_map_fd, int max_entries, - __u32 map_flags); +LIBBPF_API int +bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); +LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, + int max_entries, __u32 map_flags, int node); +LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, + int max_entries, __u32 map_flags); +LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size, + int value_size, int max_entries, __u32 map_flags); +LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type, + const char *name, int key_size, + int inner_map_fd, int max_entries, + __u32 map_flags, int node); +LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type, + const char *name, int key_size, + int inner_map_fd, int max_entries, + __u32 map_flags); struct bpf_load_program_attr { enum bpf_prog_type prog_type; @@ -69,46 +76,56 @@ struct bpf_load_program_attr { __u32 prog_ifindex; }; +/* Flags to direct loading requirements */ +#define MAPS_RELAX_COMPAT 0x01 + /* Recommend log buffer size */ #define BPF_LOG_BUF_SIZE (256 * 1024) -int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, - char *log_buf, size_t log_buf_sz); -int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, const char *license, - __u32 kern_version, char *log_buf, - size_t log_buf_sz); -int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, int strict_alignment, - const char *license, __u32 kern_version, - char *log_buf, size_t log_buf_sz, int log_level); +LIBBPF_API int +bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, + char *log_buf, size_t log_buf_sz); +LIBBPF_API int bpf_load_program(enum bpf_prog_type type, + const struct bpf_insn *insns, size_t insns_cnt, + const char *license, __u32 kern_version, + char *log_buf, size_t log_buf_sz); +LIBBPF_API int bpf_verify_program(enum bpf_prog_type type, + const struct bpf_insn *insns, + size_t insns_cnt, int strict_alignment, + const char *license, __u32 kern_version, + char *log_buf, size_t log_buf_sz, + int log_level); -int bpf_map_update_elem(int fd, const void *key, const void *value, - __u64 flags); +LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value, + __u64 flags); -int bpf_map_lookup_elem(int fd, const void *key, void *value); -int bpf_map_delete_elem(int fd, const void *key); -int bpf_map_get_next_key(int fd, const void *key, void *next_key); -int bpf_obj_pin(int fd, const char *pathname); -int bpf_obj_get(const char *pathname); -int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, - unsigned int flags); -int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); -int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); -int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, - void *data_out, __u32 *size_out, __u32 *retval, - __u32 *duration); -int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id); -int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); -int bpf_prog_get_fd_by_id(__u32 id); -int bpf_map_get_fd_by_id(__u32 id); -int bpf_btf_get_fd_by_id(__u32 id); -int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len); -int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, - __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt); -int bpf_raw_tracepoint_open(const char *name, int prog_fd); -int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, - bool do_log); -int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, - __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, - __u64 *probe_addr); -#endif +LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value); +LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key, + void *value); +LIBBPF_API int bpf_map_delete_elem(int fd, const void *key); +LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key); +LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); +LIBBPF_API int bpf_obj_get(const char *pathname); +LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd, + enum bpf_attach_type type, unsigned int flags); +LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); +LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, + enum bpf_attach_type type); +LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data, + __u32 size, void *data_out, __u32 *size_out, + __u32 *retval, __u32 *duration); +LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id); +LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); +LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); +LIBBPF_API int bpf_map_get_fd_by_id(__u32 id); +LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id); +LIBBPF_API int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len); +LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type, + __u32 query_flags, __u32 *attach_flags, + __u32 *prog_ids, __u32 *prog_cnt); +LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd); +LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, + __u32 log_buf_size, bool do_log); +LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, + __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, + __u64 *probe_offset, __u64 *probe_addr); +#endif /* __LIBBPF_BPF_H */ diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index cf94b0770522..449591aa9900 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: LGPL-2.1 +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) /* Copyright (c) 2018 Facebook */ #include <stdlib.h> diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 4897e0724d4e..b77e7080f7e7 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -1,11 +1,15 @@ -/* SPDX-License-Identifier: LGPL-2.1 */ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ /* Copyright (c) 2018 Facebook */ -#ifndef __BPF_BTF_H -#define __BPF_BTF_H +#ifndef __LIBBPF_BTF_H +#define __LIBBPF_BTF_H #include <linux/types.h> +#ifndef LIBBPF_API +#define LIBBPF_API __attribute__((visibility("default"))) +#endif + #define BTF_ELF_SEC ".BTF" struct btf; @@ -14,13 +18,15 @@ struct btf_type; typedef int (*btf_print_fn_t)(const char *, ...) __attribute__((format(printf, 1, 2))); -void btf__free(struct btf *btf); -struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log); -__s32 btf__find_by_name(const struct btf *btf, const char *type_name); -const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id); -__s64 btf__resolve_size(const struct btf *btf, __u32 type_id); -int btf__resolve_type(const struct btf *btf, __u32 type_id); -int btf__fd(const struct btf *btf); -const char *btf__name_by_offset(const struct btf *btf, __u32 offset); +LIBBPF_API void btf__free(struct btf *btf); +LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log); +LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, + const char *type_name); +LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, + __u32 id); +LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); +LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); +LIBBPF_API int btf__fd(const struct btf *btf); +LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); -#endif +#endif /* __LIBBPF_BTF_H */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index bdb94939fd60..b607be7236d3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: LGPL-2.1 +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) /* * Common eBPF ELF object loading operations. @@ -7,19 +7,6 @@ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> * Copyright (C) 2015 Huawei Inc. * Copyright (C) 2017 Nicira, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses> */ #define _GNU_SOURCE @@ -32,7 +19,6 @@ #include <unistd.h> #include <fcntl.h> #include <errno.h> -#include <perf-sys.h> #include <asm/unistd.h> #include <linux/err.h> #include <linux/kernel.h> @@ -40,6 +26,8 @@ #include <linux/btf.h> #include <linux/list.h> #include <linux/limits.h> +#include <linux/perf_event.h> +#include <linux/ring_buffer.h> #include <sys/stat.h> #include <sys/types.h> #include <sys/vfs.h> @@ -182,7 +170,7 @@ static LIST_HEAD(bpf_objects_list); struct bpf_object { char license[64]; - u32 kern_version; + __u32 kern_version; struct bpf_program *programs; size_t nr_programs; @@ -228,7 +216,7 @@ struct bpf_object { }; #define obj_elf_valid(o) ((o)->efile.elf) -static void bpf_program__unload(struct bpf_program *prog) +void bpf_program__unload(struct bpf_program *prog) { int i; @@ -470,7 +458,8 @@ static int bpf_object__elf_init(struct bpf_object *obj) obj->efile.fd = open(obj->path, O_RDONLY); if (obj->efile.fd < 0) { char errmsg[STRERR_BUFSIZE]; - char *cp = str_error(errno, errmsg, sizeof(errmsg)); + char *cp = libbpf_strerror_r(errno, errmsg, + sizeof(errmsg)); pr_warning("failed to open %s: %s\n", obj->path, cp); return -errno; @@ -552,7 +541,7 @@ static int bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size) { - u32 kver; + __u32 kver; if (size != sizeof(kver)) { pr_warning("invalid kver section in %s\n", obj->path); @@ -574,8 +563,9 @@ static int compare_bpf_map(const void *_a, const void *_b) } static int -bpf_object__init_maps(struct bpf_object *obj) +bpf_object__init_maps(struct bpf_object *obj, int flags) { + bool strict = !(flags & MAPS_RELAX_COMPAT); int i, map_idx, map_def_sz, nr_maps = 0; Elf_Scn *scn; Elf_Data *data; @@ -697,7 +687,8 @@ bpf_object__init_maps(struct bpf_object *obj) "has unrecognized, non-zero " "options\n", obj->path, map_name); - return -EINVAL; + if (strict) + return -EINVAL; } } memcpy(&obj->maps[map_idx].def, def, @@ -728,7 +719,7 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx) return false; } -static int bpf_object__elf_collect(struct bpf_object *obj) +static int bpf_object__elf_collect(struct bpf_object *obj, int flags) { Elf *elf = obj->efile.elf; GElf_Ehdr *ep = &obj->efile.ehdr; @@ -811,7 +802,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) data->d_size, name, idx); if (err) { char errmsg[STRERR_BUFSIZE]; - char *cp = str_error(-err, errmsg, sizeof(errmsg)); + char *cp = libbpf_strerror_r(-err, errmsg, + sizeof(errmsg)); pr_warning("failed to alloc program %s (%s): %s", name, obj->path, cp); @@ -854,7 +846,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj) return LIBBPF_ERRNO__FORMAT; } if (obj->efile.maps_shndx >= 0) { - err = bpf_object__init_maps(obj); + err = bpf_object__init_maps(obj, flags); if (err) goto out; } @@ -1140,7 +1132,7 @@ bpf_object__create_maps(struct bpf_object *obj) *pfd = bpf_create_map_xattr(&create_attr); if (*pfd < 0 && create_attr.btf_key_type_id) { - cp = str_error(errno, errmsg, sizeof(errmsg)); + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", map->name, cp, errno); create_attr.btf_fd = 0; @@ -1155,7 +1147,7 @@ bpf_object__create_maps(struct bpf_object *obj) size_t j; err = *pfd; - cp = str_error(errno, errmsg, sizeof(errmsg)); + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warning("failed to create map (name: '%s'): %s\n", map->name, cp); for (j = 0; j < i; j++) @@ -1306,7 +1298,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) static int load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type, const char *name, struct bpf_insn *insns, int insns_cnt, - char *license, u32 kern_version, int *pfd, int prog_ifindex) + char *license, __u32 kern_version, int *pfd, int prog_ifindex) { struct bpf_load_program_attr load_attr; char *cp, errmsg[STRERR_BUFSIZE]; @@ -1339,7 +1331,7 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type, } ret = -LIBBPF_ERRNO__LOAD; - cp = str_error(errno, errmsg, sizeof(errmsg)); + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warning("load bpf program failed: %s\n", cp); if (log_buf && log_buf[0] != '\0') { @@ -1375,9 +1367,9 @@ out: return ret; } -static int +int bpf_program__load(struct bpf_program *prog, - char *license, u32 kern_version) + char *license, __u32 kern_version) { int err = 0, fd, i; @@ -1502,6 +1494,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type) case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: case BPF_PROG_TYPE_LIRC_MODE2: case BPF_PROG_TYPE_SK_REUSEPORT: + case BPF_PROG_TYPE_FLOW_DISSECTOR: return false; case BPF_PROG_TYPE_UNSPEC: case BPF_PROG_TYPE_KPROBE: @@ -1525,7 +1518,7 @@ static int bpf_object__validate(struct bpf_object *obj, bool needs_kver) static struct bpf_object * __bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz, - bool needs_kver) + bool needs_kver, int flags) { struct bpf_object *obj; int err; @@ -1541,7 +1534,7 @@ __bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz, CHECK_ERR(bpf_object__elf_init(obj), err, out); CHECK_ERR(bpf_object__check_endianness(obj), err, out); - CHECK_ERR(bpf_object__elf_collect(obj), err, out); + CHECK_ERR(bpf_object__elf_collect(obj, flags), err, out); CHECK_ERR(bpf_object__collect_reloc(obj), err, out); CHECK_ERR(bpf_object__validate(obj, needs_kver), err, out); @@ -1552,7 +1545,8 @@ out: return ERR_PTR(err); } -struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr) +struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr, + int flags) { /* param validation */ if (!attr->file) @@ -1561,7 +1555,13 @@ struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr) pr_debug("loading %s\n", attr->file); return __bpf_object__open(attr->file, NULL, 0, - bpf_prog_type__needs_kver(attr->prog_type)); + bpf_prog_type__needs_kver(attr->prog_type), + flags); +} + +struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr) +{ + return __bpf_object__open_xattr(attr, 0); } struct bpf_object *bpf_object__open(const char *path) @@ -1594,7 +1594,7 @@ struct bpf_object *bpf_object__open_buffer(void *obj_buf, pr_debug("loading object '%s' from buffer\n", name); - return __bpf_object__open(name, obj_buf, obj_buf_sz, true); + return __bpf_object__open(name, obj_buf, obj_buf_sz, true, true); } int bpf_object__unload(struct bpf_object *obj) @@ -1654,7 +1654,7 @@ static int check_path(const char *path) dir = dirname(dname); if (statfs(dir, &st_fs)) { - cp = str_error(errno, errmsg, sizeof(errmsg)); + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warning("failed to statfs %s: %s\n", dir, cp); err = -errno; } @@ -1690,7 +1690,7 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path, } if (bpf_obj_pin(prog->instances.fds[instance], path)) { - cp = str_error(errno, errmsg, sizeof(errmsg)); + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warning("failed to pin program: %s\n", cp); return -errno; } @@ -1708,7 +1708,7 @@ static int make_dir(const char *path) err = -errno; if (err) { - cp = str_error(-err, errmsg, sizeof(errmsg)); + cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); pr_warning("failed to mkdir %s: %s\n", path, cp); } return err; @@ -1770,7 +1770,7 @@ int bpf_map__pin(struct bpf_map *map, const char *path) } if (bpf_obj_pin(map->fd, path)) { - cp = str_error(errno, errmsg, sizeof(errmsg)); + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warning("failed to pin map: %s\n", cp); return -errno; } @@ -2084,57 +2084,90 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog, prog->expected_attach_type = type; } -#define BPF_PROG_SEC_FULL(string, ptype, atype) \ - { string, sizeof(string) - 1, ptype, atype } +#define BPF_PROG_SEC_IMPL(string, ptype, eatype, atype) \ + { string, sizeof(string) - 1, ptype, eatype, atype } -#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_FULL(string, ptype, 0) +/* Programs that can NOT be attached. */ +#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, -EINVAL) -#define BPF_S_PROG_SEC(string, ptype) \ - BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK, ptype) +/* Programs that can be attached. */ +#define BPF_APROG_SEC(string, ptype, atype) \ + BPF_PROG_SEC_IMPL(string, ptype, 0, atype) -#define BPF_SA_PROG_SEC(string, ptype) \ - BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ptype) +/* Programs that must specify expected attach type at load time. */ +#define BPF_EAPROG_SEC(string, ptype, eatype) \ + BPF_PROG_SEC_IMPL(string, ptype, eatype, eatype) + +/* Programs that can be attached but attach type can't be identified by section + * name. Kept for backward compatibility. + */ +#define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype) static const struct { const char *sec; size_t len; enum bpf_prog_type prog_type; enum bpf_attach_type expected_attach_type; + enum bpf_attach_type attach_type; } section_names[] = { - BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), - BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE), - BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE), - BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS), - BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT), - BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT), - BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT), - BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), - BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), - BPF_PROG_SEC("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB), - BPF_PROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK), - BPF_PROG_SEC("cgroup/dev", BPF_PROG_TYPE_CGROUP_DEVICE), - BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), - BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT), - BPF_PROG_SEC("lwt_xmit", BPF_PROG_TYPE_LWT_XMIT), - BPF_PROG_SEC("lwt_seg6local", BPF_PROG_TYPE_LWT_SEG6LOCAL), - BPF_PROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS), - BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB), - BPF_PROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG), - BPF_PROG_SEC("lirc_mode2", BPF_PROG_TYPE_LIRC_MODE2), - BPF_SA_PROG_SEC("cgroup/bind4", BPF_CGROUP_INET4_BIND), - BPF_SA_PROG_SEC("cgroup/bind6", BPF_CGROUP_INET6_BIND), - BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT), - BPF_SA_PROG_SEC("cgroup/connect6", BPF_CGROUP_INET6_CONNECT), - BPF_SA_PROG_SEC("cgroup/sendmsg4", BPF_CGROUP_UDP4_SENDMSG), - BPF_SA_PROG_SEC("cgroup/sendmsg6", BPF_CGROUP_UDP6_SENDMSG), - BPF_S_PROG_SEC("cgroup/post_bind4", BPF_CGROUP_INET4_POST_BIND), - BPF_S_PROG_SEC("cgroup/post_bind6", BPF_CGROUP_INET6_POST_BIND), + BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), + BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS), + BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT), + BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT), + BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT), + BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), + BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), + BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), + BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT), + BPF_PROG_SEC("lwt_xmit", BPF_PROG_TYPE_LWT_XMIT), + BPF_PROG_SEC("lwt_seg6local", BPF_PROG_TYPE_LWT_SEG6LOCAL), + BPF_APROG_SEC("cgroup_skb/ingress", BPF_PROG_TYPE_CGROUP_SKB, + BPF_CGROUP_INET_INGRESS), + BPF_APROG_SEC("cgroup_skb/egress", BPF_PROG_TYPE_CGROUP_SKB, + BPF_CGROUP_INET_EGRESS), + BPF_APROG_COMPAT("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB), + BPF_APROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK, + BPF_CGROUP_INET_SOCK_CREATE), + BPF_EAPROG_SEC("cgroup/post_bind4", BPF_PROG_TYPE_CGROUP_SOCK, + BPF_CGROUP_INET4_POST_BIND), + BPF_EAPROG_SEC("cgroup/post_bind6", BPF_PROG_TYPE_CGROUP_SOCK, + BPF_CGROUP_INET6_POST_BIND), + BPF_APROG_SEC("cgroup/dev", BPF_PROG_TYPE_CGROUP_DEVICE, + BPF_CGROUP_DEVICE), + BPF_APROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS, + BPF_CGROUP_SOCK_OPS), + BPF_APROG_SEC("sk_skb/stream_parser", BPF_PROG_TYPE_SK_SKB, + BPF_SK_SKB_STREAM_PARSER), + BPF_APROG_SEC("sk_skb/stream_verdict", BPF_PROG_TYPE_SK_SKB, + BPF_SK_SKB_STREAM_VERDICT), + BPF_APROG_COMPAT("sk_skb", BPF_PROG_TYPE_SK_SKB), + BPF_APROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG, + BPF_SK_MSG_VERDICT), + BPF_APROG_SEC("lirc_mode2", BPF_PROG_TYPE_LIRC_MODE2, + BPF_LIRC_MODE2), + BPF_APROG_SEC("flow_dissector", BPF_PROG_TYPE_FLOW_DISSECTOR, + BPF_FLOW_DISSECTOR), + BPF_EAPROG_SEC("cgroup/bind4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_INET4_BIND), + BPF_EAPROG_SEC("cgroup/bind6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_INET6_BIND), + BPF_EAPROG_SEC("cgroup/connect4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_INET4_CONNECT), + BPF_EAPROG_SEC("cgroup/connect6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_INET6_CONNECT), + BPF_EAPROG_SEC("cgroup/sendmsg4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_UDP4_SENDMSG), + BPF_EAPROG_SEC("cgroup/sendmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_UDP6_SENDMSG), }; +#undef BPF_PROG_SEC_IMPL #undef BPF_PROG_SEC -#undef BPF_PROG_SEC_FULL -#undef BPF_S_PROG_SEC -#undef BPF_SA_PROG_SEC +#undef BPF_APROG_SEC +#undef BPF_EAPROG_SEC +#undef BPF_APROG_COMPAT int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, enum bpf_attach_type *expected_attach_type) @@ -2154,6 +2187,25 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, return -EINVAL; } +int libbpf_attach_type_by_name(const char *name, + enum bpf_attach_type *attach_type) +{ + int i; + + if (!name) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(section_names); i++) { + if (strncmp(name, section_names[i].sec, section_names[i].len)) + continue; + if (section_names[i].attach_type == -EINVAL) + return -EINVAL; + *attach_type = section_names[i].attach_type; + return 0; + } + return -EINVAL; +} + static int bpf_program__identify_section(struct bpf_program *prog, enum bpf_prog_type *prog_type, @@ -2336,7 +2388,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, bpf_program__set_expected_attach_type(prog, expected_attach_type); - if (!bpf_program__is_function_storage(prog, obj) && !first_prog) + if (!first_prog) first_prog = prog; } @@ -2363,61 +2415,49 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, } enum bpf_perf_event_ret -bpf_perf_event_read_simple(void *mem, unsigned long size, - unsigned long page_size, void **buf, size_t *buf_len, - bpf_perf_event_print_t fn, void *priv) +bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, + void **copy_mem, size_t *copy_size, + bpf_perf_event_print_t fn, void *private_data) { - volatile struct perf_event_mmap_page *header = mem; + struct perf_event_mmap_page *header = mmap_mem; + __u64 data_head = ring_buffer_read_head(header); __u64 data_tail = header->data_tail; - __u64 data_head = header->data_head; - int ret = LIBBPF_PERF_EVENT_ERROR; - void *base, *begin, *end; - - asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ - if (data_head == data_tail) - return LIBBPF_PERF_EVENT_CONT; - - base = ((char *)header) + page_size; - - begin = base + data_tail % size; - end = base + data_head % size; - - while (begin != end) { - struct perf_event_header *ehdr; - - ehdr = begin; - if (begin + ehdr->size > base + size) { - long len = base + size - begin; - - if (*buf_len < ehdr->size) { - free(*buf); - *buf = malloc(ehdr->size); - if (!*buf) { + void *base = ((__u8 *)header) + page_size; + int ret = LIBBPF_PERF_EVENT_CONT; + struct perf_event_header *ehdr; + size_t ehdr_size; + + while (data_head != data_tail) { + ehdr = base + (data_tail & (mmap_size - 1)); + ehdr_size = ehdr->size; + + if (((void *)ehdr) + ehdr_size > base + mmap_size) { + void *copy_start = ehdr; + size_t len_first = base + mmap_size - copy_start; + size_t len_secnd = ehdr_size - len_first; + + if (*copy_size < ehdr_size) { + free(*copy_mem); + *copy_mem = malloc(ehdr_size); + if (!*copy_mem) { + *copy_size = 0; ret = LIBBPF_PERF_EVENT_ERROR; break; } - *buf_len = ehdr->size; + *copy_size = ehdr_size; } - memcpy(*buf, begin, len); - memcpy(*buf + len, base, ehdr->size - len); - ehdr = (void *)*buf; - begin = base + ehdr->size - len; - } else if (begin + ehdr->size == base + size) { - begin = base; - } else { - begin += ehdr->size; + memcpy(*copy_mem, copy_start, len_first); + memcpy(*copy_mem + len_first, base, len_secnd); + ehdr = *copy_mem; } - ret = fn(ehdr, priv); + ret = fn(ehdr, private_data); + data_tail += ehdr_size; if (ret != LIBBPF_PERF_EVENT_CONT) break; - - data_tail += ehdr->size; } - __sync_synchronize(); /* smp_mb() */ - header->data_tail = data_tail; - + ring_buffer_write_tail(header, data_tail); return ret; } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 96c55fac54c3..1f3468dad8b2 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: LGPL-2.1 */ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ /* * Common eBPF ELF object loading operations. @@ -6,22 +6,9 @@ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> * Copyright (C) 2015 Huawei Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses> */ -#ifndef __BPF_LIBBPF_H -#define __BPF_LIBBPF_H +#ifndef __LIBBPF_LIBBPF_H +#define __LIBBPF_LIBBPF_H #include <stdio.h> #include <stdint.h> @@ -29,6 +16,10 @@ #include <sys/types.h> // for size_t #include <linux/bpf.h> +#ifndef LIBBPF_API +#define LIBBPF_API __attribute__((visibility("default"))) +#endif + enum libbpf_errno { __LIBBPF_ERRNO__START = 4000, @@ -46,10 +37,11 @@ enum libbpf_errno { LIBBPF_ERRNO__PROGTYPE, /* Kernel doesn't support this program type */ LIBBPF_ERRNO__WRNGPID, /* Wrong pid in netlink message */ LIBBPF_ERRNO__INVSEQ, /* Invalid netlink sequence */ + LIBBPF_ERRNO__NLPARSE, /* netlink parsing error */ __LIBBPF_ERRNO__END, }; -int libbpf_strerror(int err, char *buf, size_t size); +LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size); /* * __printf is defined in include/linux/compiler-gcc.h. However, @@ -59,9 +51,9 @@ int libbpf_strerror(int err, char *buf, size_t size); typedef int (*libbpf_print_fn_t)(const char *, ...) __attribute__((format(printf, 1, 2))); -void libbpf_set_print(libbpf_print_fn_t warn, - libbpf_print_fn_t info, - libbpf_print_fn_t debug); +LIBBPF_API void libbpf_set_print(libbpf_print_fn_t warn, + libbpf_print_fn_t info, + libbpf_print_fn_t debug); /* Hide internal to user */ struct bpf_object; @@ -71,25 +63,28 @@ struct bpf_object_open_attr { enum bpf_prog_type prog_type; }; -struct bpf_object *bpf_object__open(const char *path); -struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr); -struct bpf_object *bpf_object__open_buffer(void *obj_buf, - size_t obj_buf_sz, - const char *name); -int bpf_object__pin(struct bpf_object *object, const char *path); -void bpf_object__close(struct bpf_object *object); +LIBBPF_API struct bpf_object *bpf_object__open(const char *path); +LIBBPF_API struct bpf_object * +bpf_object__open_xattr(struct bpf_object_open_attr *attr); +struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr, + int flags); +LIBBPF_API struct bpf_object *bpf_object__open_buffer(void *obj_buf, + size_t obj_buf_sz, + const char *name); +LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path); +LIBBPF_API void bpf_object__close(struct bpf_object *object); /* Load/unload object into/from kernel */ -int bpf_object__load(struct bpf_object *obj); -int bpf_object__unload(struct bpf_object *obj); -const char *bpf_object__name(struct bpf_object *obj); -unsigned int bpf_object__kversion(struct bpf_object *obj); -int bpf_object__btf_fd(const struct bpf_object *obj); +LIBBPF_API int bpf_object__load(struct bpf_object *obj); +LIBBPF_API int bpf_object__unload(struct bpf_object *obj); +LIBBPF_API const char *bpf_object__name(struct bpf_object *obj); +LIBBPF_API unsigned int bpf_object__kversion(struct bpf_object *obj); +LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj); -struct bpf_program * +LIBBPF_API struct bpf_program * bpf_object__find_program_by_title(struct bpf_object *obj, const char *title); -struct bpf_object *bpf_object__next(struct bpf_object *prev); +LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev); #define bpf_object__for_each_safe(pos, tmp) \ for ((pos) = bpf_object__next(NULL), \ (tmp) = bpf_object__next(pos); \ @@ -97,17 +92,20 @@ struct bpf_object *bpf_object__next(struct bpf_object *prev); (pos) = (tmp), (tmp) = bpf_object__next(tmp)) typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *); -int bpf_object__set_priv(struct bpf_object *obj, void *priv, - bpf_object_clear_priv_t clear_priv); -void *bpf_object__priv(struct bpf_object *prog); +LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv, + bpf_object_clear_priv_t clear_priv); +LIBBPF_API void *bpf_object__priv(struct bpf_object *prog); -int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, - enum bpf_attach_type *expected_attach_type); +LIBBPF_API int +libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, + enum bpf_attach_type *expected_attach_type); +LIBBPF_API int libbpf_attach_type_by_name(const char *name, + enum bpf_attach_type *attach_type); /* Accessors of bpf_program */ struct bpf_program; -struct bpf_program *bpf_program__next(struct bpf_program *prog, - struct bpf_object *obj); +LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog, + struct bpf_object *obj); #define bpf_object__for_each_program(pos, obj) \ for ((pos) = bpf_program__next(NULL, (obj)); \ @@ -117,18 +115,24 @@ struct bpf_program *bpf_program__next(struct bpf_program *prog, typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *); -int bpf_program__set_priv(struct bpf_program *prog, void *priv, - bpf_program_clear_priv_t clear_priv); +LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv, + bpf_program_clear_priv_t clear_priv); -void *bpf_program__priv(struct bpf_program *prog); -void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex); +LIBBPF_API void *bpf_program__priv(struct bpf_program *prog); +LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog, + __u32 ifindex); -const char *bpf_program__title(struct bpf_program *prog, bool needs_copy); +LIBBPF_API const char *bpf_program__title(struct bpf_program *prog, + bool needs_copy); -int bpf_program__fd(struct bpf_program *prog); -int bpf_program__pin_instance(struct bpf_program *prog, const char *path, - int instance); -int bpf_program__pin(struct bpf_program *prog, const char *path); +LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license, + __u32 kern_version); +LIBBPF_API int bpf_program__fd(struct bpf_program *prog); +LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog, + const char *path, + int instance); +LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path); +LIBBPF_API void bpf_program__unload(struct bpf_program *prog); struct bpf_insn; @@ -189,34 +193,36 @@ typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n, struct bpf_insn *insns, int insns_cnt, struct bpf_prog_prep_result *res); -int bpf_program__set_prep(struct bpf_program *prog, int nr_instance, - bpf_program_prep_t prep); +LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance, + bpf_program_prep_t prep); -int bpf_program__nth_fd(struct bpf_program *prog, int n); +LIBBPF_API int bpf_program__nth_fd(struct bpf_program *prog, int n); /* * Adjust type of BPF program. Default is kprobe. */ -int bpf_program__set_socket_filter(struct bpf_program *prog); -int bpf_program__set_tracepoint(struct bpf_program *prog); -int bpf_program__set_raw_tracepoint(struct bpf_program *prog); -int bpf_program__set_kprobe(struct bpf_program *prog); -int bpf_program__set_sched_cls(struct bpf_program *prog); -int bpf_program__set_sched_act(struct bpf_program *prog); -int bpf_program__set_xdp(struct bpf_program *prog); -int bpf_program__set_perf_event(struct bpf_program *prog); -void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type); -void bpf_program__set_expected_attach_type(struct bpf_program *prog, - enum bpf_attach_type type); - -bool bpf_program__is_socket_filter(struct bpf_program *prog); -bool bpf_program__is_tracepoint(struct bpf_program *prog); -bool bpf_program__is_raw_tracepoint(struct bpf_program *prog); -bool bpf_program__is_kprobe(struct bpf_program *prog); -bool bpf_program__is_sched_cls(struct bpf_program *prog); -bool bpf_program__is_sched_act(struct bpf_program *prog); -bool bpf_program__is_xdp(struct bpf_program *prog); -bool bpf_program__is_perf_event(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog); +LIBBPF_API void bpf_program__set_type(struct bpf_program *prog, + enum bpf_prog_type type); +LIBBPF_API void +bpf_program__set_expected_attach_type(struct bpf_program *prog, + enum bpf_attach_type type); + +LIBBPF_API bool bpf_program__is_socket_filter(struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_tracepoint(struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_raw_tracepoint(struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_kprobe(struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_sched_cls(struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_sched_act(struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_xdp(struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_perf_event(struct bpf_program *prog); /* * No need for __attribute__((packed)), all members of 'bpf_map_def' @@ -237,39 +243,39 @@ struct bpf_map_def { * so no need to worry about a name clash. */ struct bpf_map; -struct bpf_map * +LIBBPF_API struct bpf_map * bpf_object__find_map_by_name(struct bpf_object *obj, const char *name); /* * Get bpf_map through the offset of corresponding struct bpf_map_def * in the BPF object file. */ -struct bpf_map * +LIBBPF_API struct bpf_map * bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset); -struct bpf_map * +LIBBPF_API struct bpf_map * bpf_map__next(struct bpf_map *map, struct bpf_object *obj); #define bpf_map__for_each(pos, obj) \ for ((pos) = bpf_map__next(NULL, (obj)); \ (pos) != NULL; \ (pos) = bpf_map__next((pos), (obj))) -int bpf_map__fd(struct bpf_map *map); -const struct bpf_map_def *bpf_map__def(struct bpf_map *map); -const char *bpf_map__name(struct bpf_map *map); -__u32 bpf_map__btf_key_type_id(const struct bpf_map *map); -__u32 bpf_map__btf_value_type_id(const struct bpf_map *map); +LIBBPF_API int bpf_map__fd(struct bpf_map *map); +LIBBPF_API const struct bpf_map_def *bpf_map__def(struct bpf_map *map); +LIBBPF_API const char *bpf_map__name(struct bpf_map *map); +LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map); +LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map); typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); -int bpf_map__set_priv(struct bpf_map *map, void *priv, - bpf_map_clear_priv_t clear_priv); -void *bpf_map__priv(struct bpf_map *map); -int bpf_map__reuse_fd(struct bpf_map *map, int fd); -bool bpf_map__is_offload_neutral(struct bpf_map *map); -void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); -int bpf_map__pin(struct bpf_map *map, const char *path); +LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, + bpf_map_clear_priv_t clear_priv); +LIBBPF_API void *bpf_map__priv(struct bpf_map *map); +LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); +LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map); +LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); +LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); -long libbpf_get_error(const void *ptr); +LIBBPF_API long libbpf_get_error(const void *ptr); struct bpf_prog_load_attr { const char *file; @@ -278,12 +284,12 @@ struct bpf_prog_load_attr { int ifindex; }; -int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, - struct bpf_object **pobj, int *prog_fd); -int bpf_prog_load(const char *file, enum bpf_prog_type type, - struct bpf_object **pobj, int *prog_fd); +LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, + struct bpf_object **pobj, int *prog_fd); +LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type, + struct bpf_object **pobj, int *prog_fd); -int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); +LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); enum bpf_perf_event_ret { LIBBPF_PERF_EVENT_DONE = 0, @@ -291,10 +297,24 @@ enum bpf_perf_event_ret { LIBBPF_PERF_EVENT_CONT = -2, }; -typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(void *event, - void *priv); -int bpf_perf_event_read_simple(void *mem, unsigned long size, - unsigned long page_size, - void **buf, size_t *buf_len, - bpf_perf_event_print_t fn, void *priv); -#endif +struct perf_event_header; +typedef enum bpf_perf_event_ret + (*bpf_perf_event_print_t)(struct perf_event_header *hdr, + void *private_data); +LIBBPF_API enum bpf_perf_event_ret +bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, + void **copy_mem, size_t *copy_size, + bpf_perf_event_print_t fn, void *private_data); + +struct nlattr; +typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); +int libbpf_netlink_open(unsigned int *nl_pid); +int libbpf_nl_get_link(int sock, unsigned int nl_pid, + libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie); +int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex, + libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie); +int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, + libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie); +int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, + libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie); +#endif /* __LIBBPF_LIBBPF_H */ diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c index d9ba851bd7f9..d83b17f8435c 100644 --- a/tools/lib/bpf/libbpf_errno.c +++ b/tools/lib/bpf/libbpf_errno.c @@ -1,23 +1,10 @@ -// SPDX-License-Identifier: LGPL-2.1 +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) /* * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> * Copyright (C) 2015 Huawei Inc. * Copyright (C) 2017 Nicira, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; - * version 2.1 of the License (not later!) - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses> */ #include <stdio.h> @@ -42,6 +29,7 @@ static const char *libbpf_strerror_table[NR_ERRNO] = { [ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type", [ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message", [ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence", + [ERRCODE_OFFSET(NLPARSE)] = "Incorrect netlink message parsing", }; int libbpf_strerror(int err, char *buf, size_t size) diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c new file mode 100644 index 000000000000..0ce67aea8f3b --- /dev/null +++ b/tools/lib/bpf/netlink.c @@ -0,0 +1,337 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2018 Facebook */ + +#include <stdlib.h> +#include <memory.h> +#include <unistd.h> +#include <linux/bpf.h> +#include <linux/rtnetlink.h> +#include <sys/socket.h> +#include <errno.h> +#include <time.h> + +#include "bpf.h" +#include "libbpf.h" +#include "nlattr.h" + +#ifndef SOL_NETLINK +#define SOL_NETLINK 270 +#endif + +typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t, + void *cookie); + +int libbpf_netlink_open(__u32 *nl_pid) +{ + struct sockaddr_nl sa; + socklen_t addrlen; + int one = 1, ret; + int sock; + + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock < 0) + return -errno; + + if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK, + &one, sizeof(one)) < 0) { + fprintf(stderr, "Netlink error reporting not supported\n"); + } + + if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + ret = -errno; + goto cleanup; + } + + addrlen = sizeof(sa); + if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) { + ret = -errno; + goto cleanup; + } + + if (addrlen != sizeof(sa)) { + ret = -LIBBPF_ERRNO__INTERNAL; + goto cleanup; + } + + *nl_pid = sa.nl_pid; + return sock; + +cleanup: + close(sock); + return ret; +} + +static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq, + __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn, + void *cookie) +{ + bool multipart = true; + struct nlmsgerr *err; + struct nlmsghdr *nh; + char buf[4096]; + int len, ret; + + while (multipart) { + multipart = false; + len = recv(sock, buf, sizeof(buf), 0); + if (len < 0) { + ret = -errno; + goto done; + } + + if (len == 0) + break; + + for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); + nh = NLMSG_NEXT(nh, len)) { + if (nh->nlmsg_pid != nl_pid) { + ret = -LIBBPF_ERRNO__WRNGPID; + goto done; + } + if (nh->nlmsg_seq != seq) { + ret = -LIBBPF_ERRNO__INVSEQ; + goto done; + } + if (nh->nlmsg_flags & NLM_F_MULTI) + multipart = true; + switch (nh->nlmsg_type) { + case NLMSG_ERROR: + err = (struct nlmsgerr *)NLMSG_DATA(nh); + if (!err->error) + continue; + ret = err->error; + libbpf_nla_dump_errormsg(nh); + goto done; + case NLMSG_DONE: + return 0; + default: + break; + } + if (_fn) { + ret = _fn(nh, fn, cookie); + if (ret) + return ret; + } + } + } + ret = 0; +done: + return ret; +} + +int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) +{ + int sock, seq = 0, ret; + struct nlattr *nla, *nla_xdp; + struct { + struct nlmsghdr nh; + struct ifinfomsg ifinfo; + char attrbuf[64]; + } req; + __u32 nl_pid; + + sock = libbpf_netlink_open(&nl_pid); + if (sock < 0) + return sock; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_type = RTM_SETLINK; + req.nh.nlmsg_pid = 0; + req.nh.nlmsg_seq = ++seq; + req.ifinfo.ifi_family = AF_UNSPEC; + req.ifinfo.ifi_index = ifindex; + + /* started nested attribute for XDP */ + nla = (struct nlattr *)(((char *)&req) + + NLMSG_ALIGN(req.nh.nlmsg_len)); + nla->nla_type = NLA_F_NESTED | IFLA_XDP; + nla->nla_len = NLA_HDRLEN; + + /* add XDP fd */ + nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); + nla_xdp->nla_type = IFLA_XDP_FD; + nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); + memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); + nla->nla_len += nla_xdp->nla_len; + + /* if user passed in any flags, add those too */ + if (flags) { + nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); + nla_xdp->nla_type = IFLA_XDP_FLAGS; + nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags); + memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags)); + nla->nla_len += nla_xdp->nla_len; + } + + req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + ret = -errno; + goto cleanup; + } + ret = bpf_netlink_recv(sock, nl_pid, seq, NULL, NULL, NULL); + +cleanup: + close(sock); + return ret; +} + +static int __dump_link_nlmsg(struct nlmsghdr *nlh, + libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie) +{ + struct nlattr *tb[IFLA_MAX + 1], *attr; + struct ifinfomsg *ifi = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi)); + attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi))); + if (libbpf_nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return dump_link_nlmsg(cookie, ifi, tb); +} + +int libbpf_nl_get_link(int sock, unsigned int nl_pid, + libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct ifinfomsg ifm; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), + .nlh.nlmsg_type = RTM_GETLINK, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .ifm.ifi_family = AF_PACKET, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg, + dump_link_nlmsg, cookie); +} + +static int __dump_class_nlmsg(struct nlmsghdr *nlh, + libbpf_dump_nlmsg_t dump_class_nlmsg, + void *cookie) +{ + struct nlattr *tb[TCA_MAX + 1], *attr; + struct tcmsg *t = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); + attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); + if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return dump_class_nlmsg(cookie, t, tb); +} + +int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex, + libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct tcmsg t; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), + .nlh.nlmsg_type = RTM_GETTCLASS, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .t.tcm_family = AF_UNSPEC, + .t.tcm_ifindex = ifindex, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return bpf_netlink_recv(sock, nl_pid, seq, __dump_class_nlmsg, + dump_class_nlmsg, cookie); +} + +static int __dump_qdisc_nlmsg(struct nlmsghdr *nlh, + libbpf_dump_nlmsg_t dump_qdisc_nlmsg, + void *cookie) +{ + struct nlattr *tb[TCA_MAX + 1], *attr; + struct tcmsg *t = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); + attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); + if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return dump_qdisc_nlmsg(cookie, t, tb); +} + +int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, + libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct tcmsg t; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), + .nlh.nlmsg_type = RTM_GETQDISC, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .t.tcm_family = AF_UNSPEC, + .t.tcm_ifindex = ifindex, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return bpf_netlink_recv(sock, nl_pid, seq, __dump_qdisc_nlmsg, + dump_qdisc_nlmsg, cookie); +} + +static int __dump_filter_nlmsg(struct nlmsghdr *nlh, + libbpf_dump_nlmsg_t dump_filter_nlmsg, + void *cookie) +{ + struct nlattr *tb[TCA_MAX + 1], *attr; + struct tcmsg *t = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); + attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); + if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return dump_filter_nlmsg(cookie, t, tb); +} + +int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, + libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct tcmsg t; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), + .nlh.nlmsg_type = RTM_GETTFILTER, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .t.tcm_family = AF_UNSPEC, + .t.tcm_ifindex = ifindex, + .t.tcm_parent = handle, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return bpf_netlink_recv(sock, nl_pid, seq, __dump_filter_nlmsg, + dump_filter_nlmsg, cookie); +} diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c index 4719434278b2..1e69c0c8d413 100644 --- a/tools/lib/bpf/nlattr.c +++ b/tools/lib/bpf/nlattr.c @@ -1,13 +1,8 @@ -// SPDX-License-Identifier: LGPL-2.1 +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) /* * NETLINK Netlink attributes * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation version 2.1 - * of the License. - * * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch> */ @@ -17,20 +12,15 @@ #include <string.h> #include <stdio.h> -static uint16_t nla_attr_minlen[NLA_TYPE_MAX+1] = { - [NLA_U8] = sizeof(uint8_t), - [NLA_U16] = sizeof(uint16_t), - [NLA_U32] = sizeof(uint32_t), - [NLA_U64] = sizeof(uint64_t), - [NLA_STRING] = 1, - [NLA_FLAG] = 0, +static uint16_t nla_attr_minlen[LIBBPF_NLA_TYPE_MAX+1] = { + [LIBBPF_NLA_U8] = sizeof(uint8_t), + [LIBBPF_NLA_U16] = sizeof(uint16_t), + [LIBBPF_NLA_U32] = sizeof(uint32_t), + [LIBBPF_NLA_U64] = sizeof(uint64_t), + [LIBBPF_NLA_STRING] = 1, + [LIBBPF_NLA_FLAG] = 0, }; -static int nla_len(const struct nlattr *nla) -{ - return nla->nla_len - NLA_HDRLEN; -} - static struct nlattr *nla_next(const struct nlattr *nla, int *remaining) { int totlen = NLA_ALIGN(nla->nla_len); @@ -46,20 +36,15 @@ static int nla_ok(const struct nlattr *nla, int remaining) nla->nla_len <= remaining; } -static void *nla_data(const struct nlattr *nla) -{ - return (char *) nla + NLA_HDRLEN; -} - static int nla_type(const struct nlattr *nla) { return nla->nla_type & NLA_TYPE_MASK; } static int validate_nla(struct nlattr *nla, int maxtype, - struct nla_policy *policy) + struct libbpf_nla_policy *policy) { - struct nla_policy *pt; + struct libbpf_nla_policy *pt; unsigned int minlen = 0; int type = nla_type(nla); @@ -68,23 +53,24 @@ static int validate_nla(struct nlattr *nla, int maxtype, pt = &policy[type]; - if (pt->type > NLA_TYPE_MAX) + if (pt->type > LIBBPF_NLA_TYPE_MAX) return 0; if (pt->minlen) minlen = pt->minlen; - else if (pt->type != NLA_UNSPEC) + else if (pt->type != LIBBPF_NLA_UNSPEC) minlen = nla_attr_minlen[pt->type]; - if (nla_len(nla) < minlen) + if (libbpf_nla_len(nla) < minlen) return -1; - if (pt->maxlen && nla_len(nla) > pt->maxlen) + if (pt->maxlen && libbpf_nla_len(nla) > pt->maxlen) return -1; - if (pt->type == NLA_STRING) { - char *data = nla_data(nla); - if (data[nla_len(nla) - 1] != '\0') + if (pt->type == LIBBPF_NLA_STRING) { + char *data = libbpf_nla_data(nla); + + if (data[libbpf_nla_len(nla) - 1] != '\0') return -1; } @@ -114,15 +100,15 @@ static inline int nlmsg_len(const struct nlmsghdr *nlh) * @see nla_validate * @return 0 on success or a negative error code. */ -static int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, - struct nla_policy *policy) +int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, + int len, struct libbpf_nla_policy *policy) { struct nlattr *nla; int rem, err; memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); - nla_for_each_attr(nla, head, len, rem) { + libbpf_nla_for_each_attr(nla, head, len, rem) { int type = nla_type(nla); if (type > maxtype) @@ -146,12 +132,33 @@ errout: return err; } +/** + * Create attribute index based on nested attribute + * @arg tb Index array to be filled (maxtype+1 elements). + * @arg maxtype Maximum attribute type expected and accepted. + * @arg nla Nested Attribute. + * @arg policy Attribute validation policy. + * + * Feeds the stream of attributes nested into the specified attribute + * to libbpf_nla_parse(). + * + * @see libbpf_nla_parse + * @return 0 on success or a negative error code. + */ +int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype, + struct nlattr *nla, + struct libbpf_nla_policy *policy) +{ + return libbpf_nla_parse(tb, maxtype, libbpf_nla_data(nla), + libbpf_nla_len(nla), policy); +} + /* dump netlink extended ack error message */ -int nla_dump_errormsg(struct nlmsghdr *nlh) +int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh) { - struct nla_policy extack_policy[NLMSGERR_ATTR_MAX + 1] = { - [NLMSGERR_ATTR_MSG] = { .type = NLA_STRING }, - [NLMSGERR_ATTR_OFFS] = { .type = NLA_U32 }, + struct libbpf_nla_policy extack_policy[NLMSGERR_ATTR_MAX + 1] = { + [NLMSGERR_ATTR_MSG] = { .type = LIBBPF_NLA_STRING }, + [NLMSGERR_ATTR_OFFS] = { .type = LIBBPF_NLA_U32 }, }; struct nlattr *tb[NLMSGERR_ATTR_MAX + 1], *attr; struct nlmsgerr *err; @@ -172,14 +179,15 @@ int nla_dump_errormsg(struct nlmsghdr *nlh) attr = (struct nlattr *) ((void *) err + hlen); alen = nlh->nlmsg_len - hlen; - if (nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen, extack_policy) != 0) { + if (libbpf_nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen, + extack_policy) != 0) { fprintf(stderr, "Failed to parse extended error attributes\n"); return 0; } if (tb[NLMSGERR_ATTR_MSG]) - errmsg = (char *) nla_data(tb[NLMSGERR_ATTR_MSG]); + errmsg = (char *) libbpf_nla_data(tb[NLMSGERR_ATTR_MSG]); fprintf(stderr, "Kernel error message: %s\n", errmsg); diff --git a/tools/lib/bpf/nlattr.h b/tools/lib/bpf/nlattr.h index 931a71f68f93..6cc3ac91690f 100644 --- a/tools/lib/bpf/nlattr.h +++ b/tools/lib/bpf/nlattr.h @@ -1,18 +1,13 @@ -/* SPDX-License-Identifier: LGPL-2.1 */ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ /* * NETLINK Netlink attributes * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation version 2.1 - * of the License. - * * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch> */ -#ifndef __NLATTR_H -#define __NLATTR_H +#ifndef __LIBBPF_NLATTR_H +#define __LIBBPF_NLATTR_H #include <stdint.h> #include <linux/netlink.h> @@ -23,19 +18,19 @@ * Standard attribute types to specify validation policy */ enum { - NLA_UNSPEC, /**< Unspecified type, binary data chunk */ - NLA_U8, /**< 8 bit integer */ - NLA_U16, /**< 16 bit integer */ - NLA_U32, /**< 32 bit integer */ - NLA_U64, /**< 64 bit integer */ - NLA_STRING, /**< NUL terminated character string */ - NLA_FLAG, /**< Flag */ - NLA_MSECS, /**< Micro seconds (64bit) */ - NLA_NESTED, /**< Nested attributes */ - __NLA_TYPE_MAX, + LIBBPF_NLA_UNSPEC, /**< Unspecified type, binary data chunk */ + LIBBPF_NLA_U8, /**< 8 bit integer */ + LIBBPF_NLA_U16, /**< 16 bit integer */ + LIBBPF_NLA_U32, /**< 32 bit integer */ + LIBBPF_NLA_U64, /**< 64 bit integer */ + LIBBPF_NLA_STRING, /**< NUL terminated character string */ + LIBBPF_NLA_FLAG, /**< Flag */ + LIBBPF_NLA_MSECS, /**< Micro seconds (64bit) */ + LIBBPF_NLA_NESTED, /**< Nested attributes */ + __LIBBPF_NLA_TYPE_MAX, }; -#define NLA_TYPE_MAX (__NLA_TYPE_MAX - 1) +#define LIBBPF_NLA_TYPE_MAX (__LIBBPF_NLA_TYPE_MAX - 1) /** * @ingroup attr @@ -43,8 +38,8 @@ enum { * * See section @core_doc{core_attr_parse,Attribute Parsing} for more details. */ -struct nla_policy { - /** Type of attribute or NLA_UNSPEC */ +struct libbpf_nla_policy { + /** Type of attribute or LIBBPF_NLA_UNSPEC */ uint16_t type; /** Minimal length of payload required */ @@ -62,11 +57,50 @@ struct nla_policy { * @arg len length of attribute stream * @arg rem initialized to len, holds bytes currently remaining in stream */ -#define nla_for_each_attr(pos, head, len, rem) \ +#define libbpf_nla_for_each_attr(pos, head, len, rem) \ for (pos = head, rem = len; \ nla_ok(pos, rem); \ pos = nla_next(pos, &(rem))) -int nla_dump_errormsg(struct nlmsghdr *nlh); +/** + * libbpf_nla_data - head of payload + * @nla: netlink attribute + */ +static inline void *libbpf_nla_data(const struct nlattr *nla) +{ + return (char *) nla + NLA_HDRLEN; +} + +static inline uint8_t libbpf_nla_getattr_u8(const struct nlattr *nla) +{ + return *(uint8_t *)libbpf_nla_data(nla); +} + +static inline uint32_t libbpf_nla_getattr_u32(const struct nlattr *nla) +{ + return *(uint32_t *)libbpf_nla_data(nla); +} + +static inline const char *libbpf_nla_getattr_str(const struct nlattr *nla) +{ + return (const char *)libbpf_nla_data(nla); +} + +/** + * libbpf_nla_len - length of payload + * @nla: netlink attribute + */ +static inline int libbpf_nla_len(const struct nlattr *nla) +{ + return nla->nla_len - NLA_HDRLEN; +} + +int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, + int len, struct libbpf_nla_policy *policy); +int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype, + struct nlattr *nla, + struct libbpf_nla_policy *policy); + +int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh); -#endif /* __NLATTR_H */ +#endif /* __LIBBPF_NLATTR_H */ diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c index b8798114a357..00e48ac5b806 100644 --- a/tools/lib/bpf/str_error.c +++ b/tools/lib/bpf/str_error.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: LGPL-2.1 +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) #undef _GNU_SOURCE #include <string.h> #include <stdio.h> @@ -9,7 +9,7 @@ * libc, while checking strerror_r() return to avoid having to check this in * all places calling it. */ -char *str_error(int err, char *dst, int len) +char *libbpf_strerror_r(int err, char *dst, int len) { int ret = strerror_r(err, dst, len); if (ret) diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h index 355b1db571d1..a139334d57b6 100644 --- a/tools/lib/bpf/str_error.h +++ b/tools/lib/bpf/str_error.h @@ -1,6 +1,6 @@ -// SPDX-License-Identifier: LGPL-2.1 -#ifndef BPF_STR_ERROR -#define BPF_STR_ERROR +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __LIBBPF_STR_ERROR_H +#define __LIBBPF_STR_ERROR_H -char *str_error(int err, char *dst, int len); -#endif // BPF_STR_ERROR +char *libbpf_strerror_r(int err, char *dst, int len); +#endif /* __LIBBPF_STR_ERROR_H */ diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index e603314dc792..cc5e2d6d17a9 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -4,7 +4,7 @@ #include <linux/compiler.h> #include <linux/refcount.h> #include <linux/types.h> -#include <asm/barrier.h> +#include <linux/ring_buffer.h> #include <stdbool.h> #include "auxtrace.h" #include "event.h" @@ -71,21 +71,12 @@ void perf_mmap__consume(struct perf_mmap *map); static inline u64 perf_mmap__read_head(struct perf_mmap *mm) { - struct perf_event_mmap_page *pc = mm->base; - u64 head = READ_ONCE(pc->data_head); - rmb(); - return head; + return ring_buffer_read_head(mm->base); } static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) { - struct perf_event_mmap_page *pc = md->base; - - /* - * ensure all reads are done before we write the tail out. - */ - mb(); - pc->data_tail = tail; + ring_buffer_write_tail(md->base, tail); } union perf_event *perf_mmap__read_forward(struct perf_mmap *map); diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 49938d72cf63..1b799e30c06d 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -19,3 +19,11 @@ test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user +test_skb_cgroup_id_user +test_socket_cookie +test_cgroup_storage +test_select_reuseport +test_flow_dissector +flow_dissector_load +test_netcnt +test_section_names diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index fff7fb1285fc..e39dfb4e7970 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -23,7 +23,8 @@ $(TEST_CUSTOM_PROGS): $(OUTPUT)/%: %.c TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \ - test_socket_cookie test_cgroup_storage test_select_reuseport + test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \ + test_netcnt TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ @@ -35,7 +36,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \ test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ - test_skb_cgroup_id_kern.o + test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \ + test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ @@ -47,10 +49,15 @@ TEST_PROGS := test_kmod.sh \ test_tunnel.sh \ test_lwt_seg6local.sh \ test_lirc_mode2.sh \ - test_skb_cgroup_id.sh + test_skb_cgroup_id.sh \ + test_flow_dissector.sh \ + test_xdp_vlan.sh + +TEST_PROGS_EXTENDED := with_addr.sh # Compile but not part of 'make run_tests' -TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user +TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \ + flow_dissector_load test_flow_dissector include ../lib.mk @@ -70,6 +77,7 @@ $(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c $(OUTPUT)/test_progs: trace_helpers.c $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c +$(OUTPUT)/test_netcnt: cgroup_helpers.c .PHONY: force @@ -110,6 +118,9 @@ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \ $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline +$(OUTPUT)/test_queue_map.o: test_queue_stack_map.h +$(OUTPUT)/test_stack_map.o: test_queue_stack_map.h + BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris) BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF) BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm') diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/bpf_flow.c new file mode 100644 index 000000000000..107350a7821d --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_flow.c @@ -0,0 +1,373 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <limits.h> +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/pkt_cls.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/icmp.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/if_packet.h> +#include <sys/socket.h> +#include <linux/if_tunnel.h> +#include <linux/mpls.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; +#define PROG(F) SEC(#F) int bpf_func_##F + +/* These are the identifiers of the BPF programs that will be used in tail + * calls. Name is limited to 16 characters, with the terminating character and + * bpf_func_ above, we have only 6 to work with, anything after will be cropped. + */ +enum { + IP, + IPV6, + IPV6OP, /* Destination/Hop-by-Hop Options IPv6 Extension header */ + IPV6FR, /* Fragmentation IPv6 Extension Header */ + MPLS, + VLAN, +}; + +#define IP_MF 0x2000 +#define IP_OFFSET 0x1FFF +#define IP6_MF 0x0001 +#define IP6_OFFSET 0xFFF8 + +struct vlan_hdr { + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; + +struct gre_hdr { + __be16 flags; + __be16 proto; +}; + +struct frag_hdr { + __u8 nexthdr; + __u8 reserved; + __be16 frag_off; + __be32 identification; +}; + +struct bpf_map_def SEC("maps") jmp_table = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 8 +}; + +static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb, + __u16 hdr_size, + void *buffer) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + __u16 nhoff = skb->flow_keys->nhoff; + __u8 *hdr; + + /* Verifies this variable offset does not overflow */ + if (nhoff > (USHRT_MAX - hdr_size)) + return NULL; + + hdr = data + nhoff; + if (hdr + hdr_size <= data_end) + return hdr; + + if (bpf_skb_load_bytes(skb, nhoff, buffer, hdr_size)) + return NULL; + + return buffer; +} + +/* Dispatches on ETHERTYPE */ +static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + + keys->n_proto = proto; + switch (proto) { + case bpf_htons(ETH_P_IP): + bpf_tail_call(skb, &jmp_table, IP); + break; + case bpf_htons(ETH_P_IPV6): + bpf_tail_call(skb, &jmp_table, IPV6); + break; + case bpf_htons(ETH_P_MPLS_MC): + case bpf_htons(ETH_P_MPLS_UC): + bpf_tail_call(skb, &jmp_table, MPLS); + break; + case bpf_htons(ETH_P_8021Q): + case bpf_htons(ETH_P_8021AD): + bpf_tail_call(skb, &jmp_table, VLAN); + break; + default: + /* Protocol not supported */ + return BPF_DROP; + } + + return BPF_DROP; +} + +SEC("dissect") +int _dissect(struct __sk_buff *skb) +{ + if (!skb->vlan_present) + return parse_eth_proto(skb, skb->protocol); + else + return parse_eth_proto(skb, skb->vlan_proto); +} + +/* Parses on IPPROTO_* */ +static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + void *data_end = (void *)(long)skb->data_end; + struct icmphdr *icmp, _icmp; + struct gre_hdr *gre, _gre; + struct ethhdr *eth, _eth; + struct tcphdr *tcp, _tcp; + struct udphdr *udp, _udp; + + keys->ip_proto = proto; + switch (proto) { + case IPPROTO_ICMP: + icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp); + if (!icmp) + return BPF_DROP; + return BPF_OK; + case IPPROTO_IPIP: + keys->is_encap = true; + return parse_eth_proto(skb, bpf_htons(ETH_P_IP)); + case IPPROTO_IPV6: + keys->is_encap = true; + return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6)); + case IPPROTO_GRE: + gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre); + if (!gre) + return BPF_DROP; + + if (bpf_htons(gre->flags & GRE_VERSION)) + /* Only inspect standard GRE packets with version 0 */ + return BPF_OK; + + keys->nhoff += sizeof(*gre); /* Step over GRE Flags and Proto */ + if (GRE_IS_CSUM(gre->flags)) + keys->nhoff += 4; /* Step over chksum and Padding */ + if (GRE_IS_KEY(gre->flags)) + keys->nhoff += 4; /* Step over key */ + if (GRE_IS_SEQ(gre->flags)) + keys->nhoff += 4; /* Step over sequence number */ + + keys->is_encap = true; + + if (gre->proto == bpf_htons(ETH_P_TEB)) { + eth = bpf_flow_dissect_get_header(skb, sizeof(*eth), + &_eth); + if (!eth) + return BPF_DROP; + + keys->nhoff += sizeof(*eth); + + return parse_eth_proto(skb, eth->h_proto); + } else { + return parse_eth_proto(skb, gre->proto); + } + case IPPROTO_TCP: + tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp); + if (!tcp) + return BPF_DROP; + + if (tcp->doff < 5) + return BPF_DROP; + + if ((__u8 *)tcp + (tcp->doff << 2) > data_end) + return BPF_DROP; + + keys->thoff = keys->nhoff; + keys->sport = tcp->source; + keys->dport = tcp->dest; + return BPF_OK; + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp); + if (!udp) + return BPF_DROP; + + keys->thoff = keys->nhoff; + keys->sport = udp->source; + keys->dport = udp->dest; + return BPF_OK; + default: + return BPF_DROP; + } + + return BPF_DROP; +} + +static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + + keys->ip_proto = nexthdr; + switch (nexthdr) { + case IPPROTO_HOPOPTS: + case IPPROTO_DSTOPTS: + bpf_tail_call(skb, &jmp_table, IPV6OP); + break; + case IPPROTO_FRAGMENT: + bpf_tail_call(skb, &jmp_table, IPV6FR); + break; + default: + return parse_ip_proto(skb, nexthdr); + } + + return BPF_DROP; +} + +PROG(IP)(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + struct bpf_flow_keys *keys = skb->flow_keys; + void *data = (void *)(long)skb->data; + struct iphdr *iph, _iph; + bool done = false; + + iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph); + if (!iph) + return BPF_DROP; + + /* IP header cannot be smaller than 20 bytes */ + if (iph->ihl < 5) + return BPF_DROP; + + keys->addr_proto = ETH_P_IP; + keys->ipv4_src = iph->saddr; + keys->ipv4_dst = iph->daddr; + + keys->nhoff += iph->ihl << 2; + if (data + keys->nhoff > data_end) + return BPF_DROP; + + if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) { + keys->is_frag = true; + if (iph->frag_off & bpf_htons(IP_OFFSET)) + /* From second fragment on, packets do not have headers + * we can parse. + */ + done = true; + else + keys->is_first_frag = true; + } + + if (done) + return BPF_OK; + + return parse_ip_proto(skb, iph->protocol); +} + +PROG(IPV6)(struct __sk_buff *skb) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + struct ipv6hdr *ip6h, _ip6h; + + ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); + if (!ip6h) + return BPF_DROP; + + keys->addr_proto = ETH_P_IPV6; + memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr)); + + keys->nhoff += sizeof(struct ipv6hdr); + + return parse_ipv6_proto(skb, ip6h->nexthdr); +} + +PROG(IPV6OP)(struct __sk_buff *skb) +{ + struct ipv6_opt_hdr *ip6h, _ip6h; + + ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); + if (!ip6h) + return BPF_DROP; + + /* hlen is in 8-octets and does not include the first 8 bytes + * of the header + */ + skb->flow_keys->nhoff += (1 + ip6h->hdrlen) << 3; + + return parse_ipv6_proto(skb, ip6h->nexthdr); +} + +PROG(IPV6FR)(struct __sk_buff *skb) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + struct frag_hdr *fragh, _fragh; + + fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh); + if (!fragh) + return BPF_DROP; + + keys->nhoff += sizeof(*fragh); + keys->is_frag = true; + if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) + keys->is_first_frag = true; + + return parse_ipv6_proto(skb, fragh->nexthdr); +} + +PROG(MPLS)(struct __sk_buff *skb) +{ + struct mpls_label *mpls, _mpls; + + mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls); + if (!mpls) + return BPF_DROP; + + return BPF_OK; +} + +PROG(VLAN)(struct __sk_buff *skb) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + struct vlan_hdr *vlan, _vlan; + __be16 proto; + + /* Peek back to see if single or double-tagging */ + if (bpf_skb_load_bytes(skb, keys->nhoff - sizeof(proto), &proto, + sizeof(proto))) + return BPF_DROP; + + /* Account for double-tagging */ + if (proto == bpf_htons(ETH_P_8021AD)) { + vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); + if (!vlan) + return BPF_DROP; + + if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q)) + return BPF_DROP; + + keys->nhoff += sizeof(*vlan); + } + + vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); + if (!vlan) + return BPF_DROP; + + keys->nhoff += sizeof(*vlan); + /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/ + if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) || + vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q)) + return BPF_DROP; + + return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto); +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index e4be7730222d..686e57ce40f4 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -16,6 +16,13 @@ static int (*bpf_map_update_elem)(void *map, void *key, void *value, (void *) BPF_FUNC_map_update_elem; static int (*bpf_map_delete_elem)(void *map, void *key) = (void *) BPF_FUNC_map_delete_elem; +static int (*bpf_map_push_elem)(void *map, void *value, + unsigned long long flags) = + (void *) BPF_FUNC_map_push_elem; +static int (*bpf_map_pop_elem)(void *map, void *value) = + (void *) BPF_FUNC_map_pop_elem; +static int (*bpf_map_peek_elem)(void *map, void *value) = + (void *) BPF_FUNC_map_peek_elem; static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) = (void *) BPF_FUNC_probe_read; static unsigned long long (*bpf_ktime_get_ns)(void) = @@ -104,6 +111,8 @@ static int (*bpf_msg_cork_bytes)(void *ctx, int len) = (void *) BPF_FUNC_msg_cork_bytes; static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) = (void *) BPF_FUNC_msg_pull_data; +static int (*bpf_msg_push_data)(void *ctx, int start, int end, int flags) = + (void *) BPF_FUNC_msg_push_data; static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = (void *) BPF_FUNC_bind; static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) = @@ -143,6 +152,22 @@ static unsigned long long (*bpf_skb_cgroup_id)(void *ctx) = (void *) BPF_FUNC_skb_cgroup_id; static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) = (void *) BPF_FUNC_skb_ancestor_cgroup_id; +static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, + struct bpf_sock_tuple *tuple, + int size, unsigned int netns_id, + unsigned long long flags) = + (void *) BPF_FUNC_sk_lookup_tcp; +static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, + struct bpf_sock_tuple *tuple, + int size, unsigned int netns_id, + unsigned long long flags) = + (void *) BPF_FUNC_sk_lookup_udp; +static int (*bpf_sk_release)(struct bpf_sock *sk) = + (void *) BPF_FUNC_sk_release; +static int (*bpf_skb_vlan_push)(void *ctx, __be16 vlan_proto, __u16 vlan_tci) = + (void *) BPF_FUNC_skb_vlan_push; +static int (*bpf_skb_vlan_pop)(void *ctx) = + (void *) BPF_FUNC_skb_vlan_pop; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index b4994a94968b..dd49df5e2df4 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -18,3 +18,5 @@ CONFIG_CRYPTO_HMAC=m CONFIG_CRYPTO_SHA256=m CONFIG_VXLAN=y CONFIG_GENEVE=y +CONFIG_NET_CLS_FLOWER=m +CONFIG_LWTUNNEL=y diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c new file mode 100644 index 000000000000..d3273b5b3173 --- /dev/null +++ b/tools/testing/selftests/bpf/flow_dissector_load.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <error.h> +#include <errno.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector"; +const char *cfg_map_name = "jmp_table"; +bool cfg_attach = true; +char *cfg_section_name; +char *cfg_path_name; + +static void load_and_attach_program(void) +{ + struct bpf_program *prog, *main_prog; + struct bpf_map *prog_array; + int i, fd, prog_fd, ret; + struct bpf_object *obj; + int prog_array_fd; + + ret = bpf_prog_load(cfg_path_name, BPF_PROG_TYPE_FLOW_DISSECTOR, &obj, + &prog_fd); + if (ret) + error(1, 0, "bpf_prog_load %s", cfg_path_name); + + main_prog = bpf_object__find_program_by_title(obj, cfg_section_name); + if (!main_prog) + error(1, 0, "bpf_object__find_program_by_title %s", + cfg_section_name); + + prog_fd = bpf_program__fd(main_prog); + if (prog_fd < 0) + error(1, 0, "bpf_program__fd"); + + prog_array = bpf_object__find_map_by_name(obj, cfg_map_name); + if (!prog_array) + error(1, 0, "bpf_object__find_map_by_name %s", cfg_map_name); + + prog_array_fd = bpf_map__fd(prog_array); + if (prog_array_fd < 0) + error(1, 0, "bpf_map__fd %s", cfg_map_name); + + i = 0; + bpf_object__for_each_program(prog, obj) { + fd = bpf_program__fd(prog); + if (fd < 0) + error(1, 0, "bpf_program__fd"); + + if (fd != prog_fd) { + printf("%d: %s\n", i, bpf_program__title(prog, false)); + bpf_map_update_elem(prog_array_fd, &i, &fd, BPF_ANY); + ++i; + } + } + + ret = bpf_prog_attach(prog_fd, 0 /* Ignore */, BPF_FLOW_DISSECTOR, 0); + if (ret) + error(1, 0, "bpf_prog_attach %s", cfg_path_name); + + ret = bpf_object__pin(obj, cfg_pin_path); + if (ret) + error(1, 0, "bpf_object__pin %s", cfg_pin_path); + +} + +static void detach_program(void) +{ + char command[64]; + int ret; + + ret = bpf_prog_detach(0, BPF_FLOW_DISSECTOR); + if (ret) + error(1, 0, "bpf_prog_detach"); + + /* To unpin, it is necessary and sufficient to just remove this dir */ + sprintf(command, "rm -r %s", cfg_pin_path); + ret = system(command); + if (ret) + error(1, errno, command); +} + +static void parse_opts(int argc, char **argv) +{ + bool attach = false; + bool detach = false; + int c; + + while ((c = getopt(argc, argv, "adp:s:")) != -1) { + switch (c) { + case 'a': + if (detach) + error(1, 0, "attach/detach are exclusive"); + attach = true; + break; + case 'd': + if (attach) + error(1, 0, "attach/detach are exclusive"); + detach = true; + break; + case 'p': + if (cfg_path_name) + error(1, 0, "only one prog name can be given"); + + cfg_path_name = optarg; + break; + case 's': + if (cfg_section_name) + error(1, 0, "only one section can be given"); + + cfg_section_name = optarg; + break; + } + } + + if (detach) + cfg_attach = false; + + if (cfg_attach && !cfg_path_name) + error(1, 0, "must provide a path to the BPF program"); + + if (cfg_attach && !cfg_section_name) + error(1, 0, "must provide a section name"); +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + if (cfg_attach) + load_and_attach_program(); + else + detach_program(); + return 0; +} diff --git a/tools/testing/selftests/bpf/netcnt_common.h b/tools/testing/selftests/bpf/netcnt_common.h new file mode 100644 index 000000000000..81084c1c2c23 --- /dev/null +++ b/tools/testing/selftests/bpf/netcnt_common.h @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef __NETCNT_COMMON_H +#define __NETCNT_COMMON_H + +#include <linux/types.h> + +#define MAX_PERCPU_PACKETS 32 + +struct percpu_net_cnt { + __u64 packets; + __u64 bytes; + + __u64 prev_ts; + + __u64 prev_packets; + __u64 prev_bytes; +}; + +struct net_cnt { + __u64 packets; + __u64 bytes; +}; + +#endif diff --git a/tools/testing/selftests/bpf/netcnt_prog.c b/tools/testing/selftests/bpf/netcnt_prog.c new file mode 100644 index 000000000000..1198abca1360 --- /dev/null +++ b/tools/testing/selftests/bpf/netcnt_prog.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <linux/version.h> + +#include "bpf_helpers.h" +#include "netcnt_common.h" + +#define MAX_BPS (3 * 1024 * 1024) + +#define REFRESH_TIME_NS 100000000 +#define NS_PER_SEC 1000000000 + +struct bpf_map_def SEC("maps") percpu_netcnt = { + .type = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + .key_size = sizeof(struct bpf_cgroup_storage_key), + .value_size = sizeof(struct percpu_net_cnt), +}; + +struct bpf_map_def SEC("maps") netcnt = { + .type = BPF_MAP_TYPE_CGROUP_STORAGE, + .key_size = sizeof(struct bpf_cgroup_storage_key), + .value_size = sizeof(struct net_cnt), +}; + +SEC("cgroup/skb") +int bpf_nextcnt(struct __sk_buff *skb) +{ + struct percpu_net_cnt *percpu_cnt; + char fmt[] = "%d %llu %llu\n"; + struct net_cnt *cnt; + __u64 ts, dt; + int ret; + + cnt = bpf_get_local_storage(&netcnt, 0); + percpu_cnt = bpf_get_local_storage(&percpu_netcnt, 0); + + percpu_cnt->packets++; + percpu_cnt->bytes += skb->len; + + if (percpu_cnt->packets > MAX_PERCPU_PACKETS) { + __sync_fetch_and_add(&cnt->packets, + percpu_cnt->packets); + percpu_cnt->packets = 0; + + __sync_fetch_and_add(&cnt->bytes, + percpu_cnt->bytes); + percpu_cnt->bytes = 0; + } + + ts = bpf_ktime_get_ns(); + dt = ts - percpu_cnt->prev_ts; + + dt *= MAX_BPS; + dt /= NS_PER_SEC; + + if (cnt->bytes + percpu_cnt->bytes - percpu_cnt->prev_bytes < dt) + ret = 1; + else + ret = 0; + + if (dt > REFRESH_TIME_NS) { + percpu_cnt->prev_ts = ts; + percpu_cnt->prev_packets = cnt->packets; + percpu_cnt->prev_bytes = cnt->bytes; + } + + return !!ret; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 6b5cfeb7a9cc..f42b3396d622 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -4,6 +4,7 @@ #include <linux/bpf.h> #include <linux/btf.h> #include <linux/err.h> +#include <linux/kernel.h> #include <bpf/bpf.h> #include <sys/resource.h> #include <libelf.h> @@ -45,7 +46,6 @@ static int count_result(int err) return err; } -#define min(a, b) ((a) < (b) ? (a) : (b)) #define __printf(a, b) __attribute__((format(printf, a, b))) __printf(1, 2) @@ -130,6 +130,7 @@ struct btf_raw_test { bool map_create_err; bool ordered_map; bool lossless_map; + bool percpu_map; int hdr_len_delta; int type_off_delta; int str_off_delta; @@ -2157,6 +2158,7 @@ static struct btf_pprint_test_meta { const char *map_name; bool ordered_map; bool lossless_map; + bool percpu_map; } pprint_tests_meta[] = { { .descr = "BTF pretty print array", @@ -2164,6 +2166,7 @@ static struct btf_pprint_test_meta { .map_name = "pprint_test_array", .ordered_map = true, .lossless_map = true, + .percpu_map = false, }, { @@ -2172,6 +2175,7 @@ static struct btf_pprint_test_meta { .map_name = "pprint_test_hash", .ordered_map = false, .lossless_map = true, + .percpu_map = false, }, { @@ -2180,30 +2184,83 @@ static struct btf_pprint_test_meta { .map_name = "pprint_test_lru_hash", .ordered_map = false, .lossless_map = false, + .percpu_map = false, +}, + +{ + .descr = "BTF pretty print percpu array", + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY, + .map_name = "pprint_test_percpu_array", + .ordered_map = true, + .lossless_map = true, + .percpu_map = true, +}, + +{ + .descr = "BTF pretty print percpu hash", + .map_type = BPF_MAP_TYPE_PERCPU_HASH, + .map_name = "pprint_test_percpu_hash", + .ordered_map = false, + .lossless_map = true, + .percpu_map = true, +}, + +{ + .descr = "BTF pretty print lru percpu hash", + .map_type = BPF_MAP_TYPE_LRU_PERCPU_HASH, + .map_name = "pprint_test_lru_percpu_hash", + .ordered_map = false, + .lossless_map = false, + .percpu_map = true, }, }; -static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i) +static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i, + int num_cpus, int rounded_value_size) { - v->ui32 = i; - v->si32 = -i; - v->unused_bits2a = 3; - v->bits28 = i; - v->unused_bits2b = 3; - v->ui64 = i; - v->aenum = i & 0x03; + int cpu; + + for (cpu = 0; cpu < num_cpus; cpu++) { + v->ui32 = i + cpu; + v->si32 = -i; + v->unused_bits2a = 3; + v->bits28 = i; + v->unused_bits2b = 3; + v->ui64 = i; + v->aenum = i & 0x03; + v = (void *)v + rounded_value_size; + } } +static int check_line(const char *expected_line, int nexpected_line, + int expected_line_len, const char *line) +{ + if (CHECK(nexpected_line == expected_line_len, + "expected_line is too long")) + return -1; + + if (strcmp(expected_line, line)) { + fprintf(stderr, "unexpected pprint output\n"); + fprintf(stderr, "expected: %s", expected_line); + fprintf(stderr, " read: %s", line); + return -1; + } + + return 0; +} + + static int do_test_pprint(void) { const struct btf_raw_test *test = &pprint_test_template; struct bpf_create_map_attr create_attr = {}; + bool ordered_map, lossless_map, percpu_map; + int err, ret, num_cpus, rounded_value_size; + struct pprint_mapv *mapv = NULL; unsigned int key, nr_read_elems; - bool ordered_map, lossless_map; int map_fd = -1, btf_fd = -1; - struct pprint_mapv mapv = {}; unsigned int raw_btf_size; char expected_line[255]; FILE *pin_file = NULL; @@ -2212,7 +2269,6 @@ static int do_test_pprint(void) char *line = NULL; uint8_t *raw_btf; ssize_t nread; - int err, ret; fprintf(stderr, "%s......", test->descr); raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, @@ -2261,9 +2317,18 @@ static int do_test_pprint(void) if (CHECK(err, "bpf_obj_pin(%s): errno:%d.", pin_path, errno)) goto done; + percpu_map = test->percpu_map; + num_cpus = percpu_map ? bpf_num_possible_cpus() : 1; + rounded_value_size = round_up(sizeof(struct pprint_mapv), 8); + mapv = calloc(num_cpus, rounded_value_size); + if (CHECK(!mapv, "mapv allocation failure")) { + err = -1; + goto done; + } + for (key = 0; key < test->max_entries; key++) { - set_pprint_mapv(&mapv, key); - bpf_map_update_elem(map_fd, &key, &mapv, 0); + set_pprint_mapv(mapv, key, num_cpus, rounded_value_size); + bpf_map_update_elem(map_fd, &key, mapv, 0); } pin_file = fopen(pin_path, "r"); @@ -2286,33 +2351,74 @@ static int do_test_pprint(void) ordered_map = test->ordered_map; lossless_map = test->lossless_map; do { + struct pprint_mapv *cmapv; ssize_t nexpected_line; unsigned int next_key; + int cpu; next_key = ordered_map ? nr_read_elems : atoi(line); - set_pprint_mapv(&mapv, next_key); - nexpected_line = snprintf(expected_line, sizeof(expected_line), - "%u: {%u,0,%d,0x%x,0x%x,0x%x,{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s}\n", - next_key, - mapv.ui32, mapv.si32, - mapv.unused_bits2a, mapv.bits28, mapv.unused_bits2b, - mapv.ui64, - mapv.ui8a[0], mapv.ui8a[1], mapv.ui8a[2], mapv.ui8a[3], - mapv.ui8a[4], mapv.ui8a[5], mapv.ui8a[6], mapv.ui8a[7], - pprint_enum_str[mapv.aenum]); - - if (CHECK(nexpected_line == sizeof(expected_line), - "expected_line is too long")) { - err = -1; - goto done; + set_pprint_mapv(mapv, next_key, num_cpus, rounded_value_size); + cmapv = mapv; + + for (cpu = 0; cpu < num_cpus; cpu++) { + if (percpu_map) { + /* for percpu map, the format looks like: + * <key>: { + * cpu0: <value_on_cpu0> + * cpu1: <value_on_cpu1> + * ... + * cpun: <value_on_cpun> + * } + * + * let us verify the line containing the key here. + */ + if (cpu == 0) { + nexpected_line = snprintf(expected_line, + sizeof(expected_line), + "%u: {\n", + next_key); + + err = check_line(expected_line, nexpected_line, + sizeof(expected_line), line); + if (err == -1) + goto done; + } + + /* read value@cpu */ + nread = getline(&line, &line_len, pin_file); + if (nread < 0) + break; + } + + nexpected_line = snprintf(expected_line, sizeof(expected_line), + "%s%u: {%u,0,%d,0x%x,0x%x,0x%x," + "{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s}\n", + percpu_map ? "\tcpu" : "", + percpu_map ? cpu : next_key, + cmapv->ui32, cmapv->si32, + cmapv->unused_bits2a, + cmapv->bits28, + cmapv->unused_bits2b, + cmapv->ui64, + cmapv->ui8a[0], cmapv->ui8a[1], + cmapv->ui8a[2], cmapv->ui8a[3], + cmapv->ui8a[4], cmapv->ui8a[5], + cmapv->ui8a[6], cmapv->ui8a[7], + pprint_enum_str[cmapv->aenum]); + + err = check_line(expected_line, nexpected_line, + sizeof(expected_line), line); + if (err == -1) + goto done; + + cmapv = (void *)cmapv + rounded_value_size; } - if (strcmp(expected_line, line)) { - err = -1; - fprintf(stderr, "unexpected pprint output\n"); - fprintf(stderr, "expected: %s", expected_line); - fprintf(stderr, " read: %s", line); - goto done; + if (percpu_map) { + /* skip the last bracket for the percpu map */ + nread = getline(&line, &line_len, pin_file); + if (nread < 0) + break; } nread = getline(&line, &line_len, pin_file); @@ -2334,6 +2440,8 @@ static int do_test_pprint(void) err = 0; done: + if (mapv) + free(mapv); if (!err) fprintf(stderr, "OK"); if (*btf_log_buf && (err || args.always_log)) @@ -2361,6 +2469,7 @@ static int test_pprint(void) pprint_test_template.map_name = pprint_tests_meta[i].map_name; pprint_test_template.ordered_map = pprint_tests_meta[i].ordered_map; pprint_test_template.lossless_map = pprint_tests_meta[i].lossless_map; + pprint_test_template.percpu_map = pprint_tests_meta[i].percpu_map; err |= count_result(do_test_pprint()); } diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c index 4e196e3bfecf..f44834155f25 100644 --- a/tools/testing/selftests/bpf/test_cgroup_storage.c +++ b/tools/testing/selftests/bpf/test_cgroup_storage.c @@ -4,6 +4,7 @@ #include <linux/filter.h> #include <stdio.h> #include <stdlib.h> +#include <sys/sysinfo.h> #include "bpf_rlimit.h" #include "cgroup_helpers.h" @@ -15,6 +16,14 @@ char bpf_log_buf[BPF_LOG_BUF_SIZE]; int main(int argc, char **argv) { struct bpf_insn prog[] = { + BPF_LD_MAP_FD(BPF_REG_1, 0), /* percpu map fd */ + BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1), + BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), /* map fd */ BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, @@ -28,9 +37,18 @@ int main(int argc, char **argv) }; size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); int error = EXIT_FAILURE; - int map_fd, prog_fd, cgroup_fd; + int map_fd, percpu_map_fd, prog_fd, cgroup_fd; struct bpf_cgroup_storage_key key; unsigned long long value; + unsigned long long *percpu_value; + int cpu, nproc; + + nproc = get_nprocs_conf(); + percpu_value = malloc(sizeof(*percpu_value) * nproc); + if (!percpu_value) { + printf("Not enough memory for per-cpu area (%d cpus)\n", nproc); + goto err; + } map_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, sizeof(key), sizeof(value), 0, 0); @@ -39,7 +57,15 @@ int main(int argc, char **argv) goto out; } - prog[0].imm = map_fd; + percpu_map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + sizeof(key), sizeof(value), 0, 0); + if (percpu_map_fd < 0) { + printf("Failed to create map: %s\n", strerror(errno)); + goto out; + } + + prog[0].imm = percpu_map_fd; + prog[7].imm = map_fd; prog_fd = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, prog, insns_cnt, "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); @@ -77,7 +103,15 @@ int main(int argc, char **argv) } if (bpf_map_lookup_elem(map_fd, &key, &value)) { - printf("Failed to lookup cgroup storage\n"); + printf("Failed to lookup cgroup storage 0\n"); + goto err; + } + + for (cpu = 0; cpu < nproc; cpu++) + percpu_value[cpu] = 1000; + + if (bpf_map_update_elem(percpu_map_fd, &key, percpu_value, 0)) { + printf("Failed to update the data in the cgroup storage\n"); goto err; } @@ -120,11 +154,31 @@ int main(int argc, char **argv) goto err; } + /* Check the final value of the counter in the percpu local storage */ + + for (cpu = 0; cpu < nproc; cpu++) + percpu_value[cpu] = 0; + + if (bpf_map_lookup_elem(percpu_map_fd, &key, percpu_value)) { + printf("Failed to lookup the per-cpu cgroup storage\n"); + goto err; + } + + value = 0; + for (cpu = 0; cpu < nproc; cpu++) + value += percpu_value[cpu]; + + if (value != nproc * 1000 + 6) { + printf("Unexpected data in the per-cpu cgroup storage\n"); + goto err; + } + error = 0; printf("test_cgroup_storage:PASS\n"); err: cleanup_cgroup_environment(); + free(percpu_value); out: return error; diff --git a/tools/testing/selftests/bpf/test_flow_dissector.c b/tools/testing/selftests/bpf/test_flow_dissector.c new file mode 100644 index 000000000000..12b784afba31 --- /dev/null +++ b/tools/testing/selftests/bpf/test_flow_dissector.c @@ -0,0 +1,782 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Inject packets with all sorts of encapsulation into the kernel. + * + * IPv4/IPv6 outer layer 3 + * GRE/GUE/BARE outer layer 4, where bare is IPIP/SIT/IPv4-in-IPv6/.. + * IPv4/IPv6 inner layer 3 + */ + +#define _GNU_SOURCE + +#include <stddef.h> +#include <arpa/inet.h> +#include <asm/byteorder.h> +#include <error.h> +#include <errno.h> +#include <linux/if_packet.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ipv6.h> +#include <netinet/ip.h> +#include <netinet/in.h> +#include <netinet/udp.h> +#include <poll.h> +#include <stdbool.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#define CFG_PORT_INNER 8000 + +/* Add some protocol definitions that do not exist in userspace */ + +struct grehdr { + uint16_t unused; + uint16_t protocol; +} __attribute__((packed)); + +struct guehdr { + union { + struct { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 hlen:5, + control:1, + version:2; +#elif defined (__BIG_ENDIAN_BITFIELD) + __u8 version:2, + control:1, + hlen:5; +#else +#error "Please fix <asm/byteorder.h>" +#endif + __u8 proto_ctype; + __be16 flags; + }; + __be32 word; + }; +}; + +static uint8_t cfg_dsfield_inner; +static uint8_t cfg_dsfield_outer; +static uint8_t cfg_encap_proto; +static bool cfg_expect_failure = false; +static int cfg_l3_extra = AF_UNSPEC; /* optional SIT prefix */ +static int cfg_l3_inner = AF_UNSPEC; +static int cfg_l3_outer = AF_UNSPEC; +static int cfg_num_pkt = 10; +static int cfg_num_secs = 0; +static char cfg_payload_char = 'a'; +static int cfg_payload_len = 100; +static int cfg_port_gue = 6080; +static bool cfg_only_rx; +static bool cfg_only_tx; +static int cfg_src_port = 9; + +static char buf[ETH_DATA_LEN]; + +#define INIT_ADDR4(name, addr4, port) \ + static struct sockaddr_in name = { \ + .sin_family = AF_INET, \ + .sin_port = __constant_htons(port), \ + .sin_addr.s_addr = __constant_htonl(addr4), \ + }; + +#define INIT_ADDR6(name, addr6, port) \ + static struct sockaddr_in6 name = { \ + .sin6_family = AF_INET6, \ + .sin6_port = __constant_htons(port), \ + .sin6_addr = addr6, \ + }; + +INIT_ADDR4(in_daddr4, INADDR_LOOPBACK, CFG_PORT_INNER) +INIT_ADDR4(in_saddr4, INADDR_LOOPBACK + 2, 0) +INIT_ADDR4(out_daddr4, INADDR_LOOPBACK, 0) +INIT_ADDR4(out_saddr4, INADDR_LOOPBACK + 1, 0) +INIT_ADDR4(extra_daddr4, INADDR_LOOPBACK, 0) +INIT_ADDR4(extra_saddr4, INADDR_LOOPBACK + 1, 0) + +INIT_ADDR6(in_daddr6, IN6ADDR_LOOPBACK_INIT, CFG_PORT_INNER) +INIT_ADDR6(in_saddr6, IN6ADDR_LOOPBACK_INIT, 0) +INIT_ADDR6(out_daddr6, IN6ADDR_LOOPBACK_INIT, 0) +INIT_ADDR6(out_saddr6, IN6ADDR_LOOPBACK_INIT, 0) +INIT_ADDR6(extra_daddr6, IN6ADDR_LOOPBACK_INIT, 0) +INIT_ADDR6(extra_saddr6, IN6ADDR_LOOPBACK_INIT, 0) + +static unsigned long util_gettime(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); +} + +static void util_printaddr(const char *msg, struct sockaddr *addr) +{ + unsigned long off = 0; + char nbuf[INET6_ADDRSTRLEN]; + + switch (addr->sa_family) { + case PF_INET: + off = __builtin_offsetof(struct sockaddr_in, sin_addr); + break; + case PF_INET6: + off = __builtin_offsetof(struct sockaddr_in6, sin6_addr); + break; + default: + error(1, 0, "printaddr: unsupported family %u\n", + addr->sa_family); + } + + if (!inet_ntop(addr->sa_family, ((void *) addr) + off, nbuf, + sizeof(nbuf))) + error(1, errno, "inet_ntop"); + + fprintf(stderr, "%s: %s\n", msg, nbuf); +} + +static unsigned long add_csum_hword(const uint16_t *start, int num_u16) +{ + unsigned long sum = 0; + int i; + + for (i = 0; i < num_u16; i++) + sum += start[i]; + + return sum; +} + +static uint16_t build_ip_csum(const uint16_t *start, int num_u16, + unsigned long sum) +{ + sum += add_csum_hword(start, num_u16); + + while (sum >> 16) + sum = (sum & 0xffff) + (sum >> 16); + + return ~sum; +} + +static void build_ipv4_header(void *header, uint8_t proto, + uint32_t src, uint32_t dst, + int payload_len, uint8_t tos) +{ + struct iphdr *iph = header; + + iph->ihl = 5; + iph->version = 4; + iph->tos = tos; + iph->ttl = 8; + iph->tot_len = htons(sizeof(*iph) + payload_len); + iph->id = htons(1337); + iph->protocol = proto; + iph->saddr = src; + iph->daddr = dst; + iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0); +} + +static void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield) +{ + uint16_t val, *ptr = (uint16_t *)ip6h; + + val = ntohs(*ptr); + val &= 0xF00F; + val |= ((uint16_t) dsfield) << 4; + *ptr = htons(val); +} + +static void build_ipv6_header(void *header, uint8_t proto, + struct sockaddr_in6 *src, + struct sockaddr_in6 *dst, + int payload_len, uint8_t dsfield) +{ + struct ipv6hdr *ip6h = header; + + ip6h->version = 6; + ip6h->payload_len = htons(payload_len); + ip6h->nexthdr = proto; + ip6h->hop_limit = 8; + ipv6_set_dsfield(ip6h, dsfield); + + memcpy(&ip6h->saddr, &src->sin6_addr, sizeof(ip6h->saddr)); + memcpy(&ip6h->daddr, &dst->sin6_addr, sizeof(ip6h->daddr)); +} + +static uint16_t build_udp_v4_csum(const struct iphdr *iph, + const struct udphdr *udph, + int num_words) +{ + unsigned long pseudo_sum; + int num_u16 = sizeof(iph->saddr); /* halfwords: twice byte len */ + + pseudo_sum = add_csum_hword((void *) &iph->saddr, num_u16); + pseudo_sum += htons(IPPROTO_UDP); + pseudo_sum += udph->len; + return build_ip_csum((void *) udph, num_words, pseudo_sum); +} + +static uint16_t build_udp_v6_csum(const struct ipv6hdr *ip6h, + const struct udphdr *udph, + int num_words) +{ + unsigned long pseudo_sum; + int num_u16 = sizeof(ip6h->saddr); /* halfwords: twice byte len */ + + pseudo_sum = add_csum_hword((void *) &ip6h->saddr, num_u16); + pseudo_sum += htons(ip6h->nexthdr); + pseudo_sum += ip6h->payload_len; + return build_ip_csum((void *) udph, num_words, pseudo_sum); +} + +static void build_udp_header(void *header, int payload_len, + uint16_t dport, int family) +{ + struct udphdr *udph = header; + int len = sizeof(*udph) + payload_len; + + udph->source = htons(cfg_src_port); + udph->dest = htons(dport); + udph->len = htons(len); + udph->check = 0; + if (family == AF_INET) + udph->check = build_udp_v4_csum(header - sizeof(struct iphdr), + udph, len >> 1); + else + udph->check = build_udp_v6_csum(header - sizeof(struct ipv6hdr), + udph, len >> 1); +} + +static void build_gue_header(void *header, uint8_t proto) +{ + struct guehdr *gueh = header; + + gueh->proto_ctype = proto; +} + +static void build_gre_header(void *header, uint16_t proto) +{ + struct grehdr *greh = header; + + greh->protocol = htons(proto); +} + +static int l3_length(int family) +{ + if (family == AF_INET) + return sizeof(struct iphdr); + else + return sizeof(struct ipv6hdr); +} + +static int build_packet(void) +{ + int ol3_len = 0, ol4_len = 0, il3_len = 0, il4_len = 0; + int el3_len = 0; + + if (cfg_l3_extra) + el3_len = l3_length(cfg_l3_extra); + + /* calculate header offsets */ + if (cfg_encap_proto) { + ol3_len = l3_length(cfg_l3_outer); + + if (cfg_encap_proto == IPPROTO_GRE) + ol4_len = sizeof(struct grehdr); + else if (cfg_encap_proto == IPPROTO_UDP) + ol4_len = sizeof(struct udphdr) + sizeof(struct guehdr); + } + + il3_len = l3_length(cfg_l3_inner); + il4_len = sizeof(struct udphdr); + + if (el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len >= + sizeof(buf)) + error(1, 0, "packet too large\n"); + + /* + * Fill packet from inside out, to calculate correct checksums. + * But create ip before udp headers, as udp uses ip for pseudo-sum. + */ + memset(buf + el3_len + ol3_len + ol4_len + il3_len + il4_len, + cfg_payload_char, cfg_payload_len); + + /* add zero byte for udp csum padding */ + buf[el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len] = 0; + + switch (cfg_l3_inner) { + case PF_INET: + build_ipv4_header(buf + el3_len + ol3_len + ol4_len, + IPPROTO_UDP, + in_saddr4.sin_addr.s_addr, + in_daddr4.sin_addr.s_addr, + il4_len + cfg_payload_len, + cfg_dsfield_inner); + break; + case PF_INET6: + build_ipv6_header(buf + el3_len + ol3_len + ol4_len, + IPPROTO_UDP, + &in_saddr6, &in_daddr6, + il4_len + cfg_payload_len, + cfg_dsfield_inner); + break; + } + + build_udp_header(buf + el3_len + ol3_len + ol4_len + il3_len, + cfg_payload_len, CFG_PORT_INNER, cfg_l3_inner); + + if (!cfg_encap_proto) + return il3_len + il4_len + cfg_payload_len; + + switch (cfg_l3_outer) { + case PF_INET: + build_ipv4_header(buf + el3_len, cfg_encap_proto, + out_saddr4.sin_addr.s_addr, + out_daddr4.sin_addr.s_addr, + ol4_len + il3_len + il4_len + cfg_payload_len, + cfg_dsfield_outer); + break; + case PF_INET6: + build_ipv6_header(buf + el3_len, cfg_encap_proto, + &out_saddr6, &out_daddr6, + ol4_len + il3_len + il4_len + cfg_payload_len, + cfg_dsfield_outer); + break; + } + + switch (cfg_encap_proto) { + case IPPROTO_UDP: + build_gue_header(buf + el3_len + ol3_len + ol4_len - + sizeof(struct guehdr), + cfg_l3_inner == PF_INET ? IPPROTO_IPIP + : IPPROTO_IPV6); + build_udp_header(buf + el3_len + ol3_len, + sizeof(struct guehdr) + il3_len + il4_len + + cfg_payload_len, + cfg_port_gue, cfg_l3_outer); + break; + case IPPROTO_GRE: + build_gre_header(buf + el3_len + ol3_len, + cfg_l3_inner == PF_INET ? ETH_P_IP + : ETH_P_IPV6); + break; + } + + switch (cfg_l3_extra) { + case PF_INET: + build_ipv4_header(buf, + cfg_l3_outer == PF_INET ? IPPROTO_IPIP + : IPPROTO_IPV6, + extra_saddr4.sin_addr.s_addr, + extra_daddr4.sin_addr.s_addr, + ol3_len + ol4_len + il3_len + il4_len + + cfg_payload_len, 0); + break; + case PF_INET6: + build_ipv6_header(buf, + cfg_l3_outer == PF_INET ? IPPROTO_IPIP + : IPPROTO_IPV6, + &extra_saddr6, &extra_daddr6, + ol3_len + ol4_len + il3_len + il4_len + + cfg_payload_len, 0); + break; + } + + return el3_len + ol3_len + ol4_len + il3_len + il4_len + + cfg_payload_len; +} + +/* sender transmits encapsulated over RAW or unencap'd over UDP */ +static int setup_tx(void) +{ + int family, fd, ret; + + if (cfg_l3_extra) + family = cfg_l3_extra; + else if (cfg_l3_outer) + family = cfg_l3_outer; + else + family = cfg_l3_inner; + + fd = socket(family, SOCK_RAW, IPPROTO_RAW); + if (fd == -1) + error(1, errno, "socket tx"); + + if (cfg_l3_extra) { + if (cfg_l3_extra == PF_INET) + ret = connect(fd, (void *) &extra_daddr4, + sizeof(extra_daddr4)); + else + ret = connect(fd, (void *) &extra_daddr6, + sizeof(extra_daddr6)); + if (ret) + error(1, errno, "connect tx"); + } else if (cfg_l3_outer) { + /* connect to destination if not encapsulated */ + if (cfg_l3_outer == PF_INET) + ret = connect(fd, (void *) &out_daddr4, + sizeof(out_daddr4)); + else + ret = connect(fd, (void *) &out_daddr6, + sizeof(out_daddr6)); + if (ret) + error(1, errno, "connect tx"); + } else { + /* otherwise using loopback */ + if (cfg_l3_inner == PF_INET) + ret = connect(fd, (void *) &in_daddr4, + sizeof(in_daddr4)); + else + ret = connect(fd, (void *) &in_daddr6, + sizeof(in_daddr6)); + if (ret) + error(1, errno, "connect tx"); + } + + return fd; +} + +/* receiver reads unencapsulated UDP */ +static int setup_rx(void) +{ + int fd, ret; + + fd = socket(cfg_l3_inner, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket rx"); + + if (cfg_l3_inner == PF_INET) + ret = bind(fd, (void *) &in_daddr4, sizeof(in_daddr4)); + else + ret = bind(fd, (void *) &in_daddr6, sizeof(in_daddr6)); + if (ret) + error(1, errno, "bind rx"); + + return fd; +} + +static int do_tx(int fd, const char *pkt, int len) +{ + int ret; + + ret = write(fd, pkt, len); + if (ret == -1) + error(1, errno, "send"); + if (ret != len) + error(1, errno, "send: len (%d < %d)\n", ret, len); + + return 1; +} + +static int do_poll(int fd, short events, int timeout) +{ + struct pollfd pfd; + int ret; + + pfd.fd = fd; + pfd.events = events; + + ret = poll(&pfd, 1, timeout); + if (ret == -1) + error(1, errno, "poll"); + if (ret && !(pfd.revents & POLLIN)) + error(1, errno, "poll: unexpected event 0x%x\n", pfd.revents); + + return ret; +} + +static int do_rx(int fd) +{ + char rbuf; + int ret, num = 0; + + while (1) { + ret = recv(fd, &rbuf, 1, MSG_DONTWAIT); + if (ret == -1 && errno == EAGAIN) + break; + if (ret == -1) + error(1, errno, "recv"); + if (rbuf != cfg_payload_char) + error(1, 0, "recv: payload mismatch"); + num++; + }; + + return num; +} + +static int do_main(void) +{ + unsigned long tstop, treport, tcur; + int fdt = -1, fdr = -1, len, tx = 0, rx = 0; + + if (!cfg_only_tx) + fdr = setup_rx(); + if (!cfg_only_rx) + fdt = setup_tx(); + + len = build_packet(); + + tcur = util_gettime(); + treport = tcur + 1000; + tstop = tcur + (cfg_num_secs * 1000); + + while (1) { + if (!cfg_only_rx) + tx += do_tx(fdt, buf, len); + + if (!cfg_only_tx) + rx += do_rx(fdr); + + if (cfg_num_secs) { + tcur = util_gettime(); + if (tcur >= tstop) + break; + if (tcur >= treport) { + fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx); + tx = 0; + rx = 0; + treport = tcur + 1000; + } + } else { + if (tx == cfg_num_pkt) + break; + } + } + + /* read straggler packets, if any */ + if (rx < tx) { + tstop = util_gettime() + 100; + while (rx < tx) { + tcur = util_gettime(); + if (tcur >= tstop) + break; + + do_poll(fdr, POLLIN, tstop - tcur); + rx += do_rx(fdr); + } + } + + fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx); + + if (fdr != -1 && close(fdr)) + error(1, errno, "close rx"); + if (fdt != -1 && close(fdt)) + error(1, errno, "close tx"); + + /* + * success (== 0) only if received all packets + * unless failure is expected, in which case none must arrive. + */ + if (cfg_expect_failure) + return rx != 0; + else + return rx != tx; +} + + +static void __attribute__((noreturn)) usage(const char *filepath) +{ + fprintf(stderr, "Usage: %s [-e gre|gue|bare|none] [-i 4|6] [-l len] " + "[-O 4|6] [-o 4|6] [-n num] [-t secs] [-R] [-T] " + "[-s <osrc> [-d <odst>] [-S <isrc>] [-D <idst>] " + "[-x <otos>] [-X <itos>] [-f <isport>] [-F]\n", + filepath); + exit(1); +} + +static void parse_addr(int family, void *addr, const char *optarg) +{ + int ret; + + ret = inet_pton(family, optarg, addr); + if (ret == -1) + error(1, errno, "inet_pton"); + if (ret == 0) + error(1, 0, "inet_pton: bad string"); +} + +static void parse_addr4(struct sockaddr_in *addr, const char *optarg) +{ + parse_addr(AF_INET, &addr->sin_addr, optarg); +} + +static void parse_addr6(struct sockaddr_in6 *addr, const char *optarg) +{ + parse_addr(AF_INET6, &addr->sin6_addr, optarg); +} + +static int parse_protocol_family(const char *filepath, const char *optarg) +{ + if (!strcmp(optarg, "4")) + return PF_INET; + if (!strcmp(optarg, "6")) + return PF_INET6; + + usage(filepath); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "d:D:e:f:Fhi:l:n:o:O:Rs:S:t:Tx:X:")) != -1) { + switch (c) { + case 'd': + if (cfg_l3_outer == AF_UNSPEC) + error(1, 0, "-d must be preceded by -o"); + if (cfg_l3_outer == AF_INET) + parse_addr4(&out_daddr4, optarg); + else + parse_addr6(&out_daddr6, optarg); + break; + case 'D': + if (cfg_l3_inner == AF_UNSPEC) + error(1, 0, "-D must be preceded by -i"); + if (cfg_l3_inner == AF_INET) + parse_addr4(&in_daddr4, optarg); + else + parse_addr6(&in_daddr6, optarg); + break; + case 'e': + if (!strcmp(optarg, "gre")) + cfg_encap_proto = IPPROTO_GRE; + else if (!strcmp(optarg, "gue")) + cfg_encap_proto = IPPROTO_UDP; + else if (!strcmp(optarg, "bare")) + cfg_encap_proto = IPPROTO_IPIP; + else if (!strcmp(optarg, "none")) + cfg_encap_proto = IPPROTO_IP; /* == 0 */ + else + usage(argv[0]); + break; + case 'f': + cfg_src_port = strtol(optarg, NULL, 0); + break; + case 'F': + cfg_expect_failure = true; + break; + case 'h': + usage(argv[0]); + break; + case 'i': + if (!strcmp(optarg, "4")) + cfg_l3_inner = PF_INET; + else if (!strcmp(optarg, "6")) + cfg_l3_inner = PF_INET6; + else + usage(argv[0]); + break; + case 'l': + cfg_payload_len = strtol(optarg, NULL, 0); + break; + case 'n': + cfg_num_pkt = strtol(optarg, NULL, 0); + break; + case 'o': + cfg_l3_outer = parse_protocol_family(argv[0], optarg); + break; + case 'O': + cfg_l3_extra = parse_protocol_family(argv[0], optarg); + break; + case 'R': + cfg_only_rx = true; + break; + case 's': + if (cfg_l3_outer == AF_INET) + parse_addr4(&out_saddr4, optarg); + else + parse_addr6(&out_saddr6, optarg); + break; + case 'S': + if (cfg_l3_inner == AF_INET) + parse_addr4(&in_saddr4, optarg); + else + parse_addr6(&in_saddr6, optarg); + break; + case 't': + cfg_num_secs = strtol(optarg, NULL, 0); + break; + case 'T': + cfg_only_tx = true; + break; + case 'x': + cfg_dsfield_outer = strtol(optarg, NULL, 0); + break; + case 'X': + cfg_dsfield_inner = strtol(optarg, NULL, 0); + break; + } + } + + if (cfg_only_rx && cfg_only_tx) + error(1, 0, "options: cannot combine rx-only and tx-only"); + + if (cfg_encap_proto && cfg_l3_outer == AF_UNSPEC) + error(1, 0, "options: must specify outer with encap"); + else if ((!cfg_encap_proto) && cfg_l3_outer != AF_UNSPEC) + error(1, 0, "options: cannot combine no-encap and outer"); + else if ((!cfg_encap_proto) && cfg_l3_extra != AF_UNSPEC) + error(1, 0, "options: cannot combine no-encap and extra"); + + if (cfg_l3_inner == AF_UNSPEC) + cfg_l3_inner = AF_INET6; + if (cfg_l3_inner == AF_INET6 && cfg_encap_proto == IPPROTO_IPIP) + cfg_encap_proto = IPPROTO_IPV6; + + /* RFC 6040 4.2: + * on decap, if outer encountered congestion (CE == 0x3), + * but inner cannot encode ECN (NoECT == 0x0), then drop packet. + */ + if (((cfg_dsfield_outer & 0x3) == 0x3) && + ((cfg_dsfield_inner & 0x3) == 0x0)) + cfg_expect_failure = true; +} + +static void print_opts(void) +{ + if (cfg_l3_inner == PF_INET6) { + util_printaddr("inner.dest6", (void *) &in_daddr6); + util_printaddr("inner.source6", (void *) &in_saddr6); + } else { + util_printaddr("inner.dest4", (void *) &in_daddr4); + util_printaddr("inner.source4", (void *) &in_saddr4); + } + + if (!cfg_l3_outer) + return; + + fprintf(stderr, "encap proto: %u\n", cfg_encap_proto); + + if (cfg_l3_outer == PF_INET6) { + util_printaddr("outer.dest6", (void *) &out_daddr6); + util_printaddr("outer.source6", (void *) &out_saddr6); + } else { + util_printaddr("outer.dest4", (void *) &out_daddr4); + util_printaddr("outer.source4", (void *) &out_saddr4); + } + + if (!cfg_l3_extra) + return; + + if (cfg_l3_outer == PF_INET6) { + util_printaddr("extra.dest6", (void *) &extra_daddr6); + util_printaddr("extra.source6", (void *) &extra_saddr6); + } else { + util_printaddr("extra.dest4", (void *) &extra_daddr4); + util_printaddr("extra.source4", (void *) &extra_saddr4); + } + +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + print_opts(); + return do_main(); +} diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh new file mode 100755 index 000000000000..c0fb073b5eab --- /dev/null +++ b/tools/testing/selftests/bpf/test_flow_dissector.sh @@ -0,0 +1,115 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Load BPF flow dissector and verify it correctly dissects traffic +export TESTNAME=test_flow_dissector +unmount=0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +msg="skip all tests:" +if [ $UID != 0 ]; then + echo $msg please run this as root >&2 + exit $ksft_skip +fi + +# This test needs to be run in a network namespace with in_netns.sh. Check if +# this is the case and run it with in_netns.sh if it is being run in the root +# namespace. +if [[ -z $(ip netns identify $$) ]]; then + ../net/in_netns.sh "$0" "$@" + exit $? +fi + +# Determine selftest success via shell exit code +exit_handler() +{ + if (( $? == 0 )); then + echo "selftests: $TESTNAME [PASS]"; + else + echo "selftests: $TESTNAME [FAILED]"; + fi + + set +e + + # Cleanup + tc filter del dev lo ingress pref 1337 2> /dev/null + tc qdisc del dev lo ingress 2> /dev/null + ./flow_dissector_load -d 2> /dev/null + if [ $unmount -ne 0 ]; then + umount bpffs 2> /dev/null + fi +} + +# Exit script immediately (well catched by trap handler) if any +# program/thing exits with a non-zero status. +set -e + +# (Use 'trap -l' to list meaning of numbers) +trap exit_handler 0 2 3 6 9 + +# Mount BPF file system +if /bin/mount | grep /sys/fs/bpf > /dev/null; then + echo "bpffs already mounted" +else + echo "bpffs not mounted. Mounting..." + unmount=1 + /bin/mount bpffs /sys/fs/bpf -t bpf +fi + +# Attach BPF program +./flow_dissector_load -p bpf_flow.o -s dissect + +# Setup +tc qdisc add dev lo ingress + +echo "Testing IPv4..." +# Drops all IP/UDP packets coming from port 9 +tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \ + udp src_port 9 action drop + +# Send 10 IPv4/UDP packets from port 8. Filter should not drop any. +./test_flow_dissector -i 4 -f 8 +# Send 10 IPv4/UDP packets from port 9. Filter should drop all. +./test_flow_dissector -i 4 -f 9 -F +# Send 10 IPv4/UDP packets from port 10. Filter should not drop any. +./test_flow_dissector -i 4 -f 10 + +echo "Testing IPIP..." +# Send 10 IPv4/IPv4/UDP packets from port 8. Filter should not drop any. +./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 8 +# Send 10 IPv4/IPv4/UDP packets from port 9. Filter should drop all. +./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 9 -F +# Send 10 IPv4/IPv4/UDP packets from port 10. Filter should not drop any. +./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 10 + +echo "Testing IPv4 + GRE..." +# Send 10 IPv4/GRE/IPv4/UDP packets from port 8. Filter should not drop any. +./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 8 +# Send 10 IPv4/GRE/IPv4/UDP packets from port 9. Filter should drop all. +./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 9 -F +# Send 10 IPv4/GRE/IPv4/UDP packets from port 10. Filter should not drop any. +./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 10 + +tc filter del dev lo ingress pref 1337 + +echo "Testing IPv6..." +# Drops all IPv6/UDP packets coming from port 9 +tc filter add dev lo parent ffff: protocol ipv6 pref 1337 flower ip_proto \ + udp src_port 9 action drop + +# Send 10 IPv6/UDP packets from port 8. Filter should not drop any. +./test_flow_dissector -i 6 -f 8 +# Send 10 IPv6/UDP packets from port 9. Filter should drop all. +./test_flow_dissector -i 6 -f 9 -F +# Send 10 IPv6/UDP packets from port 10. Filter should not drop any. +./test_flow_dissector -i 6 -f 10 + +exit 0 diff --git a/tools/testing/selftests/bpf/test_libbpf.sh b/tools/testing/selftests/bpf/test_libbpf.sh index d97dc914cd49..156d89f1edcc 100755 --- a/tools/testing/selftests/bpf/test_libbpf.sh +++ b/tools/testing/selftests/bpf/test_libbpf.sh @@ -6,7 +6,7 @@ export TESTNAME=test_libbpf # Determine selftest success via shell exit code exit_handler() { - if (( $? == 0 )); then + if [ $? -eq 0 ]; then echo "selftests: $TESTNAME [PASS]"; else echo "$TESTNAME: failed at file $LAST_LOADED" 1>&2 diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 9b552c0fc47d..4db2116e52be 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -15,6 +15,7 @@ #include <string.h> #include <assert.h> #include <stdlib.h> +#include <time.h> #include <sys/wait.h> #include <sys/socket.h> @@ -471,6 +472,122 @@ static void test_devmap(int task, void *data) close(fd); } +static void test_queuemap(int task, void *data) +{ + const int MAP_SIZE = 32; + __u32 vals[MAP_SIZE + MAP_SIZE/2], val; + int fd, i; + + /* Fill test values to be used */ + for (i = 0; i < MAP_SIZE + MAP_SIZE/2; i++) + vals[i] = rand(); + + /* Invalid key size */ + fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 4, sizeof(val), MAP_SIZE, + map_flags); + assert(fd < 0 && errno == EINVAL); + + fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 0, sizeof(val), MAP_SIZE, + map_flags); + /* Queue map does not support BPF_F_NO_PREALLOC */ + if (map_flags & BPF_F_NO_PREALLOC) { + assert(fd < 0 && errno == EINVAL); + return; + } + if (fd < 0) { + printf("Failed to create queuemap '%s'!\n", strerror(errno)); + exit(1); + } + + /* Push MAP_SIZE elements */ + for (i = 0; i < MAP_SIZE; i++) + assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0); + + /* Check that element cannot be pushed due to max_entries limit */ + assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 && + errno == E2BIG); + + /* Peek element */ + assert(bpf_map_lookup_elem(fd, NULL, &val) == 0 && val == vals[0]); + + /* Replace half elements */ + for (i = MAP_SIZE; i < MAP_SIZE + MAP_SIZE/2; i++) + assert(bpf_map_update_elem(fd, NULL, &vals[i], BPF_EXIST) == 0); + + /* Pop all elements */ + for (i = MAP_SIZE/2; i < MAP_SIZE + MAP_SIZE/2; i++) + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == 0 && + val == vals[i]); + + /* Check that there are not elements left */ + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 && + errno == ENOENT); + + /* Check that non supported functions set errno to EINVAL */ + assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL); + assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL); + + close(fd); +} + +static void test_stackmap(int task, void *data) +{ + const int MAP_SIZE = 32; + __u32 vals[MAP_SIZE + MAP_SIZE/2], val; + int fd, i; + + /* Fill test values to be used */ + for (i = 0; i < MAP_SIZE + MAP_SIZE/2; i++) + vals[i] = rand(); + + /* Invalid key size */ + fd = bpf_create_map(BPF_MAP_TYPE_STACK, 4, sizeof(val), MAP_SIZE, + map_flags); + assert(fd < 0 && errno == EINVAL); + + fd = bpf_create_map(BPF_MAP_TYPE_STACK, 0, sizeof(val), MAP_SIZE, + map_flags); + /* Stack map does not support BPF_F_NO_PREALLOC */ + if (map_flags & BPF_F_NO_PREALLOC) { + assert(fd < 0 && errno == EINVAL); + return; + } + if (fd < 0) { + printf("Failed to create stackmap '%s'!\n", strerror(errno)); + exit(1); + } + + /* Push MAP_SIZE elements */ + for (i = 0; i < MAP_SIZE; i++) + assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0); + + /* Check that element cannot be pushed due to max_entries limit */ + assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 && + errno == E2BIG); + + /* Peek element */ + assert(bpf_map_lookup_elem(fd, NULL, &val) == 0 && val == vals[i - 1]); + + /* Replace half elements */ + for (i = MAP_SIZE; i < MAP_SIZE + MAP_SIZE/2; i++) + assert(bpf_map_update_elem(fd, NULL, &vals[i], BPF_EXIST) == 0); + + /* Pop all elements */ + for (i = MAP_SIZE + MAP_SIZE/2 - 1; i >= MAP_SIZE/2; i--) + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == 0 && + val == vals[i]); + + /* Check that there are not elements left */ + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 && + errno == ENOENT); + + /* Check that non supported functions set errno to EINVAL */ + assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL); + assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL); + + close(fd); +} + #include <sys/socket.h> #include <sys/ioctl.h> #include <arpa/inet.h> @@ -1434,10 +1551,15 @@ static void run_all_tests(void) test_map_wronly(); test_reuseport_array(); + + test_queuemap(0, NULL); + test_stackmap(0, NULL); } int main(void) { + srand(time(NULL)); + map_flags = 0; run_all_tests(); diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c new file mode 100644 index 000000000000..7887df693399 --- /dev/null +++ b/tools/testing/selftests/bpf/test_netcnt.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <assert.h> +#include <sys/sysinfo.h> +#include <sys/time.h> + +#include <linux/bpf.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "cgroup_helpers.h" +#include "bpf_rlimit.h" +#include "netcnt_common.h" + +#define BPF_PROG "./netcnt_prog.o" +#define TEST_CGROUP "/test-network-counters/" + +static int bpf_find_map(const char *test, struct bpf_object *obj, + const char *name) +{ + struct bpf_map *map; + + map = bpf_object__find_map_by_name(obj, name); + if (!map) { + printf("%s:FAIL:map '%s' not found\n", test, name); + return -1; + } + return bpf_map__fd(map); +} + +int main(int argc, char **argv) +{ + struct percpu_net_cnt *percpu_netcnt; + struct bpf_cgroup_storage_key key; + int map_fd, percpu_map_fd; + int error = EXIT_FAILURE; + struct net_cnt netcnt; + struct bpf_object *obj; + int prog_fd, cgroup_fd; + unsigned long packets; + unsigned long bytes; + int cpu, nproc; + __u32 prog_cnt; + + nproc = get_nprocs_conf(); + percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc); + if (!percpu_netcnt) { + printf("Not enough memory for per-cpu area (%d cpus)\n", nproc); + goto err; + } + + if (bpf_prog_load(BPF_PROG, BPF_PROG_TYPE_CGROUP_SKB, + &obj, &prog_fd)) { + printf("Failed to load bpf program\n"); + goto out; + } + + if (setup_cgroup_environment()) { + printf("Failed to load bpf program\n"); + goto err; + } + + /* Create a cgroup, get fd, and join it */ + cgroup_fd = create_and_get_cgroup(TEST_CGROUP); + if (!cgroup_fd) { + printf("Failed to create test cgroup\n"); + goto err; + } + + if (join_cgroup(TEST_CGROUP)) { + printf("Failed to join cgroup\n"); + goto err; + } + + /* Attach bpf program */ + if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) { + printf("Failed to attach bpf program"); + goto err; + } + + assert(system("ping localhost -6 -c 10000 -f -q > /dev/null") == 0); + + if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL, + &prog_cnt)) { + printf("Failed to query attached programs"); + goto err; + } + + map_fd = bpf_find_map(__func__, obj, "netcnt"); + if (map_fd < 0) { + printf("Failed to find bpf map with net counters"); + goto err; + } + + percpu_map_fd = bpf_find_map(__func__, obj, "percpu_netcnt"); + if (percpu_map_fd < 0) { + printf("Failed to find bpf map with percpu net counters"); + goto err; + } + + if (bpf_map_get_next_key(map_fd, NULL, &key)) { + printf("Failed to get key in cgroup storage\n"); + goto err; + } + + if (bpf_map_lookup_elem(map_fd, &key, &netcnt)) { + printf("Failed to lookup cgroup storage\n"); + goto err; + } + + if (bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0])) { + printf("Failed to lookup percpu cgroup storage\n"); + goto err; + } + + /* Some packets can be still in per-cpu cache, but not more than + * MAX_PERCPU_PACKETS. + */ + packets = netcnt.packets; + bytes = netcnt.bytes; + for (cpu = 0; cpu < nproc; cpu++) { + if (percpu_netcnt[cpu].packets > MAX_PERCPU_PACKETS) { + printf("Unexpected percpu value: %llu\n", + percpu_netcnt[cpu].packets); + goto err; + } + + packets += percpu_netcnt[cpu].packets; + bytes += percpu_netcnt[cpu].bytes; + } + + /* No packets should be lost */ + if (packets != 10000) { + printf("Unexpected packet count: %lu\n", packets); + goto err; + } + + /* Let's check that bytes counter matches the number of packets + * multiplied by the size of ipv6 ICMP packet. + */ + if (bytes != packets * 104) { + printf("Unexpected bytes count: %lu\n", bytes); + goto err; + } + + error = 0; + printf("test_netcnt:PASS\n"); + +err: + cleanup_cgroup_environment(); + free(percpu_netcnt); + +out: + return error; +} diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 0ef68204c84b..2d3c04f45530 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -112,13 +112,13 @@ static void test_pkt_access(void) err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4), NULL, NULL, &retval, &duration); - CHECK(err || errno || retval, "ipv4", + CHECK(err || retval, "ipv4", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration); err = bpf_prog_test_run(prog_fd, 100000, &pkt_v6, sizeof(pkt_v6), NULL, NULL, &retval, &duration); - CHECK(err || errno || retval, "ipv6", + CHECK(err || retval, "ipv6", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration); bpf_object__close(obj); @@ -153,14 +153,14 @@ static void test_xdp(void) err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - CHECK(err || errno || retval != XDP_TX || size != 74 || + CHECK(err || retval != XDP_TX || size != 74 || iph->protocol != IPPROTO_IPIP, "ipv4", "err %d errno %d retval %d size %d\n", err, errno, retval, size); err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); - CHECK(err || errno || retval != XDP_TX || size != 114 || + CHECK(err || retval != XDP_TX || size != 114 || iph6->nexthdr != IPPROTO_IPV6, "ipv6", "err %d errno %d retval %d size %d\n", err, errno, retval, size); @@ -185,13 +185,13 @@ static void test_xdp_adjust_tail(void) err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - CHECK(err || errno || retval != XDP_DROP, + CHECK(err || retval != XDP_DROP, "ipv4", "err %d errno %d retval %d size %d\n", err, errno, retval, size); err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); - CHECK(err || errno || retval != XDP_TX || size != 54, + CHECK(err || retval != XDP_TX || size != 54, "ipv6", "err %d errno %d retval %d size %d\n", err, errno, retval, size); bpf_object__close(obj); @@ -254,14 +254,14 @@ static void test_l4lb(const char *file) err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 || + CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 || *magic != MAGIC_VAL, "ipv4", "err %d errno %d retval %d size %d magic %x\n", err, errno, retval, size, *magic); err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); - CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 || + CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 || *magic != MAGIC_VAL, "ipv6", "err %d errno %d retval %d size %d magic %x\n", err, errno, retval, size, *magic); @@ -343,14 +343,14 @@ static void test_xdp_noinline(void) err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - CHECK(err || errno || retval != 1 || size != 54 || + CHECK(err || retval != 1 || size != 54 || *magic != MAGIC_VAL, "ipv4", "err %d errno %d retval %d size %d magic %x\n", err, errno, retval, size, *magic); err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); - CHECK(err || errno || retval != 1 || size != 74 || + CHECK(err || retval != 1 || size != 74 || *magic != MAGIC_VAL, "ipv6", "err %d errno %d retval %d size %d magic %x\n", err, errno, retval, size, *magic); @@ -1698,8 +1698,142 @@ static void test_task_fd_query_tp(void) "sys_enter_read"); } +static void test_reference_tracking() +{ + const char *file = "./test_sk_lookup_kern.o"; + struct bpf_object *obj; + struct bpf_program *prog; + __u32 duration; + int err = 0; + + obj = bpf_object__open(file); + if (IS_ERR(obj)) { + error_cnt++; + return; + } + + bpf_object__for_each_program(prog, obj) { + const char *title; + + /* Ignore .text sections */ + title = bpf_program__title(prog, false); + if (strstr(title, ".text") != NULL) + continue; + + bpf_program__set_type(prog, BPF_PROG_TYPE_SCHED_CLS); + + /* Expect verifier failure if test name has 'fail' */ + if (strstr(title, "fail") != NULL) { + libbpf_set_print(NULL, NULL, NULL); + err = !bpf_program__load(prog, "GPL", 0); + libbpf_set_print(printf, printf, NULL); + } else { + err = bpf_program__load(prog, "GPL", 0); + } + CHECK(err, title, "\n"); + } + bpf_object__close(obj); +} + +enum { + QUEUE, + STACK, +}; + +static void test_queue_stack_map(int type) +{ + const int MAP_SIZE = 32; + __u32 vals[MAP_SIZE], duration, retval, size, val; + int i, err, prog_fd, map_in_fd, map_out_fd; + char file[32], buf[128]; + struct bpf_object *obj; + struct iphdr *iph = (void *)buf + sizeof(struct ethhdr); + + /* Fill test values to be used */ + for (i = 0; i < MAP_SIZE; i++) + vals[i] = rand(); + + if (type == QUEUE) + strncpy(file, "./test_queue_map.o", sizeof(file)); + else if (type == STACK) + strncpy(file, "./test_stack_map.o", sizeof(file)); + else + return; + + err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); + if (err) { + error_cnt++; + return; + } + + map_in_fd = bpf_find_map(__func__, obj, "map_in"); + if (map_in_fd < 0) + goto out; + + map_out_fd = bpf_find_map(__func__, obj, "map_out"); + if (map_out_fd < 0) + goto out; + + /* Push 32 elements to the input map */ + for (i = 0; i < MAP_SIZE; i++) { + err = bpf_map_update_elem(map_in_fd, NULL, &vals[i], 0); + if (err) { + error_cnt++; + goto out; + } + } + + /* The eBPF program pushes iph.saddr in the output map, + * pops the input map and saves this value in iph.daddr + */ + for (i = 0; i < MAP_SIZE; i++) { + if (type == QUEUE) { + val = vals[i]; + pkt_v4.iph.saddr = vals[i] * 5; + } else if (type == STACK) { + val = vals[MAP_SIZE - 1 - i]; + pkt_v4.iph.saddr = vals[MAP_SIZE - 1 - i] * 5; + } + + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); + if (err || retval || size != sizeof(pkt_v4) || + iph->daddr != val) + break; + } + + CHECK(err || retval || size != sizeof(pkt_v4) || iph->daddr != val, + "bpf_map_pop_elem", + "err %d errno %d retval %d size %d iph->daddr %u\n", + err, errno, retval, size, iph->daddr); + + /* Queue is empty, program should return TC_ACT_SHOT */ + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); + CHECK(err || retval != 2 /* TC_ACT_SHOT */|| size != sizeof(pkt_v4), + "check-queue-stack-map-empty", + "err %d errno %d retval %d size %d\n", + err, errno, retval, size); + + /* Check that the program pushed elements correctly */ + for (i = 0; i < MAP_SIZE; i++) { + err = bpf_map_lookup_and_delete_elem(map_out_fd, NULL, &val); + if (err || val != vals[i] * 5) + break; + } + + CHECK(i != MAP_SIZE && (err || val != vals[i] * 5), + "bpf_map_push_elem", "err %d value %u\n", err, val); + +out: + pkt_v4.iph.saddr = 0; + bpf_object__close(obj); +} + int main(void) { + srand(time(NULL)); + jit_enabled = is_jit_enabled(); test_pkt_access(); @@ -1719,6 +1853,9 @@ int main(void) test_get_stack_raw_tp(); test_task_fd_query_rawtp(); test_task_fd_query_tp(); + test_reference_tracking(); + test_queue_stack_map(QUEUE); + test_queue_stack_map(STACK); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/tools/testing/selftests/bpf/test_queue_map.c b/tools/testing/selftests/bpf/test_queue_map.c new file mode 100644 index 000000000000..87db1f9da33d --- /dev/null +++ b/tools/testing/selftests/bpf/test_queue_map.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Politecnico di Torino +#define MAP_TYPE BPF_MAP_TYPE_QUEUE +#include "test_queue_stack_map.h" diff --git a/tools/testing/selftests/bpf/test_queue_stack_map.h b/tools/testing/selftests/bpf/test_queue_stack_map.h new file mode 100644 index 000000000000..295b9b3bc5c7 --- /dev/null +++ b/tools/testing/selftests/bpf/test_queue_stack_map.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (c) 2018 Politecnico di Torino +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/pkt_cls.h> +#include "bpf_helpers.h" + +int _version SEC("version") = 1; + +struct bpf_map_def __attribute__ ((section("maps"), used)) map_in = { + .type = MAP_TYPE, + .key_size = 0, + .value_size = sizeof(__u32), + .max_entries = 32, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) map_out = { + .type = MAP_TYPE, + .key_size = 0, + .value_size = sizeof(__u32), + .max_entries = 32, + .map_flags = 0, +}; + +SEC("test") +int _test(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct ethhdr *eth = (struct ethhdr *)(data); + __u32 value; + int err; + + if (eth + 1 > data_end) + return TC_ACT_SHOT; + + struct iphdr *iph = (struct iphdr *)(eth + 1); + + if (iph + 1 > data_end) + return TC_ACT_SHOT; + + err = bpf_map_pop_elem(&map_in, &value); + if (err) + return TC_ACT_SHOT; + + iph->daddr = value; + + err = bpf_map_push_elem(&map_out, &iph->saddr, 0); + if (err) + return TC_ACT_SHOT; + + return TC_ACT_OK; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_section_names.c b/tools/testing/selftests/bpf/test_section_names.c new file mode 100644 index 000000000000..7c4f41572b1c --- /dev/null +++ b/tools/testing/selftests/bpf/test_section_names.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <err.h> +#include <bpf/libbpf.h> + +#include "bpf_util.h" + +struct sec_name_test { + const char sec_name[32]; + struct { + int rc; + enum bpf_prog_type prog_type; + enum bpf_attach_type expected_attach_type; + } expected_load; + struct { + int rc; + enum bpf_attach_type attach_type; + } expected_attach; +}; + +static struct sec_name_test tests[] = { + {"InvAliD", {-EINVAL, 0, 0}, {-EINVAL, 0} }, + {"cgroup", {-EINVAL, 0, 0}, {-EINVAL, 0} }, + {"socket", {0, BPF_PROG_TYPE_SOCKET_FILTER, 0}, {-EINVAL, 0} }, + {"kprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} }, + {"kretprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} }, + {"classifier", {0, BPF_PROG_TYPE_SCHED_CLS, 0}, {-EINVAL, 0} }, + {"action", {0, BPF_PROG_TYPE_SCHED_ACT, 0}, {-EINVAL, 0} }, + {"tracepoint/", {0, BPF_PROG_TYPE_TRACEPOINT, 0}, {-EINVAL, 0} }, + { + "raw_tracepoint/", + {0, BPF_PROG_TYPE_RAW_TRACEPOINT, 0}, + {-EINVAL, 0}, + }, + {"xdp", {0, BPF_PROG_TYPE_XDP, 0}, {-EINVAL, 0} }, + {"perf_event", {0, BPF_PROG_TYPE_PERF_EVENT, 0}, {-EINVAL, 0} }, + {"lwt_in", {0, BPF_PROG_TYPE_LWT_IN, 0}, {-EINVAL, 0} }, + {"lwt_out", {0, BPF_PROG_TYPE_LWT_OUT, 0}, {-EINVAL, 0} }, + {"lwt_xmit", {0, BPF_PROG_TYPE_LWT_XMIT, 0}, {-EINVAL, 0} }, + {"lwt_seg6local", {0, BPF_PROG_TYPE_LWT_SEG6LOCAL, 0}, {-EINVAL, 0} }, + { + "cgroup_skb/ingress", + {0, BPF_PROG_TYPE_CGROUP_SKB, 0}, + {0, BPF_CGROUP_INET_INGRESS}, + }, + { + "cgroup_skb/egress", + {0, BPF_PROG_TYPE_CGROUP_SKB, 0}, + {0, BPF_CGROUP_INET_EGRESS}, + }, + {"cgroup/skb", {0, BPF_PROG_TYPE_CGROUP_SKB, 0}, {-EINVAL, 0} }, + { + "cgroup/sock", + {0, BPF_PROG_TYPE_CGROUP_SOCK, 0}, + {0, BPF_CGROUP_INET_SOCK_CREATE}, + }, + { + "cgroup/post_bind4", + {0, BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND}, + {0, BPF_CGROUP_INET4_POST_BIND}, + }, + { + "cgroup/post_bind6", + {0, BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND}, + {0, BPF_CGROUP_INET6_POST_BIND}, + }, + { + "cgroup/dev", + {0, BPF_PROG_TYPE_CGROUP_DEVICE, 0}, + {0, BPF_CGROUP_DEVICE}, + }, + {"sockops", {0, BPF_PROG_TYPE_SOCK_OPS, 0}, {0, BPF_CGROUP_SOCK_OPS} }, + { + "sk_skb/stream_parser", + {0, BPF_PROG_TYPE_SK_SKB, 0}, + {0, BPF_SK_SKB_STREAM_PARSER}, + }, + { + "sk_skb/stream_verdict", + {0, BPF_PROG_TYPE_SK_SKB, 0}, + {0, BPF_SK_SKB_STREAM_VERDICT}, + }, + {"sk_skb", {0, BPF_PROG_TYPE_SK_SKB, 0}, {-EINVAL, 0} }, + {"sk_msg", {0, BPF_PROG_TYPE_SK_MSG, 0}, {0, BPF_SK_MSG_VERDICT} }, + {"lirc_mode2", {0, BPF_PROG_TYPE_LIRC_MODE2, 0}, {0, BPF_LIRC_MODE2} }, + { + "flow_dissector", + {0, BPF_PROG_TYPE_FLOW_DISSECTOR, 0}, + {0, BPF_FLOW_DISSECTOR}, + }, + { + "cgroup/bind4", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND}, + {0, BPF_CGROUP_INET4_BIND}, + }, + { + "cgroup/bind6", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND}, + {0, BPF_CGROUP_INET6_BIND}, + }, + { + "cgroup/connect4", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT}, + {0, BPF_CGROUP_INET4_CONNECT}, + }, + { + "cgroup/connect6", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT}, + {0, BPF_CGROUP_INET6_CONNECT}, + }, + { + "cgroup/sendmsg4", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG}, + {0, BPF_CGROUP_UDP4_SENDMSG}, + }, + { + "cgroup/sendmsg6", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG}, + {0, BPF_CGROUP_UDP6_SENDMSG}, + }, +}; + +static int test_prog_type_by_name(const struct sec_name_test *test) +{ + enum bpf_attach_type expected_attach_type; + enum bpf_prog_type prog_type; + int rc; + + rc = libbpf_prog_type_by_name(test->sec_name, &prog_type, + &expected_attach_type); + + if (rc != test->expected_load.rc) { + warnx("prog: unexpected rc=%d for %s", rc, test->sec_name); + return -1; + } + + if (rc) + return 0; + + if (prog_type != test->expected_load.prog_type) { + warnx("prog: unexpected prog_type=%d for %s", prog_type, + test->sec_name); + return -1; + } + + if (expected_attach_type != test->expected_load.expected_attach_type) { + warnx("prog: unexpected expected_attach_type=%d for %s", + expected_attach_type, test->sec_name); + return -1; + } + + return 0; +} + +static int test_attach_type_by_name(const struct sec_name_test *test) +{ + enum bpf_attach_type attach_type; + int rc; + + rc = libbpf_attach_type_by_name(test->sec_name, &attach_type); + + if (rc != test->expected_attach.rc) { + warnx("attach: unexpected rc=%d for %s", rc, test->sec_name); + return -1; + } + + if (rc) + return 0; + + if (attach_type != test->expected_attach.attach_type) { + warnx("attach: unexpected attach_type=%d for %s", attach_type, + test->sec_name); + return -1; + } + + return 0; +} + +static int run_test_case(const struct sec_name_test *test) +{ + if (test_prog_type_by_name(test)) + return -1; + if (test_attach_type_by_name(test)) + return -1; + return 0; +} + +static int run_tests(void) +{ + int passes = 0; + int fails = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(tests); ++i) { + if (run_test_case(&tests[i])) + ++fails; + else + ++passes; + } + printf("Summary: %d PASSED, %d FAILED\n", passes, fails); + return fails ? -1 : 0; +} + +int main(int argc, char **argv) +{ + return run_tests(); +} diff --git a/tools/testing/selftests/bpf/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/test_sk_lookup_kern.c new file mode 100644 index 000000000000..b745bdc08c2b --- /dev/null +++ b/tools/testing/selftests/bpf/test_sk_lookup_kern.c @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io + +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/pkt_cls.h> +#include <linux/tcp.h> +#include <sys/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; +char _license[] SEC("license") = "GPL"; + +/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */ +static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off, + void *data_end, __u16 eth_proto, + bool *ipv4) +{ + struct bpf_sock_tuple *result; + __u8 proto = 0; + __u64 ihl_len; + + if (eth_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)(data + nh_off); + + if (iph + 1 > data_end) + return NULL; + ihl_len = iph->ihl * 4; + proto = iph->protocol; + *ipv4 = true; + result = (struct bpf_sock_tuple *)&iph->saddr; + } else if (eth_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + nh_off); + + if (ip6h + 1 > data_end) + return NULL; + ihl_len = sizeof(*ip6h); + proto = ip6h->nexthdr; + *ipv4 = true; + result = (struct bpf_sock_tuple *)&ip6h->saddr; + } + + if (data + nh_off + ihl_len > data_end || proto != IPPROTO_TCP) + return NULL; + + return result; +} + +SEC("sk_lookup_success") +int bpf_sk_lookup_test0(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct ethhdr *eth = (struct ethhdr *)(data); + struct bpf_sock_tuple *tuple; + struct bpf_sock *sk; + size_t tuple_len; + bool ipv4; + + if (eth + 1 > data_end) + return TC_ACT_SHOT; + + tuple = get_tuple(data, sizeof(*eth), data_end, eth->h_proto, &ipv4); + if (!tuple || tuple + sizeof *tuple > data_end) + return TC_ACT_SHOT; + + tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6); + sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, 0, 0); + if (sk) + bpf_sk_release(sk); + return sk ? TC_ACT_OK : TC_ACT_UNSPEC; +} + +SEC("sk_lookup_success_simple") +int bpf_sk_lookup_test1(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + if (sk) + bpf_sk_release(sk); + return 0; +} + +SEC("fail_use_after_free") +int bpf_sk_lookup_uaf(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + __u32 family = 0; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + if (sk) { + bpf_sk_release(sk); + family = sk->family; + } + return family; +} + +SEC("fail_modify_sk_pointer") +int bpf_sk_lookup_modptr(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + __u32 family; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + if (sk) { + sk += 1; + bpf_sk_release(sk); + } + return 0; +} + +SEC("fail_modify_sk_or_null_pointer") +int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + __u32 family; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk += 1; + if (sk) + bpf_sk_release(sk); + return 0; +} + +SEC("fail_no_release") +int bpf_sk_lookup_test2(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + + bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + return 0; +} + +SEC("fail_release_twice") +int bpf_sk_lookup_test3(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + bpf_sk_release(sk); + bpf_sk_release(sk); + return 0; +} + +SEC("fail_release_unchecked") +int bpf_sk_lookup_test4(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + bpf_sk_release(sk); + return 0; +} + +void lookup_no_release(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); +} + +SEC("fail_no_release_subcall") +int bpf_sk_lookup_test5(struct __sk_buff *skb) +{ + lookup_no_release(skb); + return 0; +} diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c index 68e108e4687a..b6c2c605d8c0 100644 --- a/tools/testing/selftests/bpf/test_socket_cookie.c +++ b/tools/testing/selftests/bpf/test_socket_cookie.c @@ -158,11 +158,7 @@ static int run_test(int cgfd) bpf_object__for_each_program(prog, pobj) { prog_name = bpf_program__title(prog, /*needs_copy*/ false); - if (strcmp(prog_name, "cgroup/connect6") == 0) { - attach_type = BPF_CGROUP_INET6_CONNECT; - } else if (strcmp(prog_name, "sockops") == 0) { - attach_type = BPF_CGROUP_SOCK_OPS; - } else { + if (libbpf_attach_type_by_name(prog_name, &attach_type)) { log_err("Unexpected prog: %s", prog_name); goto err; } diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 0c7d9e556b47..622ade0a0957 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -28,6 +28,7 @@ #include <linux/sock_diag.h> #include <linux/bpf.h> #include <linux/if_link.h> +#include <linux/tls.h> #include <assert.h> #include <libgen.h> @@ -43,6 +44,13 @@ int running; static void running_handler(int a); +#ifndef TCP_ULP +# define TCP_ULP 31 +#endif +#ifndef SOL_TLS +# define SOL_TLS 282 +#endif + /* randomly selected ports for testing on lo */ #define S1_PORT 10000 #define S2_PORT 10001 @@ -69,8 +77,12 @@ int txmsg_apply; int txmsg_cork; int txmsg_start; int txmsg_end; +int txmsg_start_push; +int txmsg_end_push; int txmsg_ingress; int txmsg_skb; +int ktls; +int peek_flag; static const struct option long_options[] = { {"help", no_argument, NULL, 'h' }, @@ -90,8 +102,12 @@ static const struct option long_options[] = { {"txmsg_cork", required_argument, NULL, 'k'}, {"txmsg_start", required_argument, NULL, 's'}, {"txmsg_end", required_argument, NULL, 'e'}, + {"txmsg_start_push", required_argument, NULL, 'p'}, + {"txmsg_end_push", required_argument, NULL, 'q'}, {"txmsg_ingress", no_argument, &txmsg_ingress, 1 }, {"txmsg_skb", no_argument, &txmsg_skb, 1 }, + {"ktls", no_argument, &ktls, 1 }, + {"peek", no_argument, &peek_flag, 1 }, {0, 0, NULL, 0 } }; @@ -112,6 +128,71 @@ static void usage(char *argv[]) printf("\n"); } +char *sock_to_string(int s) +{ + if (s == c1) + return "client1"; + else if (s == c2) + return "client2"; + else if (s == s1) + return "server1"; + else if (s == s2) + return "server2"; + else if (s == p1) + return "peer1"; + else if (s == p2) + return "peer2"; + else + return "unknown"; +} + +static int sockmap_init_ktls(int verbose, int s) +{ + struct tls12_crypto_info_aes_gcm_128 tls_tx = { + .info = { + .version = TLS_1_2_VERSION, + .cipher_type = TLS_CIPHER_AES_GCM_128, + }, + }; + struct tls12_crypto_info_aes_gcm_128 tls_rx = { + .info = { + .version = TLS_1_2_VERSION, + .cipher_type = TLS_CIPHER_AES_GCM_128, + }, + }; + int so_buf = 6553500; + int err; + + err = setsockopt(s, 6, TCP_ULP, "tls", sizeof("tls")); + if (err) { + fprintf(stderr, "setsockopt: TCP_ULP(%s) failed with error %i\n", sock_to_string(s), err); + return -EINVAL; + } + err = setsockopt(s, SOL_TLS, TLS_TX, (void *)&tls_tx, sizeof(tls_tx)); + if (err) { + fprintf(stderr, "setsockopt: TLS_TX(%s) failed with error %i\n", sock_to_string(s), err); + return -EINVAL; + } + err = setsockopt(s, SOL_TLS, TLS_RX, (void *)&tls_rx, sizeof(tls_rx)); + if (err) { + fprintf(stderr, "setsockopt: TLS_RX(%s) failed with error %i\n", sock_to_string(s), err); + return -EINVAL; + } + err = setsockopt(s, SOL_SOCKET, SO_SNDBUF, &so_buf, sizeof(so_buf)); + if (err) { + fprintf(stderr, "setsockopt: (%s) failed sndbuf with error %i\n", sock_to_string(s), err); + return -EINVAL; + } + err = setsockopt(s, SOL_SOCKET, SO_RCVBUF, &so_buf, sizeof(so_buf)); + if (err) { + fprintf(stderr, "setsockopt: (%s) failed rcvbuf with error %i\n", sock_to_string(s), err); + return -EINVAL; + } + + if (verbose) + fprintf(stdout, "socket(%s) kTLS enabled\n", sock_to_string(s)); + return 0; +} static int sockmap_init_sockets(int verbose) { int i, err, one = 1; @@ -277,33 +358,40 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, return 0; } -static int msg_loop(int fd, int iov_count, int iov_length, int cnt, - struct msg_stats *s, bool tx, - struct sockmap_options *opt) +static void msg_free_iov(struct msghdr *msg) { - struct msghdr msg = {0}; - int err, i, flags = MSG_NOSIGNAL; + int i; + + for (i = 0; i < msg->msg_iovlen; i++) + free(msg->msg_iov[i].iov_base); + free(msg->msg_iov); + msg->msg_iov = NULL; + msg->msg_iovlen = 0; +} + +static int msg_alloc_iov(struct msghdr *msg, + int iov_count, int iov_length, + bool data, bool xmit) +{ + unsigned char k = 0; struct iovec *iov; - unsigned char k; - bool data_test = opt->data_test; - bool drop = opt->drop_expected; + int i; iov = calloc(iov_count, sizeof(struct iovec)); if (!iov) return errno; - k = 0; for (i = 0; i < iov_count; i++) { unsigned char *d = calloc(iov_length, sizeof(char)); if (!d) { fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count); - goto out_errno; + goto unwind_iov; } iov[i].iov_base = d; iov[i].iov_len = iov_length; - if (data_test && tx) { + if (data && xmit) { int j; for (j = 0; j < iov_length; j++) @@ -311,9 +399,60 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, } } - msg.msg_iov = iov; - msg.msg_iovlen = iov_count; - k = 0; + msg->msg_iov = iov; + msg->msg_iovlen = iov_count; + + return 0; +unwind_iov: + for (i--; i >= 0 ; i--) + free(msg->msg_iov[i].iov_base); + return -ENOMEM; +} + +static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz) +{ + int i, j, bytes_cnt = 0; + unsigned char k = 0; + + for (i = 0; i < msg->msg_iovlen; i++) { + unsigned char *d = msg->msg_iov[i].iov_base; + + for (j = 0; + j < msg->msg_iov[i].iov_len && size; j++) { + if (d[j] != k++) { + fprintf(stderr, + "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n", + i, j, d[j], k - 1, d[j+1], k); + return -EIO; + } + bytes_cnt++; + if (bytes_cnt == chunk_sz) { + k = 0; + bytes_cnt = 0; + } + size--; + } + } + return 0; +} + +static int msg_loop(int fd, int iov_count, int iov_length, int cnt, + struct msg_stats *s, bool tx, + struct sockmap_options *opt) +{ + struct msghdr msg = {0}, msg_peek = {0}; + int err, i, flags = MSG_NOSIGNAL; + bool drop = opt->drop_expected; + bool data = opt->data_test; + + err = msg_alloc_iov(&msg, iov_count, iov_length, data, tx); + if (err) + goto out_errno; + if (peek_flag) { + err = msg_alloc_iov(&msg_peek, iov_count, iov_length, data, tx); + if (err) + goto out_errno; + } if (tx) { clock_gettime(CLOCK_MONOTONIC, &s->start); @@ -333,19 +472,12 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, } clock_gettime(CLOCK_MONOTONIC, &s->end); } else { - int slct, recv, max_fd = fd; + int slct, recvp = 0, recv, max_fd = fd; int fd_flags = O_NONBLOCK; struct timeval timeout; float total_bytes; - int bytes_cnt = 0; - int chunk_sz; fd_set w; - if (opt->sendpage) - chunk_sz = iov_length * cnt; - else - chunk_sz = iov_length * iov_count; - fcntl(fd, fd_flags); total_bytes = (float)iov_count * (float)iov_length * (float)cnt; err = clock_gettime(CLOCK_MONOTONIC, &s->start); @@ -377,6 +509,19 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, goto out_errno; } + errno = 0; + if (peek_flag) { + flags |= MSG_PEEK; + recvp = recvmsg(fd, &msg_peek, flags); + if (recvp < 0) { + if (errno != EWOULDBLOCK) { + clock_gettime(CLOCK_MONOTONIC, &s->end); + goto out_errno; + } + } + flags = 0; + } + recv = recvmsg(fd, &msg, flags); if (recv < 0) { if (errno != EWOULDBLOCK) { @@ -388,27 +533,23 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, s->bytes_recvd += recv; - if (data_test) { - int j; - - for (i = 0; i < msg.msg_iovlen; i++) { - unsigned char *d = iov[i].iov_base; - - for (j = 0; - j < iov[i].iov_len && recv; j++) { - if (d[j] != k++) { - errno = -EIO; - fprintf(stderr, - "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n", - i, j, d[j], k - 1, d[j+1], k); - goto out_errno; - } - bytes_cnt++; - if (bytes_cnt == chunk_sz) { - k = 0; - bytes_cnt = 0; - } - recv--; + if (data) { + int chunk_sz = opt->sendpage ? + iov_length * cnt : + iov_length * iov_count; + + errno = msg_verify_data(&msg, recv, chunk_sz); + if (errno) { + perror("data verify msg failed\n"); + goto out_errno; + } + if (recvp) { + errno = msg_verify_data(&msg_peek, + recvp, + chunk_sz); + if (errno) { + perror("data verify msg_peek failed\n"); + goto out_errno; } } } @@ -416,14 +557,12 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, clock_gettime(CLOCK_MONOTONIC, &s->end); } - for (i = 0; i < iov_count; i++) - free(iov[i].iov_base); - free(iov); - return 0; + msg_free_iov(&msg); + msg_free_iov(&msg_peek); + return err; out_errno: - for (i = 0; i < iov_count; i++) - free(iov[i].iov_base); - free(iov); + msg_free_iov(&msg); + msg_free_iov(&msg_peek); return errno; } @@ -456,6 +595,21 @@ static int sendmsg_test(struct sockmap_options *opt) else rx_fd = p2; + if (ktls) { + /* Redirecting into non-TLS socket which sends into a TLS + * socket is not a valid test. So in this case lets not + * enable kTLS but still run the test. + */ + if (!txmsg_redir || (txmsg_redir && txmsg_ingress)) { + err = sockmap_init_ktls(opt->verbose, rx_fd); + if (err) + return err; + } + err = sockmap_init_ktls(opt->verbose, c1); + if (err) + return err; + } + rxpid = fork(); if (rxpid == 0) { if (opt->drop_expected) @@ -469,17 +623,16 @@ static int sendmsg_test(struct sockmap_options *opt) fprintf(stderr, "msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n", iov_count, iov_buf, cnt, err); - shutdown(p2, SHUT_RDWR); - shutdown(p1, SHUT_RDWR); if (s.end.tv_sec - s.start.tv_sec) { sent_Bps = sentBps(s); recvd_Bps = recvdBps(s); } if (opt->verbose) fprintf(stdout, - "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n", + "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s %s\n", s.bytes_sent, sent_Bps, sent_Bps/giga, - s.bytes_recvd, recvd_Bps, recvd_Bps/giga); + s.bytes_recvd, recvd_Bps, recvd_Bps/giga, + peek_flag ? "(peek_msg)" : ""); if (err && txmsg_cork) err = 0; exit(err ? 1 : 0); @@ -500,7 +653,6 @@ static int sendmsg_test(struct sockmap_options *opt) fprintf(stderr, "msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n", iov_count, iov_buf, cnt, err); - shutdown(c1, SHUT_RDWR); if (s.end.tv_sec - s.start.tv_sec) { sent_Bps = sentBps(s); recvd_Bps = recvdBps(s); @@ -755,6 +907,30 @@ run: } } + if (txmsg_start_push) { + i = 2; + err = bpf_map_update_elem(map_fd[5], + &i, &txmsg_start_push, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (txmsg_start_push): %d (%s)\n", + err, strerror(errno)); + goto out; + } + } + + if (txmsg_end_push) { + i = 3; + err = bpf_map_update_elem(map_fd[5], + &i, &txmsg_end_push, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem %i@%i (txmsg_end_push): %d (%s)\n", + txmsg_end_push, i, err, strerror(errno)); + goto out; + } + } + if (txmsg_ingress) { int in = BPF_F_INGRESS; @@ -910,6 +1086,10 @@ static void test_options(char *options) strncat(options, "ingress,", OPTSTRING); if (txmsg_skb) strncat(options, "skb,", OPTSTRING); + if (ktls) + strncat(options, "ktls,", OPTSTRING); + if (peek_flag) + strncat(options, "peek,", OPTSTRING); } static int __test_exec(int cgrp, int test, struct sockmap_options *opt) @@ -1083,6 +1263,8 @@ static int test_mixed(int cgrp) txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0; txmsg_apply = txmsg_cork = 0; txmsg_start = txmsg_end = 0; + txmsg_start_push = txmsg_end_push = 0; + /* Test small and large iov_count values with pass/redir/apply/cork */ txmsg_pass = 1; txmsg_redir = 0; @@ -1199,6 +1381,8 @@ static int test_start_end(int cgrp) /* Test basic start/end with lots of iov_count and iov_lengths */ txmsg_start = 1; txmsg_end = 2; + txmsg_start_push = 1; + txmsg_end_push = 2; err = test_txmsg(cgrp); if (err) goto out; @@ -1212,6 +1396,8 @@ static int test_start_end(int cgrp) for (i = 99; i <= 1600; i += 500) { txmsg_start = 0; txmsg_end = i; + txmsg_start_push = 0; + txmsg_end_push = i; err = test_exec(cgrp, &opt); if (err) goto out; @@ -1221,6 +1407,8 @@ static int test_start_end(int cgrp) for (i = 199; i <= 1600; i += 500) { txmsg_start = 100; txmsg_end = i; + txmsg_start_push = 100; + txmsg_end_push = i; err = test_exec(cgrp, &opt); if (err) goto out; @@ -1229,6 +1417,8 @@ static int test_start_end(int cgrp) /* Test start/end with cork pulling last sg entry */ txmsg_start = 1500; txmsg_end = 1600; + txmsg_start_push = 1500; + txmsg_end_push = 1600; err = test_exec(cgrp, &opt); if (err) goto out; @@ -1236,6 +1426,8 @@ static int test_start_end(int cgrp) /* Test start/end pull of single byte in last page */ txmsg_start = 1111; txmsg_end = 1112; + txmsg_start_push = 1111; + txmsg_end_push = 1112; err = test_exec(cgrp, &opt); if (err) goto out; @@ -1243,6 +1435,8 @@ static int test_start_end(int cgrp) /* Test start/end with end < start */ txmsg_start = 1111; txmsg_end = 0; + txmsg_start_push = 1111; + txmsg_end_push = 0; err = test_exec(cgrp, &opt); if (err) goto out; @@ -1250,6 +1444,8 @@ static int test_start_end(int cgrp) /* Test start/end with end > data */ txmsg_start = 0; txmsg_end = 1601; + txmsg_start_push = 0; + txmsg_end_push = 1601; err = test_exec(cgrp, &opt); if (err) goto out; @@ -1257,6 +1453,8 @@ static int test_start_end(int cgrp) /* Test start/end with start > data */ txmsg_start = 1601; txmsg_end = 1600; + txmsg_start_push = 1601; + txmsg_end_push = 1600; err = test_exec(cgrp, &opt); out: @@ -1272,7 +1470,7 @@ char *map_names[] = { "sock_map_redir", "sock_apply_bytes", "sock_cork_bytes", - "sock_pull_bytes", + "sock_bytes", "sock_redir_flags", "sock_skb_opts", }; @@ -1348,9 +1546,9 @@ static int populate_progs(char *bpf_file) return 0; } -static int __test_suite(char *bpf_file) +static int __test_suite(int cg_fd, char *bpf_file) { - int cg_fd, err; + int err, cleanup = cg_fd; err = populate_progs(bpf_file); if (err < 0) { @@ -1358,26 +1556,28 @@ static int __test_suite(char *bpf_file) return err; } - if (setup_cgroup_environment()) { - fprintf(stderr, "ERROR: cgroup env failed\n"); - return -EINVAL; - } - - cg_fd = create_and_get_cgroup(CG_PATH); if (cg_fd < 0) { - fprintf(stderr, - "ERROR: (%i) open cg path failed: %s\n", - cg_fd, optarg); - return cg_fd; - } + if (setup_cgroup_environment()) { + fprintf(stderr, "ERROR: cgroup env failed\n"); + return -EINVAL; + } - if (join_cgroup(CG_PATH)) { - fprintf(stderr, "ERROR: failed to join cgroup\n"); - return -EINVAL; + cg_fd = create_and_get_cgroup(CG_PATH); + if (cg_fd < 0) { + fprintf(stderr, + "ERROR: (%i) open cg path failed: %s\n", + cg_fd, optarg); + return cg_fd; + } + + if (join_cgroup(CG_PATH)) { + fprintf(stderr, "ERROR: failed to join cgroup\n"); + return -EINVAL; + } } /* Tests basic commands and APIs with range of iov values */ - txmsg_start = txmsg_end = 0; + txmsg_start = txmsg_end = txmsg_start_push = txmsg_end_push = 0; err = test_txmsg(cg_fd); if (err) goto out; @@ -1394,20 +1594,24 @@ static int __test_suite(char *bpf_file) out: printf("Summary: %i PASSED %i FAILED\n", passed, failed); - cleanup_cgroup_environment(); - close(cg_fd); + if (cleanup < 0) { + cleanup_cgroup_environment(); + close(cg_fd); + } return err; } -static int test_suite(void) +static int test_suite(int cg_fd) { int err; - err = __test_suite(BPF_SOCKMAP_FILENAME); + err = __test_suite(cg_fd, BPF_SOCKMAP_FILENAME); if (err) goto out; - err = __test_suite(BPF_SOCKHASH_FILENAME); + err = __test_suite(cg_fd, BPF_SOCKHASH_FILENAME); out: + if (cg_fd > -1) + close(cg_fd); return err; } @@ -1420,9 +1624,9 @@ int main(int argc, char **argv) int test = PING_PONG; if (argc < 2) - return test_suite(); + return test_suite(-1); - while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:", + while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:", long_options, &longindex)) != -1) { switch (opt) { case 's': @@ -1431,6 +1635,12 @@ int main(int argc, char **argv) case 'e': txmsg_end = atoi(optarg); break; + case 'p': + txmsg_start_push = atoi(optarg); + break; + case 'q': + txmsg_end_push = atoi(optarg); + break; case 'a': txmsg_apply = atoi(optarg); break; @@ -1486,6 +1696,9 @@ int main(int argc, char **argv) } } + if (argc <= 3 && cg_fd) + return test_suite(cg_fd); + if (!cg_fd) { fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n", argv[0]); diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h index 8e8e41780bb9..14b8bbac004f 100644 --- a/tools/testing/selftests/bpf/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/test_sockmap_kern.h @@ -70,11 +70,11 @@ struct bpf_map_def SEC("maps") sock_cork_bytes = { .max_entries = 1 }; -struct bpf_map_def SEC("maps") sock_pull_bytes = { +struct bpf_map_def SEC("maps") sock_bytes = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), - .max_entries = 2 + .max_entries = 4 }; struct bpf_map_def SEC("maps") sock_redir_flags = { @@ -181,8 +181,8 @@ int bpf_sockmap(struct bpf_sock_ops *skops) SEC("sk_msg1") int bpf_prog4(struct sk_msg_md *msg) { - int *bytes, zero = 0, one = 1; - int *start, *end; + int *bytes, zero = 0, one = 1, two = 2, three = 3; + int *start, *end, *start_push, *end_push; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); if (bytes) @@ -190,18 +190,24 @@ int bpf_prog4(struct sk_msg_md *msg) bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); if (bytes) bpf_msg_cork_bytes(msg, *bytes); - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); + start = bpf_map_lookup_elem(&sock_bytes, &zero); + end = bpf_map_lookup_elem(&sock_bytes, &one); if (start && end) bpf_msg_pull_data(msg, *start, *end, 0); + start_push = bpf_map_lookup_elem(&sock_bytes, &two); + end_push = bpf_map_lookup_elem(&sock_bytes, &three); + if (start_push && end_push) + bpf_msg_push_data(msg, *start_push, *end_push, 0); return SK_PASS; } SEC("sk_msg2") int bpf_prog5(struct sk_msg_md *msg) { - int err1 = -1, err2 = -1, zero = 0, one = 1; - int *bytes, *start, *end, len1, len2; + int zero = 0, one = 1, two = 2, three = 3; + int *start, *end, *start_push, *end_push; + int *bytes, len1, len2 = 0, len3; + int err1 = -1, err2 = -1; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); if (bytes) @@ -210,8 +216,8 @@ int bpf_prog5(struct sk_msg_md *msg) if (bytes) err2 = bpf_msg_cork_bytes(msg, *bytes); len1 = (__u64)msg->data_end - (__u64)msg->data; - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); + start = bpf_map_lookup_elem(&sock_bytes, &zero); + end = bpf_map_lookup_elem(&sock_bytes, &one); if (start && end) { int err; @@ -225,6 +231,23 @@ int bpf_prog5(struct sk_msg_md *msg) bpf_printk("sk_msg2: length update %i->%i\n", len1, len2); } + + start_push = bpf_map_lookup_elem(&sock_bytes, &two); + end_push = bpf_map_lookup_elem(&sock_bytes, &three); + if (start_push && end_push) { + int err; + + bpf_printk("sk_msg2: push(%i:%i)\n", + start_push ? *start_push : 0, + end_push ? *end_push : 0); + err = bpf_msg_push_data(msg, *start_push, *end_push, 0); + if (err) + bpf_printk("sk_msg2: push_data err %i\n", err); + len3 = (__u64)msg->data_end - (__u64)msg->data; + bpf_printk("sk_msg2: length push_update %i->%i\n", + len2 ? len2 : len1, len3); + } + bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n", len1, err1, err2); return SK_PASS; @@ -233,8 +256,8 @@ int bpf_prog5(struct sk_msg_md *msg) SEC("sk_msg3") int bpf_prog6(struct sk_msg_md *msg) { - int *bytes, zero = 0, one = 1, key = 0; - int *start, *end, *f; + int *bytes, *start, *end, *start_push, *end_push, *f; + int zero = 0, one = 1, two = 2, three = 3, key = 0; __u64 flags = 0; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); @@ -243,10 +266,17 @@ int bpf_prog6(struct sk_msg_md *msg) bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); if (bytes) bpf_msg_cork_bytes(msg, *bytes); - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); + + start = bpf_map_lookup_elem(&sock_bytes, &zero); + end = bpf_map_lookup_elem(&sock_bytes, &one); if (start && end) bpf_msg_pull_data(msg, *start, *end, 0); + + start_push = bpf_map_lookup_elem(&sock_bytes, &two); + end_push = bpf_map_lookup_elem(&sock_bytes, &three); + if (start_push && end_push) + bpf_msg_push_data(msg, *start_push, *end_push, 0); + f = bpf_map_lookup_elem(&sock_redir_flags, &zero); if (f && *f) { key = 2; @@ -262,8 +292,9 @@ int bpf_prog6(struct sk_msg_md *msg) SEC("sk_msg4") int bpf_prog7(struct sk_msg_md *msg) { - int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0; - int *f, *bytes, *start, *end, len1, len2; + int zero = 0, one = 1, two = 2, three = 3, len1, len2 = 0, len3; + int *bytes, *start, *end, *start_push, *end_push, *f; + int err1 = 0, err2 = 0, key = 0; __u64 flags = 0; int err; @@ -274,10 +305,10 @@ int bpf_prog7(struct sk_msg_md *msg) if (bytes) err2 = bpf_msg_cork_bytes(msg, *bytes); len1 = (__u64)msg->data_end - (__u64)msg->data; - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); - if (start && end) { + start = bpf_map_lookup_elem(&sock_bytes, &zero); + end = bpf_map_lookup_elem(&sock_bytes, &one); + if (start && end) { bpf_printk("sk_msg2: pull(%i:%i)\n", start ? *start : 0, end ? *end : 0); err = bpf_msg_pull_data(msg, *start, *end, 0); @@ -288,6 +319,22 @@ int bpf_prog7(struct sk_msg_md *msg) bpf_printk("sk_msg2: length update %i->%i\n", len1, len2); } + + start_push = bpf_map_lookup_elem(&sock_bytes, &two); + end_push = bpf_map_lookup_elem(&sock_bytes, &three); + if (start_push && end_push) { + bpf_printk("sk_msg4: push(%i:%i)\n", + start_push ? *start_push : 0, + end_push ? *end_push : 0); + err = bpf_msg_push_data(msg, *start_push, *end_push, 0); + if (err) + bpf_printk("sk_msg4: push_data err %i\n", + err); + len3 = (__u64)msg->data_end - (__u64)msg->data; + bpf_printk("sk_msg4: length push_update %i->%i\n", + len2 ? len2 : len1, len3); + } + f = bpf_map_lookup_elem(&sock_redir_flags, &zero); if (f && *f) { key = 2; @@ -342,8 +389,8 @@ int bpf_prog9(struct sk_msg_md *msg) SEC("sk_msg7") int bpf_prog10(struct sk_msg_md *msg) { - int *bytes, zero = 0, one = 1; - int *start, *end; + int *bytes, *start, *end, *start_push, *end_push; + int zero = 0, one = 1, two = 2, three = 3; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); if (bytes) @@ -351,10 +398,14 @@ int bpf_prog10(struct sk_msg_md *msg) bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); if (bytes) bpf_msg_cork_bytes(msg, *bytes); - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); + start = bpf_map_lookup_elem(&sock_bytes, &zero); + end = bpf_map_lookup_elem(&sock_bytes, &one); if (start && end) bpf_msg_pull_data(msg, *start, *end, 0); + start_push = bpf_map_lookup_elem(&sock_bytes, &two); + end_push = bpf_map_lookup_elem(&sock_bytes, &three); + if (start_push && end_push) + bpf_msg_push_data(msg, *start_push, *end_push, 0); return SK_DROP; } diff --git a/tools/testing/selftests/bpf/test_stack_map.c b/tools/testing/selftests/bpf/test_stack_map.c new file mode 100644 index 000000000000..31c3880e6da0 --- /dev/null +++ b/tools/testing/selftests/bpf/test_stack_map.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Politecnico di Torino +#define MAP_TYPE BPF_MAP_TYPE_STACK +#include "test_queue_stack_map.h" diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c index 4b7fd540cea9..74f73b33a7b0 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c @@ -5,6 +5,7 @@ #include <linux/if_ether.h> #include <linux/if_packet.h> #include <linux/ip.h> +#include <linux/ipv6.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/tcp.h> @@ -17,6 +18,13 @@ struct bpf_map_def SEC("maps") global_map = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), .value_size = sizeof(struct tcpbpf_globals), + .max_entries = 4, +}; + +struct bpf_map_def SEC("maps") sockopt_results = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(int), .max_entries = 2, }; @@ -45,11 +53,14 @@ int _version SEC("version") = 1; SEC("sockops") int bpf_testcb(struct bpf_sock_ops *skops) { - int rv = -1; - int bad_call_rv = 0; + char header[sizeof(struct ipv6hdr) + sizeof(struct tcphdr)]; + struct tcphdr *thdr; int good_call_rv = 0; - int op; + int bad_call_rv = 0; + int save_syn = 1; + int rv = -1; int v = 0; + int op; op = (int) skops->op; @@ -82,6 +93,21 @@ int bpf_testcb(struct bpf_sock_ops *skops) v = 0xff; rv = bpf_setsockopt(skops, SOL_IPV6, IPV6_TCLASS, &v, sizeof(v)); + if (skops->family == AF_INET6) { + v = bpf_getsockopt(skops, IPPROTO_TCP, TCP_SAVED_SYN, + header, (sizeof(struct ipv6hdr) + + sizeof(struct tcphdr))); + if (!v) { + int offset = sizeof(struct ipv6hdr); + + thdr = (struct tcphdr *)(header + offset); + v = thdr->syn; + __u32 key = 1; + + bpf_map_update_elem(&sockopt_results, &key, &v, + BPF_ANY); + } + } break; case BPF_SOCK_OPS_RTO_CB: break; @@ -111,6 +137,12 @@ int bpf_testcb(struct bpf_sock_ops *skops) break; case BPF_SOCK_OPS_TCP_LISTEN_CB: bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG); + v = bpf_setsockopt(skops, IPPROTO_TCP, TCP_SAVE_SYN, + &save_syn, sizeof(save_syn)); + /* Update global map w/ result of setsock opt */ + __u32 key = 0; + + bpf_map_update_elem(&sockopt_results, &key, &v, BPF_ANY); break; default: rv = -1; diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c index a275c2971376..e6eebda7d112 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf_user.c +++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c @@ -54,6 +54,26 @@ err: return -1; } +int verify_sockopt_result(int sock_map_fd) +{ + __u32 key = 0; + int res; + int rv; + + /* check setsockopt for SAVE_SYN */ + rv = bpf_map_lookup_elem(sock_map_fd, &key, &res); + EXPECT_EQ(0, rv, "d"); + EXPECT_EQ(0, res, "d"); + key = 1; + /* check getsockopt for SAVED_SYN */ + rv = bpf_map_lookup_elem(sock_map_fd, &key, &res); + EXPECT_EQ(0, rv, "d"); + EXPECT_EQ(1, res, "d"); + return 0; +err: + return -1; +} + static int bpf_find_map(const char *test, struct bpf_object *obj, const char *name) { @@ -70,11 +90,11 @@ static int bpf_find_map(const char *test, struct bpf_object *obj, int main(int argc, char **argv) { const char *file = "test_tcpbpf_kern.o"; + int prog_fd, map_fd, sock_map_fd; struct tcpbpf_globals g = {0}; const char *cg_path = "/foo"; int error = EXIT_FAILURE; struct bpf_object *obj; - int prog_fd, map_fd; int cg_fd = -1; __u32 key = 0; int rv; @@ -110,6 +130,10 @@ int main(int argc, char **argv) if (map_fd < 0) goto err; + sock_map_fd = bpf_find_map(__func__, obj, "sockopt_results"); + if (sock_map_fd < 0) + goto err; + rv = bpf_map_lookup_elem(map_fd, &key, &g); if (rv != 0) { printf("FAILED: bpf_map_lookup_elem returns %d\n", rv); @@ -121,6 +145,11 @@ int main(int argc, char **argv) goto err; } + if (verify_sockopt_result(sock_map_fd)) { + printf("FAILED: Wrong sockopt stats\n"); + goto err; + } + printf("PASSED!\n"); error = 0; err: diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 67c412d19c09..769d68a48f30 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -3,6 +3,7 @@ * * Copyright (c) 2014 PLUMgrid, http://plumgrid.com * Copyright (c) 2017 Facebook + * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -47,7 +48,7 @@ #define MAX_INSNS BPF_MAXINSNS #define MAX_FIXUPS 8 -#define MAX_NR_MAPS 8 +#define MAX_NR_MAPS 13 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 @@ -60,14 +61,19 @@ static bool unpriv_disabled = false; struct bpf_test { const char *descr; struct bpf_insn insns[MAX_INSNS]; - int fixup_map1[MAX_FIXUPS]; - int fixup_map2[MAX_FIXUPS]; - int fixup_map3[MAX_FIXUPS]; - int fixup_map4[MAX_FIXUPS]; + int fixup_map_hash_8b[MAX_FIXUPS]; + int fixup_map_hash_48b[MAX_FIXUPS]; + int fixup_map_hash_16b[MAX_FIXUPS]; + int fixup_map_array_48b[MAX_FIXUPS]; + int fixup_map_sockmap[MAX_FIXUPS]; + int fixup_map_sockhash[MAX_FIXUPS]; + int fixup_map_xskmap[MAX_FIXUPS]; + int fixup_map_stacktrace[MAX_FIXUPS]; int fixup_prog1[MAX_FIXUPS]; int fixup_prog2[MAX_FIXUPS]; int fixup_map_in_map[MAX_FIXUPS]; int fixup_cgroup_storage[MAX_FIXUPS]; + int fixup_percpu_cgroup_storage[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; uint32_t retval; @@ -177,6 +183,24 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self) self->retval = (uint32_t)res; } +/* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */ +#define BPF_SK_LOOKUP \ + /* struct bpf_sock_tuple tuple = {} */ \ + BPF_MOV64_IMM(BPF_REG_2, 0), \ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8), \ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -16), \ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -24), \ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -32), \ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -40), \ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -48), \ + /* sk = sk_lookup_tcp(ctx, &tuple, sizeof tuple, 0, 0) */ \ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), \ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48), \ + BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)), \ + BPF_MOV64_IMM(BPF_REG_4, 0), \ + BPF_MOV64_IMM(BPF_REG_5, 0), \ + BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp) + static struct bpf_test tests[] = { { "add+sub+mul", @@ -856,7 +880,7 @@ static struct bpf_test tests[] = { BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map1 = { 2 }, + .fixup_map_hash_8b = { 2 }, .errstr = "invalid indirect read from stack", .result = REJECT, }, @@ -1090,7 +1114,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "R0 invalid mem access 'map_value_or_null'", .result = REJECT, }, @@ -1107,7 +1131,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "misaligned value access", .result = REJECT, .flags = F_LOAD_WITH_STRICT_ALIGNMENT, @@ -1127,7 +1151,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "R0 invalid mem access", .errstr_unpriv = "R0 leaks addr", .result = REJECT, @@ -1217,7 +1241,7 @@ static struct bpf_test tests[] = { BPF_FUNC_map_delete_elem), BPF_EXIT_INSN(), }, - .fixup_map1 = { 24 }, + .fixup_map_hash_8b = { 24 }, .errstr_unpriv = "R1 pointer comparison", .result_unpriv = REJECT, .result = ACCEPT, @@ -1371,7 +1395,7 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, pkt_type)), BPF_EXIT_INSN(), }, - .fixup_map1 = { 4 }, + .fixup_map_hash_8b = { 4 }, .errstr = "different pointers", .errstr_unpriv = "R1 pointer comparison", .result = REJECT, @@ -1394,7 +1418,7 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_JMP_IMM(BPF_JA, 0, 0, -12), }, - .fixup_map1 = { 6 }, + .fixup_map_hash_8b = { 6 }, .errstr = "different pointers", .errstr_unpriv = "R1 pointer comparison", .result = REJECT, @@ -1418,7 +1442,7 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_JMP_IMM(BPF_JA, 0, 0, -13), }, - .fixup_map1 = { 7 }, + .fixup_map_hash_8b = { 7 }, .errstr = "different pointers", .errstr_unpriv = "R1 pointer comparison", .result = REJECT, @@ -2555,7 +2579,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr_unpriv = "R4 leaks addr", .result_unpriv = REJECT, .result = ACCEPT, @@ -2572,7 +2596,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "invalid indirect read from stack off -8+0 size 8", .result = REJECT, }, @@ -2707,6 +2731,137 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { + "unpriv: spill/fill of different pointers stx - ctx and sock", + .insns = { + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + /* struct bpf_sock *sock = bpf_sock_lookup(...); */ + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + /* u64 foo; */ + /* void *target = &foo; */ + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + /* if (skb == NULL) *target = sock; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), + /* else *target = skb; */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + /* struct __sk_buff *skb = *target; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + /* skb->mark = 42; */ + BPF_MOV64_IMM(BPF_REG_3, 42), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct __sk_buff, mark)), + /* if (sk) bpf_sk_release(sk) */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "type=ctx expected=sock", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "unpriv: spill/fill of different pointers stx - leak sock", + .insns = { + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + /* struct bpf_sock *sock = bpf_sock_lookup(...); */ + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + /* u64 foo; */ + /* void *target = &foo; */ + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + /* if (skb == NULL) *target = sock; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), + /* else *target = skb; */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + /* struct __sk_buff *skb = *target; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + /* skb->mark = 42; */ + BPF_MOV64_IMM(BPF_REG_3, 42), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + //.errstr = "same insn cannot be used with different pointers", + .errstr = "Unreleased reference", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "unpriv: spill/fill of different pointers stx - sock and ctx (read)", + .insns = { + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + /* struct bpf_sock *sock = bpf_sock_lookup(...); */ + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + /* u64 foo; */ + /* void *target = &foo; */ + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + /* if (skb) *target = skb */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + /* else *target = sock */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), + /* struct bpf_sock *sk = *target; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + /* if (sk) u32 foo = sk->mark; bpf_sk_release(sk); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct bpf_sock, mark)), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "same insn cannot be used with different pointers", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "unpriv: spill/fill of different pointers stx - sock and ctx (write)", + .insns = { + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + /* struct bpf_sock *sock = bpf_sock_lookup(...); */ + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + /* u64 foo; */ + /* void *target = &foo; */ + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + /* if (skb) *target = skb */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + /* else *target = sock */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), + /* struct bpf_sock *sk = *target; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + /* if (sk) sk->mark = 42; bpf_sk_release(sk); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + BPF_MOV64_IMM(BPF_REG_3, 42), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct bpf_sock, mark)), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + //.errstr = "same insn cannot be used with different pointers", + .errstr = "cannot write into socket", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { "unpriv: spill/fill of different pointers ldx", .insns = { BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), @@ -2743,7 +2898,7 @@ static struct bpf_test tests[] = { BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr_unpriv = "R0 leaks addr", .result_unpriv = REJECT, .result = ACCEPT, @@ -2783,7 +2938,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 1 }, + .fixup_map_hash_8b = { 1 }, .errstr_unpriv = "R1 pointer comparison", .result_unpriv = REJECT, .result = ACCEPT, @@ -3275,7 +3430,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0), BPF_EXIT_INSN(), }, - .errstr = "BPF_ST stores into R1 context is not allowed", + .errstr = "BPF_ST stores into R1 ctx is not allowed", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -3287,7 +3442,7 @@ static struct bpf_test tests[] = { BPF_REG_0, offsetof(struct __sk_buff, mark), 0), BPF_EXIT_INSN(), }, - .errstr = "BPF_XADD stores into R1 context is not allowed", + .errstr = "BPF_XADD stores into R1 ctx is not allowed", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -3637,7 +3792,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END", + .errstr = "R3 pointer arithmetic on pkt_end", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -3922,7 +4077,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 5 }, + .fixup_map_hash_8b = { 5 }, .result_unpriv = ACCEPT, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, @@ -3938,7 +4093,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 1 }, + .fixup_map_hash_8b = { 1 }, .result = REJECT, .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_XDP, @@ -3966,7 +4121,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 11 }, + .fixup_map_hash_8b = { 11 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, }, @@ -3988,7 +4143,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 7 }, + .fixup_map_hash_8b = { 7 }, .result = REJECT, .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_XDP, @@ -4010,7 +4165,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 6 }, + .fixup_map_hash_8b = { 6 }, .result = REJECT, .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_XDP, @@ -4033,7 +4188,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 5 }, + .fixup_map_hash_8b = { 5 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -4048,7 +4203,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 1 }, + .fixup_map_hash_8b = { 1 }, .result = REJECT, .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_SCHED_CLS, @@ -4076,7 +4231,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 11 }, + .fixup_map_hash_8b = { 11 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -4098,7 +4253,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 7 }, + .fixup_map_hash_8b = { 7 }, .result = REJECT, .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_SCHED_CLS, @@ -4120,7 +4275,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 6 }, + .fixup_map_hash_8b = { 6 }, .result = REJECT, .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_SCHED_CLS, @@ -4391,6 +4546,85 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { + "prevent map lookup in sockmap", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_sockmap = { 3 }, + .result = REJECT, + .errstr = "cannot pass map_type 15 into func bpf_map_lookup_elem", + .prog_type = BPF_PROG_TYPE_SOCK_OPS, + }, + { + "prevent map lookup in sockhash", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_sockhash = { 3 }, + .result = REJECT, + .errstr = "cannot pass map_type 18 into func bpf_map_lookup_elem", + .prog_type = BPF_PROG_TYPE_SOCK_OPS, + }, + { + "prevent map lookup in xskmap", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_xskmap = { 3 }, + .result = REJECT, + .errstr = "cannot pass map_type 17 into func bpf_map_lookup_elem", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "prevent map lookup in stack trace", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_stacktrace = { 3 }, + .result = REJECT, + .errstr = "cannot pass map_type 7 into func bpf_map_lookup_elem", + .prog_type = BPF_PROG_TYPE_PERF_EVENT, + }, + { + "prevent map lookup in prog array", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_prog2 = { 3 }, + .result = REJECT, + .errstr = "cannot pass map_type 3 into func bpf_map_lookup_elem", + }, + { "valid map access into an array with a constant", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), @@ -4404,7 +4638,7 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .result_unpriv = REJECT, .result = ACCEPT, @@ -4426,7 +4660,7 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .result_unpriv = REJECT, .result = ACCEPT, @@ -4450,7 +4684,7 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .result_unpriv = REJECT, .result = ACCEPT, @@ -4478,7 +4712,7 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .result_unpriv = REJECT, .result = ACCEPT, @@ -4498,7 +4732,7 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "invalid access to map value, value_size=48 off=48 size=8", .result = REJECT, }, @@ -4519,7 +4753,7 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R0 min value is outside of the array range", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -4541,7 +4775,7 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -4566,7 +4800,7 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .errstr = "R0 unbounded memory access", .result_unpriv = REJECT, @@ -4593,7 +4827,7 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .errstr = "invalid access to map value, value_size=48 off=44 size=8", .result_unpriv = REJECT, @@ -4623,12 +4857,183 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3, 11 }, + .fixup_map_hash_48b = { 3, 11 }, .errstr = "R0 pointer += pointer", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { + "direct packet read test#1 for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, pkt_type)), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, queue_mapping)), + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, protocol)), + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, + offsetof(struct __sk_buff, vlan_present)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "direct packet read test#2 for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct __sk_buff, vlan_tci)), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, vlan_proto)), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, priority)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, priority)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, + ingress_ifindex)), + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, tc_index)), + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, + offsetof(struct __sk_buff, hash)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "direct packet read test#3 for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, cb[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, cb[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, cb[3])), + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, cb[4])), + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, + offsetof(struct __sk_buff, napi_id)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_4, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_5, + offsetof(struct __sk_buff, cb[1])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, cb[2])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7, + offsetof(struct __sk_buff, cb[3])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_8, + offsetof(struct __sk_buff, cb[4])), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "direct packet read test#4 for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, family)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip4)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct __sk_buff, local_ip4)), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[3])), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[3])), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, remote_port)), + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, local_port)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid access of tc_classid for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tc_classid)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid bpf_context access", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid access of data_meta for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data_meta)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid bpf_context access", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid access of flow_keys for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, flow_keys)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid bpf_context access", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid write access to napi_id for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, + offsetof(struct __sk_buff, napi_id)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_9, + offsetof(struct __sk_buff, napi_id)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid bpf_context access", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { "valid cgroup storage access", .insns = { BPF_MOV64_IMM(BPF_REG_2, 0), @@ -4656,7 +5061,7 @@ static struct bpf_test tests[] = { BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .fixup_map1 = { 1 }, + .fixup_map_hash_8b = { 1 }, .result = REJECT, .errstr = "cannot pass map_type 1 into func bpf_get_local_storage", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, @@ -4676,7 +5081,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, { - "invalid per-cgroup storage access 3", + "invalid cgroup storage access 3", .insns = { BPF_MOV64_IMM(BPF_REG_2, 0), BPF_LD_MAP_FD(BPF_REG_1, 0), @@ -4744,6 +5149,121 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, { + "valid per-cpu cgroup storage access", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_percpu_cgroup_storage = { 1 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid per-cpu cgroup storage access 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 1 }, + .result = REJECT, + .errstr = "cannot pass map_type 1 into func bpf_get_local_storage", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid per-cpu cgroup storage access 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "fd 1 is not pointing to valid bpf_map", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid per-cpu cgroup storage access 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_percpu_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=64 off=256 size=4", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid per-cpu cgroup storage access 4", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), + BPF_EXIT_INSN(), + }, + .fixup_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=64 off=-2 size=4", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid per-cpu cgroup storage access 5", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 7), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_percpu_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "get_local_storage() doesn't support non-zero flags", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid per-cpu cgroup storage access 6", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_percpu_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "get_local_storage() doesn't support non-zero flags", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { "multiple registers share map_lookup_elem result", .insns = { BPF_MOV64_IMM(BPF_REG_1, 10), @@ -4758,7 +5278,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 4 }, + .fixup_map_hash_8b = { 4 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS }, @@ -4779,8 +5299,8 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 4 }, - .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL", + .fixup_map_hash_8b = { 4 }, + .errstr = "R4 pointer arithmetic on map_value_or_null", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS }, @@ -4800,8 +5320,8 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 4 }, - .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL", + .fixup_map_hash_8b = { 4 }, + .errstr = "R4 pointer arithmetic on map_value_or_null", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS }, @@ -4821,8 +5341,8 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 4 }, - .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL", + .fixup_map_hash_8b = { 4 }, + .errstr = "R4 pointer arithmetic on map_value_or_null", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS }, @@ -4847,7 +5367,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 4 }, + .fixup_map_hash_8b = { 4 }, .result = REJECT, .errstr = "R4 !read_ok", .prog_type = BPF_PROG_TYPE_SCHED_CLS @@ -4875,7 +5395,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 4 }, + .fixup_map_hash_8b = { 4 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS }, @@ -4896,7 +5416,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R0 unbounded memory access", .result = REJECT, .errstr_unpriv = "R0 leaks addr", @@ -5146,11 +5666,11 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, cb[0])), BPF_EXIT_INSN(), }, - .fixup_map1 = { 2 }, + .fixup_map_hash_8b = { 2 }, .errstr_unpriv = "R2 leaks addr into mem", .result_unpriv = REJECT, .result = REJECT, - .errstr = "BPF_XADD stores into R1 context is not allowed", + .errstr = "BPF_XADD stores into R1 ctx is not allowed", }, { "leak pointer into ctx 2", @@ -5165,7 +5685,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R10 leaks addr into mem", .result_unpriv = REJECT, .result = REJECT, - .errstr = "BPF_XADD stores into R1 context is not allowed", + .errstr = "BPF_XADD stores into R1 ctx is not allowed", }, { "leak pointer into ctx 3", @@ -5176,7 +5696,7 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, cb[0])), BPF_EXIT_INSN(), }, - .fixup_map1 = { 1 }, + .fixup_map_hash_8b = { 1 }, .errstr_unpriv = "R2 leaks addr into ctx", .result_unpriv = REJECT, .result = ACCEPT, @@ -5198,7 +5718,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 4 }, + .fixup_map_hash_8b = { 4 }, .errstr_unpriv = "R6 leaks addr into mem", .result_unpriv = REJECT, .result = ACCEPT, @@ -5218,7 +5738,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5237,7 +5757,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5255,7 +5775,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_trace_printk), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "invalid access to map value, value_size=48 off=0 size=0", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5275,7 +5795,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "invalid access to map value, value_size=48 off=0 size=56", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5295,7 +5815,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R2 min value is negative", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5319,7 +5839,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5340,7 +5860,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5360,7 +5880,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_trace_printk), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "invalid access to map value, value_size=48 off=4 size=0", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5384,7 +5904,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "invalid access to map value, value_size=48 off=4 size=52", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5406,7 +5926,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R2 min value is negative", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5428,7 +5948,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R2 min value is negative", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5453,7 +5973,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5475,7 +5995,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5495,7 +6015,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_trace_printk), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R1 min value is outside of the array range", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5520,7 +6040,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "invalid access to map value, value_size=48 off=4 size=52", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5543,7 +6063,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R2 min value is negative", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5566,7 +6086,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R2 min value is negative", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5592,7 +6112,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5615,7 +6135,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5637,7 +6157,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_trace_printk), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R1 min value is outside of the array range", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5659,7 +6179,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R1 unbounded memory access", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5685,7 +6205,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "invalid access to map value, value_size=48 off=4 size=45", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5709,7 +6229,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5732,7 +6252,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = REJECT, .errstr = "R1 unbounded memory access", .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5756,7 +6276,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5779,7 +6299,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = REJECT, .errstr = "R1 unbounded memory access", .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5804,7 +6324,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5828,7 +6348,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5852,7 +6372,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = REJECT, .errstr = "R1 min value is negative", .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5877,7 +6397,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5901,7 +6421,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5925,7 +6445,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = REJECT, .errstr = "R1 min value is negative", .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5944,7 +6464,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map3 = { 3, 8 }, + .fixup_map_hash_16b = { 3, 8 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5964,7 +6484,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_update_elem), BPF_EXIT_INSN(), }, - .fixup_map3 = { 3, 10 }, + .fixup_map_hash_16b = { 3, 10 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5984,8 +6504,8 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_update_elem), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, - .fixup_map3 = { 10 }, + .fixup_map_hash_8b = { 3 }, + .fixup_map_hash_16b = { 10 }, .result = REJECT, .errstr = "invalid access to map value, value_size=8 off=0 size=16", .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -6006,7 +6526,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map3 = { 3, 9 }, + .fixup_map_hash_16b = { 3, 9 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -6026,7 +6546,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map3 = { 3, 9 }, + .fixup_map_hash_16b = { 3, 9 }, .result = REJECT, .errstr = "invalid access to map value, value_size=16 off=12 size=8", .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -6046,7 +6566,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map3 = { 3, 9 }, + .fixup_map_hash_16b = { 3, 9 }, .result = REJECT, .errstr = "invalid access to map value, value_size=16 off=-4 size=8", .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -6068,7 +6588,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map3 = { 3, 10 }, + .fixup_map_hash_16b = { 3, 10 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -6089,7 +6609,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map3 = { 3, 10 }, + .fixup_map_hash_16b = { 3, 10 }, .result = REJECT, .errstr = "invalid access to map value, value_size=16 off=12 size=8", .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -6110,7 +6630,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map3 = { 3, 10 }, + .fixup_map_hash_16b = { 3, 10 }, .result = REJECT, .errstr = "invalid access to map value, value_size=16 off=-4 size=8", .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -6133,7 +6653,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map3 = { 3, 11 }, + .fixup_map_hash_16b = { 3, 11 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -6153,7 +6673,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map3 = { 3, 10 }, + .fixup_map_hash_16b = { 3, 10 }, .result = REJECT, .errstr = "R2 unbounded memory access, make sure to bounds check any array access into a map", .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -6176,7 +6696,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map3 = { 3, 11 }, + .fixup_map_hash_16b = { 3, 11 }, .result = REJECT, .errstr = "invalid access to map value, value_size=16 off=9 size=8", .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -6198,7 +6718,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .result = ACCEPT, .result_unpriv = REJECT, @@ -6219,7 +6739,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .result = ACCEPT, .result_unpriv = REJECT, @@ -6236,7 +6756,7 @@ static struct bpf_test tests[] = { BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R1 !read_ok", .errstr = "R1 !read_ok", .result = REJECT, @@ -6270,7 +6790,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_7, -4, 24), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .result = ACCEPT, .result_unpriv = REJECT, @@ -6298,7 +6818,7 @@ static struct bpf_test tests[] = { BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .result = ACCEPT, .result_unpriv = REJECT, @@ -6317,7 +6837,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R0 bitwise operator &= on pointer", .result = REJECT, }, @@ -6334,7 +6854,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R0 32-bit pointer arithmetic prohibited", .result = REJECT, }, @@ -6351,7 +6871,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R0 pointer arithmetic with /= operator", .result = REJECT, }, @@ -6368,7 +6888,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 pointer arithmetic prohibited", .errstr = "invalid mem access 'inv'", .result = REJECT, @@ -6392,7 +6912,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R0 invalid mem access 'inv'", .result = REJECT, }, @@ -6415,7 +6935,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .result = ACCEPT, .result_unpriv = REJECT, @@ -6661,7 +7181,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -6687,7 +7207,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "invalid access to map value, value_size=48 off=0 size=49", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -6715,7 +7235,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -6742,7 +7262,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R1 min value is outside of the array range", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -6814,7 +7334,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_csum_diff), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -6839,7 +7359,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_csum_diff), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -6862,7 +7382,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_csum_diff), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -6943,7 +7463,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -6964,7 +7484,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -6984,7 +7504,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -7059,7 +7579,7 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R0 max value is outside of the array range", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -7089,7 +7609,7 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R0 max value is outside of the array range", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -7137,7 +7657,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map_in_map = { 3 }, - .errstr = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited", + .errstr = "R1 pointer arithmetic on map_ptr prohibited", .result = REJECT, }, { @@ -7442,7 +7962,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", .result = REJECT, }, @@ -7466,7 +7986,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", .result = REJECT, }, @@ -7492,7 +8012,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", .result = REJECT, }, @@ -7517,7 +8037,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", .result = REJECT, }, @@ -7541,7 +8061,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT, }, { @@ -7565,7 +8085,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", .result = REJECT, }, @@ -7611,7 +8131,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT, }, { @@ -7636,7 +8156,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", .result = REJECT, }, @@ -7662,7 +8182,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT, }, { @@ -7687,7 +8207,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", .result = REJECT, }, @@ -7714,7 +8234,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", .result = REJECT, }, @@ -7740,7 +8260,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", .result = REJECT, }, @@ -7769,7 +8289,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", .result = REJECT, }, @@ -7799,7 +8319,7 @@ static struct bpf_test tests[] = { BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, -3), BPF_JMP_IMM(BPF_JA, 0, 0, -7), }, - .fixup_map1 = { 4 }, + .fixup_map_hash_8b = { 4 }, .errstr = "R0 invalid mem access 'inv'", .result = REJECT, }, @@ -7827,7 +8347,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", .result = REJECT, .result_unpriv = REJECT, @@ -7854,7 +8374,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "R0 max value is outside of the array range", .result = REJECT, }, @@ -7879,7 +8399,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", .result = REJECT, }, @@ -7905,7 +8425,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT }, { @@ -7930,7 +8450,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "map_value pointer and 4294967295", .result = REJECT }, @@ -7956,7 +8476,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "R0 min value is outside of the array range", .result = REJECT }, @@ -7980,7 +8500,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 4 }, + .fixup_map_hash_8b = { 4 }, .errstr = "value_size=8 off=1073741825", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, @@ -8005,7 +8525,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 4 }, + .fixup_map_hash_8b = { 4 }, .errstr = "value 1073741823", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, @@ -8041,7 +8561,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT }, { @@ -8080,7 +8600,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, /* not actually fully unbounded, but the bound is very high */ .errstr = "R0 unbounded memory access", .result = REJECT @@ -8123,7 +8643,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, /* not actually fully unbounded, but the bound is very high */ .errstr = "R0 unbounded memory access", .result = REJECT @@ -8152,7 +8672,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT }, { @@ -8179,7 +8699,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "R0 max value is outside of the array range", .result = REJECT }, @@ -8209,7 +8729,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "R0 unbounded memory access", .result = REJECT }, @@ -8229,7 +8749,7 @@ static struct bpf_test tests[] = { BPF_JMP_A(0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "map_value pointer and 2147483646", .result = REJECT }, @@ -8251,7 +8771,7 @@ static struct bpf_test tests[] = { BPF_JMP_A(0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "pointer offset 1073741822", .result = REJECT }, @@ -8272,7 +8792,7 @@ static struct bpf_test tests[] = { BPF_JMP_A(0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "pointer offset -1073741822", .result = REJECT }, @@ -8294,7 +8814,7 @@ static struct bpf_test tests[] = { BPF_JMP_A(0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "map_value pointer and 1000000000000", .result = REJECT }, @@ -8314,7 +8834,7 @@ static struct bpf_test tests[] = { BPF_JMP_A(0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT, .retval = POINTER_VALUE, .result_unpriv = REJECT, @@ -8335,7 +8855,7 @@ static struct bpf_test tests[] = { BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT, .retval = POINTER_VALUE, .result_unpriv = REJECT, @@ -8403,7 +8923,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 5 }, + .fixup_map_hash_8b = { 5 }, .errstr = "variable stack read R2", .result = REJECT, .prog_type = BPF_PROG_TYPE_LWT_IN, @@ -8484,7 +9004,7 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .errstr = "R0 unbounded memory access", .result_unpriv = REJECT, @@ -8811,7 +9331,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END", + .errstr = "R3 pointer arithmetic on pkt_end", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, }, @@ -8830,7 +9350,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END", + .errstr = "R3 pointer arithmetic on pkt_end", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, }, @@ -10018,7 +10538,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map1 = { 16 }, + .fixup_map_hash_8b = { 16 }, .result = REJECT, .errstr = "R0 min value is outside of the array range", }, @@ -10969,7 +11489,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), /* return 0 */ }, .prog_type = BPF_PROG_TYPE_XDP, - .fixup_map1 = { 23 }, + .fixup_map_hash_8b = { 23 }, .result = ACCEPT, }, { @@ -11024,7 +11544,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), /* return 1 */ }, .prog_type = BPF_PROG_TYPE_XDP, - .fixup_map1 = { 23 }, + .fixup_map_hash_8b = { 23 }, .result = ACCEPT, }, { @@ -11079,7 +11599,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), /* return 1 */ }, .prog_type = BPF_PROG_TYPE_XDP, - .fixup_map1 = { 23 }, + .fixup_map_hash_8b = { 23 }, .result = REJECT, .errstr = "invalid read from stack off -16+0 size 8", }, @@ -11151,7 +11671,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map1 = { 12, 22 }, + .fixup_map_hash_8b = { 12, 22 }, .result = REJECT, .errstr = "invalid access to map value, value_size=8 off=2 size=8", }, @@ -11223,7 +11743,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map1 = { 12, 22 }, + .fixup_map_hash_8b = { 12, 22 }, .result = ACCEPT, }, { @@ -11294,7 +11814,7 @@ static struct bpf_test tests[] = { BPF_JMP_IMM(BPF_JA, 0, 0, -8), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map1 = { 12, 22 }, + .fixup_map_hash_8b = { 12, 22 }, .result = REJECT, .errstr = "invalid access to map value, value_size=8 off=2 size=8", }, @@ -11366,7 +11886,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map1 = { 12, 22 }, + .fixup_map_hash_8b = { 12, 22 }, .result = ACCEPT, }, { @@ -11437,7 +11957,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map1 = { 12, 22 }, + .fixup_map_hash_8b = { 12, 22 }, .result = REJECT, .errstr = "R0 invalid mem access 'inv'", }, @@ -11782,7 +12302,7 @@ static struct bpf_test tests[] = { BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 13 }, + .fixup_map_hash_8b = { 13 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, }, @@ -11809,7 +12329,7 @@ static struct bpf_test tests[] = { BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map2 = { 6 }, + .fixup_map_hash_48b = { 6 }, .errstr = "invalid indirect read from stack off -8+0 size 8", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, @@ -11841,8 +12361,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map2 = { 13 }, - .fixup_map4 = { 16 }, + .fixup_map_hash_48b = { 13 }, + .fixup_map_array_48b = { 16 }, .result = ACCEPT, .retval = 1, }, @@ -11874,7 +12394,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .fixup_map_in_map = { 16 }, - .fixup_map4 = { 13 }, + .fixup_map_array_48b = { 13 }, .result = REJECT, .errstr = "R0 invalid mem access 'map_ptr'", }, @@ -11942,7 +12462,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xdead), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "R6 invalid mem access 'inv'", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -11966,7 +12486,7 @@ static struct bpf_test tests[] = { BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "invalid read from stack off -16+0 size 8", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -12088,7 +12608,7 @@ static struct bpf_test tests[] = { BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = REJECT, .errstr = "misaligned value access off", .prog_type = BPF_PROG_TYPE_SCHED_CLS, @@ -12114,7 +12634,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "BPF_XADD stores into R2 packet", + .errstr = "BPF_XADD stores into R2 pkt is not allowed", .prog_type = BPF_PROG_TYPE_XDP, }, { @@ -12198,7 +12718,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_get_stack), BPF_EXIT_INSN(), }, - .fixup_map2 = { 4 }, + .fixup_map_hash_48b = { 4 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -12442,6 +12962,214 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { + "reference tracking: leak potential reference", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* leak reference */ + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + }, + { + "reference tracking: leak potential reference on stack", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + }, + { + "reference tracking: leak potential reference on stack 2", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + }, + { + "reference tracking: zero potential reference", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_IMM(BPF_REG_0, 0), /* leak reference */ + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + }, + { + "reference tracking: copy and zero potential references", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_7, 0), /* leak reference */ + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + }, + { + "reference tracking: release reference without check", + .insns = { + BPF_SK_LOOKUP, + /* reference in r0 may be NULL */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "type=sock_or_null expected=sock", + .result = REJECT, + }, + { + "reference tracking: release reference", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "reference tracking: release reference 2", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "reference tracking: release reference twice", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "type=inv expected=sock", + .result = REJECT, + }, + { + "reference tracking: release reference twice inside branch", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), /* goto end */ + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "type=inv expected=sock", + .result = REJECT, + }, + { + "reference tracking: alloc, check, free in one subbranch", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 16), + /* if (offsetof(skb, mark) > data_len) exit; */ + BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2, + offsetof(struct __sk_buff, mark)), + BPF_SK_LOOKUP, + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 1), /* mark == 0? */ + /* Leak reference in R0 */ + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + }, + { + "reference tracking: alloc, check, free in both subbranches", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 16), + /* if (offsetof(skb, mark) > data_len) exit; */ + BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2, + offsetof(struct __sk_buff, mark)), + BPF_SK_LOOKUP, + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 4), /* mark == 0? */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "reference tracking in call: free reference in subprog", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { "pass modified ctx pointer to helper, 1", .insns = { BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612), @@ -12511,6 +13239,407 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, }, + { + "reference tracking in call: free reference in subprog and outside", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "type=inv expected=sock", + .result = REJECT, + }, + { + "reference tracking in call: alloc & leak reference in subprog", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_4), + BPF_SK_LOOKUP, + /* spill unchecked sk_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + }, + { + "reference tracking in call: alloc in subprog, release outside", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_SK_LOOKUP, + BPF_EXIT_INSN(), /* return sk */ + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = POINTER_VALUE, + .result = ACCEPT, + }, + { + "reference tracking in call: sk_ptr leak into caller stack", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), + /* spill unchecked sk_ptr into stack of caller */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_5, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_SK_LOOKUP, + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + }, + { + "reference tracking in call: sk_ptr spill into caller stack", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + /* spill unchecked sk_ptr into stack of caller */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_5, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* now the sk_ptr is verified, free the reference */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_4, 0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_SK_LOOKUP, + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "reference tracking: allow LD_ABS", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "reference tracking: forbid LD_ABS while holding reference", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_SK_LOOKUP, + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "BPF_LD_[ABS|IND] cannot be mixed with socket references", + .result = REJECT, + }, + { + "reference tracking: allow LD_IND", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_IMM(BPF_REG_7, 1), + BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "reference tracking: forbid LD_IND while holding reference", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_7, 1), + BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "BPF_LD_[ABS|IND] cannot be mixed with socket references", + .result = REJECT, + }, + { + "reference tracking: check reference or tail call", + .insns = { + BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), + BPF_SK_LOOKUP, + /* if (sk) bpf_sk_release() */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 7), + /* bpf_tail_call() */ + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 17 }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "reference tracking: release reference then tail call", + .insns = { + BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), + BPF_SK_LOOKUP, + /* if (sk) bpf_sk_release() */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + /* bpf_tail_call() */ + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 18 }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "reference tracking: leak possible reference over tail call", + .insns = { + BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), + /* Look up socket and store in REG_6 */ + BPF_SK_LOOKUP, + /* bpf_tail_call() */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 0), + /* if (sk) bpf_sk_release() */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 16 }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "tail_call would lead to reference leak", + .result = REJECT, + }, + { + "reference tracking: leak checked reference over tail call", + .insns = { + BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), + /* Look up socket and store in REG_6 */ + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* if (!sk) goto end */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + /* bpf_tail_call() */ + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 17 }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "tail_call would lead to reference leak", + .result = REJECT, + }, + { + "reference tracking: mangle and release sock_or_null", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "R1 pointer arithmetic on sock_or_null prohibited", + .result = REJECT, + }, + { + "reference tracking: mangle and release sock", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "R1 pointer arithmetic on sock prohibited", + .result = REJECT, + }, + { + "reference tracking: access member", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "reference tracking: write to member", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LD_IMM64(BPF_REG_2, 42), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_2, + offsetof(struct bpf_sock, mark)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LD_IMM64(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "cannot write into socket", + .result = REJECT, + }, + { + "reference tracking: invalid 64-bit access of member", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid bpf_sock access off=0 size=8", + .result = REJECT, + }, + { + "reference tracking: access after release", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "!read_ok", + .result = REJECT, + }, + { + "reference tracking: direct access for lookup", + .insns = { + /* Check that the packet is at least 64B long */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 9), + /* sk = sk_lookup_tcp(ctx, skb->data, ...) */ + BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) @@ -12536,18 +13665,18 @@ static int create_map(uint32_t type, uint32_t size_key, return fd; } -static int create_prog_dummy1(void) +static int create_prog_dummy1(enum bpf_map_type prog_type) { struct bpf_insn prog[] = { BPF_MOV64_IMM(BPF_REG_0, 42), BPF_EXIT_INSN(), }; - return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, + return bpf_load_program(prog_type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0); } -static int create_prog_dummy2(int mfd, int idx) +static int create_prog_dummy2(enum bpf_map_type prog_type, int mfd, int idx) { struct bpf_insn prog[] = { BPF_MOV64_IMM(BPF_REG_3, idx), @@ -12558,11 +13687,12 @@ static int create_prog_dummy2(int mfd, int idx) BPF_EXIT_INSN(), }; - return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, + return bpf_load_program(prog_type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0); } -static int create_prog_array(uint32_t max_elem, int p1key) +static int create_prog_array(enum bpf_map_type prog_type, uint32_t max_elem, + int p1key) { int p2key = 1; int mfd, p1fd, p2fd; @@ -12574,8 +13704,8 @@ static int create_prog_array(uint32_t max_elem, int p1key) return -1; } - p1fd = create_prog_dummy1(); - p2fd = create_prog_dummy2(mfd, p2key); + p1fd = create_prog_dummy1(prog_type); + p2fd = create_prog_dummy2(prog_type, mfd, p2key); if (p1fd < 0 || p2fd < 0) goto out; if (bpf_map_update_elem(mfd, &p1key, &p1fd, BPF_ANY) < 0) @@ -12615,32 +13745,39 @@ static int create_map_in_map(void) return outer_map_fd; } -static int create_cgroup_storage(void) +static int create_cgroup_storage(bool percpu) { + enum bpf_map_type type = percpu ? BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE : + BPF_MAP_TYPE_CGROUP_STORAGE; int fd; - fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, - sizeof(struct bpf_cgroup_storage_key), + fd = bpf_create_map(type, sizeof(struct bpf_cgroup_storage_key), TEST_DATA_LEN, 0, 0); if (fd < 0) - printf("Failed to create array '%s'!\n", strerror(errno)); + printf("Failed to create cgroup storage '%s'!\n", + strerror(errno)); return fd; } static char bpf_vlog[UINT_MAX >> 8]; -static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, - int *map_fds) +static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type, + struct bpf_insn *prog, int *map_fds) { - int *fixup_map1 = test->fixup_map1; - int *fixup_map2 = test->fixup_map2; - int *fixup_map3 = test->fixup_map3; - int *fixup_map4 = test->fixup_map4; + int *fixup_map_hash_8b = test->fixup_map_hash_8b; + int *fixup_map_hash_48b = test->fixup_map_hash_48b; + int *fixup_map_hash_16b = test->fixup_map_hash_16b; + int *fixup_map_array_48b = test->fixup_map_array_48b; + int *fixup_map_sockmap = test->fixup_map_sockmap; + int *fixup_map_sockhash = test->fixup_map_sockhash; + int *fixup_map_xskmap = test->fixup_map_xskmap; + int *fixup_map_stacktrace = test->fixup_map_stacktrace; int *fixup_prog1 = test->fixup_prog1; int *fixup_prog2 = test->fixup_prog2; int *fixup_map_in_map = test->fixup_map_in_map; int *fixup_cgroup_storage = test->fixup_cgroup_storage; + int *fixup_percpu_cgroup_storage = test->fixup_percpu_cgroup_storage; if (test->fill_helper) test->fill_helper(test); @@ -12649,44 +13786,44 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, * for verifier and not do a runtime lookup, so the only thing * that really matters is value size in this case. */ - if (*fixup_map1) { + if (*fixup_map_hash_8b) { map_fds[0] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long), sizeof(long long), 1); do { - prog[*fixup_map1].imm = map_fds[0]; - fixup_map1++; - } while (*fixup_map1); + prog[*fixup_map_hash_8b].imm = map_fds[0]; + fixup_map_hash_8b++; + } while (*fixup_map_hash_8b); } - if (*fixup_map2) { + if (*fixup_map_hash_48b) { map_fds[1] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long), sizeof(struct test_val), 1); do { - prog[*fixup_map2].imm = map_fds[1]; - fixup_map2++; - } while (*fixup_map2); + prog[*fixup_map_hash_48b].imm = map_fds[1]; + fixup_map_hash_48b++; + } while (*fixup_map_hash_48b); } - if (*fixup_map3) { + if (*fixup_map_hash_16b) { map_fds[2] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long), sizeof(struct other_val), 1); do { - prog[*fixup_map3].imm = map_fds[2]; - fixup_map3++; - } while (*fixup_map3); + prog[*fixup_map_hash_16b].imm = map_fds[2]; + fixup_map_hash_16b++; + } while (*fixup_map_hash_16b); } - if (*fixup_map4) { + if (*fixup_map_array_48b) { map_fds[3] = create_map(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(struct test_val), 1); do { - prog[*fixup_map4].imm = map_fds[3]; - fixup_map4++; - } while (*fixup_map4); + prog[*fixup_map_array_48b].imm = map_fds[3]; + fixup_map_array_48b++; + } while (*fixup_map_array_48b); } if (*fixup_prog1) { - map_fds[4] = create_prog_array(4, 0); + map_fds[4] = create_prog_array(prog_type, 4, 0); do { prog[*fixup_prog1].imm = map_fds[4]; fixup_prog1++; @@ -12694,7 +13831,7 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, } if (*fixup_prog2) { - map_fds[5] = create_prog_array(8, 7); + map_fds[5] = create_prog_array(prog_type, 8, 7); do { prog[*fixup_prog2].imm = map_fds[5]; fixup_prog2++; @@ -12710,12 +13847,52 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, } if (*fixup_cgroup_storage) { - map_fds[7] = create_cgroup_storage(); + map_fds[7] = create_cgroup_storage(false); do { prog[*fixup_cgroup_storage].imm = map_fds[7]; fixup_cgroup_storage++; } while (*fixup_cgroup_storage); } + + if (*fixup_percpu_cgroup_storage) { + map_fds[8] = create_cgroup_storage(true); + do { + prog[*fixup_percpu_cgroup_storage].imm = map_fds[8]; + fixup_percpu_cgroup_storage++; + } while (*fixup_percpu_cgroup_storage); + } + if (*fixup_map_sockmap) { + map_fds[9] = create_map(BPF_MAP_TYPE_SOCKMAP, sizeof(int), + sizeof(int), 1); + do { + prog[*fixup_map_sockmap].imm = map_fds[9]; + fixup_map_sockmap++; + } while (*fixup_map_sockmap); + } + if (*fixup_map_sockhash) { + map_fds[10] = create_map(BPF_MAP_TYPE_SOCKHASH, sizeof(int), + sizeof(int), 1); + do { + prog[*fixup_map_sockhash].imm = map_fds[10]; + fixup_map_sockhash++; + } while (*fixup_map_sockhash); + } + if (*fixup_map_xskmap) { + map_fds[11] = create_map(BPF_MAP_TYPE_XSKMAP, sizeof(int), + sizeof(int), 1); + do { + prog[*fixup_map_xskmap].imm = map_fds[11]; + fixup_map_xskmap++; + } while (*fixup_map_xskmap); + } + if (*fixup_map_stacktrace) { + map_fds[12] = create_map(BPF_MAP_TYPE_STACK_TRACE, sizeof(u32), + sizeof(u64), 1); + do { + prog[*fixup_map_stacktrace].imm = map_fds[12]; + fixup_map_stacktrace++; + } while (fixup_map_stacktrace); + } } static void do_test_single(struct bpf_test *test, bool unpriv, @@ -12732,11 +13909,13 @@ static void do_test_single(struct bpf_test *test, bool unpriv, for (i = 0; i < MAX_NR_MAPS; i++) map_fds[i] = -1; - do_test_fixup(test, prog, map_fds); + if (!prog_type) + prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + do_test_fixup(test, prog_type, prog, map_fds); prog_len = probe_filter_length(prog); - fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, - prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT, + fd_prog = bpf_verify_program(prog_type, prog, prog_len, + test->flags & F_LOAD_WITH_STRICT_ALIGNMENT, "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1); expected_ret = unpriv && test->result_unpriv != UNDEF ? diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.c b/tools/testing/selftests/bpf/test_xdp_vlan.c new file mode 100644 index 000000000000..365a7d2d9f5c --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_vlan.c @@ -0,0 +1,292 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright(c) 2018 Jesper Dangaard Brouer. + * + * XDP/TC VLAN manipulation example + * + * GOTCHA: Remember to disable NIC hardware offloading of VLANs, + * else the VLAN tags are NOT inlined in the packet payload: + * + * # ethtool -K ixgbe2 rxvlan off + * + * Verify setting: + * # ethtool -k ixgbe2 | grep rx-vlan-offload + * rx-vlan-offload: off + * + */ +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/in.h> +#include <linux/pkt_cls.h> + +#include "bpf_helpers.h" +#include "bpf_endian.h" + +/* linux/if_vlan.h have not exposed this as UAPI, thus mirror some here + * + * struct vlan_hdr - vlan header + * @h_vlan_TCI: priority and VLAN ID + * @h_vlan_encapsulated_proto: packet type ID or len + */ +struct _vlan_hdr { + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; +#define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */ +#define VLAN_PRIO_SHIFT 13 +#define VLAN_CFI_MASK 0x1000 /* Canonical Format Indicator */ +#define VLAN_TAG_PRESENT VLAN_CFI_MASK +#define VLAN_VID_MASK 0x0fff /* VLAN Identifier */ +#define VLAN_N_VID 4096 + +struct parse_pkt { + __u16 l3_proto; + __u16 l3_offset; + __u16 vlan_outer; + __u16 vlan_inner; + __u8 vlan_outer_offset; + __u8 vlan_inner_offset; +}; + +char _license[] SEC("license") = "GPL"; + +static __always_inline +bool parse_eth_frame(struct ethhdr *eth, void *data_end, struct parse_pkt *pkt) +{ + __u16 eth_type; + __u8 offset; + + offset = sizeof(*eth); + /* Make sure packet is large enough for parsing eth + 2 VLAN headers */ + if ((void *)eth + offset + (2*sizeof(struct _vlan_hdr)) > data_end) + return false; + + eth_type = eth->h_proto; + + /* Handle outer VLAN tag */ + if (eth_type == bpf_htons(ETH_P_8021Q) + || eth_type == bpf_htons(ETH_P_8021AD)) { + struct _vlan_hdr *vlan_hdr; + + vlan_hdr = (void *)eth + offset; + pkt->vlan_outer_offset = offset; + pkt->vlan_outer = bpf_ntohs(vlan_hdr->h_vlan_TCI) + & VLAN_VID_MASK; + eth_type = vlan_hdr->h_vlan_encapsulated_proto; + offset += sizeof(*vlan_hdr); + } + + /* Handle inner (double) VLAN tag */ + if (eth_type == bpf_htons(ETH_P_8021Q) + || eth_type == bpf_htons(ETH_P_8021AD)) { + struct _vlan_hdr *vlan_hdr; + + vlan_hdr = (void *)eth + offset; + pkt->vlan_inner_offset = offset; + pkt->vlan_inner = bpf_ntohs(vlan_hdr->h_vlan_TCI) + & VLAN_VID_MASK; + eth_type = vlan_hdr->h_vlan_encapsulated_proto; + offset += sizeof(*vlan_hdr); + } + + pkt->l3_proto = bpf_ntohs(eth_type); /* Convert to host-byte-order */ + pkt->l3_offset = offset; + + return true; +} + +/* Hint, VLANs are choosen to hit network-byte-order issues */ +#define TESTVLAN 4011 /* 0xFAB */ +// #define TO_VLAN 4000 /* 0xFA0 (hint 0xOA0 = 160) */ + +SEC("xdp_drop_vlan_4011") +int xdp_prognum0(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct parse_pkt pkt = { 0 }; + + if (!parse_eth_frame(data, data_end, &pkt)) + return XDP_ABORTED; + + /* Drop specific VLAN ID example */ + if (pkt.vlan_outer == TESTVLAN) + return XDP_ABORTED; + /* + * Using XDP_ABORTED makes it possible to record this event, + * via tracepoint xdp:xdp_exception like: + * # perf record -a -e xdp:xdp_exception + * # perf script + */ + return XDP_PASS; +} +/* +Commands to setup VLAN on Linux to test packets gets dropped: + + export ROOTDEV=ixgbe2 + export VLANID=4011 + ip link add link $ROOTDEV name $ROOTDEV.$VLANID type vlan id $VLANID + ip link set dev $ROOTDEV.$VLANID up + + ip link set dev $ROOTDEV mtu 1508 + ip addr add 100.64.40.11/24 dev $ROOTDEV.$VLANID + +Load prog with ip tool: + + ip link set $ROOTDEV xdp off + ip link set $ROOTDEV xdp object xdp_vlan01_kern.o section xdp_drop_vlan_4011 + +*/ + +/* Changing VLAN to zero, have same practical effect as removing the VLAN. */ +#define TO_VLAN 0 + +SEC("xdp_vlan_change") +int xdp_prognum1(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct parse_pkt pkt = { 0 }; + + if (!parse_eth_frame(data, data_end, &pkt)) + return XDP_ABORTED; + + /* Change specific VLAN ID */ + if (pkt.vlan_outer == TESTVLAN) { + struct _vlan_hdr *vlan_hdr = data + pkt.vlan_outer_offset; + + /* Modifying VLAN, preserve top 4 bits */ + vlan_hdr->h_vlan_TCI = + bpf_htons((bpf_ntohs(vlan_hdr->h_vlan_TCI) & 0xf000) + | TO_VLAN); + } + + return XDP_PASS; +} + +/* + * Show XDP+TC can cooperate, on creating a VLAN rewriter. + * 1. Create a XDP prog that can "pop"/remove a VLAN header. + * 2. Create a TC-bpf prog that egress can add a VLAN header. + */ + +#ifndef ETH_ALEN /* Ethernet MAC address length */ +#define ETH_ALEN 6 /* bytes */ +#endif +#define VLAN_HDR_SZ 4 /* bytes */ + +SEC("xdp_vlan_remove_outer") +int xdp_prognum2(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct parse_pkt pkt = { 0 }; + char *dest; + + if (!parse_eth_frame(data, data_end, &pkt)) + return XDP_ABORTED; + + /* Skip packet if no outer VLAN was detected */ + if (pkt.vlan_outer_offset == 0) + return XDP_PASS; + + /* Moving Ethernet header, dest overlap with src, memmove handle this */ + dest = data; + dest+= VLAN_HDR_SZ; + /* + * Notice: Taking over vlan_hdr->h_vlan_encapsulated_proto, by + * only moving two MAC addrs (12 bytes), not overwriting last 2 bytes + */ + __builtin_memmove(dest, data, ETH_ALEN * 2); + /* Note: LLVM built-in memmove inlining require size to be constant */ + + /* Move start of packet header seen by Linux kernel stack */ + bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ); + + return XDP_PASS; +} + +static __always_inline +void shift_mac_4bytes_16bit(void *data) +{ + __u16 *p = data; + + p[7] = p[5]; /* delete p[7] was vlan_hdr->h_vlan_TCI */ + p[6] = p[4]; /* delete p[6] was ethhdr->h_proto */ + p[5] = p[3]; + p[4] = p[2]; + p[3] = p[1]; + p[2] = p[0]; +} + +static __always_inline +void shift_mac_4bytes_32bit(void *data) +{ + __u32 *p = data; + + /* Assuming VLAN hdr present. The 4 bytes in p[3] that gets + * overwritten, is ethhdr->h_proto and vlan_hdr->h_vlan_TCI. + * The vlan_hdr->h_vlan_encapsulated_proto take over role as + * ethhdr->h_proto. + */ + p[3] = p[2]; + p[2] = p[1]; + p[1] = p[0]; +} + +SEC("xdp_vlan_remove_outer2") +int xdp_prognum3(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *orig_eth = data; + struct parse_pkt pkt = { 0 }; + + if (!parse_eth_frame(orig_eth, data_end, &pkt)) + return XDP_ABORTED; + + /* Skip packet if no outer VLAN was detected */ + if (pkt.vlan_outer_offset == 0) + return XDP_PASS; + + /* Simply shift down MAC addrs 4 bytes, overwrite h_proto + TCI */ + shift_mac_4bytes_32bit(data); + + /* Move start of packet header seen by Linux kernel stack */ + bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ); + + return XDP_PASS; +} + +/*===================================== + * BELOW: TC-hook based ebpf programs + * ==================================== + * The TC-clsact eBPF programs (currently) need to be attach via TC commands + */ + +SEC("tc_vlan_push") +int _tc_progA(struct __sk_buff *ctx) +{ + bpf_skb_vlan_push(ctx, bpf_htons(ETH_P_8021Q), TESTVLAN); + + return TC_ACT_OK; +} +/* +Commands to setup TC to use above bpf prog: + +export ROOTDEV=ixgbe2 +export FILE=xdp_vlan01_kern.o + +# Re-attach clsact to clear/flush existing role +tc qdisc del dev $ROOTDEV clsact 2> /dev/null ;\ +tc qdisc add dev $ROOTDEV clsact + +# Attach BPF prog EGRESS +tc filter add dev $ROOTDEV egress \ + prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push + +tc filter show dev $ROOTDEV egress +*/ diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh new file mode 100755 index 000000000000..51a3a31d1aac --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_vlan.sh @@ -0,0 +1,195 @@ +#!/bin/bash + +TESTNAME=xdp_vlan + +usage() { + echo "Testing XDP + TC eBPF VLAN manipulations: $TESTNAME" + echo "" + echo "Usage: $0 [-vfh]" + echo " -v | --verbose : Verbose" + echo " --flush : Flush before starting (e.g. after --interactive)" + echo " --interactive : Keep netns setup running after test-run" + echo "" +} + +cleanup() +{ + local status=$? + + if [ "$status" = "0" ]; then + echo "selftests: $TESTNAME [PASS]"; + else + echo "selftests: $TESTNAME [FAILED]"; + fi + + if [ -n "$INTERACTIVE" ]; then + echo "Namespace setup still active explore with:" + echo " ip netns exec ns1 bash" + echo " ip netns exec ns2 bash" + exit $status + fi + + set +e + ip link del veth1 2> /dev/null + ip netns del ns1 2> /dev/null + ip netns del ns2 2> /dev/null +} + +# Using external program "getopt" to get --long-options +OPTIONS=$(getopt -o hvfi: \ + --long verbose,flush,help,interactive,debug -- "$@") +if (( $? != 0 )); then + usage + echo "selftests: $TESTNAME [FAILED] Error calling getopt, unknown option?" + exit 2 +fi +eval set -- "$OPTIONS" + +## --- Parse command line arguments / parameters --- +while true; do + case "$1" in + -v | --verbose) + export VERBOSE=yes + shift + ;; + -i | --interactive | --debug ) + INTERACTIVE=yes + shift + ;; + -f | --flush ) + cleanup + shift + ;; + -- ) + shift + break + ;; + -h | --help ) + usage; + echo "selftests: $TESTNAME [SKIP] usage help info requested" + exit 0 + ;; + * ) + shift + break + ;; + esac +done + +if [ "$EUID" -ne 0 ]; then + echo "selftests: $TESTNAME [FAILED] need root privileges" + exit 1 +fi + +ip link set dev lo xdp off 2>/dev/null > /dev/null +if [ $? -ne 0 ];then + echo "selftests: $TESTNAME [SKIP] need ip xdp support" + exit 0 +fi + +# Interactive mode likely require us to cleanup netns +if [ -n "$INTERACTIVE" ]; then + ip link del veth1 2> /dev/null + ip netns del ns1 2> /dev/null + ip netns del ns2 2> /dev/null +fi + +# Exit on failure +set -e + +# Some shell-tools dependencies +which ip > /dev/null +which tc > /dev/null +which ethtool > /dev/null + +# Make rest of shell verbose, showing comments as doc/info +if [ -n "$VERBOSE" ]; then + set -v +fi + +# Create two namespaces +ip netns add ns1 +ip netns add ns2 + +# Run cleanup if failing or on kill +trap cleanup 0 2 3 6 9 + +# Create veth pair +ip link add veth1 type veth peer name veth2 + +# Move veth1 and veth2 into the respective namespaces +ip link set veth1 netns ns1 +ip link set veth2 netns ns2 + +# NOTICE: XDP require VLAN header inside packet payload +# - Thus, disable VLAN offloading driver features +# - For veth REMEMBER TX side VLAN-offload +# +# Disable rx-vlan-offload (mostly needed on ns1) +ip netns exec ns1 ethtool -K veth1 rxvlan off +ip netns exec ns2 ethtool -K veth2 rxvlan off +# +# Disable tx-vlan-offload (mostly needed on ns2) +ip netns exec ns2 ethtool -K veth2 txvlan off +ip netns exec ns1 ethtool -K veth1 txvlan off + +export IPADDR1=100.64.41.1 +export IPADDR2=100.64.41.2 + +# In ns1/veth1 add IP-addr on plain net_device +ip netns exec ns1 ip addr add ${IPADDR1}/24 dev veth1 +ip netns exec ns1 ip link set veth1 up + +# In ns2/veth2 create VLAN device +export VLAN=4011 +export DEVNS2=veth2 +ip netns exec ns2 ip link add link $DEVNS2 name $DEVNS2.$VLAN type vlan id $VLAN +ip netns exec ns2 ip addr add ${IPADDR2}/24 dev $DEVNS2.$VLAN +ip netns exec ns2 ip link set $DEVNS2 up +ip netns exec ns2 ip link set $DEVNS2.$VLAN up + +# Bringup lo in netns (to avoids confusing people using --interactive) +ip netns exec ns1 ip link set lo up +ip netns exec ns2 ip link set lo up + +# At this point, the hosts cannot reach each-other, +# because ns2 are using VLAN tags on the packets. + +ip netns exec ns2 sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Okay ping fails"' + + +# Now we can use the test_xdp_vlan.c program to pop/push these VLAN tags +# ---------------------------------------------------------------------- +# In ns1: ingress use XDP to remove VLAN tags +export DEVNS1=veth1 +export FILE=test_xdp_vlan.o + +# First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change" +export XDP_PROG=xdp_vlan_change +ip netns exec ns1 ip link set $DEVNS1 xdp object $FILE section $XDP_PROG + +# In ns1: egress use TC to add back VLAN tag 4011 +# (del cmd) +# tc qdisc del dev $DEVNS1 clsact 2> /dev/null +# +ip netns exec ns1 tc qdisc add dev $DEVNS1 clsact +ip netns exec ns1 tc filter add dev $DEVNS1 egress \ + prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push + +# Now the namespaces can reach each-other, test with ping: +ip netns exec ns2 ping -W 2 -c 3 $IPADDR1 +ip netns exec ns1 ping -W 2 -c 3 $IPADDR2 + +# Second test: Replace xdp prog, that fully remove vlan header +# +# Catch kernel bug for generic-XDP, that does didn't allow us to +# remove a VLAN header, because skb->protocol still contain VLAN +# ETH_P_8021Q indication, and this cause overwriting of our changes. +# +export XDP_PROG=xdp_vlan_remove_outer2 +ip netns exec ns1 ip link set $DEVNS1 xdp off +ip netns exec ns1 ip link set $DEVNS1 xdp object $FILE section $XDP_PROG + +# Now the namespaces should still be able reach each-other, test with ping: +ip netns exec ns2 ping -W 2 -c 3 $IPADDR1 +ip netns exec ns1 ping -W 2 -c 3 $IPADDR2 diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index cabe2a3a3b30..4cdb63bf0521 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -41,6 +41,7 @@ int load_kallsyms(void) syms[i].name = strdup(func); i++; } + fclose(f); sym_cnt = i; qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp); return 0; @@ -124,10 +125,11 @@ struct perf_event_sample { char data[]; }; -static enum bpf_perf_event_ret bpf_perf_event_print(void *event, void *priv) +static enum bpf_perf_event_ret +bpf_perf_event_print(struct perf_event_header *hdr, void *private_data) { - struct perf_event_sample *e = event; - perf_event_print_fn fn = priv; + struct perf_event_sample *e = (struct perf_event_sample *)hdr; + perf_event_print_fn fn = private_data; int ret; if (e->header.type == PERF_RECORD_SAMPLE) { diff --git a/tools/testing/selftests/bpf/with_addr.sh b/tools/testing/selftests/bpf/with_addr.sh new file mode 100755 index 000000000000..ffcd3953f94c --- /dev/null +++ b/tools/testing/selftests/bpf/with_addr.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# add private ipv4 and ipv6 addresses to loopback + +readonly V6_INNER='100::a/128' +readonly V4_INNER='192.168.0.1/32' + +if getopts ":s" opt; then + readonly SIT_DEV_NAME='sixtofourtest0' + readonly V6_SIT='2::/64' + readonly V4_SIT='172.17.0.1/32' + shift +fi + +fail() { + echo "error: $*" 1>&2 + exit 1 +} + +setup() { + ip -6 addr add "${V6_INNER}" dev lo || fail 'failed to setup v6 address' + ip -4 addr add "${V4_INNER}" dev lo || fail 'failed to setup v4 address' + + if [[ -n "${V6_SIT}" ]]; then + ip link add "${SIT_DEV_NAME}" type sit remote any local any \ + || fail 'failed to add sit' + ip link set dev "${SIT_DEV_NAME}" up \ + || fail 'failed to bring sit device up' + ip -6 addr add "${V6_SIT}" dev "${SIT_DEV_NAME}" \ + || fail 'failed to setup v6 SIT address' + ip -4 addr add "${V4_SIT}" dev "${SIT_DEV_NAME}" \ + || fail 'failed to setup v4 SIT address' + fi + + sleep 2 # avoid race causing bind to fail +} + +cleanup() { + if [[ -n "${V6_SIT}" ]]; then + ip -4 addr del "${V4_SIT}" dev "${SIT_DEV_NAME}" + ip -6 addr del "${V6_SIT}" dev "${SIT_DEV_NAME}" + ip link del "${SIT_DEV_NAME}" + fi + + ip -4 addr del "${V4_INNER}" dev lo + ip -6 addr del "${V6_INNER}" dev lo +} + +trap cleanup EXIT + +setup +"$@" +exit "$?" diff --git a/tools/testing/selftests/bpf/with_tunnels.sh b/tools/testing/selftests/bpf/with_tunnels.sh new file mode 100755 index 000000000000..e24949ed3a20 --- /dev/null +++ b/tools/testing/selftests/bpf/with_tunnels.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# setup tunnels for flow dissection test + +readonly SUFFIX="test_$(mktemp -u XXXX)" +CONFIG="remote 127.0.0.2 local 127.0.0.1 dev lo" + +setup() { + ip link add "ipip_${SUFFIX}" type ipip ${CONFIG} + ip link add "gre_${SUFFIX}" type gre ${CONFIG} + ip link add "sit_${SUFFIX}" type sit ${CONFIG} + + echo "tunnels before test:" + ip tunnel show + + ip link set "ipip_${SUFFIX}" up + ip link set "gre_${SUFFIX}" up + ip link set "sit_${SUFFIX}" up +} + + +cleanup() { + ip tunnel del "ipip_${SUFFIX}" + ip tunnel del "gre_${SUFFIX}" + ip tunnel del "sit_${SUFFIX}" + + echo "tunnels after test:" + ip tunnel show +} + +trap cleanup EXIT + +setup +"$@" +exit "$?" diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh new file mode 100755 index 000000000000..0150bb2741eb --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -0,0 +1,347 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# A test for switch behavior under MC overload. An issue in Spectrum chips +# causes throughput of UC traffic to drop severely when a switch is under heavy +# MC load. This issue can be overcome by putting the switch to MC-aware mode. +# This test verifies that UC performance stays intact even as the switch is +# under MC flood, and therefore that the MC-aware mode is enabled and correctly +# configured. +# +# Because mlxsw throttles CPU port, the traffic can't actually reach userspace +# at full speed. That makes it impossible to use iperf3 to simply measure the +# throughput, because many packets (that reach $h3) don't get to the kernel at +# all even in UDP mode (the situation is even worse in TCP mode, where one can't +# hope to see more than a couple Mbps). +# +# So instead we send traffic with mausezahn and use RX ethtool counters at $h3. +# Multicast traffic is untagged, unicast traffic is tagged with PCP 1. Therefore +# each gets a different priority and we can use per-prio ethtool counters to +# measure the throughput. In order to avoid prioritizing unicast traffic, prio +# qdisc is installed on $swp3 and maps all priorities to the same band #7 (and +# thus TC 0). +# +# Mausezahn can't actually saturate the links unless it's using large frames. +# Thus we set MTU to 10K on all involved interfaces. Then both unicast and +# multicast traffic uses 8K frames. +# +# +-----------------------+ +----------------------------------+ +# | H1 | | H2 | +# | | | unicast --> + $h2.111 | +# | | | traffic | 192.0.2.129/28 | +# | multicast | | | e-qos-map 0:1 | +# | traffic | | | | +# | $h1 + <----- | | + $h2 | +# +-----|-----------------+ +--------------|-------------------+ +# | | +# +-----|-------------------------------------------------|-------------------+ +# | + $swp1 + $swp2 | +# | | >1Gbps | >1Gbps | +# | +---|----------------+ +----------|----------------+ | +# | | + $swp1.1 | | + $swp2.111 | | +# | | BR1 | SW | BR111 | | +# | | + $swp3.1 | | + $swp3.111 | | +# | +---|----------------+ +----------|----------------+ | +# | \_________________________________________________/ | +# | | | +# | + $swp3 | +# | | 1Gbps bottleneck | +# | | prio qdisc: {0..7} -> 7 | +# +------------------------------------|--------------------------------------+ +# | +# +--|-----------------+ +# | + $h3 H3 | +# | | | +# | + $h3.111 | +# | 192.0.2.130/28 | +# +--------------------+ + +ALL_TESTS=" + ping_ipv4 + test_mc_aware +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=6 +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 + mtu_set $h1 10000 +} + +h1_destroy() +{ + mtu_restore $h1 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + mtu_set $h2 10000 + + vlan_create $h2 111 v$h2 192.0.2.129/28 + ip link set dev $h2.111 type vlan egress-qos-map 0:1 +} + +h2_destroy() +{ + vlan_destroy $h2 111 + + mtu_restore $h2 + simple_if_fini $h2 +} + +h3_create() +{ + simple_if_init $h3 + mtu_set $h3 10000 + + vlan_create $h3 111 v$h3 192.0.2.130/28 +} + +h3_destroy() +{ + vlan_destroy $h3 111 + + mtu_restore $h3 + simple_if_fini $h3 +} + +switch_create() +{ + ip link set dev $swp1 up + mtu_set $swp1 10000 + + ip link set dev $swp2 up + mtu_set $swp2 10000 + + ip link set dev $swp3 up + mtu_set $swp3 10000 + + vlan_create $swp2 111 + vlan_create $swp3 111 + + ethtool -s $swp3 speed 1000 autoneg off + tc qdisc replace dev $swp3 root handle 3: \ + prio bands 8 priomap 7 7 7 7 7 7 7 7 + + ip link add name br1 type bridge vlan_filtering 0 + ip link set dev br1 up + ip link set dev $swp1 master br1 + ip link set dev $swp3 master br1 + + ip link add name br111 type bridge vlan_filtering 0 + ip link set dev br111 up + ip link set dev $swp2.111 master br111 + ip link set dev $swp3.111 master br111 +} + +switch_destroy() +{ + ip link del dev br111 + ip link del dev br1 + + tc qdisc del dev $swp3 root handle 3: + ethtool -s $swp3 autoneg on + + vlan_destroy $swp3 111 + vlan_destroy $swp2 111 + + mtu_restore $swp3 + ip link set dev $swp3 down + + mtu_restore $swp2 + ip link set dev $swp2 down + + mtu_restore $swp1 + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + h3mac=$(mac_get $h3) + + vrf_prepare + + h1_create + h2_create + h3_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h3_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h2 192.0.2.130 +} + +humanize() +{ + local speed=$1; shift + + for unit in bps Kbps Mbps Gbps; do + if (($(echo "$speed < 1024" | bc))); then + break + fi + + speed=$(echo "scale=1; $speed / 1024" | bc) + done + + echo "$speed${unit}" +} + +rate() +{ + local t0=$1; shift + local t1=$1; shift + local interval=$1; shift + + echo $((8 * (t1 - t0) / interval)) +} + +check_rate() +{ + local rate=$1; shift + local min=$1; shift + local what=$1; shift + + if ((rate > min)); then + return 0 + fi + + echo "$what $(humanize $ir) < $(humanize $min_ingress)" > /dev/stderr + return 1 +} + +measure_uc_rate() +{ + local what=$1; shift + + local interval=10 + local i + local ret=0 + + # Dips in performance might cause momentary ingress rate to drop below + # 1Gbps. That wouldn't saturate egress and MC would thus get through, + # seemingly winning bandwidth on account of UC. Demand at least 2Gbps + # average ingress rate to somewhat mitigate this. + local min_ingress=2147483648 + + mausezahn $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \ + -a own -b $h3mac -t udp -q & + sleep 1 + + for i in {5..0}; do + local t0=$(ethtool_stats_get $h3 rx_octets_prio_1) + local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1) + sleep $interval + local t1=$(ethtool_stats_get $h3 rx_octets_prio_1) + local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1) + + local ir=$(rate $u0 $u1 $interval) + local er=$(rate $t0 $t1 $interval) + + if check_rate $ir $min_ingress "$what ingress rate"; then + break + fi + + # Fail the test if we can't get the throughput. + if ((i == 0)); then + ret=1 + fi + done + + # Suppress noise from killing mausezahn. + { kill %% && wait; } 2>/dev/null + + echo $ir $er + exit $ret +} + +test_mc_aware() +{ + RET=0 + + local -a uc_rate + uc_rate=($(measure_uc_rate "UC-only")) + check_err $? "Could not get high enough UC-only ingress rate" + local ucth1=${uc_rate[1]} + + mausezahn $h1 -p 8000 -c 0 -a own -b bc -t udp -q & + + local d0=$(date +%s) + local t0=$(ethtool_stats_get $h3 rx_octets_prio_0) + local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0) + + local -a uc_rate_2 + uc_rate_2=($(measure_uc_rate "UC+MC")) + check_err $? "Could not get high enough UC+MC ingress rate" + local ucth2=${uc_rate_2[1]} + + local d1=$(date +%s) + local t1=$(ethtool_stats_get $h3 rx_octets_prio_0) + local u1=$(ethtool_stats_get $swp1 rx_octets_prio_0) + + local deg=$(bc <<< " + scale=2 + ret = 100 * ($ucth1 - $ucth2) / $ucth1 + if (ret > 0) { ret } else { 0 } + ") + check_err $(bc <<< "$deg > 10") + + local interval=$((d1 - d0)) + local mc_ir=$(rate $u0 $u1 $interval) + local mc_er=$(rate $t0 $t1 $interval) + + # Suppress noise from killing mausezahn. + { kill %% && wait; } 2>/dev/null + + log_test "UC performace under MC overload" + + echo "UC-only throughput $(humanize $ucth1)" + echo "UC+MC throughput $(humanize $ucth2)" + echo "Degradation $deg %" + echo + echo "Full report:" + echo " UC only:" + echo " ingress UC throughput $(humanize ${uc_rate[0]})" + echo " egress UC throughput $(humanize ${uc_rate[1]})" + echo " UC+MC:" + echo " ingress UC throughput $(humanize ${uc_rate_2[0]})" + echo " egress UC throughput $(humanize ${uc_rate_2[1]})" + echo " ingress MC throughput $(humanize $mc_ir)" + echo " egress MC throughput $(humanize $mc_er)" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 78b24cf76f40..8cf22b3c2563 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -14,3 +14,4 @@ udpgso_bench_rx udpgso_bench_tx tcp_inq tls +ip_defrag diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 919aa2ac00af..256d82d5fa87 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -5,13 +5,13 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh -TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh +TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd -TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx +TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 0f45633bd634..802b4af18729 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -9,11 +9,11 @@ ret=0 ksft_skip=4 # all tests in this script. Can be overridden with -t option -TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric" +TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics" VERBOSE=0 PAUSE_ON_FAIL=no PAUSE=no -IP="ip -netns testns" +IP="ip -netns ns1" log_test() { @@ -47,8 +47,10 @@ log_test() setup() { set -e - ip netns add testns + ip netns add ns1 $IP link set dev lo up + ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1 + ip netns exec ns1 sysctl -qw net.ipv6.conf.all.forwarding=1 $IP link add dummy0 type dummy $IP link set dev dummy0 up @@ -61,7 +63,8 @@ setup() cleanup() { $IP link del dev dummy0 &> /dev/null - ip netns del testns + ip netns del ns1 + ip netns del ns2 &> /dev/null } get_linklocal() @@ -639,11 +642,14 @@ add_initial_route6() check_route6() { - local pfx="2001:db8:104::/64" + local pfx local expected="$1" local out local rc=0 + set -- $expected + pfx=$1 + out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//') [ "${out}" = "${expected}" ] && return 0 @@ -690,28 +696,33 @@ route_setup() [ "${VERBOSE}" = "1" ] && set -x set -e - $IP li add red up type vrf table 101 + ip netns add ns2 + ip -netns ns2 link set dev lo up + ip netns exec ns2 sysctl -qw net.ipv4.ip_forward=1 + ip netns exec ns2 sysctl -qw net.ipv6.conf.all.forwarding=1 + $IP li add veth1 type veth peer name veth2 $IP li add veth3 type veth peer name veth4 $IP li set veth1 up $IP li set veth3 up - $IP li set veth2 vrf red up - $IP li set veth4 vrf red up - $IP li add dummy1 type dummy - $IP li set dummy1 vrf red up - - $IP -6 addr add 2001:db8:101::1/64 dev veth1 - $IP -6 addr add 2001:db8:101::2/64 dev veth2 - $IP -6 addr add 2001:db8:103::1/64 dev veth3 - $IP -6 addr add 2001:db8:103::2/64 dev veth4 - $IP -6 addr add 2001:db8:104::1/64 dev dummy1 + $IP li set veth2 netns ns2 up + $IP li set veth4 netns ns2 up + ip -netns ns2 li add dummy1 type dummy + ip -netns ns2 li set dummy1 up + $IP -6 addr add 2001:db8:101::1/64 dev veth1 nodad + $IP -6 addr add 2001:db8:103::1/64 dev veth3 nodad $IP addr add 172.16.101.1/24 dev veth1 - $IP addr add 172.16.101.2/24 dev veth2 $IP addr add 172.16.103.1/24 dev veth3 - $IP addr add 172.16.103.2/24 dev veth4 - $IP addr add 172.16.104.1/24 dev dummy1 + + ip -netns ns2 -6 addr add 2001:db8:101::2/64 dev veth2 nodad + ip -netns ns2 -6 addr add 2001:db8:103::2/64 dev veth4 nodad + ip -netns ns2 -6 addr add 2001:db8:104::1/64 dev dummy1 nodad + + ip -netns ns2 addr add 172.16.101.2/24 dev veth2 + ip -netns ns2 addr add 172.16.103.2/24 dev veth4 + ip -netns ns2 addr add 172.16.104.1/24 dev dummy1 set +ex } @@ -944,7 +955,7 @@ ipv6_addr_metric_test() log_test $rc 0 "Modify metric of address" # verify prefix route removed on down - run_cmd "ip netns exec testns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1" + run_cmd "ip netns exec ns1 sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1" run_cmd "$IP li set dev dummy2 down" rc=$? if [ $rc -eq 0 ]; then @@ -967,6 +978,77 @@ ipv6_addr_metric_test() cleanup } +ipv6_route_metrics_test() +{ + local rc + + echo + echo "IPv6 routes with metrics" + + route_setup + + # + # single path with metrics + # + run_cmd "$IP -6 ro add 2001:db8:111::/64 via 2001:db8:101::2 mtu 1400" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:111::/64 via 2001:db8:101::2 dev veth1 metric 1024 mtu 1400" + rc=$? + fi + log_test $rc 0 "Single path route with mtu metric" + + + # + # multipath via separate routes with metrics + # + run_cmd "$IP -6 ro add 2001:db8:112::/64 via 2001:db8:101::2 mtu 1400" + run_cmd "$IP -6 ro append 2001:db8:112::/64 via 2001:db8:103::2" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:112::/64 metric 1024 mtu 1400 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + rc=$? + fi + log_test $rc 0 "Multipath route via 2 single routes with mtu metric on first" + + # second route is coalesced to first to make a multipath route. + # MTU of the second path is hidden from display! + run_cmd "$IP -6 ro add 2001:db8:113::/64 via 2001:db8:101::2" + run_cmd "$IP -6 ro append 2001:db8:113::/64 via 2001:db8:103::2 mtu 1400" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:113::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + rc=$? + fi + log_test $rc 0 "Multipath route via 2 single routes with mtu metric on 2nd" + + run_cmd "$IP -6 ro del 2001:db8:113::/64 via 2001:db8:101::2" + if [ $? -eq 0 ]; then + check_route6 "2001:db8:113::/64 via 2001:db8:103::2 dev veth3 metric 1024 mtu 1400" + log_test $? 0 " MTU of second leg" + fi + + # + # multipath with metrics + # + run_cmd "$IP -6 ro add 2001:db8:115::/64 mtu 1400 nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:115::/64 metric 1024 mtu 1400 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + rc=$? + fi + log_test $rc 0 "Multipath route with mtu metric" + + $IP -6 ro add 2001:db8:104::/64 via 2001:db8:101::2 mtu 1300 + run_cmd "ip netns exec ns1 ping6 -w1 -c1 -s 1500 2001:db8:104::1" + log_test $? 0 "Using route with mtu metric" + + run_cmd "$IP -6 ro add 2001:db8:114::/64 via 2001:db8:101::2 congctl lock foo" + log_test $? 2 "Invalid metric (fails metric_convert)" + + route_cleanup +} + # add route for a prefix, flushing any existing routes first # expected to be the first step of a test add_route() @@ -1005,11 +1087,15 @@ add_initial_route() check_route() { - local pfx="172.16.104.0/24" + local pfx local expected="$1" local out local rc=0 + set -- $expected + pfx=$1 + [ "${pfx}" = "unreachable" ] && pfx=$2 + out=$($IP ro ls match ${pfx}) [ "${out}" = "${expected}" ] && return 0 @@ -1319,6 +1405,43 @@ ipv4_addr_metric_test() cleanup } +ipv4_route_metrics_test() +{ + local rc + + echo + echo "IPv4 route add / append tests" + + route_setup + + run_cmd "$IP ro add 172.16.111.0/24 via 172.16.101.2 mtu 1400" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.111.0/24 via 172.16.101.2 dev veth1 mtu 1400" + rc=$? + fi + log_test $rc 0 "Single path route with mtu metric" + + + run_cmd "$IP ro add 172.16.112.0/24 mtu 1400 nexthop via 172.16.101.2 nexthop via 172.16.103.2" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.112.0/24 mtu 1400 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1" + rc=$? + fi + log_test $rc 0 "Multipath route with mtu metric" + + $IP ro add 172.16.104.0/24 via 172.16.101.2 mtu 1300 + run_cmd "ip netns exec ns1 ping -w1 -c1 -s 1500 172.16.104.1" + log_test $? 0 "Using route with mtu metric" + + run_cmd "$IP ro add 172.16.111.0/24 via 172.16.101.2 congctl lock foo" + log_test $? 2 "Invalid metric (fails metric_convert)" + + route_cleanup +} + + ################################################################################ # usage @@ -1385,6 +1508,8 @@ do ipv4_route_test|ipv4_rt) ipv4_route_test;; ipv6_addr_metric) ipv6_addr_metric_test;; ipv4_addr_metric) ipv4_addr_metric_test;; + ipv6_route_metrics) ipv6_route_metrics_test;; + ipv4_route_metrics) ipv4_route_metrics_test;; help) echo "Test names: $TESTS"; exit 0;; esac diff --git a/tools/testing/selftests/net/forwarding/bridge_sticky_fdb.sh b/tools/testing/selftests/net/forwarding/bridge_sticky_fdb.sh new file mode 100755 index 000000000000..1f8ef0eff862 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_sticky_fdb.sh @@ -0,0 +1,69 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS="sticky" +NUM_NETIFS=4 +TEST_MAC=de:ad:be:ef:13:37 +source lib.sh + +switch_create() +{ + ip link add dev br0 type bridge + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $h1 up + ip link set dev $swp1 up + ip link set dev $h2 up + ip link set dev $swp2 up +} + +switch_destroy() +{ + ip link set dev $swp2 down + ip link set dev $h2 down + ip link set dev $swp1 down + ip link set dev $h1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + h2=${NETIFS[p3]} + swp2=${NETIFS[p4]} + + switch_create +} + +cleanup() +{ + pre_cleanup + switch_destroy +} + +sticky() +{ + bridge fdb add $TEST_MAC dev $swp1 master static sticky + check_err $? "Could not add fdb entry" + bridge fdb del $TEST_MAC dev $swp1 vlan 1 master static sticky + $MZ $h2 -c 1 -a $TEST_MAC -t arp "request" -q + bridge -j fdb show br br0 brport $swp1\ + | jq -e ".[] | select(.mac == \"$TEST_MAC\")" &> /dev/null + check_err $? "Did not find FDB record when should" + + log_test "Sticky fdb entry" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index ca53b539aa2d..85d253546684 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -251,7 +251,7 @@ lldpad_app_wait_set() { local dev=$1; shift - while lldptool -t -i $dev -V APP -c app | grep -q pending; do + while lldptool -t -i $dev -V APP -c app | grep -Eq "pending|unknown"; do echo "$dev: waiting for lldpad to push pending APP updates" sleep 5 done @@ -494,6 +494,14 @@ tc_rule_stats_get() | jq '.[1].options.actions[].stats.packets' } +ethtool_stats_get() +{ + local dev=$1; shift + local stat=$1; shift + + ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2 +} + mac_get() { local if_name=$1 @@ -541,6 +549,23 @@ forwarding_restore() sysctl_restore net.ipv4.conf.all.forwarding } +declare -A MTU_ORIG +mtu_set() +{ + local dev=$1; shift + local mtu=$1; shift + + MTU_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].mtu') + ip link set dev $dev mtu $mtu +} + +mtu_restore() +{ + local dev=$1; shift + + ip link set dev $dev mtu ${MTU_ORIG["$dev"]} +} + tc_offload_check() { local num_netifs=${1:-$NUM_NETIFS} diff --git a/tools/testing/selftests/net/ip_defrag.c b/tools/testing/selftests/net/ip_defrag.c new file mode 100644 index 000000000000..61ae2782388e --- /dev/null +++ b/tools/testing/selftests/net/ip_defrag.c @@ -0,0 +1,393 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <linux/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/udp.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +static bool cfg_do_ipv4; +static bool cfg_do_ipv6; +static bool cfg_verbose; +static bool cfg_overlap; +static unsigned short cfg_port = 9000; + +const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) }; +const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT; + +#define IP4_HLEN (sizeof(struct iphdr)) +#define IP6_HLEN (sizeof(struct ip6_hdr)) +#define UDP_HLEN (sizeof(struct udphdr)) + +/* IPv6 fragment header lenth. */ +#define FRAG_HLEN 8 + +static int payload_len; +static int max_frag_len; + +#define MSG_LEN_MAX 60000 /* Max UDP payload length. */ + +#define IP4_MF (1u << 13) /* IPv4 MF flag. */ +#define IP6_MF (1) /* IPv6 MF flag. */ + +#define CSUM_MANGLED_0 (0xffff) + +static uint8_t udp_payload[MSG_LEN_MAX]; +static uint8_t ip_frame[IP_MAXPACKET]; +static uint32_t ip_id = 0xabcd; +static int msg_counter; +static int frag_counter; +static unsigned int seed; + +/* Receive a UDP packet. Validate it matches udp_payload. */ +static void recv_validate_udp(int fd_udp) +{ + ssize_t ret; + static uint8_t recv_buff[MSG_LEN_MAX]; + + ret = recv(fd_udp, recv_buff, payload_len, 0); + msg_counter++; + + if (cfg_overlap) { + if (ret != -1) + error(1, 0, "recv: expected timeout; got %d", + (int)ret); + if (errno != ETIMEDOUT && errno != EAGAIN) + error(1, errno, "recv: expected timeout: %d", + errno); + return; /* OK */ + } + + if (ret == -1) + error(1, errno, "recv: payload_len = %d max_frag_len = %d", + payload_len, max_frag_len); + if (ret != payload_len) + error(1, 0, "recv: wrong size: %d vs %d", (int)ret, payload_len); + if (memcmp(udp_payload, recv_buff, payload_len)) + error(1, 0, "recv: wrong data"); +} + +static uint32_t raw_checksum(uint8_t *buf, int len, uint32_t sum) +{ + int i; + + for (i = 0; i < (len & ~1U); i += 2) { + sum += (u_int16_t)ntohs(*((u_int16_t *)(buf + i))); + if (sum > 0xffff) + sum -= 0xffff; + } + + if (i < len) { + sum += buf[i] << 8; + if (sum > 0xffff) + sum -= 0xffff; + } + + return sum; +} + +static uint16_t udp_checksum(struct ip *iphdr, struct udphdr *udphdr) +{ + uint32_t sum = 0; + uint16_t res; + + sum = raw_checksum((uint8_t *)&iphdr->ip_src, 2 * sizeof(iphdr->ip_src), + IPPROTO_UDP + (uint32_t)(UDP_HLEN + payload_len)); + sum = raw_checksum((uint8_t *)udphdr, UDP_HLEN, sum); + sum = raw_checksum((uint8_t *)udp_payload, payload_len, sum); + res = 0xffff & ~sum; + if (res) + return htons(res); + else + return CSUM_MANGLED_0; +} + +static uint16_t udp6_checksum(struct ip6_hdr *iphdr, struct udphdr *udphdr) +{ + uint32_t sum = 0; + uint16_t res; + + sum = raw_checksum((uint8_t *)&iphdr->ip6_src, 2 * sizeof(iphdr->ip6_src), + IPPROTO_UDP); + sum = raw_checksum((uint8_t *)&udphdr->len, sizeof(udphdr->len), sum); + sum = raw_checksum((uint8_t *)udphdr, UDP_HLEN, sum); + sum = raw_checksum((uint8_t *)udp_payload, payload_len, sum); + res = 0xffff & ~sum; + if (res) + return htons(res); + else + return CSUM_MANGLED_0; +} + +static void send_fragment(int fd_raw, struct sockaddr *addr, socklen_t alen, + int offset, bool ipv6) +{ + int frag_len; + int res; + int payload_offset = offset > 0 ? offset - UDP_HLEN : 0; + uint8_t *frag_start = ipv6 ? ip_frame + IP6_HLEN + FRAG_HLEN : + ip_frame + IP4_HLEN; + + if (offset == 0) { + struct udphdr udphdr; + udphdr.source = htons(cfg_port + 1); + udphdr.dest = htons(cfg_port); + udphdr.len = htons(UDP_HLEN + payload_len); + udphdr.check = 0; + if (ipv6) + udphdr.check = udp6_checksum((struct ip6_hdr *)ip_frame, &udphdr); + else + udphdr.check = udp_checksum((struct ip *)ip_frame, &udphdr); + memcpy(frag_start, &udphdr, UDP_HLEN); + } + + if (ipv6) { + struct ip6_hdr *ip6hdr = (struct ip6_hdr *)ip_frame; + struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN); + if (payload_len - payload_offset <= max_frag_len && offset > 0) { + /* This is the last fragment. */ + frag_len = FRAG_HLEN + payload_len - payload_offset; + fraghdr->ip6f_offlg = htons(offset); + } else { + frag_len = FRAG_HLEN + max_frag_len; + fraghdr->ip6f_offlg = htons(offset | IP6_MF); + } + ip6hdr->ip6_plen = htons(frag_len); + if (offset == 0) + memcpy(frag_start + UDP_HLEN, udp_payload, + frag_len - FRAG_HLEN - UDP_HLEN); + else + memcpy(frag_start, udp_payload + payload_offset, + frag_len - FRAG_HLEN); + frag_len += IP6_HLEN; + } else { + struct ip *iphdr = (struct ip *)ip_frame; + if (payload_len - payload_offset <= max_frag_len && offset > 0) { + /* This is the last fragment. */ + frag_len = IP4_HLEN + payload_len - payload_offset; + iphdr->ip_off = htons(offset / 8); + } else { + frag_len = IP4_HLEN + max_frag_len; + iphdr->ip_off = htons(offset / 8 | IP4_MF); + } + iphdr->ip_len = htons(frag_len); + if (offset == 0) + memcpy(frag_start + UDP_HLEN, udp_payload, + frag_len - IP4_HLEN - UDP_HLEN); + else + memcpy(frag_start, udp_payload + payload_offset, + frag_len - IP4_HLEN); + } + + res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen); + if (res < 0) + error(1, errno, "send_fragment"); + if (res != frag_len) + error(1, 0, "send_fragment: %d vs %d", res, frag_len); + + frag_counter++; +} + +static void send_udp_frags(int fd_raw, struct sockaddr *addr, + socklen_t alen, bool ipv6) +{ + struct ip *iphdr = (struct ip *)ip_frame; + struct ip6_hdr *ip6hdr = (struct ip6_hdr *)ip_frame; + int res; + int offset; + int frag_len; + + /* Send the UDP datagram using raw IP fragments: the 0th fragment + * has the UDP header; other fragments are pieces of udp_payload + * split in chunks of frag_len size. + * + * Odd fragments (1st, 3rd, 5th, etc.) are sent out first, then + * even fragments (0th, 2nd, etc.) are sent out. + */ + if (ipv6) { + struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN); + ((struct sockaddr_in6 *)addr)->sin6_port = 0; + memset(ip6hdr, 0, sizeof(*ip6hdr)); + ip6hdr->ip6_flow = htonl(6<<28); /* Version. */ + ip6hdr->ip6_nxt = IPPROTO_FRAGMENT; + ip6hdr->ip6_hops = 255; + ip6hdr->ip6_src = addr6; + ip6hdr->ip6_dst = addr6; + fraghdr->ip6f_nxt = IPPROTO_UDP; + fraghdr->ip6f_reserved = 0; + fraghdr->ip6f_ident = htonl(ip_id++); + } else { + memset(iphdr, 0, sizeof(*iphdr)); + iphdr->ip_hl = 5; + iphdr->ip_v = 4; + iphdr->ip_tos = 0; + iphdr->ip_id = htons(ip_id++); + iphdr->ip_ttl = 0x40; + iphdr->ip_p = IPPROTO_UDP; + iphdr->ip_src.s_addr = htonl(INADDR_LOOPBACK); + iphdr->ip_dst = addr4; + iphdr->ip_sum = 0; + } + + /* Odd fragments. */ + offset = max_frag_len; + while (offset < (UDP_HLEN + payload_len)) { + send_fragment(fd_raw, addr, alen, offset, ipv6); + offset += 2 * max_frag_len; + } + + if (cfg_overlap) { + /* Send an extra random fragment. */ + offset = rand() % (UDP_HLEN + payload_len - 1); + /* sendto() returns EINVAL if offset + frag_len is too small. */ + if (ipv6) { + struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN); + frag_len = max_frag_len + rand() % 256; + /* In IPv6 if !!(frag_len % 8), the fragment is dropped. */ + frag_len &= ~0x7; + fraghdr->ip6f_offlg = htons(offset / 8 | IP6_MF); + ip6hdr->ip6_plen = htons(frag_len); + frag_len += IP6_HLEN; + } else { + frag_len = IP4_HLEN + UDP_HLEN + rand() % 256; + iphdr->ip_off = htons(offset / 8 | IP4_MF); + iphdr->ip_len = htons(frag_len); + } + res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen); + if (res < 0) + error(1, errno, "sendto overlap"); + if (res != frag_len) + error(1, 0, "sendto overlap: %d vs %d", (int)res, frag_len); + frag_counter++; + } + + /* Event fragments. */ + offset = 0; + while (offset < (UDP_HLEN + payload_len)) { + send_fragment(fd_raw, addr, alen, offset, ipv6); + offset += 2 * max_frag_len; + } +} + +static void run_test(struct sockaddr *addr, socklen_t alen, bool ipv6) +{ + int fd_tx_raw, fd_rx_udp; + struct timeval tv = { .tv_sec = 0, .tv_usec = 10 * 1000 }; + int idx; + int min_frag_len = ipv6 ? 1280 : 8; + + /* Initialize the payload. */ + for (idx = 0; idx < MSG_LEN_MAX; ++idx) + udp_payload[idx] = idx % 256; + + /* Open sockets. */ + fd_tx_raw = socket(addr->sa_family, SOCK_RAW, IPPROTO_RAW); + if (fd_tx_raw == -1) + error(1, errno, "socket tx_raw"); + + fd_rx_udp = socket(addr->sa_family, SOCK_DGRAM, 0); + if (fd_rx_udp == -1) + error(1, errno, "socket rx_udp"); + if (bind(fd_rx_udp, addr, alen)) + error(1, errno, "bind"); + /* Fail fast. */ + if (setsockopt(fd_rx_udp, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) + error(1, errno, "setsockopt rcv timeout"); + + for (payload_len = min_frag_len; payload_len < MSG_LEN_MAX; + payload_len += (rand() % 4096)) { + if (cfg_verbose) + printf("payload_len: %d\n", payload_len); + max_frag_len = min_frag_len; + do { + send_udp_frags(fd_tx_raw, addr, alen, ipv6); + recv_validate_udp(fd_rx_udp); + max_frag_len += 8 * (rand() % 8); + } while (max_frag_len < (1500 - FRAG_HLEN) && max_frag_len <= payload_len); + } + + /* Cleanup. */ + if (close(fd_tx_raw)) + error(1, errno, "close tx_raw"); + if (close(fd_rx_udp)) + error(1, errno, "close rx_udp"); + + if (cfg_verbose) + printf("processed %d messages, %d fragments\n", + msg_counter, frag_counter); + + fprintf(stderr, "PASS\n"); +} + + +static void run_test_v4(void) +{ + struct sockaddr_in addr = {0}; + + addr.sin_family = AF_INET; + addr.sin_port = htons(cfg_port); + addr.sin_addr = addr4; + + run_test((void *)&addr, sizeof(addr), false /* !ipv6 */); +} + +static void run_test_v6(void) +{ + struct sockaddr_in6 addr = {0}; + + addr.sin6_family = AF_INET6; + addr.sin6_port = htons(cfg_port); + addr.sin6_addr = addr6; + + run_test((void *)&addr, sizeof(addr), true /* ipv6 */); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "46ov")) != -1) { + switch (c) { + case '4': + cfg_do_ipv4 = true; + break; + case '6': + cfg_do_ipv6 = true; + break; + case 'o': + cfg_overlap = true; + break; + case 'v': + cfg_verbose = true; + break; + default: + error(1, 0, "%s: parse error", argv[0]); + } + } +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + seed = time(NULL); + srand(seed); + /* Print the seed to track/reproduce potential failures. */ + printf("seed = %d\n", seed); + + if (cfg_do_ipv4) + run_test_v4(); + if (cfg_do_ipv6) + run_test_v6(); + + return 0; +} diff --git a/tools/testing/selftests/net/ip_defrag.sh b/tools/testing/selftests/net/ip_defrag.sh new file mode 100755 index 000000000000..f34672796044 --- /dev/null +++ b/tools/testing/selftests/net/ip_defrag.sh @@ -0,0 +1,39 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Run a couple of IP defragmentation tests. + +set +x +set -e + +readonly NETNS="ns-$(mktemp -u XXXXXX)" + +setup() { + ip netns add "${NETNS}" + ip -netns "${NETNS}" link set lo up + ip netns exec "${NETNS}" sysctl -w net.ipv4.ipfrag_high_thresh=9000000 >/dev/null 2>&1 + ip netns exec "${NETNS}" sysctl -w net.ipv4.ipfrag_low_thresh=7000000 >/dev/null 2>&1 + ip netns exec "${NETNS}" sysctl -w net.ipv6.ip6frag_high_thresh=9000000 >/dev/null 2>&1 + ip netns exec "${NETNS}" sysctl -w net.ipv6.ip6frag_low_thresh=7000000 >/dev/null 2>&1 +} + +cleanup() { + ip netns del "${NETNS}" +} + +trap cleanup EXIT +setup + +echo "ipv4 defrag" +ip netns exec "${NETNS}" ./ip_defrag -4 + + +echo "ipv4 defrag with overlaps" +ip netns exec "${NETNS}" ./ip_defrag -4o + +echo "ipv6 defrag" +ip netns exec "${NETNS}" ./ip_defrag -6 + +echo "ipv6 defrag with overlaps" +ip netns exec "${NETNS}" ./ip_defrag -6o + diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 0ab9423d009f..a369d616b390 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -6,6 +6,26 @@ # # Tests currently implemented: # +# - pmtu_ipv4 +# Set up two namespaces, A and B, with two paths between them over routers +# R1 and R2 (also implemented with namespaces), with different MTUs: +# +# segment a_r1 segment b_r1 a_r1: 2000 +# .--------------R1--------------. a_r2: 1500 +# A B a_r3: 2000 +# '--------------R2--------------' a_r4: 1400 +# segment a_r2 segment b_r2 +# +# Check that PMTU exceptions with the correct PMTU are created. Then +# decrease and increase the MTU of the local link for one of the paths, +# A to R1, checking that route exception PMTU changes accordingly over +# this path. Also check that locked exceptions are created when an ICMP +# message advertising a PMTU smaller than net.ipv4.route.min_pmtu is +# received +# +# - pmtu_ipv6 +# Same as pmtu_ipv4, except for locked PMTU tests, using IPv6 +# # - pmtu_vti4_exception # Set up vti tunnel on top of veth, with xfrm states and policies, in two # namespaces with matching endpoints. Check that route exception is not @@ -50,6 +70,8 @@ ksft_skip=4 which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) tests=" + pmtu_ipv4_exception ipv4: PMTU exceptions + pmtu_ipv6_exception ipv6: PMTU exceptions pmtu_vti6_exception vti6: PMTU exceptions pmtu_vti4_exception vti4: PMTU exceptions pmtu_vti4_default_mtu vti4: default MTU assignment @@ -60,8 +82,45 @@ tests=" NS_A="ns-$(mktemp -u XXXXXX)" NS_B="ns-$(mktemp -u XXXXXX)" +NS_R1="ns-$(mktemp -u XXXXXX)" +NS_R2="ns-$(mktemp -u XXXXXX)" ns_a="ip netns exec ${NS_A}" ns_b="ip netns exec ${NS_B}" +ns_r1="ip netns exec ${NS_R1}" +ns_r2="ip netns exec ${NS_R2}" + +# Addressing and routing for tests with routers: four network segments, with +# index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an +# identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2). +# Addresses are: +# - IPv4: PREFIX4.SEGMENT.ID (/24) +# - IPv6: PREFIX6:SEGMENT::ID (/64) +prefix4="192.168" +prefix6="fd00" +a_r1=1 +a_r2=2 +b_r1=3 +b_r2=4 +# ns peer segment +routing_addrs=" + A R1 ${a_r1} + A R2 ${a_r2} + B R1 ${b_r1} + B R2 ${b_r2} +" +# Traffic from A to B goes through R1 by default, and through R2, if destined to +# B's address on the b_r2 segment. +# Traffic from B to A goes through R1. +# ns destination gateway +routes=" + A default ${prefix4}.${a_r1}.2 + A ${prefix4}.${b_r2}.1 ${prefix4}.${a_r2}.2 + B default ${prefix4}.${b_r1}.2 + + A default ${prefix6}:${a_r1}::2 + A ${prefix6}:${b_r2}::1 ${prefix6}:${a_r2}::2 + B default ${prefix6}:${b_r1}::2 +" veth4_a_addr="192.168.1.1" veth4_b_addr="192.168.1.2" @@ -83,6 +142,7 @@ dummy6_mask="64" cleanup_done=1 err_buf= +tcpdump_pids= err() { err_buf="${err_buf}${1} @@ -94,9 +154,15 @@ err_flush() { err_buf= } +# Find the auto-generated name for this namespace +nsname() { + eval echo \$NS_$1 +} + setup_namespaces() { - ip netns add ${NS_A} || return 1 - ip netns add ${NS_B} + for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do + ip netns add ${n} || return 1 + done } setup_veth() { @@ -167,6 +233,49 @@ setup_xfrm6() { setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} } +setup_routing() { + for i in ${NS_R1} ${NS_R2}; do + ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1 + ip netns exec ${i} sysctl -q net/ipv6/conf/all/forwarding=1 + done + + for i in ${routing_addrs}; do + [ "${ns}" = "" ] && ns="${i}" && continue + [ "${peer}" = "" ] && peer="${i}" && continue + [ "${segment}" = "" ] && segment="${i}" + + ns_name="$(nsname ${ns})" + peer_name="$(nsname ${peer})" + if="veth_${ns}-${peer}" + ifpeer="veth_${peer}-${ns}" + + # Create veth links + ip link add ${if} up netns ${ns_name} type veth peer name ${ifpeer} netns ${peer_name} || return 1 + ip -n ${peer_name} link set dev ${ifpeer} up + + # Add addresses + ip -n ${ns_name} addr add ${prefix4}.${segment}.1/24 dev ${if} + ip -n ${ns_name} addr add ${prefix6}:${segment}::1/64 dev ${if} + + ip -n ${peer_name} addr add ${prefix4}.${segment}.2/24 dev ${ifpeer} + ip -n ${peer_name} addr add ${prefix6}:${segment}::2/64 dev ${ifpeer} + + ns=""; peer=""; segment="" + done + + for i in ${routes}; do + [ "${ns}" = "" ] && ns="${i}" && continue + [ "${addr}" = "" ] && addr="${i}" && continue + [ "${gw}" = "" ] && gw="${i}" + + ns_name="$(nsname ${ns})" + + ip -n ${ns_name} route add ${addr} via ${gw} + + ns=""; addr=""; gw="" + done +} + setup() { [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip @@ -176,10 +285,28 @@ setup() { done } +trace() { + [ $tracing -eq 0 ] && return + + for arg do + [ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue + ${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null & + tcpdump_pids="${tcpdump_pids} $!" + ns_cmd= + done + sleep 1 +} + cleanup() { + for pid in ${tcpdump_pids}; do + kill ${pid} + done + tcpdump_pids= + [ ${cleanup_done} -eq 1 ] && return - ip netns del ${NS_A} 2> /dev/null - ip netns del ${NS_B} 2> /dev/null + for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do + ip netns del ${n} 2> /dev/null + done cleanup_done=1 } @@ -196,7 +323,9 @@ mtu_parse() { next=0 for i in ${input}; do + [ ${next} -eq 1 -a "${i}" = "lock" ] && next=2 && continue [ ${next} -eq 1 ] && echo "${i}" && return + [ ${next} -eq 2 ] && echo "lock ${i}" && return [ "${i}" = "mtu" ] && next=1 done } @@ -229,8 +358,117 @@ route_get_dst_pmtu_from_exception() { mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})" } +check_pmtu_value() { + expected="${1}" + value="${2}" + event="${3}" + + [ "${expected}" = "any" ] && [ -n "${value}" ] && return 0 + [ "${value}" = "${expected}" ] && return 0 + [ -z "${value}" ] && err " PMTU exception wasn't created after ${event}" && return 1 + [ -z "${expected}" ] && err " PMTU exception shouldn't exist after ${event}" && return 1 + err " found PMTU exception with incorrect MTU ${value}, expected ${expected}, after ${event}" + return 1 +} + +test_pmtu_ipvX() { + family=${1} + + setup namespaces routing || return 2 + trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ + "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ + "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 + + if [ ${family} -eq 4 ]; then + ping=ping + dst1="${prefix4}.${b_r1}.1" + dst2="${prefix4}.${b_r2}.1" + else + ping=${ping6} + dst1="${prefix6}:${b_r1}::1" + dst2="${prefix6}:${b_r2}::1" + fi + + # Set up initial MTU values + mtu "${ns_a}" veth_A-R1 2000 + mtu "${ns_r1}" veth_R1-A 2000 + mtu "${ns_r1}" veth_R1-B 1400 + mtu "${ns_b}" veth_B-R1 1400 + + mtu "${ns_a}" veth_A-R2 2000 + mtu "${ns_r2}" veth_R2-A 2000 + mtu "${ns_r2}" veth_R2-B 1500 + mtu "${ns_b}" veth_B-R2 1500 + + # Create route exceptions + ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1800 ${dst1} > /dev/null + ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1800 ${dst2} > /dev/null + + # Check that exceptions have been created with the correct PMTU + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})" + check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 + + # Decrease local MTU below PMTU, check for PMTU decrease in route exception + mtu "${ns_a}" veth_A-R1 1300 + mtu "${ns_r1}" veth_R1-A 1300 + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})" + check_pmtu_value "1300" "${pmtu_1}" "decreasing local MTU" || return 1 + # Second exception shouldn't be modified + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1 + + # Increase MTU, check for PMTU increase in route exception + mtu "${ns_a}" veth_A-R1 1700 + mtu "${ns_r1}" veth_R1-A 1700 + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})" + check_pmtu_value "1700" "${pmtu_1}" "increasing local MTU" || return 1 + # Second exception shouldn't be modified + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1 + + # Skip PMTU locking tests for IPv6 + [ $family -eq 6 ] && return 0 + + # Decrease remote MTU on path via R2, get new exception + mtu "${ns_r2}" veth_R2-B 400 + mtu "${ns_b}" veth_B-R2 400 + ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1400 ${dst2} > /dev/null + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1 + + # Decrease local MTU below PMTU + mtu "${ns_a}" veth_A-R2 500 + mtu "${ns_r2}" veth_R2-A 500 + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "500" "${pmtu_2}" "decreasing local MTU" || return 1 + + # Increase local MTU + mtu "${ns_a}" veth_A-R2 1500 + mtu "${ns_r2}" veth_R2-A 1500 + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1 + + # Get new exception + ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1400 ${dst2} > /dev/null + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1 +} + +test_pmtu_ipv4_exception() { + test_pmtu_ipvX 4 +} + +test_pmtu_ipv6_exception() { + test_pmtu_ipvX 6 +} + test_pmtu_vti4_exception() { setup namespaces veth vti4 xfrm4 || return 2 + trace "${ns_a}" veth_a "${ns_b}" veth_b \ + "${ns_a}" vti4_a "${ns_b}" vti4_b veth_mtu=1500 vti_mtu=$((veth_mtu - 20)) @@ -248,28 +486,19 @@ test_pmtu_vti4_exception() { # exception is created ${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${vti4_b_addr} > /dev/null pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})" - if [ "${pmtu}" != "" ]; then - err " unexpected exception created with PMTU ${pmtu} for IP payload length ${esp_payload_rfc4106}" - return 1 - fi + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 # Now exceed link layer MTU by one byte, check that exception is created + # with the right PMTU value ${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${vti4_b_addr} > /dev/null pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})" - if [ "${pmtu}" = "" ]; then - err " exception not created for IP payload length $((esp_payload_rfc4106 + 1))" - return 1 - fi - - # ...with the right PMTU value - if [ ${pmtu} -ne ${esp_payload_rfc4106} ]; then - err " wrong PMTU ${pmtu} in exception, expected: ${esp_payload_rfc4106}" - return 1 - fi + check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))" } test_pmtu_vti6_exception() { setup namespaces veth vti6 xfrm6 || return 2 + trace "${ns_a}" veth_a "${ns_b}" veth_b \ + "${ns_a}" vti6_a "${ns_b}" vti6_b fail=0 # Create route exception by exceeding link layer MTU @@ -280,25 +509,18 @@ test_pmtu_vti6_exception() { ${ns_a} ${ping6} -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null # Check that exception was created - if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" = "" ]; then - err " tunnel exceeding link layer MTU didn't create route exception" - return 1 - fi + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" + check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1 # Decrease tunnel MTU, check for PMTU decrease in route exception mtu "${ns_a}" vti6_a 3000 - - if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 3000 ]; then - err " decreasing tunnel MTU didn't decrease route exception PMTU" - fail=1 - fi + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" + check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1 # Increase tunnel MTU, check for PMTU increase in route exception mtu "${ns_a}" vti6_a 9000 - if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 9000 ]; then - err " increasing tunnel MTU didn't increase route exception PMTU" - fail=1 - fi + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" + check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1 return ${fail} } @@ -445,15 +667,56 @@ test_pmtu_vti6_link_change_mtu() { return ${fail} } -trap cleanup EXIT +usage() { + echo + echo "$0 [OPTIONS] [TEST]..." + echo "If no TEST argument is given, all tests will be run." + echo + echo "Options" + echo " --trace: capture traffic to TEST_INTERFACE.pcap" + echo + echo "Available tests${tests}" + exit 1 +} exitcode=0 desc=0 IFS=" " + +tracing=0 +for arg do + if [ "${arg}" != "${arg#--*}" ]; then + opt="${arg#--}" + if [ "${opt}" = "trace" ]; then + if which tcpdump > /dev/null 2>&1; then + tracing=1 + else + echo "=== tcpdump not available, tracing disabled" + fi + else + usage + fi + else + # Check first that all requested tests are available before + # running any + command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; } + fi +done + +trap cleanup EXIT + for t in ${tests}; do [ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0 + run_this=1 + for arg do + [ "${arg}" != "${arg#--*}" ] && continue + [ "${arg}" = "${name}" ] && run_this=1 && break + run_this=0 + done + [ $run_this -eq 0 ] && continue + ( unset IFS eval test_${name} diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 8fdfeafaf8c0..fac68d710f35 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -121,11 +121,11 @@ TEST_F(tls, send_then_sendfile) buf = (char *)malloc(st.st_size); EXPECT_EQ(send(self->fd, test_str, to_send, 0), to_send); - EXPECT_EQ(recv(self->cfd, recv_buf, to_send, 0), to_send); + EXPECT_EQ(recv(self->cfd, recv_buf, to_send, MSG_WAITALL), to_send); EXPECT_EQ(memcmp(test_str, recv_buf, to_send), 0); EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0); - EXPECT_EQ(recv(self->cfd, buf, st.st_size, 0), st.st_size); + EXPECT_EQ(recv(self->cfd, buf, st.st_size, MSG_WAITALL), st.st_size); } TEST_F(tls, recv_max) @@ -160,7 +160,7 @@ TEST_F(tls, msg_more) EXPECT_EQ(send(self->fd, test_str, send_len, MSG_MORE), send_len); EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1); EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); - EXPECT_EQ(recv(self->cfd, buf, send_len * 2, MSG_DONTWAIT), + EXPECT_EQ(recv(self->cfd, buf, send_len * 2, MSG_WAITALL), send_len * 2); EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } @@ -180,7 +180,7 @@ TEST_F(tls, sendmsg_single) msg.msg_iov = &vec; msg.msg_iovlen = 1; EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len); - EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_WAITALL), send_len); EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } @@ -288,7 +288,7 @@ TEST_F(tls, splice_from_pipe) ASSERT_GE(pipe(p), 0); EXPECT_GE(write(p[1], mem_send, send_len), 0); EXPECT_GE(splice(p[0], NULL, self->fd, NULL, send_len, 0), 0); - EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0); + EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len); EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } @@ -306,7 +306,7 @@ TEST_F(tls, splice_from_pipe2) EXPECT_GE(splice(p[0], NULL, self->fd, NULL, 8000, 0), 0); EXPECT_GE(write(p2[1], mem_send + 8000, 8000), 0); EXPECT_GE(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 0); - EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0); + EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len); EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } @@ -322,13 +322,13 @@ TEST_F(tls, send_and_splice) ASSERT_GE(pipe(p), 0); EXPECT_EQ(send(self->fd, test_str, send_len2, 0), send_len2); - EXPECT_NE(recv(self->cfd, buf, send_len2, 0), -1); + EXPECT_EQ(recv(self->cfd, buf, send_len2, MSG_WAITALL), send_len2); EXPECT_EQ(memcmp(test_str, buf, send_len2), 0); EXPECT_GE(write(p[1], mem_send, send_len), send_len); EXPECT_GE(splice(p[0], NULL, self->fd, NULL, send_len, 0), send_len); - EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0); + EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len); EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } @@ -436,7 +436,7 @@ TEST_F(tls, multiple_send_single_recv) EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0); EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0); memset(recv_mem, 0, total_len); - EXPECT_EQ(recv(self->cfd, recv_mem, total_len, 0), total_len); + EXPECT_EQ(recv(self->cfd, recv_mem, total_len, MSG_WAITALL), total_len); EXPECT_EQ(memcmp(send_mem, recv_mem, send_len), 0); EXPECT_EQ(memcmp(send_mem, recv_mem + send_len, send_len), 0); @@ -516,17 +516,17 @@ TEST_F(tls, recv_peek_multiple_records) len = strlen(test_str_second) + 1; EXPECT_EQ(send(self->fd, test_str_second, len, 0), len); - len = sizeof(buf); + len = strlen(test_str_first); memset(buf, 0, len); - EXPECT_NE(recv(self->cfd, buf, len, MSG_PEEK), -1); + EXPECT_EQ(recv(self->cfd, buf, len, MSG_PEEK | MSG_WAITALL), len); /* MSG_PEEK can only peek into the current record. */ - len = strlen(test_str_first) + 1; + len = strlen(test_str_first); EXPECT_EQ(memcmp(test_str_first, buf, len), 0); - len = sizeof(buf); + len = strlen(test_str) + 1; memset(buf, 0, len); - EXPECT_NE(recv(self->cfd, buf, len, 0), -1); + EXPECT_EQ(recv(self->cfd, buf, len, MSG_WAITALL), len); /* Non-MSG_PEEK will advance strparser (and therefore record) * however. @@ -543,6 +543,28 @@ TEST_F(tls, recv_peek_multiple_records) len = strlen(test_str_second) + 1; EXPECT_EQ(send(self->fd, test_str_second, len, 0), len); + len = strlen(test_str) + 1; + memset(buf, 0, len); + EXPECT_EQ(recv(self->cfd, buf, len, MSG_PEEK | MSG_WAITALL), len); + + len = strlen(test_str) + 1; + EXPECT_EQ(memcmp(test_str, buf, len), 0); +} + +TEST_F(tls, recv_peek_large_buf_mult_recs) +{ + char const *test_str = "test_read_peek_mult_recs"; + char const *test_str_first = "test_read_peek"; + char const *test_str_second = "_mult_recs"; + int len; + char buf[64]; + + len = strlen(test_str_first); + EXPECT_EQ(send(self->fd, test_str_first, len, 0), len); + + len = strlen(test_str_second) + 1; + EXPECT_EQ(send(self->fd, test_str_second, len, 0), len); + len = sizeof(buf); memset(buf, 0, len); EXPECT_NE(recv(self->cfd, buf, len, MSG_PEEK), -1); @@ -551,6 +573,7 @@ TEST_F(tls, recv_peek_multiple_records) EXPECT_EQ(memcmp(test_str, buf, len), 0); } + TEST_F(tls, pollin) { char const *test_str = "test_poll"; @@ -564,7 +587,7 @@ TEST_F(tls, pollin) EXPECT_EQ(poll(&fd, 1, 20), 1); EXPECT_EQ(fd.revents & POLLIN, 1); - EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_WAITALL), send_len); /* Test timing out */ EXPECT_EQ(poll(&fd, 1, 20), 0); } @@ -582,7 +605,7 @@ TEST_F(tls, poll_wait) /* Set timeout to inf. secs */ EXPECT_EQ(poll(&fd, 1, -1), 1); EXPECT_EQ(fd.revents & POLLIN, 1); - EXPECT_EQ(recv(self->cfd, recv_mem, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, recv_mem, send_len, MSG_WAITALL), send_len); } TEST_F(tls, blocking) @@ -728,7 +751,7 @@ TEST_F(tls, control_msg) EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); vec.iov_base = buf; - EXPECT_EQ(recvmsg(self->cfd, &msg, 0), send_len); + EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL), send_len); cmsg = CMSG_FIRSTHDR(&msg); EXPECT_NE(cmsg, NULL); EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README index 49a6f8c3fdae..f9281e8aa313 100644 --- a/tools/testing/selftests/tc-testing/README +++ b/tools/testing/selftests/tc-testing/README @@ -232,6 +232,8 @@ directory: and the other is a test whether the command leaked memory or not. (This one is a preliminary version, it may not work quite right yet, but the overall template is there and it should only need tweaks.) + - buildebpfPlugin.py: + builds all programs in $EBPFDIR. ACKNOWLEDGEMENTS diff --git a/tools/testing/selftests/tc-testing/bpf/Makefile b/tools/testing/selftests/tc-testing/bpf/Makefile new file mode 100644 index 000000000000..dc92eb271d9a --- /dev/null +++ b/tools/testing/selftests/tc-testing/bpf/Makefile @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0 + +APIDIR := ../../../../include/uapi +TEST_GEN_FILES = action.o + +top_srcdir = ../../../../.. +include ../../lib.mk + +CLANG ?= clang +LLC ?= llc +PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) + +ifeq ($(PROBE),) + CPU ?= probe +else + CPU ?= generic +endif + +CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \ + | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') + +CLANG_FLAGS = -I. -I$(APIDIR) \ + $(CLANG_SYS_INCLUDES) \ + -Wno-compare-distinct-pointer-types + +$(OUTPUT)/%.o: %.c + $(CLANG) $(CLANG_FLAGS) \ + -O2 -target bpf -emit-llvm -c $< -o - | \ + $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ diff --git a/tools/testing/selftests/tc-testing/bpf/action.c b/tools/testing/selftests/tc-testing/bpf/action.c new file mode 100644 index 000000000000..c32b99b80e19 --- /dev/null +++ b/tools/testing/selftests/tc-testing/bpf/action.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (c) 2018 Davide Caratti, Red Hat inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#include <linux/bpf.h> +#include <linux/pkt_cls.h> + +__attribute__((section("action-ok"),used)) int action_ok(struct __sk_buff *s) +{ + return TC_ACT_OK; +} + +__attribute__((section("action-ko"),used)) int action_ko(struct __sk_buff *s) +{ + s->data = 0x0; + return TC_ACT_OK; +} + +char _license[] __attribute__((section("license"),used)) = "GPL"; diff --git a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py new file mode 100644 index 000000000000..9f0ba10c44b4 --- /dev/null +++ b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py @@ -0,0 +1,66 @@ +''' +build ebpf program +''' + +import os +import signal +from string import Template +import subprocess +import time +from TdcPlugin import TdcPlugin +from tdc_config import * + +class SubPlugin(TdcPlugin): + def __init__(self): + self.sub_class = 'buildebpf/SubPlugin' + self.tap = '' + super().__init__() + + def pre_suite(self, testcount, testidlist): + super().pre_suite(testcount, testidlist) + + if self.args.buildebpf: + self._ebpf_makeall() + + def post_suite(self, index): + super().post_suite(index) + + self._ebpf_makeclean() + + def add_args(self, parser): + super().add_args(parser) + + self.argparser_group = self.argparser.add_argument_group( + 'buildebpf', + 'options for buildebpfPlugin') + self.argparser_group.add_argument( + '-B', '--buildebpf', action='store_true', + help='build eBPF programs') + + return self.argparser + + def _ebpf_makeall(self): + if self.args.buildebpf: + self._make('all') + + def _ebpf_makeclean(self): + if self.args.buildebpf: + self._make('clean') + + def _make(self, target): + command = 'make -C {} {}'.format(self.args.NAMES['EBPFDIR'], target) + proc = subprocess.Popen(command, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=ENVIR) + (rawout, serr) = proc.communicate() + + if proc.returncode != 0 and len(serr) > 0: + foutput = serr.decode("utf-8") + else: + foutput = rawout.decode("utf-8") + + proc.stdout.close() + proc.stderr.close() + return proc, foutput diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json index 6f289a49e5ec..5970cee6d05f 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json @@ -55,7 +55,6 @@ "bpf" ], "setup": [ - "printf '#include <linux/bpf.h>\nchar l[] __attribute__((section(\"license\"),used))=\"GPL\"; __attribute__((section(\"action\"),used)) int m(struct __sk_buff *s) { return 2; }' | clang -O2 -x c -c - -target bpf -o _b.o", [ "$TC action flush action bpf", 0, @@ -63,14 +62,13 @@ 255 ] ], - "cmdUnderTest": "$TC action add action bpf object-file _b.o index 667", + "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667", "expExitCode": "0", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c( jited)? default-action pipe.*index 667 ref", + "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9]* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref", "matchCount": "1", "teardown": [ - "$TC action flush action bpf", - "rm -f _b.o" + "$TC action flush action bpf" ] }, { @@ -81,7 +79,6 @@ "bpf" ], "setup": [ - "printf '#include <linux/bpf.h>\nchar l[] __attribute__((section(\"license\"),used))=\"GPL\"; __attribute__((section(\"action\"),used)) int m(struct __sk_buff *s) { s->data = 0x0; return 2; }' | clang -O2 -x c -c - -target bpf -o _c.o", [ "$TC action flush action bpf", 0, @@ -89,10 +86,10 @@ 255 ] ], - "cmdUnderTest": "$TC action add action bpf object-file _c.o index 667", + "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ko index 667", "expExitCode": "255", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf _c.o:\\[action\\] id [0-9].*index 667 ref", + "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ko\\] id [0-9].*index 667 ref", "matchCount": "0", "teardown": [ [ @@ -100,8 +97,7 @@ 0, 1, 255 - ], - "rm -f _c.o" + ] ] }, { diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json index 68c91023cdb9..89189a03ce3d 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json @@ -536,5 +536,29 @@ "matchPattern": "^[ \t]+index [0-9]+ ref", "matchCount": "0", "teardown": [] + }, + { + "id": "8e47", + "name": "Add gact action with random determ goto chain control action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pass random determ goto chain 1 2 index 90", + "expExitCode": "255", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action pass random type determ goto chain 1 val 2.*index 90 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action gact" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json index 30f9b54bd666..4086a50a670e 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json @@ -715,5 +715,29 @@ "teardown": [ "$TC actions flush action police" ] + }, + { + "id": "b48b", + "name": "Add police action with exceed goto chain control action", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 1mbit burst 1k conform-exceed pass / goto chain 42", + "expExitCode": "255", + "verifyCmd": "$TC actions ls action police", + "matchPattern": "action order [0-9]*: police 0x1 rate 1Mbit burst 1Kb mtu 2Kb action pass/goto chain 42", + "matchCount": "0", + "teardown": [ + "$TC actions flush action police" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py index a023d0d62b25..d651bc1501bd 100644 --- a/tools/testing/selftests/tc-testing/tdc_config.py +++ b/tools/testing/selftests/tc-testing/tdc_config.py @@ -16,7 +16,9 @@ NAMES = { 'DEV2': '', 'BATCH_FILE': './batch.txt', # Name of the namespace to use - 'NS': 'tcut' + 'NS': 'tcut', + # Directory containing eBPF test programs + 'EBPFDIR': './bpf' } |