summaryrefslogtreecommitdiffstats
path: root/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-04-29 11:57:23 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-04-29 11:57:23 -0700
commit9d31d2338950293ec19d9b095fbaa9030899dcb4 (patch)
treee688040d0557c24a2eeb9f6c9c223d949f6f7ef9 /tools/testing/selftests/bpf/progs/test_tc_tunnel.c
parent635de956a7f5a6ffcb04f29d70630c64c717b56b (diff)
parent4a52dd8fefb45626dace70a63c0738dbd83b7edb (diff)
downloadlinux-stable-9d31d2338950293ec19d9b095fbaa9030899dcb4.tar.gz
linux-stable-9d31d2338950293ec19d9b095fbaa9030899dcb4.tar.bz2
linux-stable-9d31d2338950293ec19d9b095fbaa9030899dcb4.zip
Merge tag 'net-next-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core: - bpf: - allow bpf programs calling kernel functions (initially to reuse TCP congestion control implementations) - enable task local storage for tracing programs - remove the need to store per-task state in hash maps, and allow tracing programs access to task local storage previously added for BPF_LSM - add bpf_for_each_map_elem() helper, allowing programs to walk all map elements in a more robust and easier to verify fashion - sockmap: support UDP and cross-protocol BPF_SK_SKB_VERDICT redirection - lpm: add support for batched ops in LPM trie - add BTF_KIND_FLOAT support - mostly to allow use of BTF on s390 which has floats in its headers files - improve BPF syscall documentation and extend the use of kdoc parsing scripts we already employ for bpf-helpers - libbpf, bpftool: support static linking of BPF ELF files - improve support for encapsulation of L2 packets - xdp: restructure redirect actions to avoid a runtime lookup, improving performance by 4-8% in microbenchmarks - xsk: build skb by page (aka generic zerocopy xmit) - improve performance of software AF_XDP path by 33% for devices which don't need headers in the linear skb part (e.g. virtio) - nexthop: resilient next-hop groups - improve path stability on next-hops group changes (incl. offload for mlxsw) - ipv6: segment routing: add support for IPv4 decapsulation - icmp: add support for RFC 8335 extended PROBE messages - inet: use bigger hash table for IP ID generation - tcp: deal better with delayed TX completions - make sure we don't give up on fast TCP retransmissions only because driver is slow in reporting that it completed transmitting the original - tcp: reorder tcp_congestion_ops for better cache locality - mptcp: - add sockopt support for common TCP options - add support for common TCP msg flags - include multiple address ids in RM_ADDR - add reset option support for resetting one subflow - udp: GRO L4 improvements - improve 'forward' / 'frag_list' co-existence with UDP tunnel GRO, allowing the first to take place correctly even for encapsulated UDP traffic - micro-optimize dev_gro_receive() and flow dissection, avoid retpoline overhead on VLAN and TEB GRO - use less memory for sysctls, add a new sysctl type, to allow using u8 instead of "int" and "long" and shrink networking sysctls - veth: allow GRO without XDP - this allows aggregating UDP packets before handing them off to routing, bridge, OvS, etc. - allow specifing ifindex when device is moved to another namespace - netfilter: - nft_socket: add support for cgroupsv2 - nftables: add catch-all set element - special element used to define a default action in case normal lookup missed - use net_generic infra in many modules to avoid allocating per-ns memory unnecessarily - xps: improve the xps handling to avoid potential out-of-bound accesses and use-after-free when XPS change race with other re-configuration under traffic - add a config knob to turn off per-cpu netdev refcnt to catch underflows in testing Device APIs: - add WWAN subsystem to organize the WWAN interfaces better and hopefully start driving towards more unified and vendor- independent APIs - ethtool: - add interface for reading IEEE MIB stats (incl. mlx5 and bnxt support) - allow network drivers to dump arbitrary SFP EEPROM data, current offset+length API was a poor fit for modern SFP which define EEPROM in terms of pages (incl. mlx5 support) - act_police, flow_offload: add support for packet-per-second policing (incl. offload for nfp) - psample: add additional metadata attributes like transit delay for packets sampled from switch HW (and corresponding egress and policy-based sampling in the mlxsw driver) - dsa: improve support for sandwiched LAGs with bridge and DSA - netfilter: - flowtable: use direct xmit in topologies with IP forwarding, bridging, vlans etc. - nftables: counter hardware offload support - Bluetooth: - improvements for firmware download w/ Intel devices - add support for reading AOSP vendor capabilities - add support for virtio transport driver - mac80211: - allow concurrent monitor iface and ethernet rx decap - set priority and queue mapping for injected frames - phy: add support for Clause-45 PHY Loopback - pci/iov: add sysfs MSI-X vector assignment interface to distribute MSI-X resources to VFs (incl. mlx5 support) New hardware/drivers: - dsa: mv88e6xxx: add support for Marvell mv88e6393x - 11-port Ethernet switch with 8x 1-Gigabit Ethernet and 3x 10-Gigabit interfaces. - dsa: support for legacy Broadcom tags used on BCM5325, BCM5365 and BCM63xx switches - Microchip KSZ8863 and KSZ8873; 3x 10/100Mbps Ethernet switches - ath11k: support for QCN9074 a 802.11ax device - Bluetooth: Broadcom BCM4330 and BMC4334 - phy: Marvell 88X2222 transceiver support - mdio: add BCM6368 MDIO mux bus controller - r8152: support RTL8153 and RTL8156 (USB Ethernet) chips - mana: driver for Microsoft Azure Network Adapter (MANA) - Actions Semi Owl Ethernet MAC - can: driver for ETAS ES58X CAN/USB interfaces Pure driver changes: - add XDP support to: enetc, igc, stmmac - add AF_XDP support to: stmmac - virtio: - page_to_skb() use build_skb when there's sufficient tailroom (21% improvement for 1000B UDP frames) - support XDP even without dedicated Tx queues - share the Tx queues with the stack when necessary - mlx5: - flow rules: add support for mirroring with conntrack, matching on ICMP, GTP, flex filters and more - support packet sampling with flow offloads - persist uplink representor netdev across eswitch mode changes - allow coexistence of CQE compression and HW time-stamping - add ethtool extended link error state reporting - ice, iavf: support flow filters, UDP Segmentation Offload - dpaa2-switch: - move the driver out of staging - add spanning tree (STP) support - add rx copybreak support - add tc flower hardware offload on ingress traffic - ionic: - implement Rx page reuse - support HW PTP time-stamping - octeon: support TC hardware offloads - flower matching on ingress and egress ratelimitting. - stmmac: - add RX frame steering based on VLAN priority in tc flower - support frame preemption (FPE) - intel: add cross time-stamping freq difference adjustment - ocelot: - support forwarding of MRP frames in HW - support multiple bridges - support PTP Sync one-step timestamping - dsa: mv88e6xxx, dpaa2-switch: offload bridge port flags like learning, flooding etc. - ipa: add IPA v4.5, v4.9 and v4.11 support (Qualcomm SDX55, SM8350, SC7280 SoCs) - mt7601u: enable TDLS support - mt76: - add support for 802.3 rx frames (mt7915/mt7615) - mt7915 flash pre-calibration support - mt7921/mt7663 runtime power management fixes" * tag 'net-next-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2451 commits) net: selftest: fix build issue if INET is disabled net: netrom: nr_in: Remove redundant assignment to ns net: tun: Remove redundant assignment to ret net: phy: marvell: add downshift support for M88E1240 net: dsa: ksz: Make reg_mib_cnt a u8 as it never exceeds 255 net/sched: act_ct: Remove redundant ct get and check icmp: standardize naming of RFC 8335 PROBE constants bpf, selftests: Update array map tests for per-cpu batched ops bpf: Add batched ops support for percpu array bpf: Implement formatted output helpers with bstr_printf seq_file: Add a seq_bprintf function sfc: adjust efx->xdp_tx_queue_count with the real number of initialized queues net:nfc:digital: Fix a double free in digital_tg_recv_dep_req net: fix a concurrency bug in l2tp_tunnel_register() net/smc: Remove redundant assignment to rc mpls: Remove redundant assignment to err llc2: Remove redundant assignment to rc net/tls: Remove redundant initialization of record rds: Remove redundant assignment to nr_sig dt-bindings: net: mdio-gpio: add compatible for microchip,mdio-smi0 ...
Diffstat (limited to 'tools/testing/selftests/bpf/progs/test_tc_tunnel.c')
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_tunnel.c113
1 files changed, 99 insertions, 14 deletions
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 37bce7a7c394..84cd63259554 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -24,14 +24,29 @@ static const int cfg_port = 8000;
static const int cfg_udp_src = 20000;
+#define L2_PAD_SZ (sizeof(struct vxlanhdr) + ETH_HLEN)
+
#define UDP_PORT 5555
#define MPLS_OVER_UDP_PORT 6635
#define ETH_OVER_UDP_PORT 7777
+#define VXLAN_UDP_PORT 8472
+
+#define EXTPROTO_VXLAN 0x1
+
+#define VXLAN_N_VID (1u << 24)
+#define VXLAN_VNI_MASK bpf_htonl((VXLAN_N_VID - 1) << 8)
+#define VXLAN_FLAGS 0x8
+#define VXLAN_VNI 1
/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
MPLS_LS_S_MASK | 0xff);
+struct vxlanhdr {
+ __be32 vx_flags;
+ __be32 vx_vni;
+} __attribute__((packed));
+
struct gre_hdr {
__be16 flags;
__be16 protocol;
@@ -45,13 +60,13 @@ union l4hdr {
struct v4hdr {
struct iphdr ip;
union l4hdr l4hdr;
- __u8 pad[16]; /* enough space for L2 header */
+ __u8 pad[L2_PAD_SZ]; /* space for L2 header / vxlan header ... */
} __attribute__((packed));
struct v6hdr {
struct ipv6hdr ip;
union l4hdr l4hdr;
- __u8 pad[16]; /* enough space for L2 header */
+ __u8 pad[L2_PAD_SZ]; /* space for L2 header / vxlan header ... */
} __attribute__((packed));
static __always_inline void set_ipv4_csum(struct iphdr *iph)
@@ -69,14 +84,15 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
iph->check = ~((csum & 0xffff) + (csum >> 16));
}
-static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
- __u16 l2_proto)
+static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
+ __u16 l2_proto, __u16 ext_proto)
{
__u16 udp_dst = UDP_PORT;
struct iphdr iph_inner;
struct v4hdr h_outer;
struct tcphdr tcph;
int olen, l2_len;
+ __u8 *l2_hdr = NULL;
int tcp_off;
__u64 flags;
@@ -141,7 +157,11 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
break;
case ETH_P_TEB:
l2_len = ETH_HLEN;
- udp_dst = ETH_OVER_UDP_PORT;
+ if (ext_proto & EXTPROTO_VXLAN) {
+ udp_dst = VXLAN_UDP_PORT;
+ l2_len += sizeof(struct vxlanhdr);
+ } else
+ udp_dst = ETH_OVER_UDP_PORT;
break;
}
flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
@@ -171,14 +191,26 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
}
/* add L2 encap (if specified) */
+ l2_hdr = (__u8 *)&h_outer + olen;
switch (l2_proto) {
case ETH_P_MPLS_UC:
- *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+ *(__u32 *)l2_hdr = mpls_label;
break;
case ETH_P_TEB:
- if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
- ETH_HLEN))
+ flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
+
+ if (ext_proto & EXTPROTO_VXLAN) {
+ struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
+
+ vxlan_hdr->vx_flags = VXLAN_FLAGS;
+ vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
+
+ l2_hdr += sizeof(struct vxlanhdr);
+ }
+
+ if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
return TC_ACT_SHOT;
+
break;
}
olen += l2_len;
@@ -214,14 +246,21 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
return TC_ACT_OK;
}
-static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
__u16 l2_proto)
{
+ return __encap_ipv4(skb, encap_proto, l2_proto, 0);
+}
+
+static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+ __u16 l2_proto, __u16 ext_proto)
+{
__u16 udp_dst = UDP_PORT;
struct ipv6hdr iph_inner;
struct v6hdr h_outer;
struct tcphdr tcph;
int olen, l2_len;
+ __u8 *l2_hdr = NULL;
__u16 tot_len;
__u64 flags;
@@ -249,7 +288,11 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
break;
case ETH_P_TEB:
l2_len = ETH_HLEN;
- udp_dst = ETH_OVER_UDP_PORT;
+ if (ext_proto & EXTPROTO_VXLAN) {
+ udp_dst = VXLAN_UDP_PORT;
+ l2_len += sizeof(struct vxlanhdr);
+ } else
+ udp_dst = ETH_OVER_UDP_PORT;
break;
}
flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
@@ -267,7 +310,7 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
- sizeof(h_outer.l4hdr.udp);
+ sizeof(h_outer.l4hdr.udp) + l2_len;
h_outer.l4hdr.udp.check = 0;
h_outer.l4hdr.udp.len = bpf_htons(tot_len);
break;
@@ -278,13 +321,24 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
}
/* add L2 encap (if specified) */
+ l2_hdr = (__u8 *)&h_outer + olen;
switch (l2_proto) {
case ETH_P_MPLS_UC:
- *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+ *(__u32 *)l2_hdr = mpls_label;
break;
case ETH_P_TEB:
- if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
- ETH_HLEN))
+ flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
+
+ if (ext_proto & EXTPROTO_VXLAN) {
+ struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
+
+ vxlan_hdr->vx_flags = VXLAN_FLAGS;
+ vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
+
+ l2_hdr += sizeof(struct vxlanhdr);
+ }
+
+ if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
return TC_ACT_SHOT;
break;
}
@@ -309,6 +363,12 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
return TC_ACT_OK;
}
+static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+ __u16 l2_proto)
+{
+ return __encap_ipv6(skb, encap_proto, l2_proto, 0);
+}
+
SEC("encap_ipip_none")
int __encap_ipip_none(struct __sk_buff *skb)
{
@@ -372,6 +432,17 @@ int __encap_udp_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
+SEC("encap_vxlan_eth")
+int __encap_vxlan_eth(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+ return __encap_ipv4(skb, IPPROTO_UDP,
+ ETH_P_TEB,
+ EXTPROTO_VXLAN);
+ else
+ return TC_ACT_OK;
+}
+
SEC("encap_sit_none")
int __encap_sit_none(struct __sk_buff *skb)
{
@@ -444,6 +515,17 @@ int __encap_ip6udp_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
+SEC("encap_ip6vxlan_eth")
+int __encap_ip6vxlan_eth(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+ return __encap_ipv6(skb, IPPROTO_UDP,
+ ETH_P_TEB,
+ EXTPROTO_VXLAN);
+ else
+ return TC_ACT_OK;
+}
+
static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
{
char buf[sizeof(struct v6hdr)];
@@ -479,6 +561,9 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
case ETH_OVER_UDP_PORT:
olen += ETH_HLEN;
break;
+ case VXLAN_UDP_PORT:
+ olen += ETH_HLEN + sizeof(struct vxlanhdr);
+ break;
}
break;
default: