diff options
author | Jakub Kicinski <kuba@kernel.org> | 2023-08-03 15:34:36 -0700 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2023-08-03 15:34:36 -0700 |
commit | d07b7b32da6f678d42d96a8b9824cf0a181ce140 (patch) | |
tree | 606829d4b33a57dbe0f0e825ca8505e0b5fcb759 /include/net | |
parent | 35b1b1fd96388d5e3cf179bf36bd8a4153baf4a3 (diff) | |
parent | 648880e9331c68b2008430fd90f3648d1795399d (diff) | |
download | linux-d07b7b32da6f678d42d96a8b9824cf0a181ce140.tar.gz linux-d07b7b32da6f678d42d96a8b9824cf0a181ce140.tar.bz2 linux-d07b7b32da6f678d42d96a8b9824cf0a181ce140.zip |
Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Martin KaFai Lau says:
====================
pull-request: bpf-next 2023-08-03
We've added 54 non-merge commits during the last 10 day(s) which contain
a total of 84 files changed, 4026 insertions(+), 562 deletions(-).
The main changes are:
1) Add SO_REUSEPORT support for TC bpf_sk_assign from Lorenz Bauer,
Daniel Borkmann
2) Support new insns from cpu v4 from Yonghong Song
3) Non-atomically allocate freelist during prefill from YiFei Zhu
4) Support defragmenting IPv(4|6) packets in BPF from Daniel Xu
5) Add tracepoint to xdp attaching failure from Leon Hwang
6) struct netdev_rx_queue and xdp.h reshuffling to reduce
rebuild time from Jakub Kicinski
* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (54 commits)
net: invert the netdevice.h vs xdp.h dependency
net: move struct netdev_rx_queue out of netdevice.h
eth: add missing xdp.h includes in drivers
selftests/bpf: Add testcase for xdp attaching failure tracepoint
bpf, xdp: Add tracepoint to xdp attaching failure
selftests/bpf: fix static assert compilation issue for test_cls_*.c
bpf: fix bpf_probe_read_kernel prototype mismatch
riscv, bpf: Adapt bpf trampoline to optimized riscv ftrace framework
libbpf: fix typos in Makefile
tracing: bpf: use struct trace_entry in struct syscall_tp_t
bpf, devmap: Remove unused dtab field from bpf_dtab_netdev
bpf, cpumap: Remove unused cmap field from bpf_cpu_map_entry
netfilter: bpf: Only define get_proto_defrag_hook() if necessary
bpf: Fix an array-index-out-of-bounds issue in disasm.c
net: remove duplicate INDIRECT_CALLABLE_DECLARE of udp[6]_ehashfn
docs/bpf: Fix malformed documentation
bpf: selftests: Add defrag selftests
bpf: selftests: Support custom type and proto for client sockets
bpf: selftests: Support not connecting client socket
netfilter: bpf: Support BPF_F_NETFILTER_IP_DEFRAG in netfilter link
...
====================
Link: https://lore.kernel.org/r/20230803174845.825419-1-martin.lau@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include/net')
-rw-r--r-- | include/net/busy_poll.h | 1 | ||||
-rw-r--r-- | include/net/inet6_hashtables.h | 81 | ||||
-rw-r--r-- | include/net/inet_hashtables.h | 74 | ||||
-rw-r--r-- | include/net/mana/mana.h | 2 | ||||
-rw-r--r-- | include/net/netdev_rx_queue.h | 53 | ||||
-rw-r--r-- | include/net/sock.h | 7 | ||||
-rw-r--r-- | include/net/xdp.h | 29 |
7 files changed, 230 insertions, 17 deletions
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index f90f0021f5f2..4dabeb6c76d3 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -16,6 +16,7 @@ #include <linux/sched/clock.h> #include <linux/sched/signal.h> #include <net/ip.h> +#include <net/xdp.h> /* 0 - Reserved to indicate value not set * 1..NR_CPUS - Reserved for sender_cpu diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 56f1286583d3..284b5ce7205d 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -48,6 +48,22 @@ struct sock *__inet6_lookup_established(struct net *net, const u16 hnum, const int dif, const int sdif); +typedef u32 (inet6_ehashfn_t)(const struct net *net, + const struct in6_addr *laddr, const u16 lport, + const struct in6_addr *faddr, const __be16 fport); + +inet6_ehashfn_t inet6_ehashfn; + +INDIRECT_CALLABLE_DECLARE(inet6_ehashfn_t udp6_ehashfn); + +struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk, + struct sk_buff *skb, int doff, + const struct in6_addr *saddr, + __be16 sport, + const struct in6_addr *daddr, + unsigned short hnum, + inet6_ehashfn_t *ehashfn); + struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, @@ -57,6 +73,15 @@ struct sock *inet6_lookup_listener(struct net *net, const unsigned short hnum, const int dif, const int sdif); +struct sock *inet6_lookup_run_sk_lookup(struct net *net, + int protocol, + struct sk_buff *skb, int doff, + const struct in6_addr *saddr, + const __be16 sport, + const struct in6_addr *daddr, + const u16 hnum, const int dif, + inet6_ehashfn_t *ehashfn); + static inline struct sock *__inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, @@ -78,6 +103,46 @@ static inline struct sock *__inet6_lookup(struct net *net, daddr, hnum, dif, sdif); } +static inline +struct sock *inet6_steal_sock(struct net *net, struct sk_buff *skb, int doff, + const struct in6_addr *saddr, const __be16 sport, + const struct in6_addr *daddr, const __be16 dport, + bool *refcounted, inet6_ehashfn_t *ehashfn) +{ + struct sock *sk, *reuse_sk; + bool prefetched; + + sk = skb_steal_sock(skb, refcounted, &prefetched); + if (!sk) + return NULL; + + if (!prefetched) + return sk; + + if (sk->sk_protocol == IPPROTO_TCP) { + if (sk->sk_state != TCP_LISTEN) + return sk; + } else if (sk->sk_protocol == IPPROTO_UDP) { + if (sk->sk_state != TCP_CLOSE) + return sk; + } else { + return sk; + } + + reuse_sk = inet6_lookup_reuseport(net, sk, skb, doff, + saddr, sport, daddr, ntohs(dport), + ehashfn); + if (!reuse_sk) + return sk; + + /* We've chosen a new reuseport sock which is never refcounted. This + * implies that sk also isn't refcounted. + */ + WARN_ON_ONCE(*refcounted); + + return reuse_sk; +} + static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be16 sport, @@ -85,14 +150,20 @@ static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, int iif, int sdif, bool *refcounted) { - struct sock *sk = skb_steal_sock(skb, refcounted); - + struct net *net = dev_net(skb_dst(skb)->dev); + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct sock *sk; + + sk = inet6_steal_sock(net, skb, doff, &ip6h->saddr, sport, &ip6h->daddr, dport, + refcounted, inet6_ehashfn); + if (IS_ERR(sk)) + return NULL; if (sk) return sk; - return __inet6_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb, - doff, &ipv6_hdr(skb)->saddr, sport, - &ipv6_hdr(skb)->daddr, ntohs(dport), + return __inet6_lookup(net, hashinfo, skb, + doff, &ip6h->saddr, sport, + &ip6h->daddr, ntohs(dport), iif, sdif, refcounted); } diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 99bd823e97f6..1177effabed3 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -379,6 +379,27 @@ struct sock *__inet_lookup_established(struct net *net, const __be32 daddr, const u16 hnum, const int dif, const int sdif); +typedef u32 (inet_ehashfn_t)(const struct net *net, + const __be32 laddr, const __u16 lport, + const __be32 faddr, const __be16 fport); + +inet_ehashfn_t inet_ehashfn; + +INDIRECT_CALLABLE_DECLARE(inet_ehashfn_t udp_ehashfn); + +struct sock *inet_lookup_reuseport(struct net *net, struct sock *sk, + struct sk_buff *skb, int doff, + __be32 saddr, __be16 sport, + __be32 daddr, unsigned short hnum, + inet_ehashfn_t *ehashfn); + +struct sock *inet_lookup_run_sk_lookup(struct net *net, + int protocol, + struct sk_buff *skb, int doff, + __be32 saddr, __be16 sport, + __be32 daddr, u16 hnum, const int dif, + inet_ehashfn_t *ehashfn); + static inline struct sock * inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, @@ -428,6 +449,46 @@ static inline struct sock *inet_lookup(struct net *net, return sk; } +static inline +struct sock *inet_steal_sock(struct net *net, struct sk_buff *skb, int doff, + const __be32 saddr, const __be16 sport, + const __be32 daddr, const __be16 dport, + bool *refcounted, inet_ehashfn_t *ehashfn) +{ + struct sock *sk, *reuse_sk; + bool prefetched; + + sk = skb_steal_sock(skb, refcounted, &prefetched); + if (!sk) + return NULL; + + if (!prefetched) + return sk; + + if (sk->sk_protocol == IPPROTO_TCP) { + if (sk->sk_state != TCP_LISTEN) + return sk; + } else if (sk->sk_protocol == IPPROTO_UDP) { + if (sk->sk_state != TCP_CLOSE) + return sk; + } else { + return sk; + } + + reuse_sk = inet_lookup_reuseport(net, sk, skb, doff, + saddr, sport, daddr, ntohs(dport), + ehashfn); + if (!reuse_sk) + return sk; + + /* We've chosen a new reuseport sock which is never refcounted. This + * implies that sk also isn't refcounted. + */ + WARN_ON_ONCE(*refcounted); + + return reuse_sk; +} + static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, @@ -436,22 +497,23 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, const int sdif, bool *refcounted) { - struct sock *sk = skb_steal_sock(skb, refcounted); + struct net *net = dev_net(skb_dst(skb)->dev); const struct iphdr *iph = ip_hdr(skb); + struct sock *sk; + sk = inet_steal_sock(net, skb, doff, iph->saddr, sport, iph->daddr, dport, + refcounted, inet_ehashfn); + if (IS_ERR(sk)) + return NULL; if (sk) return sk; - return __inet_lookup(dev_net(skb_dst(skb)->dev), hashinfo, skb, + return __inet_lookup(net, hashinfo, skb, doff, iph->saddr, sport, iph->daddr, dport, inet_iif(skb), sdif, refcounted); } -u32 inet6_ehashfn(const struct net *net, - const struct in6_addr *laddr, const u16 lport, - const struct in6_addr *faddr, const __be16 fport); - static inline void sk_daddr_set(struct sock *sk, __be32 addr) { sk->sk_daddr = addr; /* alias of inet_daddr */ diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 024ad8ddb27e..1ccdca03e166 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -4,6 +4,8 @@ #ifndef _MANA_H #define _MANA_H +#include <net/xdp.h> + #include "gdma.h" #include "hw_channel.h" diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h new file mode 100644 index 000000000000..cdcafb30d437 --- /dev/null +++ b/include/net/netdev_rx_queue.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_NETDEV_RX_QUEUE_H +#define _LINUX_NETDEV_RX_QUEUE_H + +#include <linux/kobject.h> +#include <linux/netdevice.h> +#include <linux/sysfs.h> +#include <net/xdp.h> + +/* This structure contains an instance of an RX queue. */ +struct netdev_rx_queue { + struct xdp_rxq_info xdp_rxq; +#ifdef CONFIG_RPS + struct rps_map __rcu *rps_map; + struct rps_dev_flow_table __rcu *rps_flow_table; +#endif + struct kobject kobj; + struct net_device *dev; + netdevice_tracker dev_tracker; + +#ifdef CONFIG_XDP_SOCKETS + struct xsk_buff_pool *pool; +#endif +} ____cacheline_aligned_in_smp; + +/* + * RX queue sysfs structures and functions. + */ +struct rx_queue_attribute { + struct attribute attr; + ssize_t (*show)(struct netdev_rx_queue *queue, char *buf); + ssize_t (*store)(struct netdev_rx_queue *queue, + const char *buf, size_t len); +}; + +static inline struct netdev_rx_queue * +__netif_get_rx_queue(struct net_device *dev, unsigned int rxq) +{ + return dev->_rx + rxq; +} + +#ifdef CONFIG_SYSFS +static inline unsigned int +get_netdev_rx_queue_index(struct netdev_rx_queue *queue) +{ + struct net_device *dev = queue->dev; + int index = queue - dev->_rx; + + BUG_ON(index >= dev->num_rx_queues); + return index; +} +#endif +#endif diff --git a/include/net/sock.h b/include/net/sock.h index 7ae44bf866af..74cbfb15d289 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2815,20 +2815,23 @@ sk_is_refcounted(struct sock *sk) * skb_steal_sock - steal a socket from an sk_buff * @skb: sk_buff to steal the socket from * @refcounted: is set to true if the socket is reference-counted + * @prefetched: is set to true if the socket was assigned from bpf */ static inline struct sock * -skb_steal_sock(struct sk_buff *skb, bool *refcounted) +skb_steal_sock(struct sk_buff *skb, bool *refcounted, bool *prefetched) { if (skb->sk) { struct sock *sk = skb->sk; *refcounted = true; - if (skb_sk_is_prefetched(skb)) + *prefetched = skb_sk_is_prefetched(skb); + if (*prefetched) *refcounted = sk_is_refcounted(sk); skb->destructor = NULL; skb->sk = NULL; return sk; } + *prefetched = false; *refcounted = false; return NULL; } diff --git a/include/net/xdp.h b/include/net/xdp.h index d1c5381fc95f..de08c8e0d134 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -6,9 +6,10 @@ #ifndef __LINUX_NET_XDP_H__ #define __LINUX_NET_XDP_H__ -#include <linux/skbuff.h> /* skb_shared_info */ -#include <uapi/linux/netdev.h> #include <linux/bitfield.h> +#include <linux/filter.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> /* skb_shared_info */ /** * DOC: XDP RX-queue information @@ -45,8 +46,6 @@ enum xdp_mem_type { MEM_TYPE_MAX, }; -typedef u32 xdp_features_t; - /* XDP flags for ndo_xdp_xmit */ #define XDP_XMIT_FLUSH (1U << 0) /* doorbell signal consumer */ #define XDP_XMIT_FLAGS_MASK XDP_XMIT_FLUSH @@ -443,6 +442,12 @@ enum xdp_rss_hash_type { XDP_RSS_TYPE_L4_IPV6_SCTP_EX = XDP_RSS_TYPE_L4_IPV6_SCTP | XDP_RSS_L3_DYNHDR, }; +struct xdp_metadata_ops { + int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp); + int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash, + enum xdp_rss_hash_type *rss_type); +}; + #ifdef CONFIG_NET u32 bpf_xdp_metadata_kfunc_id(int id); bool bpf_dev_bound_kfunc_id(u32 btf_id); @@ -474,4 +479,20 @@ static inline void xdp_clear_features_flag(struct net_device *dev) xdp_set_features_flag(dev, 0); } +static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, + struct xdp_buff *xdp) +{ + /* Driver XDP hooks are invoked within a single NAPI poll cycle and thus + * under local_bh_disable(), which provides the needed RCU protection + * for accessing map entries. + */ + u32 act = __bpf_prog_run(prog, xdp, BPF_DISPATCHER_FUNC(xdp)); + + if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) { + if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev)) + act = xdp_master_redirect(xdp); + } + + return act; +} #endif /* __LINUX_NET_XDP_H__ */ |