diff options
author | Jakub Kicinski <kuba@kernel.org> | 2023-11-30 16:56:09 -0800 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2023-11-30 16:58:42 -0800 |
commit | 753c8608f3e579307493a63b9242667aee35a751 (patch) | |
tree | 4197358069e8db7bc0d36a474612f7ffefc7ba72 /include/net | |
parent | 975f2d73a99f35b57ffa2ad7bff8562225cdcfcb (diff) | |
parent | f690ff9122d2ca8e38769f3bcf217bd3df681a36 (diff) | |
download | linux-stable-753c8608f3e579307493a63b9242667aee35a751.tar.gz linux-stable-753c8608f3e579307493a63b9242667aee35a751.tar.bz2 linux-stable-753c8608f3e579307493a63b9242667aee35a751.zip |
Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:
====================
pull-request: bpf-next 2023-11-30
We've added 30 non-merge commits during the last 7 day(s) which contain
a total of 58 files changed, 1598 insertions(+), 154 deletions(-).
The main changes are:
1) Add initial TX metadata implementation for AF_XDP with support in mlx5
and stmmac drivers. Two types of offloads are supported right now, that
is, TX timestamp and TX checksum offload, from Stanislav Fomichev with
stmmac implementation from Song Yoong Siang.
2) Change BPF verifier logic to validate global subprograms lazily instead
of unconditionally before the main program, so they can be guarded using
BPF CO-RE techniques, from Andrii Nakryiko.
3) Add BPF link_info support for uprobe multi link along with bpftool
integration for the latter, from Jiri Olsa.
4) Use pkg-config in BPF selftests to determine ld flags which is
in particular needed for linking statically, from Akihiko Odaki.
5) Fix a few BPF selftest failures to adapt to the upcoming LLVM18,
from Yonghong Song.
* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (30 commits)
bpf/tests: Remove duplicate JSGT tests
selftests/bpf: Add TX side to xdp_hw_metadata
selftests/bpf: Convert xdp_hw_metadata to XDP_USE_NEED_WAKEUP
selftests/bpf: Add TX side to xdp_metadata
selftests/bpf: Add csum helpers
selftests/xsk: Support tx_metadata_len
xsk: Add option to calculate TX checksum in SW
xsk: Validate xsk_tx_metadata flags
xsk: Document tx_metadata_len layout
net: stmmac: Add Tx HWTS support to XDP ZC
net/mlx5e: Implement AF_XDP TX timestamp and checksum offload
tools: ynl: Print xsk-features from the sample
xsk: Add TX timestamp and TX checksum offload support
xsk: Support tx_metadata_len
selftests/bpf: Use pkg-config for libelf
selftests/bpf: Override PKG_CONFIG for static builds
selftests/bpf: Choose pkg-config for the target
bpftool: Add support to display uprobe_multi links
selftests/bpf: Add link_info test for uprobe_multi link
selftests/bpf: Use bpf_link__destroy in fill_link_info tests
...
====================
Conflicts:
Documentation/netlink/specs/netdev.yaml:
839ff60df3ab ("net: page_pool: add nlspec for basic access to page pools")
48eb03dd2630 ("xsk: Add TX timestamp and TX checksum offload support")
https://lore.kernel.org/all/20231201094705.1ee3cab8@canb.auug.org.au/
While at it, also regenerate the generated code; the tree is dirty after:
48eb03dd2630 ("xsk: Add TX timestamp and TX checksum offload support")
It looks like the code wasn't re-rendered after "render-max" was removed.
Link: https://lore.kernel.org/r/20231130145708.32573-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include/net')
-rw-r--r-- | include/net/xdp_sock.h | 111 | ||||
-rw-r--r-- | include/net/xdp_sock_drv.h | 34 | ||||
-rw-r--r-- | include/net/xsk_buff_pool.h | 8 |
3 files changed, 153 insertions, 0 deletions
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index f83128007fb0..3cb4dc9bd70e 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -30,6 +30,7 @@ struct xdp_umem {
 	struct user_struct *user;
 	refcount_t users;
 	u8 flags;
+	u8 tx_metadata_len;
 	bool zc;
 	struct page **pgs;
 	int id;
@@ -92,12 +93,105 @@ struct xdp_sock {
 	struct xsk_queue *cq_tmp; /* Only as tmp storage before bind */
 };
 
+/*
+ * AF_XDP TX metadata hooks for network devices.
+ * The following hooks can be defined; unless noted otherwise, they are
+ * optional and can be filled with a null pointer.
+ *
+ * void (*tmo_request_timestamp)(void *priv)
+ *     Called when AF_XDP frame requested egress timestamp.
+ *
+ * u64 (*tmo_fill_timestamp)(void *priv)
+ *     Called when AF_XDP frame, that had requested egress timestamp,
+ *     received a completion. The hook needs to return the actual HW timestamp.
+ *
+ * void (*tmo_request_checksum)(u16 csum_start, u16 csum_offset, void *priv)
+ *     Called when AF_XDP frame requested HW checksum offload. csum_start
+ *     indicates position where checksumming should start.
+ *     csum_offset indicates position where checksum should be stored.
+ *
+ */
+struct xsk_tx_metadata_ops {
+	void	(*tmo_request_timestamp)(void *priv);
+	u64	(*tmo_fill_timestamp)(void *priv);
+	void	(*tmo_request_checksum)(u16 csum_start, u16 csum_offset, void *priv);
+};
+
 #ifdef CONFIG_XDP_SOCKETS
 
 int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
 int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
 void __xsk_map_flush(void);
 
+/**
+ *  xsk_tx_metadata_to_compl - Save enough relevant metadata information
+ *  to perform tx completion in the future.
+ *  @meta: pointer to AF_XDP metadata area
+ *  @compl: pointer to output struct xsk_tx_metadata_compl
+ *
+ *  This function should be called by the networking device when
+ *  it prepares AF_XDP egress packet. The value of @compl should be stored
+ *  and passed to xsk_tx_metadata_complete upon TX completion.
+ */
+static inline void xsk_tx_metadata_to_compl(struct xsk_tx_metadata *meta,
+					    struct xsk_tx_metadata_compl *compl)
+{
+	if (!meta)
+		return;
+
+	if (meta->flags & XDP_TXMD_FLAGS_TIMESTAMP)
+		compl->tx_timestamp = &meta->completion.tx_timestamp;
+	else
+		compl->tx_timestamp = NULL;
+}
+
+/**
+ *  xsk_tx_metadata_request - Evaluate AF_XDP TX metadata at submission
+ *  and call appropriate xsk_tx_metadata_ops operation.
+ *  @meta: pointer to AF_XDP metadata area
+ *  @ops: pointer to struct xsk_tx_metadata_ops
+ *  @priv: pointer to driver-private area
+ *
+ *  This function should be called by the networking device when
+ *  it prepares AF_XDP egress packet.
+ */
+static inline void xsk_tx_metadata_request(const struct xsk_tx_metadata *meta,
+					   const struct xsk_tx_metadata_ops *ops,
+					   void *priv)
+{
+	if (!meta)
+		return;
+
+	if (ops->tmo_request_timestamp)
+		if (meta->flags & XDP_TXMD_FLAGS_TIMESTAMP)
+			ops->tmo_request_timestamp(priv);
+
+	if (ops->tmo_request_checksum)
+		if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM)
+			ops->tmo_request_checksum(meta->request.csum_start,
+						  meta->request.csum_offset, priv);
+}
+
+/**
+ *  xsk_tx_metadata_complete - Evaluate AF_XDP TX metadata at completion
+ *  and call appropriate xsk_tx_metadata_ops operation.
+ *  @compl: pointer to completion metadata produced from xsk_tx_metadata_to_compl
+ *  @ops: pointer to struct xsk_tx_metadata_ops
+ *  @priv: pointer to driver-private area
+ *
+ *  This function should be called by the networking device upon
+ *  AF_XDP egress completion. Does nothing if no timestamp was requested.
+ */
+static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl,
+					    const struct xsk_tx_metadata_ops *ops,
+					    void *priv)
+{
+	if (!compl || !compl->tx_timestamp) /* NULL when no timestamp requested */
+		return;
+
+	*compl->tx_timestamp = ops->tmo_fill_timestamp(priv);
+}
+
 #else
 
 static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
@@ -114,6 +208,23 @@ static inline void __xsk_map_flush(void)
 {
 }
 
+static inline void xsk_tx_metadata_to_compl(struct xsk_tx_metadata *meta,
+					    struct xsk_tx_metadata_compl *compl)
+{
+}
+
+static inline void xsk_tx_metadata_request(const struct xsk_tx_metadata *meta,
+					   const struct xsk_tx_metadata_ops *ops,
+					   void *priv)
+{
+}
+
+static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl,
+					    const struct xsk_tx_metadata_ops *ops,
+					    void *priv)
+{
+}
+
 #endif /* CONFIG_XDP_SOCKETS */
 
 #if defined(CONFIG_XDP_SOCKETS) && defined(CONFIG_DEBUG_NET)
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 1f6fc8c7a84c..81e02de3f453 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -165,6 +165,30 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
 	return xp_raw_get_data(pool, addr);
 }
 
+#define XDP_TXMD_FLAGS_VALID ( \
+		XDP_TXMD_FLAGS_TIMESTAMP | \
+		XDP_TXMD_FLAGS_CHECKSUM | \
+	0)
+
+static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta)
+{
+	return !(meta->flags & ~XDP_TXMD_FLAGS_VALID);
+}
+
+static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
+{
+	struct xsk_tx_metadata *meta;
+
+	if (!pool->tx_metadata_len)
+		return NULL;
+
+	meta = xp_raw_get_data(pool, addr) - pool->tx_metadata_len;
+	if (unlikely(!xsk_buff_valid_tx_metadata(meta)))
+		return NULL; /* no way to signal the error to the user */
+
+	return meta;
+}
+
 static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
 {
 	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
@@ -324,6 +348,16 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
 	return NULL;
 }
 
+static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta)
+{
+	return false;
+}
+
+static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
+{
+	return NULL;
+}
+
 static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
 {
 }
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index b0bdff26fc88..8d48d37ab7c0 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -33,6 +33,7 @@ struct xdp_buff_xsk {
 };
 
 #define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb))
+#define XSK_TX_COMPL_FITS(t) BUILD_BUG_ON(sizeof(struct xsk_tx_metadata_compl) > sizeof(t))
 
 struct xsk_dma_map {
 	dma_addr_t *dma_pages;
@@ -77,10 +78,12 @@ struct xsk_buff_pool {
 	u32 chunk_size;
 	u32 chunk_shift;
 	u32 frame_len;
+	u8 tx_metadata_len; /* inherited from umem */
 	u8 cached_need_wakeup;
 	bool uses_need_wakeup;
 	bool dma_need_sync;
 	bool unaligned;
+	bool tx_sw_csum;
 	void *addrs;
 	/* Mutual exclusion of the completion ring in the SKB mode. Two cases to protect:
 	 * NAPI TX thread and sendmsg error paths in the SKB destructor callback and when
@@ -233,4 +236,9 @@ static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb)
 	return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
 }
 
+static inline bool xp_tx_metadata_enabled(const struct xsk_buff_pool *pool)
+{
+	return pool->tx_metadata_len > 0;
+}
+
 #endif /* XSK_BUFF_POOL_H_ */