From c61bcebde72de7f5dc194d28f29894f0f7661ff7 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 31 Jan 2023 21:44:54 +0100 Subject: ice: Prepare legacy-rx for upcoming XDP multi-buffer support Rx path is going to be modified in a way that fragmented frame will be gathered within xdp_buff in the first place. This approach implies that underlying buffer has to provide tailroom for skb_shared_info. This is currently the case when ring uses build_skb but not when legacy-rx knob is turned on. This case configures 2k Rx buffers and has no way to provide either headroom or tailroom - FWIW it currently has XDP_PACKET_HEADROOM which is broken and in here it is removed. 2k Rx buffers were used so driver in this setting was able to support 9k MTU as it can chain up to 5 Rx buffers. With offset configuring HW writing 2k of a data was passing the half of the page which broke the assumption of our internal page recycling tricks. Now if above got fixed and legacy-rx path would be left as is, when referring to skb_shared_info via xdp_get_shared_info_from_buff(), packet's content would be corrupted again. Hence size of Rx buffer needs to be lowered and therefore supported MTU. This operation will allow us to keep the unified data path and with 8k MTU users (if any of legacy-rx) would still be good to go. However, tendency is to drop the support for this code path at some point. Add ICE_RXBUF_1664 as vsi::rx_buf_len and ICE_MAX_FRAME_LEGACY_RX (8320) as vsi::max_frame for legacy-rx. For bigger page sizes configure 3k Rx buffers, not 2k. Since headroom support is removed, disable data_meta support on legacy-rx. When preparing XDP buff, rely on ice_rx_ring::rx_offset setting when deciding whether to support data_meta or not. Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/bpf/20230131204506.219292-2-maciej.fijalkowski@intel.com --- drivers/net/ethernet/intel/ice/ice_base.c | 3 --- drivers/net/ethernet/intel/ice/ice_lib.c | 8 ++------ drivers/net/ethernet/intel/ice/ice_main.c | 10 ++++++++-- drivers/net/ethernet/intel/ice/ice_txrx.c | 17 +++++------------ drivers/net/ethernet/intel/ice/ice_txrx.h | 2 ++ 5 files changed, 17 insertions(+), 23 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 554095b25f44..e36abcfeb958 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -355,9 +355,6 @@ static unsigned int ice_rx_offset(struct ice_rx_ring *rx_ring) { if (ice_ring_uses_build_skb(rx_ring)) return ICE_SKB_PAD; - else if (ice_is_xdp_ena_vsi(rx_ring->vsi)) - return XDP_PACKET_HEADROOM; - return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 301c1efe08b4..4a35479bd7dd 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1992,8 +1992,8 @@ void ice_update_eth_stats(struct ice_vsi *vsi) void ice_vsi_cfg_frame_size(struct ice_vsi *vsi) { if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) { - vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX; - vsi->rx_buf_len = ICE_RXBUF_2048; + vsi->max_frame = ICE_MAX_FRAME_LEGACY_RX; + vsi->rx_buf_len = ICE_RXBUF_1664; #if (PAGE_SIZE < 8192) } else if (!ICE_2K_TOO_SMALL_WITH_PADDING && (vsi->netdev->mtu <= ETH_DATA_LEN)) { @@ -2002,11 +2002,7 @@ void ice_vsi_cfg_frame_size(struct ice_vsi *vsi) #endif } else { vsi->max_frame = 
ICE_AQ_SET_MAC_FRAME_SIZE_MAX; -#if (PAGE_SIZE < 8192) vsi->rx_buf_len = ICE_RXBUF_3072; -#else - vsi->rx_buf_len = ICE_RXBUF_2048; -#endif } } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index fce86e8ff834..f7b1acee2b71 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -7336,8 +7336,8 @@ clear_recovery: */ static int ice_max_xdp_frame_size(struct ice_vsi *vsi) { - if (PAGE_SIZE >= 8192 || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) - return ICE_RXBUF_2048 - XDP_PACKET_HEADROOM; + if (test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) + return ICE_RXBUF_1664; else return ICE_RXBUF_3072; } @@ -7370,6 +7370,12 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) frame_size - ICE_ETH_PKT_HDR_PAD); return -EINVAL; } + } else if (test_bit(ICE_FLAG_LEGACY_RX, pf->flags)) { + if (new_mtu + ICE_ETH_PKT_HDR_PAD > ICE_MAX_FRAME_LEGACY_RX) { + netdev_err(netdev, "Too big MTU for legacy-rx; Max is %d\n", + ICE_MAX_FRAME_LEGACY_RX - ICE_ETH_PKT_HDR_PAD); + return -EINVAL; + } } /* if a reset is in progress, wait for some time for it to complete */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index ccf09c957a1c..d0d78e5003d8 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -984,17 +984,15 @@ static struct sk_buff * ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, struct xdp_buff *xdp) { - unsigned int metasize = xdp->data - xdp->data_meta; unsigned int size = xdp->data_end - xdp->data; unsigned int headlen; struct sk_buff *skb; /* prefetch first cache line of first page */ - net_prefetch(xdp->data_meta); + net_prefetch(xdp->data); /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, - ICE_RX_HDR_SIZE + metasize, + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; @@ -1006,13 +1004,8 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE); /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta, - ALIGN(headlen + metasize, sizeof(long))); - - if (metasize) { - skb_metadata_set(skb, metasize); - __skb_pull(skb, metasize); - } + memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen, + sizeof(long))); /* if we exhaust the linear part then add what is left as a frag */ size -= headlen; @@ -1187,7 +1180,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) hard_start = page_address(rx_buf->page) + rx_buf->page_offset - offset; - xdp_prepare_buff(&xdp, hard_start, offset, size, true); + xdp_prepare_buff(&xdp, hard_start, offset, size, !!offset); #if (PAGE_SIZE > 4096) /* At larger PAGE_SIZE, frame_sz depend on len size */ xdp.frame_sz = ice_rx_frame_truesize(rx_ring, size); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 4fd0e5d0a313..166713f8abbd 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -9,10 +9,12 @@ #define ICE_DFLT_IRQ_WORK 256 #define ICE_RXBUF_3072 3072 #define ICE_RXBUF_2048 2048 +#define ICE_RXBUF_1664 1664 #define ICE_RXBUF_1536 1536 #define ICE_MAX_CHAINED_RX_BUFS 5 #define ICE_MAX_BUF_TXD 8 #define ICE_MIN_TX_LEN 17 +#define 
ICE_MAX_FRAME_LEGACY_RX 8320 /* The size limit for a transmit buffer in a descriptor is (16K - 1). * In order to align with the read requests we will align the value to -- cgit v1.2.3 From cb0473e0e9dccaa0ddafb252f2c3ef943b86bb56 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 31 Jan 2023 21:44:55 +0100 Subject: ice: Add xdp_buff to ice_rx_ring struct In preparation for XDP multi-buffer support, let's store xdp_buff on Rx ring struct. This will allow us to combine fragmented frames across separate NAPI cycles in the same way as currently skb fragments are handled. This means that skb pointer on Rx ring will become redundant and will be removed. For now it is kept and layout of Rx ring struct was not inspected, some member movement will be needed later on so that will be the time to take care of it. Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/bpf/20230131204506.219292-3-maciej.fijalkowski@intel.com --- drivers/net/ethernet/intel/ice/ice_base.c | 1 + drivers/net/ethernet/intel/ice/ice_txrx.c | 39 ++++++++++++++++++------------- drivers/net/ethernet/intel/ice/ice_txrx.h | 1 + 3 files changed, 25 insertions(+), 16 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index e36abcfeb958..5b66f6f7db78 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -533,6 +533,7 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) } } + xdp_init_buff(&ring->xdp, ice_rx_pg_size(ring) / 2, &ring->xdp_rxq); err = ice_setup_rx_ctx(ring); if (err) { dev_err(dev, "ice_setup_rx_ctx failed for RxQ %d, err %d\n", diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index d0d78e5003d8..3486976b3998 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -523,8 +523,16 @@ err: return -ENOMEM; } +/** + * ice_rx_frame_truesize + * @rx_ring: ptr to Rx ring + * @size: size + * + * calculate the truesize with taking into the account PAGE_SIZE of + * underlying arch + */ static unsigned int -ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, unsigned int __maybe_unused size) +ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size) { unsigned int truesize; @@ -1103,21 +1111,20 @@ ice_is_non_eop(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc) */ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) { - unsigned int total_rx_bytes = 0, total_rx_pkts = 0, frame_sz = 0; + unsigned int total_rx_bytes = 0, total_rx_pkts = 0; u16 cleaned_count = ICE_DESC_UNUSED(rx_ring); unsigned int offset = rx_ring->rx_offset; + struct xdp_buff *xdp = &rx_ring->xdp; struct ice_tx_ring *xdp_ring = NULL; unsigned int xdp_res, xdp_xmit = 0; struct sk_buff *skb = rx_ring->skb; struct bpf_prog *xdp_prog = NULL; - struct xdp_buff xdp; bool failure; /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ #if (PAGE_SIZE < 8192) - frame_sz = ice_rx_frame_truesize(rx_ring, 0); + xdp->frame_sz = ice_rx_frame_truesize(rx_ring, 0); #endif - xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq); xdp_prog = READ_ONCE(rx_ring->xdp_prog); if (xdp_prog) @@ -1171,30 +1178,30 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) rx_buf = ice_get_rx_buf(rx_ring, size, &rx_buf_pgcnt); if (!size) { - xdp.data = NULL; - xdp.data_end = NULL; - xdp.data_hard_start = NULL; - xdp.data_meta = NULL; + 
xdp->data = NULL; + xdp->data_end = NULL; + xdp->data_hard_start = NULL; + xdp->data_meta = NULL; goto construct_skb; } hard_start = page_address(rx_buf->page) + rx_buf->page_offset - offset; - xdp_prepare_buff(&xdp, hard_start, offset, size, !!offset); + xdp_prepare_buff(xdp, hard_start, offset, size, !!offset); #if (PAGE_SIZE > 4096) /* At larger PAGE_SIZE, frame_sz depend on len size */ - xdp.frame_sz = ice_rx_frame_truesize(rx_ring, size); + xdp->frame_sz = ice_rx_frame_truesize(rx_ring, size); #endif if (!xdp_prog) goto construct_skb; - xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog, xdp_ring); + xdp_res = ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring); if (!xdp_res) goto construct_skb; if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) { xdp_xmit |= xdp_res; - ice_rx_buf_adjust_pg_offset(rx_buf, xdp.frame_sz); + ice_rx_buf_adjust_pg_offset(rx_buf, xdp->frame_sz); } else { rx_buf->pagecnt_bias++; } @@ -1207,11 +1214,11 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) construct_skb: if (skb) { ice_add_rx_frag(rx_ring, rx_buf, skb, size); - } else if (likely(xdp.data)) { + } else if (likely(xdp->data)) { if (ice_ring_uses_build_skb(rx_ring)) - skb = ice_build_skb(rx_ring, rx_buf, &xdp); + skb = ice_build_skb(rx_ring, rx_buf, xdp); else - skb = ice_construct_skb(rx_ring, rx_buf, &xdp); + skb = ice_construct_skb(rx_ring, rx_buf, xdp); } /* exit if we failed to retrieve a buffer */ if (!skb) { diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 166713f8abbd..b0c39d557008 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -303,6 +303,7 @@ struct ice_rx_ring { struct bpf_prog *xdp_prog; struct ice_tx_ring *xdp_ring; struct xsk_buff_pool *xsk_pool; + struct xdp_buff xdp; struct sk_buff *skb; dma_addr_t dma; /* physical address of ring */ u64 cached_phctime; -- cgit v1.2.3 From ac0753391195011ded23696d7882428e5c419a98 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 31 Jan 2023 21:44:56 +0100 Subject: ice: Store page count inside ice_rx_buf This will allow us to avoid carrying additional auxiliary array of page counts when dealing with XDP multi buffer support. Previously combining fragmented frame to skb was not affected in the same way as XDP would be as whole frame is needed to be in place before executing XDP prog. Therefore, when going through HW Rx descriptors one-by-one, calls to ice_put_rx_buf() need to be taken *after* running XDP prog on a potentially multi buffered frame, so some additional storage of page count is needed. By adding page count to rx buf, it will make it easier to walk through processed entries at the end of rx cleaning routine and decide whether or not buffers should be recycled. While at it, bump ice_rx_buf::pagecnt_bias from u16 up to u32. It was proven many times that calculations on variables smaller than standard register size are harmful. This was also the case during experiments with embedding page count to ice_rx_buf - when this was added as u16 it had a performance impact. 
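As an illustrative aside (not from the patch itself): the ownership test that the cached page count feeds can be modelled in a few lines of standalone C. The struct below is a simplified stand-in for ice_rx_buf and only the PAGE_SIZE < 8192 branch is shown; the real ice_can_reuse_rx_page() performs further checks on larger pages.

        #include <stdbool.h>

        struct toy_rx_buf {
                unsigned int pgcnt;         /* page_count() snapshot taken in ice_get_rx_buf() */
                unsigned int pagecnt_bias;  /* references the driver still holds on the page */
                unsigned int page_offset;
        };

        /* reuse is allowed only when the driver is the sole owner of the page */
        static bool toy_can_reuse_rx_page(const struct toy_rx_buf *rx_buf)
        {
                if (rx_buf->pgcnt - rx_buf->pagecnt_bias > 1)
                        return false;
                return true;
        }

The point of caching pgcnt in the buffer itself is that this test can run long after the descriptor was processed, when buffers are put back in bulk at the end of the Rx cleaning routine.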
Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/bpf/20230131204506.219292-4-maciej.fijalkowski@intel.com --- drivers/net/ethernet/intel/ice/ice_txrx.c | 26 ++++++++++---------------- drivers/net/ethernet/intel/ice/ice_txrx.h | 3 ++- 2 files changed, 12 insertions(+), 17 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 3486976b3998..220d1c43674a 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -791,7 +791,6 @@ ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size) /** * ice_can_reuse_rx_page - Determine if page can be reused for another Rx * @rx_buf: buffer containing the page - * @rx_buf_pgcnt: rx_buf page refcount pre xdp_do_redirect() call * * If page is reusable, we have a green light for calling ice_reuse_rx_page, * which will assign the current buffer to the buffer that next_to_alloc is @@ -799,7 +798,7 @@ ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size) * page freed */ static bool -ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt) +ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) { unsigned int pagecnt_bias = rx_buf->pagecnt_bias; struct page *page = rx_buf->page; @@ -810,7 +809,7 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt) #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely((rx_buf_pgcnt - pagecnt_bias) > 1)) + if (unlikely(rx_buf->pgcnt - pagecnt_bias > 1)) return false; #else #define ICE_LAST_OFFSET \ @@ -894,19 +893,17 @@ ice_reuse_rx_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *old_buf) * ice_get_rx_buf - Fetch Rx buffer and synchronize data for use * @rx_ring: Rx descriptor ring to transact packets on * @size: size of buffer to add to skb - * @rx_buf_pgcnt: rx_buf page refcount * * This function will pull an Rx buffer from the ring and synchronize it * for use by the CPU. */ static struct ice_rx_buf * -ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, - int *rx_buf_pgcnt) +ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size) { struct ice_rx_buf *rx_buf; rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean]; - *rx_buf_pgcnt = + rx_buf->pgcnt = #if (PAGE_SIZE < 8192) page_count(rx_buf->page); #else @@ -1042,15 +1039,13 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, * ice_put_rx_buf - Clean up used buffer and either recycle or free * @rx_ring: Rx descriptor ring to transact packets on * @rx_buf: Rx buffer to pull data from - * @rx_buf_pgcnt: Rx buffer page count pre xdp_do_redirect() * * This function will update next_to_clean and then clean up the contents * of the rx_buf. It will either recycle the buffer or unmap it and free * the associated resources. 
*/ static void -ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, - int rx_buf_pgcnt) +ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf) { u16 ntc = rx_ring->next_to_clean + 1; @@ -1061,7 +1056,7 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, if (!rx_buf) return; - if (ice_can_reuse_rx_page(rx_buf, rx_buf_pgcnt)) { + if (ice_can_reuse_rx_page(rx_buf)) { /* hand second half of page back to the ring */ ice_reuse_rx_page(rx_ring, rx_buf); } else { @@ -1137,7 +1132,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) unsigned char *hard_start; unsigned int size; u16 stat_err_bits; - int rx_buf_pgcnt; u16 vlan_tag = 0; u16 rx_ptype; @@ -1166,7 +1160,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) if (rx_desc->wb.rxdid == FDIR_DESC_RXDID && ctrl_vsi->vf) ice_vc_fdir_irq_handler(ctrl_vsi, rx_desc); - ice_put_rx_buf(rx_ring, NULL, 0); + ice_put_rx_buf(rx_ring, NULL); cleaned_count++; continue; } @@ -1175,7 +1169,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) ICE_RX_FLX_DESC_PKT_LEN_M; /* retrieve a buffer from the ring */ - rx_buf = ice_get_rx_buf(rx_ring, size, &rx_buf_pgcnt); + rx_buf = ice_get_rx_buf(rx_ring, size); if (!size) { xdp->data = NULL; @@ -1209,7 +1203,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) total_rx_pkts++; cleaned_count++; - ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt); + ice_put_rx_buf(rx_ring, rx_buf); continue; construct_skb: if (skb) { @@ -1228,7 +1222,7 @@ construct_skb: break; } - ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt); + ice_put_rx_buf(rx_ring, rx_buf); cleaned_count++; /* skip if it is NOP desc */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index b0c39d557008..717355c6d4c5 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -172,7 +172,8 @@ struct ice_rx_buf { dma_addr_t dma; struct page *page; unsigned int page_offset; - u16 pagecnt_bias; + unsigned int pgcnt; + unsigned int pagecnt_bias; }; struct ice_q_stats { -- cgit v1.2.3 From d7956d81f1502d3818500cff4847f3e9ae0c6aa4 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 31 Jan 2023 21:44:57 +0100 Subject: ice: Pull out next_to_clean bump out of ice_put_rx_buf() Plan is to move ice_put_rx_buf() to the end of ice_clean_rx_irq() so in order to keep the ability of walking through HW Rx descriptors, pull out next_to_clean handling out of ice_put_rx_buf(). Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/bpf/20230131204506.219292-5-maciej.fijalkowski@intel.com --- drivers/net/ethernet/intel/ice/ice_txrx.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 220d1c43674a..38f38cb0d412 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -898,11 +898,12 @@ ice_reuse_rx_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *old_buf) * for use by the CPU. 
*/ static struct ice_rx_buf * -ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size) +ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, + const unsigned int ntc) { struct ice_rx_buf *rx_buf; - rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean]; + rx_buf = &rx_ring->rx_buf[ntc]; rx_buf->pgcnt = #if (PAGE_SIZE < 8192) page_count(rx_buf->page); @@ -1040,19 +1041,12 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, * @rx_ring: Rx descriptor ring to transact packets on * @rx_buf: Rx buffer to pull data from * - * This function will update next_to_clean and then clean up the contents - * of the rx_buf. It will either recycle the buffer or unmap it and free - * the associated resources. + * This function will clean up the contents of the rx_buf. It will either + * recycle the buffer or unmap it and free the associated resources. */ static void ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf) { - u16 ntc = rx_ring->next_to_clean + 1; - - /* fetch, update, and store next to clean */ - ntc = (ntc < rx_ring->count) ? ntc : 0; - rx_ring->next_to_clean = ntc; - if (!rx_buf) return; @@ -1114,6 +1108,8 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) unsigned int xdp_res, xdp_xmit = 0; struct sk_buff *skb = rx_ring->skb; struct bpf_prog *xdp_prog = NULL; + u32 ntc = rx_ring->next_to_clean; + u32 cnt = rx_ring->count; bool failure; /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ @@ -1136,7 +1132,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) u16 rx_ptype; /* get the Rx desc from Rx ring based on 'next_to_clean' */ - rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean); + rx_desc = ICE_RX_DESC(rx_ring, ntc); /* status_error_len will always be zero for unused descriptors * because it's cleared in cleanup, and overlaps with hdr_addr @@ -1160,6 +1156,8 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) if (rx_desc->wb.rxdid == FDIR_DESC_RXDID && ctrl_vsi->vf) ice_vc_fdir_irq_handler(ctrl_vsi, rx_desc); + if (++ntc == cnt) + ntc = 0; ice_put_rx_buf(rx_ring, NULL); cleaned_count++; continue; @@ -1169,7 +1167,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) ICE_RX_FLX_DESC_PKT_LEN_M; /* retrieve a buffer from the ring */ - rx_buf = ice_get_rx_buf(rx_ring, size); + rx_buf = ice_get_rx_buf(rx_ring, size, ntc); if (!size) { xdp->data = NULL; @@ -1203,6 +1201,8 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) total_rx_pkts++; cleaned_count++; + if (++ntc == cnt) + ntc = 0; ice_put_rx_buf(rx_ring, rx_buf); continue; construct_skb: @@ -1222,6 +1222,8 @@ construct_skb: break; } + if (++ntc == cnt) + ntc = 0; ice_put_rx_buf(rx_ring, rx_buf); cleaned_count++; @@ -1262,6 +1264,7 @@ construct_skb: total_rx_pkts++; } + rx_ring->next_to_clean = ntc; /* return up to cleaned_count buffers to hardware */ failure = ice_alloc_rx_bufs(rx_ring, cleaned_count); -- cgit v1.2.3 From e44f4790a2ba3585fad5d40b61e1fd5d2a9835ab Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 31 Jan 2023 21:44:58 +0100 Subject: ice: Inline eop check This might be in future used by ZC driver and might potentially yield a minor performance boost. While at it, constify arguments that ice_is_non_eop() takes, since they are pointers and this will help compiler while generating asm. 
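For illustration only, here is a simplified user-space model of the descriptor walk that the previous patch and this one converge on: next_to_clean is kept in a local variable, bumped and wrapped per descriptor, and the inlined EOP helper decides whether the loop keeps gathering buffers of the same frame. The descriptor fields are stubs, not the real flex descriptor layout.

        #include <stdbool.h>
        #include <stdint.h>

        struct toy_desc {
                bool dd;        /* descriptor done: HW has written this entry back */
                bool eop;       /* end of packet: last buffer of the frame */
        };

        /* mirrors the inlined ice_is_non_eop(): false for the last buffer of a frame */
        static bool toy_is_non_eop(const struct toy_desc *d)
        {
                return !d->eop;
        }

        static void toy_clean(const struct toy_desc *ring, uint32_t cnt,
                              uint32_t *next_to_clean, int budget)
        {
                uint32_t ntc = *next_to_clean;  /* local copy, as in the patch */

                while (budget > 0) {
                        const struct toy_desc *d = &ring[ntc];

                        if (!d->dd)             /* nothing more written back by HW */
                                break;

                        if (++ntc == cnt)       /* bump and wrap the local index */
                                ntc = 0;

                        if (toy_is_non_eop(d))  /* keep collecting this frame's buffers */
                                continue;

                        /* frame complete: run XDP / build the skb here */
                        budget--;
                }

                *next_to_clean = ntc;           /* single write-back per poll */
        }

Keeping the bump outside ice_put_rx_buf() is what later allows the buffers of a NAPI cycle to be put back in a separate pass.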
Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/bpf/20230131204506.219292-6-maciej.fijalkowski@intel.com --- drivers/net/ethernet/intel/ice/ice_txrx.c | 21 --------------------- drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 22 ++++++++++++++++++++++ 2 files changed, 22 insertions(+), 21 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 38f38cb0d412..11876f5d1a79 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1065,27 +1065,6 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf) rx_buf->page = NULL; } -/** - * ice_is_non_eop - process handling of non-EOP buffers - * @rx_ring: Rx ring being processed - * @rx_desc: Rx descriptor for current buffer - * - * If the buffer is an EOP buffer, this function exits returning false, - * otherwise return true indicating that this is in fact a non-EOP buffer. - */ -static bool -ice_is_non_eop(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc) -{ - /* if we are the last buffer then there is nothing else to do */ -#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S) - if (likely(ice_test_staterr(rx_desc->wb.status_error0, ICE_RXD_EOF))) - return false; - - rx_ring->ring_stats->rx_stats.non_eop_descs++; - - return true; -} - /** * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * @rx_ring: Rx descriptor ring to transact packets on diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index c7d2954dc9ea..30e3cffdb2f1 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -21,6 +21,28 @@ ice_test_staterr(__le16 status_err_n, const u16 stat_err_bits) return !!(status_err_n & cpu_to_le16(stat_err_bits)); } +/** + * ice_is_non_eop - process handling of non-EOP buffers + * @rx_ring: Rx ring being processed + * @rx_desc: Rx descriptor for current buffer + * + * If the buffer is an EOP buffer, this function exits returning false, + * otherwise return true indicating that this is in fact a non-EOP buffer. + */ +static inline bool +ice_is_non_eop(const struct ice_rx_ring *rx_ring, + const union ice_32b_rx_flex_desc *rx_desc) +{ + /* if we are the last buffer then there is nothing else to do */ +#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S) + if (likely(ice_test_staterr(rx_desc->wb.status_error0, ICE_RXD_EOF))) + return false; + + rx_ring->ring_stats->rx_stats.non_eop_descs++; + + return true; +} + static inline __le64 ice_build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag) { -- cgit v1.2.3 From 1dc1a7e7f4108bad4af4c7c838f963d342ac0544 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 31 Jan 2023 21:44:59 +0100 Subject: ice: Centrallize Rx buffer recycling Currently calls to ice_put_rx_buf() are sprinkled through ice_clean_rx_irq() - first place is for explicit flow director's descriptor handling, second is after running XDP prog and the last one is after taking care of skb. 1st callsite was actually only for ntc bump purpose, as Rx buffer to be recycled is not even passed to a function. It is possible to walk through Rx buffers processed in particular NAPI cycle by caching ntc from beginning of the ice_clean_rx_irq(). To do so, let us store XDP verdict inside ice_rx_buf, so action we need to take on will be known. 
For XDP prog absence, just store ICE_XDP_PASS as a verdict. Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/bpf/20230131204506.219292-7-maciej.fijalkowski@intel.com --- drivers/net/ethernet/intel/ice/ice_txrx.c | 81 ++++++++++++++++--------------- drivers/net/ethernet/intel/ice/ice_txrx.h | 1 + 2 files changed, 44 insertions(+), 38 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 11876f5d1a79..dd4ed9340935 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -553,34 +553,39 @@ ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size) * @xdp: xdp_buff used as input to the XDP program * @xdp_prog: XDP program to run * @xdp_ring: ring to be used for XDP_TX action + * @rx_buf: Rx buffer to store the XDP action * * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} */ -static int +static void ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring) + struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring, + struct ice_rx_buf *rx_buf) { - int err; + unsigned int ret = ICE_XDP_PASS; u32 act; + if (!xdp_prog) + goto exit; + act = bpf_prog_run_xdp(xdp_prog, xdp); switch (act) { case XDP_PASS: - return ICE_XDP_PASS; + break; case XDP_TX: if (static_branch_unlikely(&ice_xdp_locking_key)) spin_lock(&xdp_ring->tx_lock); - err = ice_xmit_xdp_ring(xdp->data, xdp->data_end - xdp->data, xdp_ring); + ret = ice_xmit_xdp_ring(xdp->data, xdp->data_end - xdp->data, xdp_ring); if (static_branch_unlikely(&ice_xdp_locking_key)) spin_unlock(&xdp_ring->tx_lock); - if (err == ICE_XDP_CONSUMED) + if (ret == ICE_XDP_CONSUMED) goto out_failure; - return err; + break; case XDP_REDIRECT: - err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); - if (err) + if (xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog)) goto out_failure; - return ICE_XDP_REDIR; + ret = ICE_XDP_REDIR; + break; default: bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act); fallthrough; @@ -589,8 +594,10 @@ out_failure: trace_xdp_exception(rx_ring->netdev, xdp_prog, act); fallthrough; case XDP_DROP: - return ICE_XDP_CONSUMED; + ret = ICE_XDP_CONSUMED; } +exit: + rx_buf->act = ret; } /** @@ -855,9 +862,6 @@ ice_add_rx_frag(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, return; skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buf->page, rx_buf->page_offset, size, truesize); - - /* page is being used so we must update the page offset */ - ice_rx_buf_adjust_pg_offset(rx_buf, truesize); } /** @@ -970,9 +974,6 @@ ice_build_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, if (metasize) skb_metadata_set(skb, metasize); - /* buffer is used by skb, update page_offset */ - ice_rx_buf_adjust_pg_offset(rx_buf, truesize); - return skb; } @@ -1023,14 +1024,13 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, #endif skb_add_rx_frag(skb, 0, rx_buf->page, rx_buf->page_offset + headlen, size, truesize); - /* buffer is used by skb, update page_offset */ - ice_rx_buf_adjust_pg_offset(rx_buf, truesize); } else { - /* buffer is unused, reset bias back to rx_buf; data was copied - * onto skb's linear part so there's no need for adjusting - * page offset and we can reuse this buffer as-is + /* buffer is unused, change the act that should be taken later + * on; data was copied onto skb's linear part so there's no + * 
need for adjusting page offset and we can reuse this buffer + * as-is */ - rx_buf->pagecnt_bias++; + rx_buf->act = ICE_XDP_CONSUMED; } return skb; @@ -1084,11 +1084,12 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) unsigned int offset = rx_ring->rx_offset; struct xdp_buff *xdp = &rx_ring->xdp; struct ice_tx_ring *xdp_ring = NULL; - unsigned int xdp_res, xdp_xmit = 0; struct sk_buff *skb = rx_ring->skb; struct bpf_prog *xdp_prog = NULL; u32 ntc = rx_ring->next_to_clean; u32 cnt = rx_ring->count; + u32 cached_ntc = ntc; + u32 xdp_xmit = 0; bool failure; /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ @@ -1137,7 +1138,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) ice_vc_fdir_irq_handler(ctrl_vsi, rx_desc); if (++ntc == cnt) ntc = 0; - ice_put_rx_buf(rx_ring, NULL); cleaned_count++; continue; } @@ -1164,25 +1164,15 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) xdp->frame_sz = ice_rx_frame_truesize(rx_ring, size); #endif - if (!xdp_prog) + ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf); + if (rx_buf->act == ICE_XDP_PASS) goto construct_skb; - - xdp_res = ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring); - if (!xdp_res) - goto construct_skb; - if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) { - xdp_xmit |= xdp_res; - ice_rx_buf_adjust_pg_offset(rx_buf, xdp->frame_sz); - } else { - rx_buf->pagecnt_bias++; - } total_rx_bytes += size; total_rx_pkts++; cleaned_count++; if (++ntc == cnt) ntc = 0; - ice_put_rx_buf(rx_ring, rx_buf); continue; construct_skb: if (skb) { @@ -1203,7 +1193,6 @@ construct_skb: if (++ntc == cnt) ntc = 0; - ice_put_rx_buf(rx_ring, rx_buf); cleaned_count++; /* skip if it is NOP desc */ @@ -1243,6 +1232,22 @@ construct_skb: total_rx_pkts++; } + while (cached_ntc != ntc) { + struct ice_rx_buf *buf = &rx_ring->rx_buf[cached_ntc]; + + if (buf->act & (ICE_XDP_TX | ICE_XDP_REDIR)) { + ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); + xdp_xmit |= buf->act; + } else if (buf->act & ICE_XDP_CONSUMED) { + buf->pagecnt_bias++; + } else if (buf->act == ICE_XDP_PASS) { + ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); + } + + ice_put_rx_buf(rx_ring, buf); + if (++cached_ntc >= cnt) + cached_ntc = 0; + } rx_ring->next_to_clean = ntc; /* return up to cleaned_count buffers to hardware */ failure = ice_alloc_rx_bufs(rx_ring, cleaned_count); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 717355c6d4c5..9d67d6f1b1f5 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -173,6 +173,7 @@ struct ice_rx_buf { struct page *page; unsigned int page_offset; unsigned int pgcnt; + unsigned int act; unsigned int pagecnt_bias; }; -- cgit v1.2.3 From 60bc72b3c4e9127f29686770005da40b10be0576 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 31 Jan 2023 21:45:00 +0100 Subject: ice: Use ice_max_xdp_frame_size() in ice_xdp_setup_prog() This should have been used in there from day 1, let us address that before introducing XDP multi-buffer support for Rx side. 
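A hedged sketch of the check this patch moves into ice_xdp_setup_prog(), written as standalone C: the constants mirror ICE_RXBUF_1664 and ICE_RXBUF_3072 from earlier in the series, while the header overhead value used in main() is only a placeholder for ICE_ETH_PKT_HDR_PAD.

        #include <stdbool.h>
        #include <stdio.h>

        #define TOY_RXBUF_1664 1664     /* legacy-rx buffer size from this series */
        #define TOY_RXBUF_3072 3072     /* default Rx buffer size for XDP */

        static bool toy_xdp_mtu_ok(unsigned int mtu, unsigned int hdr_pad, bool legacy_rx)
        {
                unsigned int frame_size = mtu + hdr_pad;
                unsigned int max = legacy_rx ? TOY_RXBUF_1664 : TOY_RXBUF_3072;

                return frame_size <= max;       /* otherwise prog load fails with -EOPNOTSUPP */
        }

        int main(void)
        {
                unsigned int hdr_pad = 26;      /* placeholder for ICE_ETH_PKT_HDR_PAD */

                /* a 1500-byte MTU fits either way; a 3000-byte MTU only fits without legacy-rx */
                printf("%d %d\n", toy_xdp_mtu_ok(1500, hdr_pad, true),
                       toy_xdp_mtu_ok(3000, hdr_pad, true));
                return 0;
        }

A later patch in this series keeps this check only for programs without xdp_has_frags, since multi-buffer capable programs are no longer limited to a single Rx buffer.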
Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/bpf/20230131204506.219292-8-maciej.fijalkowski@intel.com --- drivers/net/ethernet/intel/ice/ice_main.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index f7b1acee2b71..47b35084888a 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2862,6 +2862,18 @@ int ice_vsi_determine_xdp_res(struct ice_vsi *vsi) return 0; } +/** + * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP + * @vsi: Pointer to VSI structure + */ +static int ice_max_xdp_frame_size(struct ice_vsi *vsi) +{ + if (test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) + return ICE_RXBUF_1664; + else + return ICE_RXBUF_3072; +} + /** * ice_xdp_setup_prog - Add or remove XDP eBPF program * @vsi: VSI to setup XDP for @@ -2872,11 +2884,11 @@ static int ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, struct netlink_ext_ack *extack) { - int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD; + unsigned int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD; bool if_running = netif_running(vsi->netdev); int ret = 0, xdp_ring_err = 0; - if (frame_size > vsi->rx_buf_len) { + if (frame_size > ice_max_xdp_frame_size(vsi)) { NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP"); return -EOPNOTSUPP; } @@ -7330,18 +7342,6 @@ clear_recovery: dev_err(dev, "Rebuild failed, unload and reload driver\n"); } -/** - * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP - * @vsi: Pointer to VSI structure - */ -static int ice_max_xdp_frame_size(struct ice_vsi *vsi) -{ - if (test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) - return ICE_RXBUF_1664; - else - return ICE_RXBUF_3072; -} - /** * ice_change_mtu - NDO callback to change the MTU * @netdev: network interface device structure -- cgit v1.2.3 From 9070fe3da0b126044c3cffd169d5fb2565365a0e Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 31 Jan 2023 21:45:01 +0100 Subject: ice: Do not call ice_finalize_xdp_rx() unnecessarily Currently ice_finalize_xdp_rx() is called only when xdp_prog is present on VSI, which is a good thing. However, this optimization can be enhanced and check only if any of the XDP_TX/XDP_REDIRECT took place in current Rx processing. Non-zero value of @xdp_xmit indicates that xdp_prog is present on VSI. This way XDP_DROP-based workloads will not suffer from unnecessary calls to external function. 
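To make the gating concrete, here is a minimal sketch with assumed helper names; the bit values follow the ICE_XDP_TX and ICE_XDP_REDIR defines from ice_txrx.h, and toy_finalize_xdp_rx() merely stands in for ice_finalize_xdp_rx(). It shows how per-buffer verdicts accumulate into the xdp_xmit mask and how the flush call is skipped when nothing was transmitted or redirected.

        #include <stdint.h>

        #define TOY_XDP_TX    (1u << 1)         /* mirrors ICE_XDP_TX */
        #define TOY_XDP_REDIR (1u << 2)         /* mirrors ICE_XDP_REDIR */

        /* stand-in for ice_finalize_xdp_rx(): bump the XDP Tx tail, flush redirects */
        static void toy_finalize_xdp_rx(uint32_t xdp_xmit)
        {
                (void)xdp_xmit;
        }

        static void toy_poll_end(const uint32_t *acts, int n)
        {
                uint32_t xdp_xmit = 0;
                int i;

                for (i = 0; i < n; i++)
                        if (acts[i] & (TOY_XDP_TX | TOY_XDP_REDIR))
                                xdp_xmit |= acts[i];

                /* XDP_PASS/XDP_DROP-only polls leave the mask at zero and skip the call */
                if (xdp_xmit)
                        toy_finalize_xdp_rx(xdp_xmit);
        }
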
Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/bpf/20230131204506.219292-9-maciej.fijalkowski@intel.com --- drivers/net/ethernet/intel/ice/ice_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index dd4ed9340935..b406e8cb8625 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1252,7 +1252,7 @@ construct_skb: /* return up to cleaned_count buffers to hardware */ failure = ice_alloc_rx_bufs(rx_ring, cleaned_count); - if (xdp_prog) + if (xdp_xmit) ice_finalize_xdp_rx(xdp_ring, xdp_xmit); rx_ring->skb = skb; -- cgit v1.2.3 From 8a11b334ec9b8088b54764424a49adb8ef1c002a Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 31 Jan 2023 21:45:02 +0100 Subject: ice: Use xdp->frame_sz instead of recalculating truesize SKB path calculates truesize on three different functions, which could be avoided as xdp_buff carries the already calculated truesize under xdp_buff::frame_sz. If ice_add_rx_frag() is adjusted to take the xdp_buff as an input just like functions responsible for creating sk_buff initially, codebase could be simplified by removing these redundant recalculations and rely on xdp_buff::frame_sz instead. Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/bpf/20230131204506.219292-10-maciej.fijalkowski@intel.com --- drivers/net/ethernet/intel/ice/ice_txrx.c | 33 +++++++++---------------------- 1 file changed, 9 insertions(+), 24 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index b406e8cb8625..8db712254cb5 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -840,6 +840,7 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) /** * ice_add_rx_frag - Add contents of Rx buffer to sk_buff as a frag * @rx_ring: Rx descriptor ring to transact packets on + * @xdp: XDP buffer * @rx_buf: buffer containing page to add * @skb: sk_buff to place the data into * @size: packet length from rx_desc @@ -849,19 +850,14 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) * The function will then update the page offset. */ static void -ice_add_rx_frag(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, - struct sk_buff *skb, unsigned int size) +ice_add_rx_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, + struct ice_rx_buf *rx_buf, struct sk_buff *skb, + unsigned int size) { -#if (PAGE_SIZE >= 8192) - unsigned int truesize = SKB_DATA_ALIGN(size + rx_ring->rx_offset); -#else - unsigned int truesize = ice_rx_pg_size(rx_ring) / 2; -#endif - if (!size) return; skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buf->page, - rx_buf->page_offset, size, truesize); + rx_buf->page_offset, size, xdp->frame_sz); } /** @@ -943,13 +939,6 @@ ice_build_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, struct xdp_buff *xdp) { u8 metasize = xdp->data - xdp->data_meta; -#if (PAGE_SIZE < 8192) - unsigned int truesize = ice_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + - SKB_DATA_ALIGN(xdp->data_end - - xdp->data_hard_start); -#endif struct sk_buff *skb; /* Prefetch first cache line of first page. 
If xdp->data_meta @@ -959,7 +948,7 @@ ice_build_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, */ net_prefetch(xdp->data_meta); /* build an skb around the page buffer */ - skb = napi_build_skb(xdp->data_hard_start, truesize); + skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz); if (unlikely(!skb)) return NULL; @@ -1017,13 +1006,9 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, /* if we exhaust the linear part then add what is left as a frag */ size -= headlen; if (size) { -#if (PAGE_SIZE >= 8192) - unsigned int truesize = SKB_DATA_ALIGN(size); -#else - unsigned int truesize = ice_rx_pg_size(rx_ring) / 2; -#endif skb_add_rx_frag(skb, 0, rx_buf->page, - rx_buf->page_offset + headlen, size, truesize); + rx_buf->page_offset + headlen, size, + xdp->frame_sz); } else { /* buffer is unused, change the act that should be taken later * on; data was copied onto skb's linear part so there's no @@ -1176,7 +1161,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) continue; construct_skb: if (skb) { - ice_add_rx_frag(rx_ring, rx_buf, skb, size); + ice_add_rx_frag(rx_ring, xdp, rx_buf, skb, size); } else if (likely(xdp->data)) { if (ice_ring_uses_build_skb(rx_ring)) skb = ice_build_skb(rx_ring, rx_buf, xdp); -- cgit v1.2.3 From 2fba7dc5157b6f85dbf1b8e26e63a724db1f3d79 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 31 Jan 2023 21:45:03 +0100 Subject: ice: Add support for XDP multi-buffer on Rx side Ice driver needs to be a bit reworked on Rx data path in order to support multi-buffer XDP. For skb path, it currently works in a way that Rx ring carries pointer to skb so if driver didn't manage to combine fragmented frame at current NAPI instance, it can restore the state on next instance and keep looking for last fragment (so descriptor with EOP bit set). What needs to be achieved is that xdp_buff needs to be combined in such way (linear + frags part) in the first place. Then skb will be ready to go in case of XDP_PASS or BPF program being not present on interface. If BPF program is there, it would work on multi-buffer XDP. At this point xdp_buff resides directly on Rx ring, so given the fact that skb will be built straight from xdp_buff, there will be no further need to carry skb on Rx ring. Besides removing skb pointer from Rx ring, lots of members have been moved around within ice_rx_ring. First and foremost reason was to place rx_buf with xdp_buff on the same cacheline. This means that once we touch rx_buf (which is a preceding step before touching xdp_buff), xdp_buff will already be hot in cache. Second thing was that xdp_rxq is used rather rarely and it occupies a separate cacheline, so maybe it is better to have it at the end of ice_rx_ring. Other change that affects ice_rx_ring is the introduction of ice_rx_ring::first_desc. Its purpose is twofold - first is to propagate rx_buf->act to all the parts of current xdp_buff after running XDP program, so that ice_put_rx_buf() that got moved out of the main Rx processing loop will be able to tak an appriopriate action on each buffer. Second is for ice_construct_skb(). ice_construct_skb() has a copybreak mechanism which had an explicit impact on xdp_buff->skb conversion in the new approach when legacy Rx flag is toggled. It works in a way that linear part is 256 bytes long, if frame is bigger than that, remaining bytes are going as a frag to skb_shared_info. 
This means while memcpying frags from xdp_buff to newly allocated skb, care needs to be taken when picking the destination frag array entry. Upon the time ice_construct_skb() is called, when dealing with fragmented frame, current rx_buf points to the *last* fragment, but copybreak needs to be done against the first one. That's where ice_rx_ring::first_desc helps. When frame building spans across NAPI polls (DD bit is not set on current descriptor and xdp->data is not NULL) with current Rx buffer handling state there might be some problems. Since calls to ice_put_rx_buf() were pulled out of the main Rx processing loop and were scoped from cached_ntc to current ntc, remember that now mentioned function relies on rx_buf->act, which is set within ice_run_xdp(). ice_run_xdp() is called when EOP bit was found, so currently we could put Rx buffer with rx_buf->act being *uninitialized*. To address this, change scoping to rely on first_desc on both boundaries instead. This also implies that cleaned_count which is used as an input to ice_alloc_rx_buffers() and tells how many new buffers should be refilled has to be adjusted. If it stayed as is, what could happen is a case where ntc would go over ntu. Therefore, remove cleaned_count altogether and use against allocing routine newly introduced ICE_RX_DESC_UNUSED() macro which is an equivalent of ICE_DESC_UNUSED() dedicated for Rx side and based on struct ice_rx_ring::first_desc instead of next_to_clean. Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/bpf/20230131204506.219292-11-maciej.fijalkowski@intel.com --- drivers/net/ethernet/intel/ice/ice_base.c | 17 ++- drivers/net/ethernet/intel/ice/ice_ethtool.c | 2 +- drivers/net/ethernet/intel/ice/ice_main.c | 13 +- drivers/net/ethernet/intel/ice/ice_txrx.c | 202 ++++++++++++++++---------- drivers/net/ethernet/intel/ice/ice_txrx.h | 34 +++-- drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 30 ++++ 6 files changed, 200 insertions(+), 98 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 5b66f6f7db78..1911d644dfa8 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -492,7 +492,7 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) { struct device *dev = ice_pf_to_dev(ring->vsi->back); - u16 num_bufs = ICE_DESC_UNUSED(ring); + u32 num_bufs = ICE_RX_DESC_UNUSED(ring); int err; ring->rx_buf_len = ring->vsi->rx_buf_len; @@ -500,8 +500,10 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) if (ring->vsi->type == ICE_VSI_PF) { if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) /* coverity[check_return] */ - xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, - ring->q_index, ring->q_vector->napi.napi_id); + __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, + ring->q_index, + ring->q_vector->napi.napi_id, + ring->vsi->rx_buf_len); ring->xsk_pool = ice_xsk_pool(ring); if (ring->xsk_pool) { @@ -521,9 +523,11 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) } else { if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) /* coverity[check_return] */ - xdp_rxq_info_reg(&ring->xdp_rxq, - ring->netdev, - ring->q_index, ring->q_vector->napi.napi_id); + __xdp_rxq_info_reg(&ring->xdp_rxq, + ring->netdev, + ring->q_index, + ring->q_vector->napi.napi_id, + ring->vsi->rx_buf_len); err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, @@ -534,6 +538,7 @@ int 
ice_vsi_cfg_rxq(struct ice_rx_ring *ring) } xdp_init_buff(&ring->xdp, ice_rx_pg_size(ring) / 2, &ring->xdp_rxq); + ring->xdp.data = NULL; err = ice_setup_rx_ctx(ring); if (err) { dev_err(dev, "ice_setup_rx_ctx failed for RxQ %d, err %d\n", diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 5b71d40a7dc0..3798a89284e6 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3092,7 +3092,7 @@ process_rx: /* allocate Rx buffers */ err = ice_alloc_rx_bufs(&rx_rings[i], - ICE_DESC_UNUSED(&rx_rings[i])); + ICE_RX_DESC_UNUSED(&rx_rings[i])); rx_unwind: if (err) { while (i) { diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 47b35084888a..eb6fa52bd971 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2888,9 +2888,12 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, bool if_running = netif_running(vsi->netdev); int ret = 0, xdp_ring_err = 0; - if (frame_size > ice_max_xdp_frame_size(vsi)) { - NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP"); - return -EOPNOTSUPP; + if (prog && !prog->aux->xdp_has_frags) { + if (frame_size > ice_max_xdp_frame_size(vsi)) { + NL_SET_ERR_MSG_MOD(extack, + "MTU is too large for linear frames and XDP prog does not support frags"); + return -EOPNOTSUPP; + } } /* need to stop netdev while setting up the program for Rx rings */ @@ -7354,6 +7357,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; + struct bpf_prog *prog; u8 count = 0; int err = 0; @@ -7362,7 +7366,8 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) return 0; } - if (ice_is_xdp_ena_vsi(vsi)) { + prog = vsi->xdp_prog; + if (prog && !prog->aux->xdp_has_frags) { int frame_size = ice_max_xdp_frame_size(vsi); if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) { diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 8db712254cb5..05028921fadd 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -382,6 +382,7 @@ err: */ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring) { + struct xdp_buff *xdp = &rx_ring->xdp; struct device *dev = rx_ring->dev; u32 size; u16 i; @@ -390,16 +391,16 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring) if (!rx_ring->rx_buf) return; - if (rx_ring->skb) { - dev_kfree_skb(rx_ring->skb); - rx_ring->skb = NULL; - } - if (rx_ring->xsk_pool) { ice_xsk_clean_rx_ring(rx_ring); goto rx_skip_free; } + if (xdp->data) { + xdp_return_buff(xdp); + xdp->data = NULL; + } + /* Free all the Rx ring sk_buffs */ for (i = 0; i < rx_ring->count; i++) { struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i]; @@ -437,6 +438,7 @@ rx_skip_free: rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; + rx_ring->first_desc = 0; rx_ring->next_to_use = 0; } @@ -506,6 +508,7 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) rx_ring->next_to_use = 0; rx_ring->next_to_clean = 0; + rx_ring->first_desc = 0; if (ice_is_xdp_ena_vsi(rx_ring->vsi)) WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog); @@ -598,6 +601,8 @@ out_failure: } exit: rx_buf->act = ret; + if (unlikely(xdp_buff_has_frags(xdp))) + ice_set_rx_bufs_act(xdp, rx_ring, ret); } /** @@ -721,7 +726,7 @@ ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct 
ice_rx_buf *bi) * buffers. Then bump tail at most one time. Grouping like this lets us avoid * multiple tail writes per call. */ -bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, u16 cleaned_count) +bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count) { union ice_32b_rx_flex_desc *rx_desc; u16 ntu = rx_ring->next_to_use; @@ -838,26 +843,44 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) } /** - * ice_add_rx_frag - Add contents of Rx buffer to sk_buff as a frag + * ice_add_xdp_frag - Add contents of Rx buffer to xdp buf as a frag * @rx_ring: Rx descriptor ring to transact packets on - * @xdp: XDP buffer + * @xdp: xdp buff to place the data into * @rx_buf: buffer containing page to add - * @skb: sk_buff to place the data into * @size: packet length from rx_desc * - * This function will add the data contained in rx_buf->page to the skb. - * It will just attach the page as a frag to the skb. - * The function will then update the page offset. + * This function will add the data contained in rx_buf->page to the xdp buf. + * It will just attach the page as a frag. */ -static void -ice_add_rx_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, - struct ice_rx_buf *rx_buf, struct sk_buff *skb, - unsigned int size) +static int +ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, + struct ice_rx_buf *rx_buf, const unsigned int size) { + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); + if (!size) - return; - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buf->page, - rx_buf->page_offset, size, xdp->frame_sz); + return 0; + + if (!xdp_buff_has_frags(xdp)) { + sinfo->nr_frags = 0; + sinfo->xdp_frags_size = 0; + xdp_buff_set_frags_flag(xdp); + } + + if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) { + if (unlikely(xdp_buff_has_frags(xdp))) + ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED); + return -ENOMEM; + } + + __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page, + rx_buf->page_offset, size); + sinfo->xdp_frags_size += size; + + if (page_is_pfmemalloc(rx_buf->page)) + xdp_buff_set_frag_pfmemalloc(xdp); + + return 0; } /** @@ -928,19 +951,25 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, /** * ice_build_skb - Build skb around an existing buffer * @rx_ring: Rx descriptor ring to transact packets on - * @rx_buf: Rx buffer to pull data from * @xdp: xdp_buff pointing to the data * - * This function builds an skb around an existing Rx buffer, taking care - * to set up the skb correctly and avoid any memcpy overhead. + * This function builds an skb around an existing XDP buffer, taking care + * to set up the skb correctly and avoid any memcpy overhead. Driver has + * already combined frags (if any) to skb_shared_info. */ static struct sk_buff * -ice_build_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, - struct xdp_buff *xdp) +ice_build_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) { u8 metasize = xdp->data - xdp->data_meta; + struct skb_shared_info *sinfo = NULL; + unsigned int nr_frags; struct sk_buff *skb; + if (unlikely(xdp_buff_has_frags(xdp))) { + sinfo = xdp_get_shared_info_from_buff(xdp); + nr_frags = sinfo->nr_frags; + } + /* Prefetch first cache line of first page. 
If xdp->data_meta * is unused, this points exactly as xdp->data, otherwise we * likely have a consumer accessing first few bytes of meta @@ -963,6 +992,12 @@ ice_build_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, if (metasize) skb_metadata_set(skb, metasize); + if (unlikely(xdp_buff_has_frags(xdp))) + xdp_update_skb_shared_info(skb, nr_frags, + sinfo->xdp_frags_size, + nr_frags * xdp->frame_sz, + xdp_buff_is_frag_pfmemalloc(xdp)); + return skb; } @@ -977,22 +1012,30 @@ ice_build_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, * skb correctly. */ static struct sk_buff * -ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, - struct xdp_buff *xdp) +ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) { unsigned int size = xdp->data_end - xdp->data; + struct skb_shared_info *sinfo = NULL; + struct ice_rx_buf *rx_buf; + unsigned int nr_frags = 0; unsigned int headlen; struct sk_buff *skb; /* prefetch first cache line of first page */ net_prefetch(xdp->data); + if (unlikely(xdp_buff_has_frags(xdp))) { + sinfo = xdp_get_shared_info_from_buff(xdp); + nr_frags = sinfo->nr_frags; + } + /* allocate a skb to store the frags */ skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE, GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; + rx_buf = &rx_ring->rx_buf[rx_ring->first_desc]; skb_record_rx_queue(skb, rx_ring->q_index); /* Determine available headroom for copy */ headlen = size; @@ -1006,6 +1049,14 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, /* if we exhaust the linear part then add what is left as a frag */ size -= headlen; if (size) { + /* besides adding here a partial frag, we are going to add + * frags from xdp_buff, make sure there is enough space for + * them + */ + if (unlikely(nr_frags >= MAX_SKB_FRAGS - 1)) { + dev_kfree_skb(skb); + return NULL; + } skb_add_rx_frag(skb, 0, rx_buf->page, rx_buf->page_offset + headlen, size, xdp->frame_sz); @@ -1015,7 +1066,19 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, * need for adjusting page offset and we can reuse this buffer * as-is */ - rx_buf->act = ICE_XDP_CONSUMED; + rx_buf->act = ICE_SKB_CONSUMED; + } + + if (unlikely(xdp_buff_has_frags(xdp))) { + struct skb_shared_info *skinfo = skb_shinfo(skb); + + memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0], + sizeof(skb_frag_t) * nr_frags); + + xdp_update_skb_shared_info(skb, skinfo->nr_frags + nr_frags, + sinfo->xdp_frags_size, + nr_frags * xdp->frame_sz, + xdp_buff_is_frag_pfmemalloc(xdp)); } return skb; @@ -1065,17 +1128,16 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf) int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_pkts = 0; - u16 cleaned_count = ICE_DESC_UNUSED(rx_ring); unsigned int offset = rx_ring->rx_offset; struct xdp_buff *xdp = &rx_ring->xdp; struct ice_tx_ring *xdp_ring = NULL; - struct sk_buff *skb = rx_ring->skb; struct bpf_prog *xdp_prog = NULL; u32 ntc = rx_ring->next_to_clean; u32 cnt = rx_ring->count; u32 cached_ntc = ntc; u32 xdp_xmit = 0; bool failure; + u32 first; /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ #if (PAGE_SIZE < 8192) @@ -1090,7 +1152,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) while (likely(total_rx_pkts < (unsigned int)budget)) { union ice_32b_rx_flex_desc *rx_desc; struct ice_rx_buf *rx_buf; - unsigned char *hard_start; + struct sk_buff *skb; unsigned int size; u16 stat_err_bits; u16 
vlan_tag = 0; @@ -1123,7 +1185,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) ice_vc_fdir_irq_handler(ctrl_vsi, rx_desc); if (++ntc == cnt) ntc = 0; - cleaned_count++; continue; } @@ -1133,56 +1194,54 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) /* retrieve a buffer from the ring */ rx_buf = ice_get_rx_buf(rx_ring, size, ntc); - if (!size) { - xdp->data = NULL; - xdp->data_end = NULL; - xdp->data_hard_start = NULL; - xdp->data_meta = NULL; - goto construct_skb; - } + if (!xdp->data) { + void *hard_start; - hard_start = page_address(rx_buf->page) + rx_buf->page_offset - - offset; - xdp_prepare_buff(xdp, hard_start, offset, size, !!offset); + hard_start = page_address(rx_buf->page) + rx_buf->page_offset - + offset; + xdp_prepare_buff(xdp, hard_start, offset, size, !!offset); #if (PAGE_SIZE > 4096) - /* At larger PAGE_SIZE, frame_sz depend on len size */ - xdp->frame_sz = ice_rx_frame_truesize(rx_ring, size); + /* At larger PAGE_SIZE, frame_sz depend on len size */ + xdp->frame_sz = ice_rx_frame_truesize(rx_ring, size); #endif + xdp_buff_clear_frags_flag(xdp); + } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) { + break; + } + if (++ntc == cnt) + ntc = 0; + + /* skip if it is NOP desc */ + if (ice_is_non_eop(rx_ring, rx_desc)) + continue; ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf); if (rx_buf->act == ICE_XDP_PASS) goto construct_skb; - total_rx_bytes += size; + total_rx_bytes += xdp_get_buff_len(xdp); total_rx_pkts++; - cleaned_count++; - if (++ntc == cnt) - ntc = 0; + xdp->data = NULL; + rx_ring->first_desc = ntc; continue; construct_skb: - if (skb) { - ice_add_rx_frag(rx_ring, xdp, rx_buf, skb, size); - } else if (likely(xdp->data)) { - if (ice_ring_uses_build_skb(rx_ring)) - skb = ice_build_skb(rx_ring, rx_buf, xdp); - else - skb = ice_construct_skb(rx_ring, rx_buf, xdp); - } + if (likely(ice_ring_uses_build_skb(rx_ring))) + skb = ice_build_skb(rx_ring, xdp); + else + skb = ice_construct_skb(rx_ring, xdp); /* exit if we failed to retrieve a buffer */ if (!skb) { - rx_ring->ring_stats->rx_stats.alloc_buf_failed++; - if (rx_buf) - rx_buf->pagecnt_bias++; + rx_ring->ring_stats->rx_stats.alloc_page_failed++; + rx_buf->act = ICE_XDP_CONSUMED; + if (unlikely(xdp_buff_has_frags(xdp))) + ice_set_rx_bufs_act(xdp, rx_ring, + ICE_XDP_CONSUMED); + xdp->data = NULL; + rx_ring->first_desc = ntc; break; } - - if (++ntc == cnt) - ntc = 0; - cleaned_count++; - - /* skip if it is NOP desc */ - if (ice_is_non_eop(rx_ring, rx_desc)) - continue; + xdp->data = NULL; + rx_ring->first_desc = ntc; stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S); if (unlikely(ice_test_staterr(rx_desc->wb.status_error0, @@ -1194,10 +1253,8 @@ construct_skb: vlan_tag = ice_get_vlan_tag_from_rx_desc(rx_desc); /* pad the skb if needed, to make a valid ethernet frame */ - if (eth_skb_pad(skb)) { - skb = NULL; + if (eth_skb_pad(skb)) continue; - } /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; @@ -1211,13 +1268,13 @@ construct_skb: ice_trace(clean_rx_irq_indicate, rx_ring, rx_desc, skb); /* send completed skb up the stack */ ice_receive_skb(rx_ring, skb, vlan_tag); - skb = NULL; /* update budget accounting */ total_rx_pkts++; } - while (cached_ntc != ntc) { + first = rx_ring->first_desc; + while (cached_ntc != first) { struct ice_rx_buf *buf = &rx_ring->rx_buf[cached_ntc]; if (buf->act & (ICE_XDP_TX | ICE_XDP_REDIR)) { @@ -1235,11 +1292,10 @@ construct_skb: } rx_ring->next_to_clean = ntc; /* return up to cleaned_count buffers to hardware */ - 
failure = ice_alloc_rx_bufs(rx_ring, cleaned_count); + failure = ice_alloc_rx_bufs(rx_ring, ICE_RX_DESC_UNUSED(rx_ring)); if (xdp_xmit) ice_finalize_xdp_rx(xdp_ring, xdp_xmit); - rx_ring->skb = skb; if (rx_ring->ring_stats) ice_update_rx_ring_stats(rx_ring, total_rx_pkts, diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 9d67d6f1b1f5..26624723352b 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -112,6 +112,10 @@ static inline int ice_skb_pad(void) (u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ (R)->next_to_clean - (R)->next_to_use - 1) +#define ICE_RX_DESC_UNUSED(R) \ + ((((R)->first_desc > (R)->next_to_use) ? 0 : (R)->count) + \ + (R)->first_desc - (R)->next_to_use - 1) + #define ICE_RING_QUARTER(R) ((R)->count >> 2) #define ICE_TX_FLAGS_TSO BIT(0) @@ -136,6 +140,7 @@ static inline int ice_skb_pad(void) #define ICE_XDP_TX BIT(1) #define ICE_XDP_REDIR BIT(2) #define ICE_XDP_EXIT BIT(3) +#define ICE_SKB_CONSUMED ICE_XDP_CONSUMED #define ICE_RX_DMA_ATTR \ (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) @@ -277,43 +282,44 @@ struct ice_rx_ring { struct ice_vsi *vsi; /* Backreference to associated VSI */ struct ice_q_vector *q_vector; /* Backreference to associated vector */ u8 __iomem *tail; + u16 q_index; /* Queue number of ring */ + + u16 count; /* Number of descriptors */ + u16 reg_idx; /* HW register index of the ring */ + u16 next_to_alloc; + /* CL2 - 2nd cacheline starts here */ union { struct ice_rx_buf *rx_buf; struct xdp_buff **xdp_buf; }; - /* CL2 - 2nd cacheline starts here */ - struct xdp_rxq_info xdp_rxq; + struct xdp_buff xdp; /* CL3 - 3rd cacheline starts here */ - u16 q_index; /* Queue number of ring */ - - u16 count; /* Number of descriptors */ - u16 reg_idx; /* HW register index of the ring */ + struct bpf_prog *xdp_prog; + u16 rx_offset; /* used in interrupt processing */ u16 next_to_use; u16 next_to_clean; - u16 next_to_alloc; - u16 rx_offset; - u16 rx_buf_len; + u16 first_desc; /* stats structs */ struct ice_ring_stats *ring_stats; struct rcu_head rcu; /* to avoid race on free */ - /* CL4 - 3rd cacheline starts here */ + /* CL4 - 4th cacheline starts here */ struct ice_channel *ch; - struct bpf_prog *xdp_prog; struct ice_tx_ring *xdp_ring; struct xsk_buff_pool *xsk_pool; - struct xdp_buff xdp; - struct sk_buff *skb; dma_addr_t dma; /* physical address of ring */ u64 cached_phctime; + u16 rx_buf_len; u8 dcb_tc; /* Traffic class of ring */ u8 ptp_rx; #define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1) #define ICE_RX_FLAGS_CRC_STRIP_DIS BIT(2) u8 flags; + /* CL5 - 5th cacheline starts here */ + struct xdp_rxq_info xdp_rxq; } ____cacheline_internodealigned_in_smp; struct ice_tx_ring { @@ -436,7 +442,7 @@ static inline unsigned int ice_rx_pg_order(struct ice_rx_ring *ring) union ice_32b_rx_flex_desc; -bool ice_alloc_rx_bufs(struct ice_rx_ring *rxr, u16 cleaned_count); +bool ice_alloc_rx_bufs(struct ice_rx_ring *rxr, unsigned int cleaned_count); netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev); u16 ice_select_queue(struct net_device *dev, struct sk_buff *skb, diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index 30e3cffdb2f1..1fdc37095767 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -5,6 +5,36 @@ #define _ICE_TXRX_LIB_H_ #include "ice.h" +/** + * ice_set_rx_bufs_act - propagate Rx buffer action to 
frags + * @xdp: XDP buffer representing frame (linear and frags part) + * @rx_ring: Rx ring struct + * act: action to store onto Rx buffers related to XDP buffer parts + * + * Set action that should be taken before putting Rx buffer from first frag + * to one before last. Last one is handled by caller of this function as it + * is the EOP frag that is currently being processed. This function is + * supposed to be called only when XDP buffer contains frags. + */ +static inline void +ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring, + const unsigned int act) +{ + const struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); + u32 first = rx_ring->first_desc; + u32 nr_frags = sinfo->nr_frags; + u32 cnt = rx_ring->count; + struct ice_rx_buf *buf; + + for (int i = 0; i < nr_frags; i++) { + buf = &rx_ring->rx_buf[first]; + buf->act = act; + + if (++first == cnt) + first = 0; + } +} + /** * ice_test_staterr - tests bits in Rx descriptor status and error fields * @status_err_n: Rx descriptor status_error0 or status_error1 bits -- cgit v1.2.3 From 3246a10752a7120878740f345f3956997777b3d6 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 31 Jan 2023 21:45:04 +0100 Subject: ice: Add support for XDP multi-buffer on Tx side Similarly as for Rx side in previous patch, logic on XDP Tx in ice driver needs to be adjusted for multi-buffer support. Specifically, the way how HW Tx descriptors are produced and cleaned. Currently, XDP_TX works on strict ring boundaries, meaning it sets RS bit (on producer side) / looks up DD bit (on consumer/cleaning side) every quarter of the ring. It means that if for example multi buffer frame would span across the ring quarter boundary (say that frame consists of 4 frames and we start from 62 descriptor where ring is sized to 256 entries), RS bit would be produced in the middle of multi buffer frame, which would be a broken behavior as it needs to be set on the last descriptor of the frame. To make it work, set RS bit at the last descriptor from the batch of frames that XDP_TX action was used on and make the first entry remember the index of last descriptor with RS bit set. This way, cleaning side can take the index of descriptor with RS bit, look up DD bit's presence and clean from first entry to last. In order to clean up the code base introduce the common ice_set_rs_bit() which will return index of descriptor that got RS bit produced on so that standard driver can store this within proper ice_tx_buf and ZC driver can simply ignore return value. 
Co-developed-by: Martyna Szapar-Mudlaw Signed-off-by: Martyna Szapar-Mudlaw Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/bpf/20230131204506.219292-12-maciej.fijalkowski@intel.com --- drivers/net/ethernet/intel/ice/ice_txrx.c | 14 +- drivers/net/ethernet/intel/ice/ice_txrx.h | 12 +- drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 230 ++++++++++++++++++-------- drivers/net/ethernet/intel/ice/ice_txrx_lib.h | 23 ++- drivers/net/ethernet/intel/ice/ice_xsk.c | 16 +- 5 files changed, 199 insertions(+), 96 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 05028921fadd..fd731229c4de 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -578,7 +578,7 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, case XDP_TX: if (static_branch_unlikely(&ice_xdp_locking_key)) spin_lock(&xdp_ring->tx_lock); - ret = ice_xmit_xdp_ring(xdp->data, xdp->data_end - xdp->data, xdp_ring); + ret = __ice_xmit_xdp_ring(xdp, xdp_ring); if (static_branch_unlikely(&ice_xdp_locking_key)) spin_unlock(&xdp_ring->tx_lock); if (ret == ICE_XDP_CONSUMED) @@ -625,6 +625,7 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, unsigned int queue_index = smp_processor_id(); struct ice_vsi *vsi = np->vsi; struct ice_tx_ring *xdp_ring; + struct ice_tx_buf *tx_buf; int nxmit = 0, i; if (test_bit(ICE_VSI_DOWN, vsi->state)) @@ -647,16 +648,18 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, xdp_ring = vsi->xdp_rings[queue_index]; } + tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use]; for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; int err; - err = ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring); + err = ice_xmit_xdp_ring(xdpf, xdp_ring); if (err != ICE_XDP_TX) break; nxmit++; } + tx_buf->rs_idx = ice_set_rs_bit(xdp_ring); if (unlikely(flags & XDP_XMIT_FLUSH)) ice_xdp_ring_update_tail(xdp_ring); @@ -1136,6 +1139,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) u32 cnt = rx_ring->count; u32 cached_ntc = ntc; u32 xdp_xmit = 0; + u32 cached_ntu; bool failure; u32 first; @@ -1145,8 +1149,10 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) #endif xdp_prog = READ_ONCE(rx_ring->xdp_prog); - if (xdp_prog) + if (xdp_prog) { xdp_ring = rx_ring->xdp_ring; + cached_ntu = xdp_ring->next_to_use; + } /* start the loop to process Rx packets bounded by 'budget' */ while (likely(total_rx_pkts < (unsigned int)budget)) { @@ -1295,7 +1301,7 @@ construct_skb: failure = ice_alloc_rx_bufs(rx_ring, ICE_RX_DESC_UNUSED(rx_ring)); if (xdp_xmit) - ice_finalize_xdp_rx(xdp_ring, xdp_xmit); + ice_finalize_xdp_rx(xdp_ring, xdp_xmit, cached_ntu); if (rx_ring->ring_stats) ice_update_rx_ring_stats(rx_ring, total_rx_pkts, diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 26624723352b..55f47c560981 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -150,13 +150,19 @@ static inline int ice_skb_pad(void) #define ICE_TXD_LAST_DESC_CMD (ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS) struct ice_tx_buf { - struct ice_tx_desc *next_to_watch; + union { + struct ice_tx_desc *next_to_watch; + u32 rs_idx; + }; union { struct sk_buff *skb; void *raw_buf; /* used for XDP */ }; unsigned int bytecount; - unsigned short gso_segs; + union { + unsigned int 
gso_segs; + unsigned int nr_frags; /* used for mbuf XDP */ + }; u32 tx_flags; DEFINE_DMA_UNMAP_LEN(len); DEFINE_DMA_UNMAP_ADDR(dma); @@ -343,6 +349,7 @@ struct ice_tx_ring { u16 reg_idx; /* HW register index of the ring */ u16 count; /* Number of descriptors */ u16 q_index; /* Queue number of ring */ + u16 xdp_tx_active; /* stats structs */ struct ice_ring_stats *ring_stats; /* CL3 - 3rd cacheline starts here */ @@ -353,7 +360,6 @@ struct ice_tx_ring { spinlock_t tx_lock; u32 txq_teid; /* Added Tx queue TEID */ /* CL4 - 4th cacheline starts here */ - u16 xdp_tx_active; #define ICE_TX_FLAGS_RING_XDP BIT(0) #define ICE_TX_FLAGS_RING_VLAN_L2TAG1 BIT(1) #define ICE_TX_FLAGS_RING_VLAN_L2TAG2 BIT(2) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 25f04266c668..3dcc40b96b58 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -220,112 +220,191 @@ ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag) napi_gro_receive(&rx_ring->q_vector->napi, skb); } +/** + * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer + * @xdp_ring: XDP Tx ring + * @tx_buf: Tx buffer to clean + */ +static void +ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf) +{ + dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); + xdp_ring->xdp_tx_active--; + page_frag_free(tx_buf->raw_buf); + tx_buf->raw_buf = NULL; +} + /** * ice_clean_xdp_irq - Reclaim resources after transmit completes on XDP ring * @xdp_ring: XDP ring to clean */ -static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring) +static u32 ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring) { - unsigned int total_bytes = 0, total_pkts = 0; - u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); - u16 ntc = xdp_ring->next_to_clean; - struct ice_tx_desc *next_dd_desc; - u16 next_dd = xdp_ring->next_dd; - struct ice_tx_buf *tx_buf; - int i; + int total_bytes = 0, total_pkts = 0; + u32 ntc = xdp_ring->next_to_clean; + struct ice_tx_desc *tx_desc; + u32 cnt = xdp_ring->count; + u32 ready_frames = 0; + u32 frags; + u32 idx; + u32 ret; + + idx = xdp_ring->tx_buf[ntc].rs_idx; + tx_desc = ICE_TX_DESC(xdp_ring, idx); + if (tx_desc->cmd_type_offset_bsz & + cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)) { + if (idx >= ntc) + ready_frames = idx - ntc + 1; + else + ready_frames = idx + cnt - ntc + 1; + } - next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd); - if (!(next_dd_desc->cmd_type_offset_bsz & - cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) - return; + if (!ready_frames) + return 0; + ret = ready_frames; - for (i = 0; i < tx_thresh; i++) { - tx_buf = &xdp_ring->tx_buf[ntc]; + while (ready_frames) { + struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc]; + /* bytecount holds size of head + frags */ total_bytes += tx_buf->bytecount; - /* normally tx_buf->gso_segs was taken but at this point - * it's always 1 for us - */ + frags = tx_buf->nr_frags; total_pkts++; + /* count head + frags */ + ready_frames -= frags + 1; - page_frag_free(tx_buf->raw_buf); - dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), - dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); - dma_unmap_len_set(tx_buf, len, 0); - tx_buf->raw_buf = NULL; - + ice_clean_xdp_tx_buf(xdp_ring, tx_buf); ntc++; - if (ntc >= xdp_ring->count) + if (ntc == cnt) ntc = 0; + + for (int i = 0; i < frags; i++) { + tx_buf = &xdp_ring->tx_buf[ntc]; + + ice_clean_xdp_tx_buf(xdp_ring, 
tx_buf); + ntc++; + if (ntc == cnt) + ntc = 0; + } } - next_dd_desc->cmd_type_offset_bsz = 0; - xdp_ring->next_dd = xdp_ring->next_dd + tx_thresh; - if (xdp_ring->next_dd > xdp_ring->count) - xdp_ring->next_dd = tx_thresh - 1; + tx_desc->cmd_type_offset_bsz = 0; xdp_ring->next_to_clean = ntc; ice_update_tx_ring_stats(xdp_ring, total_pkts, total_bytes); + + return ret; } /** - * ice_xmit_xdp_ring - submit single packet to XDP ring for transmission - * @data: packet data pointer - * @size: packet data size + * __ice_xmit_xdp_ring - submit frame to XDP ring for transmission + * @xdp: XDP buffer to be placed onto Tx descriptors * @xdp_ring: XDP ring for transmission */ -int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring) +int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring) { - u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); - u16 i = xdp_ring->next_to_use; + struct skb_shared_info *sinfo = NULL; + u32 size = xdp->data_end - xdp->data; + struct device *dev = xdp_ring->dev; + u32 ntu = xdp_ring->next_to_use; struct ice_tx_desc *tx_desc; + struct ice_tx_buf *tx_head; struct ice_tx_buf *tx_buf; - dma_addr_t dma; + u32 cnt = xdp_ring->count; + void *data = xdp->data; + u32 nr_frags = 0; + u32 free_space; + u32 frag = 0; + + free_space = ICE_DESC_UNUSED(xdp_ring); + + if (ICE_DESC_UNUSED(xdp_ring) < ICE_RING_QUARTER(xdp_ring)) + free_space += ice_clean_xdp_irq(xdp_ring); + + if (unlikely(xdp_buff_has_frags(xdp))) { + sinfo = xdp_get_shared_info_from_buff(xdp); + nr_frags = sinfo->nr_frags; + if (free_space < nr_frags + 1) { + xdp_ring->ring_stats->tx_stats.tx_busy++; + return ICE_XDP_CONSUMED; + } + } - if (ICE_DESC_UNUSED(xdp_ring) < tx_thresh) - ice_clean_xdp_irq(xdp_ring); + tx_desc = ICE_TX_DESC(xdp_ring, ntu); + tx_head = &xdp_ring->tx_buf[ntu]; + tx_buf = tx_head; - if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) { - xdp_ring->ring_stats->tx_stats.tx_busy++; - return ICE_XDP_CONSUMED; - } + for (;;) { + dma_addr_t dma; - dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE); - if (dma_mapping_error(xdp_ring->dev, dma)) - return ICE_XDP_CONSUMED; + dma = dma_map_single(dev, data, size, DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma)) + goto dma_unmap; - tx_buf = &xdp_ring->tx_buf[i]; - tx_buf->bytecount = size; - tx_buf->gso_segs = 1; - tx_buf->raw_buf = data; + /* record length, and DMA address */ + dma_unmap_len_set(tx_buf, len, size); + dma_unmap_addr_set(tx_buf, dma, dma); - /* record length, and DMA address */ - dma_unmap_len_set(tx_buf, len, size); - dma_unmap_addr_set(tx_buf, dma, dma); + tx_desc->buf_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = ice_build_ctob(0, 0, size, 0); + tx_buf->raw_buf = data; - tx_desc = ICE_TX_DESC(xdp_ring, i); - tx_desc->buf_addr = cpu_to_le64(dma); - tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, 0, - size, 0); + ntu++; + if (ntu == cnt) + ntu = 0; - xdp_ring->xdp_tx_active++; - i++; - if (i == xdp_ring->count) { - i = 0; - tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); - tx_desc->cmd_type_offset_bsz |= - cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); - xdp_ring->next_rs = tx_thresh - 1; - } - xdp_ring->next_to_use = i; + if (frag == nr_frags) + break; + + tx_desc = ICE_TX_DESC(xdp_ring, ntu); + tx_buf = &xdp_ring->tx_buf[ntu]; - if (i > xdp_ring->next_rs) { - tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); - tx_desc->cmd_type_offset_bsz |= - cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); - xdp_ring->next_rs += tx_thresh; + data = 
skb_frag_address(&sinfo->frags[frag]); + size = skb_frag_size(&sinfo->frags[frag]); + frag++; } + /* store info about bytecount and frag count in first desc */ + tx_head->bytecount = xdp_get_buff_len(xdp); + tx_head->nr_frags = nr_frags; + + /* update last descriptor from a frame with EOP */ + tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(ICE_TX_DESC_CMD_EOP << ICE_TXD_QW1_CMD_S); + + xdp_ring->xdp_tx_active++; + xdp_ring->next_to_use = ntu; + return ICE_XDP_TX; + +dma_unmap: + for (;;) { + tx_buf = &xdp_ring->tx_buf[ntu]; + dma_unmap_page(dev, dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); + if (tx_buf == tx_head) + break; + + if (!ntu) + ntu += cnt; + ntu--; + } + return ICE_XDP_CONSUMED; +} + +/** + * ice_xmit_xdp_ring - submit frame to XDP ring for transmission + * @xdpf: XDP frame that will be converted to XDP buff + * @xdp_ring: XDP ring for transmission + */ +int ice_xmit_xdp_ring(struct xdp_frame *xdpf, struct ice_tx_ring *xdp_ring) +{ + struct xdp_buff xdp; + + xdp_convert_frame_to_buff(xdpf, &xdp); + return __ice_xmit_xdp_ring(&xdp, xdp_ring); } /** @@ -342,7 +421,7 @@ int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring) if (unlikely(!xdpf)) return ICE_XDP_CONSUMED; - return ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring); + return ice_xmit_xdp_ring(xdpf, xdp_ring); } /** @@ -354,14 +433,21 @@ int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring) * should be called when a batch of packets has been processed in the * napi loop. */ -void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res) +void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res, + u32 first_idx) { + struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[first_idx]; + if (xdp_res & ICE_XDP_REDIR) xdp_do_flush_map(); if (xdp_res & ICE_XDP_TX) { if (static_branch_unlikely(&ice_xdp_locking_key)) spin_lock(&xdp_ring->tx_lock); + /* store index of descriptor with RS bit set in the first + * ice_tx_buf of given NAPI batch + */ + tx_buf->rs_idx = ice_set_rs_bit(xdp_ring); ice_xdp_ring_update_tail(xdp_ring); if (static_branch_unlikely(&ice_xdp_locking_key)) spin_unlock(&xdp_ring->tx_lock); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index 1fdc37095767..ea977f283c22 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -122,9 +122,28 @@ static inline void ice_xdp_ring_update_tail(struct ice_tx_ring *xdp_ring) writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail); } -void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res); +/** + * ice_set_rs_bit - set RS bit on last produced descriptor (one behind current NTU) + * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * + * returns index of descriptor that had RS bit produced on + */ +static inline u32 ice_set_rs_bit(const struct ice_tx_ring *xdp_ring) +{ + u32 rs_idx = xdp_ring->next_to_use ? 
xdp_ring->next_to_use - 1 : xdp_ring->count - 1; + struct ice_tx_desc *tx_desc; + + tx_desc = ICE_TX_DESC(xdp_ring, rs_idx); + tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); + + return rs_idx; +} + +void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res, u32 first_idx); int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring); -int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring); +int ice_xmit_xdp_ring(struct xdp_frame *xdpf, struct ice_tx_ring *xdp_ring); +int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring); void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val); void ice_process_skb_fields(struct ice_rx_ring *rx_ring, diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 7105de6fb344..b1b419cdadd1 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -760,7 +760,7 @@ construct_skb: if (entries_to_alloc > ICE_RING_QUARTER(rx_ring)) failure |= !ice_alloc_rx_bufs_zc(rx_ring, entries_to_alloc); - ice_finalize_xdp_rx(xdp_ring, xdp_xmit); + ice_finalize_xdp_rx(xdp_ring, xdp_xmit, 0); ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes); if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) { @@ -917,20 +917,6 @@ static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *d ice_xmit_pkt(xdp_ring, &descs[i], total_bytes); } -/** - * ice_set_rs_bit - set RS bit on last produced descriptor (one behind current NTU) - * @xdp_ring: XDP ring to produce the HW Tx descriptors on - */ -static void ice_set_rs_bit(struct ice_tx_ring *xdp_ring) -{ - u16 ntu = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : xdp_ring->count - 1; - struct ice_tx_desc *tx_desc; - - tx_desc = ICE_TX_DESC(xdp_ring, ntu); - tx_desc->cmd_type_offset_bsz |= - cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); -} - /** * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring * @xdp_ring: XDP ring to produce the HW Tx descriptors on -- cgit v1.2.3 From f4db7b314dd54ab9335bb449640484a6f27baf28 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 31 Jan 2023 21:45:05 +0100 Subject: ice: Remove next_{dd,rs} fields from ice_tx_ring Now that both ZC and standard XDP data paths stopped using Tx logic based on next_dd and next_rs fields, we can safely remove these fields and shrink Tx ring structure. 
Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/bpf/20230131204506.219292-13-maciej.fijalkowski@intel.com --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 2 -- drivers/net/ethernet/intel/ice/ice_main.c | 2 -- drivers/net/ethernet/intel/ice/ice_txrx.c | 2 -- drivers/net/ethernet/intel/ice/ice_txrx.h | 2 -- 4 files changed, 8 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 3798a89284e6..1adbcb4786c2 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3046,8 +3046,6 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, /* clone ring and setup updated count */ xdp_rings[i] = *vsi->xdp_rings[i]; xdp_rings[i].count = new_tx_cnt; - xdp_rings[i].next_dd = ICE_RING_QUARTER(&xdp_rings[i]) - 1; - xdp_rings[i].next_rs = ICE_RING_QUARTER(&xdp_rings[i]) - 1; xdp_rings[i].desc = NULL; xdp_rings[i].tx_buf = NULL; err = ice_setup_tx_ring(&xdp_rings[i]); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index eb6fa52bd971..26a8910a41ff 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2570,8 +2570,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) xdp_ring->netdev = NULL; xdp_ring->dev = dev; xdp_ring->count = vsi->num_tx_desc; - xdp_ring->next_dd = ICE_RING_QUARTER(xdp_ring) - 1; - xdp_ring->next_rs = ICE_RING_QUARTER(xdp_ring) - 1; WRITE_ONCE(vsi->xdp_rings[i], xdp_ring); if (ice_setup_tx_ring(xdp_ring)) goto free_xdp_rings; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index fd731229c4de..3cc63e65a354 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -174,8 +174,6 @@ tx_skip_free: tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; - tx_ring->next_dd = ICE_RING_QUARTER(tx_ring) - 1; - tx_ring->next_rs = ICE_RING_QUARTER(tx_ring) - 1; if (!tx_ring->netdev) return; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 55f47c560981..7903bb692c1f 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -343,8 +343,6 @@ struct ice_tx_ring { struct xsk_buff_pool *xsk_pool; u16 next_to_use; u16 next_to_clean; - u16 next_rs; - u16 next_dd; u16 q_handle; /* Queue handle per TC */ u16 reg_idx; /* HW register index of the ring */ u16 count; /* Number of descriptors */ -- cgit v1.2.3 From a24b4c6e9aab4f39982d40cfeb7c142e93310f8b Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 31 Jan 2023 21:45:06 +0100 Subject: ice: xsk: Do not convert to buff to frame for XDP_TX Let us store pointer to xdp_buff that came from xsk_buff_pool on tx_buf so that it will be possible to recycle it via xsk_buff_free() on Tx cleaning side. This way it is not necessary to do expensive copy to another xdp_buff backed by a newly allocated page. 
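For illustration only (not part of the patch series): a minimal sketch of the bookkeeping this change enables, assuming the usual kernel headers for xsk_buff_free() and page_frag_free(); the struct and function names here are hypothetical, and the real ice_tx_buf carries additional state (DMA mapping, bytecount, frag count).

	/* Simplified Tx buffer for the XDP_TX path: zero-copy rings keep the
	 * pool-backed xdp_buff pointer, copy-based rings keep the raw buffer.
	 */
	struct zc_tx_buf {
		union {
			void *raw_buf;		/* copy-based XDP_TX */
			struct xdp_buff *xdp;	/* zero-copy XDP_TX */
		};
	};

	static void zc_tx_buf_clean(struct zc_tx_buf *buf, bool zero_copy)
	{
		if (zero_copy)
			xsk_buff_free(buf->xdp);	/* hand buffer back to the xsk_buff_pool */
		else
			page_frag_free(buf->raw_buf);	/* free the copied fragment */
	}

Recycling via xsk_buff_free() on the cleaning side is what removes the need to convert the buffer to an xdp_frame and copy it into a newly allocated page on XDP_TX.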
Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/bpf/20230131204506.219292-14-maciej.fijalkowski@intel.com --- drivers/net/ethernet/intel/ice/ice_txrx.c | 12 +- drivers/net/ethernet/intel/ice/ice_txrx.h | 1 + drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 22 +--- drivers/net/ethernet/intel/ice/ice_xsk.c | 176 +++++++++++++++----------- 4 files changed, 117 insertions(+), 94 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 3cc63e65a354..466113c86e6f 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -113,12 +113,16 @@ static void ice_unmap_and_free_tx_buf(struct ice_tx_ring *ring, struct ice_tx_buf *tx_buf) { if (tx_buf->skb) { - if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT) + if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT) { devm_kfree(ring->dev, tx_buf->raw_buf); - else if (ice_ring_is_xdp(ring)) - page_frag_free(tx_buf->raw_buf); - else + } else if (ice_ring_is_xdp(ring)) { + if (ring->xsk_pool) + xsk_buff_free(tx_buf->xdp); + else + page_frag_free(tx_buf->raw_buf); + } else { dev_kfree_skb_any(tx_buf->skb); + } if (dma_unmap_len(tx_buf, len)) dma_unmap_single(ring->dev, dma_unmap_addr(tx_buf, dma), diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 7903bb692c1f..efa3d378f19e 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -157,6 +157,7 @@ struct ice_tx_buf { union { struct sk_buff *skb; void *raw_buf; /* used for XDP */ + struct xdp_buff *xdp; /* used for XDP_TX ZC */ }; unsigned int bytecount; union { diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 3dcc40b96b58..9bbed3f14e42 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -275,7 +275,10 @@ static u32 ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring) /* count head + frags */ ready_frames -= frags + 1; - ice_clean_xdp_tx_buf(xdp_ring, tx_buf); + if (xdp_ring->xsk_pool) + xsk_buff_free(tx_buf->xdp); + else + ice_clean_xdp_tx_buf(xdp_ring, tx_buf); ntc++; if (ntc == cnt) ntc = 0; @@ -407,23 +410,6 @@ int ice_xmit_xdp_ring(struct xdp_frame *xdpf, struct ice_tx_ring *xdp_ring) return __ice_xmit_xdp_ring(&xdp, xdp_ring); } -/** - * ice_xmit_xdp_buff - convert an XDP buffer to an XDP frame and send it - * @xdp: XDP buffer - * @xdp_ring: XDP Tx ring - * - * Returns negative on failure, 0 on success. 
- */ -int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring) -{ - struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp); - - if (unlikely(!xdpf)) - return ICE_XDP_CONSUMED; - - return ice_xmit_xdp_ring(xdpf, xdp_ring); -} - /** * ice_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map * @xdp_ring: XDP ring diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index b1b419cdadd1..a25a68c69f22 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -597,6 +597,107 @@ ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) return skb; } +/** + * ice_clean_xdp_irq_zc - AF_XDP ZC specific Tx cleaning routine + * @xdp_ring: XDP Tx ring + */ +static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring) +{ + u16 ntc = xdp_ring->next_to_clean; + struct ice_tx_desc *tx_desc; + u16 cnt = xdp_ring->count; + struct ice_tx_buf *tx_buf; + u16 xsk_frames = 0; + u16 last_rs; + int i; + + last_rs = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : cnt - 1; + tx_desc = ICE_TX_DESC(xdp_ring, last_rs); + if (tx_desc->cmd_type_offset_bsz & + cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)) { + if (last_rs >= ntc) + xsk_frames = last_rs - ntc + 1; + else + xsk_frames = last_rs + cnt - ntc + 1; + } + + if (!xsk_frames) + return; + + if (likely(!xdp_ring->xdp_tx_active)) + goto skip; + + ntc = xdp_ring->next_to_clean; + for (i = 0; i < xsk_frames; i++) { + tx_buf = &xdp_ring->tx_buf[ntc]; + + if (tx_buf->xdp) { + xsk_buff_free(tx_buf->xdp); + xdp_ring->xdp_tx_active--; + } else { + xsk_frames++; + } + + ntc++; + if (ntc == cnt) + ntc = 0; + } +skip: + tx_desc->cmd_type_offset_bsz = 0; + xdp_ring->next_to_clean += xsk_frames; + if (xdp_ring->next_to_clean >= cnt) + xdp_ring->next_to_clean -= cnt; + if (xsk_frames) + xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); +} + +/** + * ice_xmit_xdp_tx_zc - AF_XDP ZC handler for XDP_TX + * @xdp: XDP buffer to xmit + * @xdp_ring: XDP ring to produce descriptor onto + * + * note that this function works directly on xdp_buff, no need to convert + * it to xdp_frame. xdp_buff pointer is stored to ice_tx_buf so that cleaning + * side will be able to xsk_buff_free() it. 
+ * + * Returns ICE_XDP_TX for successfully produced desc, ICE_XDP_CONSUMED if there + * was not enough space on XDP ring + */ +static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp, + struct ice_tx_ring *xdp_ring) +{ + u32 size = xdp->data_end - xdp->data; + u32 ntu = xdp_ring->next_to_use; + struct ice_tx_desc *tx_desc; + struct ice_tx_buf *tx_buf; + dma_addr_t dma; + + if (ICE_DESC_UNUSED(xdp_ring) < ICE_RING_QUARTER(xdp_ring)) { + ice_clean_xdp_irq_zc(xdp_ring); + if (!ICE_DESC_UNUSED(xdp_ring)) { + xdp_ring->ring_stats->tx_stats.tx_busy++; + return ICE_XDP_CONSUMED; + } + } + + dma = xsk_buff_xdp_get_dma(xdp); + xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, size); + + tx_buf = &xdp_ring->tx_buf[ntu]; + tx_buf->xdp = xdp; + tx_desc = ICE_TX_DESC(xdp_ring, ntu); + tx_desc->buf_addr = cpu_to_le64(dma); + tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, + 0, size, 0); + xdp_ring->xdp_tx_active++; + + if (++ntu == xdp_ring->count) + ntu = 0; + xdp_ring->next_to_use = ntu; + + return ICE_XDP_TX; +} + /** * ice_run_xdp_zc - Executes an XDP program in zero-copy path * @rx_ring: Rx ring @@ -630,7 +731,7 @@ ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, case XDP_PASS: break; case XDP_TX: - result = ice_xmit_xdp_buff(xdp, xdp_ring); + result = ice_xmit_xdp_tx_zc(xdp, xdp_ring); if (result == ICE_XDP_CONSUMED) goto out_failure; break; @@ -775,75 +876,6 @@ construct_skb: return failure ? budget : (int)total_rx_packets; } -/** - * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer - * @xdp_ring: XDP Tx ring - * @tx_buf: Tx buffer to clean - */ -static void -ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf) -{ - page_frag_free(tx_buf->raw_buf); - xdp_ring->xdp_tx_active--; - dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), - dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); - dma_unmap_len_set(tx_buf, len, 0); -} - -/** - * ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ - * @xdp_ring: XDP Tx ring - */ -static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring) -{ - u16 ntc = xdp_ring->next_to_clean; - struct ice_tx_desc *tx_desc; - u16 cnt = xdp_ring->count; - struct ice_tx_buf *tx_buf; - u16 xsk_frames = 0; - u16 last_rs; - int i; - - last_rs = xdp_ring->next_to_use ? 
xdp_ring->next_to_use - 1 : cnt - 1; - tx_desc = ICE_TX_DESC(xdp_ring, last_rs); - if ((tx_desc->cmd_type_offset_bsz & - cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) { - if (last_rs >= ntc) - xsk_frames = last_rs - ntc + 1; - else - xsk_frames = last_rs + cnt - ntc + 1; - } - - if (!xsk_frames) - return; - - if (likely(!xdp_ring->xdp_tx_active)) - goto skip; - - ntc = xdp_ring->next_to_clean; - for (i = 0; i < xsk_frames; i++) { - tx_buf = &xdp_ring->tx_buf[ntc]; - - if (tx_buf->raw_buf) { - ice_clean_xdp_tx_buf(xdp_ring, tx_buf); - tx_buf->raw_buf = NULL; - } else { - xsk_frames++; - } - - ntc++; - if (ntc >= xdp_ring->count) - ntc = 0; - } -skip: - tx_desc->cmd_type_offset_bsz = 0; - xdp_ring->next_to_clean += xsk_frames; - if (xdp_ring->next_to_clean >= cnt) - xdp_ring->next_to_clean -= cnt; - if (xsk_frames) - xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); -} - /** * ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor * @xdp_ring: XDP ring to produce the HW Tx descriptor on @@ -1051,8 +1083,8 @@ void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring) while (ntc != ntu) { struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc]; - if (tx_buf->raw_buf) - ice_clean_xdp_tx_buf(xdp_ring, tx_buf); + if (tx_buf->xdp) + xsk_buff_free(tx_buf->xdp); else xsk_frames++; -- cgit v1.2.3 From 66c0e13ad236c74ea88c7c1518f3cef7f372e3da Mon Sep 17 00:00:00 2001 From: Marek Majtyka Date: Wed, 1 Feb 2023 11:24:18 +0100 Subject: drivers: net: turn on XDP features A summary of the flags being set for various drivers is given below. Note that XDP_F_REDIRECT_TARGET and XDP_F_FRAG_TARGET are features that can be turned off and on at runtime. This means that these flags may be set and unset under RTNL lock protection by the driver. Hence, READ_ONCE must be used by code loading the flag value. Also, these flags are not used for synchronization against the availability of XDP resources on a device. It is merely a hint, and hence the read may race with the actual teardown of XDP resources on the device. This may change in the future, e.g. operations taking a reference on the XDP resources of the driver, and in turn inhibiting turning off this flag. However, for now, it can only be used as a hint to check whether device supports becoming a redirection target. Turn 'hw-offload' feature flag on for: - netronome (nfp) - netdevsim. Turn 'native' and 'zerocopy' features flags on for: - intel (i40e, ice, ixgbe, igc) - mellanox (mlx5). - stmmac - netronome (nfp) Turn 'native' features flags on for: - amazon (ena) - broadcom (bnxt) - freescale (dpaa, dpaa2, enetc) - funeth - intel (igb) - marvell (mvneta, mvpp2, octeontx2) - mellanox (mlx4) - mtk_eth_soc - qlogic (qede) - sfc - socionext (netsec) - ti (cpsw) - tap - tsnep - veth - xen - virtio_net. Turn 'basic' (tx, pass, aborted and drop) features flags on for: - netronome (nfp) - cavium (thunder) - hyperv. 
Turn 'redirect_target' feature flag on for: - amanzon (ena) - broadcom (bnxt) - freescale (dpaa, dpaa2) - intel (i40e, ice, igb, ixgbe) - ti (cpsw) - marvell (mvneta, mvpp2) - sfc - socionext (netsec) - qlogic (qede) - mellanox (mlx5) - tap - veth - virtio_net - xen Reviewed-by: Gerhard Engleder Reviewed-by: Simon Horman Acked-by: Stanislav Fomichev Acked-by: Jakub Kicinski Co-developed-by: Kumar Kartikeya Dwivedi Signed-off-by: Kumar Kartikeya Dwivedi Co-developed-by: Lorenzo Bianconi Signed-off-by: Lorenzo Bianconi Signed-off-by: Marek Majtyka Link: https://lore.kernel.org/r/3eca9fafb308462f7edb1f58e451d59209aa07eb.1675245258.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 4 ++++ drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 5 +++++ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++ drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 2 ++ drivers/net/ethernet/cavium/thunder/nicvf_main.c | 2 ++ drivers/net/ethernet/engleder/tsnep_main.c | 4 ++++ drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 4 ++++ drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 4 ++++ drivers/net/ethernet/freescale/enetc/enetc_pf.c | 3 +++ drivers/net/ethernet/fungible/funeth/funeth_main.c | 6 ++++++ drivers/net/ethernet/intel/i40e/i40e_main.c | 10 ++++++++-- drivers/net/ethernet/intel/ice/ice_main.c | 5 +++++ drivers/net/ethernet/intel/igb/igb_main.c | 9 ++++++++- drivers/net/ethernet/intel/igc/igc_main.c | 3 +++ drivers/net/ethernet/intel/igc/igc_xdp.c | 5 +++++ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 6 ++++++ drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 1 + drivers/net/ethernet/marvell/mvneta.c | 3 +++ drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 4 ++++ drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 8 ++++++-- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 6 ++++++ drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 +++++++++++ drivers/net/ethernet/microsoft/mana/mana_en.c | 2 ++ drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 5 +++++ drivers/net/ethernet/qlogic/qede/qede_main.c | 3 +++ drivers/net/ethernet/sfc/efx.c | 4 ++++ drivers/net/ethernet/sfc/siena/efx.c | 4 ++++ drivers/net/ethernet/socionext/netsec.c | 3 +++ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 ++ drivers/net/ethernet/ti/cpsw.c | 4 ++++ drivers/net/ethernet/ti/cpsw_new.c | 4 ++++ drivers/net/hyperv/netvsc_drv.c | 2 ++ drivers/net/netdevsim/netdev.c | 1 + drivers/net/tun.c | 5 +++++ drivers/net/veth.c | 4 ++++ drivers/net/virtio_net.c | 4 ++++ drivers/net/xen-netfront.c | 2 ++ 38 files changed, 154 insertions(+), 5 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index e8ad5ea31aff..d3999db7c6a2 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -597,7 +597,9 @@ static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf) if (rc) return rc; } + xdp_features_set_redirect_target(netdev, false); } else if (old_bpf_prog) { + xdp_features_clear_redirect_target(netdev); rc = ena_destroy_and_free_all_xdp_queues(adapter); if (rc) return rc; @@ -4103,6 +4105,8 @@ static void ena_set_conf_feat_params(struct ena_adapter *adapter, /* Set offload features */ ena_set_dev_offloads(feat, netdev); + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; + adapter->max_mtu = feat->dev_attr.max_mtu; netdev->max_mtu = 
adapter->max_mtu; netdev->min_mtu = ENA_MIN_MTU; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 06508eebb585..d6d6d5d37ff3 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -384,6 +384,11 @@ void aq_nic_ndev_init(struct aq_nic_s *self) self->ndev->mtu = aq_nic_cfg->mtu - ETH_HLEN; self->ndev->max_mtu = aq_hw_caps->mtu - ETH_FCS_LEN - ETH_HLEN; + self->ndev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT | + NETDEV_XDP_ACT_RX_SG | + NETDEV_XDP_ACT_NDO_XMIT_SG; } void aq_nic_set_tx_ring(struct aq_nic_s *self, unsigned int idx, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 240a7e8a7652..a1b4356dfb6c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -13686,6 +13686,9 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netif_set_tso_max_size(dev, GSO_MAX_SIZE); + dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_RX_SG; + #ifdef CONFIG_BNXT_SRIOV init_waitqueue_head(&bp->sriov_cfg_wait); #endif diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 36d5202c0aee..5843c93b1711 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -422,9 +422,11 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) if (prog) { bnxt_set_rx_skb_mode(bp, true); + xdp_features_set_redirect_target(dev, true); } else { int rx, tx; + xdp_features_clear_redirect_target(dev); bnxt_set_rx_skb_mode(bp, false); bnxt_get_max_rings(bp, &rx, &tx, true); if (rx > 1) { diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index f2f95493ec89..8b25313c7f6b 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -2218,6 +2218,8 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->netdev_ops = &nicvf_netdev_ops; netdev->watchdog_timeo = NICVF_TX_TIMEOUT; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC; + /* MTU range: 64 - 9200 */ netdev->min_mtu = NIC_HW_MIN_FRS; netdev->max_mtu = NIC_HW_MAX_FRS; diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index c3cf427a9409..6982aaa928b5 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -1926,6 +1926,10 @@ static int tsnep_probe(struct platform_device *pdev) netdev->features = NETIF_F_SG; netdev->hw_features = netdev->features | NETIF_F_LOOPBACK; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT | + NETDEV_XDP_ACT_NDO_XMIT_SG; + /* carrier off reporting is important to ethtool even BEFORE open */ netif_carrier_off(netdev); diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 027fff9f7db0..9318a2554056 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -244,6 +244,10 @@ static int dpaa_netdev_init(struct net_device *net_dev, net_dev->features |= net_dev->hw_features; net_dev->vlan_features = net_dev->features; + net_dev->xdp_features = NETDEV_XDP_ACT_BASIC | + 
NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; + if (is_valid_ether_addr(mac_addr)) { memcpy(net_dev->perm_addr, mac_addr, net_dev->addr_len); eth_hw_addr_set(net_dev, mac_addr); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 2e79d18fc3c7..746ccfde7255 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -4596,6 +4596,10 @@ static int dpaa2_eth_netdev_init(struct net_device *net_dev) NETIF_F_LLTX | NETIF_F_HW_TC | NETIF_F_TSO; net_dev->gso_max_segs = DPAA2_ETH_ENQUEUE_MAX_FDS; net_dev->hw_features = net_dev->features; + net_dev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_XSK_ZEROCOPY | + NETDEV_XDP_ACT_NDO_XMIT; if (priv->dpni_attrs.vlan_filter_entries) net_dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 7facc7d5261e..6b54071d4ecc 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -807,6 +807,9 @@ static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, ndev->hw_features |= NETIF_F_RXHASH; ndev->priv_flags |= IFF_UNICAST_FLT; + ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG | + NETDEV_XDP_ACT_NDO_XMIT_SG; if (si->hw_features & ENETC_SI_F_PSFP && !enetc_psfp_enable(priv)) { priv->active_offloads |= ENETC_F_QCI; diff --git a/drivers/net/ethernet/fungible/funeth/funeth_main.c b/drivers/net/ethernet/fungible/funeth/funeth_main.c index b4cce30e526a..df86770731ad 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_main.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_main.c @@ -1160,6 +1160,11 @@ static int fun_xdp_setup(struct net_device *dev, struct netdev_bpf *xdp) WRITE_ONCE(rxqs[i]->xdp_prog, prog); } + if (prog) + xdp_features_set_redirect_target(dev, true); + else + xdp_features_clear_redirect_target(dev); + dev->max_mtu = prog ? XDP_MAX_MTU : FUN_MAX_MTU; old_prog = xchg(&fp->xdp_prog, prog); if (old_prog) @@ -1765,6 +1770,7 @@ static int fun_create_netdev(struct fun_ethdev *ed, unsigned int portid) netdev->vlan_features = netdev->features & VLAN_FEAT; netdev->mpls_features = netdev->vlan_features; netdev->hw_enc_features = netdev->hw_features; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = FUN_MAX_MTU; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 53d0083e35da..8a79cc18c428 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -13339,9 +13339,11 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, old_prog = xchg(&vsi->xdp_prog, prog); if (need_reset) { - if (!prog) + if (!prog) { + xdp_features_clear_redirect_target(vsi->netdev); /* Wait until ndo_xsk_wakeup completes. */ synchronize_rcu(); + } i40e_reset_and_rebuild(pf, true, true); } @@ -13362,11 +13364,13 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, /* Kick start the NAPI context if there is an AF_XDP socket open * on that queue id. This so that receiving will start. 
*/ - if (need_reset && prog) + if (need_reset && prog) { for (i = 0; i < vsi->num_queue_pairs; i++) if (vsi->xdp_rings[i]->xsk_pool) (void)i40e_xsk_wakeup(vsi->netdev, i, XDP_WAKEUP_RX); + xdp_features_set_redirect_target(vsi->netdev, true); + } return 0; } @@ -13783,6 +13787,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID; netdev->features &= ~NETIF_F_HW_TC; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_XSK_ZEROCOPY; if (vsi->type == I40E_VSI_MAIN) { SET_NETDEV_DEV(netdev, &pf->pdev->dev); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 26a8910a41ff..074b0e6d0e2d 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -22,6 +22,7 @@ #include "ice_eswitch.h" #include "ice_tc_lib.h" #include "ice_vsi_vlan_ops.h" +#include #define DRV_SUMMARY "Intel(R) Ethernet Connection E800 Series Linux Driver" static const char ice_driver_string[] = DRV_SUMMARY; @@ -2912,11 +2913,13 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, if (xdp_ring_err) NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed"); } + xdp_features_set_redirect_target(vsi->netdev, false); /* reallocate Rx queues that are used for zero-copy */ xdp_ring_err = ice_realloc_zc_buf(vsi, true); if (xdp_ring_err) NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Rx resources failed"); } else if (ice_is_xdp_ena_vsi(vsi) && !prog) { + xdp_features_clear_redirect_target(vsi->netdev); xdp_ring_err = ice_destroy_xdp_rings(vsi); if (xdp_ring_err) NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed"); @@ -3459,6 +3462,8 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) np->vsi = vsi; ice_set_netdev_features(netdev); + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_XSK_ZEROCOPY; ice_set_ops(netdev); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 3c0c35ecea10..0e11a082f7a1 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2871,8 +2871,14 @@ static int igb_xdp_setup(struct net_device *dev, struct netdev_bpf *bpf) bpf_prog_put(old_prog); /* bpf is just replaced, RXQ and MTU are already setup */ - if (!need_reset) + if (!need_reset) { return 0; + } else { + if (prog) + xdp_features_set_redirect_target(dev, true); + else + xdp_features_clear_redirect_target(dev); + } if (running) igb_open(dev); @@ -3317,6 +3323,7 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->priv_flags |= IFF_SUPP_NOFCS; netdev->priv_flags |= IFF_UNICAST_FLT; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; /* MTU range: 68 - 9216 */ netdev->min_mtu = ETH_MIN_MTU; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index e86b15efaeb8..8b572cd2c350 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6533,6 +6533,9 @@ static int igc_probe(struct pci_dev *pdev, netdev->mpls_features |= NETIF_F_HW_CSUM; netdev->hw_enc_features |= netdev->vlan_features; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_XSK_ZEROCOPY; + /* MTU range: 68 - 9216 */ netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE; diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.c 
b/drivers/net/ethernet/intel/igc/igc_xdp.c index aeeb34e64610..e27af72aada8 100644 --- a/drivers/net/ethernet/intel/igc/igc_xdp.c +++ b/drivers/net/ethernet/intel/igc/igc_xdp.c @@ -29,6 +29,11 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog, if (old_prog) bpf_prog_put(old_prog); + if (prog) + xdp_features_set_redirect_target(dev, true); + else + xdp_features_clear_redirect_target(dev); + if (if_running) igc_open(dev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 43a44c1e1576..af4c12b6059f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -10301,6 +10301,8 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) if (err) return -EINVAL; + if (!prog) + xdp_features_clear_redirect_target(dev); } else { for (i = 0; i < adapter->num_rx_queues; i++) { WRITE_ONCE(adapter->rx_ring[i]->xdp_prog, @@ -10321,6 +10323,7 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) if (adapter->xdp_ring[i]->xsk_pool) (void)ixgbe_xsk_wakeup(adapter->netdev, i, XDP_WAKEUP_RX); + xdp_features_set_redirect_target(dev, true); } return 0; @@ -11018,6 +11021,9 @@ skip_sriov: netdev->priv_flags |= IFF_UNICAST_FLT; netdev->priv_flags |= IFF_SUPP_NOFCS; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_XSK_ZEROCOPY; + /* MTU range: 68 - 9710 */ netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index ea0a230c1153..a44e4bd56142 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4634,6 +4634,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_HW_VLAN_CTAG_TX; netdev->priv_flags |= IFF_UNICAST_FLT; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC; /* MTU range: 68 - 1504 or 9710 */ netdev->min_mtu = ETH_MIN_MTU; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index f8925cac61e4..dc2989103a77 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -5612,6 +5612,9 @@ static int mvneta_probe(struct platform_device *pdev) NETIF_F_TSO | NETIF_F_RXCSUM; dev->hw_features |= dev->features; dev->vlan_features |= dev->features; + dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG | + NETDEV_XDP_ACT_NDO_XMIT_SG; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netif_set_tso_max_segs(dev, MVNETA_MAX_TSO_SEGS); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 4da45c5abba5..9b4ecbe4f36d 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -6866,6 +6866,10 @@ static int mvpp2_port_probe(struct platform_device *pdev, dev->vlan_features |= features; netif_set_tso_max_segs(dev, MVPP2_MAX_TSO_SEGS); + + dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; + dev->priv_flags |= IFF_UNICAST_FLT; /* MTU range: 68 - 9704 */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index c1ea60bc2630..179433d0a54a 100644 --- 
a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -2512,10 +2512,13 @@ static int otx2_xdp_setup(struct otx2_nic *pf, struct bpf_prog *prog) /* Network stack and XDP shared same rx queues. * Use separate tx queues for XDP and network stack. */ - if (pf->xdp_prog) + if (pf->xdp_prog) { pf->hw.xdp_queues = pf->hw.rx_queues; - else + xdp_features_set_redirect_target(dev, false); + } else { pf->hw.xdp_queues = 0; + xdp_features_clear_redirect_target(dev); + } pf->hw.tot_tx_queues += pf->hw.xdp_queues; @@ -2878,6 +2881,7 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) netdev->watchdog_timeo = OTX2_TX_TIMEOUT; netdev->netdev_ops = &otx2_netdev_ops; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; netdev->min_mtu = OTX2_MIN_MTU; netdev->max_mtu = otx2_get_max_mtu(pf); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 801deac58bf7..ac54b6f2bb5c 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -4447,6 +4447,12 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) register_netdevice_notifier(&mac->device_notifier); } + if (mtk_page_pool_enabled(eth)) + eth->netdev[id]->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT | + NETDEV_XDP_ACT_NDO_XMIT_SG; + return 0; free_netdev: diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index af4c4858f397..e11bc0ac880e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -3416,6 +3416,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->rss_hash_fn = ETH_RSS_HASH_TOP; } + dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; + /* MTU range: 68 - hw-specific max */ dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = priv->max_mtu; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 0e87432ec6f1..e4996ef04d86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4780,6 +4780,13 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) if (old_prog) bpf_prog_put(old_prog); + if (reset) { + if (prog) + xdp_features_set_redirect_target(netdev, true); + else + xdp_features_clear_redirect_target(netdev); + } + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset) goto unlock; @@ -5175,6 +5182,10 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->features |= NETIF_F_HIGHDMA; netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_XSK_ZEROCOPY | + NETDEV_XDP_ACT_RX_SG; + netdev->priv_flags |= IFF_UNICAST_FLT; netif_set_tso_max_size(netdev, GSO_MAX_SIZE); diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 2f6a048dee90..6120f2b6684f 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -2160,6 +2160,8 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, ndev->hw_features |= NETIF_F_RXHASH; ndev->features = ndev->hw_features; ndev->vlan_features = 0; + ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + 
NETDEV_XDP_ACT_NDO_XMIT; err = register_netdev(ndev); if (err) { diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 18fc9971f1c8..e4825d885560 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2529,10 +2529,15 @@ static void nfp_net_netdev_init(struct nfp_net *nn) netdev->features &= ~NETIF_F_HW_VLAN_STAG_RX; nn->dp.ctrl &= ~NFP_NET_CFG_CTRL_RXQINQ; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC; + if (nn->app && nn->app->type->id == NFP_APP_BPF_NIC) + netdev->xdp_features |= NETDEV_XDP_ACT_HW_OFFLOAD; + /* Finalise the netdev setup */ switch (nn->dp.ops->version) { case NFP_NFD_VER_NFD3: netdev->netdev_ops = &nfp_nfd3_netdev_ops; + netdev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; break; case NFP_NFD_VER_NFDK: netdev->netdev_ops = &nfp_nfdk_netdev_ops; diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 953f304b8588..b6d999927e86 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -892,6 +892,9 @@ static void qede_init_ndev(struct qede_dev *edev) ndev->hw_features = hw_features; + ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; + /* MTU range: 46 - 9600 */ ndev->min_mtu = ETH_ZLEN - ETH_HLEN; ndev->max_mtu = QEDE_MAX_JUMBO_PACKET_SIZE; diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 0556542d7a6b..18ff8d8cff42 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -1078,6 +1078,10 @@ static int efx_pci_probe(struct pci_dev *pci_dev, pci_info(pci_dev, "Solarflare NIC detected\n"); + efx->net_dev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; + if (!efx->type->is_vf) efx_probe_vpd_strings(efx); diff --git a/drivers/net/ethernet/sfc/siena/efx.c b/drivers/net/ethernet/sfc/siena/efx.c index 60e5b7c8ccf9..a6ef21845224 100644 --- a/drivers/net/ethernet/sfc/siena/efx.c +++ b/drivers/net/ethernet/sfc/siena/efx.c @@ -1048,6 +1048,10 @@ static int efx_pci_probe(struct pci_dev *pci_dev, pci_info(pci_dev, "Solarflare NIC detected\n"); + efx->net_dev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; + if (!efx->type->is_vf) efx_probe_vpd_strings(efx); diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 9b46579b5a10..2d7347b71c41 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -2104,6 +2104,9 @@ static int netsec_probe(struct platform_device *pdev) NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; ndev->hw_features = ndev->features; + ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; + priv->rx_cksum_offload_flag = true; ret = netsec_register_mdio(priv, phy_addr); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index b7e5af58ab75..734d84263fd2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7150,6 +7150,8 @@ int stmmac_dvr_probe(struct device *device, ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM; + ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; ret = stmmac_tc_init(priv, 
priv); if (!ret) { diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 13c9c2d6b79b..37f0b62ec5d6 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1458,6 +1458,8 @@ static int cpsw_probe_dual_emac(struct cpsw_priv *priv) priv_sl2->emac_port = 1; cpsw->slaves[1].ndev = ndev; ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_RX; + ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; ndev->netdev_ops = &cpsw_netdev_ops; ndev->ethtool_ops = &cpsw_ethtool_ops; @@ -1635,6 +1637,8 @@ static int cpsw_probe(struct platform_device *pdev) cpsw->slaves[0].ndev = ndev; ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_RX; + ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; ndev->netdev_ops = &cpsw_netdev_ops; ndev->ethtool_ops = &cpsw_ethtool_ops; diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 83596ec0c7cb..35128dd45ffc 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -1405,6 +1405,10 @@ static int cpsw_create_ports(struct cpsw_common *cpsw) ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_NETNS_LOCAL | NETIF_F_HW_TC; + ndev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; + ndev->netdev_ops = &cpsw_netdev_ops; ndev->ethtool_ops = &cpsw_ethtool_ops; SET_NETDEV_DEV(ndev, dev); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index f9b219e6cd58..a9b139bbdb2c 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2559,6 +2559,8 @@ static int netvsc_probe(struct hv_device *dev, netdev_lockdep_set_classes(net); + net->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; + /* MTU range: 68 - 1500 or 65521 */ net->min_mtu = NETVSC_MTU_MIN; if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 6db6a75ff9b9..35fa1ca98671 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -286,6 +286,7 @@ static void nsim_setup(struct net_device *dev) NETIF_F_TSO; dev->hw_features |= NETIF_F_HW_TC; dev->max_mtu = ETH_MAX_MTU; + dev->xdp_features = NETDEV_XDP_ACT_HW_OFFLOAD; } static int nsim_init_netdevsim(struct netdevsim *ns) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index a7d17c680f4a..36620afde373 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1401,6 +1401,11 @@ static void tun_net_initialize(struct net_device *dev) eth_hw_addr_random(dev); + /* Currently tun does not support XDP, only tap does. 
*/ + dev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; + break; } diff --git a/drivers/net/veth.c b/drivers/net/veth.c index ba3e05832843..1bb54de7124d 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1686,6 +1686,10 @@ static void veth_setup(struct net_device *dev) dev->hw_enc_features = VETH_FEATURES; dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; netif_set_tso_max_size(dev, GSO_MAX_SIZE); + + dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG | + NETDEV_XDP_ACT_NDO_XMIT_SG; } /* diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7e1a98430190..692dff071782 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3280,7 +3280,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, if (i == 0 && !old_prog) virtnet_clear_guest_offloads(vi); } + if (!old_prog) + xdp_features_set_redirect_target(dev, false); } else { + xdp_features_clear_redirect_target(dev); vi->xdp_enabled = false; } @@ -3910,6 +3913,7 @@ static int virtnet_probe(struct virtio_device *vdev) dev->hw_features |= NETIF_F_GRO_HW; dev->vlan_features = dev->features; + dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT; /* MTU range: 68 - 65535 */ dev->min_mtu = MIN_MTU; diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 12b074286df9..47d54d8ea59d 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1741,6 +1741,8 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) * negotiate with the backend regarding supported features. */ netdev->features |= netdev->hw_features; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; netdev->ethtool_ops = &xennet_ethtool_ops; netdev->min_mtu = ETH_MIN_MTU; -- cgit v1.2.3 From 30bbf891f1b8fd92db271c7198609a076d7fbf14 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 7 Feb 2023 10:53:40 +0100 Subject: virtio_net: Update xdp_features with xdp multi-buff Now virtio-net supports xdp multi-buffer so add it to xdp_features. Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Acked-by: Michael S. 
Tsirkin Link: https://lore.kernel.org/bpf/60c76cd63a0246db785606e8891b925fd5c9bf06.1675763384.git.lorenzo@kernel.org --- drivers/net/virtio_net.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 692dff071782..ddc3dc7ea73c 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3281,7 +3281,7 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, virtnet_clear_guest_offloads(vi); } if (!old_prog) - xdp_features_set_redirect_target(dev, false); + xdp_features_set_redirect_target(dev, true); } else { xdp_features_clear_redirect_target(dev); vi->xdp_enabled = false; @@ -3940,8 +3940,10 @@ static int virtnet_probe(struct virtio_device *vdev) INIT_WORK(&vi->config_work, virtnet_config_changed_work); spin_lock_init(&vi->refill_lock); - if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) + if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) { vi->mergeable_rx_bufs = true; + dev->xdp_features |= NETDEV_XDP_ACT_RX_SG; + } if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { vi->rx_usecs = 0; -- cgit v1.2.3 From 9b0651e429a04b58c29f2133da4ccc70f81263b8 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 8 Feb 2023 17:58:40 +0100 Subject: sfc: move xdp_features configuration in efx_pci_probe_post_io() Move xdp_features configuration from efx_pci_probe() to efx_pci_probe_post_io() since it is where all the other basic netdev features are initialised. Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/9bd31c9a29bcf406ab90a249a28fc328e5578fd1.1675875404.git.lorenzo@kernel.org Signed-off-by: Martin KaFai Lau --- drivers/net/ethernet/sfc/efx.c | 8 ++++---- drivers/net/ethernet/sfc/siena/efx.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 18ff8d8cff42..9569c3356b4a 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -1025,6 +1025,10 @@ static int efx_pci_probe_post_io(struct efx_nic *efx) net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; net_dev->features |= efx->fixed_features; + net_dev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; + rc = efx_register_netdev(efx); if (!rc) return 0; @@ -1078,10 +1082,6 @@ static int efx_pci_probe(struct pci_dev *pci_dev, pci_info(pci_dev, "Solarflare NIC detected\n"); - efx->net_dev->xdp_features = NETDEV_XDP_ACT_BASIC | - NETDEV_XDP_ACT_REDIRECT | - NETDEV_XDP_ACT_NDO_XMIT; - if (!efx->type->is_vf) efx_probe_vpd_strings(efx); diff --git a/drivers/net/ethernet/sfc/siena/efx.c b/drivers/net/ethernet/sfc/siena/efx.c index a6ef21845224..ef52ec71d197 100644 --- a/drivers/net/ethernet/sfc/siena/efx.c +++ b/drivers/net/ethernet/sfc/siena/efx.c @@ -1007,6 +1007,10 @@ static int efx_pci_probe_post_io(struct efx_nic *efx) net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; net_dev->features |= efx->fixed_features; + net_dev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; + rc = efx_register_netdev(efx); if (!rc) return 0; @@ -1048,10 +1052,6 @@ static int efx_pci_probe(struct pci_dev *pci_dev, pci_info(pci_dev, "Solarflare NIC detected\n"); - efx->net_dev->xdp_features = NETDEV_XDP_ACT_BASIC | - NETDEV_XDP_ACT_REDIRECT | - NETDEV_XDP_ACT_NDO_XMIT; - if (!efx->type->is_vf) efx_probe_vpd_strings(efx); -- cgit v1.2.3
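Taken together, the hunks above all follow the same two-step pattern: at probe time a driver advertises the XDP actions its datapath can always handle in netdev->xdp_features, and drivers whose redirect-target capability depends on an attached program toggle it at XDP setup time with xdp_features_set_redirect_target()/xdp_features_clear_redirect_target(). The following is a minimal sketch of that pattern, not code from any of the patches: foo_probe(), foo_xdp_setup() and struct foo_adapter are invented names for illustration, while the NETDEV_XDP_ACT_* flags and the two helpers are the ones the patches themselves use.

/*
 * Illustrative sketch only; "foo" is a hypothetical driver.
 */
#include <linux/bpf.h>
#include <linux/netdevice.h>
#include <net/xdp.h>

struct foo_adapter {
	struct net_device *netdev;
};

static int foo_probe(struct foo_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;

	/* Advertise what the Rx path can always do, regardless of whether
	 * an XDP program is currently attached.
	 */
	netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT;

	return register_netdev(netdev);
}

static int foo_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
{
	/* Drivers that only provide XDP Tx queues while a program is loaded
	 * flip the "usable as a redirect target" bit at attach/detach time;
	 * the second argument indicates whether ndo_xdp_xmit() also accepts
	 * S/G (multi-buffer) frames.
	 */
	if (prog)
		xdp_features_set_redirect_target(dev, false);
	else
		xdp_features_clear_redirect_target(dev);

	return 0;
}

A driver that gains multi-buffer Rx support extends the probe-time mask in the same way the virtio_net follow-up above does (dev->xdp_features |= NETDEV_XDP_ACT_RX_SG), and the final sfc patch merely moves the probe-time assignment into efx_pci_probe_post_io(), next to the rest of the netdev feature setup, without changing which flags are advertised.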