author     Alexander Lobakin <aleksander.lobakin@intel.com>  2024-06-20 15:53:46 +0200
committer  Tony Nguyen <anthony.l.nguyen@intel.com>          2024-07-10 10:47:45 -0700
commit     90912f9f4f2d159e93cf9a4c732506342489a1d5
tree       c05abc4c4e1c71b7348e511fd54dd57cc3a185ed  /drivers/net/ethernet/intel/idpf/idpf_txrx.h
parent     5aaac1aece4e4db9d620791a33f7a4173c660e65
idpf: convert header split mode to libeth + napi_build_skb()
Currently, idpf uses the following model for the header buffers:

* buffers are allocated via dma_alloc_coherent();
* when receiving, napi_alloc_skb() is called and then the header is
  copied into the newly allocated linear part.

This is far from optimal: the DMA coherent zone is slow on many systems
and the memcpy() negates the idea and benefits of the header split. Not
to mention that XDP can't be run on DMA coherent buffers, while
allocating an skb just to run an XDP program is a broken idea in the
first place.

Instead, use libeth to create page_pools for the header buffers,
allocate them dynamically and then build an skb via napi_build_skb()
around them with no memory copy. With one exception...

When you enable header split, you expect to always have a separate
header buffer, so that you can reserve headroom and tailroom only there
and then use full buffers for the data. For example, this is how TCP
zerocopy works -- the payload has to be aligned to PAGE_SIZE. The
current hardware running idpf does *not* guarantee that headers are
always placed separately. For example, on my setup, even ICMP packets
are written as one piece to the data buffers. You can't build a valid
skb around a data buffer in this case.

To not complicate things and not lose TCP zerocopy etc., when this
happens, use the empty header buffer, pull either the full frame (if
it's short) or just the Ethernet header into it, and build an skb
around it. The GRO layer will pull more from the data buffer later.
This workaround will hopefully be removed one day.

Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
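The fallback described above can be summarized in a short sketch. This is
illustrative only, not code from this patch: the helper name and the
copy_max threshold are hypothetical, the page_pool headroom is read from
pp->p.offset as the rest of this diff does, and idpf_rx_build_skb() plus
struct libeth_fqe come from the header changes below.

/* Illustrative sketch only -- not the driver's actual RX code.
 * When the HW did not split the headers, copy the start of the frame
 * into the otherwise empty header buffer and build the skb around that
 * buffer instead of the data one.
 */
static struct sk_buff *rx_hsplit_fallback_sketch(const struct libeth_fqe *hdr,
						 const void *data_va,
						 u32 pkt_len, u32 copy_max)
{
	u32 hr = hdr->page->pp->p.offset;	/* page_pool headroom */
	void *hdr_va = page_address(hdr->page) + hdr->offset + hr;
	/* Pull either the whole short frame or just the Ethernet header. */
	u32 pull = pkt_len <= copy_max ? pkt_len : ETH_HLEN;

	memcpy(hdr_va, data_va, pull);

	/* The skb lives in the header buffer; GRO pulls the rest of the
	 * payload from the data buffer later.
	 */
	return idpf_rx_build_skb(hdr, pull);
}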
Diffstat (limited to 'drivers/net/ethernet/intel/idpf/idpf_txrx.h')
-rw-r--r--  drivers/net/ethernet/intel/idpf/idpf_txrx.h  52
1 file changed, 28 insertions(+), 24 deletions(-)
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
index 98d47728cc8c..47d70ad0d6b4 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
@@ -7,7 +7,7 @@
#include <linux/dim.h>
#include <net/libeth/cache.h>
-#include <net/page_pool/helpers.h>
+#include <net/libeth/rx.h>
#include <net/tcp.h>
#include <net/netdev_queues.h>
@@ -103,8 +103,6 @@ do { \
#define IDPF_RX_BUF_STRIDE 32
#define IDPF_RX_BUF_POST_STRIDE 16
#define IDPF_LOW_WATERMARK 64
-/* Size of header buffer specifically for header split */
-#define IDPF_HDR_BUF_SIZE 256
#define IDPF_PACKET_HDR_PAD \
(ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN * 2)
#define IDPF_TX_TSO_MIN_MSS 88
@@ -300,14 +298,7 @@ struct idpf_rx_extracted {
#define IDPF_TX_MAX_DESC_DATA_ALIGNED \
ALIGN_DOWN(IDPF_TX_MAX_DESC_DATA, IDPF_TX_MAX_READ_REQ_SIZE)
-#define IDPF_RX_DMA_ATTR \
- (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
-
-struct idpf_rx_buf {
- struct page *page;
- unsigned int page_offset;
- u16 truesize;
-};
+#define idpf_rx_buf libeth_fqe
#define IDPF_RX_MAX_PTYPE_PROTO_IDS 32
#define IDPF_RX_MAX_PTYPE_SZ (sizeof(struct virtchnl2_ptype) + \
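For illustration, the alias above means every driver-facing idpf_rx_buf
declaration now names the libeth type; the fields the rest of this header
keeps using (page, offset, truesize) are the ones provided by
struct libeth_fqe:

/* With the alias above, these two declarations name the same type. */
struct idpf_rx_buf *bufs;	/* driver-facing name */
struct libeth_fqe *fqes;	/* libeth type: ->page, ->offset, ->truesize */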
@@ -751,14 +742,14 @@ libeth_cacheline_set_assert(struct idpf_tx_queue, 64,
/**
* struct idpf_buf_queue - software structure representing a buffer queue
* @split_buf: buffer descriptor array
- * @rx_buf: Struct with RX buffer related members
- * @rx_buf.buf: See struct idpf_rx_buf
- * @rx_buf.hdr_buf_pa: DMA handle
- * @rx_buf.hdr_buf_va: Virtual address
- * @pp: Page pool pointer
+ * @hdr_buf: &libeth_fqe for header buffers
+ * @hdr_pp: &page_pool for header buffers
+ * @buf: &idpf_rx_buf for data buffers
+ * @pp: &page_pool for data buffers
* @tail: Tail offset
* @flags: See enum idpf_queue_flags_t
* @desc_count: Number of descriptors
+ * @hdr_truesize: truesize for buffer headers
* @next_to_use: Next descriptor to use
* @next_to_clean: Next descriptor to clean
* @next_to_alloc: RX buffer to allocate at
@@ -773,16 +764,16 @@ libeth_cacheline_set_assert(struct idpf_tx_queue, 64,
struct idpf_buf_queue {
__cacheline_group_begin_aligned(read_mostly);
struct virtchnl2_splitq_rx_buf_desc *split_buf;
- struct {
- struct idpf_rx_buf *buf;
- dma_addr_t hdr_buf_pa;
- void *hdr_buf_va;
- } rx_buf;
+ struct libeth_fqe *hdr_buf;
+ struct page_pool *hdr_pp;
+ struct idpf_rx_buf *buf;
struct page_pool *pp;
void __iomem *tail;
DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
u32 desc_count;
+
+ u32 hdr_truesize;
__cacheline_group_end_aligned(read_mostly);
__cacheline_group_begin_aligned(read_write);
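With the structure split this way, each buffer-queue slot is backed by two
independently pooled buffers: a header fragment from hdr_pp and a data
fragment from pp. A conceptual refill sketch follows; it uses only the
fields added above plus the standard page_pool DMA helper, while the
descriptor field names (hdr_addr, pkt_addr) and the helper itself are
assumptions for illustration, not taken from this diff.

/* Conceptual refill sketch (illustrative only, assumed descriptor layout). */
static void bufq_post_one_sketch(struct idpf_buf_queue *bufq, u32 idx)
{
	struct virtchnl2_splitq_rx_buf_desc *desc = &bufq->split_buf[idx];
	const struct libeth_fqe *hdr = &bufq->hdr_buf[idx];
	const struct idpf_rx_buf *buf = &bufq->buf[idx];

	/* One descriptor carries two DMA addresses: header and data. */
	desc->hdr_addr = cpu_to_le64(page_pool_get_dma_addr(hdr->page) +
				     hdr->offset);
	desc->pkt_addr = cpu_to_le64(page_pool_get_dma_addr(buf->page) +
				     buf->offset);
}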
@@ -976,6 +967,18 @@ struct idpf_txq_group {
u32 num_completions_pending;
};
+static inline int idpf_q_vector_to_mem(const struct idpf_q_vector *q_vector)
+{
+ u32 cpu;
+
+ if (!q_vector)
+ return NUMA_NO_NODE;
+
+ cpu = cpumask_first(q_vector->affinity_mask);
+
+ return cpu < nr_cpu_ids ? cpu_to_mem(cpu) : NUMA_NO_NODE;
+}
+
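The helper above makes NUMA-aware placement a one-liner. An illustrative
use (the surrounding names are hypothetical, only idpf_q_vector_to_mem()
comes from this patch):

	/* Illustrative only: allocate a per-vector array on the memory
	 * node backing the vector's CPU affinity. `num_bufq' and `bufqs'
	 * are hypothetical names.
	 */
	bufqs = kcalloc_node(num_bufq, sizeof(*bufqs), GFP_KERNEL,
			     idpf_q_vector_to_mem(q_vector));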
/**
* idpf_size_to_txd_count - Get number of descriptors needed for large Tx frag
* @size: transmit request size in bytes
@@ -1044,7 +1047,7 @@ static inline dma_addr_t idpf_alloc_page(struct page_pool *pool,
unsigned int buf_size)
{
if (buf_size == IDPF_RX_BUF_2048)
- buf->page = page_pool_dev_alloc_frag(pool, &buf->page_offset,
+ buf->page = page_pool_dev_alloc_frag(pool, &buf->offset,
buf_size);
else
buf->page = page_pool_dev_alloc_pages(pool);
@@ -1054,7 +1057,7 @@ static inline dma_addr_t idpf_alloc_page(struct page_pool *pool,
buf->truesize = buf_size;
- return page_pool_get_dma_addr(buf->page) + buf->page_offset +
+ return page_pool_get_dma_addr(buf->page) + buf->offset +
pool->p.offset;
}
@@ -1081,7 +1084,7 @@ static inline void idpf_rx_sync_for_cpu(struct idpf_rx_buf *rx_buf, u32 len)
dma_sync_single_range_for_cpu(pp->p.dev,
page_pool_get_dma_addr(page),
- rx_buf->page_offset + pp->p.offset, len,
+ rx_buf->offset + pp->p.offset, len,
page_pool_get_dma_dir(pp));
}
@@ -1110,6 +1113,7 @@ void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb,
struct sk_buff *idpf_rx_construct_skb(const struct idpf_rx_queue *rxq,
struct idpf_rx_buf *rx_buf,
unsigned int size);
+struct sk_buff *idpf_rx_build_skb(const struct libeth_fqe *buf, u32 size);
void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val,
bool xmit_more);
unsigned int idpf_size_to_txd_count(unsigned int size);
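Only the idpf_rx_build_skb() prototype comes from this header. A plausible
sketch of what such a builder does, assuming it follows the usual
napi_build_skb() + page_pool recycling pattern the commit message
describes, with the headroom taken from pp->p.offset as elsewhere in this
diff:

/* Plausible sketch, not copied from the patch: wrap napi_build_skb()
 * around the page_pool-backed header buffer with no memcpy().
 */
struct sk_buff *idpf_rx_build_skb(const struct libeth_fqe *buf, u32 size)
{
	u32 hr = buf->page->pp->p.offset;	/* headroom reserved by the pool */
	void *va = page_address(buf->page) + buf->offset;
	struct sk_buff *skb;

	skb = napi_build_skb(va, buf->truesize);
	if (unlikely(!skb))
		return NULL;

	skb_mark_for_recycle(skb);	/* return the page to its page_pool */

	skb_reserve(skb, hr);
	__skb_put(skb, size);

	return skb;
}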