Merge branch 'xdp_xmit-bulking'

Jesper Dangaard Brouer says: ==================== This patchset change ndo_xdp_xmit API to take a bulk of xdp frames. When kernel is compiled with CONFIG_RETPOLINE, every indirect function pointer (branch) call hurts performance. For XDP this have a huge negative performance impact. This patchset reduce the needed (indirect) calls to ndo_xdp_xmit, but also prepares for further optimizations. The DMA APIs use of indirect function pointer calls is the primary source the regression. It is left for a followup patchset, to use bulking calls towards the DMA API (via the scatter-gatter calls). The other advantage of this API change is that drivers can easier amortize the cost of any sync/locking scheme, over the bulk of packets. The assumption of the current API is that the driver implemementing the NDO will also allocate a dedicated XDP TX queue for every CPU in the system. Which is not always possible or practical to configure. E.g. ixgbe cannot load an XDP program on a machine with more than 96 CPUs, due to limited hardware TX queues. E.g. virtio_net is hard to configure as it requires manually increasing the queues. E.g. tun driver chooses to use a per XDP frame producer lock modulo smp_processor_id over avail queues. I'm considered adding 'flags' to ndo_xdp_xmit, but it's not part of this patchset. This will be a followup patchset, once we know if this will be needed (e.g. for non-map xdp_redirect flush-flag, and if AF_XDP chooses to use ndo_xdp_xmit for TX). --- V5: Fixed up issues spotted by Daniel and John V4: Splitout the patches from 4 to 8 patches. I cannot split the driver changes from the NDO change, but I've tried to isolated the NDO change together with the driver change as much as possible. ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
author: Alexei Starovoitov <ast@kernel.org> 2018-05-24 18:36:16 -0700
committer: Alexei Starovoitov <ast@kernel.org> 2018-05-24 18:36:16 -0700
commit: 10f678683e4026e43524b0492068a371d00fdeed (patch)
tree: 069715fbcf7b0f4b73103861fd0a111e143b5705 /net
parent: f80acbd233382619f597f785f8c238084dc62e21 (diff)
parent: a570e48fee1bc26f47aba2e1493f96a03bed3c8f (diff)
download: linux-10f678683e4026e43524b0492068a371d00fdeed.tar.gz
linux-10f678683e4026e43524b0492068a371d00fdeed.tar.bz2
linux-10f678683e4026e43524b0492068a371d00fdeed.zip
2 files changed, 22 insertions, 21 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index aa114c4acb25..1d75f9322275 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3039,7 +3039,7 @@ static int __bpf_tx_xdp(struct net_device *dev,
 			u32 index)
 {
 	struct xdp_frame *xdpf;
-	int err;
+	int sent;
 
 	if (!dev->netdev_ops->ndo_xdp_xmit) {
 		return -EOPNOTSUPP;
@@ -3049,9 +3049,9 @@ static int __bpf_tx_xdp(struct net_device *dev,
 	if (unlikely(!xdpf))
 		return -EOVERFLOW;
 
-	err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
-	if (err)
-		return err;
+	sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf);
+	if (sent <= 0)
+		return sent;
 	dev->netdev_ops->ndo_xdp_flush(dev);
 	return 0;
 }
@@ -3065,20 +3065,9 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
 
 	switch (map->map_type) {
 	case BPF_MAP_TYPE_DEVMAP: {
-		struct net_device *dev = fwd;
-		struct xdp_frame *xdpf;
+		struct bpf_dtab_netdev *dst = fwd;
 
-		if (!dev->netdev_ops->ndo_xdp_xmit)
-			return -EOPNOTSUPP;
-
-		xdpf = convert_to_xdp_frame(xdp);
-		if (unlikely(!xdpf))
-			return -EOVERFLOW;
-
-		/* TODO: move to inside map code instead, for bulk support
-		 * err = dev_map_enqueue(dev, xdp);
-		 */
-		err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
+		err = dev_map_enqueue(dst, xdp, dev_rx);
 		if (err)
 			return err;
 		__dev_map_insert_ctx(map, index);
diff --git a/net/core/xdp.c b/net/core/xdp.c
index bf6758f74339..cb8c4e061a5a 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -308,7 +308,13 @@ err:
 }
 EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
 
-static void xdp_return(void *data, struct xdp_mem_info *mem)
+/* XDP RX runs under NAPI protection, and in different delivery error
+ * scenarios (e.g. queue full), it is possible to return the xdp_frame
+ * while still leveraging this protection.  The @napi_direct boolian
+ * is used for those calls sites.  Thus, allowing for faster recycling
+ * of xdp_frames/pages in those cases.
+ */
+static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct)
 {
 	struct xdp_mem_allocator *xa;
 	struct page *page;
@@ -320,7 +326,7 @@ static void xdp_return(void *data, struct xdp_mem_info *mem)
 		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
 		page = virt_to_head_page(data);
 		if (xa)
-			page_pool_put_page(xa->page_pool, page);
+			page_pool_put_page(xa->page_pool, page, napi_direct);
 		else
 			put_page(page);
 		rcu_read_unlock();
@@ -340,12 +346,18 @@ static void xdp_return(void *data, struct xdp_mem_info *mem)
 
 void xdp_return_frame(struct xdp_frame *xdpf)
 {
-	xdp_return(xdpf->data, &xdpf->mem);
+	__xdp_return(xdpf->data, &xdpf->mem, false);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame);
 
+void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
+{
+	__xdp_return(xdpf->data, &xdpf->mem, true);
+}
+EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
+
 void xdp_return_buff(struct xdp_buff *xdp)
 {
-	xdp_return(xdp->data, &xdp->rxq->mem);
+	__xdp_return(xdp->data, &xdp->rxq->mem, true);
 }
 EXPORT_SYMBOL_GPL(xdp_return_buff);
author	Alexei Starovoitov <ast@kernel.org>	2018-05-24 18:36:16 -0700
committer	Alexei Starovoitov <ast@kernel.org>	2018-05-24 18:36:16 -0700
commit	10f678683e4026e43524b0492068a371d00fdeed (patch)
tree	069715fbcf7b0f4b73103861fd0a111e143b5705 /net
parent	f80acbd233382619f597f785f8c238084dc62e21 (diff)
parent	a570e48fee1bc26f47aba2e1493f96a03bed3c8f (diff)
download	linux-10f678683e4026e43524b0492068a371d00fdeed.tar.gz linux-10f678683e4026e43524b0492068a371d00fdeed.tar.bz2 linux-10f678683e4026e43524b0492068a371d00fdeed.zip