summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_ethtool.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c29
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_tx.c140
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h27
5 files changed, 211 insertions, 8 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 51a2e8252b82..f32e272c83dd 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1722,6 +1722,12 @@ static int mlx4_en_set_channels(struct net_device *dev,
!channel->tx_count || !channel->rx_count)
return -EINVAL;
+ if (channel->tx_count * MLX4_EN_NUM_UP <= priv->xdp_ring_num) {
+ en_err(priv, "Minimum %d tx channels required with XDP on\n",
+ priv->xdp_ring_num / MLX4_EN_NUM_UP + 1);
+ return -EINVAL;
+ }
+
mutex_lock(&mdev->state_lock);
if (priv->port_up) {
port_up = 1;
@@ -1740,7 +1746,8 @@ static int mlx4_en_set_channels(struct net_device *dev,
goto out;
}
- netif_set_real_num_tx_queues(dev, priv->tx_ring_num);
+ netif_set_real_num_tx_queues(dev, priv->tx_ring_num -
+ priv->xdp_ring_num);
netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
if (dev->num_tc)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 47ae2a211300..9abbba6c1475 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1522,6 +1522,24 @@ static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
free_cpumask_var(priv->rx_ring[ring_idx]->affinity_mask);
}
+static void mlx4_en_init_recycle_ring(struct mlx4_en_priv *priv,
+ int tx_ring_idx)
+{
+ struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[tx_ring_idx];
+ int rr_index;
+
+ rr_index = (priv->xdp_ring_num - priv->tx_ring_num) + tx_ring_idx;
+ if (rr_index >= 0) {
+ tx_ring->free_tx_desc = mlx4_en_recycle_tx_desc;
+ tx_ring->recycle_ring = priv->rx_ring[rr_index];
+ en_dbg(DRV, priv,
+ "Set tx_ring[%d]->recycle_ring = rx_ring[%d]\n",
+ tx_ring_idx, rr_index);
+ } else {
+ tx_ring->recycle_ring = NULL;
+ }
+}
+
int mlx4_en_start_port(struct net_device *dev)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -1644,6 +1662,8 @@ int mlx4_en_start_port(struct net_device *dev)
}
tx_ring->tx_queue = netdev_get_tx_queue(dev, i);
+ mlx4_en_init_recycle_ring(priv, i);
+
/* Arm CQ for TX completions */
mlx4_en_arm_cq(priv, cq);
@@ -2561,6 +2581,13 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
return -EOPNOTSUPP;
}
+ if (priv->tx_ring_num < xdp_ring_num + MLX4_EN_NUM_UP) {
+ en_err(priv,
+ "Minimum %d tx channels required to run XDP\n",
+ (xdp_ring_num + MLX4_EN_NUM_UP) / MLX4_EN_NUM_UP);
+ return -EINVAL;
+ }
+
if (prog) {
prog = bpf_prog_add(prog, priv->rx_ring_num - 1);
if (IS_ERR(prog))
@@ -2574,6 +2601,8 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
}
priv->xdp_ring_num = xdp_ring_num;
+ netif_set_real_num_tx_queues(dev, priv->tx_ring_num -
+ priv->xdp_ring_num);
for (i = 0; i < priv->rx_ring_num; i++) {
old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 9dd5dc19a537..11d88c817137 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -783,7 +783,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
struct mlx4_en_rx_alloc *frags;
struct mlx4_en_rx_desc *rx_desc;
struct bpf_prog *xdp_prog;
+ int doorbell_pending;
struct sk_buff *skb;
+ int tx_index;
int index;
int nr;
unsigned int length;
@@ -800,6 +802,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
return polled;
xdp_prog = READ_ONCE(ring->xdp_prog);
+ doorbell_pending = 0;
+ tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring;
/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
* descriptor offset can be deduced from the CQE index instead of
@@ -898,6 +902,12 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
switch (act) {
case XDP_PASS:
break;
+ case XDP_TX:
+ if (!mlx4_en_xmit_frame(frags, dev,
+ length, tx_index,
+ &doorbell_pending))
+ goto consumed;
+ break;
default:
bpf_warn_invalid_xdp_action(act);
case XDP_ABORTED:
@@ -1068,6 +1078,9 @@ consumed:
}
out:
+ if (doorbell_pending)
+ mlx4_en_xmit_doorbell(priv->tx_ring[tx_index]);
+
AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
mlx4_cq_set_ci(&cq->mcq);
wmb(); /* ensure HW sees CQ consumer before we post new buffers */
@@ -1147,6 +1160,7 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
* This only works when num_frags == 1.
*/
if (priv->xdp_ring_num) {
+ dma_dir = PCI_DMA_BIDIRECTIONAL;
/* This will gain efficient xdp frame recycling at the expense
* of more costly truesize accounting
*/
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 2f56018ddae9..9df87ca0515a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -196,6 +196,7 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
ring->last_nr_txbb = 1;
memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info));
memset(ring->buf, 0, ring->buf_size);
+ ring->free_tx_desc = mlx4_en_free_tx_desc;
ring->qp_state = MLX4_QP_STATE_RST;
ring->doorbell_qpn = cpu_to_be32(ring->qp.qpn << 8);
@@ -265,10 +266,10 @@ static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv,
}
-static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
- struct mlx4_en_tx_ring *ring,
- int index, u8 owner, u64 timestamp,
- int napi_mode)
+u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring,
+ int index, u8 owner, u64 timestamp,
+ int napi_mode)
{
struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE;
@@ -344,6 +345,27 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
return tx_info->nr_txbb;
}
+u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring,
+ int index, u8 owner, u64 timestamp,
+ int napi_mode)
+{
+ struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
+ struct mlx4_en_rx_alloc frame = {
+ .page = tx_info->page,
+ .dma = tx_info->map0_dma,
+ .page_offset = 0,
+ .page_size = PAGE_SIZE,
+ };
+
+ if (!mlx4_en_rx_recycle(ring->recycle_ring, &frame)) {
+ dma_unmap_page(priv->ddev, tx_info->map0_dma,
+ PAGE_SIZE, priv->frag_info[0].dma_dir);
+ put_page(tx_info->page);
+ }
+
+ return tx_info->nr_txbb;
+}
int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
{
@@ -362,7 +384,7 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
}
while (ring->cons != ring->prod) {
- ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring,
+ ring->last_nr_txbb = ring->free_tx_desc(priv, ring,
ring->cons & ring->size_mask,
!!(ring->cons & ring->size), 0,
0 /* Non-NAPI caller */);
@@ -444,7 +466,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
timestamp = mlx4_en_get_cqe_ts(cqe);
/* free next descriptor */
- last_nr_txbb = mlx4_en_free_tx_desc(
+ last_nr_txbb = ring->free_tx_desc(
priv, ring, ring_index,
!!((ring_cons + txbbs_skipped) &
ring->size), timestamp, napi_budget);
@@ -476,6 +498,9 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb;
ACCESS_ONCE(ring->cons) = ring_cons + txbbs_skipped;
+ if (ring->free_tx_desc == mlx4_en_recycle_tx_desc)
+ return done < budget;
+
netdev_tx_completed_queue(ring->tx_queue, packets, bytes);
/* Wakeup Tx queue if this stopped, and ring is not full.
@@ -1052,3 +1077,106 @@ tx_drop:
return NETDEV_TX_OK;
}
+netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame,
+ struct net_device *dev, unsigned int length,
+ int tx_ind, int *doorbell_pending)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ union mlx4_wqe_qpn_vlan qpn_vlan = {};
+ struct mlx4_en_tx_ring *ring;
+ struct mlx4_en_tx_desc *tx_desc;
+ struct mlx4_wqe_data_seg *data;
+ struct mlx4_en_tx_info *tx_info;
+ int index, bf_index;
+ bool send_doorbell;
+ int nr_txbb = 1;
+ bool stop_queue;
+ dma_addr_t dma;
+ int real_size;
+ __be32 op_own;
+ u32 ring_cons;
+ bool bf_ok;
+
+ BUILD_BUG_ON_MSG(ALIGN(CTRL_SIZE + DS_SIZE, TXBB_SIZE) != TXBB_SIZE,
+ "mlx4_en_xmit_frame requires minimum size tx desc");
+
+ ring = priv->tx_ring[tx_ind];
+
+ if (!priv->port_up)
+ goto tx_drop;
+
+ if (mlx4_en_is_tx_ring_full(ring))
+ goto tx_drop;
+
+ /* fetch ring->cons far ahead before needing it to avoid stall */
+ ring_cons = READ_ONCE(ring->cons);
+
+ index = ring->prod & ring->size_mask;
+ tx_info = &ring->tx_info[index];
+
+ bf_ok = ring->bf_enabled;
+
+ /* Track current inflight packets for performance analysis */
+ AVG_PERF_COUNTER(priv->pstats.inflight_avg,
+ (u32)(ring->prod - ring_cons - 1));
+
+ bf_index = ring->prod;
+ tx_desc = ring->buf + index * TXBB_SIZE;
+ data = &tx_desc->data;
+
+ dma = frame->dma;
+
+ tx_info->page = frame->page;
+ frame->page = NULL;
+ tx_info->map0_dma = dma;
+ tx_info->map0_byte_count = length;
+ tx_info->nr_txbb = nr_txbb;
+ tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);
+ tx_info->data_offset = (void *)data - (void *)tx_desc;
+ tx_info->ts_requested = 0;
+ tx_info->nr_maps = 1;
+ tx_info->linear = 1;
+ tx_info->inl = 0;
+
+ dma_sync_single_for_device(priv->ddev, dma, length, PCI_DMA_TODEVICE);
+
+ data->addr = cpu_to_be64(dma);
+ data->lkey = ring->mr_key;
+ dma_wmb();
+ data->byte_count = cpu_to_be32(length);
+
+ /* tx completion can avoid cache line miss for common cases */
+ tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
+
+ op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
+ ((ring->prod & ring->size) ?
+ cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
+
+ ring->packets++;
+ ring->bytes += tx_info->nr_bytes;
+ AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, length);
+
+ ring->prod += nr_txbb;
+
+ stop_queue = mlx4_en_is_tx_ring_full(ring);
+ send_doorbell = stop_queue ||
+ *doorbell_pending > MLX4_EN_DOORBELL_BUDGET;
+ bf_ok &= send_doorbell;
+
+ real_size = ((CTRL_SIZE + nr_txbb * DS_SIZE) / 16) & 0x3f;
+
+ if (bf_ok)
+ qpn_vlan.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size);
+ else
+ qpn_vlan.fence_size = real_size;
+
+ mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, bf_index,
+ op_own, bf_ok, send_doorbell);
+ *doorbell_pending = send_doorbell ? 0 : *doorbell_pending + 1;
+
+ return NETDEV_TX_OK;
+
+tx_drop:
+ ring->tx_dropped++;
+ return NETDEV_TX_BUSY;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index eff4be0279e6..29c81d26f9f5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -132,6 +132,7 @@ enum {
MLX4_EN_NUM_UP)
#define MLX4_EN_DEFAULT_TX_WORK 256
+#define MLX4_EN_DOORBELL_BUDGET 8
/* Target number of packets to coalesce with interrupt moderation */
#define MLX4_EN_RX_COAL_TARGET 44
@@ -219,7 +220,10 @@ enum cq_type {
struct mlx4_en_tx_info {
- struct sk_buff *skb;
+ union {
+ struct sk_buff *skb;
+ struct page *page;
+ };
dma_addr_t map0_dma;
u32 map0_byte_count;
u32 nr_txbb;
@@ -265,6 +269,8 @@ struct mlx4_en_page_cache {
struct mlx4_en_rx_alloc buf[MLX4_EN_CACHE_SIZE];
};
+struct mlx4_en_priv;
+
struct mlx4_en_tx_ring {
/* cache line used and dirtied in tx completion
* (mlx4_en_free_tx_buf())
@@ -298,6 +304,11 @@ struct mlx4_en_tx_ring {
__be32 mr_key;
void *buf;
struct mlx4_en_tx_info *tx_info;
+ struct mlx4_en_rx_ring *recycle_ring;
+ u32 (*free_tx_desc)(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring,
+ int index, u8 owner,
+ u64 timestamp, int napi_mode);
u8 *bounce_buf;
struct mlx4_qp_context context;
int qpn;
@@ -678,6 +689,12 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq);
u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
void *accel_priv, select_queue_fallback_t fallback);
netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
+netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame,
+ struct net_device *dev, unsigned int length,
+ int tx_ind, int *doorbell_pending);
+void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring);
+bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring,
+ struct mlx4_en_rx_alloc *frame);
int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_tx_ring **pring,
@@ -706,6 +723,14 @@ int mlx4_en_process_rx_cq(struct net_device *dev,
int budget);
int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget);
int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget);
+u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring,
+ int index, u8 owner, u64 timestamp,
+ int napi_mode);
+u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring,
+ int index, u8 owner, u64 timestamp,
+ int napi_mode);
void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
int is_tx, int rss, int qpn, int cqn, int user_prio,
struct mlx4_qp_context *context);