author      Paolo Abeni <pabeni@redhat.com>    2023-10-19 15:41:38 +0200
committer   Paolo Abeni <pabeni@redhat.com>    2023-10-19 15:41:39 +0200
commit      dcf02bac377e48a4864ed2b4e1ffef84fcb8284f (patch)
tree        d41aba8c1f603fc2b0069f38a55d61ce7e9524e5
parent      a0e6323dbae6e96af5d1acbc8bb592b56f96c65e (diff)
parent      039550960a2235cfe2dfaa773df9f98f8da31a0c (diff)
download    linux-stable-dcf02bac377e48a4864ed2b4e1ffef84fcb8284f.tar.gz
            linux-stable-dcf02bac377e48a4864ed2b4e1ffef84fcb8284f.tar.bz2
            linux-stable-dcf02bac377e48a4864ed2b4e1ffef84fcb8284f.zip
Merge branch 'net-stmmac-improve-tx-timer-logic'
Christian Marangi says:

====================
net: stmmac: improve tx timer logic

This series aims to restore the original performance of stmmac on routers and other devices that use the driver to handle gigabit traffic. More details are in patch 3; this cover letter presents the results and improvements of the change.

The move to hrtimer for the tx timer and commit 8fce33317023 ("net: stmmac: Rework coalesce timer and fix multi-queue races") caused a big performance regression on this kind of device. This was observed on ipq806x, which after kernel 4.19 could no longer handle gigabit speed.

The series is currently applied and tested in OpenWrt SNAPSHOT and shows a large performance increase (the scenario is a qca8k switch + stmmac dwmac1000). A good comparison can be found at [1]. It compares a swconfig scenario (no DSA tagging, so very low CPU impact when handling traffic) with a DSA scenario (tagging in use, with a small CPU impact). As can be seen, performance is better even with DSA in place.

Other users observed that SQM scenarios with the cake scheduler also improved, in the order of 100 Mbps (that scenario is CPU limited, so any performance gain comes from reducing load on the CPU).

The series has been in use for at least 15 days without any complaint or bug report about queue timeouts (that was the case with v1 before the additional patch was added; it only appeared in real-world tests, not in iperf tests).

[1] https://forum.openwrt.org/t/netgear-r7800-exploration-ipq8065-qca9984/285/3427?u=ansuel
====================

Link: https://lore.kernel.org/r/20231018123550.27110-1-ansuelsmth@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
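The core idea of the series is: arm the TX coalesce hrtimer only when the corresponding NAPI context is not already scheduled, and cancel it otherwise, since the next NAPI poll re-arms it if packets are still pending. The following standalone C model is a minimal sketch of that decision; the struct and function names are invented for illustration and only mirror the shape of the kernel code in the diff below.

/* Userspace model of the reworked TX coalesce timer decision.
 * Types and helpers are invented; this is not kernel code. */
#include <stdbool.h>
#include <stdio.h>

struct model_napi { bool scheduled; };   /* stands in for NAPI_STATE_SCHED */
struct model_txq  { bool timer_armed; }; /* stands in for tx_q->txtimer */

/* Best-effort check, like napi_is_scheduled(): the state may change
 * right after this returns. */
static bool model_napi_is_scheduled(const struct model_napi *napi)
{
	return napi->scheduled;
}

/* Mirrors the idea of stmmac_tx_timer_arm(): if NAPI is already
 * scheduled, the poll loop will handle pending packets and re-arm the
 * timer itself if needed, so arming here would only add hrtimer churn. */
static void model_tx_timer_arm(struct model_txq *txq, const struct model_napi *napi)
{
	if (!model_napi_is_scheduled(napi))
		txq->timer_armed = true;   /* hrtimer_start() in the driver */
	else
		txq->timer_armed = false;  /* hrtimer_try_to_cancel() */
}

int main(void)
{
	struct model_napi napi = { .scheduled = true };
	struct model_txq txq = { .timer_armed = false };

	model_tx_timer_arm(&txq, &napi);
	printf("napi scheduled -> timer armed: %d\n", txq.timer_armed);

	napi.scheduled = false;
	model_tx_timer_arm(&txq, &napi);
	printf("napi idle      -> timer armed: %d\n", txq.timer_armed);
	return 0;
}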
-rw-r--r--  drivers/net/ethernet/chelsio/cxgb3/sge.c            8
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/common.h        2
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  40
-rw-r--r--  drivers/net/wireless/realtek/rtw89/core.c           2
-rw-r--r--  include/linux/netdevice.h                          23
-rw-r--r--  net/core/dev.c                                      2
6 files changed, 59 insertions, 18 deletions
diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c
index dfe4e0102960..6268f96cb4aa 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c
@@ -2501,14 +2501,6 @@ static int napi_rx_handler(struct napi_struct *napi, int budget)
return work_done;
}
-/*
- * Returns true if the device is already scheduled for polling.
- */
-static inline int napi_is_scheduled(struct napi_struct *napi)
-{
- return test_bit(NAPI_STATE_SCHED, &napi->state);
-}
-
/**
* process_pure_responses - process pure responses from a response queue
* @adap: the adapter
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 1e996c29043d..e3f650e88f82 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -293,7 +293,7 @@ struct stmmac_safety_stats {
#define MIN_DMA_RIWT 0x10
#define DEF_DMA_RIWT 0xa0
/* Tx coalesce parameters */
-#define STMMAC_COAL_TX_TIMER 1000
+#define STMMAC_COAL_TX_TIMER 5000
#define STMMAC_MAX_COAL_TX_TICK 100000
#define STMMAC_TX_MAX_FRAMES 256
#define STMMAC_TX_FRAMES 25
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index bb1dbf4c9f6c..11055e98efcc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2543,9 +2543,13 @@ static void stmmac_bump_dma_threshold(struct stmmac_priv *priv, u32 chan)
* @priv: driver private structure
* @budget: napi budget limiting this functions packet handling
* @queue: TX queue index
+ * @pending_packets: signal to arm the TX coal timer
* Description: it reclaims the transmit resources after transmission completes.
+ * If some packets still need to be handled due to TX coalescing, set
+ * pending_packets to true to make NAPI arm the TX coal timer.
*/
-static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
+static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue,
+ bool *pending_packets)
{
struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue];
struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[queue];
@@ -2706,7 +2710,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
/* We still have pending packets, let's call for a new scheduling */
if (tx_q->dirty_tx != tx_q->cur_tx)
- stmmac_tx_timer_arm(priv, queue);
+ *pending_packets = true;
flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
txq_stats->tx_packets += tx_packets;
@@ -2996,13 +3000,25 @@ static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue)
{
struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue];
u32 tx_coal_timer = priv->tx_coal_timer[queue];
+ struct stmmac_channel *ch;
+ struct napi_struct *napi;
if (!tx_coal_timer)
return;
- hrtimer_start(&tx_q->txtimer,
- STMMAC_COAL_TIMER(tx_coal_timer),
- HRTIMER_MODE_REL);
+ ch = &priv->channel[tx_q->queue_index];
+ napi = tx_q->xsk_pool ? &ch->rxtx_napi : &ch->tx_napi;
+
+ /* Arm the timer only if napi is not already scheduled.
+ * If napi is scheduled, try to cancel any pending timer; it will be
+ * armed again by the next scheduled napi.
+ */
+ if (unlikely(!napi_is_scheduled(napi)))
+ hrtimer_start(&tx_q->txtimer,
+ STMMAC_COAL_TIMER(tx_coal_timer),
+ HRTIMER_MODE_REL);
+ else
+ hrtimer_try_to_cancel(&tx_q->txtimer);
}
/**
@@ -5560,6 +5576,7 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
container_of(napi, struct stmmac_channel, tx_napi);
struct stmmac_priv *priv = ch->priv_data;
struct stmmac_txq_stats *txq_stats;
+ bool pending_packets = false;
u32 chan = ch->index;
unsigned long flags;
int work_done;
@@ -5569,7 +5586,7 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
txq_stats->napi_poll++;
u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
- work_done = stmmac_tx_clean(priv, budget, chan);
+ work_done = stmmac_tx_clean(priv, budget, chan, &pending_packets);
work_done = min(work_done, budget);
if (work_done < budget && napi_complete_done(napi, work_done)) {
@@ -5580,6 +5597,10 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
spin_unlock_irqrestore(&ch->lock, flags);
}
+ /* TX still has packets to handle; check if we need to arm the tx timer */
+ if (pending_packets)
+ stmmac_tx_timer_arm(priv, chan);
+
return work_done;
}
@@ -5588,6 +5609,7 @@ static int stmmac_napi_poll_rxtx(struct napi_struct *napi, int budget)
struct stmmac_channel *ch =
container_of(napi, struct stmmac_channel, rxtx_napi);
struct stmmac_priv *priv = ch->priv_data;
+ bool tx_pending_packets = false;
int rx_done, tx_done, rxtx_done;
struct stmmac_rxq_stats *rxq_stats;
struct stmmac_txq_stats *txq_stats;
@@ -5604,7 +5626,7 @@ static int stmmac_napi_poll_rxtx(struct napi_struct *napi, int budget)
txq_stats->napi_poll++;
u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
- tx_done = stmmac_tx_clean(priv, budget, chan);
+ tx_done = stmmac_tx_clean(priv, budget, chan, &tx_pending_packets);
tx_done = min(tx_done, budget);
rx_done = stmmac_rx_zc(priv, budget, chan);
@@ -5629,6 +5651,10 @@ static int stmmac_napi_poll_rxtx(struct napi_struct *napi, int budget)
spin_unlock_irqrestore(&ch->lock, flags);
}
+ /* TX still has packets to handle; check if we need to arm the tx timer */
+ if (tx_pending_packets)
+ stmmac_tx_timer_arm(priv, chan);
+
return min(rxtx_done, budget - 1);
}
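The stmmac_main.c changes above also move the actual re-arming out of stmmac_tx_clean(): the clean routine only reports that packets are left via pending_packets, and the NAPI poll functions arm the coalesce timer afterwards, once napi_complete_done() has run. A compact standalone model of that flow, with invented names and no real NAPI or hrtimer involved, could look like this:

/* Userspace model of the pending_packets flow in this patch set. */
#include <stdbool.h>
#include <stdio.h>

struct model_txq {
	int dirty_tx;
	int cur_tx;
};

/* Mirrors the new stmmac_tx_clean() contract: report leftover work
 * through *pending_packets instead of arming the timer directly. */
static int model_tx_clean(struct model_txq *txq, int budget, bool *pending_packets)
{
	int cleaned = 0;

	while (txq->dirty_tx != txq->cur_tx && cleaned < budget) {
		txq->dirty_tx++;
		cleaned++;
	}
	if (txq->dirty_tx != txq->cur_tx)
		*pending_packets = true;
	return cleaned;
}

/* Mirrors the shape of stmmac_napi_poll_tx(): clean first, then arm
 * the timer only if the clean step left packets behind. */
static void model_napi_poll_tx(struct model_txq *txq, int budget)
{
	bool pending_packets = false;
	int work_done = model_tx_clean(txq, budget, &pending_packets);

	printf("cleaned %d descriptors, pending=%d\n", work_done, pending_packets);
	if (pending_packets)
		printf("-> arm TX coalesce timer\n"); /* stmmac_tx_timer_arm() */
}

int main(void)
{
	struct model_txq txq = { .dirty_tx = 0, .cur_tx = 10 };

	model_napi_poll_tx(&txq, 4);  /* budget smaller than backlog */
	model_napi_poll_tx(&txq, 16); /* drains the rest */
	return 0;
}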
diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c
index 4bfb4188de72..3d75165e48be 100644
--- a/drivers/net/wireless/realtek/rtw89/core.c
+++ b/drivers/net/wireless/realtek/rtw89/core.c
@@ -2005,7 +2005,7 @@ static void rtw89_core_rx_to_mac80211(struct rtw89_dev *rtwdev,
struct napi_struct *napi = &rtwdev->napi;
/* In low power mode, napi isn't scheduled. Receive it to netif. */
- if (unlikely(!test_bit(NAPI_STATE_SCHED, &napi->state)))
+ if (unlikely(!napi_is_scheduled(napi)))
napi = NULL;
rtw89_core_hw_to_sband_rate(rx_status);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1c7681263d30..b8bf669212cc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -482,6 +482,29 @@ static inline bool napi_prefer_busy_poll(struct napi_struct *n)
return test_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state);
}
+/**
+ * napi_is_scheduled - test if NAPI is scheduled
+ * @n: NAPI context
+ *
+ * This check is "best-effort". With no locking implemented,
+ * a NAPI can be scheduled or terminated right after this check
+ * and produce imprecise results.
+ *
+ * NAPI_STATE_SCHED is an internal state; napi_is_scheduled
+ * should normally not be used, and napi_schedule should be
+ * used instead.
+ *
+ * Use only if the driver really needs to check whether a NAPI
+ * is scheduled, for example in the context of a delayed timer
+ * that can be skipped if a NAPI is already scheduled.
+ *
+ * Return: True if NAPI is scheduled, False otherwise.
+ */
+static inline bool napi_is_scheduled(struct napi_struct *n)
+{
+ return test_bit(NAPI_STATE_SCHED, &n->state);
+}
+
bool napi_schedule_prep(struct napi_struct *n);
/**
diff --git a/net/core/dev.c b/net/core/dev.c
index 97e7b9833db9..e7f61f5a5322 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6532,7 +6532,7 @@ static int __napi_poll(struct napi_struct *n, bool *repoll)
* accidentally calling ->poll() when NAPI is not scheduled.
*/
work = 0;
- if (test_bit(NAPI_STATE_SCHED, &n->state)) {
+ if (napi_is_scheduled(n)) {
work = n->poll(n, weight);
trace_napi_poll(n, work, weight);
}
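The napi_is_scheduled() helper added in include/linux/netdevice.h covers exactly the two patterns in this merge: skipping a coalesce timer when a NAPI poll is already pending (stmmac), and choosing a direct delivery path when no poll is pending (rtw89). A small standalone model of the second pattern, with invented helper names, might look like the following sketch:

/* Standalone model of the rtw89-style use of napi_is_scheduled():
 * when NAPI is not scheduled (e.g. low power mode), hand the frame
 * to the stack directly instead of queueing it behind a NAPI poll.
 * All names here are invented for illustration. */
#include <stdbool.h>
#include <stdio.h>

struct model_napi { bool scheduled; };

static bool model_napi_is_scheduled(const struct model_napi *napi)
{
	return napi->scheduled;
}

static void model_rx_to_stack(const struct model_napi *napi, int frame_id)
{
	if (!model_napi_is_scheduled(napi))
		printf("frame %d: direct delivery to netif\n", frame_id);
	else
		printf("frame %d: deferred to the pending NAPI poll\n", frame_id);
}

int main(void)
{
	struct model_napi napi = { .scheduled = false };

	model_rx_to_stack(&napi, 1); /* low power: no poll pending */
	napi.scheduled = true;
	model_rx_to_stack(&napi, 2); /* poll pending: let NAPI handle it */
	return 0;
}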