summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeal Cardwell <ncardwell@google.com>2020-12-08 22:57:59 -0500
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2020-12-21 13:28:17 +0100
commitf9c84d022929be7c15747ab1fc8696e5271ccc60 (patch)
treeff3342be6711718db27e0e4167af8b05be296564
parentd8d50009dadaa7f4d179af59e9796628c7487cf8 (diff)
downloadlinux-stable-f9c84d022929be7c15747ab1fc8696e5271ccc60.tar.gz
linux-stable-f9c84d022929be7c15747ab1fc8696e5271ccc60.tar.bz2
linux-stable-f9c84d022929be7c15747ab1fc8696e5271ccc60.zip
tcp: fix cwnd-limited bug for TSO deferral where we send nothing
[ Upstream commit 299bcb55ecd1412f6df606e9dc0912d55610029e ] When cwnd is not a multiple of the TSO skb size of N*MSS, we can get into persistent scenarios where we have the following sequence: (1) ACK for full-sized skb of N*MSS arrives -> tcp_write_xmit() transmit full-sized skb with N*MSS -> move pacing release time forward -> exit tcp_write_xmit() because pacing time is in the future (2) TSQ callback or TCP internal pacing timer fires -> try to transmit next skb, but TSO deferral finds remainder of available cwnd is not big enough to trigger an immediate send now, so we defer sending until the next ACK. (3) repeat... So we can get into a case where we never mark ourselves as cwnd-limited for many seconds at a time, even with bulk/infinite-backlog senders, because: o In case (1) above, every time in tcp_write_xmit() we have enough cwnd to send a full-sized skb, we are not fully using the cwnd (because cwnd is not a multiple of the TSO skb size). So every time we send data, we are not cwnd limited, and so in the cwnd-limited tracking code in tcp_cwnd_validate() we mark ourselves as not cwnd-limited. o In case (2) above, every time in tcp_write_xmit() that we try to transmit the "remainder" of the cwnd but defer, we set the local variable is_cwnd_limited to true, but we do not send any packets, so sent_pkts is zero, so we don't call the cwnd-limited logic to update tp->is_cwnd_limited. Fixes: ca8a22634381 ("tcp: make cwnd-limited checks measurement-based, and gentler") Reported-by: Ingemar Johansson <ingemar.s.johansson@ericsson.com> Signed-off-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: Yuchung Cheng <ycheng@google.com> Acked-by: Soheil Hassas Yeganeh <soheil@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Link: https://lore.kernel.org/r/20201209035759.1225145-1-ncardwell.kernel@gmail.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--net/ipv4/tcp_output.c9
1 files changed, 6 insertions, 3 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 85ff417bda7f..b6ced107e2c4 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1723,7 +1723,8 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
* window, and remember whether we were cwnd-limited then.
*/
if (!before(tp->snd_una, tp->max_packets_seq) ||
- tp->packets_out > tp->max_packets_out) {
+ tp->packets_out > tp->max_packets_out ||
+ is_cwnd_limited) {
tp->max_packets_out = tp->packets_out;
tp->max_packets_seq = tp->snd_nxt;
tp->is_cwnd_limited = is_cwnd_limited;
@@ -2545,6 +2546,10 @@ repair:
else
tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED);
+ is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
+ if (likely(sent_pkts || is_cwnd_limited))
+ tcp_cwnd_validate(sk, is_cwnd_limited);
+
if (likely(sent_pkts)) {
if (tcp_in_cwnd_reduction(sk))
tp->prr_out += sent_pkts;
@@ -2552,8 +2557,6 @@ repair:
/* Send one loss probe per tail loss episode. */
if (push_one != 2)
tcp_schedule_loss_probe(sk, false);
- is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
- tcp_cwnd_validate(sk, is_cwnd_limited);
return false;
}
return !tp->packets_out && !tcp_write_queue_empty(sk);