summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
author: Yuchung Cheng <ycheng@google.com> 2017-01-12 22:11:34 -0800
committer: David S. Miller <davem@davemloft.net> 2017-01-13 22:37:16 -0500
commit: 1d0833df594390876647c54c2c88069d29059665 (patch)
tree: abd620968abad8f56e132a108b5bf354eb0d3b54 /net/ipv4
parent: 57dde7f70de34d4251f291c9eac7ad920aaf56b2 (diff)
downloadlinux-1d0833df594390876647c54c2c88069d29059665.tar.gz
linux-1d0833df594390876647c54c2c88069d29059665.tar.bz2
linux-1d0833df594390876647c54c2c88069d29059665.zip
tcp: use sequence to break TS ties for RACK loss detection
The packets inside a jumbo skb (e.g., TSO) share the same skb timestamp, even though they are sent sequentially on the wire. Since RACK is based on time, it cannot detect that some packets inside the same skb are lost. However, we can leverage the packet sequence numbers as extended timestamps to detect losses. Therefore, when the RACK timestamp is identical to the skb's timestamp (i.e., one of the packets of the skb is acked or sacked), we use the sequence numbers of the acked and unacked packets to break ties. We can use the same sequence logic to advance the RACK xmit time as well, to detect more losses and avoid timeouts. Signed-off-by: Yuchung Cheng <ycheng@google.com> Signed-off-by: Neal Cardwell <ncardwell@google.com> Acked-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/tcp_input.c 5
-rw-r--r-- net/ipv4/tcp_recovery.c 17
2 files changed, 17 insertions, 5 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index be1191829963..e42ca11c0326 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1218,7 +1218,8 @@ static u8 tcp_sacktag_one(struct sock *sk,
return sacked;
if (!(sacked & TCPCB_SACKED_ACKED)) {
- tcp_rack_advance(tp, sacked, xmit_time, &state->ack_time);
+ tcp_rack_advance(tp, sacked, end_seq,
+ xmit_time, &state->ack_time);
if (sacked & TCPCB_SACKED_RETRANS) {
/* If the segment is not tagged as lost,
@@ -3171,7 +3172,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
} else if (tcp_is_sack(tp)) {
tp->delivered += acked_pcount;
if (!tcp_skb_spurious_retrans(tp, skb))
- tcp_rack_advance(tp, sacked,
+ tcp_rack_advance(tp, sacked, scb->end_seq,
&skb->skb_mstamp,
&sack->ack_time);
}
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index eb39b1b6d1dc..1e330a2f913d 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -16,6 +16,14 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
}
}
+static bool tcp_rack_sent_after(const struct skb_mstamp *t1,
+ const struct skb_mstamp *t2,
+ u32 seq1, u32 seq2)
+{ /* Orders two (timestamp, sequence) pairs: true when (t1, seq1) counts as sent after (t2, seq2). */
+ return skb_mstamp_after(t1, t2) || /* presumably true when t1 is strictly later than t2 (helper defined elsewhere) */
+ (t1->v64 == t2->v64 && after(seq1, seq2)); /* identical timestamps (e.g. packets of one TSO skb): the sequence numbers break the tie */
+}
+
/* Marks a packet lost, if some packet sent later has been (s)acked.
* The underlying idea is similar to the traditional dupthresh and FACK
* but they look at different metrics:
@@ -60,7 +68,8 @@ static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now,
scb->sacked & TCPCB_SACKED_ACKED)
continue;
- if (skb_mstamp_after(&tp->rack.mstamp, &skb->skb_mstamp)) {
+ if (tcp_rack_sent_after(&tp->rack.mstamp, &skb->skb_mstamp,
+ tp->rack.end_seq, scb->end_seq)) {
/* Step 3 in draft-cheng-tcpm-rack-00.txt:
* A packet is lost if its elapsed time is beyond
* the recent RTT plus the reordering window.
@@ -113,14 +122,15 @@ void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now)
* This is "Step 3: Advance RACK.xmit_time and update RACK.RTT" from
* draft-cheng-tcpm-rack-00.txt
*/
-void tcp_rack_advance(struct tcp_sock *tp, u8 sacked,
+void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
const struct skb_mstamp *xmit_time,
const struct skb_mstamp *ack_time)
{
u32 rtt_us;
if (tp->rack.mstamp.v64 &&
- !skb_mstamp_after(xmit_time, &tp->rack.mstamp))
+ !tcp_rack_sent_after(xmit_time, &tp->rack.mstamp,
+ end_seq, tp->rack.end_seq))
return;
rtt_us = skb_mstamp_us_delta(ack_time, xmit_time);
@@ -140,6 +150,7 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked,
}
tp->rack.rtt_us = rtt_us;
tp->rack.mstamp = *xmit_time;
+ tp->rack.end_seq = end_seq;
tp->rack.advanced = 1;
}