summaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_output.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--net/ipv4/tcp_output.c79
1 files changed, 63 insertions, 16 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1db253e36045..c2f0f6065cb1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1354,6 +1354,8 @@ void tcp_mtup_init(struct sock *sk)
icsk->icsk_af_ops->net_header_len;
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
icsk->icsk_mtup.probe_size = 0;
+ if (icsk->icsk_mtup.enabled)
+ icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
}
EXPORT_SYMBOL(tcp_mtup_init);
@@ -1752,20 +1754,23 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
bool *is_cwnd_limited, u32 max_segs)
{
- struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
- u32 send_win, cong_win, limit, in_flight;
+ u32 age, send_win, cong_win, limit, in_flight;
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct skb_mstamp now;
+ struct sk_buff *head;
int win_divisor;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto send_now;
- if (icsk->icsk_ca_state != TCP_CA_Open)
+ if (!((1 << icsk->icsk_ca_state) & (TCPF_CA_Open | TCPF_CA_CWR)))
goto send_now;
- /* Defer for less than two clock ticks. */
- if (tp->tso_deferred &&
- (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
+ /* Avoid bursty behavior by allowing defer
+ * only if the last write was recent.
+ */
+ if ((s32)(tcp_time_stamp - tp->lsndtime) > 0)
goto send_now;
in_flight = tcp_packets_in_flight(tp);
@@ -1807,11 +1812,14 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
goto send_now;
}
- /* Ok, it looks like it is advisable to defer.
- * Do not rearm the timer if already set to not break TCP ACK clocking.
- */
- if (!tp->tso_deferred)
- tp->tso_deferred = 1 | (jiffies << 1);
+ head = tcp_write_queue_head(sk);
+ skb_mstamp_get(&now);
+ age = skb_mstamp_us_delta(&now, &head->skb_mstamp);
+ /* If next ACK is likely to come too late (half srtt), do not defer */
+ if (age < (tp->srtt_us >> 4))
+ goto send_now;
+
+ /* Ok, it looks like it is advisable to defer. */
if (cong_win < send_win && cong_win < skb->len)
*is_cwnd_limited = true;
@@ -1819,10 +1827,34 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
return true;
send_now:
- tp->tso_deferred = 0;
return false;
}
+static inline void tcp_mtu_check_reprobe(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
+ u32 interval;
+ s32 delta;
+
+ interval = net->ipv4.sysctl_tcp_probe_interval;
+ delta = tcp_time_stamp - icsk->icsk_mtup.probe_timestamp;
+ if (unlikely(delta >= interval * HZ)) {
+ int mss = tcp_current_mss(sk);
+
+ /* Update current search range */
+ icsk->icsk_mtup.probe_size = 0;
+ icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp +
+ sizeof(struct tcphdr) +
+ icsk->icsk_af_ops->net_header_len;
+ icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
+
+ /* Update probe time stamp */
+ icsk->icsk_mtup.probe_timestamp = tcp_time_stamp;
+ }
+}
+
/* Create a new MTU probe if we are ready.
* MTU probe is regularly attempting to increase the path MTU by
* deliberately sending larger packets. This discovers routing
@@ -1837,11 +1869,13 @@ static int tcp_mtu_probe(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
struct sk_buff *skb, *nskb, *next;
+ struct net *net = sock_net(sk);
int len;
int probe_size;
int size_needed;
int copy;
int mss_now;
+ int interval;
/* Not currently probing/verifying,
* not in recovery,
@@ -1854,12 +1888,25 @@ static int tcp_mtu_probe(struct sock *sk)
tp->rx_opt.num_sacks || tp->rx_opt.dsack)
return -1;
- /* Very simple search strategy: just double the MSS. */
+ /* Use binary search for probe_size between tcp_mss_base,
+ * and current mss_clamp. if (search_high - search_low)
+ * smaller than a threshold, backoff from probing.
+ */
mss_now = tcp_current_mss(sk);
- probe_size = 2 * tp->mss_cache;
+ probe_size = tcp_mtu_to_mss(sk, (icsk->icsk_mtup.search_high +
+ icsk->icsk_mtup.search_low) >> 1);
size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
- if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
- /* TODO: set timer for probe_converge_event */
+ interval = icsk->icsk_mtup.search_high - icsk->icsk_mtup.search_low;
+ /* When misfortune happens, we are reprobing actively,
+ * and then reprobe timer has expired. We stick with current
+ * probing process by not resetting search range to its orignal.
+ */
+ if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
+ interval < net->ipv4.sysctl_tcp_probe_threshold) {
+ /* Check whether enough time has elaplased for
+ * another round of probing.
+ */
+ tcp_mtu_check_reprobe(sk);
return -1;
}