summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLawrence Brakmo <brakmo@fb.com>2017-06-30 20:02:44 -0700
committerDavid S. Miller <davem@davemloft.net>2017-07-01 16:15:13 -0700
commit13d3b1ebe28762c79e981931a41914fae5d04386 (patch)
tree2ba5e7acc2245ff6296bc8b51e10d49559cdb928
parent61bc4d8daa7af018d7ea084ea38648828a5a90d5 (diff)
downloadlinux-13d3b1ebe28762c79e981931a41914fae5d04386.tar.gz
linux-13d3b1ebe28762c79e981931a41914fae5d04386.tar.bz2
linux-13d3b1ebe28762c79e981931a41914fae5d04386.zip
bpf: Support for setting initial receive window
This patch adds suppport for setting the initial advertized window from within a BPF_SOCK_OPS program. This can be used to support larger initial cwnd values in environments where it is known to be safe. Signed-off-by: Lawrence Brakmo <brakmo@fb.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/tcp.h10
-rw-r--r--include/uapi/linux/bpf.h4
-rw-r--r--net/ipv4/tcp_minisocks.c9
-rw-r--r--net/ipv4/tcp_output.c7
4 files changed, 28 insertions, 2 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 564af2dee236..d6bb3948203d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2068,4 +2068,14 @@ static inline u32 tcp_timeout_init(struct sock *sk)
return timeout;
}
+static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
+{
+ int rwnd;
+
+ rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT);
+
+ if (rwnd < 0)
+ rwnd = 0;
+ return rwnd;
+}
#endif /* _TCP_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 00702b294447..94d7ded1a6cf 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -751,6 +751,10 @@ enum {
BPF_SOCK_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or
* -1 if default value should be used
*/
+ BPF_SOCK_OPS_RWND_INIT, /* Should return initial advertized
+ * window (in packets) or -1 if default
+ * value should be used
+ */
};
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index d30ee31e94eb..0ff83c1637d8 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -351,6 +351,7 @@ void tcp_openreq_init_rwin(struct request_sock *req,
int full_space = tcp_full_space(sk_listener);
u32 window_clamp;
__u8 rcv_wscale;
+ u32 rcv_wnd;
int mss;
mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
@@ -363,6 +364,12 @@ void tcp_openreq_init_rwin(struct request_sock *req,
(req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
req->rsk_window_clamp = full_space;
+ rcv_wnd = tcp_rwnd_init_bpf((struct sock *)req);
+ if (rcv_wnd == 0)
+ rcv_wnd = dst_metric(dst, RTAX_INITRWND);
+ else if (full_space < rcv_wnd * mss)
+ full_space = rcv_wnd * mss;
+
/* tcp_full_space because it is guaranteed to be the first packet */
tcp_select_initial_window(full_space,
mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
@@ -370,7 +377,7 @@ void tcp_openreq_init_rwin(struct request_sock *req,
&req->rsk_window_clamp,
ireq->wscale_ok,
&rcv_wscale,
- dst_metric(dst, RTAX_INITRWND));
+ rcv_wnd);
ireq->rcv_wscale = rcv_wscale;
}
EXPORT_SYMBOL(tcp_openreq_init_rwin);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 47fe0759a877..ef809426b538 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3266,6 +3266,7 @@ static void tcp_connect_init(struct sock *sk)
const struct dst_entry *dst = __sk_dst_get(sk);
struct tcp_sock *tp = tcp_sk(sk);
__u8 rcv_wscale;
+ u32 rcv_wnd;
/* We'll fix this up when we get a response from the other end.
* See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
@@ -3299,13 +3300,17 @@ static void tcp_connect_init(struct sock *sk)
(tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
tp->window_clamp = tcp_full_space(sk);
+ rcv_wnd = tcp_rwnd_init_bpf(sk);
+ if (rcv_wnd == 0)
+ rcv_wnd = dst_metric(dst, RTAX_INITRWND);
+
tcp_select_initial_window(tcp_full_space(sk),
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
&tp->rcv_wnd,
&tp->window_clamp,
sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
&rcv_wscale,
- dst_metric(dst, RTAX_INITRWND));
+ rcv_wnd);
tp->rx_opt.rcv_wscale = rcv_wscale;
tp->rcv_ssthresh = tp->rcv_wnd;