summaryrefslogtreecommitdiffstats
path: root/net/ipv6/tcp_ipv6.c
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2015-10-02 11:43:32 -0700
committerDavid S. Miller <davem@davemloft.net>2015-10-03 04:32:41 -0700
commit079096f103faca2dd87342cca6f23d4b34da8871 (patch)
treefa3fb0fdc064f1611c464384e70a7a402179808f /net/ipv6/tcp_ipv6.c
parent2feda34192a379f8b35a7c6c5826b2f23e884f32 (diff)
downloadlinux-079096f103faca2dd87342cca6f23d4b34da8871.tar.gz
linux-079096f103faca2dd87342cca6f23d4b34da8871.tar.bz2
linux-079096f103faca2dd87342cca6f23d4b34da8871.zip
tcp/dccp: install syn_recv requests into ehash table
In this patch, we insert request sockets into TCP/DCCP regular ehash table (where ESTABLISHED and TIMEWAIT sockets are) instead of using the per listener hash table. ACK packets find SYN_RECV pseudo sockets without having to find and lock the listener. In nominal conditions, this halves pressure on listener lock. Note that this will allow for SO_REUSEPORT refinements, so that we can select a listener using cpu/numa affinities instead of the prior 'consistent hash', since only SYN packets will apply this selection logic. We will shrink listen_sock in the following patch to ease code review. Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Ying Cai <ycai@google.com> Cc: Willem de Bruijn <willemb@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6/tcp_ipv6.c')
-rw-r--r--net/ipv6/tcp_ipv6.c82
1 files changed, 36 insertions, 46 deletions
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index cadb44a2d34e..a215614cfb2b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -727,7 +727,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
.route_req = tcp_v6_route_req,
.init_seq = tcp_v6_init_sequence,
.send_synack = tcp_v6_send_synack,
- .queue_hash_add = inet6_csk_reqsk_queue_hash_add,
};
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
@@ -938,37 +937,11 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
}
-static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
+static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
+#ifdef CONFIG_SYN_COOKIES
const struct tcphdr *th = tcp_hdr(skb);
- struct request_sock *req;
- struct sock *nsk;
-
- /* Find possible connection requests. */
- req = inet6_csk_search_req(sk, th->source,
- &ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb));
- if (req) {
- nsk = tcp_check_req(sk, skb, req, false);
- if (!nsk || nsk == sk)
- reqsk_put(req);
- return nsk;
- }
- nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
- &ipv6_hdr(skb)->saddr, th->source,
- &ipv6_hdr(skb)->daddr, ntohs(th->dest),
- tcp_v6_iif(skb));
-
- if (nsk) {
- if (nsk->sk_state != TCP_TIME_WAIT) {
- bh_lock_sock(nsk);
- return nsk;
- }
- inet_twsk_put(inet_twsk(nsk));
- return NULL;
- }
-#ifdef CONFIG_SYN_COOKIES
if (!th->syn)
sk = cookie_v6_check(sk, skb);
#endif
@@ -1258,15 +1231,11 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
goto csum_err;
if (sk->sk_state == TCP_LISTEN) {
- struct sock *nsk = tcp_v6_hnd_req(sk, skb);
+ struct sock *nsk = tcp_v6_cookie_check(sk, skb);
+
if (!nsk)
goto discard;
- /*
- * Queue it on the new socket if the new socket is active,
- * otherwise we just shortcircuit this and continue with
- * the new socket..
- */
if (nsk != sk) {
sock_rps_save_rxhash(nsk, skb);
sk_mark_napi_id(nsk, skb);
@@ -1402,6 +1371,33 @@ process:
if (sk->sk_state == TCP_TIME_WAIT)
goto do_time_wait;
+ if (sk->sk_state == TCP_NEW_SYN_RECV) {
+ struct request_sock *req = inet_reqsk(sk);
+ struct sock *nsk = NULL;
+
+ sk = req->rsk_listener;
+ tcp_v6_fill_cb(skb, hdr, th);
+ if (tcp_v6_inbound_md5_hash(sk, skb)) {
+ reqsk_put(req);
+ goto discard_it;
+ }
+ if (sk->sk_state == TCP_LISTEN)
+ nsk = tcp_check_req(sk, skb, req, false);
+ if (!nsk) {
+ reqsk_put(req);
+ goto discard_it;
+ }
+ if (nsk == sk) {
+ sock_hold(sk);
+ reqsk_put(req);
+ tcp_v6_restore_cb(skb);
+ } else if (tcp_child_process(sk, nsk, skb)) {
+ tcp_v6_send_reset(nsk, skb);
+ goto discard_it;
+ } else {
+ return 0;
+ }
+ }
if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
goto discard_and_relse;
@@ -1765,18 +1761,12 @@ static int tcp6_seq_show(struct seq_file *seq, void *v)
}
st = seq->private;
- switch (st->state) {
- case TCP_SEQ_STATE_LISTENING:
- case TCP_SEQ_STATE_ESTABLISHED:
- if (sk->sk_state == TCP_TIME_WAIT)
- get_timewait6_sock(seq, v, st->num);
- else
- get_tcp6_sock(seq, v, st->num);
- break;
- case TCP_SEQ_STATE_OPENREQ:
+ if (sk->sk_state == TCP_TIME_WAIT)
+ get_timewait6_sock(seq, v, st->num);
+ else if (sk->sk_state == TCP_NEW_SYN_RECV)
get_openreq6(seq, v, st->num);
- break;
- }
+ else
+ get_tcp6_sock(seq, v, st->num);
out:
return 0;
}