summary | refs | log | tree | commit | diff | stats
path: root/net/ipv6
diff options
context:
space:
mode:
author: Eric Dumazet <edumazet@google.com> 2013-10-03 00:22:02 -0700
committer: David S. Miller <davem@davemloft.net> 2013-10-08 23:19:24 -0400
commit: 05dbc7b59481ca891bbcfe6799a562d48159fbf7 (patch)
tree: f398ddbc5d2a72b3c3b7b16aed8a34b153491341 /net/ipv6
parent: 53af53ae83fe960ceb9ef74cac7915e9088f4266 (diff)
download:
linux-05dbc7b59481ca891bbcfe6799a562d48159fbf7.tar.gz
linux-05dbc7b59481ca891bbcfe6799a562d48159fbf7.tar.bz2
linux-05dbc7b59481ca891bbcfe6799a562d48159fbf7.zip
tcp/dccp: remove twchain
TCP listener refactoring, part 3 :

Our goal is to hash SYN_RECV sockets into main ehash for fast lookup,
and parallel SYN processing.

Current inet_ehash_bucket contains two chains, one for ESTABLISH (and
friend states) sockets, another for TIME_WAIT sockets only.

As the hash table is sized to get at most one socket per bucket, it
makes little sense to have separate twchain, as it makes the lookup
slightly more complicated, and doubles hash table memory usage.

If we make sure all socket types have the lookup keys at the same
offsets, we can use a generic and faster lookup. It turns out TIME_WAIT
and ESTABLISHED sockets already have common lookup fields for IPv4.

[ INET_TW_MATCH() is no longer needed ]

I'll provide a follow-up to factorize IPv6 lookup as well, to remove
INET6_TW_MATCH()

This way, SYN_RECV pseudo sockets will be supported the same.

A new sock_gen_put() helper is added, doing either a sock_put() or
inet_twsk_put() [ and will support SYN_RECV later ].

Note this helper should only be called in real slow path, when rcu
lookup found a socket that was moved to another identity (freed/reused
immediately), but could eventually be used in other contexts, like
sock_edemux()

Before patch :

dmesg | grep "TCP established"
TCP established hash table entries: 524288 (order: 11, 8388608 bytes)

After patch :

TCP established hash table entries: 524288 (order: 10, 4194304 bytes)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6')
-rw-r--r-- net/ipv6/inet6_hashtables.c | 75
-rw-r--r-- net/ipv6/tcp_ipv6.c | 9
2 files changed, 36 insertions, 48 deletions
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 066640e0ba8e..46440777e1c5 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -89,43 +89,36 @@ begin:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
if (sk->sk_hash != hash)
continue;
- if (likely(INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
- if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
- goto begintw;
+ if (sk->sk_state == TCP_TIME_WAIT) {
+ if (!INET6_TW_MATCH(sk, net, saddr, daddr, ports, dif))
+ continue;
+ } else {
+ if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))
+ continue;
+ }
+ if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
+ goto out;
+
+ if (sk->sk_state == TCP_TIME_WAIT) {
+ if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr,
+ ports, dif))) {
+ sock_gen_put(sk);
+ goto begin;
+ }
+ } else {
if (unlikely(!INET6_MATCH(sk, net, saddr, daddr,
ports, dif))) {
sock_put(sk);
goto begin;
}
- goto out;
+ goto found;
}
}
if (get_nulls_value(node) != slot)
goto begin;
-
-begintw:
- /* Must check for a TIME_WAIT'er before going to listener hash. */
- sk_nulls_for_each_rcu(sk, node, &head->twchain) {
- if (sk->sk_hash != hash)
- continue;
- if (likely(INET6_TW_MATCH(sk, net, saddr, daddr,
- ports, dif))) {
- if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) {
- sk = NULL;
- goto out;
- }
- if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr,
- ports, dif))) {
- inet_twsk_put(inet_twsk(sk));
- goto begintw;
- }
- goto out;
- }
- }
- if (get_nulls_value(node) != slot)
- goto begintw;
- sk = NULL;
out:
+ sk = NULL;
+found:
rcu_read_unlock();
return sk;
}
@@ -248,31 +241,25 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
struct sock *sk2;
const struct hlist_nulls_node *node;
- struct inet_timewait_sock *tw;
+ struct inet_timewait_sock *tw = NULL;
int twrefcnt = 0;
spin_lock(lock);
- /* Check TIME-WAIT sockets first. */
- sk_nulls_for_each(sk2, node, &head->twchain) {
+ sk_nulls_for_each(sk2, node, &head->chain) {
if (sk2->sk_hash != hash)
continue;
- if (likely(INET6_TW_MATCH(sk2, net, saddr, daddr,
- ports, dif))) {
- tw = inet_twsk(sk2);
- if (twsk_unique(sk, sk2, twp))
- goto unique;
- else
- goto not_unique;
+ if (sk2->sk_state == TCP_TIME_WAIT) {
+ if (likely(INET6_TW_MATCH(sk2, net, saddr, daddr,
+ ports, dif))) {
+ tw = inet_twsk(sk2);
+ if (twsk_unique(sk, sk2, twp))
+ goto unique;
+ else
+ goto not_unique;
+ }
}
- }
- tw = NULL;
-
- /* And established part... */
- sk_nulls_for_each(sk2, node, &head->chain) {
- if (sk2->sk_hash != hash)
- continue;
if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif)))
goto not_unique;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index dde8bad04481..528e61afaf5e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1834,6 +1834,7 @@ static void get_timewait6_sock(struct seq_file *seq,
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
struct tcp_iter_state *st;
+ struct sock *sk = v;
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
@@ -1849,14 +1850,14 @@ static int tcp6_seq_show(struct seq_file *seq, void *v)
switch (st->state) {
case TCP_SEQ_STATE_LISTENING:
case TCP_SEQ_STATE_ESTABLISHED:
- get_tcp6_sock(seq, v, st->num);
+ if (sk->sk_state == TCP_TIME_WAIT)
+ get_timewait6_sock(seq, v, st->num);
+ else
+ get_tcp6_sock(seq, v, st->num);
break;
case TCP_SEQ_STATE_OPENREQ:
get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
break;
- case TCP_SEQ_STATE_TIME_WAIT:
- get_timewait6_sock(seq, v, st->num);
- break;
}
out:
return 0;