From b7779d06f9950e14a008a2de970b44233fe49c86 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 3 Mar 2014 14:45:20 +0100 Subject: netfilter: conntrack: spinlock per cpu to protect special lists. One spinlock per cpu to protect dying/unconfirmed/template special lists. (These lists are now per cpu, a bit like the untracked ct.) Add a @cpu field to nf_conn, to make sure we hold the appropriate spinlock at removal time. Signed-off-by: Eric Dumazet Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netns/conntrack.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/net/netns') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index fbcc7fa536dc..c6a8994e9922 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -62,6 +62,13 @@ struct nf_ip_net { #endif }; +struct ct_pcpu { + spinlock_t lock; + struct hlist_nulls_head unconfirmed; + struct hlist_nulls_head dying; + struct hlist_nulls_head tmpl; +}; + struct netns_ct { atomic_t count; unsigned int expect_count; @@ -86,9 +93,7 @@ struct netns_ct { struct kmem_cache *nf_conntrack_cachep; struct hlist_nulls_head *hash; struct hlist_head *expect_hash; - struct hlist_nulls_head unconfirmed; - struct hlist_nulls_head dying; - struct hlist_nulls_head tmpl; + struct ct_pcpu __percpu *pcpu_lists; struct ip_conntrack_stat __percpu *stat; struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; struct nf_exp_event_notifier __rcu *nf_expect_event_cb; -- cgit v1.2.3 From 93bb0ceb75be2fdfa9fc0dd1fb522d9ada515d9c Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 3 Mar 2014 14:46:13 +0100 Subject: netfilter: conntrack: remove central spinlock nf_conntrack_lock nf_conntrack_lock is a monolithic lock and suffers from huge contention on current generation servers (8 or more cores/threads).
Perf shows clear lock contention on the base kernel: - 72.56% ksoftirqd/6 [kernel.kallsyms] [k] _raw_spin_lock_bh - _raw_spin_lock_bh + 25.33% init_conntrack + 24.86% nf_ct_delete_from_lists + 24.62% __nf_conntrack_confirm + 24.38% destroy_conntrack + 0.70% tcp_packet + 2.21% ksoftirqd/6 [kernel.kallsyms] [k] fib_table_lookup + 1.15% ksoftirqd/6 [kernel.kallsyms] [k] __slab_free + 0.77% ksoftirqd/6 [kernel.kallsyms] [k] inet_getpeer + 0.70% ksoftirqd/6 [nf_conntrack] [k] nf_ct_delete + 0.55% ksoftirqd/6 [ip_tables] [k] ipt_do_table This patch changes conntrack locking and provides a huge performance improvement. SYN-flood attack tested on a 24-core E5-2695v2(ES) with 10Gbit/s ixgbe (with tool trafgen): Base kernel: 810.405 new conntrack/sec After patch: 2.233.876 new conntrack/sec Note that other flood attacks (SYN+ACK or ACK) can easily be deflected using: # iptables -A INPUT -m state --state INVALID -j DROP # sysctl -w net/netfilter/nf_conntrack_tcp_loose=0 Use an array of hashed spinlocks to protect insertions of conntracks into, and deletions from, the hash table. 1024 spinlocks seem to give good results, at minimal cost (4KB memory). Due to lockdep max depth, 1024 becomes 8 if CONFIG_LOCKDEP=y. The hash resize is a bit tricky, because we need to take all locks in the array. A seqcount_t is used to synchronize the hash table users with the resizing process. Signed-off-by: Eric Dumazet Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S.
Miller Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netns/conntrack.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net/netns') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index c6a8994e9922..773cce308bc6 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -5,6 +5,7 @@ #include <linux/list_nulls.h> #include <linux/atomic.h> #include <linux/netfilter/nf_conntrack_tcp.h> +#include <linux/seqlock.h> struct ctl_table_header; struct nf_conntrack_ecache; @@ -90,6 +91,7 @@ struct netns_ct { int sysctl_checksum; unsigned int htable_size; + seqcount_t generation; struct kmem_cache *nf_conntrack_cachep; struct hlist_nulls_head *hash; struct hlist_head *expect_hash; -- cgit v1.2.3