From ca925cf1534ebcec332c08719a7dee6ee1782ce4 Mon Sep 17 00:00:00 2001 From: Fan Du Date: Sat, 18 Jan 2014 09:55:27 +0800 Subject: flowcache: Make flow cache name space aware Inserting a entry into flowcache, or flushing flowcache should be based on per net scope. The reason to do so is flushing operation from fat netns crammed with flow entries will also making the slim netns with only a few flow cache entries go away in original implementation. Since flowcache is tightly coupled with IPsec, so it would be easier to put flow cache global parameters into xfrm namespace part. And one last thing needs to do is bumping flow cache genid, and flush flow cache should also be made in per net style. Signed-off-by: Fan Du Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/net/netns') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 1006a265beb3..52d0086d55d3 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -6,6 +6,7 @@ #include #include #include +#include struct ctl_table_header; @@ -61,6 +62,16 @@ struct netns_xfrm { spinlock_t xfrm_policy_sk_bundle_lock; rwlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; + + /* flow cache part */ + struct flow_cache flow_cache_global; + struct kmem_cache *flow_cachep; + atomic_t flow_cache_genid; + struct list_head flow_cache_gc_list; + spinlock_t flow_cache_gc_lock; + struct work_struct flow_cache_gc_work; + struct work_struct flow_cache_flush_work; + struct mutex flow_flush_sem; }; #endif -- cgit v1.2.3 From 1a1ccc96abb2ed9b8fbb71018e64b97324caef53 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 19 Feb 2014 10:07:34 +0100 Subject: xfrm: Remove caching of xfrm_policy_sk_bundles We currently cache socket policy bundles at xfrm_policy_sk_bundles. These cached bundles are never used. Instead we create and cache a new one whenever xfrm_lookup() is called on a socket policy. Most protocols cache the used routes to the socket, so let's remove the unused caching of socket policy bundles in xfrm. Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net/netns') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 52d0086d55d3..51f0dce7b643 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -59,7 +59,6 @@ struct netns_xfrm { struct dst_ops xfrm6_dst_ops; #endif spinlock_t xfrm_state_lock; - spinlock_t xfrm_policy_sk_bundle_lock; rwlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; -- cgit v1.2.3 From 633fc86ff621bba79dcddfd4c67fb07ae5f8467c Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 28 Feb 2014 07:32:49 +0100 Subject: net: ns: add ieee802154_6lowpan namespace This patch adds necessary ieee802154 6lowpan namespace to provide the inet_frag information. This is a initial support for handling 6lowpan fragmentation with the inet_frag api. Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- include/net/netns/ieee802154_6lowpan.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 include/net/netns/ieee802154_6lowpan.h (limited to 'include/net/netns') diff --git a/include/net/netns/ieee802154_6lowpan.h b/include/net/netns/ieee802154_6lowpan.h new file mode 100644 index 000000000000..88110b7e2428 --- /dev/null +++ b/include/net/netns/ieee802154_6lowpan.h @@ -0,0 +1,13 @@ +/* + * ieee802154 6lowpan in net namespaces + */ + +#include + +#ifndef __NETNS_IEEE802154_6LOWPAN_H__ +#define __NETNS_IEEE802154_6LOWPAN_H__ + +struct netns_ieee802154_lowpan { +}; + +#endif -- cgit v1.2.3 From 7240cdec60b136f3e64a453c7fbded4ed1aa047e Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 28 Feb 2014 07:32:50 +0100 Subject: 6lowpan: handling 6lowpan fragmentation via inet_frag api This patch drops the current way of 6lowpan fragmentation on receiving side and replace it with a implementation which use the inet_frag api. The old fragmentation handling has some race conditions and isn't rfc4944 compatible. Also adding support to match fragments on destination address, source address, tag value and datagram_size which is missing in the current implementation. Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- include/net/netns/ieee802154_6lowpan.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/net/netns') diff --git a/include/net/netns/ieee802154_6lowpan.h b/include/net/netns/ieee802154_6lowpan.h index 88110b7e2428..079030c853d8 100644 --- a/include/net/netns/ieee802154_6lowpan.h +++ b/include/net/netns/ieee802154_6lowpan.h @@ -7,7 +7,16 @@ #ifndef __NETNS_IEEE802154_6LOWPAN_H__ #define __NETNS_IEEE802154_6LOWPAN_H__ +struct netns_sysctl_lowpan { +#ifdef CONFIG_SYSCTL + struct ctl_table_header *frags_hdr; +#endif +}; + struct netns_ieee802154_lowpan { + struct netns_sysctl_lowpan sysctl; + struct netns_frags frags; + u16 max_dsize; }; #endif -- cgit v1.2.3 From b7779d06f9950e14a008a2de970b44233fe49c86 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 3 Mar 2014 14:45:20 +0100 Subject: netfilter: conntrack: spinlock per cpu to protect special lists. One spinlock per cpu to protect dying/unconfirmed/template special lists. (These lists are now per cpu, a bit like the untracked ct) Add a @cpu field to nf_conn, to make sure we hold the appropriate spinlock at removal time. Signed-off-by: Eric Dumazet Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netns/conntrack.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/net/netns') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index fbcc7fa536dc..c6a8994e9922 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -62,6 +62,13 @@ struct nf_ip_net { #endif }; +struct ct_pcpu { + spinlock_t lock; + struct hlist_nulls_head unconfirmed; + struct hlist_nulls_head dying; + struct hlist_nulls_head tmpl; +}; + struct netns_ct { atomic_t count; unsigned int expect_count; @@ -86,9 +93,7 @@ struct netns_ct { struct kmem_cache *nf_conntrack_cachep; struct hlist_nulls_head *hash; struct hlist_head *expect_hash; - struct hlist_nulls_head unconfirmed; - struct hlist_nulls_head dying; - struct hlist_nulls_head tmpl; + struct ct_pcpu __percpu *pcpu_lists; struct ip_conntrack_stat __percpu *stat; struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; struct nf_exp_event_notifier __rcu *nf_expect_event_cb; -- cgit v1.2.3 From 93bb0ceb75be2fdfa9fc0dd1fb522d9ada515d9c Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 3 Mar 2014 14:46:13 +0100 Subject: netfilter: conntrack: remove central spinlock nf_conntrack_lock nf_conntrack_lock is a monolithic lock and suffers from huge contention on current generation servers (8 or more core/threads). Perf locking congestion is clear on base kernel: - 72.56% ksoftirqd/6 [kernel.kallsyms] [k] _raw_spin_lock_bh - _raw_spin_lock_bh + 25.33% init_conntrack + 24.86% nf_ct_delete_from_lists + 24.62% __nf_conntrack_confirm + 24.38% destroy_conntrack + 0.70% tcp_packet + 2.21% ksoftirqd/6 [kernel.kallsyms] [k] fib_table_lookup + 1.15% ksoftirqd/6 [kernel.kallsyms] [k] __slab_free + 0.77% ksoftirqd/6 [kernel.kallsyms] [k] inet_getpeer + 0.70% ksoftirqd/6 [nf_conntrack] [k] nf_ct_delete + 0.55% ksoftirqd/6 [ip_tables] [k] ipt_do_table This patch change conntrack locking and provides a huge performance improvement. SYN-flood attack tested on a 24-core E5-2695v2(ES) with 10Gbit/s ixgbe (with tool trafgen): Base kernel: 810.405 new conntrack/sec After patch: 2.233.876 new conntrack/sec Notice other floods attack (SYN+ACK or ACK) can easily be deflected using: # iptables -A INPUT -m state --state INVALID -j DROP # sysctl -w net/netfilter/nf_conntrack_tcp_loose=0 Use an array of hashed spinlocks to protect insertions/deletions of conntracks into the hash table. 1024 spinlocks seem to give good results, at minimal cost (4KB memory). Due to lockdep max depth, 1024 becomes 8 if CONFIG_LOCKDEP=y The hash resize is a bit tricky, because we need to take all locks in the array. A seqcount_t is used to synchronize the hash table users with the resizing process. Signed-off-by: Eric Dumazet Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netns/conntrack.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net/netns') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index c6a8994e9922..773cce308bc6 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -5,6 +5,7 @@ #include #include #include +#include struct ctl_table_header; struct nf_conntrack_ecache; @@ -90,6 +91,7 @@ struct netns_ct { int sysctl_checksum; unsigned int htable_size; + seqcount_t generation; struct kmem_cache *nf_conntrack_cachep; struct hlist_nulls_head *hash; struct hlist_head *expect_hash; -- cgit v1.2.3 From d32d9bb85c65f52bed99a0149b47e9f6578c44c5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Mar 2014 07:09:07 -0700 Subject: flowcache: restore a single flow_cache kmem_cache It is not legal to create multiple kmem_cache having the same name. flowcache can use a single kmem_cache, no need for a per netns one. Fixes: ca925cf1534e ("flowcache: Make flow cache name space aware") Reported-by: Jakub Kicinski Tested-by: Jakub Kicinski Tested-by: Fan Du Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net/netns') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 51f0dce7b643..3492434baf88 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -64,7 +64,6 @@ struct netns_xfrm { /* flow cache part */ struct flow_cache flow_cache_global; - struct kmem_cache *flow_cachep; atomic_t flow_cache_genid; struct list_head flow_cache_gc_list; spinlock_t flow_cache_gc_lock; -- cgit v1.2.3