From bbde9fc1824aab58bc78c084163007dd6c03fe5b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 31 May 2015 17:54:44 +0200 Subject: netfilter: factor out packet duplication for IPv4/IPv6 Extracted from the xtables TEE target. This creates two new modules for IPv4 and IPv6 that are shared between the TEE target and the new nf_tables dup expressions. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv4/nf_dup_ipv4.h | 7 +++++++ include/net/netfilter/ipv6/nf_dup_ipv6.h | 7 +++++++ 2 files changed, 14 insertions(+) create mode 100644 include/net/netfilter/ipv4/nf_dup_ipv4.h create mode 100644 include/net/netfilter/ipv6/nf_dup_ipv6.h (limited to 'include/net') diff --git a/include/net/netfilter/ipv4/nf_dup_ipv4.h b/include/net/netfilter/ipv4/nf_dup_ipv4.h new file mode 100644 index 000000000000..42008f10dfc4 --- /dev/null +++ b/include/net/netfilter/ipv4/nf_dup_ipv4.h @@ -0,0 +1,7 @@ +#ifndef _NF_DUP_IPV4_H_ +#define _NF_DUP_IPV4_H_ + +void nf_dup_ipv4(struct sk_buff *skb, unsigned int hooknum, + const struct in_addr *gw, int oif); + +#endif /* _NF_DUP_IPV4_H_ */ diff --git a/include/net/netfilter/ipv6/nf_dup_ipv6.h b/include/net/netfilter/ipv6/nf_dup_ipv6.h new file mode 100644 index 000000000000..ed6bd66fa5a0 --- /dev/null +++ b/include/net/netfilter/ipv6/nf_dup_ipv6.h @@ -0,0 +1,7 @@ +#ifndef _NF_DUP_IPV6_H_ +#define _NF_DUP_IPV6_H_ + +void nf_dup_ipv6(struct sk_buff *skb, unsigned int hooknum, + const struct in6_addr *gw, int oif); + +#endif /* _NF_DUP_IPV6_H_ */ -- cgit v1.2.3 From d877f07112f1e5a247c6b585c971a93895c9f738 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 31 May 2015 18:04:11 +0200 Subject: netfilter: nf_tables: add nft_dup expression This new expression uses the nf_dup engine to clone packets to a given gateway. Unlike xt_TEE, we use an index to indicate output interface which should be fine at this stage. Moreover, change to the preemtion-safe this_cpu_read(nf_skb_duplicated) from nf_dup_ipv{4,6} to silence a lockdep splat. Based on the original tee expression from Arturo Borrero Gonzalez, although this patch has diverted quite a bit from this initial effort due to the change to support maps. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nft_dup.h | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 include/net/netfilter/nft_dup.h (limited to 'include/net') diff --git a/include/net/netfilter/nft_dup.h b/include/net/netfilter/nft_dup.h new file mode 100644 index 000000000000..6b84cf6491a2 --- /dev/null +++ b/include/net/netfilter/nft_dup.h @@ -0,0 +1,9 @@ +#ifndef _NFT_DUP_H_ +#define _NFT_DUP_H_ + +struct nft_dup_inet { + enum nft_registers sreg_addr:8; + enum nft_registers sreg_dev:8; +}; + +#endif /* _NFT_DUP_H_ */ -- cgit v1.2.3 From 3499abb249bb5ed9d21031944bc3059ec4aa2909 Mon Sep 17 00:00:00 2001 From: Andreas Schultz Date: Wed, 5 Aug 2015 17:51:45 +0200 Subject: netfilter: nfacct: per network namespace support - Move the nfnl_acct_list into the network namespace, initialize and destroy it per namespace - Keep track of refcnt on nfacct objects, the old logic does not longer work with a per namespace list - Adjust xt_nfacct to pass the namespace when registring objects Signed-off-by: Andreas Schultz Signed-off-by: Pablo Neira Ayuso --- include/net/net_namespace.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index e951453e0a23..2dcea635ecce 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -118,6 +118,9 @@ struct net { #endif struct sock *nfnl; struct sock *nfnl_stash; +#if IS_ENABLED(CONFIG_NETFILTER_NETLINK_ACCT) + struct list_head nfnl_acct_list; +#endif #endif #ifdef CONFIG_WEXT_CORE struct sk_buff_head wext_nlevents; -- cgit v1.2.3 From 308ac9143ee2208f54d061eca54a89da509b5d92 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 8 Aug 2015 21:40:01 +0200 Subject: netfilter: nf_conntrack: push zone object into functions This patch replaces the zone id which is pushed down into functions with the actual zone object. It's a bigger one-time change, but needed for later on extending zones with a direction parameter, and thus decoupling this additional information from all call-sites. No functional changes in this patch. The default zone becomes a global const object, namely nf_ct_zone_dflt and will be returned directly in various cases, one being, when there's f.e. no zoning support. Signed-off-by: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 10 +++++++-- include/net/netfilter/nf_conntrack_core.h | 3 ++- include/net/netfilter/nf_conntrack_expect.h | 11 +++++++--- include/net/netfilter/nf_conntrack_zones.h | 33 ++++++++++++++++++++--------- 4 files changed, 41 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 37cd3911d5c5..f5e23c6dee8b 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -250,8 +250,12 @@ void nf_ct_untracked_status_or(unsigned long bits); void nf_ct_iterate_cleanup(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data, u32 portid, int report); + +struct nf_conntrack_zone; + void nf_conntrack_free(struct nf_conn *ct); -struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone, +struct nf_conn *nf_conntrack_alloc(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp); @@ -291,7 +295,9 @@ extern unsigned int nf_conntrack_max; extern unsigned int nf_conntrack_hash_rnd; void init_nf_conntrack_hash_rnd(void); -struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags); +struct nf_conn *nf_ct_tmpl_alloc(struct net *net, + const struct nf_conntrack_zone *zone, + gfp_t flags); #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index f2f0fa3bb150..c03f9c42b3cd 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -52,7 +52,8 @@ bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(struct net *net, u16 zone, +nf_conntrack_find_get(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple); int __nf_conntrack_confirm(struct sk_buff *skb); diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 3f3aecbc8632..dce56f09ac9a 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -4,7 +4,9 @@ #ifndef _NF_CONNTRACK_EXPECT_H #define _NF_CONNTRACK_EXPECT_H + #include +#include extern unsigned int nf_ct_expect_hsize; extern unsigned int nf_ct_expect_max; @@ -76,15 +78,18 @@ int nf_conntrack_expect_init(void); void nf_conntrack_expect_fini(void); struct nf_conntrack_expect * -__nf_ct_expect_find(struct net *net, u16 zone, +__nf_ct_expect_find(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple); struct nf_conntrack_expect * -nf_ct_expect_find_get(struct net *net, u16 zone, +nf_ct_expect_find_get(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple); struct nf_conntrack_expect * -nf_ct_find_expectation(struct net *net, u16 zone, +nf_ct_find_expectation(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple); void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h index 034efe8d45a5..0788bb0f267d 100644 --- a/include/net/netfilter/nf_conntrack_zones.h +++ b/include/net/netfilter/nf_conntrack_zones.h @@ -1,25 +1,38 @@ #ifndef _NF_CONNTRACK_ZONES_H #define _NF_CONNTRACK_ZONES_H -#define NF_CT_DEFAULT_ZONE 0 - -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) -#include +#define NF_CT_DEFAULT_ZONE_ID 0 struct nf_conntrack_zone { u16 id; }; -static inline u16 nf_ct_zone(const struct nf_conn *ct) +extern const struct nf_conntrack_zone nf_ct_zone_dflt; + +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include + +static inline const struct nf_conntrack_zone * +nf_ct_zone(const struct nf_conn *ct) { + const struct nf_conntrack_zone *nf_ct_zone = NULL; + #ifdef CONFIG_NF_CONNTRACK_ZONES - struct nf_conntrack_zone *nf_ct_zone; nf_ct_zone = nf_ct_ext_find(ct, NF_CT_EXT_ZONE); - if (nf_ct_zone) - return nf_ct_zone->id; #endif - return NF_CT_DEFAULT_ZONE; + return nf_ct_zone ? nf_ct_zone : &nf_ct_zone_dflt; } -#endif /* CONFIG_NF_CONNTRACK || CONFIG_NF_CONNTRACK_MODULE */ +static inline const struct nf_conntrack_zone * +nf_ct_zone_tmpl(const struct nf_conn *tmpl) +{ + return tmpl ? nf_ct_zone(tmpl) : &nf_ct_zone_dflt; +} + +static inline bool nf_ct_zone_equal(const struct nf_conn *a, + const struct nf_conntrack_zone *b) +{ + return nf_ct_zone(a)->id == b->id; +} +#endif /* IS_ENABLED(CONFIG_NF_CONNTRACK) */ #endif /* _NF_CONNTRACK_ZONES_H */ -- cgit v1.2.3 From deedb59039f111c41aa5a54ee384c8e7c08bc78a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 14 Aug 2015 16:03:39 +0200 Subject: netfilter: nf_conntrack: add direction support for zones This work adds a direction parameter to netfilter zones, so identity separation can be performed only in original/reply or both directions (default). This basically opens up the possibility of doing NAT with conflicting IP address/port tuples from multiple, isolated tenants on a host (e.g. from a netns) without requiring each tenant to NAT twice resp. to use its own dedicated IP address to SNAT to, meaning overlapping tuples can be made unique with the zone identifier in original direction, where the NAT engine will then allocate a unique tuple in the commonly shared default zone for the reply direction. In some restricted, local DNAT cases, also port redirection could be used for making the reply traffic unique w/o requiring SNAT. The consensus we've reached and discussed at NFWS and since the initial implementation [1] was to directly integrate the direction meta data into the existing zones infrastructure, as opposed to the ct->mark approach we proposed initially. As we pass the nf_conntrack_zone object directly around, we don't have to touch all call-sites, but only those, that contain equality checks of zones. Thus, based on the current direction (original or reply), we either return the actual id, or the default NF_CT_DEFAULT_ZONE_ID. CT expectations are direction-agnostic entities when expectations are being compared among themselves, so we can only use the identifier in this case. Note that zone identifiers can not be included into the hash mix anymore as they don't contain a "stable" value that would be equal for both directions at all times, f.e. if only zone->id would unconditionally be xor'ed into the table slot hash, then replies won't find the corresponding conntracking entry anymore. If no particular direction is specified when configuring zones, the behaviour is exactly as we expect currently (both directions). Support has been added for the CT netlink interface as well as the x_tables raw CT target, which both already offer existing interfaces to user space for the configuration of zones. Below a minimal, simplified collision example (script in [2]) with netperf sessions: +--- tenant-1 ---+ mark := 1 | netperf |--+ +----------------+ | CT zone := mark [ORIGINAL] [ip,sport] := X +--------------+ +--- gateway ---+ | mark routing |--| SNAT |-- ... + +--------------+ +---------------+ | +--- tenant-2 ---+ | ~~~|~~~ | netperf |--+ +-----------+ | +----------------+ mark := 2 | netserver |------ ... + [ip,sport] := X +-----------+ [ip,port] := Y On the gateway netns, example: iptables -t raw -A PREROUTING -j CT --zone mark --zone-dir ORIGINAL iptables -t nat -A POSTROUTING -o -j SNAT --to-source --random-fully iptables -t mangle -A PREROUTING -m conntrack --ctdir ORIGINAL -j CONNMARK --save-mark iptables -t mangle -A POSTROUTING -m conntrack --ctdir REPLY -j CONNMARK --restore-mark conntrack dump from gateway netns: netperf -H 10.1.1.2 -t TCP_STREAM -l60 -p12865,5555 from each tenant netns tcp 6 431995 ESTABLISHED src=40.1.1.1 dst=10.1.1.2 sport=5555 dport=12865 zone-orig=1 src=10.1.1.2 dst=10.1.1.1 sport=12865 dport=1024 [ASSURED] mark=1 secctx=system_u:object_r:unlabeled_t:s0 use=1 tcp 6 431994 ESTABLISHED src=40.1.1.1 dst=10.1.1.2 sport=5555 dport=12865 zone-orig=2 src=10.1.1.2 dst=10.1.1.1 sport=12865 dport=5555 [ASSURED] mark=2 secctx=system_u:object_r:unlabeled_t:s0 use=1 tcp 6 299 ESTABLISHED src=40.1.1.1 dst=10.1.1.2 sport=39438 dport=33768 zone-orig=1 src=10.1.1.2 dst=10.1.1.1 sport=33768 dport=39438 [ASSURED] mark=1 secctx=system_u:object_r:unlabeled_t:s0 use=1 tcp 6 300 ESTABLISHED src=40.1.1.1 dst=10.1.1.2 sport=32889 dport=40206 zone-orig=2 src=10.1.1.2 dst=10.1.1.1 sport=40206 dport=32889 [ASSURED] mark=2 secctx=system_u:object_r:unlabeled_t:s0 use=2 Taking this further, test script in [2] creates 200 tenants and runs original-tuple colliding netperf sessions each. A conntrack -L dump in the gateway netns also confirms 200 overlapping entries, all in ESTABLISHED state as expected. I also did run various other tests with some permutations of the script, to mention some: SNAT in random/random-fully/persistent mode, no zones (no overlaps), static zones (original, reply, both directions), etc. [1] http://thread.gmane.org/gmane.comp.security.firewalls.netfilter.devel/57412/ [2] https://paste.fedoraproject.org/242835/65657871/ Signed-off-by: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_zones.h | 31 +++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h index 0788bb0f267d..3942ddf0d4ff 100644 --- a/include/net/netfilter/nf_conntrack_zones.h +++ b/include/net/netfilter/nf_conntrack_zones.h @@ -1,10 +1,18 @@ #ifndef _NF_CONNTRACK_ZONES_H #define _NF_CONNTRACK_ZONES_H +#include + #define NF_CT_DEFAULT_ZONE_ID 0 +#define NF_CT_ZONE_DIR_ORIG (1 << IP_CT_DIR_ORIGINAL) +#define NF_CT_ZONE_DIR_REPL (1 << IP_CT_DIR_REPLY) + +#define NF_CT_DEFAULT_ZONE_DIR (NF_CT_ZONE_DIR_ORIG | NF_CT_ZONE_DIR_REPL) + struct nf_conntrack_zone { u16 id; + u16 dir; }; extern const struct nf_conntrack_zone nf_ct_zone_dflt; @@ -29,8 +37,29 @@ nf_ct_zone_tmpl(const struct nf_conn *tmpl) return tmpl ? nf_ct_zone(tmpl) : &nf_ct_zone_dflt; } +static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone, + enum ip_conntrack_dir dir) +{ + return zone->dir & (1 << dir); +} + +static inline u16 nf_ct_zone_id(const struct nf_conntrack_zone *zone, + enum ip_conntrack_dir dir) +{ + return nf_ct_zone_matches_dir(zone, dir) ? + zone->id : NF_CT_DEFAULT_ZONE_ID; +} + static inline bool nf_ct_zone_equal(const struct nf_conn *a, - const struct nf_conntrack_zone *b) + const struct nf_conntrack_zone *b, + enum ip_conntrack_dir dir) +{ + return nf_ct_zone_id(nf_ct_zone(a), dir) == + nf_ct_zone_id(b, dir); +} + +static inline bool nf_ct_zone_equal_any(const struct nf_conn *a, + const struct nf_conntrack_zone *b) { return nf_ct_zone(a)->id == b->id; } -- cgit v1.2.3 From 5e8018fc61423e677398d4ad4d72df70b9788e77 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 14 Aug 2015 16:03:40 +0200 Subject: netfilter: nf_conntrack: add efficient mark to zone mapping This work adds the possibility of deriving the zone id from the skb->mark field in a scalable manner. This allows for having only a single template serving hundreds/thousands of different zones, for example, instead of the need to have one match for each zone as an extra CT jump target. Note that we'd need to have this information attached to the template as at the time when we're trying to lookup a possible ct object, we already need to know zone information for a possible match when going into __nf_conntrack_find_get(). This work provides a minimal implementation for a possible mapping. In order to not add/expose an extra ct->status bit, the zone structure has been extended to carry a flag for deriving the mark. Signed-off-by: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_zones.h | 45 ++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h index 3942ddf0d4ff..5316c7b3a374 100644 --- a/include/net/netfilter/nf_conntrack_zones.h +++ b/include/net/netfilter/nf_conntrack_zones.h @@ -10,9 +10,12 @@ #define NF_CT_DEFAULT_ZONE_DIR (NF_CT_ZONE_DIR_ORIG | NF_CT_ZONE_DIR_REPL) +#define NF_CT_FLAG_MARK 1 + struct nf_conntrack_zone { u16 id; - u16 dir; + u8 flags; + u8 dir; }; extern const struct nf_conntrack_zone nf_ct_zone_dflt; @@ -32,9 +35,45 @@ nf_ct_zone(const struct nf_conn *ct) } static inline const struct nf_conntrack_zone * -nf_ct_zone_tmpl(const struct nf_conn *tmpl) +nf_ct_zone_init(struct nf_conntrack_zone *zone, u16 id, u8 dir, u8 flags) +{ + zone->id = id; + zone->flags = flags; + zone->dir = dir; + + return zone; +} + +static inline const struct nf_conntrack_zone * +nf_ct_zone_tmpl(const struct nf_conn *tmpl, const struct sk_buff *skb, + struct nf_conntrack_zone *tmp) +{ + const struct nf_conntrack_zone *zone; + + if (!tmpl) + return &nf_ct_zone_dflt; + + zone = nf_ct_zone(tmpl); + if (zone->flags & NF_CT_FLAG_MARK) + zone = nf_ct_zone_init(tmp, skb->mark, zone->dir, 0); + + return zone; +} + +static inline int nf_ct_zone_add(struct nf_conn *ct, gfp_t flags, + const struct nf_conntrack_zone *info) { - return tmpl ? nf_ct_zone(tmpl) : &nf_ct_zone_dflt; +#ifdef CONFIG_NF_CONNTRACK_ZONES + struct nf_conntrack_zone *nf_ct_zone; + + nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, flags); + if (!nf_ct_zone) + return -ENOMEM; + + nf_ct_zone_init(nf_ct_zone, info->id, info->dir, + info->flags); +#endif + return 0; } static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone, -- cgit v1.2.3