diff options
author | David S. Miller <davem@davemloft.net> | 2021-10-18 14:05:25 +0100 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2021-10-18 14:05:25 +0100 |
commit | 7adaf56edd03751badc3f045e664e30f9d1b195e (patch) | |
tree | 5e79414c19c7076792a0190d46a3aa9ee3d30ba5 /net/netfilter | |
parent | c87350ced118ef898ffcb7badacf0939010f2549 (diff) | |
parent | ffdd33dd9c12a8c263f78d778066709ef94671f9 (diff) | |
download | linux-stable-7adaf56edd03751badc3f045e664e30f9d1b195e.tar.gz linux-stable-7adaf56edd03751badc3f045e664e30f9d1b195e.tar.bz2 linux-stable-7adaf56edd03751badc3f045e664e30f9d1b195e.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says:
====================
Netfilter/IPVS updates for net-next
The following patchset contains Netfilter/IPVS for net-next:
1) Add new run_estimation toggle to IPVS to stop the estimation_timer
logic, from Dust Li.
2) Relax superfluous dynset check on NFT_SET_TIMEOUT.
3) Add egress hook, from Lukas Wunner.
4) Nowadays, almost all hook functions in x_table land just call the hook
evaluation loop. Remove remaining hook wrappers from iptables and IPVS.
From Florian Westphal.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/netfilter')
-rw-r--r-- | net/netfilter/Kconfig | 11 | ||||
-rw-r--r-- | net/netfilter/core.c | 38 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_core.c | 166 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_ctl.c | 8 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_est.c | 5 | ||||
-rw-r--r-- | net/netfilter/nfnetlink_hook.c | 16 | ||||
-rw-r--r-- | net/netfilter/nft_chain_filter.c | 4 | ||||
-rw-r--r-- | net/netfilter/nft_dynset.c | 11 |
8 files changed, 106 insertions, 153 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 54395266339d..49c9fae9c62c 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -10,6 +10,17 @@ config NETFILTER_INGRESS This allows you to classify packets from ingress using the Netfilter infrastructure. +config NETFILTER_EGRESS + bool "Netfilter egress support" + default y + select NET_EGRESS + help + This allows you to classify packets before transmission using the + Netfilter infrastructure. + +config NETFILTER_SKIP_EGRESS + def_bool NETFILTER_EGRESS && (NET_CLS_ACT || IFB) + config NETFILTER_NETLINK tristate diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 63d032191e62..6dec9cd395f1 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -317,6 +317,12 @@ nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum, return &dev->nf_hooks_ingress; } #endif +#ifdef CONFIG_NETFILTER_EGRESS + if (hooknum == NF_NETDEV_EGRESS) { + if (dev && dev_net(dev) == net) + return &dev->nf_hooks_egress; + } +#endif WARN_ON_ONCE(1); return NULL; } @@ -335,7 +341,8 @@ static int nf_ingress_check(struct net *net, const struct nf_hook_ops *reg, return 0; } -static inline bool nf_ingress_hook(const struct nf_hook_ops *reg, int pf) +static inline bool __maybe_unused nf_ingress_hook(const struct nf_hook_ops *reg, + int pf) { if ((pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) || (pf == NFPROTO_INET && reg->hooknum == NF_INET_INGRESS)) @@ -344,6 +351,12 @@ static inline bool nf_ingress_hook(const struct nf_hook_ops *reg, int pf) return false; } +static inline bool __maybe_unused nf_egress_hook(const struct nf_hook_ops *reg, + int pf) +{ + return pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_EGRESS; +} + static void nf_static_key_inc(const struct nf_hook_ops *reg, int pf) { #ifdef CONFIG_JUMP_LABEL @@ -383,9 +396,18 @@ static int __nf_register_net_hook(struct net *net, int pf, switch (pf) { case NFPROTO_NETDEV: - err = nf_ingress_check(net, reg, NF_NETDEV_INGRESS); - if (err < 0) - return err; +#ifndef CONFIG_NETFILTER_INGRESS + if (reg->hooknum == NF_NETDEV_INGRESS) + return -EOPNOTSUPP; +#endif +#ifndef CONFIG_NETFILTER_EGRESS + if (reg->hooknum == NF_NETDEV_EGRESS) + return -EOPNOTSUPP; +#endif + if ((reg->hooknum != NF_NETDEV_INGRESS && + reg->hooknum != NF_NETDEV_EGRESS) || + !reg->dev || dev_net(reg->dev) != net) + return -EINVAL; break; case NFPROTO_INET: if (reg->hooknum != NF_INET_INGRESS) @@ -418,6 +440,10 @@ static int __nf_register_net_hook(struct net *net, int pf, if (nf_ingress_hook(reg, pf)) net_inc_ingress_queue(); #endif +#ifdef CONFIG_NETFILTER_EGRESS + if (nf_egress_hook(reg, pf)) + net_inc_egress_queue(); +#endif nf_static_key_inc(reg, pf); BUG_ON(p == new_hooks); @@ -475,6 +501,10 @@ static void __nf_unregister_net_hook(struct net *net, int pf, if (nf_ingress_hook(reg, pf)) net_dec_ingress_queue(); #endif +#ifdef CONFIG_NETFILTER_EGRESS + if (nf_egress_hook(reg, pf)) + net_dec_egress_queue(); +#endif nf_static_key_dec(reg, pf); } else { WARN_ONCE(1, "hook not found, pf %d num %d", pf, reg->hooknum); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 128690c512df..e93c937a8bf0 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1330,12 +1330,15 @@ drop: * Check if outgoing packet belongs to the established ip_vs_conn. */ static unsigned int -ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af) +ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { + struct netns_ipvs *ipvs = net_ipvs(state->net); + unsigned int hooknum = state->hook; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; + int af = state->pf; struct sock *sk; EnterFunction(11); @@ -1468,56 +1471,6 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in return NF_ACCEPT; } -/* - * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chain, - * used only for VS/NAT. - * Check if packet is reply for established ip_vs_conn. - */ -static unsigned int -ip_vs_reply4(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET); -} - -/* - * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT. - * Check if packet is reply for established ip_vs_conn. - */ -static unsigned int -ip_vs_local_reply4(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET); -} - -#ifdef CONFIG_IP_VS_IPV6 - -/* - * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chain, - * used only for VS/NAT. - * Check if packet is reply for established ip_vs_conn. - */ -static unsigned int -ip_vs_reply6(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6); -} - -/* - * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT. - * Check if packet is reply for established ip_vs_conn. - */ -static unsigned int -ip_vs_local_reply6(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6); -} - -#endif - static unsigned int ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, @@ -1957,8 +1910,10 @@ out: * and send it on its way... */ static unsigned int -ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af) +ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { + struct netns_ipvs *ipvs = net_ipvs(state->net); + unsigned int hooknum = state->hook; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; @@ -1966,6 +1921,7 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int int ret, pkts; int conn_reuse_mode; struct sock *sk; + int af = state->pf; /* Already marked as IPVS request or reply? */ if (skb->ipvs_property) @@ -2138,55 +2094,6 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int } /* - * AF_INET handler in NF_INET_LOCAL_IN chain - * Schedule and forward packets from remote clients - */ -static unsigned int -ip_vs_remote_request4(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET); -} - -/* - * AF_INET handler in NF_INET_LOCAL_OUT chain - * Schedule and forward packets from local clients - */ -static unsigned int -ip_vs_local_request4(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET); -} - -#ifdef CONFIG_IP_VS_IPV6 - -/* - * AF_INET6 handler in NF_INET_LOCAL_IN chain - * Schedule and forward packets from remote clients - */ -static unsigned int -ip_vs_remote_request6(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6); -} - -/* - * AF_INET6 handler in NF_INET_LOCAL_OUT chain - * Schedule and forward packets from local clients - */ -static unsigned int -ip_vs_local_request6(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6); -} - -#endif - - -/* * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP * related packets destined for 0.0.0.0/0. * When fwmark-based virtual service is used, such as transparent @@ -2199,45 +2106,36 @@ static unsigned int ip_vs_forward_icmp(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - int r; struct netns_ipvs *ipvs = net_ipvs(state->net); - - if (ip_hdr(skb)->protocol != IPPROTO_ICMP) - return NF_ACCEPT; + int r; /* ipvs enabled in this netns ? */ if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - return ip_vs_in_icmp(ipvs, skb, &r, state->hook); -} - + if (state->pf == NFPROTO_IPV4) { + if (ip_hdr(skb)->protocol != IPPROTO_ICMP) + return NF_ACCEPT; #ifdef CONFIG_IP_VS_IPV6 -static unsigned int -ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) -{ - int r; - struct netns_ipvs *ipvs = net_ipvs(state->net); - struct ip_vs_iphdr iphdr; + } else { + struct ip_vs_iphdr iphdr; - ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr); - if (iphdr.protocol != IPPROTO_ICMPV6) - return NF_ACCEPT; + ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr); - /* ipvs enabled in this netns ? */ - if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) - return NF_ACCEPT; + if (iphdr.protocol != IPPROTO_ICMPV6) + return NF_ACCEPT; - return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr); -} + return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr); #endif + } + return ip_vs_in_icmp(ipvs, skb, &r, state->hook); +} static const struct nf_hook_ops ip_vs_ops4[] = { /* After packet filtering, change source only for VS/NAT */ { - .hook = ip_vs_reply4, + .hook = ip_vs_out_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC - 2, @@ -2246,21 +2144,21 @@ static const struct nf_hook_ops ip_vs_ops4[] = { * or VS/NAT(change destination), so that filtering rules can be * applied to IPVS. */ { - .hook = ip_vs_remote_request4, + .hook = ip_vs_in_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC - 1, }, /* Before ip_vs_in, change source only for VS/NAT */ { - .hook = ip_vs_local_reply4, + .hook = ip_vs_out_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST + 1, }, /* After mangle, schedule and forward local requests */ { - .hook = ip_vs_local_request4, + .hook = ip_vs_in_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST + 2, @@ -2275,7 +2173,7 @@ static const struct nf_hook_ops ip_vs_ops4[] = { }, /* After packet filtering, change source only for VS/NAT */ { - .hook = ip_vs_reply4, + .hook = ip_vs_out_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_FORWARD, .priority = 100, @@ -2286,7 +2184,7 @@ static const struct nf_hook_ops ip_vs_ops4[] = { static const struct nf_hook_ops ip_vs_ops6[] = { /* After packet filtering, change source only for VS/NAT */ { - .hook = ip_vs_reply6, + .hook = ip_vs_out_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC - 2, @@ -2295,21 +2193,21 @@ static const struct nf_hook_ops ip_vs_ops6[] = { * or VS/NAT(change destination), so that filtering rules can be * applied to IPVS. */ { - .hook = ip_vs_remote_request6, + .hook = ip_vs_in_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC - 1, }, /* Before ip_vs_in, change source only for VS/NAT */ { - .hook = ip_vs_local_reply6, + .hook = ip_vs_out_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST + 1, }, /* After mangle, schedule and forward local requests */ { - .hook = ip_vs_local_request6, + .hook = ip_vs_in_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST + 2, @@ -2317,14 +2215,14 @@ static const struct nf_hook_ops ip_vs_ops6[] = { /* After packet filtering (but before ip_vs_out_icmp), catch icmp * destined for 0.0.0.0/0, which is for incoming IPVS connections */ { - .hook = ip_vs_forward_icmp_v6, + .hook = ip_vs_forward_icmp, .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = 99, }, /* After packet filtering, change source only for VS/NAT */ { - .hook = ip_vs_reply6, + .hook = ip_vs_out_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = 100, diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c25097092a06..cbea5a68afb5 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2017,6 +2017,12 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "run_estimation", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -4090,6 +4096,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode; tbl[idx++].data = &ipvs->sysctl_schedule_icmp; tbl[idx++].data = &ipvs->sysctl_ignore_tunneled; + ipvs->sysctl_run_estimation = 1; + tbl[idx++].data = &ipvs->sysctl_run_estimation; ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); if (ipvs->sysctl_hdr == NULL) { diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 05b8112ffb37..9a1a7af6a186 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -100,6 +100,9 @@ static void estimation_timer(struct timer_list *t) u64 rate; struct netns_ipvs *ipvs = from_timer(ipvs, t, est_timer); + if (!sysctl_run_estimation(ipvs)) + goto skip; + spin_lock(&ipvs->est_lock); list_for_each_entry(e, &ipvs->est_list, list) { s = container_of(e, struct ip_vs_stats, est); @@ -131,6 +134,8 @@ static void estimation_timer(struct timer_list *t) spin_unlock(&s->lock); } spin_unlock(&ipvs->est_lock); + +skip: mod_timer(&ipvs->est_timer, jiffies + 2*HZ); } diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c index f554e2ea32ee..d5c719c9e36c 100644 --- a/net/netfilter/nfnetlink_hook.c +++ b/net/netfilter/nfnetlink_hook.c @@ -185,7 +185,7 @@ static const struct nf_hook_entries * nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *dev) { const struct nf_hook_entries *hook_head = NULL; -#ifdef CONFIG_NETFILTER_INGRESS +#if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS) struct net_device *netdev; #endif @@ -221,9 +221,9 @@ nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *de hook_head = rcu_dereference(net->nf.hooks_decnet[hook]); break; #endif -#ifdef CONFIG_NETFILTER_INGRESS +#if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS) case NFPROTO_NETDEV: - if (hook != NF_NETDEV_INGRESS) + if (hook >= NF_NETDEV_NUMHOOKS) return ERR_PTR(-EOPNOTSUPP); if (!dev) @@ -233,7 +233,15 @@ nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *de if (!netdev) return ERR_PTR(-ENODEV); - return rcu_dereference(netdev->nf_hooks_ingress); +#ifdef CONFIG_NETFILTER_INGRESS + if (hook == NF_NETDEV_INGRESS) + return rcu_dereference(netdev->nf_hooks_ingress); +#endif +#ifdef CONFIG_NETFILTER_EGRESS + if (hook == NF_NETDEV_EGRESS) + return rcu_dereference(netdev->nf_hooks_egress); +#endif + fallthrough; #endif default: return ERR_PTR(-EPROTONOSUPPORT); diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 5b02408a920b..680fe557686e 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -310,9 +310,11 @@ static const struct nft_chain_type nft_chain_filter_netdev = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, .family = NFPROTO_NETDEV, - .hook_mask = (1 << NF_NETDEV_INGRESS), + .hook_mask = (1 << NF_NETDEV_INGRESS) | + (1 << NF_NETDEV_EGRESS), .hooks = { [NF_NETDEV_INGRESS] = nft_do_chain_netdev, + [NF_NETDEV_EGRESS] = nft_do_chain_netdev, }, }; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 6ba3256fa844..87f3af4645d9 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -198,17 +198,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx, return -EBUSY; priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP])); - switch (priv->op) { - case NFT_DYNSET_OP_ADD: - case NFT_DYNSET_OP_DELETE: - break; - case NFT_DYNSET_OP_UPDATE: - if (!(set->flags & NFT_SET_TIMEOUT)) - return -EOPNOTSUPP; - break; - default: + if (priv->op > NFT_DYNSET_OP_DELETE) return -EOPNOTSUPP; - } timeout = 0; if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { |