From cb79a180f2e7eb51de5a4848652893197637bccb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 1 Nov 2017 20:30:49 +0100 Subject: xfrm: defer daddr pointer assignment after spi parsing syzbot reports: BUG: KASAN: use-after-free in __xfrm_state_lookup+0x695/0x6b0 Read of size 4 at addr ffff8801d434e538 by task syzkaller647520/2991 [..] __xfrm_state_lookup+0x695/0x6b0 net/xfrm/xfrm_state.c:833 xfrm_state_lookup+0x8a/0x160 net/xfrm/xfrm_state.c:1592 xfrm_input+0x8e5/0x22f0 net/xfrm/xfrm_input.c:302 The use-after-free is the ipv4 destination address, which points to an skb head area that has been reallocated: pskb_expand_head+0x36b/0x1210 net/core/skbuff.c:1494 __pskb_pull_tail+0x14a/0x17c0 net/core/skbuff.c:1877 pskb_may_pull include/linux/skbuff.h:2102 [inline] xfrm_parse_spi+0x3d3/0x4d0 net/xfrm/xfrm_input.c:170 xfrm_input+0xce2/0x22f0 net/xfrm/xfrm_input.c:291 so the real bug is that xfrm_parse_spi() uses pskb_may_pull, but for now do smaller workaround that makes xfrm_input fetch daddr after spi parsing. Reported-by: syzbot Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 8ac9d32fb79d..1c6051cb7733 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -265,8 +265,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto lock; } - daddr = (xfrm_address_t *)(skb_network_header(skb) + - XFRM_SPI_SKB_CB(skb)->daddroff); family = XFRM_SPI_SKB_CB(skb)->family; /* if tunnel is present override skb->mark value with tunnel i_key */ @@ -293,6 +291,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop; } + daddr = (xfrm_address_t *)(skb_network_header(skb) + + XFRM_SPI_SKB_CB(skb)->daddroff); do { if (skb->sp->len == XFRM_MAX_DEPTH) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); -- cgit v1.2.3 From cf37966751747727629fe51fd4a1d4edd8457c60 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Nov 2017 16:46:01 +0100 Subject: xfrm: do unconditional template resolution before pcpu cache check Stephen Smalley says: Since 4.14-rc1, the selinux-testsuite has been encountering sporadic failures during testing of labeled IPSEC. git bisect pointed to commit ec30d ("xfrm: add xdst pcpu cache"). The xdst pcpu cache is only checking that the policies are the same, but does not validate that the policy, state, and flow match with respect to security context labeling. As a result, the wrong SA could be used and the receiver could end up performing permission checking and providing SO_PEERSEC or SCM_SECURITY values for the wrong security context. This fix makes it so that we always do the template resolution, and then checks that the found states match those in the pcpu bundle. This has the disadvantage of doing a bit more work (lookup in state hash table) if we can reuse the xdst entry (we only avoid xdst alloc/free) but we don't add a lot of extra work in case we can't reuse. xfrm_pol_dead() check is removed, reasoning is that xfrm_tmpl_resolve does all needed checks. Cc: Paul Moore Fixes: ec30d78c14a813db39a647b6a348b428 ("xfrm: add xdst pcpu cache") Reported-by: Stephen Smalley Tested-by: Stephen Smalley Signed-off-by: Florian Westphal Acked-by: Paul Moore Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8cafb3c0a4ac..a2e531bf4f97 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1787,19 +1787,23 @@ void xfrm_policy_cache_flush(void) put_online_cpus(); } -static bool xfrm_pol_dead(struct xfrm_dst *xdst) +static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst, + struct xfrm_state * const xfrm[], + int num) { - unsigned int num_pols = xdst->num_pols; - unsigned int pol_dead = 0, i; + const struct dst_entry *dst = &xdst->u.dst; + int i; - for (i = 0; i < num_pols; i++) - pol_dead |= xdst->pols[i]->walk.dead; + if (xdst->num_xfrms != num) + return false; - /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ - if (pol_dead) - xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; + for (i = 0; i < num; i++) { + if (!dst || dst->xfrm != xfrm[i]) + return false; + dst = dst->child; + } - return pol_dead; + return xfrm_bundle_ok(xdst); } static struct xfrm_dst * @@ -1813,26 +1817,28 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, struct dst_entry *dst; int err; + /* Try to instantiate a bundle */ + err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); + if (err <= 0) { + if (err != 0 && err != -EAGAIN) + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); + return ERR_PTR(err); + } + xdst = this_cpu_read(xfrm_last_dst); if (xdst && xdst->u.dst.dev == dst_orig->dev && xdst->num_pols == num_pols && - !xfrm_pol_dead(xdst) && memcmp(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols) == 0 && - xfrm_bundle_ok(xdst)) { + xfrm_xdst_can_reuse(xdst, xfrm, err)) { dst_hold(&xdst->u.dst); + while (err > 0) + xfrm_state_put(xfrm[--err]); return xdst; } old = xdst; - /* Try to instantiate a bundle */ - err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); - if (err <= 0) { - if (err != 0 && err != -EAGAIN) - XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); - return ERR_PTR(err); - } dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig); if (IS_ERR(dst)) { -- cgit v1.2.3 From c9f3f813d462c72dbe412cee6a5cbacf13c4ad5e Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 2 Nov 2017 08:10:17 +0100 Subject: xfrm: Fix stack-out-of-bounds read in xfrm_state_find. When we do tunnel or beet mode, we pass saddr and daddr from the template to xfrm_state_find(), this is ok. On transport mode, we pass the addresses from the flowi, assuming that the IP addresses (and address family) don't change during transformation. This assumption is wrong in the IPv4 mapped IPv6 case, packet is IPv4 and template is IPv6. Fix this by using the addresses from the template unconditionally. Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index a2e531bf4f97..6eb228a70131 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1361,36 +1361,29 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, struct net *net = xp_net(policy); int nx; int i, error; - xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); - xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); xfrm_address_t tmp; for (nx = 0, i = 0; i < policy->xfrm_nr; i++) { struct xfrm_state *x; - xfrm_address_t *remote = daddr; - xfrm_address_t *local = saddr; + xfrm_address_t *local; + xfrm_address_t *remote; struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; - if (tmpl->mode == XFRM_MODE_TUNNEL || - tmpl->mode == XFRM_MODE_BEET) { - remote = &tmpl->id.daddr; - local = &tmpl->saddr; - if (xfrm_addr_any(local, tmpl->encap_family)) { - error = xfrm_get_saddr(net, fl->flowi_oif, - &tmp, remote, - tmpl->encap_family, 0); - if (error) - goto fail; - local = &tmp; - } + remote = &tmpl->id.daddr; + local = &tmpl->saddr; + if (xfrm_addr_any(local, tmpl->encap_family)) { + error = xfrm_get_saddr(net, fl->flowi_oif, + &tmp, remote, + tmpl->encap_family, 0); + if (error) + goto fail; + local = &tmp; } x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family); if (x && x->km.state == XFRM_STATE_VALID) { xfrm[nx++] = x; - daddr = remote; - saddr = local; continue; } if (x) { -- cgit v1.2.3 From 2b5ec1a5f9738ee7bf8f5ec0526e75e00362c48f Mon Sep 17 00:00:00 2001 From: Ye Yin Date: Thu, 26 Oct 2017 16:57:05 +0800 Subject: netfilter/ipvs: clear ipvs_property flag when SKB net namespace changed When run ipvs in two different network namespace at the same host, and one ipvs transport network traffic to the other network namespace ipvs. 'ipvs_property' flag will make the second ipvs take no effect. So we should clear 'ipvs_property' when SKB network namespace changed. Fixes: 621e84d6f373 ("dev: introduce skb_scrub_packet()") Signed-off-by: Ye Yin Signed-off-by: Wei Zhou Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/core/skbuff.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 24656076906d..e140ba49b30a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4864,6 +4864,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) if (!xnet) return; + ipvs_reset(skb); skb_orphan(skb); skb->mark = 0; } -- cgit v1.2.3 From 8f7dc9ae4a7aece9fbc3e6637bdfa38b36bcdf09 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 3 Nov 2017 16:49:00 +0100 Subject: l2tp: don't use l2tp_tunnel_find() in l2tp_ip and l2tp_ip6 Using l2tp_tunnel_find() in l2tp_ip_recv() is wrong for two reasons: * It doesn't take a reference on the returned tunnel, which makes the call racy wrt. concurrent tunnel deletion. * The lookup is only based on the tunnel identifier, so it can return a tunnel that doesn't match the packet's addresses or protocol. For example, a packet sent to an L2TPv3 over IPv6 tunnel can be delivered to an L2TPv2 over UDPv4 tunnel. This is worse than a simple cross-talk: when delivering the packet to an L2TP over UDP tunnel, the corresponding socket is UDP, where ->sk_backlog_rcv() is NULL. Calling sk_receive_skb() will then crash the kernel by trying to execute this callback. And l2tp_tunnel_find() isn't even needed here. __l2tp_ip_bind_lookup() properly checks the socket binding and connection settings. It was used as a fallback mechanism for finding tunnels that didn't have their data path registered yet. But it's not limited to this case and can be used to replace l2tp_tunnel_find() in the general case. Fix l2tp_ip6 in the same way. Fixes: 0d76751fad77 ("l2tp: Add L2TPv3 IP encapsulation (no UDP) support") Fixes: a32e0eec7042 ("l2tp: introduce L2TPv3 IP encapsulation support for IPv6") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 24 +++++++++--------------- net/l2tp/l2tp_ip6.c | 24 +++++++++--------------- 2 files changed, 18 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 4d322c1b7233..e4280b6568b4 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -123,6 +123,7 @@ static int l2tp_ip_recv(struct sk_buff *skb) unsigned char *ptr, *optr; struct l2tp_session *session; struct l2tp_tunnel *tunnel = NULL; + struct iphdr *iph; int length; if (!pskb_may_pull(skb, 4)) @@ -178,24 +179,17 @@ pass_up: goto discard; tunnel_id = ntohl(*(__be32 *) &skb->data[4]); - tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel) { - sk = tunnel->sock; - sock_hold(sk); - } else { - struct iphdr *iph = (struct iphdr *) skb_network_header(skb); - - read_lock_bh(&l2tp_ip_lock); - sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, - inet_iif(skb), tunnel_id); - if (!sk) { - read_unlock_bh(&l2tp_ip_lock); - goto discard; - } + iph = (struct iphdr *)skb_network_header(skb); - sock_hold(sk); + read_lock_bh(&l2tp_ip_lock); + sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, inet_iif(skb), + tunnel_id); + if (!sk) { read_unlock_bh(&l2tp_ip_lock); + goto discard; } + sock_hold(sk); + read_unlock_bh(&l2tp_ip_lock); if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_put; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 88b397c30d86..8bcaa975b432 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -136,6 +136,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb) unsigned char *ptr, *optr; struct l2tp_session *session; struct l2tp_tunnel *tunnel = NULL; + struct ipv6hdr *iph; int length; if (!pskb_may_pull(skb, 4)) @@ -192,24 +193,17 @@ pass_up: goto discard; tunnel_id = ntohl(*(__be32 *) &skb->data[4]); - tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel) { - sk = tunnel->sock; - sock_hold(sk); - } else { - struct ipv6hdr *iph = ipv6_hdr(skb); - - read_lock_bh(&l2tp_ip6_lock); - sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr, - inet6_iif(skb), tunnel_id); - if (!sk) { - read_unlock_bh(&l2tp_ip6_lock); - goto discard; - } + iph = ipv6_hdr(skb); - sock_hold(sk); + read_lock_bh(&l2tp_ip6_lock); + sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr, + inet6_iif(skb), tunnel_id); + if (!sk) { read_unlock_bh(&l2tp_ip6_lock); + goto discard; } + sock_hold(sk); + read_unlock_bh(&l2tp_ip6_lock); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_put; -- cgit v1.2.3 From d09b9e60e06d431b008a878c4b1d48d6cce816ef Mon Sep 17 00:00:00 2001 From: Priyaranjan Jha Date: Fri, 3 Nov 2017 17:46:55 -0700 Subject: tcp: fix DSACK-based undo on non-duplicate ACK Fixes DSACK-based undo when sender is in Open State and an ACK advances snd_una. Example scenario: - Sender goes into recovery and makes some spurious rtx. - It comes out of recovery and enters into open state. - It sends some more packets, let's say 4. - The receiver sends an ACK for the first two, but this ACK is lost. - The sender receives ack for first two, and DSACK for previous spurious rtx. Signed-off-by: Priyaranjan Jha Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Acked-by: Yousuk Seung Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5a87a00641d3..b2fc7163bd40 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -115,7 +115,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) -#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE) +#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK) #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) -- cgit v1.2.3 From b7e732fa3171318418524b776b841b4024933b2b Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 6 Nov 2017 20:50:35 -0800 Subject: qrtr: Move to postcore_initcall Registering qrtr with module_init makes the ability of typical platform code to create AF_QIPCRTR socket during probe a matter of link order luck. Moving qrtr to postcore_initcall() avoids this. Signed-off-by: Bjorn Andersson Signed-off-by: David S. Miller --- net/qrtr/qrtr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index c2f5c13550c0..78418f38464a 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -1085,7 +1085,7 @@ static int __init qrtr_proto_init(void) return 0; } -module_init(qrtr_proto_init); +postcore_initcall(qrtr_proto_init); static void __exit qrtr_proto_fini(void) { -- cgit v1.2.3 From c7e460ce55724d4e4e22d3126e5c47273819c53a Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:18 -0800 Subject: Revert "net_sched: hold netns refcnt for each action" This reverts commit ceffcc5e254b450e6159f173e4538215cebf1b59. If we hold that refcnt, the netns can never be destroyed until all actions are destroyed by user, this breaks our netns design which we expect all actions are destroyed when we destroy the whole netns. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_api.c | 2 -- net/sched/act_bpf.c | 2 +- net/sched/act_connmark.c | 2 +- net/sched/act_csum.c | 2 +- net/sched/act_gact.c | 2 +- net/sched/act_ife.c | 2 +- net/sched/act_ipt.c | 4 ++-- net/sched/act_mirred.c | 2 +- net/sched/act_nat.c | 2 +- net/sched/act_pedit.c | 2 +- net/sched/act_police.c | 2 +- net/sched/act_sample.c | 2 +- net/sched/act_simple.c | 2 +- net/sched/act_skbedit.c | 2 +- net/sched/act_skbmod.c | 2 +- net/sched/act_tunnel_key.c | 2 +- net/sched/act_vlan.c | 2 +- 17 files changed, 17 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index ca2ff0b3123f..8f2c63514956 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -78,7 +78,6 @@ static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p) spin_lock_bh(&idrinfo->lock); idr_remove_ext(&idrinfo->action_idr, p->tcfa_index); spin_unlock_bh(&idrinfo->lock); - put_net(idrinfo->net); gen_kill_estimator(&p->tcfa_rate_est); free_tcf(p); } @@ -337,7 +336,6 @@ err3: p->idrinfo = idrinfo; p->ops = ops; INIT_LIST_HEAD(&p->list); - get_net(idrinfo->net); *a = p; return 0; } diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 9bce8cc84cbb..c0c707eb2c96 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -398,7 +398,7 @@ static __net_init int bpf_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, bpf_net_id); - return tc_action_net_init(tn, &act_bpf_ops, net); + return tc_action_net_init(tn, &act_bpf_ops); } static void __net_exit bpf_exit_net(struct net *net) diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 34e52d01a5dd..10b7a8855a6c 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -206,7 +206,7 @@ static __net_init int connmark_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, connmark_net_id); - return tc_action_net_init(tn, &act_connmark_ops, net); + return tc_action_net_init(tn, &act_connmark_ops); } static void __net_exit connmark_exit_net(struct net *net) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 35171df2ebef..1c40caadcff9 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -626,7 +626,7 @@ static __net_init int csum_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, csum_net_id); - return tc_action_net_init(tn, &act_csum_ops, net); + return tc_action_net_init(tn, &act_csum_ops); } static void __net_exit csum_exit_net(struct net *net) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index ef7f7f39d26d..e29a48ef7fc3 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -232,7 +232,7 @@ static __net_init int gact_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, gact_net_id); - return tc_action_net_init(tn, &act_gact_ops, net); + return tc_action_net_init(tn, &act_gact_ops); } static void __net_exit gact_exit_net(struct net *net) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index f65e4b5058e0..8ccd35825b6b 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -818,7 +818,7 @@ static __net_init int ife_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, ife_net_id); - return tc_action_net_init(tn, &act_ife_ops, net); + return tc_action_net_init(tn, &act_ife_ops); } static void __net_exit ife_exit_net(struct net *net) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index dbdf3b2470d5..d9e399a7e3d5 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -334,7 +334,7 @@ static __net_init int ipt_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, ipt_net_id); - return tc_action_net_init(tn, &act_ipt_ops, net); + return tc_action_net_init(tn, &act_ipt_ops); } static void __net_exit ipt_exit_net(struct net *net) @@ -384,7 +384,7 @@ static __net_init int xt_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, xt_net_id); - return tc_action_net_init(tn, &act_xt_ops, net); + return tc_action_net_init(tn, &act_xt_ops); } static void __net_exit xt_exit_net(struct net *net) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 84759cfd5a33..416627c66f08 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -343,7 +343,7 @@ static __net_init int mirred_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, mirred_net_id); - return tc_action_net_init(tn, &act_mirred_ops, net); + return tc_action_net_init(tn, &act_mirred_ops); } static void __net_exit mirred_exit_net(struct net *net) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 7eeaaf9217b6..c365d01b99c8 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -307,7 +307,7 @@ static __net_init int nat_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, nat_net_id); - return tc_action_net_init(tn, &act_nat_ops, net); + return tc_action_net_init(tn, &act_nat_ops); } static void __net_exit nat_exit_net(struct net *net) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index b3d82c334a5f..491fe5deb09e 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -450,7 +450,7 @@ static __net_init int pedit_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, pedit_net_id); - return tc_action_net_init(tn, &act_pedit_ops, net); + return tc_action_net_init(tn, &act_pedit_ops); } static void __net_exit pedit_exit_net(struct net *net) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 9ec42b26e4b9..3bb2ebf9e9ae 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -331,7 +331,7 @@ static __net_init int police_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, police_net_id); - return tc_action_net_init(tn, &act_police_ops, net); + return tc_action_net_init(tn, &act_police_ops); } static void __net_exit police_exit_net(struct net *net) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index e69a1e3a39bf..8b5abcd2f32f 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -240,7 +240,7 @@ static __net_init int sample_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, sample_net_id); - return tc_action_net_init(tn, &act_sample_ops, net); + return tc_action_net_init(tn, &act_sample_ops); } static void __net_exit sample_exit_net(struct net *net) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index a8d0ea95f894..e7b57e5071a3 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -201,7 +201,7 @@ static __net_init int simp_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, simp_net_id); - return tc_action_net_init(tn, &act_simp_ops, net); + return tc_action_net_init(tn, &act_simp_ops); } static void __net_exit simp_exit_net(struct net *net) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index fbac62472e09..59949d61f20d 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -238,7 +238,7 @@ static __net_init int skbedit_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); - return tc_action_net_init(tn, &act_skbedit_ops, net); + return tc_action_net_init(tn, &act_skbedit_ops); } static void __net_exit skbedit_exit_net(struct net *net) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 8e12d8897d2f..b642ad3d39dd 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -263,7 +263,7 @@ static __net_init int skbmod_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); - return tc_action_net_init(tn, &act_skbmod_ops, net); + return tc_action_net_init(tn, &act_skbmod_ops); } static void __net_exit skbmod_exit_net(struct net *net) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index c33faa373cf2..30c96274c638 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -322,7 +322,7 @@ static __net_init int tunnel_key_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - return tc_action_net_init(tn, &act_tunnel_key_ops, net); + return tc_action_net_init(tn, &act_tunnel_key_ops); } static void __net_exit tunnel_key_exit_net(struct net *net) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 115fc33cc6d8..16eb067a8d8f 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -269,7 +269,7 @@ static __net_init int vlan_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, vlan_net_id); - return tc_action_net_init(tn, &act_vlan_ops, net); + return tc_action_net_init(tn, &act_vlan_ops); } static void __net_exit vlan_exit_net(struct net *net) -- cgit v1.2.3 From e4b95c41df36befcfd117210900cd790bc2cd048 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:19 -0800 Subject: net_sched: introduce tcf_exts_get_net() and tcf_exts_put_net() Instead of holding netns refcnt in tc actions, we can minimize the holding time by saving it in struct tcf_exts instead. This means we can just hold netns refcnt right before call_rcu() and release it after tcf_exts_destroy() is done. However, because on netns cleanup path we call tcf_proto_destroy() too, obviously we can not hold netns for a zero refcnt, in this case we have to do cleanup synchronously. It is fine for RCU too, the caller cleanup_net() already waits for a grace period. For other cases, refcnt is non-zero and we can safely grab it as normal and release it after we are done. This patch provides two new API for each filter to use: tcf_exts_get_net() and tcf_exts_put_net(). And all filters now can use the following pattern: void __destroy_filter() { tcf_exts_destroy(); tcf_exts_put_net(); // <== release netns refcnt kfree(); } void some_work() { rtnl_lock(); __destroy_filter(); rtnl_unlock(); } void some_rcu_callback() { tcf_queue_work(some_work); } if (tcf_exts_get_net()) // <== hold netns refcnt call_rcu(some_rcu_callback); else __destroy_filter(); Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_api.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index b2d310745487..ecbb019efcbd 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -927,6 +927,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, exts->actions[i++] = act; exts->nr_actions = i; } + exts->net = net; } #else if ((exts->action && tb[exts->action]) || -- cgit v1.2.3 From 0b2a59894b7657fab46b50f176bd772aa495044f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:20 -0800 Subject: cls_basic: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_basic.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index f177649a2419..e43c56d5b96a 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -85,16 +85,21 @@ static int basic_init(struct tcf_proto *tp) return 0; } +static void __basic_delete_filter(struct basic_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_em_tree_destroy(&f->ematches); + tcf_exts_put_net(&f->exts); + kfree(f); +} + static void basic_delete_filter_work(struct work_struct *work) { struct basic_filter *f = container_of(work, struct basic_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->exts); - tcf_em_tree_destroy(&f->ematches); + __basic_delete_filter(f); rtnl_unlock(); - - kfree(f); } static void basic_delete_filter(struct rcu_head *head) @@ -113,7 +118,10 @@ static void basic_destroy(struct tcf_proto *tp) list_for_each_entry_safe(f, n, &head->flist, link) { list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, basic_delete_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, basic_delete_filter); + else + __basic_delete_filter(f); } kfree_rcu(head, rcu); } @@ -125,6 +133,7 @@ static int basic_delete(struct tcf_proto *tp, void *arg, bool *last) list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, basic_delete_filter); *last = list_empty(&head->flist); return 0; @@ -219,6 +228,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (fold) { list_replace_rcu(&fold->link, &fnew->link); tcf_unbind_filter(tp, &fold->res); + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, basic_delete_filter); } else { list_add_rcu(&fnew->link, &head->flist); -- cgit v1.2.3 From aae2c35ec89252639a97769fa72dbbf8f1cc3107 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:21 -0800 Subject: cls_bpf: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_bpf.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 037a3ae86829..990eb4d91d54 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -249,6 +249,7 @@ static int cls_bpf_init(struct tcf_proto *tp) static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) { tcf_exts_destroy(&prog->exts); + tcf_exts_put_net(&prog->exts); if (cls_bpf_is_ebpf(prog)) bpf_prog_put(prog->filter); @@ -282,7 +283,10 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog) cls_bpf_stop_offload(tp, prog); list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); - call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu); + if (tcf_exts_get_net(&prog->exts)) + call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu); + else + __cls_bpf_delete_prog(prog); } static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last) @@ -516,6 +520,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (oldprog) { list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, &oldprog->res); + tcf_exts_get_net(&oldprog->exts); call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu); } else { list_add_rcu(&prog->link, &head->plist); -- cgit v1.2.3 From ed1481681414e4d4264ad46864d5c1da5ff6ccb1 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:22 -0800 Subject: cls_cgroup: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_cgroup.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index a97e069bee89..309d5899265f 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -60,15 +60,21 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, }; +static void __cls_cgroup_destroy(struct cls_cgroup_head *head) +{ + tcf_exts_destroy(&head->exts); + tcf_em_tree_destroy(&head->ematches); + tcf_exts_put_net(&head->exts); + kfree(head); +} + static void cls_cgroup_destroy_work(struct work_struct *work) { struct cls_cgroup_head *head = container_of(work, struct cls_cgroup_head, work); rtnl_lock(); - tcf_exts_destroy(&head->exts); - tcf_em_tree_destroy(&head->ematches); - kfree(head); + __cls_cgroup_destroy(head); rtnl_unlock(); } @@ -124,8 +130,10 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, goto errout; rcu_assign_pointer(tp->root, new); - if (head) + if (head) { + tcf_exts_get_net(&head->exts); call_rcu(&head->rcu, cls_cgroup_destroy_rcu); + } return 0; errout: tcf_exts_destroy(&new->exts); @@ -138,8 +146,12 @@ static void cls_cgroup_destroy(struct tcf_proto *tp) struct cls_cgroup_head *head = rtnl_dereference(tp->root); /* Head can still be NULL due to cls_cgroup_init(). */ - if (head) - call_rcu(&head->rcu, cls_cgroup_destroy_rcu); + if (head) { + if (tcf_exts_get_net(&head->exts)) + call_rcu(&head->rcu, cls_cgroup_destroy_rcu); + else + __cls_cgroup_destroy(head); + } } static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last) -- cgit v1.2.3 From 22f7cec93f0af86c4b66bf34a977da9d7cef076e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:23 -0800 Subject: cls_flow: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_flow.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 67f3a2af6aab..85f765cff697 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -372,15 +372,21 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, }; -static void flow_destroy_filter_work(struct work_struct *work) +static void __flow_destroy_filter(struct flow_filter *f) { - struct flow_filter *f = container_of(work, struct flow_filter, work); - - rtnl_lock(); del_timer_sync(&f->perturb_timer); tcf_exts_destroy(&f->exts); tcf_em_tree_destroy(&f->ematches); + tcf_exts_put_net(&f->exts); kfree(f); +} + +static void flow_destroy_filter_work(struct work_struct *work) +{ + struct flow_filter *f = container_of(work, struct flow_filter, work); + + rtnl_lock(); + __flow_destroy_filter(f); rtnl_unlock(); } @@ -552,8 +558,10 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, *arg = fnew; - if (fold) + if (fold) { + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, flow_destroy_filter); + } return 0; err2: @@ -570,6 +578,7 @@ static int flow_delete(struct tcf_proto *tp, void *arg, bool *last) struct flow_filter *f = arg; list_del_rcu(&f->list); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, flow_destroy_filter); *last = list_empty(&head->filters); return 0; @@ -594,7 +603,10 @@ static void flow_destroy(struct tcf_proto *tp) list_for_each_entry_safe(f, next, &head->filters, list) { list_del_rcu(&f->list); - call_rcu(&f->rcu, flow_destroy_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, flow_destroy_filter); + else + __flow_destroy_filter(f); } kfree_rcu(head, rcu); } -- cgit v1.2.3 From 0dadc117ac8bc78d8144e862ac8ad23f342f9ea8 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:24 -0800 Subject: cls_flower: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 5b5722c8b32c..7a838d1c1c00 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -218,13 +218,19 @@ static int fl_init(struct tcf_proto *tp) return 0; } +static void __fl_destroy_filter(struct cls_fl_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); + kfree(f); +} + static void fl_destroy_filter_work(struct work_struct *work) { struct cls_fl_filter *f = container_of(work, struct cls_fl_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->exts); - kfree(f); + __fl_destroy_filter(f); rtnl_unlock(); } @@ -318,7 +324,10 @@ static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f) if (!tc_skip_hw(f->flags)) fl_hw_destroy_filter(tp, f); tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, fl_destroy_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, fl_destroy_filter); + else + __fl_destroy_filter(f); } static void fl_destroy_sleepable(struct work_struct *work) @@ -988,6 +997,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, idr_replace_ext(&head->handle_idr, fnew, fnew->handle); list_replace_rcu(&fold->list, &fnew->list); tcf_unbind_filter(tp, &fold->res); + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, fl_destroy_filter); } else { list_add_tail_rcu(&fnew->list, &head->filters); -- cgit v1.2.3 From d5f984f5af1d926bc9c7a7f90e7a1e1e313a8ba7 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:25 -0800 Subject: cls_fw: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_fw.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 99183b8621ec..7f45e5ab8afc 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -122,13 +122,19 @@ static int fw_init(struct tcf_proto *tp) return 0; } +static void __fw_delete_filter(struct fw_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); + kfree(f); +} + static void fw_delete_filter_work(struct work_struct *work) { struct fw_filter *f = container_of(work, struct fw_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->exts); - kfree(f); + __fw_delete_filter(f); rtnl_unlock(); } @@ -154,7 +160,10 @@ static void fw_destroy(struct tcf_proto *tp) RCU_INIT_POINTER(head->ht[h], rtnl_dereference(f->next)); tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, fw_delete_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, fw_delete_filter); + else + __fw_delete_filter(f); } } kfree_rcu(head, rcu); @@ -179,6 +188,7 @@ static int fw_delete(struct tcf_proto *tp, void *arg, bool *last) if (pfp == f) { RCU_INIT_POINTER(*fp, rtnl_dereference(f->next)); tcf_unbind_filter(tp, &f->res); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, fw_delete_filter); ret = 0; break; @@ -299,6 +309,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, RCU_INIT_POINTER(fnew->next, rtnl_dereference(pfp->next)); rcu_assign_pointer(*fp, fnew); tcf_unbind_filter(tp, &f->res); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, fw_delete_filter); *arg = fnew; -- cgit v1.2.3 From 57767e785321a427b8cdd282db2b8b33cd218ffa Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:26 -0800 Subject: cls_matchall: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_matchall.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index c33f711b9019..3684153cd8a9 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -44,13 +44,19 @@ static int mall_init(struct tcf_proto *tp) return 0; } +static void __mall_destroy(struct cls_mall_head *head) +{ + tcf_exts_destroy(&head->exts); + tcf_exts_put_net(&head->exts); + kfree(head); +} + static void mall_destroy_work(struct work_struct *work) { struct cls_mall_head *head = container_of(work, struct cls_mall_head, work); rtnl_lock(); - tcf_exts_destroy(&head->exts); - kfree(head); + __mall_destroy(head); rtnl_unlock(); } @@ -109,7 +115,10 @@ static void mall_destroy(struct tcf_proto *tp) if (tc_should_offload(dev, head->flags)) mall_destroy_hw_filter(tp, head, (unsigned long) head); - call_rcu(&head->rcu, mall_destroy_rcu); + if (tcf_exts_get_net(&head->exts)) + call_rcu(&head->rcu, mall_destroy_rcu); + else + __mall_destroy(head); } static void *mall_get(struct tcf_proto *tp, u32 handle) -- cgit v1.2.3 From 3fd51de5e3ba447624a08a8ba29f90d94f0fe909 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:27 -0800 Subject: cls_route: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_route.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 4b14ccd8b8f2..ac9a5b8825b9 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -257,13 +257,19 @@ static int route4_init(struct tcf_proto *tp) return 0; } +static void __route4_delete_filter(struct route4_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); + kfree(f); +} + static void route4_delete_filter_work(struct work_struct *work) { struct route4_filter *f = container_of(work, struct route4_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->exts); - kfree(f); + __route4_delete_filter(f); rtnl_unlock(); } @@ -297,7 +303,10 @@ static void route4_destroy(struct tcf_proto *tp) next = rtnl_dereference(f->next); RCU_INIT_POINTER(b->ht[h2], next); tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, route4_delete_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, route4_delete_filter); + else + __route4_delete_filter(f); } } RCU_INIT_POINTER(head->table[h1], NULL); @@ -338,6 +347,7 @@ static int route4_delete(struct tcf_proto *tp, void *arg, bool *last) /* Delete it */ tcf_unbind_filter(tp, &f->res); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, route4_delete_filter); /* Strip RTNL protected tree */ @@ -541,6 +551,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, *arg = f; if (fold) { tcf_unbind_filter(tp, &fold->res); + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, route4_delete_filter); } return 0; -- cgit v1.2.3 From 96585063a27f0704dcf7a09f8b78edd6a8973965 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:28 -0800 Subject: cls_rsvp: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_rsvp.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index bdbc541787f8..cf325625c99d 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -285,13 +285,19 @@ static int rsvp_init(struct tcf_proto *tp) return -ENOBUFS; } +static void __rsvp_delete_filter(struct rsvp_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); + kfree(f); +} + static void rsvp_delete_filter_work(struct work_struct *work) { struct rsvp_filter *f = container_of(work, struct rsvp_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->exts); - kfree(f); + __rsvp_delete_filter(f); rtnl_unlock(); } @@ -310,7 +316,10 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) * grace period, since converted-to-rcu actions are relying on that * in cleanup() callback */ - call_rcu(&f->rcu, rsvp_delete_filter_rcu); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, rsvp_delete_filter_rcu); + else + __rsvp_delete_filter(f); } static void rsvp_destroy(struct tcf_proto *tp) -- cgit v1.2.3 From f2b751053ee9314e82c178f6ca0fee7e160fac95 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:29 -0800 Subject: cls_tcindex: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_tcindex.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index beaa95e09c25..a76937ee0b2d 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -139,13 +139,19 @@ static int tcindex_init(struct tcf_proto *tp) return 0; } +static void __tcindex_destroy_rexts(struct tcindex_filter_result *r) +{ + tcf_exts_destroy(&r->exts); + tcf_exts_put_net(&r->exts); +} + static void tcindex_destroy_rexts_work(struct work_struct *work) { struct tcindex_filter_result *r; r = container_of(work, struct tcindex_filter_result, work); rtnl_lock(); - tcf_exts_destroy(&r->exts); + __tcindex_destroy_rexts(r); rtnl_unlock(); } @@ -158,14 +164,20 @@ static void tcindex_destroy_rexts(struct rcu_head *head) tcf_queue_work(&r->work); } +static void __tcindex_destroy_fexts(struct tcindex_filter *f) +{ + tcf_exts_destroy(&f->result.exts); + tcf_exts_put_net(&f->result.exts); + kfree(f); +} + static void tcindex_destroy_fexts_work(struct work_struct *work) { struct tcindex_filter *f = container_of(work, struct tcindex_filter, work); rtnl_lock(); - tcf_exts_destroy(&f->result.exts); - kfree(f); + __tcindex_destroy_fexts(f); rtnl_unlock(); } @@ -210,10 +222,17 @@ found: * grace period, since converted-to-rcu actions are relying on that * in cleanup() callback */ - if (f) - call_rcu(&f->rcu, tcindex_destroy_fexts); - else - call_rcu(&r->rcu, tcindex_destroy_rexts); + if (f) { + if (tcf_exts_get_net(&f->result.exts)) + call_rcu(&f->rcu, tcindex_destroy_fexts); + else + __tcindex_destroy_fexts(f); + } else { + if (tcf_exts_get_net(&r->exts)) + call_rcu(&r->rcu, tcindex_destroy_rexts); + else + __tcindex_destroy_rexts(r); + } *last = false; return 0; -- cgit v1.2.3 From 35c55fc156d85a396a975fc17636f560fc02fd65 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 6 Nov 2017 13:47:30 -0800 Subject: cls_u32: use tcf_exts_get_net() before call_rcu() Hold netns refcnt before call_rcu() and release it after the tcf_exts_destroy() is done. Note, on ->destroy() path we have to respect the return value of tcf_exts_get_net(), on other paths it should always return true, so we don't need to care. Cc: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index dadd1b344497..b58eccb21f03 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -399,6 +399,7 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, bool free_pf) { tcf_exts_destroy(&n->exts); + tcf_exts_put_net(&n->exts); if (n->ht_down) n->ht_down->refcnt--; #ifdef CONFIG_CLS_U32_PERF @@ -476,6 +477,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) RCU_INIT_POINTER(*kp, key->next); tcf_unbind_filter(tp, &key->res); + tcf_exts_get_net(&key->exts); call_rcu(&key->rcu, u32_delete_key_freepf_rcu); return 0; } @@ -588,7 +590,10 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) rtnl_dereference(n->next)); tcf_unbind_filter(tp, &n->res); u32_remove_hw_knode(tp, n->handle); - call_rcu(&n->rcu, u32_delete_key_freepf_rcu); + if (tcf_exts_get_net(&n->exts)) + call_rcu(&n->rcu, u32_delete_key_freepf_rcu); + else + u32_destroy_key(n->tp, n, true); } } } @@ -949,6 +954,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, u32_replace_knode(tp, tp_c, new); tcf_unbind_filter(tp, &n->res); + tcf_exts_get_net(&n->exts); call_rcu(&n->rcu, u32_delete_key_rcu); return 0; } -- cgit v1.2.3