From cf124db566e6b036b8bcbe8decbed740bdfac8c6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 8 May 2017 12:52:56 -0400 Subject: net: Fix inconsistent teardown and release of private netdev state. Network devices can allocate reasources and private memory using netdev_ops->ndo_init(). However, the release of these resources can occur in one of two different places. Either netdev_ops->ndo_uninit() or netdev->destructor(). The decision of which operation frees the resources depends upon whether it is necessary for all netdev refs to be released before it is safe to perform the freeing. netdev_ops->ndo_uninit() presumably can occur right after the NETDEV_UNREGISTER notifier completes and the unicast and multicast address lists are flushed. netdev->destructor(), on the other hand, does not run until the netdev references all go away. Further complicating the situation is that netdev->destructor() almost universally does also a free_netdev(). This creates a problem for the logic in register_netdevice(). Because all callers of register_netdevice() manage the freeing of the netdev, and invoke free_netdev(dev) if register_netdevice() fails. If netdev_ops->ndo_init() succeeds, but something else fails inside of register_netdevice(), it does call ndo_ops->ndo_uninit(). But it is not able to invoke netdev->destructor(). This is because netdev->destructor() will do a free_netdev() and then the caller of register_netdevice() will do the same. However, this means that the resources that would normally be released by netdev->destructor() will not be. Over the years drivers have added local hacks to deal with this, by invoking their destructor parts by hand when register_netdevice() fails. Many drivers do not try to deal with this, and instead we have leaks. Let's close this hole by formalizing the distinction between what private things need to be freed up by netdev->destructor() and whether the driver needs unregister_netdevice() to perform the free_netdev(). netdev->priv_destructor() performs all actions to free up the private resources that used to be freed by netdev->destructor(), except for free_netdev(). netdev->needs_free_netdev is a boolean that indicates whether free_netdev() should be done at the end of unregister_netdevice(). Now, register_netdevice() can sanely release all resources after ndo_ops->ndo_init() succeeds, by invoking both ndo_ops->ndo_uninit() and netdev->priv_destructor(). And at the end of unregister_netdevice(), we invoke netdev->priv_destructor() and optionally call free_netdev(). Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 9 +++++---- net/ipv6/ip6_tunnel.c | 8 ++++---- net/ipv6/ip6_vti.c | 8 ++++---- net/ipv6/ip6mr.c | 2 +- net/ipv6/sit.c | 6 +++--- 5 files changed, 17 insertions(+), 16 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 0c5b4caa1949..64eea3962733 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -991,13 +991,13 @@ static void ip6gre_dev_free(struct net_device *dev) dst_cache_destroy(&t->dst_cache); free_percpu(dev->tstats); - free_netdev(dev); } static void ip6gre_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ip6gre_netdev_ops; - dev->destructor = ip6gre_dev_free; + dev->needs_free_netdev = true; + dev->priv_destructor = ip6gre_dev_free; dev->type = ARPHRD_IP6GRE; @@ -1148,7 +1148,7 @@ static int __net_init ip6gre_init_net(struct net *net) return 0; err_reg_dev: - ip6gre_dev_free(ign->fb_tunnel_dev); + free_netdev(ign->fb_tunnel_dev); err_alloc_dev: return err; } @@ -1300,7 +1300,8 @@ static void ip6gre_tap_setup(struct net_device *dev) ether_setup(dev); dev->netdev_ops = &ip6gre_tap_netdev_ops; - dev->destructor = ip6gre_dev_free; + dev->needs_free_netdev = true; + dev->priv_destructor = ip6gre_dev_free; dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_TX_SKB_SHARING; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9b37f9747fc6..c3581973f5d7 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -254,7 +254,6 @@ static void ip6_dev_free(struct net_device *dev) gro_cells_destroy(&t->gro_cells); dst_cache_destroy(&t->dst_cache); free_percpu(dev->tstats); - free_netdev(dev); } static int ip6_tnl_create2(struct net_device *dev) @@ -322,7 +321,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) return t; failed_free: - ip6_dev_free(dev); + free_netdev(dev); failed: return ERR_PTR(err); } @@ -1777,7 +1776,8 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { static void ip6_tnl_dev_setup(struct net_device *dev) { dev->netdev_ops = &ip6_tnl_netdev_ops; - dev->destructor = ip6_dev_free; + dev->needs_free_netdev = true; + dev->priv_destructor = ip6_dev_free; dev->type = ARPHRD_TUNNEL6; dev->flags |= IFF_NOARP; @@ -2224,7 +2224,7 @@ static int __net_init ip6_tnl_init_net(struct net *net) return 0; err_register: - ip6_dev_free(ip6n->fb_tnl_dev); + free_netdev(ip6n->fb_tnl_dev); err_alloc_dev: return err; } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index d67ef56454b2..837ea1eefe7f 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -180,7 +180,6 @@ vti6_tnl_unlink(struct vti6_net *ip6n, struct ip6_tnl *t) static void vti6_dev_free(struct net_device *dev) { free_percpu(dev->tstats); - free_netdev(dev); } static int vti6_tnl_create2(struct net_device *dev) @@ -235,7 +234,7 @@ static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p return t; failed_free: - vti6_dev_free(dev); + free_netdev(dev); failed: return NULL; } @@ -842,7 +841,8 @@ static const struct net_device_ops vti6_netdev_ops = { static void vti6_dev_setup(struct net_device *dev) { dev->netdev_ops = &vti6_netdev_ops; - dev->destructor = vti6_dev_free; + dev->needs_free_netdev = true; + dev->priv_destructor = vti6_dev_free; dev->type = ARPHRD_TUNNEL6; dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr); @@ -1100,7 +1100,7 @@ static int __net_init vti6_init_net(struct net *net) return 0; err_register: - vti6_dev_free(ip6n->fb_tnl_dev); + free_netdev(ip6n->fb_tnl_dev); err_alloc_dev: return err; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 374997d26488..2ecb39b943b5 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -733,7 +733,7 @@ static void reg_vif_setup(struct net_device *dev) dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; dev->flags = IFF_NOARP; dev->netdev_ops = ®_vif_netdev_ops; - dev->destructor = free_netdev; + dev->needs_free_netdev = true; dev->features |= NETIF_F_NETNS_LOCAL; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 61e5902f0687..2378503577b0 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -265,7 +265,7 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, return nt; failed_free: - ipip6_dev_free(dev); + free_netdev(dev); failed: return NULL; } @@ -1336,7 +1336,6 @@ static void ipip6_dev_free(struct net_device *dev) dst_cache_destroy(&tunnel->dst_cache); free_percpu(dev->tstats); - free_netdev(dev); } #define SIT_FEATURES (NETIF_F_SG | \ @@ -1351,7 +1350,8 @@ static void ipip6_tunnel_setup(struct net_device *dev) int t_hlen = tunnel->hlen + sizeof(struct iphdr); dev->netdev_ops = &ipip6_netdev_ops; - dev->destructor = ipip6_dev_free; + dev->needs_free_netdev = true; + dev->priv_destructor = ipip6_dev_free; dev->type = ARPHRD_SIT; dev->hard_header_len = LL_MAX_HEADER + t_hlen; -- cgit v1.2.3 From 8397ed36b7c585f8d3e06c431f4137309124f78f Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 7 Jun 2017 12:26:23 -0600 Subject: net: ipv6: Release route when device is unregistering Roopa reported attempts to delete a bond device that is referenced in a multipath route is hanging: $ ifdown bond2 # ifupdown2 command that deletes virtual devices unregister_netdevice: waiting for bond2 to become free. Usage count = 2 Steps to reproduce: echo 1 > /proc/sys/net/ipv6/conf/all/ignore_routes_with_linkdown ip link add dev bond12 type bond ip link add dev bond13 type bond ip addr add 2001:db8:2::0/64 dev bond12 ip addr add 2001:db8:3::0/64 dev bond13 ip route add 2001:db8:33::0/64 nexthop via 2001:db8:2::2 nexthop via 2001:db8:3::2 ip link del dev bond12 ip link del dev bond13 The root cause is the recent change to keep routes on a linkdown. Update the check to detect when the device is unregistering and release the route for that case. Fixes: a1a22c12060e4 ("net: ipv6: Keep nexthop of multipath route on admin down") Reported-by: Roopa Prabhu Signed-off-by: David Ahern Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/ipv6/route.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index dc61b0b5e64e..7cebd954d5bb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2804,6 +2804,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *arg) if ((rt->dst.dev == dev || !dev) && rt != adn->net->ipv6.ip6_null_entry && (rt->rt6i_nsiblings == 0 || + (dev && netdev_unregistering(dev)) || !rt->rt6i_idev->cnf.ignore_routes_with_linkdown)) return -1; -- cgit v1.2.3 From 0db47e3d323411beeb6ea97f2c4d19395c91fd8b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 8 Jun 2017 09:54:24 +0200 Subject: ila_xlat: add missing hash secret initialization While discussing the possible merits of clang warning about unused initialized functions, I found one function that was clearly meant to be called but never actually is. __ila_hash_secret_init() initializes the hash value for the ila locator, apparently this is intended to prevent hash collision attacks, but this ends up being a read-only zero constant since there is no caller. I could find no indication of why it was never called, the earliest patch submission for the module already was like this. If my interpretation is right, we certainly want to backport the patch to stable kernels as well. I considered adding it to the ila_xlat_init callback, but for best effect the random data is read as late as possible, just before it is first used. The underlying net_get_random_once() is already highly optimized to avoid overhead when called frequently. Fixes: 7f00feaf1076 ("ila: Add generic ILA translation facility") Cc: stable@vger.kernel.org Link: https://www.spinics.net/lists/kernel/msg2527243.html Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- net/ipv6/ila/ila_xlat.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv6') diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 2fd5ca151dcf..77f7f8c7d93d 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -62,6 +62,7 @@ static inline u32 ila_locator_hash(struct ila_locator loc) { u32 *v = (u32 *)loc.v32; + __ila_hash_secret_init(); return jhash_2words(v[0], v[1], hashrnd); } -- cgit v1.2.3 From 3500cd73dff48f28f4ba80c171c4c80034d40f76 Mon Sep 17 00:00:00 2001 From: Christian Perle Date: Mon, 12 Jun 2017 10:06:57 +0200 Subject: proc: snmp6: Use correct type in memset Reading /proc/net/snmp6 yields bogus values on 32 bit kernels. Use "u64" instead of "unsigned long" in sizeof(). Fixes: 4a4857b1c81e ("proc: Reduce cache miss in snmp6_seq_show") Signed-off-by: Christian Perle Signed-off-by: David S. Miller --- net/ipv6/proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index cc8e3ae9ca73..e88bcb8ff0fd 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -219,7 +219,7 @@ static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib, u64 buff64[SNMP_MIB_MAX]; int i; - memset(buff64, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + memset(buff64, 0, sizeof(u64) * SNMP_MIB_MAX); snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff); for (i = 0; itemlist[i].name; i++) -- cgit v1.2.3 From 849a44de91636c24cea799cb8ad8c36433feb913 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 14 Jun 2017 13:27:37 +0200 Subject: net: don't global ICMP rate limit packets originating from loopback Florian Weimer seems to have a glibc test-case which requires that loopback interfaces does not get ICMP ratelimited. This was broken by commit c0303efeab73 ("net: reduce cycles spend on ICMP replies that gets rate limited"). An ICMP response will usually be routed back-out the same incoming interface. Thus, take advantage of this and skip global ICMP ratelimit when the incoming device is loopback. In the unlikely event that the outgoing it not loopback, due to strange routing policy rules, ICMP rate limiting still works via peer ratelimiting via icmpv4_xrlim_allow(). Thus, we should still comply with RFC1812 (section 4.3.2.8 "Rate Limiting"). This seems to fix the reproducer given by Florian. While still avoiding to perform expensive and unneeded outgoing route lookup for rate limited packets (in the non-loopback case). Fixes: c0303efeab73 ("net: reduce cycles spend on ICMP replies that gets rate limited") Reported-by: Florian Weimer Reported-by: "H.J. Lu" Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 230b5aac9f03..8d7b113958b1 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -491,7 +491,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, local_bh_disable(); /* Check global sysctl_icmp_msgs_per_sec ratelimit */ - if (!icmpv6_global_allow(type)) + if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type)) goto out_bh_enable; mip6_addr_swap(skb); -- cgit v1.2.3