Diffstat (limited to 'net/ipv6/mcast.c')
-rw-r--r-- | net/ipv6/mcast.c | 1103 |
1 file changed, 646 insertions, 457 deletions
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 6c8604390266..0d59efb6b49e 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -29,7 +29,6 @@
 #include <linux/socket.h>
 #include <linux/sockios.h>
 #include <linux/jiffies.h>
-#include <linux/times.h>
 #include <linux/net.h>
 #include <linux/in.h>
 #include <linux/in6.h>
@@ -42,6 +41,7 @@
 #include <linux/slab.h>
 #include <linux/pkt_sched.h>
 #include <net/mld.h>
+#include <linux/workqueue.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
@@ -67,18 +67,14 @@ static int __mld2_query_bugs[] __attribute__((__unused__)) = {
 	BUILD_BUG_ON_ZERO(offsetof(struct mld2_grec, grec_mca) % 4)
 };
 
+static struct workqueue_struct *mld_wq;
 static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT;
 
 static void igmp6_join_group(struct ifmcaddr6 *ma);
 static void igmp6_leave_group(struct ifmcaddr6 *ma);
-static void igmp6_timer_handler(struct timer_list *t);
+static void mld_mca_work(struct work_struct *work);
 
-static void mld_gq_timer_expire(struct timer_list *t);
-static void mld_ifc_timer_expire(struct timer_list *t);
 static void mld_ifc_event(struct inet6_dev *idev);
-static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
-static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
-static void mld_clear_delrec(struct inet6_dev *idev);
 static bool mld_in_v1_mode(const struct inet6_dev *idev);
 static int sf_setstate(struct ifmcaddr6 *pmc);
 static void sf_markstate(struct ifmcaddr6 *pmc);
@@ -112,12 +108,52 @@ int sysctl_mld_qrv __read_mostly = MLD_QRV_DEFAULT;
 /*
  *	socket join on multicast group
  */
+#define mc_dereference(e, idev) \
+	rcu_dereference_protected(e, lockdep_is_held(&(idev)->mc_lock))
+
+#define sock_dereference(e, sk) \
+	rcu_dereference_protected(e, lockdep_sock_is_held(sk))
+
+#define for_each_pmc_socklock(np, sk, pmc)			\
+	for (pmc = sock_dereference((np)->ipv6_mc_list, sk);	\
+	     pmc;						\
+	     pmc = sock_dereference(pmc->next, sk))
 
 #define for_each_pmc_rcu(np, pmc)				\
-	for (pmc = rcu_dereference(np->ipv6_mc_list);		\
-	     pmc != NULL;					\
+	for (pmc = rcu_dereference((np)->ipv6_mc_list);		\
+	     pmc;						\
	     pmc = rcu_dereference(pmc->next))
 
+#define for_each_psf_mclock(mc, psf)				\
+	for (psf = mc_dereference((mc)->mca_sources, mc->idev);	\
+	     psf;						\
+	     psf = mc_dereference(psf->sf_next, mc->idev))
+
+#define for_each_psf_rcu(mc, psf)				\
+	for (psf = rcu_dereference((mc)->mca_sources);		\
+	     psf;						\
+	     psf = rcu_dereference(psf->sf_next))
+
+#define for_each_psf_tomb(mc, psf)				\
+	for (psf = mc_dereference((mc)->mca_tomb, mc->idev);	\
+	     psf;						\
+	     psf = mc_dereference(psf->sf_next, mc->idev))
+
+#define for_each_mc_mclock(idev, mc)				\
+	for (mc = mc_dereference((idev)->mc_list, idev);	\
+	     mc;						\
+	     mc = mc_dereference(mc->next, idev))
+
+#define for_each_mc_rcu(idev, mc)				\
+	for (mc = rcu_dereference((idev)->mc_list);		\
+	     mc;						\
+	     mc = rcu_dereference(mc->next))
+
+#define for_each_mc_tomb(idev, mc)				\
+	for (mc = mc_dereference((idev)->mc_tomb, idev);	\
+	     mc;						\
+	     mc = mc_dereference(mc->next, idev))
+
 static int unsolicited_report_interval(struct inet6_dev *idev)
 {
 	int iv;
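The block of traversal macros added above encodes the new locking contract: mc_dereference() expands to rcu_dereference_protected() with lockdep_is_held(&(idev)->mc_lock), so the *_mclock iterators are only legal while the new per-interface mutex is held, while the *_rcu variants remain usable from RCU read-side sections. A minimal sketch of a write-side walk under that assumption (the function count_groups is hypothetical, not part of this patch):

/* Hypothetical helper, not in the patch: count joined groups on the
 * write side. for_each_mc_mclock() uses mc_dereference(), i.e.
 * rcu_dereference_protected() + lockdep_is_held(&idev->mc_lock),
 * so lockdep warns if the mutex is not actually held here.
 */
static int count_groups(struct inet6_dev *idev)
{
	struct ifmcaddr6 *mc;
	int n = 0;

	mutex_lock(&idev->mc_lock);
	for_each_mc_mclock(idev, mc)
		n++;
	mutex_unlock(&idev->mc_lock);

	return n;
}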
@@ -144,15 +180,11 @@ static int __ipv6_sock_mc_join(struct sock *sk, int ifindex,
 	if (!ipv6_addr_is_multicast(addr))
 		return -EINVAL;
 
-	rcu_read_lock();
-	for_each_pmc_rcu(np, mc_lst) {
+	for_each_pmc_socklock(np, sk, mc_lst) {
 		if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
-		    ipv6_addr_equal(&mc_lst->addr, addr)) {
-			rcu_read_unlock();
+		    ipv6_addr_equal(&mc_lst->addr, addr))
 			return -EADDRINUSE;
-		}
 	}
-	rcu_read_unlock();
 
 	mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL);
@@ -179,8 +211,7 @@ static int __ipv6_sock_mc_join(struct sock *sk, int ifindex,
 	mc_lst->ifindex = dev->ifindex;
 	mc_lst->sfmode = mode;
-	rwlock_init(&mc_lst->sflock);
-	mc_lst->sflist = NULL;
+	RCU_INIT_POINTER(mc_lst->sflist, NULL);
 
 	/*
	 *	now add/increase the group membership on the device
@@ -227,7 +258,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 		return -EINVAL;
 
 	for (lnk = &np->ipv6_mc_list;
-	     (mc_lst = rtnl_dereference(*lnk)) != NULL;
+	     (mc_lst = sock_dereference(*lnk, sk)) != NULL;
 	      lnk = &mc_lst->next) {
 		if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
 		    ipv6_addr_equal(&mc_lst->addr, addr)) {
@@ -239,11 +270,12 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 			if (dev) {
 				struct inet6_dev *idev = __in6_dev_get(dev);
 
-				(void) ip6_mc_leave_src(sk, mc_lst, idev);
+				ip6_mc_leave_src(sk, mc_lst, idev);
 				if (idev)
 					__ipv6_dev_mc_dec(idev, &mc_lst->addr);
-			} else
-				(void) ip6_mc_leave_src(sk, mc_lst, NULL);
+			} else {
+				ip6_mc_leave_src(sk, mc_lst, NULL);
+			}
 
 			atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
 			kfree_rcu(mc_lst, rcu);
@@ -255,10 +287,9 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 }
 EXPORT_SYMBOL(ipv6_sock_mc_drop);
 
-/* called with rcu_read_lock() */
-static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
-					     const struct in6_addr *group,
-					     int ifindex)
+static struct inet6_dev *ip6_mc_find_dev_rtnl(struct net *net,
+					      const struct in6_addr *group,
+					      int ifindex)
 {
 	struct net_device *dev = NULL;
 	struct inet6_dev *idev = NULL;
@@ -270,19 +301,17 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
 			dev = rt->dst.dev;
 			ip6_rt_put(rt);
 		}
-	} else
-		dev = dev_get_by_index_rcu(net, ifindex);
+	} else {
+		dev = __dev_get_by_index(net, ifindex);
+	}
 
 	if (!dev)
 		return NULL;
 	idev = __in6_dev_get(dev);
 	if (!idev)
 		return NULL;
-	read_lock_bh(&idev->lock);
-	if (idev->dead) {
-		read_unlock_bh(&idev->lock);
+	if (idev->dead)
 		return NULL;
-	}
 	return idev;
 }
@@ -294,7 +323,7 @@ void __ipv6_sock_mc_close(struct sock *sk)
 
 	ASSERT_RTNL();
 
-	while ((mc_lst = rtnl_dereference(np->ipv6_mc_list)) != NULL) {
+	while ((mc_lst = sock_dereference(np->ipv6_mc_list, sk)) != NULL) {
 		struct net_device *dev;
 
 		np->ipv6_mc_list = mc_lst->next;
@@ -303,11 +332,12 @@ void __ipv6_sock_mc_close(struct sock *sk)
 		if (dev) {
 			struct inet6_dev *idev = __in6_dev_get(dev);
 
-			(void) ip6_mc_leave_src(sk, mc_lst, idev);
+			ip6_mc_leave_src(sk, mc_lst, idev);
 			if (idev)
 				__ipv6_dev_mc_dec(idev, &mc_lst->addr);
-		} else
-			(void) ip6_mc_leave_src(sk, mc_lst, NULL);
+		} else {
+			ip6_mc_leave_src(sk, mc_lst, NULL);
+		}
 
 		atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
 		kfree_rcu(mc_lst, rcu);
@@ -320,8 +350,11 @@ void ipv6_sock_mc_close(struct sock *sk)
 	if (!rcu_access_pointer(np->ipv6_mc_list))
 		return;
+
 	rtnl_lock();
+	lock_sock(sk);
 	__ipv6_sock_mc_close(sk);
+	release_sock(sk);
 	rtnl_unlock();
 }
@@ -336,7 +369,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 	struct net *net = sock_net(sk);
 	int i, j, rv;
 	int leavegroup = 0;
-	int pmclocked = 0;
 	int err;
 
 	source = &((struct sockaddr_in6 *)&pgsr->gsr_source)->sin6_addr;
@@ -345,16 +377,14 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 	if (!ipv6_addr_is_multicast(group))
 		return -EINVAL;
 
-	rcu_read_lock();
-	idev = ip6_mc_find_dev_rcu(net, group, pgsr->gsr_interface);
-	if (!idev) {
-		rcu_read_unlock();
+	idev = ip6_mc_find_dev_rtnl(net, group, pgsr->gsr_interface);
+	if (!idev)
 		return -ENODEV;
-	}
 
 	err = -EADDRNOTAVAIL;
 
-	for_each_pmc_rcu(inet6, pmc) {
+	mutex_lock(&idev->mc_lock);
+	for_each_pmc_socklock(inet6, sk, pmc) {
 		if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface)
 			continue;
 		if (ipv6_addr_equal(&pmc->addr, group))
@@ -365,7 +395,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 		goto done;
 	}
 	/* if a source filter was set, must be the same mode as before */
-	if (pmc->sflist) {
+	if (rcu_access_pointer(pmc->sflist)) {
 		if (pmc->sfmode != omode) {
 			err = -EINVAL;
 			goto done;
@@ -377,10 +407,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 		pmc->sfmode = omode;
 	}
 
-	write_lock(&pmc->sflock);
-	pmclocked = 1;
-
-	psl = pmc->sflist;
+	psl = sock_dereference(pmc->sflist, sk);
 	if (!add) {
 		if (!psl)
 			goto done;	/* err = -EADDRNOTAVAIL */
@@ -420,7 +447,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 		if (psl)
 			count += psl->sl_max;
-		newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_ATOMIC);
+		newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_KERNEL);
 		if (!newpsl) {
 			err = -ENOBUFS;
 			goto done;
@@ -430,9 +457,11 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 		if (psl) {
 			for (i = 0; i < psl->sl_count; i++)
 				newpsl->sl_addr[i] = psl->sl_addr[i];
-			sock_kfree_s(sk, psl, IP6_SFLSIZE(psl->sl_max));
+			atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+			kfree_rcu(psl, rcu);
 		}
-		pmc->sflist = psl = newpsl;
+		psl = newpsl;
+		rcu_assign_pointer(pmc->sflist, psl);
 	}
 	rv = 1;	/* > 0 for insert logic below if sl_count is 0 */
 	for (i = 0; i < psl->sl_count; i++) {
@@ -448,10 +477,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 	/* update the interface list */
 	ip6_mc_add_src(idev, group, omode, 1, source, 1);
 done:
-	if (pmclocked)
-		write_unlock(&pmc->sflock);
-	read_unlock_bh(&idev->lock);
-	rcu_read_unlock();
+	mutex_unlock(&idev->mc_lock);
 	if (leavegroup)
 		err = ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group);
 	return err;
@@ -477,13 +503,9 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
 	    gsf->gf_fmode != MCAST_EXCLUDE)
 		return -EINVAL;
 
-	rcu_read_lock();
-	idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface);
-
-	if (!idev) {
-		rcu_read_unlock();
+	idev = ip6_mc_find_dev_rtnl(net, group, gsf->gf_interface);
+	if (!idev)
 		return -ENODEV;
-	}
 
 	err = 0;
 
@@ -492,7 +514,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
 		goto done;
 	}
 
-	for_each_pmc_rcu(inet6, pmc) {
+	for_each_pmc_socklock(inet6, sk, pmc) {
 		if (pmc->ifindex != gsf->gf_interface)
 			continue;
 		if (ipv6_addr_equal(&pmc->addr, group))
@@ -504,7 +526,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
 	}
 	if (gsf->gf_numsrc) {
 		newpsl = sock_kmalloc(sk, IP6_SFLSIZE(gsf->gf_numsrc),
-				      GFP_ATOMIC);
+				      GFP_KERNEL);
 		if (!newpsl) {
 			err = -ENOBUFS;
 			goto done;
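Note how the ip6_mc_source()/ip6_mc_msfilter() hunks above retire sock_kfree_s(): since an RCU reader may still be walking the old filter list when it is replaced, the patch publishes the new list with rcu_assign_pointer(), subtracts the omem accounting by hand, and defers the free with kfree_rcu(). A condensed sketch of that publish-then-reclaim idiom (replace_filter_list is a hypothetical name, for illustration only):

/* Hypothetical condensation of the pattern above: swap in a new
 * source-filter list and let the old one drain out via RCU rather
 * than freeing it synchronously under the socket.
 */
static void replace_filter_list(struct sock *sk, struct ipv6_mc_socklist *pmc,
				struct ip6_sf_socklist *newpsl)
{
	struct ip6_sf_socklist *psl;

	psl = sock_dereference(pmc->sflist, sk);  /* socket lock held */
	rcu_assign_pointer(pmc->sflist, newpsl);  /* readers see old or new */
	if (psl) {
		/* undo the sock_kmalloc() accounting, then defer the free */
		atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
		kfree_rcu(psl, rcu);
	}
}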
@@ -516,32 +538,37 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
 			psin6 = (struct sockaddr_in6 *)list;
 			newpsl->sl_addr[i] = psin6->sin6_addr;
 		}
+		mutex_lock(&idev->mc_lock);
 		err = ip6_mc_add_src(idev, group, gsf->gf_fmode,
-				newpsl->sl_count, newpsl->sl_addr, 0);
+				     newpsl->sl_count, newpsl->sl_addr, 0);
 		if (err) {
+			mutex_unlock(&idev->mc_lock);
 			sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max));
 			goto done;
 		}
+		mutex_unlock(&idev->mc_lock);
 	} else {
 		newpsl = NULL;
-		(void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0);
+		mutex_lock(&idev->mc_lock);
+		ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0);
+		mutex_unlock(&idev->mc_lock);
 	}
 
-	write_lock(&pmc->sflock);
-	psl = pmc->sflist;
+	mutex_lock(&idev->mc_lock);
+	psl = sock_dereference(pmc->sflist, sk);
 	if (psl) {
-		(void) ip6_mc_del_src(idev, group, pmc->sfmode,
-				      psl->sl_count, psl->sl_addr, 0);
-		sock_kfree_s(sk, psl, IP6_SFLSIZE(psl->sl_max));
-	} else
-		(void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
-	pmc->sflist = newpsl;
+		ip6_mc_del_src(idev, group, pmc->sfmode,
+			       psl->sl_count, psl->sl_addr, 0);
+		atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+		kfree_rcu(psl, rcu);
+	} else {
+		ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
+	}
+	mutex_unlock(&idev->mc_lock);
+	rcu_assign_pointer(pmc->sflist, newpsl);
 	pmc->sfmode = gsf->gf_fmode;
-	write_unlock(&pmc->sflock);
 	err = 0;
 done:
-	read_unlock_bh(&idev->lock);
-	rcu_read_unlock();
 	if (leavegroup)
 		err = ipv6_sock_mc_drop(sk, gsf->gf_interface, group);
 	return err;
@@ -550,52 +577,37 @@ done:
 int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 		  struct sockaddr_storage __user *p)
 {
-	int err, i, count, copycount;
+	struct ipv6_pinfo *inet6 = inet6_sk(sk);
 	const struct in6_addr *group;
 	struct ipv6_mc_socklist *pmc;
-	struct inet6_dev *idev;
-	struct ipv6_pinfo *inet6 = inet6_sk(sk);
 	struct ip6_sf_socklist *psl;
-	struct net *net = sock_net(sk);
+	int i, count, copycount;
 
 	group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr;
 
 	if (!ipv6_addr_is_multicast(group))
 		return -EINVAL;
 
-	rcu_read_lock();
-	idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface);
-
-	if (!idev) {
-		rcu_read_unlock();
-		return -ENODEV;
-	}
-
-	err = -EADDRNOTAVAIL;
 	/* changes to the ipv6_mc_list require the socket lock and
-	 * rtnl lock. We have the socket lock and rcu read lock,
-	 * so reading the list is safe.
+	 * rtnl lock. We have the socket lock, so reading the list is safe.
 	 */
 
-	for_each_pmc_rcu(inet6, pmc) {
+	for_each_pmc_socklock(inet6, sk, pmc) {
 		if (pmc->ifindex != gsf->gf_interface)
 			continue;
 		if (ipv6_addr_equal(group, &pmc->addr))
 			break;
 	}
 	if (!pmc)		/* must have a prior join */
-		goto done;
+		return -EADDRNOTAVAIL;
+
 	gsf->gf_fmode = pmc->sfmode;
-	psl = pmc->sflist;
+	psl = sock_dereference(pmc->sflist, sk);
 	count = psl ? psl->sl_count : 0;
-	read_unlock_bh(&idev->lock);
-	rcu_read_unlock();
 
 	copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
 	gsf->gf_numsrc = count;
-	/* changes to psl require the socket lock, and a write lock
-	 * on pmc->sflock. We have the socket lock so reading here is safe.
-	 */
+
 	for (i = 0; i < copycount; i++, p++) {
 		struct sockaddr_in6 *psin6;
 		struct sockaddr_storage ss;
@@ -608,10 +620,6 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 			return -EFAULT;
 	}
 	return 0;
-done:
-	read_unlock_bh(&idev->lock);
-	rcu_read_unlock();
-	return err;
 }
 
 bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
@@ -631,8 +639,7 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
 		rcu_read_unlock();
 		return np->mc_all;
 	}
-	read_lock(&mc->sflock);
-	psl = mc->sflist;
+	psl = rcu_dereference(mc->sflist);
 	if (!psl) {
 		rv = mc->sfmode == MCAST_EXCLUDE;
 	} else {
@@ -647,12 +654,12 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
 		if (mc->sfmode == MCAST_EXCLUDE && i < psl->sl_count)
 			rv = false;
 	}
-	read_unlock(&mc->sflock);
 	rcu_read_unlock();
 
 	return rv;
 }
 
+/* called with mc_lock */
 static void igmp6_group_added(struct ifmcaddr6 *mc)
 {
 	struct net_device *dev = mc->idev->dev;
@@ -662,13 +669,11 @@ static void igmp6_group_added(struct ifmcaddr6 *mc)
 	    IPV6_ADDR_SCOPE_LINKLOCAL)
 		return;
 
-	spin_lock_bh(&mc->mca_lock);
 	if (!(mc->mca_flags&MAF_LOADED)) {
 		mc->mca_flags |= MAF_LOADED;
 		if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0)
 			dev_mc_add(dev, buf);
 	}
-	spin_unlock_bh(&mc->mca_lock);
 
 	if (!(dev->flags & IFF_UP) || (mc->mca_flags & MAF_NOREPORT))
 		return;
@@ -689,6 +694,7 @@ static void igmp6_group_added(struct ifmcaddr6 *mc)
 	mld_ifc_event(mc->idev);
 }
 
+/* called with mc_lock */
 static void igmp6_group_dropped(struct ifmcaddr6 *mc)
 {
 	struct net_device *dev = mc->idev->dev;
@@ -698,28 +704,25 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc)
 	    IPV6_ADDR_SCOPE_LINKLOCAL)
 		return;
 
-	spin_lock_bh(&mc->mca_lock);
 	if (mc->mca_flags&MAF_LOADED) {
 		mc->mca_flags &= ~MAF_LOADED;
 		if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0)
 			dev_mc_del(dev, buf);
 	}
-	spin_unlock_bh(&mc->mca_lock);
 
 	if (mc->mca_flags & MAF_NOREPORT)
 		return;
 
 	if (!mc->idev->dead)
 		igmp6_leave_group(mc);
 
-	spin_lock_bh(&mc->mca_lock);
-	if (del_timer(&mc->mca_timer))
+	if (cancel_delayed_work(&mc->mca_work))
 		refcount_dec(&mc->mca_refcnt);
-	spin_unlock_bh(&mc->mca_lock);
 }
 
 /*
  * deleted ifmcaddr6 manipulation
+ * called with mc_lock
  */
 static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 {
@@ -731,12 +734,10 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 	 * for deleted items allows change reports to use common code with
 	 * non-deleted or query-response MCA's.
 	 */
-	pmc = kzalloc(sizeof(*pmc), GFP_ATOMIC);
+	pmc = kzalloc(sizeof(*pmc), GFP_KERNEL);
 	if (!pmc)
 		return;
 
-	spin_lock_bh(&im->mca_lock);
-	spin_lock_init(&pmc->mca_lock);
 	pmc->idev = im->idev;
 	in6_dev_hold(idev);
 	pmc->mca_addr = im->mca_addr;
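With mld_add_delrec() now running under the sleepable mc_lock, the GFP_ATOMIC allocation becomes GFP_KERNEL, and the next hunk replaces the old swap() of the tomb/source lists in mld_del_delrec() with rcu_replace_pointer(). A minimal sketch of that primitive under the same locking assumption (swap_tomb is a hypothetical name):

/* Hypothetical sketch of the rcu_replace_pointer() exchange used in
 * the following hunk: install pmc's tomb list into im and hand im's
 * previous list back to pmc, all while holding idev->mc_lock.
 */
static void swap_tomb(struct ifmcaddr6 *im, struct ifmcaddr6 *pmc,
		      struct inet6_dev *idev)
{
	struct ip6_sf_list *old;

	old = rcu_replace_pointer(im->mca_tomb,
				  mc_dereference(pmc->mca_tomb, idev),
				  lockdep_is_held(&idev->mc_lock));
	rcu_assign_pointer(pmc->mca_tomb, old);
}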
@@ -745,90 +746,110 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 	if (pmc->mca_sfmode == MCAST_INCLUDE) {
 		struct ip6_sf_list *psf;
 
-		pmc->mca_tomb = im->mca_tomb;
-		pmc->mca_sources = im->mca_sources;
-		im->mca_tomb = im->mca_sources = NULL;
-		for (psf = pmc->mca_sources; psf; psf = psf->sf_next)
+		rcu_assign_pointer(pmc->mca_tomb,
+				   mc_dereference(im->mca_tomb, idev));
+		rcu_assign_pointer(pmc->mca_sources,
+				   mc_dereference(im->mca_sources, idev));
+		RCU_INIT_POINTER(im->mca_tomb, NULL);
+		RCU_INIT_POINTER(im->mca_sources, NULL);
+
+		for_each_psf_mclock(pmc, psf)
 			psf->sf_crcount = pmc->mca_crcount;
 	}
-	spin_unlock_bh(&im->mca_lock);
 
-	spin_lock_bh(&idev->mc_lock);
-	pmc->next = idev->mc_tomb;
-	idev->mc_tomb = pmc;
-	spin_unlock_bh(&idev->mc_lock);
+	rcu_assign_pointer(pmc->next, idev->mc_tomb);
+	rcu_assign_pointer(idev->mc_tomb, pmc);
 }
 
+/* called with mc_lock */
 static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
 {
-	struct ifmcaddr6 *pmc, *pmc_prev;
-	struct ip6_sf_list *psf;
+	struct ip6_sf_list *psf, *sources, *tomb;
 	struct in6_addr *pmca = &im->mca_addr;
+	struct ifmcaddr6 *pmc, *pmc_prev;
 
-	spin_lock_bh(&idev->mc_lock);
 	pmc_prev = NULL;
-	for (pmc = idev->mc_tomb; pmc; pmc = pmc->next) {
+	for_each_mc_tomb(idev, pmc) {
 		if (ipv6_addr_equal(&pmc->mca_addr, pmca))
 			break;
 		pmc_prev = pmc;
 	}
 	if (pmc) {
 		if (pmc_prev)
-			pmc_prev->next = pmc->next;
+			rcu_assign_pointer(pmc_prev->next, pmc->next);
 		else
-			idev->mc_tomb = pmc->next;
+			rcu_assign_pointer(idev->mc_tomb, pmc->next);
 	}
-	spin_unlock_bh(&idev->mc_lock);
 
-	spin_lock_bh(&im->mca_lock);
 	if (pmc) {
 		im->idev = pmc->idev;
 		if (im->mca_sfmode == MCAST_INCLUDE) {
-			swap(im->mca_tomb, pmc->mca_tomb);
-			swap(im->mca_sources, pmc->mca_sources);
-			for (psf = im->mca_sources; psf; psf = psf->sf_next)
+			tomb = rcu_replace_pointer(im->mca_tomb,
+						   mc_dereference(pmc->mca_tomb, pmc->idev),
+						   lockdep_is_held(&im->idev->mc_lock));
+			rcu_assign_pointer(pmc->mca_tomb, tomb);
+
+			sources = rcu_replace_pointer(im->mca_sources,
+						      mc_dereference(pmc->mca_sources, pmc->idev),
+						      lockdep_is_held(&im->idev->mc_lock));
+			rcu_assign_pointer(pmc->mca_sources, sources);
+			for_each_psf_mclock(im, psf)
 				psf->sf_crcount = idev->mc_qrv;
 		} else {
 			im->mca_crcount = idev->mc_qrv;
 		}
 		in6_dev_put(pmc->idev);
 		ip6_mc_clear_src(pmc);
-		kfree(pmc);
+		kfree_rcu(pmc, rcu);
 	}
-	spin_unlock_bh(&im->mca_lock);
 }
 
+/* called with mc_lock */
 static void mld_clear_delrec(struct inet6_dev *idev)
 {
 	struct ifmcaddr6 *pmc, *nextpmc;
 
-	spin_lock_bh(&idev->mc_lock);
-	pmc = idev->mc_tomb;
-	idev->mc_tomb = NULL;
-	spin_unlock_bh(&idev->mc_lock);
+	pmc = mc_dereference(idev->mc_tomb, idev);
+	RCU_INIT_POINTER(idev->mc_tomb, NULL);
 
 	for (; pmc; pmc = nextpmc) {
-		nextpmc = pmc->next;
+		nextpmc = mc_dereference(pmc->next, idev);
 		ip6_mc_clear_src(pmc);
 		in6_dev_put(pmc->idev);
-		kfree(pmc);
+		kfree_rcu(pmc, rcu);
 	}
 
 	/* clear dead sources, too */
-	read_lock_bh(&idev->lock);
-	for (pmc = idev->mc_list; pmc; pmc = pmc->next) {
+	for_each_mc_mclock(idev, pmc) {
 		struct ip6_sf_list *psf, *psf_next;
 
-		spin_lock_bh(&pmc->mca_lock);
-		psf = pmc->mca_tomb;
-		pmc->mca_tomb = NULL;
-		spin_unlock_bh(&pmc->mca_lock);
+		psf = mc_dereference(pmc->mca_tomb, idev);
+		RCU_INIT_POINTER(pmc->mca_tomb, NULL);
 		for (; psf; psf = psf_next) {
-			psf_next = psf->sf_next;
-			kfree(psf);
+			psf_next = mc_dereference(psf->sf_next, idev);
+			kfree_rcu(psf, rcu);
 		}
 	}
-	read_unlock_bh(&idev->lock);
+}
+
+static void mld_clear_query(struct inet6_dev *idev)
+{
+	struct sk_buff *skb;
+
+	spin_lock_bh(&idev->mc_query_lock);
+	while ((skb = __skb_dequeue(&idev->mc_query_queue)))
+		kfree_skb(skb);
+	spin_unlock_bh(&idev->mc_query_lock);
+}
+
+static void mld_clear_report(struct inet6_dev *idev)
+{
+	struct sk_buff *skb;
+
+	spin_lock_bh(&idev->mc_report_lock);
+	while ((skb = __skb_dequeue(&idev->mc_report_queue)))
+		kfree_skb(skb);
+	spin_unlock_bh(&idev->mc_report_lock);
 }
 
 static void mca_get(struct ifmcaddr6 *mc)
@@ -840,21 +861,22 @@ static void ma_put(struct ifmcaddr6 *mc)
 {
 	if (refcount_dec_and_test(&mc->mca_refcnt)) {
 		in6_dev_put(mc->idev);
-		kfree(mc);
+		kfree_rcu(mc, rcu);
 	}
 }
 
+/* called with mc_lock */
 static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
 				   const struct in6_addr *addr,
 				   unsigned int mode)
 {
 	struct ifmcaddr6 *mc;
 
-	mc = kzalloc(sizeof(*mc), GFP_ATOMIC);
+	mc = kzalloc(sizeof(*mc), GFP_KERNEL);
 	if (!mc)
 		return NULL;
 
-	timer_setup(&mc->mca_timer, igmp6_timer_handler, 0);
+	INIT_DELAYED_WORK(&mc->mca_work, mld_mca_work);
 
 	mc->mca_addr = *addr;
 	mc->idev = idev;	/* reference taken by caller */
@@ -862,7 +884,6 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
 	/* mca_stamp should be updated upon changes */
 	mc->mca_cstamp = mc->mca_tstamp = jiffies;
 	refcount_set(&mc->mca_refcnt, 1);
-	spin_lock_init(&mc->mca_lock);
 
 	mc->mca_sfmode = mode;
 	mc->mca_sfcount[mode] = 1;
@@ -891,18 +912,17 @@ static int __ipv6_dev_mc_inc(struct net_device *dev,
 	if (!idev)
 		return -EINVAL;
 
-	write_lock_bh(&idev->lock);
 	if (idev->dead) {
-		write_unlock_bh(&idev->lock);
 		in6_dev_put(idev);
 		return -ENODEV;
 	}
 
-	for (mc = idev->mc_list; mc; mc = mc->next) {
+	mutex_lock(&idev->mc_lock);
+	for_each_mc_mclock(idev, mc) {
 		if (ipv6_addr_equal(&mc->mca_addr, addr)) {
 			mc->mca_users++;
-			write_unlock_bh(&idev->lock);
 			ip6_mc_add_src(idev, &mc->mca_addr, mode, 0, NULL, 0);
+			mutex_unlock(&idev->mc_lock);
 			in6_dev_put(idev);
 			return 0;
 		}
@@ -910,22 +930,19 @@ static int __ipv6_dev_mc_inc(struct net_device *dev,
 
 	mc = mca_alloc(idev, addr, mode);
 	if (!mc) {
-		write_unlock_bh(&idev->lock);
+		mutex_unlock(&idev->mc_lock);
 		in6_dev_put(idev);
 		return -ENOMEM;
 	}
 
-	mc->next = idev->mc_list;
-	idev->mc_list = mc;
+	rcu_assign_pointer(mc->next, idev->mc_list);
+	rcu_assign_pointer(idev->mc_list, mc);
 
-	/* Hold this for the code below before we unlock,
-	 * it is already exposed via idev->mc_list.
-	 */
 	mca_get(mc);
-	write_unlock_bh(&idev->lock);
 
 	mld_del_delrec(idev, mc);
 	igmp6_group_added(mc);
+	mutex_unlock(&idev->mc_lock);
 	ma_put(mc);
 	return 0;
 }
@@ -937,33 +954,35 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
 EXPORT_SYMBOL(ipv6_dev_mc_inc);
 
 /*
- *	device multicast group del
+ * device multicast group del
 */
 int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 {
-	struct ifmcaddr6 *ma, **map;
+	struct ifmcaddr6 *ma, __rcu **map;
 
 	ASSERT_RTNL();
 
-	write_lock_bh(&idev->lock);
-	for (map = &idev->mc_list; (ma = *map) != NULL; map = &ma->next) {
+	mutex_lock(&idev->mc_lock);
+	for (map = &idev->mc_list;
+	     (ma = mc_dereference(*map, idev));
+	     map = &ma->next) {
 		if (ipv6_addr_equal(&ma->mca_addr, addr)) {
 			if (--ma->mca_users == 0) {
 				*map = ma->next;
-				write_unlock_bh(&idev->lock);
 
 				igmp6_group_dropped(ma);
 				ip6_mc_clear_src(ma);
+				mutex_unlock(&idev->mc_lock);
 
 				ma_put(ma);
 				return 0;
 			}
-			write_unlock_bh(&idev->lock);
+			mutex_unlock(&idev->mc_lock);
 			return 0;
 		}
 	}
-	write_unlock_bh(&idev->lock);
+	mutex_unlock(&idev->mc_lock);
 
 	return -ENOENT;
 }
@@ -997,8 +1016,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
 	rcu_read_lock();
 	idev = __in6_dev_get(dev);
 	if (idev) {
-		read_lock_bh(&idev->lock);
-		for (mc = idev->mc_list; mc; mc = mc->next) {
+		for_each_mc_rcu(idev, mc) {
 			if (ipv6_addr_equal(&mc->mca_addr, group))
 				break;
 		}
@@ -1006,8 +1024,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
 			if (src_addr && !ipv6_addr_any(src_addr)) {
 				struct ip6_sf_list *psf;
 
-				spin_lock_bh(&mc->mca_lock);
-				for (psf = mc->mca_sources; psf; psf = psf->sf_next) {
+				for_each_psf_rcu(mc, psf) {
 					if (ipv6_addr_equal(&psf->sf_addr, src_addr))
 						break;
 				}
@@ -1017,89 +1034,107 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
 						mc->mca_sfcount[MCAST_EXCLUDE];
 				else
 					rv = mc->mca_sfcount[MCAST_EXCLUDE] != 0;
-				spin_unlock_bh(&mc->mca_lock);
 			} else
 				rv = true; /* don't filter unspecified source */
 		}
-		read_unlock_bh(&idev->lock);
 	}
 	rcu_read_unlock();
 	return rv;
 }
 
-static void mld_gq_start_timer(struct inet6_dev *idev)
+/* called with mc_lock */
+static void mld_gq_start_work(struct inet6_dev *idev)
 {
 	unsigned long tv = prandom_u32() % idev->mc_maxdelay;
 
 	idev->mc_gq_running = 1;
-	if (!mod_timer(&idev->mc_gq_timer, jiffies+tv+2))
+	if (!mod_delayed_work(mld_wq, &idev->mc_gq_work, tv + 2))
 		in6_dev_hold(idev);
 }
 
-static void mld_gq_stop_timer(struct inet6_dev *idev)
+/* called with mc_lock */
+static void mld_gq_stop_work(struct inet6_dev *idev)
 {
 	idev->mc_gq_running = 0;
-	if (del_timer(&idev->mc_gq_timer))
+	if (cancel_delayed_work(&idev->mc_gq_work))
 		__in6_dev_put(idev);
 }
 
-static void mld_ifc_start_timer(struct inet6_dev *idev, unsigned long delay)
+/* called with mc_lock */
+static void mld_ifc_start_work(struct inet6_dev *idev, unsigned long delay)
 {
 	unsigned long tv = prandom_u32() % delay;
 
-	if (!mod_timer(&idev->mc_ifc_timer, jiffies+tv+2))
+	if (!mod_delayed_work(mld_wq, &idev->mc_ifc_work, tv + 2))
 		in6_dev_hold(idev);
 }
 
-static void mld_ifc_stop_timer(struct inet6_dev *idev)
+/* called with mc_lock */
+static void mld_ifc_stop_work(struct inet6_dev *idev)
 {
 	idev->mc_ifc_count = 0;
-	if (del_timer(&idev->mc_ifc_timer))
+	if (cancel_delayed_work(&idev->mc_ifc_work))
 		__in6_dev_put(idev);
 }
 
-static void mld_dad_start_timer(struct inet6_dev *idev, unsigned long delay)
+/* called with mc_lock */
+static void mld_dad_start_work(struct inet6_dev *idev, unsigned long delay)
 {
 	unsigned long tv = prandom_u32() % delay;
 
-	if (!mod_timer(&idev->mc_dad_timer, jiffies+tv+2))
+	if (!mod_delayed_work(mld_wq, &idev->mc_dad_work, tv + 2))
 		in6_dev_hold(idev);
 }
 
-static void mld_dad_stop_timer(struct inet6_dev *idev)
+static void mld_dad_stop_work(struct inet6_dev *idev)
+{
+	if (cancel_delayed_work(&idev->mc_dad_work))
+		__in6_dev_put(idev);
+}
+
+static void mld_query_stop_work(struct inet6_dev *idev)
 {
-	if (del_timer(&idev->mc_dad_timer))
+	spin_lock_bh(&idev->mc_query_lock);
+	if (cancel_delayed_work(&idev->mc_query_work))
+		__in6_dev_put(idev);
+	spin_unlock_bh(&idev->mc_query_lock);
+}
+
+static void mld_report_stop_work(struct inet6_dev *idev)
+{
+	if (cancel_delayed_work_sync(&idev->mc_report_work))
 		__in6_dev_put(idev);
 }
 
 /*
- *	IGMP handling (alias multicast ICMPv6 messages)
+ *	IGMP handling (alias multicast ICMPv6 messages)
+ *	called with mc_lock
 */
-
 static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
 {
 	unsigned long delay = resptime;
 
-	/* Do not start timer for these addresses */
+	/* Do not start work for these addresses */
 	if (ipv6_addr_is_ll_all_nodes(&ma->mca_addr) ||
 	    IPV6_ADDR_MC_SCOPE(&ma->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL)
 		return;
 
-	if (del_timer(&ma->mca_timer)) {
+	if (cancel_delayed_work(&ma->mca_work)) {
 		refcount_dec(&ma->mca_refcnt);
-		delay = ma->mca_timer.expires - jiffies;
+		delay = ma->mca_work.timer.expires - jiffies;
 	}
 
 	if (delay >= resptime)
 		delay = prandom_u32() % resptime;
 
-	ma->mca_timer.expires = jiffies + delay;
-	if (!mod_timer(&ma->mca_timer, jiffies + delay))
+	if (!mod_delayed_work(mld_wq, &ma->mca_work, delay))
 		refcount_inc(&ma->mca_refcnt);
 	ma->mca_flags |= MAF_TIMER_RUNNING;
 }
 
-/* mark EXCLUDE-mode sources */
+/* mark EXCLUDE-mode sources
 * called with mc_lock
 */
 static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
			     const struct in6_addr *srcs)
{
@@ -1107,7 +1142,7 @@ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
 	int i, scount;
 
 	scount = 0;
-	for (psf = pmc->mca_sources; psf; psf = psf->sf_next) {
+	for_each_psf_mclock(pmc, psf) {
 		if (scount == nsrcs)
 			break;
 		for (i = 0; i < nsrcs; i++) {
@@ -1128,6 +1163,7 @@ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs,
 	return true;
 }
 
+/* called with mc_lock */
 static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
			    const struct in6_addr *srcs)
{
@@ -1140,7 +1176,7 @@ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
 
 	/* mark INCLUDE-mode sources */
 	scount = 0;
-	for (psf = pmc->mca_sources; psf; psf = psf->sf_next) {
+	for_each_psf_mclock(pmc, psf) {
 		if (scount == nsrcs)
 			break;
 		for (i = 0; i < nsrcs; i++) {
@@ -1305,10 +1341,10 @@ static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld,
 	if (v1_query)
 		mld_set_v1_mode(idev);
 
-	/* cancel MLDv2 report timer */
-	mld_gq_stop_timer(idev);
-	/* cancel the interface change timer */
-	mld_ifc_stop_timer(idev);
+	/* cancel MLDv2 report work */
+	mld_gq_stop_work(idev);
+	/* cancel the interface change work */
+	mld_ifc_stop_work(idev);
 	/* clear deleted report items */
 	mld_clear_delrec(idev);
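The mld_*_start_work()/mld_*_stop_work() helpers above carry the whole timer-to-workqueue conversion: mod_timer(&t, jiffies + tv + 2) becomes mod_delayed_work(mld_wq, &dw, tv + 2) — the delay argument is now relative, so the jiffies term disappears — and del_timer() becomes cancel_delayed_work(), with the same reference-counting convention because both return whether a pending instance already existed. A standalone before/after sketch with a hypothetical struct foo:

/* Hypothetical sketch of the conversion applied above. */
struct foo {
	struct delayed_work work;	/* was: struct timer_list timer */
	refcount_t refcnt;
};

static void foo_start(struct foo *f, struct workqueue_struct *wq,
		      unsigned long delay)
{
	/* mod_delayed_work() takes a relative delay in jiffies and
	 * returns false if the work was idle, mirroring mod_timer();
	 * in that case the newly queued work owns a new reference.
	 */
	if (!mod_delayed_work(wq, &f->work, delay))
		refcount_inc(&f->refcnt);
}

static void foo_stop(struct foo *f)
{
	/* cancel_delayed_work() returns true if a pending work was
	 * cancelled; drop the reference that work was holding.
	 */
	if (cancel_delayed_work(&f->work))
		refcount_dec(&f->refcnt);
}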
@@ -1332,18 +1368,41 @@ static int mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
 /* called with rcu_read_lock() */
 int igmp6_event_query(struct sk_buff *skb)
 {
+	struct inet6_dev *idev = __in6_dev_get(skb->dev);
+
+	if (!idev)
+		return -EINVAL;
+
+	if (idev->dead) {
+		kfree_skb(skb);
+		return -ENODEV;
+	}
+
+	spin_lock_bh(&idev->mc_query_lock);
+	if (skb_queue_len(&idev->mc_query_queue) < MLD_MAX_SKBS) {
+		__skb_queue_tail(&idev->mc_query_queue, skb);
+		if (!mod_delayed_work(mld_wq, &idev->mc_query_work, 0))
+			in6_dev_hold(idev);
+	}
+	spin_unlock_bh(&idev->mc_query_lock);
+
+	return 0;
+}
+
+static void __mld_query_work(struct sk_buff *skb)
+{
 	struct mld2_query *mlh2 = NULL;
-	struct ifmcaddr6 *ma;
 	const struct in6_addr *group;
 	unsigned long max_delay;
 	struct inet6_dev *idev;
+	struct ifmcaddr6 *ma;
 	struct mld_msg *mld;
 	int group_type;
 	int mark = 0;
 	int len, err;
 
 	if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
-		return -EINVAL;
+		goto kfree_skb;
 
 	/* compute payload length excluding extension headers */
 	len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr);
@@ -1360,11 +1419,11 @@ int igmp6_event_query(struct sk_buff *skb)
 	    ipv6_hdr(skb)->hop_limit != 1 ||
 	    !(IP6CB(skb)->flags & IP6SKB_ROUTERALERT) ||
 	    IP6CB(skb)->ra != htons(IPV6_OPT_ROUTERALERT_MLD))
-		return -EINVAL;
+		goto kfree_skb;
 
-	idev = __in6_dev_get(skb->dev);
+	idev = in6_dev_get(skb->dev);
 	if (!idev)
-		return 0;
+		goto kfree_skb;
 
 	mld = (struct mld_msg *)icmp6_hdr(skb);
 	group = &mld->mld_mca;
@@ -1372,60 +1431,56 @@ int igmp6_event_query(struct sk_buff *skb)
 
 	if (group_type != IPV6_ADDR_ANY &&
 	    !(group_type&IPV6_ADDR_MULTICAST))
-		return -EINVAL;
+		goto out;
 
 	if (len < MLD_V1_QUERY_LEN) {
-		return -EINVAL;
+		goto out;
 	} else if (len == MLD_V1_QUERY_LEN || mld_in_v1_mode(idev)) {
 		err = mld_process_v1(idev, mld, &max_delay,
 				     len == MLD_V1_QUERY_LEN);
 		if (err < 0)
-			return err;
+			goto out;
 	} else if (len >= MLD_V2_QUERY_LEN_MIN) {
 		int srcs_offset = sizeof(struct mld2_query) -
 				  sizeof(struct icmp6hdr);
 
 		if (!pskb_may_pull(skb, srcs_offset))
-			return -EINVAL;
+			goto out;
 
 		mlh2 = (struct mld2_query *)skb_transport_header(skb);
 
 		err = mld_process_v2(idev, mlh2, &max_delay);
 		if (err < 0)
-			return err;
+			goto out;
 
 		if (group_type == IPV6_ADDR_ANY) { /* general query */
 			if (mlh2->mld2q_nsrcs)
-				return -EINVAL; /* no sources allowed */
+				goto out; /* no sources allowed */
 
-			mld_gq_start_timer(idev);
-			return 0;
+			mld_gq_start_work(idev);
+			goto out;
 		}
 		/* mark sources to include, if group & source-specific */
 		if (mlh2->mld2q_nsrcs != 0) {
 			if (!pskb_may_pull(skb, srcs_offset +
 			    ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr)))
-				return -EINVAL;
+				goto out;
 
 			mlh2 = (struct mld2_query *)skb_transport_header(skb);
 			mark = 1;
 		}
 	} else {
-		return -EINVAL;
+		goto out;
 	}
 
-	read_lock_bh(&idev->lock);
 	if (group_type == IPV6_ADDR_ANY) {
-		for (ma = idev->mc_list; ma; ma = ma->next) {
-			spin_lock_bh(&ma->mca_lock);
+		for_each_mc_mclock(idev, ma) {
 			igmp6_group_queried(ma, max_delay);
-			spin_unlock_bh(&ma->mca_lock);
 		}
 	} else {
-		for (ma = idev->mc_list; ma; ma = ma->next) {
+		for_each_mc_mclock(idev, ma) {
 			if (!ipv6_addr_equal(group, &ma->mca_addr))
 				continue;
-			spin_lock_bh(&ma->mca_lock);
 			if (ma->mca_flags & MAF_TIMER_RUNNING) {
 				/* gsquery <- gsquery && mark */
 				if (!mark)
@@ -1440,34 +1495,91 @@ int igmp6_event_query(struct sk_buff *skb)
 			if (!(ma->mca_flags & MAF_GSQUERY) ||
 			    mld_marksources(ma, ntohs(mlh2->mld2q_nsrcs), mlh2->mld2q_srcs))
 				igmp6_group_queried(ma, max_delay);
-			spin_unlock_bh(&ma->mca_lock);
 			break;
 		}
 	}
-	read_unlock_bh(&idev->lock);
 
-	return 0;
+out:
+	in6_dev_put(idev);
+kfree_skb:
+	consume_skb(skb);
+}
+
+static void mld_query_work(struct work_struct *work)
+{
+	struct inet6_dev *idev = container_of(to_delayed_work(work),
+					      struct inet6_dev,
+					      mc_query_work);
+	struct sk_buff_head q;
+	struct sk_buff *skb;
+	bool rework = false;
+	int cnt = 0;
+
+	skb_queue_head_init(&q);
+
+	spin_lock_bh(&idev->mc_query_lock);
+	while ((skb = __skb_dequeue(&idev->mc_query_queue))) {
+		__skb_queue_tail(&q, skb);
+
+		if (++cnt >= MLD_MAX_QUEUE) {
+			rework = true;
+			schedule_delayed_work(&idev->mc_query_work, 0);
+			break;
+		}
+	}
+	spin_unlock_bh(&idev->mc_query_lock);
+
+	mutex_lock(&idev->mc_lock);
+	while ((skb = __skb_dequeue(&q)))
+		__mld_query_work(skb);
+	mutex_unlock(&idev->mc_lock);
+
+	if (!rework)
+		in6_dev_put(idev);
 }
 
 /* called with rcu_read_lock() */
 int igmp6_event_report(struct sk_buff *skb)
 {
-	struct ifmcaddr6 *ma;
+	struct inet6_dev *idev = __in6_dev_get(skb->dev);
+
+	if (!idev)
+		return -EINVAL;
+
+	if (idev->dead) {
+		kfree_skb(skb);
+		return -ENODEV;
+	}
+
+	spin_lock_bh(&idev->mc_report_lock);
+	if (skb_queue_len(&idev->mc_report_queue) < MLD_MAX_SKBS) {
+		__skb_queue_tail(&idev->mc_report_queue, skb);
+		if (!mod_delayed_work(mld_wq, &idev->mc_report_work, 0))
+			in6_dev_hold(idev);
+	}
+	spin_unlock_bh(&idev->mc_report_lock);
+
+	return 0;
+}
+
+static void __mld_report_work(struct sk_buff *skb)
+{
 	struct inet6_dev *idev;
+	struct ifmcaddr6 *ma;
 	struct mld_msg *mld;
 	int addr_type;
 
 	/* Our own report looped back. Ignore it. */
 	if (skb->pkt_type == PACKET_LOOPBACK)
-		return 0;
+		goto kfree_skb;
 
 	/* send our report if the MC router may not have heard this report */
 	if (skb->pkt_type != PACKET_MULTICAST &&
 	    skb->pkt_type != PACKET_BROADCAST)
-		return 0;
+		goto kfree_skb;
 
 	if (!pskb_may_pull(skb, sizeof(*mld) - sizeof(struct icmp6hdr)))
-		return -EINVAL;
+		goto kfree_skb;
 
 	mld = (struct mld_msg *)icmp6_hdr(skb);
 
@@ -1475,29 +1587,61 @@ int igmp6_event_report(struct sk_buff *skb)
 	addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr);
 	if (addr_type != IPV6_ADDR_ANY &&
 	    !(addr_type&IPV6_ADDR_LINKLOCAL))
-		return -EINVAL;
+		goto kfree_skb;
 
-	idev = __in6_dev_get(skb->dev);
+	idev = in6_dev_get(skb->dev);
 	if (!idev)
-		return -ENODEV;
+		goto kfree_skb;
 
 	/*
-	 *	Cancel the timer for this group
+	 *	Cancel the work for this group
 	 */
 
-	read_lock_bh(&idev->lock);
-	for (ma = idev->mc_list; ma; ma = ma->next) {
+	for_each_mc_mclock(idev, ma) {
 		if (ipv6_addr_equal(&ma->mca_addr, &mld->mld_mca)) {
-			spin_lock(&ma->mca_lock);
-			if (del_timer(&ma->mca_timer))
+			if (cancel_delayed_work(&ma->mca_work))
 				refcount_dec(&ma->mca_refcnt);
-			ma->mca_flags &= ~(MAF_LAST_REPORTER|MAF_TIMER_RUNNING);
-			spin_unlock(&ma->mca_lock);
+			ma->mca_flags &= ~(MAF_LAST_REPORTER |
+					   MAF_TIMER_RUNNING);
 			break;
 		}
 	}
-	read_unlock_bh(&idev->lock);
-	return 0;
+
+	in6_dev_put(idev);
+kfree_skb:
+	consume_skb(skb);
+}
+
+static void mld_report_work(struct work_struct *work)
+{
+	struct inet6_dev *idev = container_of(to_delayed_work(work),
+					      struct inet6_dev,
+					      mc_report_work);
+	struct sk_buff_head q;
+	struct sk_buff *skb;
+	bool rework = false;
+	int cnt = 0;
+
+	skb_queue_head_init(&q);
+	spin_lock_bh(&idev->mc_report_lock);
+	while ((skb = __skb_dequeue(&idev->mc_report_queue))) {
+		__skb_queue_tail(&q, skb);
+
+		if (++cnt >= MLD_MAX_QUEUE) {
+			rework = true;
+			schedule_delayed_work(&idev->mc_report_work, 0);
+			break;
+		}
+	}
+	spin_unlock_bh(&idev->mc_report_lock);
+
+	mutex_lock(&idev->mc_lock);
+	while ((skb = __skb_dequeue(&q)))
+		__mld_report_work(skb);
+	mutex_unlock(&idev->mc_lock);
+
+	if (!rework)
+		in6_dev_put(idev);
 }
 
 static bool is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
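After this change igmp6_event_query() and igmp6_event_report() no longer parse MLD packets in softirq context: the RX path only queues the skb (bounded by MLD_MAX_SKBS) and kicks a delayed work, and the work drains at most MLD_MAX_QUEUE packets under the mc_lock mutex before rescheduling itself, so one busy interface cannot monopolize the workqueue. A minimal standalone sketch of the enqueue half, with hypothetical names (rx_defer, rx_enqueue) and the simplifying assumption that an over-limit skb is dropped:

/* Hypothetical sketch of the bounded queue-and-defer pattern above. */
struct rx_defer {
	struct sk_buff_head queue;	/* filled from softirq context */
	spinlock_t lock;
	struct delayed_work work;	/* drains under a sleeping lock */
};

static void rx_enqueue(struct rx_defer *d, struct sk_buff *skb,
		       struct workqueue_struct *wq, u32 max_skbs)
{
	spin_lock_bh(&d->lock);
	if (skb_queue_len(&d->queue) < max_skbs) {
		__skb_queue_tail(&d->queue, skb);
		mod_delayed_work(wq, &d->work, 0);	/* run ASAP */
	} else {
		kfree_skb(skb);	/* queue full: drop rather than block */
	}
	spin_unlock_bh(&d->lock);
}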
@@ -1550,7 +1694,7 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted)
 	struct ip6_sf_list *psf;
 	int scount = 0;
 
-	for (psf = pmc->mca_sources; psf; psf = psf->sf_next) {
+	for_each_psf_mclock(pmc, psf) {
 		if (!is_in(pmc, psf, type, gdeleted, sdeleted))
 			continue;
 		scount++;
@@ -1724,15 +1868,18 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 
 #define AVAILABLE(skb)	((skb) ? skb_availroom(skb) : 0)
 
+/* called with mc_lock */
 static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
-				int type, int gdeleted, int sdeleted, int crsend)
+				int type, int gdeleted, int sdeleted,
+				int crsend)
 {
+	struct ip6_sf_list *psf, *psf_prev, *psf_next;
+	int scount, stotal, first, isquery, truncate;
+	struct ip6_sf_list __rcu **psf_list;
 	struct inet6_dev *idev = pmc->idev;
 	struct net_device *dev = idev->dev;
-	struct mld2_report *pmr;
 	struct mld2_grec *pgr = NULL;
-	struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
-	int scount, stotal, first, isquery, truncate;
+	struct mld2_report *pmr;
 	unsigned int mtu;
 
 	if (pmc->mca_flags & MAF_NOREPORT)
@@ -1751,7 +1898,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 
 	psf_list = sdeleted ? &pmc->mca_tomb : &pmc->mca_sources;
 
-	if (!*psf_list)
+	if (!rcu_access_pointer(*psf_list))
 		goto empty_source;
 
 	pmr = skb ? (struct mld2_report *)skb_transport_header(skb) : NULL;
@@ -1767,10 +1914,12 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 	}
 	first = 1;
 	psf_prev = NULL;
-	for (psf = *psf_list; psf; psf = psf_next) {
+	for (psf = mc_dereference(*psf_list, idev);
+	     psf;
+	     psf = psf_next) {
 		struct in6_addr *psrc;
 
-		psf_next = psf->sf_next;
+		psf_next = mc_dereference(psf->sf_next, idev);
 
 		if (!is_in(pmc, psf, type, gdeleted, sdeleted) && !crsend) {
 			psf_prev = psf;
@@ -1817,10 +1966,12 @@ decrease_sf_crcount:
 				psf->sf_crcount--;
 				if ((sdeleted || gdeleted) && psf->sf_crcount == 0) {
 					if (psf_prev)
-						psf_prev->sf_next = psf->sf_next;
+						rcu_assign_pointer(psf_prev->sf_next,
+								   mc_dereference(psf->sf_next, idev));
 					else
-						*psf_list = psf->sf_next;
-					kfree(psf);
+						rcu_assign_pointer(*psf_list,
+								   mc_dereference(psf->sf_next, idev));
+					kfree_rcu(psf, rcu);
 					continue;
 				}
 			}
@@ -1849,72 +2000,73 @@ empty_source:
 	return skb;
 }
 
+/* called with mc_lock */
 static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
 {
 	struct sk_buff *skb = NULL;
 	int type;
 
-	read_lock_bh(&idev->lock);
 	if (!pmc) {
-		for (pmc = idev->mc_list; pmc; pmc = pmc->next) {
+		for_each_mc_mclock(idev, pmc) {
 			if (pmc->mca_flags & MAF_NOREPORT)
 				continue;
-			spin_lock_bh(&pmc->mca_lock);
 			if (pmc->mca_sfcount[MCAST_EXCLUDE])
 				type = MLD2_MODE_IS_EXCLUDE;
 			else
 				type = MLD2_MODE_IS_INCLUDE;
 			skb = add_grec(skb, pmc, type, 0, 0, 0);
-			spin_unlock_bh(&pmc->mca_lock);
 		}
 	} else {
-		spin_lock_bh(&pmc->mca_lock);
 		if (pmc->mca_sfcount[MCAST_EXCLUDE])
 			type = MLD2_MODE_IS_EXCLUDE;
 		else
 			type = MLD2_MODE_IS_INCLUDE;
 		skb = add_grec(skb, pmc, type, 0, 0, 0);
-		spin_unlock_bh(&pmc->mca_lock);
 	}
-	read_unlock_bh(&idev->lock);
 	if (skb)
 		mld_sendpack(skb);
 }
 
 /*
  * remove zero-count source records from a source filter list
+ * called with mc_lock
 */
-static void mld_clear_zeros(struct ip6_sf_list **ppsf)
+static void mld_clear_zeros(struct ip6_sf_list __rcu **ppsf, struct inet6_dev *idev)
 {
 	struct ip6_sf_list *psf_prev, *psf_next, *psf;
 
 	psf_prev = NULL;
-	for (psf = *ppsf; psf; psf = psf_next) {
-		psf_next = psf->sf_next;
+	for (psf = mc_dereference(*ppsf, idev);
+	     psf;
+	     psf = psf_next) {
+		psf_next = mc_dereference(psf->sf_next, idev);
 		if (psf->sf_crcount == 0) {
 			if (psf_prev)
-				psf_prev->sf_next = psf->sf_next;
+				rcu_assign_pointer(psf_prev->sf_next,
						   mc_dereference(psf->sf_next, idev));
 			else
-				*ppsf = psf->sf_next;
-			kfree(psf);
-		} else
+				rcu_assign_pointer(*ppsf,
						   mc_dereference(psf->sf_next, idev));
+			kfree_rcu(psf, rcu);
+		} else {
 			psf_prev = psf;
+		}
 	}
 }
 
+/* called with mc_lock */
 static void mld_send_cr(struct inet6_dev *idev)
 {
 	struct ifmcaddr6 *pmc, *pmc_prev, *pmc_next;
 	struct sk_buff *skb = NULL;
 	int type, dtype;
 
-	read_lock_bh(&idev->lock);
-	spin_lock(&idev->mc_lock);
-
 	/* deleted MCA's */
 	pmc_prev = NULL;
-	for (pmc = idev->mc_tomb; pmc; pmc = pmc_next) {
-		pmc_next = pmc->next;
+	for (pmc = mc_dereference(idev->mc_tomb, idev);
+	     pmc;
+	     pmc = pmc_next) {
+		pmc_next = mc_dereference(pmc->next, idev);
 		if (pmc->mca_sfmode == MCAST_INCLUDE) {
 			type = MLD2_BLOCK_OLD_SOURCES;
 			dtype = MLD2_BLOCK_OLD_SOURCES;
@@ -1928,26 +2080,25 @@ static void mld_send_cr(struct inet6_dev *idev)
 		}
 		pmc->mca_crcount--;
 		if (pmc->mca_crcount == 0) {
-			mld_clear_zeros(&pmc->mca_tomb);
-			mld_clear_zeros(&pmc->mca_sources);
+			mld_clear_zeros(&pmc->mca_tomb, idev);
+			mld_clear_zeros(&pmc->mca_sources, idev);
 		}
-		if (pmc->mca_crcount == 0 && !pmc->mca_tomb &&
-		    !pmc->mca_sources) {
+		if (pmc->mca_crcount == 0 &&
+		    !rcu_access_pointer(pmc->mca_tomb) &&
+		    !rcu_access_pointer(pmc->mca_sources)) {
 			if (pmc_prev)
-				pmc_prev->next = pmc_next;
+				rcu_assign_pointer(pmc_prev->next, pmc_next);
 			else
-				idev->mc_tomb = pmc_next;
+				rcu_assign_pointer(idev->mc_tomb, pmc_next);
 			in6_dev_put(pmc->idev);
-			kfree(pmc);
+			kfree_rcu(pmc, rcu);
 		} else
 			pmc_prev = pmc;
 	}
-	spin_unlock(&idev->mc_lock);
 
 	/* change recs */
-	for (pmc = idev->mc_list; pmc; pmc = pmc->next) {
-		spin_lock_bh(&pmc->mca_lock);
+	for_each_mc_mclock(idev, pmc) {
 		if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
 			type = MLD2_BLOCK_OLD_SOURCES;
 			dtype = MLD2_ALLOW_NEW_SOURCES;
@@ -1967,9 +2118,7 @@ static void mld_send_cr(struct inet6_dev *idev)
 			skb = add_grec(skb, pmc, type, 0, 0, 0);
 			pmc->mca_crcount--;
 		}
-		spin_unlock_bh(&pmc->mca_lock);
 	}
-	read_unlock_bh(&idev->lock);
 	if (!skb)
 		return;
 	(void) mld_sendpack(skb);
@@ -2071,6 +2220,7 @@ err_out:
 	goto out;
 }
 
+/* called with mc_lock */
 static void mld_send_initial_cr(struct inet6_dev *idev)
 {
 	struct sk_buff *skb;
@@ -2081,47 +2231,49 @@ static void mld_send_initial_cr(struct inet6_dev *idev)
 		return;
 
 	skb = NULL;
-	read_lock_bh(&idev->lock);
-	for (pmc = idev->mc_list; pmc; pmc = pmc->next) {
-		spin_lock_bh(&pmc->mca_lock);
+	for_each_mc_mclock(idev, pmc) {
 		if (pmc->mca_sfcount[MCAST_EXCLUDE])
 			type = MLD2_CHANGE_TO_EXCLUDE;
 		else
 			type = MLD2_ALLOW_NEW_SOURCES;
 		skb = add_grec(skb, pmc, type, 0, 0, 1);
-		spin_unlock_bh(&pmc->mca_lock);
 	}
-	read_unlock_bh(&idev->lock);
 	if (skb)
 		mld_sendpack(skb);
 }
 
 void ipv6_mc_dad_complete(struct inet6_dev *idev)
 {
+	mutex_lock(&idev->mc_lock);
 	idev->mc_dad_count = idev->mc_qrv;
 	if (idev->mc_dad_count) {
 		mld_send_initial_cr(idev);
 		idev->mc_dad_count--;
 		if (idev->mc_dad_count)
-			mld_dad_start_timer(idev,
-					    unsolicited_report_interval(idev));
+			mld_dad_start_work(idev,
					   unsolicited_report_interval(idev));
 	}
+	mutex_unlock(&idev->mc_lock);
 }
 
-static void mld_dad_timer_expire(struct timer_list *t)
+static void mld_dad_work(struct work_struct *work)
 {
-	struct inet6_dev *idev = from_timer(idev, t, mc_dad_timer);
-
+	struct inet6_dev *idev = container_of(to_delayed_work(work),
					      struct inet6_dev,
					      mc_dad_work);
+	mutex_lock(&idev->mc_lock);
 	mld_send_initial_cr(idev);
 	if (idev->mc_dad_count) {
 		idev->mc_dad_count--;
 		if (idev->mc_dad_count)
-			mld_dad_start_timer(idev,
-					    unsolicited_report_interval(idev));
+			mld_dad_start_work(idev,
					   unsolicited_report_interval(idev));
 	}
+	mutex_unlock(&idev->mc_lock);
 	in6_dev_put(idev);
 }
 
+/* called with mc_lock */
 static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
			   const struct in6_addr *psfsrc)
{
@@ -2129,7 +2281,7 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
 	int rv = 0;
 
 	psf_prev = NULL;
-	for (psf = pmc->mca_sources; psf; psf = psf->sf_next) {
+	for_each_psf_mclock(pmc, psf) {
 		if (ipv6_addr_equal(&psf->sf_addr, psfsrc))
 			break;
 		psf_prev = psf;
@@ -2144,21 +2296,27 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
 
 		/* no more filters for this source */
 		if (psf_prev)
-			psf_prev->sf_next = psf->sf_next;
+			rcu_assign_pointer(psf_prev->sf_next,
					   mc_dereference(psf->sf_next, idev));
 		else
-			pmc->mca_sources = psf->sf_next;
+			rcu_assign_pointer(pmc->mca_sources,
					   mc_dereference(psf->sf_next, idev));
+
 		if (psf->sf_oldin && !(pmc->mca_flags & MAF_NOREPORT) &&
 		    !mld_in_v1_mode(idev)) {
 			psf->sf_crcount = idev->mc_qrv;
-			psf->sf_next = pmc->mca_tomb;
-			pmc->mca_tomb = psf;
+			rcu_assign_pointer(psf->sf_next,
					   mc_dereference(pmc->mca_tomb, idev));
+			rcu_assign_pointer(pmc->mca_tomb, psf);
 			rv = 1;
-		} else
-			kfree(psf);
+		} else {
+			kfree_rcu(psf, rcu);
+		}
 	}
 	return rv;
 }
 
+/* called with mc_lock */
 static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
			  int sfmode, int sfcount, const struct in6_addr *psfsrc,
			  int delta)
@@ -2169,24 +2327,19 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
 	if (!idev)
 		return -ENODEV;
 
-	read_lock_bh(&idev->lock);
-	for (pmc = idev->mc_list; pmc; pmc = pmc->next) {
+
+	for_each_mc_mclock(idev, pmc) {
 		if (ipv6_addr_equal(pmca, &pmc->mca_addr))
 			break;
 	}
-	if (!pmc) {
-		/* MCA not found?? bug */
-		read_unlock_bh(&idev->lock);
+	if (!pmc)
 		return -ESRCH;
-	}
-	spin_lock_bh(&pmc->mca_lock);
+
 	sf_markstate(pmc);
 	if (!delta) {
-		if (!pmc->mca_sfcount[sfmode]) {
-			spin_unlock_bh(&pmc->mca_lock);
-			read_unlock_bh(&idev->lock);
+		if (!pmc->mca_sfcount[sfmode])
 			return -EINVAL;
-		}
+
 		pmc->mca_sfcount[sfmode]--;
 	}
 	err = 0;
@@ -2206,18 +2359,19 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca,
 		pmc->mca_sfmode = MCAST_INCLUDE;
 		pmc->mca_crcount = idev->mc_qrv;
 		idev->mc_ifc_count = pmc->mca_crcount;
-		for (psf = pmc->mca_sources; psf; psf = psf->sf_next)
+		for_each_psf_mclock(pmc, psf)
 			psf->sf_crcount = 0;
 		mld_ifc_event(pmc->idev);
-	} else if (sf_setstate(pmc) || changerec)
+	} else if (sf_setstate(pmc) || changerec) {
 		mld_ifc_event(pmc->idev);
-	spin_unlock_bh(&pmc->mca_lock);
-	read_unlock_bh(&idev->lock);
+	}
+
 	return err;
 }
 
 /*
  * Add multicast single-source filter to the interface list
+ * called with mc_lock
 */
 static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
			   const struct in6_addr *psfsrc)
@@ -2225,40 +2379,45 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
 	struct ip6_sf_list *psf, *psf_prev;
 
 	psf_prev = NULL;
-	for (psf = pmc->mca_sources; psf; psf = psf->sf_next) {
+	for_each_psf_mclock(pmc, psf) {
 		if (ipv6_addr_equal(&psf->sf_addr, psfsrc))
 			break;
 		psf_prev = psf;
 	}
 	if (!psf) {
-		psf = kzalloc(sizeof(*psf), GFP_ATOMIC);
+		psf = kzalloc(sizeof(*psf), GFP_KERNEL);
 		if (!psf)
 			return -ENOBUFS;
 
 		psf->sf_addr = *psfsrc;
 		if (psf_prev) {
-			psf_prev->sf_next = psf;
-		} else
-			pmc->mca_sources = psf;
+			rcu_assign_pointer(psf_prev->sf_next, psf);
+		} else {
+			rcu_assign_pointer(pmc->mca_sources, psf);
+		}
 	}
 	psf->sf_count[sfmode]++;
 	return 0;
 }
 
+/* called with mc_lock */
 static void sf_markstate(struct ifmcaddr6 *pmc)
 {
 	struct ip6_sf_list *psf;
 	int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE];
 
-	for (psf = pmc->mca_sources; psf; psf = psf->sf_next)
+	for_each_psf_mclock(pmc, psf) {
 		if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
 			psf->sf_oldin = mca_xcount ==
 				psf->sf_count[MCAST_EXCLUDE] &&
 				!psf->sf_count[MCAST_INCLUDE];
-		} else
+		} else {
 			psf->sf_oldin = psf->sf_count[MCAST_INCLUDE] != 0;
+		}
+	}
 }
 
+/* called with mc_lock */
 static int sf_setstate(struct ifmcaddr6 *pmc)
 {
 	struct ip6_sf_list *psf, *dpsf;
@@ -2267,7 +2426,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
 	int new_in, rv;
 
 	rv = 0;
-	for (psf = pmc->mca_sources; psf; psf = psf->sf_next) {
+	for_each_psf_mclock(pmc, psf) {
 		if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
 			new_in = mca_xcount == psf->sf_count[MCAST_EXCLUDE] &&
 				!psf->sf_count[MCAST_INCLUDE];
@@ -2277,8 +2436,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
 			if (!psf->sf_oldin) {
 				struct ip6_sf_list *prev = NULL;
 
-				for (dpsf = pmc->mca_tomb; dpsf;
-				     dpsf = dpsf->sf_next) {
+				for_each_psf_tomb(pmc, dpsf) {
 					if (ipv6_addr_equal(&dpsf->sf_addr,
					    &psf->sf_addr))
						break;
@@ -2286,10 +2444,14 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
 				}
 				if (dpsf) {
 					if (prev)
-						prev->sf_next = dpsf->sf_next;
+						rcu_assign_pointer(prev->sf_next,
								   mc_dereference(dpsf->sf_next,
										  pmc->idev));
 					else
-						pmc->mca_tomb = dpsf->sf_next;
-					kfree(dpsf);
+						rcu_assign_pointer(pmc->mca_tomb,
								   mc_dereference(dpsf->sf_next,
										  pmc->idev));
+					kfree_rcu(dpsf, rcu);
 				}
 				psf->sf_crcount = qrv;
 				rv++;
@@ -2300,18 +2462,19 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
 			 * add or update "delete" records if an active filter
 			 * is now inactive
 			 */
+
-			for (dpsf = pmc->mca_tomb; dpsf; dpsf = dpsf->sf_next)
+			for_each_psf_tomb(pmc, dpsf)
 				if (ipv6_addr_equal(&dpsf->sf_addr,
				    &psf->sf_addr))
					break;
 			if (!dpsf) {
-				dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC);
+				dpsf = kmalloc(sizeof(*dpsf), GFP_KERNEL);
 				if (!dpsf)
					continue;
 				*dpsf = *psf;
-				/* pmc->mca_lock held by callers */
-				dpsf->sf_next = pmc->mca_tomb;
-				pmc->mca_tomb = dpsf;
+				rcu_assign_pointer(dpsf->sf_next,
						   mc_dereference(pmc->mca_tomb, pmc->idev));
+				rcu_assign_pointer(pmc->mca_tomb, dpsf);
 			}
 			dpsf->sf_crcount = qrv;
 			rv++;
@@ -2322,6 +2485,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
 
 /*
  * Add multicast source filter list to the interface list
+ * called with mc_lock
 */
 static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
			  int sfmode, int sfcount, const struct in6_addr *psfsrc,
@@ -2333,17 +2497,13 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
 	if (!idev)
 		return -ENODEV;
 
-	read_lock_bh(&idev->lock);
-	for (pmc = idev->mc_list; pmc; pmc = pmc->next) {
+
+	for_each_mc_mclock(idev, pmc) {
 		if (ipv6_addr_equal(pmca, &pmc->mca_addr))
 			break;
 	}
-	if (!pmc) {
-		/* MCA not found?? bug */
-		read_unlock_bh(&idev->lock);
+	if (!pmc)
 		return -ESRCH;
-	}
-	spin_lock_bh(&pmc->mca_lock);
 
 	sf_markstate(pmc);
 	isexclude = pmc->mca_sfmode == MCAST_EXCLUDE;
@@ -2374,36 +2534,40 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
 		pmc->mca_crcount = idev->mc_qrv;
 		idev->mc_ifc_count = pmc->mca_crcount;
-		for (psf = pmc->mca_sources; psf; psf = psf->sf_next)
+		for_each_psf_mclock(pmc, psf)
 			psf->sf_crcount = 0;
 		mld_ifc_event(idev);
-	} else if (sf_setstate(pmc))
+	} else if (sf_setstate(pmc)) {
 		mld_ifc_event(idev);
-	spin_unlock_bh(&pmc->mca_lock);
-	read_unlock_bh(&idev->lock);
+	}
 	return err;
 }
 
+/* called with mc_lock */
 static void ip6_mc_clear_src(struct ifmcaddr6 *pmc)
 {
 	struct ip6_sf_list *psf, *nextpsf;
 
-	for (psf = pmc->mca_tomb; psf; psf = nextpsf) {
-		nextpsf = psf->sf_next;
-		kfree(psf);
+	for (psf = mc_dereference(pmc->mca_tomb, pmc->idev);
+	     psf;
+	     psf = nextpsf) {
+		nextpsf = mc_dereference(psf->sf_next, pmc->idev);
+		kfree_rcu(psf, rcu);
 	}
-	pmc->mca_tomb = NULL;
-	for (psf = pmc->mca_sources; psf; psf = nextpsf) {
-		nextpsf = psf->sf_next;
-		kfree(psf);
+	RCU_INIT_POINTER(pmc->mca_tomb, NULL);
+	for (psf = mc_dereference(pmc->mca_sources, pmc->idev);
+	     psf;
+	     psf = nextpsf) {
+		nextpsf = mc_dereference(psf->sf_next, pmc->idev);
+		kfree_rcu(psf, rcu);
	}
-	pmc->mca_sources = NULL;
+	RCU_INIT_POINTER(pmc->mca_sources, NULL);
 	pmc->mca_sfmode = MCAST_EXCLUDE;
 	pmc->mca_sfcount[MCAST_INCLUDE] = 0;
 	pmc->mca_sfcount[MCAST_EXCLUDE] = 1;
 }
 
-
+/* called with mc_lock */
 static void igmp6_join_group(struct ifmcaddr6 *ma)
 {
 	unsigned long delay;
@@ -2415,93 +2579,115 @@ static void igmp6_join_group(struct ifmcaddr6 *ma)
 
 	delay = prandom_u32() % unsolicited_report_interval(ma->idev);
 
-	spin_lock_bh(&ma->mca_lock);
-	if (del_timer(&ma->mca_timer)) {
+	if (cancel_delayed_work(&ma->mca_work)) {
 		refcount_dec(&ma->mca_refcnt);
-		delay = ma->mca_timer.expires - jiffies;
+		delay = ma->mca_work.timer.expires - jiffies;
 	}
 
-	if (!mod_timer(&ma->mca_timer, jiffies + delay))
+	if (!mod_delayed_work(mld_wq, &ma->mca_work, delay))
 		refcount_inc(&ma->mca_refcnt);
 	ma->mca_flags |= MAF_TIMER_RUNNING | MAF_LAST_REPORTER;
-	spin_unlock_bh(&ma->mca_lock);
 }
 
 static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
			    struct inet6_dev *idev)
{
+	struct ip6_sf_socklist *psl;
 	int err;
 
-	write_lock_bh(&iml->sflock);
-	if (!iml->sflist) {
+	psl = sock_dereference(iml->sflist, sk);
+
+	if (idev)
+		mutex_lock(&idev->mc_lock);
+
+	if (!psl) {
 		/* any-source empty exclude case */
 		err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0);
 	} else {
 		err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode,
-				     iml->sflist->sl_count, iml->sflist->sl_addr, 0);
-		sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max));
-		iml->sflist = NULL;
+				     psl->sl_count, psl->sl_addr, 0);
+		RCU_INIT_POINTER(iml->sflist, NULL);
+		atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+		kfree_rcu(psl, rcu);
 	}
-	write_unlock_bh(&iml->sflock);
+
+	if (idev)
+		mutex_unlock(&idev->mc_lock);
+
 	return err;
 }
 
+/* called with mc_lock */
 static void igmp6_leave_group(struct ifmcaddr6 *ma)
 {
 	if (mld_in_v1_mode(ma->idev)) {
-		if (ma->mca_flags & MAF_LAST_REPORTER)
+		if (ma->mca_flags & MAF_LAST_REPORTER) {
 			igmp6_send(&ma->mca_addr, ma->idev->dev,
				ICMPV6_MGM_REDUCTION);
+		}
 	} else {
 		mld_add_delrec(ma->idev, ma);
 		mld_ifc_event(ma->idev);
 	}
 }
 
-static void mld_gq_timer_expire(struct timer_list *t)
+static void mld_gq_work(struct work_struct *work)
 {
-	struct inet6_dev *idev = from_timer(idev, t, mc_gq_timer);
+	struct inet6_dev *idev = container_of(to_delayed_work(work),
					      struct inet6_dev,
					      mc_gq_work);
 
-	idev->mc_gq_running = 0;
+	mutex_lock(&idev->mc_lock);
 	mld_send_report(idev, NULL);
+	idev->mc_gq_running = 0;
+	mutex_unlock(&idev->mc_lock);
+
 	in6_dev_put(idev);
 }
 
-static void mld_ifc_timer_expire(struct timer_list *t)
+static void mld_ifc_work(struct work_struct *work)
 {
-	struct inet6_dev *idev = from_timer(idev, t, mc_ifc_timer);
+	struct inet6_dev *idev = container_of(to_delayed_work(work),
					      struct inet6_dev,
					      mc_ifc_work);
 
+	mutex_lock(&idev->mc_lock);
 	mld_send_cr(idev);
+
 	if (idev->mc_ifc_count) {
 		idev->mc_ifc_count--;
 		if (idev->mc_ifc_count)
-			mld_ifc_start_timer(idev,
-					    unsolicited_report_interval(idev));
+			mld_ifc_start_work(idev,
					   unsolicited_report_interval(idev));
 	}
+	mutex_unlock(&idev->mc_lock);
 	in6_dev_put(idev);
 }
 
+/* called with mc_lock */
 static void mld_ifc_event(struct inet6_dev *idev)
 {
 	if (mld_in_v1_mode(idev))
 		return;
+
 	idev->mc_ifc_count = idev->mc_qrv;
-	mld_ifc_start_timer(idev, 1);
+	mld_ifc_start_work(idev, 1);
 }
 
-static void igmp6_timer_handler(struct timer_list *t)
+static void mld_mca_work(struct work_struct *work)
 {
-	struct ifmcaddr6 *ma = from_timer(ma, t, mca_timer);
+	struct ifmcaddr6 *ma = container_of(to_delayed_work(work),
					    struct ifmcaddr6, mca_work);
 
+	mutex_lock(&ma->idev->mc_lock);
 	if (mld_in_v1_mode(ma->idev))
 		igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
 	else
 		mld_send_report(ma->idev, ma);
-
-	spin_lock(&ma->mca_lock);
 	ma->mca_flags |=  MAF_LAST_REPORTER;
 	ma->mca_flags &= ~MAF_TIMER_RUNNING;
-	spin_unlock(&ma->mca_lock);
+	mutex_unlock(&ma->idev->mc_lock);
+
 	ma_put(ma);
 }
 
@@ -2513,10 +2699,10 @@ void ipv6_mc_unmap(struct inet6_dev *idev)
 
 	/* Install multicast list, except for all-nodes (already installed) */
 
-	read_lock_bh(&idev->lock);
-	for (i = idev->mc_list; i; i = i->next)
+	mutex_lock(&idev->mc_lock);
+	for_each_mc_mclock(idev, i)
 		igmp6_group_dropped(i);
-	read_unlock_bh(&idev->lock);
+	mutex_unlock(&idev->mc_lock);
 }
 
 void ipv6_mc_remap(struct inet6_dev *idev)
@@ -2525,25 +2711,25 @@ void ipv6_mc_remap(struct inet6_dev *idev)
 }
 
 /* Device going down */
-
 void ipv6_mc_down(struct inet6_dev *idev)
 {
 	struct ifmcaddr6 *i;
 
+	mutex_lock(&idev->mc_lock);
 	/* Withdraw multicast list */
-
-	read_lock_bh(&idev->lock);
-
-	for (i = idev->mc_list; i; i = i->next)
+	for_each_mc_mclock(idev, i)
 		igmp6_group_dropped(i);
+	mutex_unlock(&idev->mc_lock);
 
-	/* Should stop timer after group drop. or we will
-	 * start timer again in mld_ifc_event()
+	/* Should stop work after group drop. or we will
+	 * start work again in mld_ifc_event()
 	 */
-	mld_ifc_stop_timer(idev);
-	mld_gq_stop_timer(idev);
-	mld_dad_stop_timer(idev);
-	read_unlock_bh(&idev->lock);
+	synchronize_net();
+	mld_query_stop_work(idev);
+	mld_report_stop_work(idev);
+	mld_ifc_stop_work(idev);
+	mld_gq_stop_work(idev);
+	mld_dad_stop_work(idev);
 }
 
 static void ipv6_mc_reset(struct inet6_dev *idev)
@@ -2563,29 +2749,33 @@ void ipv6_mc_up(struct inet6_dev *idev)
 
 	/* Install multicast list, except for all-nodes (already installed) */
 
-	read_lock_bh(&idev->lock);
 	ipv6_mc_reset(idev);
-	for (i = idev->mc_list; i; i = i->next) {
+	mutex_lock(&idev->mc_lock);
+	for_each_mc_mclock(idev, i) {
 		mld_del_delrec(idev, i);
 		igmp6_group_added(i);
 	}
-	read_unlock_bh(&idev->lock);
+	mutex_unlock(&idev->mc_lock);
 }
 
 /* IPv6 device initialization. */
 
 void ipv6_mc_init_dev(struct inet6_dev *idev)
 {
-	write_lock_bh(&idev->lock);
-	spin_lock_init(&idev->mc_lock);
 	idev->mc_gq_running = 0;
-	timer_setup(&idev->mc_gq_timer, mld_gq_timer_expire, 0);
-	idev->mc_tomb = NULL;
+	INIT_DELAYED_WORK(&idev->mc_gq_work, mld_gq_work);
+	RCU_INIT_POINTER(idev->mc_tomb, NULL);
 	idev->mc_ifc_count = 0;
-	timer_setup(&idev->mc_ifc_timer, mld_ifc_timer_expire, 0);
-	timer_setup(&idev->mc_dad_timer, mld_dad_timer_expire, 0);
+	INIT_DELAYED_WORK(&idev->mc_ifc_work, mld_ifc_work);
+	INIT_DELAYED_WORK(&idev->mc_dad_work, mld_dad_work);
+	INIT_DELAYED_WORK(&idev->mc_query_work, mld_query_work);
+	INIT_DELAYED_WORK(&idev->mc_report_work, mld_report_work);
+	skb_queue_head_init(&idev->mc_query_queue);
+	skb_queue_head_init(&idev->mc_report_queue);
+	spin_lock_init(&idev->mc_query_lock);
+	spin_lock_init(&idev->mc_report_lock);
+	mutex_init(&idev->mc_lock);
 	ipv6_mc_reset(idev);
-	write_unlock_bh(&idev->lock);
 }
 
 /*
@@ -2596,9 +2786,13 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev)
 {
 	struct ifmcaddr6 *i;
 
-	/* Deactivate timers */
+	/* Deactivate works */
 	ipv6_mc_down(idev);
+	mutex_lock(&idev->mc_lock);
 	mld_clear_delrec(idev);
+	mutex_unlock(&idev->mc_lock);
+	mld_clear_query(idev);
+	mld_clear_report(idev);
 
 	/* Delete all-nodes address. */
 	/* We cannot call ipv6_dev_mc_dec() directly, our caller in
@@ -2610,16 +2804,14 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev)
 	if (idev->cnf.forwarding)
 		__ipv6_dev_mc_dec(idev, &in6addr_linklocal_allrouters);
 
-	write_lock_bh(&idev->lock);
-	while ((i = idev->mc_list) != NULL) {
-		idev->mc_list = i->next;
+	mutex_lock(&idev->mc_lock);
+	while ((i = mc_dereference(idev->mc_list, idev))) {
+		rcu_assign_pointer(idev->mc_list, mc_dereference(i->next, idev));
 
-		write_unlock_bh(&idev->lock);
 		ip6_mc_clear_src(i);
 		ma_put(i);
-		write_lock_bh(&idev->lock);
 	}
-	write_unlock_bh(&idev->lock);
+	mutex_unlock(&idev->mc_lock);
 }
 
 static void ipv6_mc_rejoin_groups(struct inet6_dev *idev)
@@ -2628,13 +2820,14 @@ static void ipv6_mc_rejoin_groups(struct inet6_dev *idev)
 
 	ASSERT_RTNL();
 
+	mutex_lock(&idev->mc_lock);
 	if (mld_in_v1_mode(idev)) {
-		read_lock_bh(&idev->lock);
-		for (pmc = idev->mc_list; pmc; pmc = pmc->next)
+		for_each_mc_mclock(idev, pmc)
 			igmp6_join_group(pmc);
-		read_unlock_bh(&idev->lock);
-	} else
+	} else {
 		mld_send_report(idev, NULL);
+	}
+	mutex_unlock(&idev->mc_lock);
 }
 
 static int ipv6_mc_netdev_event(struct notifier_block *this,
@@ -2681,13 +2874,12 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq)
 		idev = __in6_dev_get(state->dev);
 		if (!idev)
 			continue;
-		read_lock_bh(&idev->lock);
-		im = idev->mc_list;
+
+		im = rcu_dereference(idev->mc_list);
 		if (im) {
 			state->idev = idev;
 			break;
 		}
-		read_unlock_bh(&idev->lock);
 	}
 	return im;
 }
@@ -2696,11 +2888,8 @@ static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr
 {
 	struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
 
-	im = im->next;
+	im = rcu_dereference(im->next);
 	while (!im) {
-		if (likely(state->idev))
-			read_unlock_bh(&state->idev->lock);
-
 		state->dev = next_net_device_rcu(state->dev);
 		if (!state->dev) {
 			state->idev = NULL;
@@ -2709,8 +2898,7 @@ static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr
 		state->idev = __in6_dev_get(state->dev);
 		if (!state->idev)
 			continue;
-		read_lock_bh(&state->idev->lock);
-		im = state->idev->mc_list;
+		im = rcu_dereference(state->idev->mc_list);
 	}
 	return im;
 }
@@ -2744,10 +2932,8 @@ static void igmp6_mc_seq_stop(struct seq_file *seq, void *v)
 {
 	struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
 
-	if (likely(state->idev)) {
-		read_unlock_bh(&state->idev->lock);
+	if (likely(state->idev))
 		state->idev = NULL;
-	}
 	state->dev = NULL;
 	rcu_read_unlock();
 }
@@ -2762,8 +2948,8 @@ static int igmp6_mc_seq_show(struct seq_file *seq, void *v)
 		   state->dev->ifindex, state->dev->name,
 		   &im->mca_addr,
 		   im->mca_users, im->mca_flags,
-		   (im->mca_flags&MAF_TIMER_RUNNING) ?
-		   jiffies_to_clock_t(im->mca_timer.expires-jiffies) : 0);
+		   (im->mca_flags & MAF_TIMER_RUNNING) ?
+		   jiffies_to_clock_t(im->mca_work.timer.expires - jiffies) : 0);
 	return 0;
 }
 
@@ -2797,19 +2983,16 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq)
 		idev = __in6_dev_get(state->dev);
 		if (unlikely(idev == NULL))
 			continue;
-		read_lock_bh(&idev->lock);
-		im = idev->mc_list;
+
+		im = rcu_dereference(idev->mc_list);
 		if (likely(im)) {
-			spin_lock_bh(&im->mca_lock);
-			psf = im->mca_sources;
+			psf = rcu_dereference(im->mca_sources);
 			if (likely(psf)) {
 				state->im = im;
 				state->idev = idev;
 				break;
 			}
-			spin_unlock_bh(&im->mca_lock);
 		}
-		read_unlock_bh(&idev->lock);
 	}
 	return psf;
 }
@@ -2818,14 +3001,10 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s
 {
 	struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
 
-	psf = psf->sf_next;
+	psf = rcu_dereference(psf->sf_next);
 	while (!psf) {
-		spin_unlock_bh(&state->im->mca_lock);
-		state->im = state->im->next;
+		state->im = rcu_dereference(state->im->next);
 		while (!state->im) {
-			if (likely(state->idev))
-				read_unlock_bh(&state->idev->lock);
-
 			state->dev = next_net_device_rcu(state->dev);
 			if (!state->dev) {
 				state->idev = NULL;
@@ -2834,13 +3013,11 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s
 			state->idev = __in6_dev_get(state->dev);
 			if (!state->idev)
 				continue;
-			read_lock_bh(&state->idev->lock);
-			state->im = state->idev->mc_list;
+			state->im = rcu_dereference(state->idev->mc_list);
 		}
 		if (!state->im)
 			break;
-		spin_lock_bh(&state->im->mca_lock);
-		psf = state->im->mca_sources;
+		psf = rcu_dereference(state->im->mca_sources);
 	}
out:
 	return psf;
}
@@ -2877,14 +3054,12 @@ static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v)
 	__releases(RCU)
 {
 	struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
 
-	if (likely(state->im)) {
-		spin_unlock_bh(&state->im->mca_lock);
+
+	if (likely(state->im))
 		state->im = NULL;
-	}
-	if (likely(state->idev)) {
-		read_unlock_bh(&state->idev->lock);
+	if (likely(state->idev))
 		state->idev = NULL;
-	}
+
 	state->dev = NULL;
 	rcu_read_unlock();
 }
@@ -2965,6 +3140,7 @@ static int __net_init igmp6_net_init(struct net *net)
 	}
 
 	inet6_sk(net->ipv6.igmp_sk)->hop_limit = 1;
+	net->ipv6.igmp_sk->sk_allocation = GFP_KERNEL;
 
 	err = inet_ctl_sock_create(&net->ipv6.mc_autojoin_sk, PF_INET6,
				   SOCK_RAW, IPPROTO_ICMPV6, net);
@@ -3002,7 +3178,19 @@ static struct pernet_operations igmp6_net_ops = {
 
 int __init igmp6_init(void)
 {
-	return register_pernet_subsys(&igmp6_net_ops);
+	int err;
+
+	err = register_pernet_subsys(&igmp6_net_ops);
+	if (err)
+		return err;
+
+	mld_wq = create_workqueue("mld");
+	if (!mld_wq) {
+		unregister_pernet_subsys(&igmp6_net_ops);
+		return -ENOMEM;
+	}
+
+	return err;
 }
 
 int __init igmp6_late_init(void)
@@ -3013,6 +3201,7 @@ int __init igmp6_late_init(void)
 void igmp6_cleanup(void)
 {
 	unregister_pernet_subsys(&igmp6_net_ops);
+	destroy_workqueue(mld_wq);
 }
 
 void igmp6_late_cleanup(void)
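All deferred MLD processing now funnels through the dedicated mld_wq workqueue created in igmp6_init() and destroyed in igmp6_cleanup(). A sketch of the same bring-up/tear-down ordering in isolation, with hypothetical names (example_wq, example_net_ops):

/* Hypothetical sketch of the init/cleanup ordering used above. */
static struct workqueue_struct *example_wq;
static struct pernet_operations example_net_ops;

static int __init example_init(void)
{
	int err = register_pernet_subsys(&example_net_ops);

	if (err)
		return err;

	/* create the workqueue last so a failure can still unwind */
	example_wq = create_workqueue("example");
	if (!example_wq) {
		unregister_pernet_subsys(&example_net_ops);
		return -ENOMEM;
	}
	return 0;
}

static void example_cleanup(void)
{
	/* unregister first so nothing new is queued, then drain */
	unregister_pernet_subsys(&example_net_ops);
	destroy_workqueue(example_wq);
}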