diff options
author | Jakub Kicinski <kuba@kernel.org> | 2020-09-07 13:16:48 -0700 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2020-09-07 13:18:15 -0700 |
commit | 6af52ae2ed14a6bc756d5606b29097dfd76740b8 (patch) | |
tree | a23abc847500aee38959cbcf556b0392da42adc0 | |
parent | bb1416adb8a084f2979ac9536dca3e4c44d45821 (diff) | |
parent | e12cec65b5546f19217e26aafb8add6e2fadca18 (diff) | |
download | linux-stable-6af52ae2ed14a6bc756d5606b29097dfd76740b8.tar.gz linux-stable-6af52ae2ed14a6bc756d5606b29097dfd76740b8.tar.bz2 linux-stable-6af52ae2ed14a6bc756d5606b29097dfd76740b8.zip |
Merge branch 'net-bridge-mcast-initial-IGMPv3-MLDv2-support-part-1'
Nikolay Aleksandrov says:
====================
net: bridge: mcast: initial IGMPv3/MLDv2 support (part 1)
This patch-set implements the control plane for initial IGMPv3/MLDv2
support which takes care of include/exclude sets and state transitions
based on the different report types.
Patch 01 arranges the structure better by moving the frequently used
fields together, patch 02 factors out the port group deletion code which is
used in a few places. Patches 03 and 04 add support for source lists and
group modes per port group which are dumped. Patch 05 adds support for
group-and-source specific queries required for IGMPv3/MLDv2. Then patch 06
adds support for group and group-and-source query retransmissions via a new
rexmit timer. Patches 07 and 08 make use of the already present mdb fill
functions when sending notifications so we can have the full mdb entries'
state filled in (with sources, mode etc). Patch 09 takes care of port group
expiration, it switches the group mode to include and deletes it if there
are no sources with active timers. Patches 10-13 are the core changes which
add support for IGMPv3/MLDv2 reports and handle the source list set
operations as per RFCs 3376 and 3810, all IGMPv3/MLDv2 report types with
their transitions should be supported after these patches. I've used RFCs
3376, 3810 and FRR as a reference implementation. The source lists are
capped at 32 entries, we can remove that limitation at a later point which
would require a better data structure to hold them. IGMPv3 processing is
hidden behind the bridge's multicast_igmp_version option which must be set
to 3 in order to enable it. MLDv2 processing is hidden behind the bridge's
multicast_mld_version which must be set to 2 in order to enable it.
Patch 14 improves other querier processing a bit (more about this below).
And finally patch 15 transforms the src gc so it can be used with all mcast
objects since now we have multiple timers that can be running and we
need to make sure they have all finished before freeing the objects.
This is part 1, it only adds control plane support and doesn't change
the fast path. A following patch-set will take care of that.
Here're the sets that will come next (in order):
- Fast path patch-set which adds support for (S, G) mdb entries needed
for IGMPv3/MLDv2 forwarding, entry add source (kernel, user-space etc)
needed for IGMPv3/MLDv2 entry management, entry block mode needed for
IGMPv3/MLDv2 exclude mode. This set will also add iproute2 support for
manipulating and showing all the new state.
- Selftests patches which will verify all state transitions and forwarding
- Explicit host tracking patch-set, needed for proper fast leave and
with it fast leave will be enabled for IGMPv3/MLDv2
Not implemented yet:
- Host IGMPv3/MLDv2 filter support (currently we handle only join/leave
as before)
- Proper other querier source timer and value updates
- IGMPv3/v2 MLDv2/v1 compat (I have a few rough patches for this one)
v4: move old patch 05 to 02 (group del patch), before src lists
patch 02: set pg's fast leave flag when deleting due to fast leave
patch 03: now can use the new port del function
add igmpv2/mldv1 bool which are set when the entry is
added in those modes (later will be passed as update_timer)
patch 10: rename update_timer to igmpv2_mldv1 and use the passed
value from br_multicast_add_group's callers
v3: add IPv6/MLDv2 support, most patches are changed
v2:
patches 03-04: make src lists RCU friendly so they can be traversed
when dumping, reduce limit to a more conservative 32
src group entries for a start
patches 11-13: remove helper and directly do bitops
patch 15: force mcast gc on bridge port del to make sure port
group timers have finished before freeing the port
====================
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- | include/uapi/linux/if_bridge.h | 21 | ||||
-rw-r--r-- | net/bridge/br_mdb.c | 256 | ||||
-rw-r--r-- | net/bridge/br_multicast.c | 1290 | ||||
-rw-r--r-- | net/bridge/br_private.h | 70 |
4 files changed, 1415 insertions, 222 deletions
diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index c1227aecd38f..75a2ac479247 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -455,10 +455,31 @@ enum { enum { MDBA_MDB_EATTR_UNSPEC, MDBA_MDB_EATTR_TIMER, + MDBA_MDB_EATTR_SRC_LIST, + MDBA_MDB_EATTR_GROUP_MODE, __MDBA_MDB_EATTR_MAX }; #define MDBA_MDB_EATTR_MAX (__MDBA_MDB_EATTR_MAX - 1) +/* per mdb entry source */ +enum { + MDBA_MDB_SRCLIST_UNSPEC, + MDBA_MDB_SRCLIST_ENTRY, + __MDBA_MDB_SRCLIST_MAX +}; +#define MDBA_MDB_SRCLIST_MAX (__MDBA_MDB_SRCLIST_MAX - 1) + +/* per mdb entry per source attributes + * these are embedded in MDBA_MDB_SRCLIST_ENTRY + */ +enum { + MDBA_MDB_SRCATTR_UNSPEC, + MDBA_MDB_SRCATTR_ADDRESS, + MDBA_MDB_SRCATTR_TIMER, + __MDBA_MDB_SRCATTR_MAX +}; +#define MDBA_MDB_SRCATTR_MAX (__MDBA_MDB_SRCATTR_MAX - 1) + /* multicast router types */ enum { MDB_RTR_TYPE_DISABLED, diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index da5ed4cf9233..67e0976aeed2 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -77,10 +77,67 @@ static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip) #endif } +static int __mdb_fill_srcs(struct sk_buff *skb, + struct net_bridge_port_group *p) +{ + struct net_bridge_group_src *ent; + struct nlattr *nest, *nest_ent; + + if (hlist_empty(&p->src_list)) + return 0; + + nest = nla_nest_start(skb, MDBA_MDB_EATTR_SRC_LIST); + if (!nest) + return -EMSGSIZE; + + hlist_for_each_entry_rcu(ent, &p->src_list, node, + lockdep_is_held(&p->port->br->multicast_lock)) { + nest_ent = nla_nest_start(skb, MDBA_MDB_SRCLIST_ENTRY); + if (!nest_ent) + goto out_cancel_err; + switch (ent->addr.proto) { + case htons(ETH_P_IP): + if (nla_put_in_addr(skb, MDBA_MDB_SRCATTR_ADDRESS, + ent->addr.u.ip4)) { + nla_nest_cancel(skb, nest_ent); + goto out_cancel_err; + } + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + if (nla_put_in6_addr(skb, MDBA_MDB_SRCATTR_ADDRESS, + &ent->addr.u.ip6)) { + nla_nest_cancel(skb, nest_ent); + goto out_cancel_err; + } + break; +#endif + default: + nla_nest_cancel(skb, nest_ent); + continue; + } + if (nla_put_u32(skb, MDBA_MDB_SRCATTR_TIMER, + br_timer_value(&ent->timer))) { + nla_nest_cancel(skb, nest_ent); + goto out_cancel_err; + } + nla_nest_end(skb, nest_ent); + } + + nla_nest_end(skb, nest); + + return 0; + +out_cancel_err: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + static int __mdb_fill_info(struct sk_buff *skb, struct net_bridge_mdb_entry *mp, struct net_bridge_port_group *p) { + bool dump_srcs_mode = false; struct timer_list *mtimer; struct nlattr *nest_ent; struct br_mdb_entry e; @@ -119,6 +176,23 @@ static int __mdb_fill_info(struct sk_buff *skb, nla_nest_cancel(skb, nest_ent); return -EMSGSIZE; } + switch (mp->addr.proto) { + case htons(ETH_P_IP): + dump_srcs_mode = !!(p && mp->br->multicast_igmp_version == 3); + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + dump_srcs_mode = !!(p && mp->br->multicast_mld_version == 2); + break; +#endif + } + if (dump_srcs_mode && + (__mdb_fill_srcs(skb, p) || + nla_put_u8(skb, MDBA_MDB_EATTR_GROUP_MODE, p->filter_mode))) { + nla_nest_cancel(skb, nest_ent); + return -EMSGSIZE; + } + nla_nest_end(skb, nest_ent); return 0; @@ -127,7 +201,7 @@ static int __mdb_fill_info(struct sk_buff *skb, static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev) { - int idx = 0, s_idx = cb->args[1], err = 0; + int idx = 0, s_idx = cb->args[1], err = 0, pidx = 0, s_pidx = cb->args[2]; struct net_bridge *br = netdev_priv(dev); struct net_bridge_mdb_entry *mp; struct nlattr *nest, *nest2; @@ -152,7 +226,7 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, break; } - if (mp->host_joined) { + if (!s_pidx && mp->host_joined) { err = __mdb_fill_info(skb, mp, NULL); if (err) { nla_nest_cancel(skb, nest2); @@ -164,13 +238,19 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, pp = &p->next) { if (!p->port) continue; + if (pidx < s_pidx) + goto skip_pg; err = __mdb_fill_info(skb, mp, p); if (err) { nla_nest_cancel(skb, nest2); goto out; } +skip_pg: + pidx++; } + pidx = 0; + s_pidx = 0; nla_nest_end(skb, nest2); skip: idx++; @@ -178,6 +258,7 @@ skip: out: cb->args[1] = idx; + cb->args[2] = pidx; nla_nest_end(skb, nest); return err; } @@ -263,14 +344,15 @@ out: static int nlmsg_populate_mdb_fill(struct sk_buff *skb, struct net_device *dev, - struct br_mdb_entry *entry, u32 pid, - u32 seq, int type, unsigned int flags) + struct net_bridge_mdb_entry *mp, + struct net_bridge_port_group *pg, + int type) { struct nlmsghdr *nlh; struct br_port_msg *bpm; struct nlattr *nest, *nest2; - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), 0); + nlh = nlmsg_put(skb, 0, 0, type, sizeof(*bpm), 0); if (!nlh) return -EMSGSIZE; @@ -285,7 +367,7 @@ static int nlmsg_populate_mdb_fill(struct sk_buff *skb, if (nest2 == NULL) goto end; - if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(*entry), entry)) + if (__mdb_fill_info(skb, mp, pg)) goto end; nla_nest_end(skb, nest2); @@ -300,10 +382,49 @@ cancel: return -EMSGSIZE; } -static inline size_t rtnl_mdb_nlmsg_size(void) +static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg) { - return NLMSG_ALIGN(sizeof(struct br_port_msg)) - + nla_total_size(sizeof(struct br_mdb_entry)); + size_t nlmsg_size = NLMSG_ALIGN(sizeof(struct br_port_msg)) + + nla_total_size(sizeof(struct br_mdb_entry)) + + nla_total_size(sizeof(u32)); + struct net_bridge_group_src *ent; + size_t addr_size = 0; + + if (!pg) + goto out; + + switch (pg->addr.proto) { + case htons(ETH_P_IP): + if (pg->port->br->multicast_igmp_version == 2) + goto out; + addr_size = sizeof(__be32); + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + if (pg->port->br->multicast_mld_version == 1) + goto out; + addr_size = sizeof(struct in6_addr); + break; +#endif + } + + /* MDBA_MDB_EATTR_GROUP_MODE */ + nlmsg_size += nla_total_size(sizeof(u8)); + + /* MDBA_MDB_EATTR_SRC_LIST nested attr */ + if (!hlist_empty(&pg->src_list)) + nlmsg_size += nla_total_size(0); + + hlist_for_each_entry(ent, &pg->src_list, node) { + /* MDBA_MDB_SRCLIST_ENTRY nested attr + + * MDBA_MDB_SRCATTR_ADDRESS + MDBA_MDB_SRCATTR_TIMER + */ + nlmsg_size += nla_total_size(0) + + nla_total_size(addr_size) + + nla_total_size(sizeof(u32)); + } +out: + return nlmsg_size; } struct br_mdb_complete_info { @@ -341,21 +462,22 @@ err: static void br_mdb_switchdev_host_port(struct net_device *dev, struct net_device *lower_dev, - struct br_mdb_entry *entry, int type) + struct net_bridge_mdb_entry *mp, + int type) { struct switchdev_obj_port_mdb mdb = { .obj = { .id = SWITCHDEV_OBJ_ID_HOST_MDB, .flags = SWITCHDEV_F_DEFER, }, - .vid = entry->vid, + .vid = mp->addr.vid, }; - if (entry->addr.proto == htons(ETH_P_IP)) - ip_eth_mc_map(entry->addr.u.ip4, mdb.addr); + if (mp->addr.proto == htons(ETH_P_IP)) + ip_eth_mc_map(mp->addr.u.ip4, mdb.addr); #if IS_ENABLED(CONFIG_IPV6) else - ipv6_eth_mc_map(&entry->addr.u.ip6, mdb.addr); + ipv6_eth_mc_map(&mp->addr.u.ip6, mdb.addr); #endif mdb.obj.orig_dev = dev; @@ -370,17 +492,19 @@ static void br_mdb_switchdev_host_port(struct net_device *dev, } static void br_mdb_switchdev_host(struct net_device *dev, - struct br_mdb_entry *entry, int type) + struct net_bridge_mdb_entry *mp, int type) { struct net_device *lower_dev; struct list_head *iter; netdev_for_each_lower_dev(dev, lower_dev, iter) - br_mdb_switchdev_host_port(dev, lower_dev, entry, type); + br_mdb_switchdev_host_port(dev, lower_dev, mp, type); } -static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p, - struct br_mdb_entry *entry, int type) +void br_mdb_notify(struct net_device *dev, + struct net_bridge_mdb_entry *mp, + struct net_bridge_port_group *pg, + int type) { struct br_mdb_complete_info *complete_info; struct switchdev_obj_port_mdb mdb = { @@ -388,44 +512,45 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p, .id = SWITCHDEV_OBJ_ID_PORT_MDB, .flags = SWITCHDEV_F_DEFER, }, - .vid = entry->vid, + .vid = mp->addr.vid, }; - struct net_device *port_dev; struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; - port_dev = __dev_get_by_index(net, entry->ifindex); - if (entry->addr.proto == htons(ETH_P_IP)) - ip_eth_mc_map(entry->addr.u.ip4, mdb.addr); + if (pg) { + if (mp->addr.proto == htons(ETH_P_IP)) + ip_eth_mc_map(mp->addr.u.ip4, mdb.addr); #if IS_ENABLED(CONFIG_IPV6) - else - ipv6_eth_mc_map(&entry->addr.u.ip6, mdb.addr); + else + ipv6_eth_mc_map(&mp->addr.u.ip6, mdb.addr); #endif - - mdb.obj.orig_dev = port_dev; - if (p && port_dev && type == RTM_NEWMDB) { - complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC); - if (complete_info) { - complete_info->port = p; - __mdb_entry_to_br_ip(entry, &complete_info->ip); + mdb.obj.orig_dev = pg->port->dev; + switch (type) { + case RTM_NEWMDB: + complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC); + if (!complete_info) + break; + complete_info->port = pg->port; + complete_info->ip = mp->addr; mdb.obj.complete_priv = complete_info; mdb.obj.complete = br_mdb_complete; - if (switchdev_port_obj_add(port_dev, &mdb.obj, NULL)) + if (switchdev_port_obj_add(pg->port->dev, &mdb.obj, NULL)) kfree(complete_info); + break; + case RTM_DELMDB: + switchdev_port_obj_del(pg->port->dev, &mdb.obj); + break; } - } else if (p && port_dev && type == RTM_DELMDB) { - switchdev_port_obj_del(port_dev, &mdb.obj); + } else { + br_mdb_switchdev_host(dev, mp, type); } - if (!p) - br_mdb_switchdev_host(dev, entry, type); - - skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC); + skb = nlmsg_new(rtnl_mdb_nlmsg_size(pg), GFP_ATOMIC); if (!skb) goto errout; - err = nlmsg_populate_mdb_fill(skb, dev, entry, 0, 0, type, NTF_SELF); + err = nlmsg_populate_mdb_fill(skb, dev, mp, pg, type); if (err < 0) { kfree_skb(skb); goto errout; @@ -437,26 +562,6 @@ errout: rtnl_set_sk_err(net, RTNLGRP_MDB, err); } -void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, - struct br_ip *group, int type, u8 flags) -{ - struct br_mdb_entry entry; - - memset(&entry, 0, sizeof(entry)); - if (port) - entry.ifindex = port->dev->ifindex; - else - entry.ifindex = dev->ifindex; - entry.addr.proto = group->proto; - entry.addr.u.ip4 = group->u.ip4; -#if IS_ENABLED(CONFIG_IPV6) - entry.addr.u.ip6 = group->u.ip6; -#endif - entry.vid = group->vid; - __mdb_entry_fill_flags(&entry, flags); - __br_mdb_notify(dev, port, &entry, type); -} - static int nlmsg_populate_rtr_fill(struct sk_buff *skb, struct net_device *dev, int ifindex, u32 pid, @@ -600,7 +705,7 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, } static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, - struct br_ip *group, unsigned char state) + struct br_ip *group, struct br_mdb_entry *entry) { struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; @@ -619,12 +724,13 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, /* host join */ if (!port) { /* don't allow any flags for host-joined groups */ - if (state) + if (entry->state) return -EINVAL; if (mp->host_joined) return -EEXIST; br_multicast_host_join(mp, false); + br_mdb_notify(br->dev, mp, NULL, RTM_NEWMDB); return 0; } @@ -638,12 +744,14 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, break; } - p = br_multicast_new_port_group(port, group, *pp, state, NULL); + p = br_multicast_new_port_group(port, group, *pp, entry->state, NULL, + MCAST_EXCLUDE); if (unlikely(!p)) return -ENOMEM; rcu_assign_pointer(*pp, p); - if (state == MDB_TEMPORARY) + if (entry->state == MDB_TEMPORARY) mod_timer(&p->timer, now + br->multicast_membership_interval); + br_mdb_notify(br->dev, mp, p, RTM_NEWMDB); return 0; } @@ -672,7 +780,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, __mdb_entry_to_br_ip(entry, &ip); spin_lock_bh(&br->multicast_lock); - ret = br_mdb_add_group(br, p, &ip, entry->state); + ret = br_mdb_add_group(br, p, &ip, entry); spin_unlock_bh(&br->multicast_lock); return ret; } @@ -717,12 +825,9 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, err = __br_mdb_add(net, br, entry); if (err) break; - __br_mdb_notify(dev, p, entry, RTM_NEWMDB); } } else { err = __br_mdb_add(net, br, entry); - if (!err) - __br_mdb_notify(dev, p, entry, RTM_NEWMDB); } return err; @@ -750,6 +855,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) if (entry->ifindex == mp->br->dev->ifindex && mp->host_joined) { br_multicast_host_leave(mp, false); err = 0; + br_mdb_notify(br->dev, mp, NULL, RTM_DELMDB); if (!mp->ports && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); goto unlock; @@ -764,16 +870,8 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) if (p->port->state == BR_STATE_DISABLED) goto unlock; - __mdb_entry_fill_flags(entry, p->flags); - rcu_assign_pointer(*pp, p->next); - hlist_del_init(&p->mglist); - del_timer(&p->timer); - kfree_rcu(p, rcu); + br_multicast_del_pg(mp, p, pp); err = 0; - - if (!mp->ports && !mp->host_joined && - netif_running(br->dev)) - mod_timer(&mp->timer, jiffies); break; } @@ -820,13 +918,9 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, list_for_each_entry(v, &vg->vlan_list, vlist) { entry->vid = v->vid; err = __br_mdb_del(br, entry); - if (!err) - __br_mdb_notify(dev, p, entry, RTM_DELMDB); } } else { err = __br_mdb_del(br, entry); - if (!err) - __br_mdb_notify(dev, p, entry, RTM_DELMDB); } return err; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 4c4a93abde68..b83f11228948 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -50,6 +50,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br, __be32 group, __u16 vid, const unsigned char *src); +static void br_multicast_port_group_rexmit(struct timer_list *t); static void __del_port_router(struct net_bridge_port *p); #if IS_ENABLED(CONFIG_IPV6) @@ -139,6 +140,29 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, return br_mdb_ip_get_rcu(br, &ip); } +static void br_multicast_destroy_mdb_entry(struct net_bridge_mcast_gc *gc) +{ + struct net_bridge_mdb_entry *mp; + + mp = container_of(gc, struct net_bridge_mdb_entry, mcast_gc); + WARN_ON(!hlist_unhashed(&mp->mdb_node)); + WARN_ON(mp->ports); + + del_timer_sync(&mp->timer); + kfree_rcu(mp, rcu); +} + +static void br_multicast_del_mdb_entry(struct net_bridge_mdb_entry *mp) +{ + struct net_bridge *br = mp->br; + + rhashtable_remove_fast(&br->mdb_hash_tbl, &mp->rhnode, + br_mdb_rht_params); + hlist_del_init_rcu(&mp->mdb_node); + hlist_add_head(&mp->mcast_gc.gc_node, &br->mcast_gc_list); + queue_work(system_long_wq, &br->mcast_gc_work); +} + static void br_multicast_group_expired(struct timer_list *t) { struct net_bridge_mdb_entry *mp = from_timer(mp, t, timer); @@ -152,23 +176,71 @@ static void br_multicast_group_expired(struct timer_list *t) if (mp->ports) goto out; + br_multicast_del_mdb_entry(mp); +out: + spin_unlock(&br->multicast_lock); +} - rhashtable_remove_fast(&br->mdb_hash_tbl, &mp->rhnode, - br_mdb_rht_params); - hlist_del_rcu(&mp->mdb_node); +static void br_multicast_destroy_group_src(struct net_bridge_mcast_gc *gc) +{ + struct net_bridge_group_src *src; - kfree_rcu(mp, rcu); + src = container_of(gc, struct net_bridge_group_src, mcast_gc); + WARN_ON(!hlist_unhashed(&src->node)); -out: - spin_unlock(&br->multicast_lock); + del_timer_sync(&src->timer); + kfree_rcu(src, rcu); } -static void br_multicast_del_pg(struct net_bridge *br, - struct net_bridge_port_group *pg) +static void br_multicast_del_group_src(struct net_bridge_group_src *src) { + struct net_bridge *br = src->pg->port->br; + + hlist_del_init_rcu(&src->node); + src->pg->src_ents--; + hlist_add_head(&src->mcast_gc.gc_node, &br->mcast_gc_list); + queue_work(system_long_wq, &br->mcast_gc_work); +} + +static void br_multicast_destroy_port_group(struct net_bridge_mcast_gc *gc) +{ + struct net_bridge_port_group *pg; + + pg = container_of(gc, struct net_bridge_port_group, mcast_gc); + WARN_ON(!hlist_unhashed(&pg->mglist)); + WARN_ON(!hlist_empty(&pg->src_list)); + + del_timer_sync(&pg->rexmit_timer); + del_timer_sync(&pg->timer); + kfree_rcu(pg, rcu); +} + +void br_multicast_del_pg(struct net_bridge_mdb_entry *mp, + struct net_bridge_port_group *pg, + struct net_bridge_port_group __rcu **pp) +{ + struct net_bridge *br = pg->port->br; + struct net_bridge_group_src *ent; + struct hlist_node *tmp; + + rcu_assign_pointer(*pp, pg->next); + hlist_del_init(&pg->mglist); + hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node) + br_multicast_del_group_src(ent); + br_mdb_notify(br->dev, mp, pg, RTM_DELMDB); + hlist_add_head(&pg->mcast_gc.gc_node, &br->mcast_gc_list); + queue_work(system_long_wq, &br->mcast_gc_work); + + if (!mp->ports && !mp->host_joined && netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); +} + +static void br_multicast_find_del_pg(struct net_bridge *br, + struct net_bridge_port_group *pg) +{ + struct net_bridge_port_group __rcu **pp; struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; - struct net_bridge_port_group __rcu **pp; mp = br_mdb_ip_get(br, &pg->addr); if (WARN_ON(!mp)) @@ -180,17 +252,7 @@ static void br_multicast_del_pg(struct net_bridge *br, if (p != pg) continue; - rcu_assign_pointer(*pp, p->next); - hlist_del_init(&p->mglist); - del_timer(&p->timer); - br_mdb_notify(br->dev, p->port, &pg->addr, RTM_DELMDB, - p->flags); - kfree_rcu(p, rcu); - - if (!mp->ports && !mp->host_joined && - netif_running(br->dev)) - mod_timer(&mp->timer, jiffies); - + br_multicast_del_pg(mp, pg, pp); return; } @@ -200,35 +262,95 @@ static void br_multicast_del_pg(struct net_bridge *br, static void br_multicast_port_group_expired(struct timer_list *t) { struct net_bridge_port_group *pg = from_timer(pg, t, timer); + struct net_bridge_group_src *src_ent; struct net_bridge *br = pg->port->br; + struct hlist_node *tmp; + bool changed; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || timer_pending(&pg->timer) || hlist_unhashed(&pg->mglist) || pg->flags & MDB_PG_FLAGS_PERMANENT) goto out; - br_multicast_del_pg(br, pg); + changed = !!(pg->filter_mode == MCAST_EXCLUDE); + pg->filter_mode = MCAST_INCLUDE; + hlist_for_each_entry_safe(src_ent, tmp, &pg->src_list, node) { + if (!timer_pending(&src_ent->timer)) { + br_multicast_del_group_src(src_ent); + changed = true; + } + } + + if (hlist_empty(&pg->src_list)) { + br_multicast_find_del_pg(br, pg); + } else if (changed) { + struct net_bridge_mdb_entry *mp = br_mdb_ip_get(br, &pg->addr); + if (WARN_ON(!mp)) + goto out; + br_mdb_notify(br->dev, mp, pg, RTM_NEWMDB); + } out: spin_unlock(&br->multicast_lock); } -static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, - __be32 group, - u8 *igmp_type) +static void br_multicast_gc(struct hlist_head *head) { + struct net_bridge_mcast_gc *gcent; + struct hlist_node *tmp; + + hlist_for_each_entry_safe(gcent, tmp, head, gc_node) { + hlist_del_init(&gcent->gc_node); + gcent->destroy(gcent); + } +} + +static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, + struct net_bridge_port_group *pg, + __be32 ip_dst, __be32 group, + bool with_srcs, bool over_lmqt, + u8 sflag, u8 *igmp_type, + bool *need_rexmit) +{ + struct net_bridge_port *p = pg ? pg->port : NULL; + struct net_bridge_group_src *ent; + size_t pkt_size, igmp_hdr_size; + unsigned long now = jiffies; struct igmpv3_query *ihv3; - size_t igmp_hdr_size; + void *csum_start = NULL; + __sum16 *csum = NULL; struct sk_buff *skb; struct igmphdr *ih; struct ethhdr *eth; + unsigned long lmqt; struct iphdr *iph; + u16 lmqt_srcs = 0; igmp_hdr_size = sizeof(*ih); - if (br->multicast_igmp_version == 3) + if (br->multicast_igmp_version == 3) { igmp_hdr_size = sizeof(*ihv3); - skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*iph) + - igmp_hdr_size + 4); + if (pg && with_srcs) { + lmqt = now + (br->multicast_last_member_interval * + br->multicast_last_member_count); + hlist_for_each_entry(ent, &pg->src_list, node) { + if (over_lmqt == time_after(ent->timer.expires, + lmqt) && + ent->src_query_rexmit_cnt > 0) + lmqt_srcs++; + } + + if (!lmqt_srcs) + return NULL; + igmp_hdr_size += lmqt_srcs * sizeof(__be32); + } + } + + pkt_size = sizeof(*eth) + sizeof(*iph) + 4 + igmp_hdr_size; + if ((p && pkt_size > p->dev->mtu) || + pkt_size > br->dev->mtu) + return NULL; + + skb = netdev_alloc_skb_ip_align(br->dev, pkt_size); if (!skb) goto out; @@ -238,29 +360,24 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, eth = eth_hdr(skb); ether_addr_copy(eth->h_source, br->dev->dev_addr); - eth->h_dest[0] = 1; - eth->h_dest[1] = 0; - eth->h_dest[2] = 0x5e; - eth->h_dest[3] = 0; - eth->h_dest[4] = 0; - eth->h_dest[5] = 1; + ip_eth_mc_map(ip_dst, eth->h_dest); eth->h_proto = htons(ETH_P_IP); skb_put(skb, sizeof(*eth)); skb_set_network_header(skb, skb->len); iph = ip_hdr(skb); + iph->tot_len = htons(pkt_size - sizeof(*eth)); iph->version = 4; iph->ihl = 6; iph->tos = 0xc0; - iph->tot_len = htons(sizeof(*iph) + igmp_hdr_size + 4); iph->id = 0; iph->frag_off = htons(IP_DF); iph->ttl = 1; iph->protocol = IPPROTO_IGMP; iph->saddr = br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR) ? inet_select_addr(br->dev, 0, RT_SCOPE_LINK) : 0; - iph->daddr = htonl(INADDR_ALLHOSTS_GROUP); + iph->daddr = ip_dst; ((u8 *)&iph[1])[0] = IPOPT_RA; ((u8 *)&iph[1])[1] = 4; ((u8 *)&iph[1])[2] = 0; @@ -280,7 +397,8 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, (HZ / IGMP_TIMER_SCALE); ih->group = group; ih->csum = 0; - ih->csum = ip_compute_csum((void *)ih, sizeof(*ih)); + csum = &ih->csum; + csum_start = (void *)ih; break; case 3: ihv3 = igmpv3_query_hdr(skb); @@ -290,15 +408,40 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, (HZ / IGMP_TIMER_SCALE); ihv3->group = group; ihv3->qqic = br->multicast_query_interval / HZ; - ihv3->nsrcs = 0; + ihv3->nsrcs = htons(lmqt_srcs); ihv3->resv = 0; - ihv3->suppress = 0; + ihv3->suppress = sflag; ihv3->qrv = 2; ihv3->csum = 0; - ihv3->csum = ip_compute_csum((void *)ihv3, sizeof(*ihv3)); + csum = &ihv3->csum; + csum_start = (void *)ihv3; + if (!pg || !with_srcs) + break; + + lmqt_srcs = 0; + hlist_for_each_entry(ent, &pg->src_list, node) { + if (over_lmqt == time_after(ent->timer.expires, + lmqt) && + ent->src_query_rexmit_cnt > 0) { + ihv3->srcs[lmqt_srcs++] = ent->addr.u.ip4; + ent->src_query_rexmit_cnt--; + if (need_rexmit && ent->src_query_rexmit_cnt) + *need_rexmit = true; + } + } + if (WARN_ON(lmqt_srcs != ntohs(ihv3->nsrcs))) { + kfree_skb(skb); + return NULL; + } break; } + if (WARN_ON(!csum || !csum_start)) { + kfree_skb(skb); + return NULL; + } + + *csum = ip_compute_csum(csum_start, igmp_hdr_size); skb_put(skb, igmp_hdr_size); __skb_pull(skb, sizeof(*eth)); @@ -308,23 +451,54 @@ out: #if IS_ENABLED(CONFIG_IPV6) static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, - const struct in6_addr *grp, - u8 *igmp_type) -{ + struct net_bridge_port_group *pg, + const struct in6_addr *ip6_dst, + const struct in6_addr *group, + bool with_srcs, bool over_llqt, + u8 sflag, u8 *igmp_type, + bool *need_rexmit) +{ + struct net_bridge_port *p = pg ? pg->port : NULL; + struct net_bridge_group_src *ent; + size_t pkt_size, mld_hdr_size; + unsigned long now = jiffies; struct mld2_query *mld2q; + void *csum_start = NULL; unsigned long interval; + __sum16 *csum = NULL; struct ipv6hdr *ip6h; struct mld_msg *mldq; - size_t mld_hdr_size; struct sk_buff *skb; + unsigned long llqt; struct ethhdr *eth; + u16 llqt_srcs = 0; u8 *hopopt; mld_hdr_size = sizeof(*mldq); - if (br->multicast_mld_version == 2) + if (br->multicast_mld_version == 2) { mld_hdr_size = sizeof(*mld2q); - skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*ip6h) + - 8 + mld_hdr_size); + if (pg && with_srcs) { + llqt = now + (br->multicast_last_member_interval * + br->multicast_last_member_count); + hlist_for_each_entry(ent, &pg->src_list, node) { + if (over_llqt == time_after(ent->timer.expires, + llqt) && + ent->src_query_rexmit_cnt > 0) + llqt_srcs++; + } + + if (!llqt_srcs) + return NULL; + mld_hdr_size += llqt_srcs * sizeof(struct in6_addr); + } + } + + pkt_size = sizeof(*eth) + sizeof(*ip6h) + 8 + mld_hdr_size; + if ((p && pkt_size > p->dev->mtu) || + pkt_size > br->dev->mtu) + return NULL; + + skb = netdev_alloc_skb_ip_align(br->dev, pkt_size); if (!skb) goto out; @@ -346,7 +520,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, ip6h->payload_len = htons(8 + mld_hdr_size); ip6h->nexthdr = IPPROTO_HOPOPTS; ip6h->hop_limit = 1; - ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1)); + ip6h->daddr = *ip6_dst; if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, &ip6h->saddr)) { kfree_skb(skb); @@ -371,7 +545,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, /* ICMPv6 */ skb_set_transport_header(skb, skb->len); - interval = ipv6_addr_any(grp) ? + interval = ipv6_addr_any(group) ? br->multicast_query_response_interval : br->multicast_last_member_interval; *igmp_type = ICMPV6_MGM_QUERY; @@ -383,12 +557,9 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, mldq->mld_cksum = 0; mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval)); mldq->mld_reserved = 0; - mldq->mld_mca = *grp; - mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, - sizeof(*mldq), IPPROTO_ICMPV6, - csum_partial(mldq, - sizeof(*mldq), - 0)); + mldq->mld_mca = *group; + csum = &mldq->mld_cksum; + csum_start = (void *)mldq; break; case 2: mld2q = (struct mld2_query *)icmp6_hdr(skb); @@ -398,21 +569,43 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, mld2q->mld2q_cksum = 0; mld2q->mld2q_resv1 = 0; mld2q->mld2q_resv2 = 0; - mld2q->mld2q_suppress = 0; + mld2q->mld2q_suppress = sflag; mld2q->mld2q_qrv = 2; - mld2q->mld2q_nsrcs = 0; + mld2q->mld2q_nsrcs = htons(llqt_srcs); mld2q->mld2q_qqic = br->multicast_query_interval / HZ; - mld2q->mld2q_mca = *grp; - mld2q->mld2q_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, - sizeof(*mld2q), - IPPROTO_ICMPV6, - csum_partial(mld2q, - sizeof(*mld2q), - 0)); + mld2q->mld2q_mca = *group; + csum = &mld2q->mld2q_cksum; + csum_start = (void *)mld2q; + if (!pg || !with_srcs) + break; + + llqt_srcs = 0; + hlist_for_each_entry(ent, &pg->src_list, node) { + if (over_llqt == time_after(ent->timer.expires, + llqt) && + ent->src_query_rexmit_cnt > 0) { + mld2q->mld2q_srcs[llqt_srcs++] = ent->addr.u.ip6; + ent->src_query_rexmit_cnt--; + if (need_rexmit && ent->src_query_rexmit_cnt) + *need_rexmit = true; + } + } + if (WARN_ON(llqt_srcs != ntohs(mld2q->mld2q_nsrcs))) { + kfree_skb(skb); + return NULL; + } break; } - skb_put(skb, mld_hdr_size); + if (WARN_ON(!csum || !csum_start)) { + kfree_skb(skb); + return NULL; + } + + *csum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, mld_hdr_size, + IPPROTO_ICMPV6, + csum_partial(csum_start, mld_hdr_size, 0)); + skb_put(skb, mld_hdr_size); __skb_pull(skb, sizeof(*eth)); out: @@ -421,16 +614,39 @@ out: #endif static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br, - struct br_ip *addr, - u8 *igmp_type) + struct net_bridge_port_group *pg, + struct br_ip *ip_dst, + struct br_ip *group, + bool with_srcs, bool over_lmqt, + u8 sflag, u8 *igmp_type, + bool *need_rexmit) { - switch (addr->proto) { + __be32 ip4_dst; + + switch (group->proto) { case htons(ETH_P_IP): - return br_ip4_multicast_alloc_query(br, addr->u.ip4, igmp_type); + ip4_dst = ip_dst ? ip_dst->u.ip4 : htonl(INADDR_ALLHOSTS_GROUP); + return br_ip4_multicast_alloc_query(br, pg, + ip4_dst, group->u.ip4, + with_srcs, over_lmqt, + sflag, igmp_type, + need_rexmit); #if IS_ENABLED(CONFIG_IPV6) - case htons(ETH_P_IPV6): - return br_ip6_multicast_alloc_query(br, &addr->u.ip6, - igmp_type); + case htons(ETH_P_IPV6): { + struct in6_addr ip6_dst; + + if (ip_dst) + ip6_dst = ip_dst->u.ip6; + else + ipv6_addr_set(&ip6_dst, htonl(0xff020000), 0, 0, + htonl(1)); + + return br_ip6_multicast_alloc_query(br, pg, + &ip6_dst, &group->u.ip6, + with_srcs, over_lmqt, + sflag, igmp_type, + need_rexmit); + } #endif } return NULL; @@ -457,6 +673,7 @@ struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br, mp->br = br; mp->addr = *group; + mp->mcast_gc.destroy = br_multicast_destroy_mdb_entry; timer_setup(&mp->timer, br_multicast_group_expired, 0); err = rhashtable_lookup_insert_fast(&br->mdb_hash_tbl, &mp->rhnode, br_mdb_rht_params); @@ -470,12 +687,97 @@ struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br, return mp; } +static void br_multicast_group_src_expired(struct timer_list *t) +{ + struct net_bridge_group_src *src = from_timer(src, t, timer); + struct net_bridge_port_group *pg; + struct net_bridge *br = src->br; + + spin_lock(&br->multicast_lock); + if (hlist_unhashed(&src->node) || !netif_running(br->dev) || + timer_pending(&src->timer)) + goto out; + + pg = src->pg; + if (pg->filter_mode == MCAST_INCLUDE) { + br_multicast_del_group_src(src); + if (!hlist_empty(&pg->src_list)) + goto out; + br_multicast_find_del_pg(br, pg); + } +out: + spin_unlock(&br->multicast_lock); +} + +static struct net_bridge_group_src * +br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip) +{ + struct net_bridge_group_src *ent; + + switch (ip->proto) { + case htons(ETH_P_IP): + hlist_for_each_entry(ent, &pg->src_list, node) + if (ip->u.ip4 == ent->addr.u.ip4) + return ent; + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + hlist_for_each_entry(ent, &pg->src_list, node) + if (!ipv6_addr_cmp(&ent->addr.u.ip6, &ip->u.ip6)) + return ent; + break; +#endif + } + + return NULL; +} + +static struct net_bridge_group_src * +br_multicast_new_group_src(struct net_bridge_port_group *pg, struct br_ip *src_ip) +{ + struct net_bridge_group_src *grp_src; + + if (unlikely(pg->src_ents >= PG_SRC_ENT_LIMIT)) + return NULL; + + switch (src_ip->proto) { + case htons(ETH_P_IP): + if (ipv4_is_zeronet(src_ip->u.ip4) || + ipv4_is_multicast(src_ip->u.ip4)) + return NULL; + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + if (ipv6_addr_any(&src_ip->u.ip6) || + ipv6_addr_is_multicast(&src_ip->u.ip6)) + return NULL; + break; +#endif + } + + grp_src = kzalloc(sizeof(*grp_src), GFP_ATOMIC); + if (unlikely(!grp_src)) + return NULL; + + grp_src->pg = pg; + grp_src->br = pg->port->br; + grp_src->addr = *src_ip; + grp_src->mcast_gc.destroy = br_multicast_destroy_group_src; + timer_setup(&grp_src->timer, br_multicast_group_src_expired, 0); + + hlist_add_head_rcu(&grp_src->node, &pg->src_list); + pg->src_ents++; + + return grp_src; +} + struct net_bridge_port_group *br_multicast_new_port_group( struct net_bridge_port *port, struct br_ip *group, struct net_bridge_port_group __rcu *next, unsigned char flags, - const unsigned char *src) + const unsigned char *src, + u8 filter_mode) { struct net_bridge_port_group *p; @@ -486,9 +788,13 @@ struct net_bridge_port_group *br_multicast_new_port_group( p->addr = *group; p->port = port; p->flags = flags; + p->filter_mode = filter_mode; + p->mcast_gc.destroy = br_multicast_destroy_port_group; + INIT_HLIST_HEAD(&p->src_list); rcu_assign_pointer(p->next, next); - hlist_add_head(&p->mglist, &port->mglist); timer_setup(&p->timer, br_multicast_port_group_expired, 0); + timer_setup(&p->rexmit_timer, br_multicast_port_group_rexmit, 0); + hlist_add_head(&p->mglist, &port->mglist); if (src) memcpy(p->eth_addr, src, ETH_ALEN); @@ -516,8 +822,7 @@ void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify) if (!mp->host_joined) { mp->host_joined = true; if (notify) - br_mdb_notify(mp->br->dev, NULL, &mp->addr, - RTM_NEWMDB, 0); + br_mdb_notify(mp->br->dev, mp, NULL, RTM_NEWMDB); } mod_timer(&mp->timer, jiffies + mp->br->multicast_membership_interval); } @@ -529,13 +834,15 @@ void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify) mp->host_joined = false; if (notify) - br_mdb_notify(mp->br->dev, NULL, &mp->addr, RTM_DELMDB, 0); + br_mdb_notify(mp->br->dev, mp, NULL, RTM_DELMDB); } static int br_multicast_add_group(struct net_bridge *br, struct net_bridge_port *port, struct br_ip *group, - const unsigned char *src) + const unsigned char *src, + u8 filter_mode, + bool igmpv2_mldv1) { struct net_bridge_port_group __rcu **pp; struct net_bridge_port_group *p; @@ -567,14 +874,16 @@ static int br_multicast_add_group(struct net_bridge *br, break; } - p = br_multicast_new_port_group(port, group, *pp, 0, src); + p = br_multicast_new_port_group(port, group, *pp, 0, src, filter_mode); if (unlikely(!p)) goto err; rcu_assign_pointer(*pp, p); - br_mdb_notify(br->dev, port, group, RTM_NEWMDB, 0); + br_mdb_notify(br->dev, mp, p, RTM_NEWMDB); found: - mod_timer(&p->timer, now + br->multicast_membership_interval); + if (igmpv2_mldv1) + mod_timer(&p->timer, now + br->multicast_membership_interval); + out: err = 0; @@ -587,9 +896,11 @@ static int br_ip4_multicast_add_group(struct net_bridge *br, struct net_bridge_port *port, __be32 group, __u16 vid, - const unsigned char *src) + const unsigned char *src, + bool igmpv2) { struct br_ip br_group; + u8 filter_mode; if (ipv4_is_local_multicast(group)) return 0; @@ -598,8 +909,10 @@ static int br_ip4_multicast_add_group(struct net_bridge *br, br_group.u.ip4 = group; br_group.proto = htons(ETH_P_IP); br_group.vid = vid; + filter_mode = igmpv2 ? MCAST_EXCLUDE : MCAST_INCLUDE; - return br_multicast_add_group(br, port, &br_group, src); + return br_multicast_add_group(br, port, &br_group, src, filter_mode, + igmpv2); } #if IS_ENABLED(CONFIG_IPV6) @@ -607,9 +920,11 @@ static int br_ip6_multicast_add_group(struct net_bridge *br, struct net_bridge_port *port, const struct in6_addr *group, __u16 vid, - const unsigned char *src) + const unsigned char *src, + bool mldv1) { struct br_ip br_group; + u8 filter_mode; if (ipv6_addr_is_ll_all_nodes(group)) return 0; @@ -618,8 +933,10 @@ static int br_ip6_multicast_add_group(struct net_bridge *br, br_group.u.ip6 = *group; br_group.proto = htons(ETH_P_IPV6); br_group.vid = vid; + filter_mode = mldv1 ? MCAST_EXCLUDE : MCAST_INCLUDE; - return br_multicast_add_group(br, port, &br_group, src); + return br_multicast_add_group(br, port, &br_group, src, filter_mode, + mldv1); } #endif @@ -711,12 +1028,21 @@ static void br_multicast_select_own_querier(struct net_bridge *br, static void __br_multicast_send_query(struct net_bridge *br, struct net_bridge_port *port, - struct br_ip *ip) -{ + struct net_bridge_port_group *pg, + struct br_ip *ip_dst, + struct br_ip *group, + bool with_srcs, + u8 sflag, + bool *need_rexmit) +{ + bool over_lmqt = !!sflag; struct sk_buff *skb; u8 igmp_type; - skb = br_multicast_alloc_query(br, ip, &igmp_type); +again_under_lmqt: + skb = br_multicast_alloc_query(br, pg, ip_dst, group, with_srcs, + over_lmqt, sflag, &igmp_type, + need_rexmit); if (!skb) return; @@ -727,8 +1053,13 @@ static void __br_multicast_send_query(struct net_bridge *br, NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, dev_net(port->dev), NULL, skb, NULL, skb->dev, br_dev_queue_push_xmit); + + if (over_lmqt && with_srcs && sflag) { + over_lmqt = false; + goto again_under_lmqt; + } } else { - br_multicast_select_own_querier(br, ip, skb); + br_multicast_select_own_querier(br, group, skb); br_multicast_count(br, port, skb, igmp_type, BR_MCAST_DIR_RX); netif_rx(skb); @@ -764,7 +1095,8 @@ static void br_multicast_send_query(struct net_bridge *br, if (!other_query || timer_pending(&other_query->timer)) return; - __br_multicast_send_query(br, port, &br_group); + __br_multicast_send_query(br, port, NULL, NULL, &br_group, false, 0, + NULL); time = jiffies; time += own_query->startup_sent < br->multicast_startup_query_count ? @@ -809,6 +1141,44 @@ static void br_ip6_multicast_port_query_expired(struct timer_list *t) } #endif +static void br_multicast_port_group_rexmit(struct timer_list *t) +{ + struct net_bridge_port_group *pg = from_timer(pg, t, rexmit_timer); + struct bridge_mcast_other_query *other_query = NULL; + struct net_bridge *br = pg->port->br; + bool need_rexmit = false; + + spin_lock(&br->multicast_lock); + if (!netif_running(br->dev) || hlist_unhashed(&pg->mglist) || + !br_opt_get(br, BROPT_MULTICAST_ENABLED) || + !br_opt_get(br, BROPT_MULTICAST_QUERIER)) + goto out; + + if (pg->addr.proto == htons(ETH_P_IP)) + other_query = &br->ip4_other_query; +#if IS_ENABLED(CONFIG_IPV6) + else + other_query = &br->ip6_other_query; +#endif + + if (!other_query || timer_pending(&other_query->timer)) + goto out; + + if (pg->grp_query_rexmit_cnt) { + pg->grp_query_rexmit_cnt--; + __br_multicast_send_query(br, pg->port, pg, &pg->addr, + &pg->addr, false, 1, NULL); + } + __br_multicast_send_query(br, pg->port, pg, &pg->addr, + &pg->addr, true, 0, &need_rexmit); + + if (pg->grp_query_rexmit_cnt || need_rexmit) + mod_timer(&pg->rexmit_timer, jiffies + + br->multicast_last_member_interval); +out: + spin_unlock(&br->multicast_lock); +} + static void br_mc_disabled_update(struct net_device *dev, bool value) { struct switchdev_attr attr = { @@ -847,13 +1217,16 @@ void br_multicast_del_port(struct net_bridge_port *port) { struct net_bridge *br = port->br; struct net_bridge_port_group *pg; + HLIST_HEAD(deleted_head); struct hlist_node *n; /* Take care of the remaining groups, only perm ones should be left */ spin_lock_bh(&br->multicast_lock); hlist_for_each_entry_safe(pg, n, &port->mglist, mglist) - br_multicast_del_pg(br, pg); + br_multicast_find_del_pg(br, pg); + hlist_move_list(&br->mcast_gc_list, &deleted_head); spin_unlock_bh(&br->multicast_lock); + br_multicast_gc(&deleted_head); del_timer_sync(&port->multicast_router_timer); free_percpu(port->mcast_stats); } @@ -901,7 +1274,7 @@ void br_multicast_disable_port(struct net_bridge_port *port) spin_lock(&br->multicast_lock); hlist_for_each_entry_safe(pg, n, &port->mglist, mglist) if (!(pg->flags & MDB_PG_FLAGS_PERMANENT)) - br_multicast_del_pg(br, pg); + br_multicast_find_del_pg(br, pg); __del_port_router(port); @@ -913,20 +1286,561 @@ void br_multicast_disable_port(struct net_bridge_port *port) spin_unlock(&br->multicast_lock); } +static int __grp_src_delete_marked(struct net_bridge_port_group *pg) +{ + struct net_bridge_group_src *ent; + struct hlist_node *tmp; + int deleted = 0; + + hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node) + if (ent->flags & BR_SGRP_F_DELETE) { + br_multicast_del_group_src(ent); + deleted++; + } + + return deleted; +} + +static void __grp_src_query_marked_and_rexmit(struct net_bridge_port_group *pg) +{ + struct bridge_mcast_other_query *other_query = NULL; + struct net_bridge *br = pg->port->br; + u32 lmqc = br->multicast_last_member_count; + unsigned long lmqt, lmi, now = jiffies; + struct net_bridge_group_src *ent; + + if (!netif_running(br->dev) || + !br_opt_get(br, BROPT_MULTICAST_ENABLED)) + return; + + if (pg->addr.proto == htons(ETH_P_IP)) + other_query = &br->ip4_other_query; +#if IS_ENABLED(CONFIG_IPV6) + else + other_query = &br->ip6_other_query; +#endif + + lmqt = now + br_multicast_lmqt(br); + hlist_for_each_entry(ent, &pg->src_list, node) { + if (ent->flags & BR_SGRP_F_SEND) { + ent->flags &= ~BR_SGRP_F_SEND; + if (ent->timer.expires > lmqt) { + if (br_opt_get(br, BROPT_MULTICAST_QUERIER) && + other_query && + !timer_pending(&other_query->timer)) + ent->src_query_rexmit_cnt = lmqc; + mod_timer(&ent->timer, lmqt); + } + } + } + + if (!br_opt_get(br, BROPT_MULTICAST_QUERIER) || + !other_query || timer_pending(&other_query->timer)) + return; + + __br_multicast_send_query(br, pg->port, pg, &pg->addr, + &pg->addr, true, 1, NULL); + + lmi = now + br->multicast_last_member_interval; + if (!timer_pending(&pg->rexmit_timer) || + time_after(pg->rexmit_timer.expires, lmi)) + mod_timer(&pg->rexmit_timer, lmi); +} + +static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg) +{ + struct bridge_mcast_other_query *other_query = NULL; + struct net_bridge *br = pg->port->br; + unsigned long now = jiffies, lmi; + + if (!netif_running(br->dev) || + !br_opt_get(br, BROPT_MULTICAST_ENABLED)) + return; + + if (pg->addr.proto == htons(ETH_P_IP)) + other_query = &br->ip4_other_query; +#if IS_ENABLED(CONFIG_IPV6) + else + other_query = &br->ip6_other_query; +#endif + + if (br_opt_get(br, BROPT_MULTICAST_QUERIER) && + other_query && !timer_pending(&other_query->timer)) { + lmi = now + br->multicast_last_member_interval; + pg->grp_query_rexmit_cnt = br->multicast_last_member_count - 1; + __br_multicast_send_query(br, pg->port, pg, &pg->addr, + &pg->addr, false, 0, NULL); + if (!timer_pending(&pg->rexmit_timer) || + time_after(pg->rexmit_timer.expires, lmi)) + mod_timer(&pg->rexmit_timer, lmi); + } + + if (pg->filter_mode == MCAST_EXCLUDE && + (!timer_pending(&pg->timer) || + time_after(pg->timer.expires, now + br_multicast_lmqt(br)))) + mod_timer(&pg->timer, now + br_multicast_lmqt(br)); +} + +/* State Msg type New state Actions + * INCLUDE (A) IS_IN (B) INCLUDE (A+B) (B)=GMI + * INCLUDE (A) ALLOW (B) INCLUDE (A+B) (B)=GMI + * EXCLUDE (X,Y) ALLOW (A) EXCLUDE (X+A,Y-A) (A)=GMI + */ +static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + struct net_bridge *br = pg->port->br; + struct net_bridge_group_src *ent; + unsigned long now = jiffies; + bool changed = false; + struct br_ip src_ip; + u32 src_idx; + + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&src_ip.u, srcs, src_size); + ent = br_multicast_find_group_src(pg, &src_ip); + if (!ent) { + ent = br_multicast_new_group_src(pg, &src_ip); + if (ent) + changed = true; + } + + if (ent) + mod_timer(&ent->timer, now + br_multicast_gmi(br)); + srcs += src_size; + } + + return changed; +} + +/* State Msg type New state Actions + * INCLUDE (A) IS_EX (B) EXCLUDE (A*B,B-A) (B-A)=0 + * Delete (A-B) + * Group Timer=GMI + */ +static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + struct net_bridge_group_src *ent; + struct br_ip src_ip; + u32 src_idx; + + hlist_for_each_entry(ent, &pg->src_list, node) + ent->flags |= BR_SGRP_F_DELETE; + + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&src_ip.u, srcs, src_size); + ent = br_multicast_find_group_src(pg, &src_ip); + if (ent) + ent->flags &= ~BR_SGRP_F_DELETE; + else + br_multicast_new_group_src(pg, &src_ip); + srcs += src_size; + } + + __grp_src_delete_marked(pg); +} + +/* State Msg type New state Actions + * EXCLUDE (X,Y) IS_EX (A) EXCLUDE (A-Y,Y*A) (A-X-Y)=GMI + * Delete (X-A) + * Delete (Y-A) + * Group Timer=GMI + */ +static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + struct net_bridge *br = pg->port->br; + struct net_bridge_group_src *ent; + unsigned long now = jiffies; + bool changed = false; + struct br_ip src_ip; + u32 src_idx; + + hlist_for_each_entry(ent, &pg->src_list, node) + ent->flags |= BR_SGRP_F_DELETE; + + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&src_ip.u, srcs, src_size); + ent = br_multicast_find_group_src(pg, &src_ip); + if (ent) { + ent->flags &= ~BR_SGRP_F_DELETE; + } else { + ent = br_multicast_new_group_src(pg, &src_ip); + if (ent) { + mod_timer(&ent->timer, + now + br_multicast_gmi(br)); + changed = true; + } + } + srcs += src_size; + } + + if (__grp_src_delete_marked(pg)) + changed = true; + + return changed; +} + +static bool br_multicast_isexc(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + struct net_bridge *br = pg->port->br; + bool changed = false; + + switch (pg->filter_mode) { + case MCAST_INCLUDE: + __grp_src_isexc_incl(pg, srcs, nsrcs, src_size); + changed = true; + break; + case MCAST_EXCLUDE: + changed = __grp_src_isexc_excl(pg, srcs, nsrcs, src_size); + break; + } + + pg->filter_mode = MCAST_EXCLUDE; + mod_timer(&pg->timer, jiffies + br_multicast_gmi(br)); + + return changed; +} + +/* State Msg type New state Actions + * INCLUDE (A) TO_IN (B) INCLUDE (A+B) (B)=GMI + * Send Q(G,A-B) + */ +static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + struct net_bridge *br = pg->port->br; + u32 src_idx, to_send = pg->src_ents; + struct net_bridge_group_src *ent; + unsigned long now = jiffies; + bool changed = false; + struct br_ip src_ip; + + hlist_for_each_entry(ent, &pg->src_list, node) + ent->flags |= BR_SGRP_F_SEND; + + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&src_ip.u, srcs, src_size); + ent = br_multicast_find_group_src(pg, &src_ip); + if (ent) { + ent->flags &= ~BR_SGRP_F_SEND; + to_send--; + } else { + ent = br_multicast_new_group_src(pg, &src_ip); + if (ent) + changed = true; + } + if (ent) + mod_timer(&ent->timer, now + br_multicast_gmi(br)); + srcs += src_size; + } + + if (to_send) + __grp_src_query_marked_and_rexmit(pg); + + return changed; +} + +/* State Msg type New state Actions + * EXCLUDE (X,Y) TO_IN (A) EXCLUDE (X+A,Y-A) (A)=GMI + * Send Q(G,X-A) + * Send Q(G) + */ +static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + struct net_bridge *br = pg->port->br; + u32 src_idx, to_send = pg->src_ents; + struct net_bridge_group_src *ent; + unsigned long now = jiffies; + bool changed = false; + struct br_ip src_ip; + + hlist_for_each_entry(ent, &pg->src_list, node) + if (timer_pending(&ent->timer)) + ent->flags |= BR_SGRP_F_SEND; + + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&src_ip.u, srcs, src_size); + ent = br_multicast_find_group_src(pg, &src_ip); + if (ent) { + if (timer_pending(&ent->timer)) { + ent->flags &= ~BR_SGRP_F_SEND; + to_send--; + } + } else { + ent = br_multicast_new_group_src(pg, &src_ip); + if (ent) + changed = true; + } + if (ent) + mod_timer(&ent->timer, now + br_multicast_gmi(br)); + srcs += src_size; + } + + if (to_send) + __grp_src_query_marked_and_rexmit(pg); + + __grp_send_query_and_rexmit(pg); + + return changed; +} + +static bool br_multicast_toin(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + bool changed = false; + + switch (pg->filter_mode) { + case MCAST_INCLUDE: + changed = __grp_src_toin_incl(pg, srcs, nsrcs, src_size); + break; + case MCAST_EXCLUDE: + changed = __grp_src_toin_excl(pg, srcs, nsrcs, src_size); + break; + } + + return changed; +} + +/* State Msg type New state Actions + * INCLUDE (A) TO_EX (B) EXCLUDE (A*B,B-A) (B-A)=0 + * Delete (A-B) + * Send Q(G,A*B) + * Group Timer=GMI + */ +static void __grp_src_toex_incl(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + struct net_bridge_group_src *ent; + u32 src_idx, to_send = 0; + struct br_ip src_ip; + + hlist_for_each_entry(ent, &pg->src_list, node) + ent->flags = (ent->flags & ~BR_SGRP_F_SEND) | BR_SGRP_F_DELETE; + + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&src_ip.u, srcs, src_size); + ent = br_multicast_find_group_src(pg, &src_ip); + if (ent) { + ent->flags = (ent->flags & ~BR_SGRP_F_DELETE) | + BR_SGRP_F_SEND; + to_send++; + } else { + br_multicast_new_group_src(pg, &src_ip); + } + srcs += src_size; + } + + __grp_src_delete_marked(pg); + if (to_send) + __grp_src_query_marked_and_rexmit(pg); +} + +/* State Msg type New state Actions + * EXCLUDE (X,Y) TO_EX (A) EXCLUDE (A-Y,Y*A) (A-X-Y)=Group Timer + * Delete (X-A) + * Delete (Y-A) + * Send Q(G,A-Y) + * Group Timer=GMI + */ +static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + struct net_bridge_group_src *ent; + u32 src_idx, to_send = 0; + bool changed = false; + struct br_ip src_ip; + + hlist_for_each_entry(ent, &pg->src_list, node) + ent->flags = (ent->flags & ~BR_SGRP_F_SEND) | BR_SGRP_F_DELETE; + + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&src_ip.u, srcs, src_size); + ent = br_multicast_find_group_src(pg, &src_ip); + if (ent) { + ent->flags &= ~BR_SGRP_F_DELETE; + } else { + ent = br_multicast_new_group_src(pg, &src_ip); + if (ent) { + mod_timer(&ent->timer, pg->timer.expires); + changed = true; + } + } + if (ent && timer_pending(&ent->timer)) { + ent->flags |= BR_SGRP_F_SEND; + to_send++; + } + srcs += src_size; + } + + if (__grp_src_delete_marked(pg)) + changed = true; + if (to_send) + __grp_src_query_marked_and_rexmit(pg); + + return changed; +} + +static bool br_multicast_toex(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + struct net_bridge *br = pg->port->br; + bool changed = false; + + switch (pg->filter_mode) { + case MCAST_INCLUDE: + __grp_src_toex_incl(pg, srcs, nsrcs, src_size); + changed = true; + break; + case MCAST_EXCLUDE: + __grp_src_toex_excl(pg, srcs, nsrcs, src_size); + break; + } + + pg->filter_mode = MCAST_EXCLUDE; + mod_timer(&pg->timer, jiffies + br_multicast_gmi(br)); + + return changed; +} + +/* State Msg type New state Actions + * INCLUDE (A) BLOCK (B) INCLUDE (A) Send Q(G,A*B) + */ +static void __grp_src_block_incl(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + struct net_bridge_group_src *ent; + u32 src_idx, to_send = 0; + struct br_ip src_ip; + + hlist_for_each_entry(ent, &pg->src_list, node) + ent->flags &= ~BR_SGRP_F_SEND; + + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&src_ip.u, srcs, src_size); + ent = br_multicast_find_group_src(pg, &src_ip); + if (ent) { + ent->flags |= BR_SGRP_F_SEND; + to_send++; + } + srcs += src_size; + } + + if (to_send) + __grp_src_query_marked_and_rexmit(pg); + + if (pg->filter_mode == MCAST_INCLUDE && hlist_empty(&pg->src_list)) + br_multicast_find_del_pg(pg->port->br, pg); +} + +/* State Msg type New state Actions + * EXCLUDE (X,Y) BLOCK (A) EXCLUDE (X+(A-Y),Y) (A-X-Y)=Group Timer + * Send Q(G,A-Y) + */ +static bool __grp_src_block_excl(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + struct net_bridge_group_src *ent; + u32 src_idx, to_send = 0; + bool changed = false; + struct br_ip src_ip; + + hlist_for_each_entry(ent, &pg->src_list, node) + ent->flags &= ~BR_SGRP_F_SEND; + + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&src_ip.u, srcs, src_size); + ent = br_multicast_find_group_src(pg, &src_ip); + if (!ent) { + ent = br_multicast_new_group_src(pg, &src_ip); + if (ent) { + mod_timer(&ent->timer, pg->timer.expires); + changed = true; + } + } + if (ent && timer_pending(&ent->timer)) { + ent->flags |= BR_SGRP_F_SEND; + to_send++; + } + srcs += src_size; + } + + if (to_send) + __grp_src_query_marked_and_rexmit(pg); + + return changed; +} + +static bool br_multicast_block(struct net_bridge_port_group *pg, + void *srcs, u32 nsrcs, size_t src_size) +{ + bool changed = false; + + switch (pg->filter_mode) { + case MCAST_INCLUDE: + __grp_src_block_incl(pg, srcs, nsrcs, src_size); + break; + case MCAST_EXCLUDE: + changed = __grp_src_block_excl(pg, srcs, nsrcs, src_size); + break; + } + + return changed; +} + +static struct net_bridge_port_group * +br_multicast_find_port(struct net_bridge_mdb_entry *mp, + struct net_bridge_port *p, + const unsigned char *src) +{ + struct net_bridge_port_group *pg; + struct net_bridge *br = mp->br; + + for (pg = mlock_dereference(mp->ports, br); + pg; + pg = mlock_dereference(pg->next, br)) + if (br_port_group_equal(pg, p, src)) + return pg; + + return NULL; +} + static int br_ip4_multicast_igmp3_report(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb, u16 vid) { + bool igmpv2 = br->multicast_igmp_version == 2; + struct net_bridge_mdb_entry *mdst; + struct net_bridge_port_group *pg; const unsigned char *src; struct igmpv3_report *ih; struct igmpv3_grec *grec; - int i; - int len; - int num; - int type; - int err = 0; + int i, len, num, type; + bool changed = false; __be32 group; + int err = 0; u16 nsrcs; ih = igmpv3_report_hdr(skb); @@ -947,7 +1861,6 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, if (!ip_mc_may_pull(skb, len)) return -EINVAL; - /* We treat this as an IGMPv2 report for now. */ switch (type) { case IGMPV3_MODE_IS_INCLUDE: case IGMPV3_MODE_IS_EXCLUDE: @@ -962,16 +1875,62 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, } src = eth_hdr(skb)->h_source; - if ((type == IGMPV3_CHANGE_TO_INCLUDE || - type == IGMPV3_MODE_IS_INCLUDE) && - nsrcs == 0) { - br_ip4_multicast_leave_group(br, port, group, vid, src); + if (nsrcs == 0 && + (type == IGMPV3_CHANGE_TO_INCLUDE || + type == IGMPV3_MODE_IS_INCLUDE)) { + if (!port || igmpv2) { + br_ip4_multicast_leave_group(br, port, group, vid, src); + continue; + } } else { err = br_ip4_multicast_add_group(br, port, group, vid, - src); + src, igmpv2); if (err) break; } + + if (!port || igmpv2) + continue; + + spin_lock_bh(&br->multicast_lock); + mdst = br_mdb_ip4_get(br, group, vid); + if (!mdst) + goto unlock_continue; + pg = br_multicast_find_port(mdst, port, src); + if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT)) + goto unlock_continue; + /* reload grec */ + grec = (void *)(skb->data + len - sizeof(*grec) - (nsrcs * 4)); + switch (type) { + case IGMPV3_ALLOW_NEW_SOURCES: + changed = br_multicast_isinc_allow(pg, grec->grec_src, + nsrcs, sizeof(__be32)); + break; + case IGMPV3_MODE_IS_INCLUDE: + changed = br_multicast_isinc_allow(pg, grec->grec_src, nsrcs, + sizeof(__be32)); + break; + case IGMPV3_MODE_IS_EXCLUDE: + changed = br_multicast_isexc(pg, grec->grec_src, nsrcs, + sizeof(__be32)); + break; + case IGMPV3_CHANGE_TO_INCLUDE: + changed = br_multicast_toin(pg, grec->grec_src, nsrcs, + sizeof(__be32)); + break; + case IGMPV3_CHANGE_TO_EXCLUDE: + changed = br_multicast_toex(pg, grec->grec_src, nsrcs, + sizeof(__be32)); + break; + case IGMPV3_BLOCK_OLD_SOURCES: + changed = br_multicast_block(pg, grec->grec_src, nsrcs, + sizeof(__be32)); + break; + } + if (changed) + br_mdb_notify(br->dev, mdst, pg, RTM_NEWMDB); +unlock_continue: + spin_unlock_bh(&br->multicast_lock); } return err; @@ -983,14 +1942,16 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, struct sk_buff *skb, u16 vid) { + bool mldv1 = br->multicast_mld_version == 1; + struct net_bridge_mdb_entry *mdst; + struct net_bridge_port_group *pg; unsigned int nsrcs_offset; const unsigned char *src; struct icmp6hdr *icmp6h; struct mld2_grec *grec; unsigned int grec_len; - int i; - int len; - int num; + bool changed = false; + int i, len, num; int err = 0; if (!ipv6_mc_may_pull(skb, sizeof(*icmp6h))) @@ -1024,7 +1985,6 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, grec = (struct mld2_grec *)(skb->data + len); len += grec_len; - /* We treat these as MLDv1 reports for now. */ switch (grec->grec_type) { case MLD2_MODE_IS_INCLUDE: case MLD2_MODE_IS_EXCLUDE: @@ -1042,15 +2002,61 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, if ((grec->grec_type == MLD2_CHANGE_TO_INCLUDE || grec->grec_type == MLD2_MODE_IS_INCLUDE) && nsrcs == 0) { - br_ip6_multicast_leave_group(br, port, &grec->grec_mca, - vid, src); + if (!port || mldv1) { + br_ip6_multicast_leave_group(br, port, + &grec->grec_mca, + vid, src); + continue; + } } else { err = br_ip6_multicast_add_group(br, port, &grec->grec_mca, vid, - src); + src, mldv1); if (err) break; } + + if (!port || mldv1) + continue; + + spin_lock_bh(&br->multicast_lock); + mdst = br_mdb_ip6_get(br, &grec->grec_mca, vid); + if (!mdst) + goto unlock_continue; + pg = br_multicast_find_port(mdst, port, src); + if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT)) + goto unlock_continue; + switch (grec->grec_type) { + case MLD2_ALLOW_NEW_SOURCES: + changed = br_multicast_isinc_allow(pg, grec->grec_src, + nsrcs, + sizeof(struct in6_addr)); + break; + case MLD2_MODE_IS_INCLUDE: + changed = br_multicast_isinc_allow(pg, grec->grec_src, nsrcs, + sizeof(struct in6_addr)); + break; + case MLD2_MODE_IS_EXCLUDE: + changed = br_multicast_isexc(pg, grec->grec_src, nsrcs, + sizeof(struct in6_addr)); + break; + case MLD2_CHANGE_TO_INCLUDE: + changed = br_multicast_toin(pg, grec->grec_src, nsrcs, + sizeof(struct in6_addr)); + break; + case MLD2_CHANGE_TO_EXCLUDE: + changed = br_multicast_toex(pg, grec->grec_src, nsrcs, + sizeof(struct in6_addr)); + break; + case MLD2_BLOCK_OLD_SOURCES: + changed = br_multicast_block(pg, grec->grec_src, nsrcs, + sizeof(struct in6_addr)); + break; + } + if (changed) + br_mdb_notify(br->dev, mdst, pg, RTM_NEWMDB); +unlock_continue: + spin_unlock_bh(&br->multicast_lock); } return err; @@ -1245,7 +2251,8 @@ static void br_ip4_multicast_query(struct net_bridge *br, } } else if (transport_len >= sizeof(*ih3)) { ih3 = igmpv3_query_hdr(skb); - if (ih3->nsrcs) + if (ih3->nsrcs || + (br->multicast_igmp_version == 3 && group && ih3->suppress)) goto out; max_delay = ih3->code ? @@ -1280,7 +2287,9 @@ static void br_ip4_multicast_query(struct net_bridge *br, pp = &p->next) { if (timer_pending(&p->timer) ? time_after(p->timer.expires, now + max_delay) : - try_to_del_timer_sync(&p->timer) >= 0) + try_to_del_timer_sync(&p->timer) >= 0 && + (br->multicast_igmp_version == 2 || + p->filter_mode == MCAST_EXCLUDE)) mod_timer(&p->timer, now + max_delay); } @@ -1330,6 +2339,10 @@ static int br_ip6_multicast_query(struct net_bridge *br, mld2q = (struct mld2_query *)icmp6_hdr(skb); if (!mld2q->mld2q_nsrcs) group = &mld2q->mld2q_mca; + if (br->multicast_mld_version == 2 && + !ipv6_addr_any(&mld2q->mld2q_mca) && + mld2q->mld2q_suppress) + goto out; max_delay = max(msecs_to_jiffies(mldv2_mrc(mld2q)), 1UL); } @@ -1363,7 +2376,9 @@ static int br_ip6_multicast_query(struct net_bridge *br, pp = &p->next) { if (timer_pending(&p->timer) ? time_after(p->timer.expires, now + max_delay) : - try_to_del_timer_sync(&p->timer) >= 0) + try_to_del_timer_sync(&p->timer) >= 0 && + (br->multicast_mld_version == 1 || + p->filter_mode == MCAST_EXCLUDE)) mod_timer(&p->timer, now + max_delay); } @@ -1407,16 +2422,8 @@ br_multicast_leave_group(struct net_bridge *br, if (p->flags & MDB_PG_FLAGS_PERMANENT) break; - rcu_assign_pointer(*pp, p->next); - hlist_del_init(&p->mglist); - del_timer(&p->timer); - kfree_rcu(p, rcu); - br_mdb_notify(br->dev, port, group, RTM_DELMDB, - p->flags | MDB_PG_FLAGS_FAST_LEAVE); - - if (!mp->ports && !mp->host_joined && - netif_running(br->dev)) - mod_timer(&mp->timer, jiffies); + p->flags |= MDB_PG_FLAGS_FAST_LEAVE; + br_multicast_del_pg(mp, p, pp); } goto out; } @@ -1425,7 +2432,8 @@ br_multicast_leave_group(struct net_bridge *br, goto out; if (br_opt_get(br, BROPT_MULTICAST_QUERIER)) { - __br_multicast_send_query(br, port, &mp->addr); + __br_multicast_send_query(br, port, NULL, NULL, &mp->addr, + false, 0, NULL); time = jiffies + br->multicast_last_member_count * br->multicast_last_member_interval; @@ -1627,7 +2635,8 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, case IGMP_HOST_MEMBERSHIP_REPORT: case IGMPV2_HOST_MEMBERSHIP_REPORT: BR_INPUT_SKB_CB(skb)->mrouters_only = 1; - err = br_ip4_multicast_add_group(br, port, ih->group, vid, src); + err = br_ip4_multicast_add_group(br, port, ih->group, vid, src, + true); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: err = br_ip4_multicast_igmp3_report(br, port, skb, vid); @@ -1706,7 +2715,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, src = eth_hdr(skb)->h_source; BR_INPUT_SKB_CB(skb)->mrouters_only = 1; err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid, - src); + src, true); break; case ICMPV6_MLD2_REPORT: err = br_ip6_multicast_mld2_report(br, port, skb, vid); @@ -1781,6 +2790,19 @@ static void br_ip6_multicast_query_expired(struct timer_list *t) } #endif +static void br_multicast_gc_work(struct work_struct *work) +{ + struct net_bridge *br = container_of(work, struct net_bridge, + mcast_gc_work); + HLIST_HEAD(deleted_head); + + spin_lock_bh(&br->multicast_lock); + hlist_move_list(&br->mcast_gc_list, &deleted_head); + spin_unlock_bh(&br->multicast_lock); + + br_multicast_gc(&deleted_head); +} + void br_multicast_init(struct net_bridge *br) { br->hash_max = BR_MULTICAST_DEFAULT_HASH_MAX; @@ -1821,6 +2843,8 @@ void br_multicast_init(struct net_bridge *br) br_ip6_multicast_query_expired, 0); #endif INIT_HLIST_HEAD(&br->mdb_list); + INIT_HLIST_HEAD(&br->mcast_gc_list); + INIT_WORK(&br->mcast_gc_work, br_multicast_gc_work); } static void br_ip4_multicast_join_snoopers(struct net_bridge *br) @@ -1924,18 +2948,18 @@ void br_multicast_stop(struct net_bridge *br) void br_multicast_dev_del(struct net_bridge *br) { struct net_bridge_mdb_entry *mp; + HLIST_HEAD(deleted_head); struct hlist_node *tmp; spin_lock_bh(&br->multicast_lock); - hlist_for_each_entry_safe(mp, tmp, &br->mdb_list, mdb_node) { - del_timer(&mp->timer); - rhashtable_remove_fast(&br->mdb_hash_tbl, &mp->rhnode, - br_mdb_rht_params); - hlist_del_rcu(&mp->mdb_node); - kfree_rcu(mp, rcu); - } + hlist_for_each_entry_safe(mp, tmp, &br->mdb_list, mdb_node) + br_multicast_del_mdb_entry(mp); + hlist_move_list(&br->mcast_gc_list, &deleted_head); spin_unlock_bh(&br->multicast_lock); + br_multicast_gc(&deleted_head); + cancel_work_sync(&br->mcast_gc_work); + rcu_barrier(); } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index baa1500f384f..a23d2bae56e1 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -214,26 +214,61 @@ struct net_bridge_fdb_entry { #define MDB_PG_FLAGS_OFFLOAD BIT(1) #define MDB_PG_FLAGS_FAST_LEAVE BIT(2) +#define PG_SRC_ENT_LIMIT 32 + +#define BR_SGRP_F_DELETE BIT(0) +#define BR_SGRP_F_SEND BIT(1) + +struct net_bridge_mcast_gc { + struct hlist_node gc_node; + void (*destroy)(struct net_bridge_mcast_gc *gc); +}; + +struct net_bridge_group_src { + struct hlist_node node; + + struct br_ip addr; + struct net_bridge_port_group *pg; + u8 flags; + u8 src_query_rexmit_cnt; + struct timer_list timer; + + struct net_bridge *br; + struct net_bridge_mcast_gc mcast_gc; + struct rcu_head rcu; +}; + struct net_bridge_port_group { struct net_bridge_port *port; struct net_bridge_port_group __rcu *next; - struct hlist_node mglist; - struct rcu_head rcu; - struct timer_list timer; struct br_ip addr; unsigned char eth_addr[ETH_ALEN] __aligned(2); unsigned char flags; + unsigned char filter_mode; + unsigned char grp_query_rexmit_cnt; + + struct hlist_head src_list; + unsigned int src_ents; + struct timer_list timer; + struct timer_list rexmit_timer; + struct hlist_node mglist; + + struct net_bridge_mcast_gc mcast_gc; + struct rcu_head rcu; }; struct net_bridge_mdb_entry { struct rhash_head rhnode; struct net_bridge *br; struct net_bridge_port_group __rcu *ports; - struct rcu_head rcu; - struct timer_list timer; struct br_ip addr; bool host_joined; + + struct timer_list timer; struct hlist_node mdb_node; + + struct net_bridge_mcast_gc mcast_gc; + struct rcu_head rcu; }; struct net_bridge_port { @@ -406,6 +441,7 @@ struct net_bridge { struct rhashtable mdb_hash_tbl; + struct hlist_head mcast_gc_list; struct hlist_head mdb_list; struct hlist_head router_list; @@ -419,6 +455,7 @@ struct net_bridge { struct bridge_mcast_own_query ip6_own_query; struct bridge_mcast_querier ip6_querier; #endif /* IS_ENABLED(CONFIG_IPV6) */ + struct work_struct mcast_gc_work; #endif struct timer_list hello_timer; @@ -766,13 +803,17 @@ br_multicast_new_group(struct net_bridge *br, struct br_ip *group); struct net_bridge_port_group * br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group, struct net_bridge_port_group __rcu *next, - unsigned char flags, const unsigned char *src); + unsigned char flags, const unsigned char *src, + u8 filter_mode); int br_mdb_hash_init(struct net_bridge *br); void br_mdb_hash_fini(struct net_bridge *br); -void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, - struct br_ip *group, int type, u8 flags); +void br_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp, + struct net_bridge_port_group *pg, int type); void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port, int type); +void br_multicast_del_pg(struct net_bridge_mdb_entry *mp, + struct net_bridge_port_group *pg, + struct net_bridge_port_group __rcu **pp); void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p, const struct sk_buff *skb, u8 type, u8 dir); int br_multicast_init_stats(struct net_bridge *br); @@ -836,6 +877,19 @@ static inline int br_multicast_igmp_type(const struct sk_buff *skb) { return BR_INPUT_SKB_CB(skb)->igmp; } + +static inline unsigned long br_multicast_lmqt(const struct net_bridge *br) +{ + return br->multicast_last_member_interval * + br->multicast_last_member_count; +} + +static inline unsigned long br_multicast_gmi(const struct net_bridge *br) +{ + /* use the RFC default of 2 for QRV */ + return 2 * br->multicast_query_interval + + br->multicast_query_response_interval; +} #else static inline int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, |