diff options
Diffstat (limited to 'net/bridge')
-rw-r--r-- | net/bridge/Makefile | 4 | ||||
-rw-r--r-- | net/bridge/br.c | 14 | ||||
-rw-r--r-- | net/bridge/br_device.c | 12 | ||||
-rw-r--r-- | net/bridge/br_forward.c | 3 | ||||
-rw-r--r-- | net/bridge/br_if.c | 20 | ||||
-rw-r--r-- | net/bridge/br_input.c | 1 | ||||
-rw-r--r-- | net/bridge/br_multicast.c | 4 | ||||
-rw-r--r-- | net/bridge/br_netfilter.c | 156 | ||||
-rw-r--r-- | net/bridge/br_netlink.c | 116 | ||||
-rw-r--r-- | net/bridge/br_nf_core.c | 96 | ||||
-rw-r--r-- | net/bridge/br_private.h | 43 | ||||
-rw-r--r-- | net/bridge/br_stp.c | 15 | ||||
-rw-r--r-- | net/bridge/br_stp_if.c | 4 | ||||
-rw-r--r-- | net/bridge/br_stp_timer.c | 4 | ||||
-rw-r--r-- | net/bridge/br_sysfs_br.c | 21 | ||||
-rw-r--r-- | net/bridge/br_vlan.c | 165 | ||||
-rw-r--r-- | net/bridge/netfilter/ebtables.c | 25 | ||||
-rw-r--r-- | net/bridge/netfilter/nf_tables_bridge.c | 8 | ||||
-rw-r--r-- | net/bridge/netfilter/nft_reject_bridge.c | 370 |
19 files changed, 882 insertions, 199 deletions
diff --git a/net/bridge/Makefile b/net/bridge/Makefile index 8590b942bffa..fd7ee03c59b3 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -10,7 +10,9 @@ bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \ bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o -bridge-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o +bridge-$(subst m,y,$(CONFIG_BRIDGE_NETFILTER)) += br_nf_core.o + +obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o diff --git a/net/bridge/br.c b/net/bridge/br.c index 1a755a1e5410..44425aff7cba 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -161,7 +161,7 @@ static int __init br_init(void) if (err) goto err_out1; - err = br_netfilter_init(); + err = br_nf_core_init(); if (err) goto err_out2; @@ -179,11 +179,16 @@ static int __init br_init(void) br_fdb_test_addr_hook = br_fdb_test_addr; #endif + pr_info("bridge: automatic filtering via arp/ip/ip6tables has been " + "deprecated. Update your scripts to load br_netfilter if you " + "need this.\n"); + return 0; + err_out4: unregister_netdevice_notifier(&br_device_notifier); err_out3: - br_netfilter_fini(); + br_nf_core_fini(); err_out2: unregister_pernet_subsys(&br_net_ops); err_out1: @@ -196,20 +201,17 @@ err_out: static void __exit br_deinit(void) { stp_proto_unregister(&br_stp_proto); - br_netlink_fini(); unregister_netdevice_notifier(&br_device_notifier); brioctl_set(NULL); - unregister_pernet_subsys(&br_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ - br_netfilter_fini(); + br_nf_core_fini(); #if IS_ENABLED(CONFIG_ATM_LANE) br_fdb_test_addr_hook = NULL; #endif - br_fdb_fini(); } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 568cccd39a3d..ffd379db5938 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -36,7 +36,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) u16 vid = 0; rcu_read_lock(); -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { br_nf_pre_routing_finish_bridge_slow(skb); rcu_read_unlock(); @@ -88,12 +88,17 @@ out: static int br_dev_init(struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); + int err; br->stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!br->stats) return -ENOMEM; - return 0; + err = br_vlan_init(br); + if (err) + free_percpu(br->stats); + + return err; } static int br_dev_open(struct net_device *dev) @@ -167,7 +172,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu) dev->mtu = new_mtu; -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) /* remember the MTU in the rtable for PMTU */ dst_metric_set(&br->fake_rtable.dst, RTAX_MTU, new_mtu); #endif @@ -389,5 +394,4 @@ void br_dev_setup(struct net_device *dev) br_netfilter_rtable_init(br); br_stp_timer_init(br); br_multicast_init(br); - br_vlan_init(br); } diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 056b67b0e277..44cb786b925a 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -49,6 +49,7 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) return 0; } +EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit); int br_forward_finish(struct sk_buff *skb) { @@ -56,6 +57,7 @@ int br_forward_finish(struct sk_buff *skb) br_dev_queue_push_xmit); } +EXPORT_SYMBOL_GPL(br_forward_finish); static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) { @@ -110,6 +112,7 @@ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) kfree_skb(skb); } +EXPORT_SYMBOL_GPL(br_deliver); /* called with rcu_read_lock */ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 078d336a1f37..ed307db7a12b 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -252,12 +252,12 @@ static void del_nbp(struct net_bridge_port *p) br_fdb_delete_by_port(br, p, 1); nbp_update_port_count(br); + netdev_upper_dev_unlink(dev, br->dev); + dev->priv_flags &= ~IFF_BRIDGE_PORT; netdev_rx_handler_unregister(dev); - netdev_upper_dev_unlink(dev, br->dev); - br_multicast_del_port(p); kobject_uevent(&p->kobj, KOBJ_REMOVE); @@ -332,7 +332,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, p->port_no = index; p->flags = BR_LEARNING | BR_FLOOD; br_init_port(p); - p->state = BR_STATE_DISABLED; + br_set_state(p, BR_STATE_DISABLED); br_stp_port_timer_init(p); br_multicast_add_port(p); @@ -476,16 +476,16 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (err) goto err3; - err = netdev_master_upper_dev_link(dev, br->dev); + err = netdev_rx_handler_register(dev, br_handle_frame, p); if (err) goto err4; - err = netdev_rx_handler_register(dev, br_handle_frame, p); + dev->priv_flags |= IFF_BRIDGE_PORT; + + err = netdev_master_upper_dev_link(dev, br->dev); if (err) goto err5; - dev->priv_flags |= IFF_BRIDGE_PORT; - dev_disable_lro(dev); list_add_rcu(&p->list, &br->port_list); @@ -500,6 +500,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (br_fdb_insert(br, p, dev->dev_addr, 0)) netdev_err(dev, "failed insert local address bridge forwarding table\n"); + if (nbp_vlan_init(p)) + netdev_err(dev, "failed to initialize vlan filtering on this port\n"); + spin_lock_bh(&br->lock); changed_addr = br_stp_recalculate_bridge_id(br); @@ -520,7 +523,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) return 0; err5: - netdev_upper_dev_unlink(dev, br->dev); + dev->priv_flags &= ~IFF_BRIDGE_PORT; + netdev_rx_handler_unregister(dev); err4: br_netpoll_disable(p); err3: diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 366c43649079..6fd5522df696 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -140,6 +140,7 @@ drop: kfree_skb(skb); goto out; } +EXPORT_SYMBOL_GPL(br_handle_frame_finish); /* note: already called with rcu_read_lock */ static int br_handle_local_finish(struct sk_buff *skb) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index b4845f4b2bb4..648d79ccf462 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1174,7 +1174,7 @@ static void br_multicast_add_router(struct net_bridge *br, } if (slot) - hlist_add_after_rcu(slot, &port->rlist); + hlist_add_behind_rcu(&port->rlist, slot); else hlist_add_head_rcu(&port->rlist, &br->router_list); } @@ -1822,7 +1822,7 @@ static void br_multicast_query_expired(struct net_bridge *br, if (query->startup_sent < br->multicast_startup_query_count) query->startup_sent++; - rcu_assign_pointer(querier, NULL); + RCU_INIT_POINTER(querier, NULL); br_multicast_send_query(br, NULL, query); spin_unlock(&br->multicast_lock); } diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index a615264cf01a..1a4f32c09ad5 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -111,66 +111,6 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) pppoe_proto(skb) == htons(PPP_IPV6) && \ brnf_filter_pppoe_tagged) -static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) -{ -} - -static void fake_redirect(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb) -{ -} - -static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old) -{ - return NULL; -} - -static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst, - struct sk_buff *skb, - const void *daddr) -{ - return NULL; -} - -static unsigned int fake_mtu(const struct dst_entry *dst) -{ - return dst->dev->mtu; -} - -static struct dst_ops fake_dst_ops = { - .family = AF_INET, - .protocol = cpu_to_be16(ETH_P_IP), - .update_pmtu = fake_update_pmtu, - .redirect = fake_redirect, - .cow_metrics = fake_cow_metrics, - .neigh_lookup = fake_neigh_lookup, - .mtu = fake_mtu, -}; - -/* - * Initialize bogus route table used to keep netfilter happy. - * Currently, we fill in the PMTU entry because netfilter - * refragmentation needs it, and the rt_flags entry because - * ipt_REJECT needs it. Future netfilter modules might - * require us to fill additional fields. - */ -static const u32 br_dst_default_metrics[RTAX_MAX] = { - [RTAX_MTU - 1] = 1500, -}; - -void br_netfilter_rtable_init(struct net_bridge *br) -{ - struct rtable *rt = &br->fake_rtable; - - atomic_set(&rt->dst.__refcnt, 1); - rt->dst.dev = br->dev; - rt->dst.path = &rt->dst; - dst_init_metrics(&rt->dst, br_dst_default_metrics, true); - rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE; - rt->dst.ops = &fake_dst_ops; -} - static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) { struct net_bridge_port *port; @@ -245,14 +185,6 @@ static inline void nf_bridge_save_header(struct sk_buff *skb) skb->nf_bridge->data, header_size); } -static inline void nf_bridge_update_protocol(struct sk_buff *skb) -{ - if (skb->nf_bridge->mask & BRNF_8021Q) - skb->protocol = htons(ETH_P_8021Q); - else if (skb->nf_bridge->mask & BRNF_PPPoE) - skb->protocol = htons(ETH_P_PPP_SES); -} - /* When handing a packet over to the IP layer * check whether we have a skb that is in the * expected format @@ -260,7 +192,6 @@ static inline void nf_bridge_update_protocol(struct sk_buff *skb) static int br_parse_ip_options(struct sk_buff *skb) { - struct ip_options *opt; const struct iphdr *iph; struct net_device *dev = skb->dev; u32 len; @@ -269,7 +200,6 @@ static int br_parse_ip_options(struct sk_buff *skb) goto inhdr_error; iph = ip_hdr(skb); - opt = &(IPCB(skb)->opt); /* Basic sanity checks */ if (iph->ihl < 5 || iph->version != 4) @@ -295,23 +225,11 @@ static int br_parse_ip_options(struct sk_buff *skb) } memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); - if (iph->ihl == 5) - return 0; - - opt->optlen = iph->ihl*4 - sizeof(struct iphdr); - if (ip_options_compile(dev_net(dev), opt, skb)) - goto inhdr_error; - - /* Check correct handling of SRR option */ - if (unlikely(opt->srr)) { - struct in_device *in_dev = __in_dev_get_rcu(dev); - if (in_dev && !IN_DEV_SOURCE_ROUTE(in_dev)) - goto drop; - - if (ip_options_rcv_srr(skb)) - goto drop; - } - + /* We should really parse IP options here but until + * somebody who actually uses IP options complains to + * us we'll just silently ignore the options because + * we're lazy! + */ return 0; inhdr_error: @@ -320,26 +238,6 @@ drop: return -1; } -/* Fill in the header for fragmented IP packets handled by - * the IPv4 connection tracking code. - */ -int nf_bridge_copy_header(struct sk_buff *skb) -{ - int err; - unsigned int header_size; - - nf_bridge_update_protocol(skb); - header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); - err = skb_cow_head(skb, header_size); - if (err) - return err; - - skb_copy_to_linear_data_offset(skb, -header_size, - skb->nf_bridge->data, header_size); - __skb_push(skb, nf_bridge_encap_header_len(skb)); - return 0; -} - /* PF_BRIDGE/PRE_ROUTING *********************************************/ /* Undo the changes made for ip6tables PREROUTING and continue the * bridge PRE_ROUTING hook. */ @@ -404,6 +302,7 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) ETH_HLEN-ETH_ALEN); /* tell br_dev_xmit to continue with forwarding */ nf_bridge->mask |= BRNF_BRIDGED_DNAT; + /* FIXME Need to refragment */ ret = neigh->output(neigh, skb); } neigh_release(neigh); @@ -459,6 +358,10 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) struct nf_bridge_info *nf_bridge = skb->nf_bridge; struct rtable *rt; int err; + int frag_max_size; + + frag_max_size = IPCB(skb)->frag_max_size; + BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size; if (nf_bridge->mask & BRNF_PKT_TYPE) { skb->pkt_type = PACKET_OTHERHOST; @@ -863,13 +766,19 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, static int br_nf_dev_queue_xmit(struct sk_buff *skb) { int ret; + int frag_max_size; + /* This is wrong! We should preserve the original fragment + * boundaries by preserving frag_list rather than refragmenting. + */ if (skb->protocol == htons(ETH_P_IP) && skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu && !skb_is_gso(skb)) { + frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; if (br_parse_ip_options(skb)) /* Drop invalid packet */ return NF_DROP; + IPCB(skb)->frag_max_size = frag_max_size; ret = ip_fragment(skb, br_dev_queue_push_xmit); } else ret = br_dev_queue_push_xmit(skb); @@ -944,6 +853,11 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, return NF_ACCEPT; } +void br_netfilter_enable(void) +{ +} +EXPORT_SYMBOL_GPL(br_netfilter_enable); + /* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because * br_dev_queue_push_xmit is called afterwards */ static struct nf_hook_ops br_nf_ops[] __read_mostly = { @@ -1059,38 +973,42 @@ static struct ctl_table brnf_table[] = { }; #endif -int __init br_netfilter_init(void) +static int __init br_netfilter_init(void) { int ret; - ret = dst_entries_init(&fake_dst_ops); + ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); if (ret < 0) return ret; - ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); - if (ret < 0) { - dst_entries_destroy(&fake_dst_ops); - return ret; - } #ifdef CONFIG_SYSCTL brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table); if (brnf_sysctl_header == NULL) { printk(KERN_WARNING "br_netfilter: can't register to sysctl.\n"); - nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); - dst_entries_destroy(&fake_dst_ops); - return -ENOMEM; + ret = -ENOMEM; + goto err1; } #endif printk(KERN_NOTICE "Bridge firewalling registered\n"); return 0; +err1: + nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); + return ret; } -void br_netfilter_fini(void) +static void __exit br_netfilter_fini(void) { nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); #ifdef CONFIG_SYSCTL unregister_net_sysctl_table(brnf_sysctl_header); #endif - dst_entries_destroy(&fake_dst_ops); } + +module_init(br_netfilter_init); +module_exit(br_netfilter_fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Lennert Buytenhek <buytenh@gnu.org>"); +MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); +MODULE_DESCRIPTION("Linux ethernet netfilter firewall bridge"); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index cb5fcf62f663..2ff9706647f2 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -257,9 +257,6 @@ static int br_afspec(struct net_bridge *br, } else err = br_vlan_add(br, vinfo->vid, vinfo->flags); - if (err) - break; - break; case RTM_DELLINK: @@ -276,7 +273,7 @@ static int br_afspec(struct net_bridge *br, return err; } -static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { +static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_STATE] = { .type = NLA_U8 }, [IFLA_BRPORT_COST] = { .type = NLA_U32 }, [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 }, @@ -304,7 +301,7 @@ static int br_set_port_state(struct net_bridge_port *p, u8 state) (!netif_oper_up(p->dev) && state != BR_STATE_DISABLED)) return -ENETDOWN; - p->state = state; + br_set_state(p, state); br_log_state(p); br_port_state_selection(p->br); return 0; @@ -382,7 +379,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) if (p && protinfo) { if (protinfo->nla_type & NLA_F_NESTED) { err = nla_parse_nested(tb, IFLA_BRPORT_MAX, - protinfo, ifla_brport_policy); + protinfo, br_port_policy); if (err) return err; @@ -461,6 +458,88 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev, return register_netdevice(dev); } +static int br_port_slave_changelink(struct net_device *brdev, + struct net_device *dev, + struct nlattr *tb[], + struct nlattr *data[]) +{ + if (!data) + return 0; + return br_setport(br_port_get_rtnl(dev), data); +} + +static int br_port_fill_slave_info(struct sk_buff *skb, + const struct net_device *brdev, + const struct net_device *dev) +{ + return br_port_fill_attrs(skb, br_port_get_rtnl(dev)); +} + +static size_t br_port_get_slave_size(const struct net_device *brdev, + const struct net_device *dev) +{ + return br_port_info_size(); +} + +static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = { + [IFLA_BR_FORWARD_DELAY] = { .type = NLA_U32 }, + [IFLA_BR_HELLO_TIME] = { .type = NLA_U32 }, + [IFLA_BR_MAX_AGE] = { .type = NLA_U32 }, +}; + +static int br_changelink(struct net_device *brdev, struct nlattr *tb[], + struct nlattr *data[]) +{ + struct net_bridge *br = netdev_priv(brdev); + int err; + + if (!data) + return 0; + + if (data[IFLA_BR_FORWARD_DELAY]) { + err = br_set_forward_delay(br, nla_get_u32(data[IFLA_BR_FORWARD_DELAY])); + if (err) + return err; + } + + if (data[IFLA_BR_HELLO_TIME]) { + err = br_set_hello_time(br, nla_get_u32(data[IFLA_BR_HELLO_TIME])); + if (err) + return err; + } + + if (data[IFLA_BR_MAX_AGE]) { + err = br_set_max_age(br, nla_get_u32(data[IFLA_BR_MAX_AGE])); + if (err) + return err; + } + + return 0; +} + +static size_t br_get_size(const struct net_device *brdev) +{ + return nla_total_size(sizeof(u32)) + /* IFLA_BR_FORWARD_DELAY */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_HELLO_TIME */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_MAX_AGE */ + 0; +} + +static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) +{ + struct net_bridge *br = netdev_priv(brdev); + u32 forward_delay = jiffies_to_clock_t(br->forward_delay); + u32 hello_time = jiffies_to_clock_t(br->hello_time); + u32 age_time = jiffies_to_clock_t(br->max_age); + + if (nla_put_u32(skb, IFLA_BR_FORWARD_DELAY, forward_delay) || + nla_put_u32(skb, IFLA_BR_HELLO_TIME, hello_time) || + nla_put_u32(skb, IFLA_BR_MAX_AGE, age_time)) + return -EMSGSIZE; + + return 0; +} + static size_t br_get_link_af_size(const struct net_device *dev) { struct net_port_vlans *pv; @@ -485,12 +564,23 @@ static struct rtnl_af_ops br_af_ops = { }; struct rtnl_link_ops br_link_ops __read_mostly = { - .kind = "bridge", - .priv_size = sizeof(struct net_bridge), - .setup = br_dev_setup, - .validate = br_validate, - .newlink = br_dev_newlink, - .dellink = br_dev_delete, + .kind = "bridge", + .priv_size = sizeof(struct net_bridge), + .setup = br_dev_setup, + .maxtype = IFLA_BRPORT_MAX, + .policy = br_policy, + .validate = br_validate, + .newlink = br_dev_newlink, + .changelink = br_changelink, + .dellink = br_dev_delete, + .get_size = br_get_size, + .fill_info = br_fill_info, + + .slave_maxtype = IFLA_BRPORT_MAX, + .slave_policy = br_port_policy, + .slave_changelink = br_port_slave_changelink, + .get_slave_size = br_port_get_slave_size, + .fill_slave_info = br_port_fill_slave_info, }; int __init br_netlink_init(void) @@ -512,7 +602,7 @@ out_af: return err; } -void __exit br_netlink_fini(void) +void br_netlink_fini(void) { br_mdb_uninit(); rtnl_af_unregister(&br_af_ops); diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c new file mode 100644 index 000000000000..387cb3bd017c --- /dev/null +++ b/net/bridge/br_nf_core.c @@ -0,0 +1,96 @@ +/* + * Handle firewalling core + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek <buytenh@gnu.org> + * Bart De Schuymer <bdschuym@pandora.be> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Lennert dedicates this file to Kerstin Wurdinger. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/in_route.h> +#include <linux/inetdevice.h> +#include <net/route.h> + +#include "br_private.h" +#ifdef CONFIG_SYSCTL +#include <linux/sysctl.h> +#endif + +static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) +{ +} + +static void fake_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) +{ +} + +static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old) +{ + return NULL; +} + +static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) +{ + return NULL; +} + +static unsigned int fake_mtu(const struct dst_entry *dst) +{ + return dst->dev->mtu; +} + +static struct dst_ops fake_dst_ops = { + .family = AF_INET, + .protocol = cpu_to_be16(ETH_P_IP), + .update_pmtu = fake_update_pmtu, + .redirect = fake_redirect, + .cow_metrics = fake_cow_metrics, + .neigh_lookup = fake_neigh_lookup, + .mtu = fake_mtu, +}; + +/* + * Initialize bogus route table used to keep netfilter happy. + * Currently, we fill in the PMTU entry because netfilter + * refragmentation needs it, and the rt_flags entry because + * ipt_REJECT needs it. Future netfilter modules might + * require us to fill additional fields. + */ +static const u32 br_dst_default_metrics[RTAX_MAX] = { + [RTAX_MTU - 1] = 1500, +}; + +void br_netfilter_rtable_init(struct net_bridge *br) +{ + struct rtable *rt = &br->fake_rtable; + + atomic_set(&rt->dst.__refcnt, 1); + rt->dst.dev = br->dev; + rt->dst.path = &rt->dst; + dst_init_metrics(&rt->dst, br_dst_default_metrics, true); + rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE; + rt->dst.ops = &fake_dst_ops; +} + +int __init br_nf_core_init(void) +{ + return dst_entries_init(&fake_dst_ops); +} + +void br_nf_core_fini(void) +{ + dst_entries_destroy(&fake_dst_ops); +} diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 62a7fa2e3569..4d783d071305 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -221,7 +221,7 @@ struct net_bridge struct pcpu_sw_netstats __percpu *stats; spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) struct rtable fake_rtable; bool nf_call_iptables; bool nf_call_ip6tables; @@ -299,16 +299,24 @@ struct net_bridge #ifdef CONFIG_BRIDGE_VLAN_FILTERING u8 vlan_enabled; __be16 vlan_proto; + u16 default_pvid; struct net_port_vlans __rcu *vlan_info; #endif }; struct br_input_skb_cb { struct net_device *brdev; + #ifdef CONFIG_BRIDGE_IGMP_SNOOPING int igmp; int mrouters_only; #endif + + u16 frag_max_size; + +#ifdef CONFIG_BRIDGE_VLAN_FILTERING + bool vlan_filtered; +#endif }; #define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb) @@ -601,11 +609,13 @@ bool br_vlan_find(struct net_bridge *br, u16 vid); void br_recalculate_fwd_mask(struct net_bridge *br); int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); int br_vlan_set_proto(struct net_bridge *br, unsigned long val); -void br_vlan_init(struct net_bridge *br); +int br_vlan_init(struct net_bridge *br); +int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val); int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags); int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); void nbp_vlan_flush(struct net_bridge_port *port); bool nbp_vlan_find(struct net_bridge_port *port, u16 vid); +int nbp_vlan_init(struct net_bridge_port *port); static inline struct net_port_vlans *br_get_vlan_info( const struct net_bridge *br) @@ -638,11 +648,11 @@ static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid) static inline u16 br_get_pvid(const struct net_port_vlans *v) { - /* Return just the VID if it is set, or VLAN_N_VID (invalid vid) if - * vid wasn't set - */ + if (!v) + return 0; + smp_rmb(); - return v->pvid ?: VLAN_N_VID; + return v->pvid; } static inline int br_vlan_enabled(struct net_bridge *br) @@ -701,8 +711,9 @@ static inline void br_recalculate_fwd_mask(struct net_bridge *br) { } -static inline void br_vlan_init(struct net_bridge *br) +static inline int br_vlan_init(struct net_bridge *br) { + return 0; } static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) @@ -735,13 +746,18 @@ static inline bool nbp_vlan_find(struct net_bridge_port *port, u16 vid) return false; } +static inline int nbp_vlan_init(struct net_bridge_port *port) +{ + return 0; +} + static inline u16 br_vlan_get_tag(const struct sk_buff *skb, u16 *tag) { return 0; } static inline u16 br_get_pvid(const struct net_port_vlans *v) { - return VLAN_N_VID; /* Returns invalid vid */ + return 0; } static inline int br_vlan_enabled(struct net_bridge *br) @@ -751,18 +767,19 @@ static inline int br_vlan_enabled(struct net_bridge *br) #endif /* br_netfilter.c */ -#ifdef CONFIG_BRIDGE_NETFILTER -int br_netfilter_init(void); -void br_netfilter_fini(void); +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) +int br_nf_core_init(void); +void br_nf_core_fini(void); void br_netfilter_rtable_init(struct net_bridge *); #else -#define br_netfilter_init() (0) -#define br_netfilter_fini() do { } while (0) +static inline int br_nf_core_init(void) { return 0; } +static inline void br_nf_core_fini(void) {} #define br_netfilter_rtable_init(x) #endif /* br_stp.c */ void br_log_state(const struct net_bridge_port *p); +void br_set_state(struct net_bridge_port *p, unsigned int state); struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no); void br_init_port(struct net_bridge_port *p); void br_become_designated_port(struct net_bridge_port *p); diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 3c86f0538cbb..2b047bcf42a4 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -36,6 +36,11 @@ void br_log_state(const struct net_bridge_port *p) br_port_state_names[p->state]); } +void br_set_state(struct net_bridge_port *p, unsigned int state) +{ + p->state = state; +} + /* called under bridge lock */ struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no) { @@ -107,7 +112,7 @@ static void br_root_port_block(const struct net_bridge *br, br_notice(br, "port %u(%s) tried to become root port (blocked)", (unsigned int) p->port_no, p->dev->name); - p->state = BR_STATE_LISTENING; + br_set_state(p, BR_STATE_LISTENING); br_log_state(p); br_ifinfo_notify(RTM_NEWLINK, p); @@ -387,7 +392,7 @@ static void br_make_blocking(struct net_bridge_port *p) p->state == BR_STATE_LEARNING) br_topology_change_detection(p->br); - p->state = BR_STATE_BLOCKING; + br_set_state(p, BR_STATE_BLOCKING); br_log_state(p); br_ifinfo_notify(RTM_NEWLINK, p); @@ -404,13 +409,13 @@ static void br_make_forwarding(struct net_bridge_port *p) return; if (br->stp_enabled == BR_NO_STP || br->forward_delay == 0) { - p->state = BR_STATE_FORWARDING; + br_set_state(p, BR_STATE_FORWARDING); br_topology_change_detection(br); del_timer(&p->forward_delay_timer); } else if (br->stp_enabled == BR_KERNEL_STP) - p->state = BR_STATE_LISTENING; + br_set_state(p, BR_STATE_LISTENING); else - p->state = BR_STATE_LEARNING; + br_set_state(p, BR_STATE_LEARNING); br_multicast_enable_port(p); br_log_state(p); diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 189ba1e7d851..41146872c1b4 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -37,7 +37,7 @@ void br_init_port(struct net_bridge_port *p) { p->port_id = br_make_port_id(p->priority, p->port_no); br_become_designated_port(p); - p->state = BR_STATE_BLOCKING; + br_set_state(p, BR_STATE_BLOCKING); p->topology_change_ack = 0; p->config_pending = 0; } @@ -100,7 +100,7 @@ void br_stp_disable_port(struct net_bridge_port *p) wasroot = br_is_root_bridge(br); br_become_designated_port(p); - p->state = BR_STATE_DISABLED; + br_set_state(p, BR_STATE_DISABLED); p->topology_change_ack = 0; p->config_pending = 0; diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 558c46d19e05..4fcaa67750fd 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -87,11 +87,11 @@ static void br_forward_delay_timer_expired(unsigned long arg) (unsigned int) p->port_no, p->dev->name); spin_lock(&br->lock); if (p->state == BR_STATE_LISTENING) { - p->state = BR_STATE_LEARNING; + br_set_state(p, BR_STATE_LEARNING); mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay); } else if (p->state == BR_STATE_LEARNING) { - p->state = BR_STATE_FORWARDING; + br_set_state(p, BR_STATE_FORWARDING); if (br_is_designated_for_some_port(br)) br_topology_change_detection(br); netif_carrier_on(br->dev); diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index c9e2572b15f4..4c97fc50fb70 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -629,7 +629,7 @@ static ssize_t multicast_startup_query_interval_store( } static DEVICE_ATTR_RW(multicast_startup_query_interval); #endif -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static ssize_t nf_call_iptables_show( struct device *d, struct device_attribute *attr, char *buf) { @@ -725,6 +725,22 @@ static ssize_t vlan_protocol_store(struct device *d, return store_bridge_parm(d, buf, len, br_vlan_set_proto); } static DEVICE_ATTR_RW(vlan_protocol); + +static ssize_t default_pvid_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br->default_pvid); +} + +static ssize_t default_pvid_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, br_vlan_set_default_pvid); +} +static DEVICE_ATTR_RW(default_pvid); #endif static struct attribute *bridge_attrs[] = { @@ -763,7 +779,7 @@ static struct attribute *bridge_attrs[] = { &dev_attr_multicast_query_response_interval.attr, &dev_attr_multicast_startup_query_interval.attr, #endif -#ifdef CONFIG_BRIDGE_NETFILTER +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) &dev_attr_nf_call_iptables.attr, &dev_attr_nf_call_ip6tables.attr, &dev_attr_nf_call_arptables.attr, @@ -771,6 +787,7 @@ static struct attribute *bridge_attrs[] = { #ifdef CONFIG_BRIDGE_VLAN_FILTERING &dev_attr_vlan_filtering.attr, &dev_attr_vlan_protocol.attr, + &dev_attr_default_pvid.attr, #endif NULL }; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index febb0f87fa37..150048fb99b0 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -27,9 +27,13 @@ static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags) { if (flags & BRIDGE_VLAN_INFO_PVID) __vlan_add_pvid(v, vid); + else + __vlan_delete_pvid(v, vid); if (flags & BRIDGE_VLAN_INFO_UNTAGGED) set_bit(vid, v->untagged_bitmap); + else + clear_bit(vid, v->untagged_bitmap); } static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) @@ -125,7 +129,8 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, { u16 vid; - if (!br->vlan_enabled) + /* If this packet was not filtered at input, let it pass */ + if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) goto out; /* Vlan filter table must be configured at this point. The @@ -164,8 +169,10 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, /* If VLAN filtering is disabled on the bridge, all packets are * permitted. */ - if (!br->vlan_enabled) + if (!br->vlan_enabled) { + BR_INPUT_SKB_CB(skb)->vlan_filtered = false; return true; + } /* If there are no vlan in the permitted list, all packets are * rejected. @@ -173,6 +180,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, if (!v) goto drop; + BR_INPUT_SKB_CB(skb)->vlan_filtered = true; proto = br->vlan_proto; /* If vlan tx offload is disabled on bridge device and frame was @@ -181,7 +189,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, */ if (unlikely(!vlan_tx_tag_present(skb) && skb->protocol == proto)) { - skb = vlan_untag(skb); + skb = skb_vlan_untag(skb); if (unlikely(!skb)) return false; } @@ -215,7 +223,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, * See if pvid is set on this port. That tells us which * vlan untagged or priority-tagged traffic belongs to. */ - if (pvid == VLAN_N_VID) + if (!pvid) goto drop; /* PVID is set on this port. Any untagged or priority-tagged @@ -251,7 +259,8 @@ bool br_allowed_egress(struct net_bridge *br, { u16 vid; - if (!br->vlan_enabled) + /* If this packet was not filtered at input, let it pass */ + if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) return true; if (!v) @@ -270,6 +279,7 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid) struct net_bridge *br = p->br; struct net_port_vlans *v; + /* If filtering was disabled at input, let it pass. */ if (!br->vlan_enabled) return true; @@ -282,7 +292,7 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid) if (!*vid) { *vid = br_get_pvid(v); - if (*vid == VLAN_N_VID) + if (!*vid) return false; return true; @@ -489,9 +499,141 @@ err_filt: goto unlock; } -void br_vlan_init(struct net_bridge *br) +static bool vlan_default_pvid(struct net_port_vlans *pv, u16 vid) +{ + return pv && vid == pv->pvid && test_bit(vid, pv->untagged_bitmap); +} + +static void br_vlan_disable_default_pvid(struct net_bridge *br) +{ + struct net_bridge_port *p; + u16 pvid = br->default_pvid; + + /* Disable default_pvid on all ports where it is still + * configured. + */ + if (vlan_default_pvid(br_get_vlan_info(br), pvid)) + br_vlan_delete(br, pvid); + + list_for_each_entry(p, &br->port_list, list) { + if (vlan_default_pvid(nbp_get_vlan_info(p), pvid)) + nbp_vlan_delete(p, pvid); + } + + br->default_pvid = 0; +} + +static int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid) +{ + struct net_bridge_port *p; + u16 old_pvid; + int err = 0; + unsigned long *changed; + + changed = kcalloc(BITS_TO_LONGS(BR_MAX_PORTS), sizeof(unsigned long), + GFP_KERNEL); + if (!changed) + return -ENOMEM; + + old_pvid = br->default_pvid; + + /* Update default_pvid config only if we do not conflict with + * user configuration. + */ + if ((!old_pvid || vlan_default_pvid(br_get_vlan_info(br), old_pvid)) && + !br_vlan_find(br, pvid)) { + err = br_vlan_add(br, pvid, + BRIDGE_VLAN_INFO_PVID | + BRIDGE_VLAN_INFO_UNTAGGED); + if (err) + goto out; + br_vlan_delete(br, old_pvid); + set_bit(0, changed); + } + + list_for_each_entry(p, &br->port_list, list) { + /* Update default_pvid config only if we do not conflict with + * user configuration. + */ + if ((old_pvid && + !vlan_default_pvid(nbp_get_vlan_info(p), old_pvid)) || + nbp_vlan_find(p, pvid)) + continue; + + err = nbp_vlan_add(p, pvid, + BRIDGE_VLAN_INFO_PVID | + BRIDGE_VLAN_INFO_UNTAGGED); + if (err) + goto err_port; + nbp_vlan_delete(p, old_pvid); + set_bit(p->port_no, changed); + } + + br->default_pvid = pvid; + +out: + kfree(changed); + return err; + +err_port: + list_for_each_entry_continue_reverse(p, &br->port_list, list) { + if (!test_bit(p->port_no, changed)) + continue; + + if (old_pvid) + nbp_vlan_add(p, old_pvid, + BRIDGE_VLAN_INFO_PVID | + BRIDGE_VLAN_INFO_UNTAGGED); + nbp_vlan_delete(p, pvid); + } + + if (test_bit(0, changed)) { + if (old_pvid) + br_vlan_add(br, old_pvid, + BRIDGE_VLAN_INFO_PVID | + BRIDGE_VLAN_INFO_UNTAGGED); + br_vlan_delete(br, pvid); + } + goto out; +} + +int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val) +{ + u16 pvid = val; + int err = 0; + + if (val >= VLAN_VID_MASK) + return -EINVAL; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (pvid == br->default_pvid) + goto unlock; + + /* Only allow default pvid change when filtering is disabled */ + if (br->vlan_enabled) { + pr_info_once("Please disable vlan filtering to change default_pvid\n"); + err = -EPERM; + goto unlock; + } + + if (!pvid) + br_vlan_disable_default_pvid(br); + else + err = __br_vlan_set_default_pvid(br, pvid); + +unlock: + rtnl_unlock(); + return err; +} + +int br_vlan_init(struct net_bridge *br) { br->vlan_proto = htons(ETH_P_8021Q); + br->default_pvid = 1; + return br_vlan_add(br, 1, + BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED); } /* Must be protected by RTNL. @@ -583,3 +725,12 @@ out: rcu_read_unlock(); return found; } + +int nbp_vlan_init(struct net_bridge_port *p) +{ + return p->br->default_pvid ? + nbp_vlan_add(p, p->br->default_pvid, + BRIDGE_VLAN_INFO_PVID | + BRIDGE_VLAN_INFO_UNTAGGED) : + 0; +} diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 1059ed3bc255..d9a8c05d995d 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -26,6 +26,7 @@ #include <asm/uaccess.h> #include <linux/smp.h> #include <linux/cpumask.h> +#include <linux/audit.h> #include <net/sock.h> /* needed for logical [in,out]-dev filtering */ #include "../br_private.h" @@ -327,10 +328,7 @@ find_inlist_lock_noload(struct list_head *head, const char *name, int *error, char name[EBT_FUNCTION_MAXNAMELEN]; } *e; - *error = mutex_lock_interruptible(mutex); - if (*error != 0) - return NULL; - + mutex_lock(mutex); list_for_each_entry(e, head, list) { if (strcmp(e->name, name) == 0) return e; @@ -1061,6 +1059,20 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, vfree(table); vfree(counterstmp); + +#ifdef CONFIG_AUDIT + if (audit_enabled) { + struct audit_buffer *ab; + + ab = audit_log_start(current->audit_context, GFP_KERNEL, + AUDIT_NETFILTER_CFG); + if (ab) { + audit_log_format(ab, "table=%s family=%u entries=%u", + repl->name, AF_BRIDGE, repl->nentries); + audit_log_end(ab); + } + } +#endif return ret; free_unlock: @@ -1203,10 +1215,7 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table) table->private = newinfo; rwlock_init(&table->lock); - ret = mutex_lock_interruptible(&ebt_mutex); - if (ret != 0) - goto free_chainstack; - + mutex_lock(&ebt_mutex); list_for_each_entry(t, &net->xt.tables[NFPROTO_BRIDGE], list) { if (strcmp(t->name, table->name) == 0) { ret = -EEXIST; diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index 5bcc0d8b31f2..074c557ab505 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -34,9 +34,11 @@ static struct nft_af_info nft_af_bridge __read_mostly = { .owner = THIS_MODULE, .nops = 1, .hooks = { + [NF_BR_PRE_ROUTING] = nft_do_chain_bridge, [NF_BR_LOCAL_IN] = nft_do_chain_bridge, [NF_BR_FORWARD] = nft_do_chain_bridge, [NF_BR_LOCAL_OUT] = nft_do_chain_bridge, + [NF_BR_POST_ROUTING] = nft_do_chain_bridge, }, }; @@ -73,9 +75,11 @@ static const struct nf_chain_type filter_bridge = { .type = NFT_CHAIN_T_DEFAULT, .family = NFPROTO_BRIDGE, .owner = THIS_MODULE, - .hook_mask = (1 << NF_BR_LOCAL_IN) | + .hook_mask = (1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_LOCAL_IN) | (1 << NF_BR_FORWARD) | - (1 << NF_BR_LOCAL_OUT), + (1 << NF_BR_LOCAL_OUT) | + (1 << NF_BR_POST_ROUTING), }; static int __init nf_tables_bridge_init(void) diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index ee3ffe93e14e..48da2c54a69e 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -14,21 +14,380 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nft_reject.h> +#include <net/netfilter/ipv4/nf_reject.h> +#include <net/netfilter/ipv6/nf_reject.h> +#include <linux/ip.h> +#include <net/ip.h> +#include <net/ip6_checksum.h> +#include <linux/netfilter_bridge.h> +#include "../br_private.h" + +static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, + struct sk_buff *nskb) +{ + struct ethhdr *eth; + + eth = (struct ethhdr *)skb_push(nskb, ETH_HLEN); + skb_reset_mac_header(nskb); + ether_addr_copy(eth->h_source, eth_hdr(oldskb)->h_dest); + ether_addr_copy(eth->h_dest, eth_hdr(oldskb)->h_source); + eth->h_proto = eth_hdr(oldskb)->h_proto; + skb_pull(nskb, ETH_HLEN); +} + +static int nft_reject_iphdr_validate(struct sk_buff *oldskb) +{ + struct iphdr *iph; + u32 len; + + if (!pskb_may_pull(oldskb, sizeof(struct iphdr))) + return 0; + + iph = ip_hdr(oldskb); + if (iph->ihl < 5 || iph->version != 4) + return 0; + + len = ntohs(iph->tot_len); + if (oldskb->len < len) + return 0; + else if (len < (iph->ihl*4)) + return 0; + + if (!pskb_may_pull(oldskb, iph->ihl*4)) + return 0; + + return 1; +} + +static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + struct iphdr *niph; + const struct tcphdr *oth; + struct tcphdr _oth; + + if (!nft_reject_iphdr_validate(oldskb)) + return; + + oth = nf_reject_ip_tcphdr_get(oldskb, &_oth, hook); + if (!oth) + return; + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, + sysctl_ip_default_ttl); + nf_reject_ip_tcphdr_put(nskb, oldskb, oth); + niph->ttl = sysctl_ip_default_ttl; + niph->tot_len = htons(nskb->len); + ip_send_check(niph); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_deliver(br_port_get_rcu(oldskb->dev), nskb); +} + +static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook, + u8 code) +{ + struct sk_buff *nskb; + struct iphdr *niph; + struct icmphdr *icmph; + unsigned int len; + void *payload; + __wsum csum; + + if (!nft_reject_iphdr_validate(oldskb)) + return; + + /* IP header checks: fragment. */ + if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) + return; + + /* RFC says return as much as we can without exceeding 576 bytes. */ + len = min_t(unsigned int, 536, oldskb->len); + + if (!pskb_may_pull(oldskb, len)) + return; + + if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), 0)) + return; + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmphdr) + + LL_MAX_HEADER + len, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP, + sysctl_ip_default_ttl); + + skb_reset_transport_header(nskb); + icmph = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr)); + memset(icmph, 0, sizeof(*icmph)); + icmph->type = ICMP_DEST_UNREACH; + icmph->code = code; + + payload = skb_put(nskb, len); + memcpy(payload, skb_network_header(oldskb), len); + + csum = csum_partial((void *)icmph, len + sizeof(struct icmphdr), 0); + icmph->checksum = csum_fold(csum); + + niph->tot_len = htons(nskb->len); + ip_send_check(niph); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_deliver(br_port_get_rcu(oldskb->dev), nskb); +} + +static int nft_reject_ip6hdr_validate(struct sk_buff *oldskb) +{ + struct ipv6hdr *hdr; + u32 pkt_len; + + if (!pskb_may_pull(oldskb, sizeof(struct ipv6hdr))) + return 0; + + hdr = ipv6_hdr(oldskb); + if (hdr->version != 6) + return 0; + + pkt_len = ntohs(hdr->payload_len); + if (pkt_len + sizeof(struct ipv6hdr) > oldskb->len) + return 0; + + return 1; +} + +static void nft_reject_br_send_v6_tcp_reset(struct net *net, + struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + const struct tcphdr *oth; + struct tcphdr _oth; + unsigned int otcplen; + struct ipv6hdr *nip6h; + + if (!nft_reject_ip6hdr_validate(oldskb)) + return; + + oth = nf_reject_ip6_tcphdr_get(oldskb, &_oth, &otcplen, hook); + if (!oth) + return; + + nskb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, + net->ipv6.devconf_all->hop_limit); + nf_reject_ip6_tcphdr_put(nskb, oldskb, oth, otcplen); + nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr)); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_deliver(br_port_get_rcu(oldskb->dev), nskb); +} + +static void nft_reject_br_send_v6_unreach(struct net *net, + struct sk_buff *oldskb, int hook, + u8 code) +{ + struct sk_buff *nskb; + struct ipv6hdr *nip6h; + struct icmp6hdr *icmp6h; + unsigned int len; + void *payload; + + if (!nft_reject_ip6hdr_validate(oldskb)) + return; + + /* Include "As much of invoking packet as possible without the ICMPv6 + * packet exceeding the minimum IPv6 MTU" in the ICMP payload. + */ + len = min_t(unsigned int, 1220, oldskb->len); + + if (!pskb_may_pull(oldskb, len)) + return; + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmp6hdr) + + LL_MAX_HEADER + len, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_ICMPV6, + net->ipv6.devconf_all->hop_limit); + + skb_reset_transport_header(nskb); + icmp6h = (struct icmp6hdr *)skb_put(nskb, sizeof(struct icmp6hdr)); + memset(icmp6h, 0, sizeof(*icmp6h)); + icmp6h->icmp6_type = ICMPV6_DEST_UNREACH; + icmp6h->icmp6_code = code; + + payload = skb_put(nskb, len); + memcpy(payload, skb_network_header(oldskb), len); + nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr)); + + icmp6h->icmp6_cksum = + csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, + nskb->len - sizeof(struct ipv6hdr), + IPPROTO_ICMPV6, + csum_partial(icmp6h, + nskb->len - sizeof(struct ipv6hdr), + 0)); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_deliver(br_port_get_rcu(oldskb->dev), nskb); +} static void nft_reject_bridge_eval(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1], const struct nft_pktinfo *pkt) { + struct nft_reject *priv = nft_expr_priv(expr); + struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); + const unsigned char *dest = eth_hdr(pkt->skb)->h_dest; + + if (is_broadcast_ether_addr(dest) || + is_multicast_ether_addr(dest)) + goto out; + switch (eth_hdr(pkt->skb)->h_proto) { case htons(ETH_P_IP): - return nft_reject_ipv4_eval(expr, data, pkt); + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + nft_reject_br_send_v4_unreach(pkt->skb, + pkt->ops->hooknum, + priv->icmp_code); + break; + case NFT_REJECT_TCP_RST: + nft_reject_br_send_v4_tcp_reset(pkt->skb, + pkt->ops->hooknum); + break; + case NFT_REJECT_ICMPX_UNREACH: + nft_reject_br_send_v4_unreach(pkt->skb, + pkt->ops->hooknum, + nft_reject_icmp_code(priv->icmp_code)); + break; + } + break; case htons(ETH_P_IPV6): - return nft_reject_ipv6_eval(expr, data, pkt); + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + nft_reject_br_send_v6_unreach(net, pkt->skb, + pkt->ops->hooknum, + priv->icmp_code); + break; + case NFT_REJECT_TCP_RST: + nft_reject_br_send_v6_tcp_reset(net, pkt->skb, + pkt->ops->hooknum); + break; + case NFT_REJECT_ICMPX_UNREACH: + nft_reject_br_send_v6_unreach(net, pkt->skb, + pkt->ops->hooknum, + nft_reject_icmpv6_code(priv->icmp_code)); + break; + } + break; default: /* No explicit way to reject this protocol, drop it. */ - data[NFT_REG_VERDICT].verdict = NF_DROP; break; } +out: + data[NFT_REG_VERDICT].verdict = NF_DROP; +} + +static int nft_reject_bridge_validate_hooks(const struct nft_chain *chain) +{ + struct nft_base_chain *basechain; + + if (chain->flags & NFT_BASE_CHAIN) { + basechain = nft_base_chain(chain); + + switch (basechain->ops[0].hooknum) { + case NF_BR_PRE_ROUTING: + case NF_BR_LOCAL_IN: + break; + default: + return -EOPNOTSUPP; + } + } + return 0; +} + +static int nft_reject_bridge_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_reject *priv = nft_expr_priv(expr); + int icmp_code, err; + + err = nft_reject_bridge_validate_hooks(ctx->chain); + if (err < 0) + return err; + + if (tb[NFTA_REJECT_TYPE] == NULL) + return -EINVAL; + + priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE])); + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + case NFT_REJECT_ICMPX_UNREACH: + if (tb[NFTA_REJECT_ICMP_CODE] == NULL) + return -EINVAL; + + icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]); + if (priv->type == NFT_REJECT_ICMPX_UNREACH && + icmp_code > NFT_REJECT_ICMPX_MAX) + return -EINVAL; + + priv->icmp_code = icmp_code; + break; + case NFT_REJECT_TCP_RST: + break; + default: + return -EINVAL; + } + return 0; +} + +static int nft_reject_bridge_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_reject *priv = nft_expr_priv(expr); + + if (nla_put_be32(skb, NFTA_REJECT_TYPE, htonl(priv->type))) + goto nla_put_failure; + + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + case NFT_REJECT_ICMPX_UNREACH: + if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) + goto nla_put_failure; + break; + } + + return 0; + +nla_put_failure: + return -1; +} + +static int nft_reject_bridge_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_reject_bridge_validate_hooks(ctx->chain); } static struct nft_expr_type nft_reject_bridge_type; @@ -36,8 +395,9 @@ static const struct nft_expr_ops nft_reject_bridge_ops = { .type = &nft_reject_bridge_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), .eval = nft_reject_bridge_eval, - .init = nft_reject_init, - .dump = nft_reject_dump, + .init = nft_reject_bridge_init, + .dump = nft_reject_bridge_dump, + .validate = nft_reject_bridge_validate, }; static struct nft_expr_type nft_reject_bridge_type __read_mostly = { |