From 10b68487869031828aede7313c2befc53d6d30ec Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 27 Oct 2014 11:56:06 +0100 Subject: mac80211: flush keys for AP mode on ieee80211_do_stop Userspace can add keys to an AP mode interface before start_ap has been called. If there have been no calls to start_ap/stop_ap in the mean time, the keys will still be around when the interface is brought down. Signed-off-by: Felix Fietkau [adjust comments, fix AP_VLAN case] Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index af237223a8cd..3b9e2b7b3f30 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -898,6 +898,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, list_del(&sdata->u.vlan.list); mutex_unlock(&local->mtx); RCU_INIT_POINTER(sdata->vif.chanctx_conf, NULL); + /* see comment in the default case below */ + ieee80211_free_keys(sdata, true); /* no need to tell driver */ break; case NL80211_IFTYPE_MONITOR: @@ -923,17 +925,16 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, /* * When we get here, the interface is marked down. * Free the remaining keys, if there are any - * (shouldn't be, except maybe in WDS mode?) + * (which can happen in AP mode if userspace sets + * keys before the interface is operating, and maybe + * also in WDS mode) * * Force the key freeing to always synchronize_net() * to wait for the RX path in case it is using this - * interface enqueuing frames * at this very time on + * interface enqueuing frames at this very time on * another CPU. 
*/ ieee80211_free_keys(sdata, true); - - /* fall through */ - case NL80211_IFTYPE_AP: skb_queue_purge(&sdata->skb_queue); } -- cgit v1.2.3 From 84469a45a1bedec9918e94ab2f78c5dc0739e4a7 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Tue, 28 Oct 2014 13:33:04 +0200 Subject: mac80211: use secondary channel offset IE also beacons during CSA If we are switching from an HT40+ to an HT40- channel (or vice-versa), we need the secondary channel offset IE to specify what is the post-CSA offset to be used. This applies both to beacons and to probe responses. In ieee80211_parse_ch_switch_ie() we were ignoring this IE from beacons and using the *current* HT information IE instead. This was causing us to use the same offset as before the switch. Fix that by using the secondary channel offset IE also for beacons and don't ever use the pre-switch offset. Additionally, remove the "beacon" argument from ieee80211_parse_ch_switch_ie(), since it's not needed anymore. Cc: stable@vger.kernel.org Reported-by: Jouni Malinen Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- net/mac80211/ibss.c | 2 +- net/mac80211/ieee80211_i.h | 3 +-- net/mac80211/mesh.c | 2 +- net/mac80211/mlme.c | 2 +- net/mac80211/spectmgmt.c | 18 ++++++------------ 5 files changed, 10 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 56b53571c807..509bc157ce55 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -805,7 +805,7 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, memset(&params, 0, sizeof(params)); memset(&csa_ie, 0, sizeof(csa_ie)); - err = ieee80211_parse_ch_switch_ie(sdata, elems, beacon, + err = ieee80211_parse_ch_switch_ie(sdata, elems, ifibss->chandef.chan->band, sta_flags, ifibss->bssid, &csa_ie); /* can't switch to destination channel, fail */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c2aaec4dfcf0..8c68da30595d 100644 --- a/net/mac80211/ieee80211_i.h +++ 
b/net/mac80211/ieee80211_i.h @@ -1642,7 +1642,6 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, * ieee80211_parse_ch_switch_ie - parses channel switch IEs * @sdata: the sdata of the interface which has received the frame * @elems: parsed 802.11 elements received with the frame - * @beacon: indicates if the frame was a beacon or probe response * @current_band: indicates the current band * @sta_flags: contains information about own capabilities and restrictions * to decide which channel switch announcements can be accepted. Only the @@ -1656,7 +1655,7 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, * Return: 0 on success, <0 on error and >0 if there is nothing to parse. */ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, - struct ieee802_11_elems *elems, bool beacon, + struct ieee802_11_elems *elems, enum ieee80211_band current_band, u32 sta_flags, u8 *bssid, struct ieee80211_csa_ie *csa_ie); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index e9f99c1e3fad..0c8b2a77d312 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -874,7 +874,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, memset(&params, 0, sizeof(params)); memset(&csa_ie, 0, sizeof(csa_ie)); - err = ieee80211_parse_ch_switch_ie(sdata, elems, beacon, band, + err = ieee80211_parse_ch_switch_ie(sdata, elems, band, sta_flags, sdata->vif.addr, &csa_ie); if (err < 0) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2de88704278b..08f51c6d0953 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1072,7 +1072,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, current_band = cbss->channel->band; memset(&csa_ie, 0, sizeof(csa_ie)); - res = ieee80211_parse_ch_switch_ie(sdata, elems, beacon, current_band, + res = ieee80211_parse_ch_switch_ie(sdata, elems, current_band, ifmgd->flags, ifmgd->associated->bssid, &csa_ie); if (res < 0) diff --git 
a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 6ab009070084..efeba56c913b 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -22,7 +22,7 @@ #include "wme.h" int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, - struct ieee802_11_elems *elems, bool beacon, + struct ieee802_11_elems *elems, enum ieee80211_band current_band, u32 sta_flags, u8 *bssid, struct ieee80211_csa_ie *csa_ie) @@ -91,19 +91,13 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, return -EINVAL; } - if (!beacon && sec_chan_offs) { + if (sec_chan_offs) { secondary_channel_offset = sec_chan_offs->sec_chan_offs; - } else if (beacon && ht_oper) { - secondary_channel_offset = - ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET; } else if (!(sta_flags & IEEE80211_STA_DISABLE_HT)) { - /* If it's not a beacon, HT is enabled and the IE not present, - * it's 20 MHz, 802.11-2012 8.5.2.6: - * This element [the Secondary Channel Offset Element] is - * present when switching to a 40 MHz channel. It may be - * present when switching to a 20 MHz channel (in which - * case the secondary channel offset is set to SCN). - */ + /* If the secondary channel offset IE is not present, + * we can't know what's the post-CSA offset, so the + * best we can do is use 20MHz. + */ secondary_channel_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE; } -- cgit v1.2.3 From ff1e417c7c239b7abfe70aa90460a77eaafc7f83 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Tue, 28 Oct 2014 13:33:05 +0200 Subject: mac80211: schedule the actual switch of the station before CSA count 0 Due to the time it takes to process the beacon that started the CSA process, we may be late for the switch if we try to reach exactly beacon 0. To avoid that, use count - 1 when calculating the switch time. 
Cc: stable@vger.kernel.org Reported-by: Jouni Malinen Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 08f51c6d0953..93af0f1c9d99 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1168,7 +1168,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, ieee80211_queue_work(&local->hw, &ifmgd->chswitch_work); else mod_timer(&ifmgd->chswitch_timer, - TU_TO_EXP_TIME(csa_ie.count * cbss->beacon_interval)); + TU_TO_EXP_TIME((csa_ie.count - 1) * + cbss->beacon_interval)); } static bool -- cgit v1.2.3 From 46238845bd609a5c0fbe076e1b82b4c5b33360b2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 21 Oct 2014 20:56:42 +0200 Subject: mac80211: properly flush delayed scan work on interface removal When an interface is deleted, an ongoing hardware scan is canceled and the driver must abort the scan, at the very least reporting completion while the interface is removed. However, if it scheduled the work that might only run after everything is said and done, which leads to cfg80211 warning that the scan isn't reported as finished yet; this is no fault of the driver, it already did, but mac80211 hasn't processed it. To fix this situation, flush the delayed work when the interface being removed is the one that was executing the scan. 
Cc: stable@vger.kernel.org Reported-by: Sujith Manoharan Tested-by: Sujith Manoharan Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 3b9e2b7b3f30..653f5eb07a27 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -766,10 +766,12 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, int i, flushed; struct ps_data *ps; struct cfg80211_chan_def chandef; + bool cancel_scan; clear_bit(SDATA_STATE_RUNNING, &sdata->state); - if (rcu_access_pointer(local->scan_sdata) == sdata) + cancel_scan = rcu_access_pointer(local->scan_sdata) == sdata; + if (cancel_scan) ieee80211_scan_cancel(local); /* @@ -992,6 +994,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_ps(local, -1); + if (cancel_scan) + flush_delayed_work(&local->scan_work); + if (local->open_count == 0) { ieee80211_stop_device(local); -- cgit v1.2.3 From b8fff407a180286aa683d543d878d98d9fc57b13 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 3 Nov 2014 13:57:46 +0100 Subject: mac80211: fix use-after-free in defragmentation Upon receiving the last fragment, all but the first fragment are freed, but the multicast check for statistics at the end of the function refers to the current skb (the last fragment) causing a use-after-free bug. Since multicast frames cannot be fragmented and we check for this early in the function, just modify that check to also do the accounting to fix the issue. 
Cc: stable@vger.kernel.org Reported-by: Yosef Khyal Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index b04ca4049c95..a37f9af634cb 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1678,11 +1678,14 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) sc = le16_to_cpu(hdr->seq_ctrl); frag = sc & IEEE80211_SCTL_FRAG; - if (likely((!ieee80211_has_morefrags(fc) && frag == 0) || - is_multicast_ether_addr(hdr->addr1))) { - /* not fragmented */ + if (likely(!ieee80211_has_morefrags(fc) && frag == 0)) + goto out; + + if (is_multicast_ether_addr(hdr->addr1)) { + rx->local->dot11MulticastReceivedFrameCount++; goto out; } + I802_DEBUG_INC(rx->local->rx_handlers_fragments); if (skb_linearize(rx->skb)) @@ -1775,10 +1778,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) out: if (rx->sta) rx->sta->rx_packets++; - if (is_multicast_ether_addr(hdr->addr1)) - rx->local->dot11MulticastReceivedFrameCount++; - else - ieee80211_led_rx(rx->local); + ieee80211_led_rx(rx->local); return RX_CONTINUE; } -- cgit v1.2.3 From c1207c049b204b0a96535dc5416aee331b51e0e1 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 2 Nov 2014 18:19:15 -0800 Subject: netfilter: nft_reject_bridge: Fix powerpc build error Fix: net/bridge/netfilter/nft_reject_bridge.c: In function 'nft_reject_br_send_v6_unreach': net/bridge/netfilter/nft_reject_bridge.c:240:3: error: implicit declaration of function 'csum_ipv6_magic' csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, ^ make[3]: *** [net/bridge/netfilter/nft_reject_bridge.o] Error 1 Seen with powerpc:allmodconfig. Fixes: 523b929d5446 ("netfilter: nft_reject_bridge: don't use IP stack to reject traffic") Cc: Pablo Neira Ayuso Signed-off-by: Guenter Roeck Signed-off-by: David S. 
Miller --- net/bridge/netfilter/nft_reject_bridge.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 654c9018e3e7..48da2c54a69e 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "../br_private.h" -- cgit v1.2.3 From 6c6151daaf2d8dc2046d9926539feed5f66bf74e Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 3 Nov 2014 09:19:27 +0100 Subject: ip6_tunnel: Use ip6_tnl_dev_init as the ndo_init function. ip6_tnl_dev_init() sets the dev->iflink via a call to ip6_tnl_link_config(). After that, register_netdevice() sets dev->iflink = -1. So we lose the iflink configuration for ipv6 tunnels. Fix this by using ip6_tnl_dev_init() as the ndo_init function. Then ip6_tnl_dev_init() is called after dev->iflink is set to -1 from register_netdevice(). Signed-off-by: Steffen Klassert Signed-off-by: David S. 
Miller --- net/ipv6/ip6_tunnel.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9409887fb664..9cb94cfa0ae7 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -272,9 +272,6 @@ static int ip6_tnl_create2(struct net_device *dev) int err; t = netdev_priv(dev); - err = ip6_tnl_dev_init(dev); - if (err < 0) - goto out; err = register_netdevice(dev); if (err < 0) @@ -1462,6 +1459,7 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) static const struct net_device_ops ip6_tnl_netdev_ops = { + .ndo_init = ip6_tnl_dev_init, .ndo_uninit = ip6_tnl_dev_uninit, .ndo_start_xmit = ip6_tnl_xmit, .ndo_do_ioctl = ip6_tnl_ioctl, @@ -1546,16 +1544,10 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) struct ip6_tnl *t = netdev_priv(dev); struct net *net = dev_net(dev); struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); - int err = ip6_tnl_dev_init_gen(dev); - - if (err) - return err; t->parms.proto = IPPROTO_IPV6; dev_hold(dev); - ip6_tnl_link_config(t); - rcu_assign_pointer(ip6n->tnls_wc[0], t); return 0; } -- cgit v1.2.3 From 16a0231bf7dc3fb37e9b1f1cb1a277dc220b5c5e Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 3 Nov 2014 09:19:28 +0100 Subject: vti6: Use vti6_dev_init as the ndo_init function. vti6_dev_init() sets the dev->iflink via a call to vti6_link_config(). After that, register_netdevice() sets dev->iflink = -1. So we lose the iflink configuration for vti6 tunnels. Fix this by using vti6_dev_init() as the ndo_init function. Then vti6_dev_init() is called after dev->iflink is set to -1 from register_netdevice(). Signed-off-by: Steffen Klassert Signed-off-by: David S. 
Miller --- net/ipv6/ip6_vti.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index d440bb585524..31089d153fd3 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -172,10 +172,6 @@ static int vti6_tnl_create2(struct net_device *dev) struct vti6_net *ip6n = net_generic(net, vti6_net_id); int err; - err = vti6_dev_init(dev); - if (err < 0) - goto out; - err = register_netdevice(dev); if (err < 0) goto out; @@ -783,6 +779,7 @@ static int vti6_change_mtu(struct net_device *dev, int new_mtu) } static const struct net_device_ops vti6_netdev_ops = { + .ndo_init = vti6_dev_init, .ndo_uninit = vti6_dev_uninit, .ndo_start_xmit = vti6_tnl_xmit, .ndo_do_ioctl = vti6_ioctl, @@ -852,16 +849,10 @@ static int __net_init vti6_fb_tnl_dev_init(struct net_device *dev) struct ip6_tnl *t = netdev_priv(dev); struct net *net = dev_net(dev); struct vti6_net *ip6n = net_generic(net, vti6_net_id); - int err = vti6_dev_init_gen(dev); - - if (err) - return err; t->parms.proto = IPPROTO_IPV6; dev_hold(dev); - vti6_link_config(t); - rcu_assign_pointer(ip6n->tnls_wc[0], t); return 0; } -- cgit v1.2.3 From ebe084aafb7e93adf210e80043c9f69adf56820d Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 3 Nov 2014 09:19:29 +0100 Subject: sit: Use ipip6_tunnel_init as the ndo_init function. ipip6_tunnel_init() sets the dev->iflink via a call to ipip6_tunnel_bind_dev(). After that, register_netdevice() sets dev->iflink = -1. So we lose the iflink configuration for ipv6 tunnels. Fix this by using ipip6_tunnel_init() as the ndo_init function. Then ipip6_tunnel_init() is called after dev->iflink is set to -1 from register_netdevice(). Signed-off-by: Steffen Klassert Signed-off-by: David S. 
Miller --- net/ipv6/sit.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 58e5b4710127..a24557a1c1d8 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -195,10 +195,8 @@ static int ipip6_tunnel_create(struct net_device *dev) struct sit_net *sitn = net_generic(net, sit_net_id); int err; - err = ipip6_tunnel_init(dev); - if (err < 0) - goto out; - ipip6_tunnel_clone_6rd(dev, sitn); + memcpy(dev->dev_addr, &t->parms.iph.saddr, 4); + memcpy(dev->broadcast, &t->parms.iph.daddr, 4); if ((__force u16)t->parms.i_flags & SIT_ISATAP) dev->priv_flags |= IFF_ISATAP; @@ -207,7 +205,8 @@ static int ipip6_tunnel_create(struct net_device *dev) if (err < 0) goto out; - strcpy(t->parms.name, dev->name); + ipip6_tunnel_clone_6rd(dev, sitn); + dev->rtnl_link_ops = &sit_link_ops; dev_hold(dev); @@ -1330,6 +1329,7 @@ static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) } static const struct net_device_ops ipip6_netdev_ops = { + .ndo_init = ipip6_tunnel_init, .ndo_uninit = ipip6_tunnel_uninit, .ndo_start_xmit = sit_tunnel_xmit, .ndo_do_ioctl = ipip6_tunnel_ioctl, @@ -1378,9 +1378,7 @@ static int ipip6_tunnel_init(struct net_device *dev) tunnel->dev = dev; tunnel->net = dev_net(dev); - - memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); - memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); + strcpy(tunnel->parms.name, dev->name); ipip6_tunnel_bind_dev(dev); dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); @@ -1405,7 +1403,6 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) tunnel->dev = dev; tunnel->net = dev_net(dev); - strcpy(tunnel->parms.name, dev->name); iph->version = 4; iph->protocol = IPPROTO_IPV6; -- cgit v1.2.3 From f03eb128e3f4276f46442d14f3b8f864f3775821 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 3 Nov 2014 09:19:30 +0100 Subject: gre6: Move the setting of dev->iflink into the ndo_init functions. 
Otherwise it gets overwritten by register_netdev(). Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 12c3c8ef3849..4564e1fca3eb 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -961,8 +961,6 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) else dev->flags &= ~IFF_POINTOPOINT; - dev->iflink = p->link; - /* Precalculate GRE options length */ if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { if (t->parms.o_flags&GRE_CSUM) @@ -1272,6 +1270,7 @@ static int ip6gre_tunnel_init(struct net_device *dev) u64_stats_init(&ip6gre_tunnel_stats->syncp); } + dev->iflink = tunnel->parms.link; return 0; } @@ -1481,6 +1480,8 @@ static int ip6gre_tap_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + dev->iflink = tunnel->parms.link; + return 0; } -- cgit v1.2.3 From 45cac46e51da75628ac2a593c70f5144abb9b31d Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 3 Nov 2014 19:38:37 -0800 Subject: geneve: Set GSO type on transmit. Geneve does not currently set the inner protocol type when transmitting packets. This causes GSO segmentation to fail on NICs that do not support Geneve offloading. CC: Andy Zhou Signed-off-by: Jesse Gross Signed-off-by: David S. 
Miller --- net/ipv4/geneve.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 065cd94c640c..6e5266cf403d 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -144,6 +144,8 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len); geneve_build_header(gnvh, tun_flags, vni, opt_len, opt); + skb_set_inner_protocol(skb, htons(ETH_P_TEB)); + return udp_tunnel_xmit_skb(gs->sock, rt, skb, src, dst, tos, ttl, df, src_port, dst_port, xnet); } -- cgit v1.2.3 From d3ca9eafc0ed97b8f56fdf23655cfece89c48354 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 3 Nov 2014 19:38:38 -0800 Subject: geneve: Unregister pernet subsys on module unload. The pernet ops aren't ever unregistered, which causes a memory leak and an OOPs if the module is ever reinserted. Fixes: 0b5e8b8eeae4 ("net: Add Geneve tunneling protocol driver") CC: Andy Zhou Signed-off-by: Jesse Gross Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv4/geneve.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 6e5266cf403d..dedb21e99914 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -366,6 +366,7 @@ late_initcall(geneve_init_module); static void __exit geneve_cleanup_module(void) { destroy_workqueue(geneve_wq); + unregister_pernet_subsys(&geneve_net_ops); } module_exit(geneve_cleanup_module); -- cgit v1.2.3 From 1f37bf87aa7523d28e7e4c4f7bb5dba98faa3e00 Mon Sep 17 00:00:00 2001 From: Marcelo Leitner Date: Tue, 4 Nov 2014 17:15:08 -0200 Subject: tcp: zero retrans_stamp if all retrans were acked Ueki Kohei reported that when we are using NewReno with connections that have a very low traffic, we may timeout the connection too early if a second loss occurs after the first one was successfully acked but no data was transferred later. 
Below is his description of it: When SACK is disabled, and a socket suffers multiple separate TCP retransmissions, that socket's ETIMEDOUT value is calculated from the time of the *first* retransmission instead of the *latest* retransmission. This happens because the tcp_sock's retrans_stamp is set once then never cleared. Take the following connection: Linux remote-machine | | send#1---->(*1)|--------> data#1 --------->| | | | RTO : : | | | ---(*2)|----> data#1(retrans) ---->| | (*3)|<---------- ACK <----------| | | | | : : | : : | : : 16 minutes (or more) : | : : | : : | : : | | | send#2---->(*4)|--------> data#2 --------->| | | | RTO : : | | | ---(*5)|----> data#2(retrans) ---->| | | | | | | RTO*2 : : | | | | | | ETIMEDOUT<----(*6)| | (*1) One data packet sent. (*2) Because no ACK packet is received, the packet is retransmitted. (*3) The ACK packet is received. The transmitted packet is acknowledged. At this point the first "retransmission event" has passed and been recovered from. Any future retransmission is a completely new "event". (*4) After 16 minutes (to correspond with retries2=15), a new data packet is sent. Note: No data is transmitted between (*3) and (*4). The socket's timeout SHOULD be calculated from this point in time, but instead it's calculated from the prior "event" 16 minutes ago. (*5) Because no ACK packet is received, the packet is retransmitted. (*6) At the time of the 2nd retransmission, the socket returns ETIMEDOUT. Therefore, now we clear retrans_stamp as soon as all data during the loss window is fully acked. Reported-by: Ueki Kohei Cc: Neal Cardwell Cc: Yuchung Cheng Signed-off-by: Marcelo Ricardo Leitner Acked-by: Neal Cardwell Tested-by: Neal Cardwell Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 60 +++++++++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a12b455928e5..88fa2d160685 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2315,6 +2315,35 @@ static inline bool tcp_packet_delayed(const struct tcp_sock *tp) /* Undo procedures. */ +/* We can clear retrans_stamp when there are no retransmissions in the + * window. It would seem that it is trivially available for us in + * tp->retrans_out, however, that kind of assumptions doesn't consider + * what will happen if errors occur when sending retransmission for the + * second time. ...It could the that such segment has only + * TCPCB_EVER_RETRANS set at the present time. It seems that checking + * the head skb is enough except for some reneging corner cases that + * are not worth the effort. + * + * Main reason for all this complexity is the fact that connection dying + * time now depends on the validity of the retrans_stamp, in particular, + * that successive retransmissions of a segment must not advance + * retrans_stamp under any conditions. + */ +static bool tcp_any_retrans_done(const struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + + if (tp->retrans_out) + return true; + + skb = tcp_write_queue_head(sk); + if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS)) + return true; + + return false; +} + #if FASTRETRANS_DEBUG > 1 static void DBGUNDO(struct sock *sk, const char *msg) { @@ -2410,6 +2439,8 @@ static bool tcp_try_undo_recovery(struct sock *sk) * is ACKed. For Reno it is MUST to prevent false * fast retransmits (RFC2582). SACK TCP is safe. 
*/ tcp_moderate_cwnd(tp); + if (!tcp_any_retrans_done(sk)) + tp->retrans_stamp = 0; return true; } tcp_set_ca_state(sk, TCP_CA_Open); @@ -2430,35 +2461,6 @@ static bool tcp_try_undo_dsack(struct sock *sk) return false; } -/* We can clear retrans_stamp when there are no retransmissions in the - * window. It would seem that it is trivially available for us in - * tp->retrans_out, however, that kind of assumptions doesn't consider - * what will happen if errors occur when sending retransmission for the - * second time. ...It could the that such segment has only - * TCPCB_EVER_RETRANS set at the present time. It seems that checking - * the head skb is enough except for some reneging corner cases that - * are not worth the effort. - * - * Main reason for all this complexity is the fact that connection dying - * time now depends on the validity of the retrans_stamp, in particular, - * that successive retransmissions of a segment must not advance - * retrans_stamp under any conditions. - */ -static bool tcp_any_retrans_done(const struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - - if (tp->retrans_out) - return true; - - skb = tcp_write_queue_head(sk); - if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS)) - return true; - - return false; -} - /* Undo during loss recovery after partial ACK or using F-RTO. */ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) { -- cgit v1.2.3 From b31f65fb4383a49bdcfa465176754b37e44e1e17 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 5 Nov 2014 19:47:28 +0100 Subject: net: dsa: slave: Fix autoneg for phys on switch MDIO bus When the ports phys are connected to the switches internal MDIO bus, we need to connect the phy to the slave netdev, otherwise auto-negotiation etc, does not work. Signed-off-by: Andrew Lunn Signed-off-by: David S. 
Miller --- net/dsa/slave.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 6d1817449c36..ab03e00ffe8f 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -489,11 +489,14 @@ static void dsa_slave_phy_setup(struct dsa_slave_priv *p, /* We could not connect to a designated PHY, so use the switch internal * MDIO bus instead */ - if (!p->phy) + if (!p->phy) { p->phy = ds->slave_mii_bus->phy_map[p->port]; - else + phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, + p->phy_interface); + } else { pr_info("attached PHY at address %d [%s]\n", p->phy->addr, p->phy->drv->name); + } } int dsa_slave_suspend(struct net_device *slave_dev) -- cgit v1.2.3 From cfdf1e1ba5bf55e095cf4bcaa9585c4759f239e8 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 10 Nov 2014 11:45:13 -0800 Subject: udptunnel: Add SKB_GSO_UDP_TUNNEL during gro_complete. When doing GRO processing for UDP tunnels, we never add SKB_GSO_UDP_TUNNEL to gso_type - only the type of the inner protocol is added (such as SKB_GSO_TCPV4). The result is that if the packet is later resegmented we will do GSO but not treat it as a tunnel. This results in UDP fragmentation of the outer header instead of (i.e.) TCP segmentation of the inner header as was originally on the wire. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/ipv4/fou.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 32e78924e246..606c520ffd5a 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -133,6 +133,8 @@ static int fou_gro_complete(struct sk_buff *skb, int nhoff) int err = -ENOSYS; const struct net_offload **offloads; + udp_tunnel_gro_complete(skb, nhoff); + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? 
inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); -- cgit v1.2.3 From e40607cbe270a9e8360907cb1e62ddf0736e4864 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 10 Nov 2014 17:54:26 +0100 Subject: net: sctp: fix NULL pointer dereference in af->from_addr_param on malformed packet An SCTP server doing ASCONF will panic on malformed INIT ping-of-death in the form of: ------------ INIT[PARAM: SET_PRIMARY_IP] ------------> While the INIT chunk parameter verification dissects through many things in order to detect malformed input, it misses to actually check parameters inside of parameters. E.g. RFC5061, section 4.2.4 proposes a 'set primary IP address' parameter in ASCONF, which has as a subparameter an address parameter. So an attacker may send a parameter type other than SCTP_PARAM_IPV4_ADDRESS or SCTP_PARAM_IPV6_ADDRESS, param_type2af() will subsequently return 0 and thus sctp_get_af_specific() returns NULL, too, which we then happily dereference unconditionally through af->from_addr_param(). The trace for the log: BUG: unable to handle kernel NULL pointer dereference at 0000000000000078 IP: [] sctp_process_init+0x492/0x990 [sctp] PGD 0 Oops: 0000 [#1] SMP [...] Pid: 0, comm: swapper Not tainted 2.6.32-504.el6.x86_64 #1 Bochs Bochs RIP: 0010:[] [] sctp_process_init+0x492/0x990 [sctp] [...] Call Trace: [] ? sctp_bind_addr_copy+0x5d/0xe0 [sctp] [] sctp_sf_do_5_1B_init+0x21b/0x340 [sctp] [] sctp_do_sm+0x71/0x1210 [sctp] [] ? sctp_endpoint_lookup_assoc+0xc9/0xf0 [sctp] [] sctp_endpoint_bh_rcv+0x116/0x230 [sctp] [] sctp_inq_push+0x56/0x80 [sctp] [] sctp_rcv+0x982/0xa10 [sctp] [] ? ipt_local_in_hook+0x23/0x28 [iptable_filter] [] ? nf_iterate+0x69/0xb0 [] ? ip_local_deliver_finish+0x0/0x2d0 [] ? nf_hook_slow+0x76/0x120 [] ? ip_local_deliver_finish+0x0/0x2d0 [...] A minimal way to address this is to check for NULL as we do on all other such occasions where we know sctp_get_af_specific() could possibly return with NULL. 
Fixes: d6de3097592b ("[SCTP]: Add the handling of "Set Primary IP Address" parameter to INIT") Signed-off-by: Daniel Borkmann Cc: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index ab734be8cb20..9f32741abb1c 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2609,6 +2609,9 @@ do_addr_param: addr_param = param.v + sizeof(sctp_addip_param_t); af = sctp_get_af_specific(param_type2af(param.p->type)); + if (af == NULL) + break; + af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0); -- cgit v1.2.3 From 4184b2a79a7612a9272ce20d639934584a1f3786 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 10 Nov 2014 18:00:09 +0100 Subject: net: sctp: fix memory leak in auth key management A very minimal and simple user space application allocating an SCTP socket, setting SCTP_AUTH_KEY setsockopt(2) on it and then closing the socket again will leak the memory containing the authentication key from user space: unreferenced object 0xffff8800837047c0 (size 16): comm "a.out", pid 2789, jiffies 4296954322 (age 192.258s) hex dump (first 16 bytes): 01 00 00 00 04 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemleak_alloc+0x4e/0xb0 [] __kmalloc+0xe8/0x270 [] sctp_auth_create_key+0x23/0x50 [sctp] [] sctp_auth_set_key+0xa1/0x140 [sctp] [] sctp_setsockopt+0xd03/0x1180 [sctp] [] sock_common_setsockopt+0x14/0x20 [] SyS_setsockopt+0x71/0xd0 [] system_call_fastpath+0x12/0x17 [] 0xffffffffffffffff This is bad because of two things, we can bring down a machine from user space when auth_enable=1, but also we would leave security sensitive keying material in memory without clearing it after use. 
The issue is that sctp_auth_create_key() already sets the refcount to 1, but after allocation sctp_auth_set_key() takes an additional reference on it, thus leaving it around when we free the socket. Fixes: 65b07e5d0d0 ("[SCTP]: API updates to suport SCTP-AUTH extensions.") Signed-off-by: Daniel Borkmann Cc: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/auth.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 0e8529113dc5..fb7976aee61c 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -862,8 +862,6 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, list_add(&cur_key->key_list, sh_keys); cur_key->key = key; - sctp_auth_key_hold(key); - return 0; nomem: if (!replace) -- cgit v1.2.3 From 5337b5b75cd9bd3624a6820e3c2a084d2480061c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Nov 2014 17:54:25 -0800 Subject: ipv6: fix IPV6_PKTINFO with v4 mapped Use IS_ENABLED(CONFIG_IPV6) so that this code is also enabled when IPv6 is built as a module. Signed-off-by: Eric Dumazet Fixes: c8e6ad0829a7 ("ipv6: honor IPV6_PKTINFO with v4 mapped addresses on sendmsg") Acked-by: Hannes Frederic Sowa Signed-off-by: David S.
Miller --- net/ipv4/ip_sockglue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index c373a9ad4555..9daf2177dc00 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -195,7 +195,7 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc, for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; -#if defined(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) if (allow_ipv6 && cmsg->cmsg_level == SOL_IPV6 && cmsg->cmsg_type == IPV6_PKTINFO) { -- cgit v1.2.3 From 6251edd932ce3faadbfe27b0a0fe79780e0972e9 Mon Sep 17 00:00:00 2001 From: Hiroaki SHIMODA Date: Thu, 13 Nov 2014 04:24:10 +0900 Subject: netlink: Properly unbind in error conditions. Even if netlink_kernel_cfg::unbind is implemented, the unbind() method is not called, because cfg->unbind is omitted in __netlink_kernel_create(). Also fix the wrong argument of test_bit() and an off-by-one problem. At this point, no unbind() method is implemented, so there is no real issue. Fixes: 4f520900522f ("netlink: have netlink per-protocol bind function return an error code.") Signed-off-by: Hiroaki SHIMODA Cc: Richard Guy Briggs Acked-by: Richard Guy Briggs Signed-off-by: David S.
Miller --- net/netlink/af_netlink.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f1de72de273e..0007b8180397 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1440,7 +1440,7 @@ static void netlink_unbind(int group, long unsigned int groups, return; for (undo = 0; undo < group; undo++) - if (test_bit(group, &groups)) + if (test_bit(undo, &groups)) nlk->netlink_unbind(undo); } @@ -1492,7 +1492,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, netlink_insert(sk, net, nladdr->nl_pid) : netlink_autobind(sock); if (err) { - netlink_unbind(nlk->ngroups - 1, groups, nlk); + netlink_unbind(nlk->ngroups, groups, nlk); return err; } } @@ -2509,6 +2509,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, nl_table[unit].module = module; if (cfg) { nl_table[unit].bind = cfg->bind; + nl_table[unit].unbind = cfg->unbind; nl_table[unit].flags = cfg->flags; if (cfg->compare) nl_table[unit].compare = cfg->compare; -- cgit v1.2.3