From c5d4d7d83165ae863954b113c7f403d8b58febed Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 4 Sep 2017 10:28:02 +0200 Subject: xfrm: Fix deletion of offloaded SAs on failure. When we off load a SA, it gets pushed to the NIC before we can add it. In case of a failure, we don't delete this SA from the NIC. Fix this by calling xfrm_dev_state_delete on failure. Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") Reported-by: Shannon Nelson Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 2bfbd9121e3b..b997f1395357 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -657,6 +657,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) { x->km.state = XFRM_STATE_DEAD; + xfrm_dev_state_delete(x); __xfrm_state_put(x); goto out; } -- cgit v1.2.3 From 67a63387b1417b5954eedb15f638f1f0bee3da49 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 4 Sep 2017 10:59:55 +0200 Subject: xfrm: Fix negative device refcount on offload failure. Reset the offload device at the xfrm_state if the device was not able to offload the state. Otherwise we drop the device refcount twice. Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") Reported-by: Shannon Nelson Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_device.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index acf00104ef31..30e5746085b8 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -91,6 +91,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, } if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_state_add) { + xso->dev = NULL; dev_put(dev); return 0; } -- cgit v1.2.3 From 23e9fcfef1f3d10675acce023592796851bcaf1a Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Tue, 12 Sep 2017 14:53:46 +0300 Subject: vti: fix NULL dereference in xfrm_input() Can be reproduced with LTP tests: # icmp-uni-vti.sh -p ah -a sha256 -m tunnel -S fffffffe -k 1 -s 10 IPv4: RIP: 0010:xfrm_input+0x7f9/0x870 ... Call Trace: vti_input+0xaa/0x110 [ip_vti] ? skb_free_head+0x21/0x40 vti_rcv+0x33/0x40 [ip_vti] xfrm4_ah_rcv+0x33/0x60 ip_local_deliver_finish+0x94/0x1e0 ip_local_deliver+0x6f/0xe0 ? ip_route_input_noref+0x28/0x50 ... # icmp-uni-vti.sh -6 -p ah -a sha256 -m tunnel -S fffffffe -k 1 -s 10 IPv6: RIP: 0010:xfrm_input+0x7f9/0x870 ... Call Trace: xfrm6_rcv_tnl+0x3c/0x40 vti6_rcv+0xd5/0xe0 [ip6_vti] xfrm6_ah_rcv+0x33/0x60 ip6_input_finish+0xee/0x460 ip6_input+0x3f/0xb0 ip6_rcv_finish+0x45/0xa0 ipv6_rcv+0x34b/0x540 xfrm_input() invokes xfrm_rcv_cb() -> vti_rcv_cb(), the last callback might call skb_scrub_packet(), which in turn can reset secpath. Fix it by adding a check that skb->sp is not NULL. Fixes: 7e9e9202bccc ("xfrm: Clear RX SKB secpath xfrm_offload") Signed-off-by: Alexey Kodanev Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 2515cd2bc5db..8ac9d32fb79d 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -429,7 +429,8 @@ resume: nf_reset(skb); if (decaps) { - skb->sp->olen = 0; + if (skb->sp) + skb->sp->olen = 0; skb_dst_drop(skb); gro_cells_receive(&gro_cells, skb); return 0; @@ -440,7 +441,8 @@ resume: err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async); if (xfrm_gro) { - skb->sp->olen = 0; + if (skb->sp) + skb->sp->olen = 0; skb_dst_drop(skb); gro_cells_receive(&gro_cells, skb); return err; -- cgit v1.2.3 From b621129f4f08c8d42ac4de2e77a07c5cf0c4b740 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Wed, 15 Feb 2017 16:33:56 +0300 Subject: netfilter: ipvs: full-functionality option for ECN encapsulation in tunnel IPVS tunnel mode works as simple tunnel (see RFC 3168) copying ECN field to outer header. That's result in packet drops on egress tunnels in case the egress tunnel operates as ECN-capable with Full-functionality option (like ip_tunnel and ip6_tunnel kernel modules), according to RFC 3168 section 9.1.1 recommendation. This patch implements ECN full-functionality option into ipvs xmit code. Cc: netdev@vger.kernel.org Cc: lvs-devel@vger.kernel.org Signed-off-by: Vadim Fedorenko Reviewed-by: Konstantin Khlebnikov Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_xmit.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 90d396814798..4527921b1c3a 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -921,6 +921,7 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, { struct sk_buff *new_skb = NULL; struct iphdr *old_iph = NULL; + __u8 old_dsfield; #ifdef CONFIG_IP_VS_IPV6 struct ipv6hdr *old_ipv6h = NULL; #endif @@ -945,7 +946,7 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, *payload_len = ntohs(old_ipv6h->payload_len) + sizeof(*old_ipv6h); - *dsfield = ipv6_get_dsfield(old_ipv6h); + old_dsfield = ipv6_get_dsfield(old_ipv6h); *ttl = old_ipv6h->hop_limit; if (df) *df = 0; @@ -960,12 +961,15 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, /* fix old IP header checksum */ ip_send_check(old_iph); - *dsfield = ipv4_get_dsfield(old_iph); + old_dsfield = ipv4_get_dsfield(old_iph); *ttl = old_iph->ttl; if (payload_len) *payload_len = ntohs(old_iph->tot_len); } + /* Implement full-functionality option for ECN encapsulation */ + *dsfield = INET_ECN_encapsulate(old_dsfield, old_dsfield); + return skb; error: kfree_skb(skb); -- cgit v1.2.3 From 89fcbb564f4a64c439d597c2702f990eed49c8a1 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Thu, 21 Sep 2017 19:01:36 -0600 Subject: netfilter: xt_socket: Restore mark from full sockets only An out of bounds error was detected on an ARM64 target with Android based kernel 4.9. This occurs while trying to restore mark on a skb from an inet request socket. BUG: KASAN: slab-out-of-bounds in socket_match.isra.2+0xc8/0x1f0 net/netfilter/xt_socket.c:248 Read of size 4 at addr ffffffc06a8d824c by task syz-fuzzer/1532 CPU: 7 PID: 1532 Comm: syz-fuzzer Tainted: G W O 4.9.41+ #1 Call trace: [] dump_backtrace+0x0/0x440 arch/arm64/kernel/traps.c:76 [] show_stack+0x28/0x38 arch/arm64/kernel/traps.c:226 [] __dump_stack lib/dump_stack.c:15 [inline] [] dump_stack+0xe4/0x134 lib/dump_stack.c:51 [] print_address_description+0x68/0x258 mm/kasan/report.c:248 [] kasan_report_error mm/kasan/report.c:347 [inline] [] kasan_report.part.2+0x228/0x2f0 mm/kasan/report.c:371 [] kasan_report+0x5c/0x70 mm/kasan/report.c:372 [] check_memory_region_inline mm/kasan/kasan.c:308 [inline] [] __asan_load4+0x88/0xa0 mm/kasan/kasan.c:740 [] socket_match.isra.2+0xc8/0x1f0 net/netfilter/xt_socket.c:248 [] socket_mt4_v1_v2_v3+0x3c/0x48 net/netfilter/xt_socket.c:272 [] ipt_do_table+0x54c/0xad8 net/ipv4/netfilter/ip_tables.c:311 [] iptable_mangle_hook+0x6c/0x220 net/ipv4/netfilter/iptable_mangle.c:90 ... Allocated by task 1532: save_stack_trace_tsk+0x0/0x2a0 arch/arm64/kernel/stacktrace.c:131 save_stack_trace+0x28/0x38 arch/arm64/kernel/stacktrace.c:215 save_stack mm/kasan/kasan.c:495 [inline] set_track mm/kasan/kasan.c:507 [inline] kasan_kmalloc+0xd8/0x188 mm/kasan/kasan.c:599 kasan_slab_alloc+0x14/0x20 mm/kasan/kasan.c:537 slab_post_alloc_hook mm/slab.h:417 [inline] slab_alloc_node mm/slub.c:2728 [inline] slab_alloc mm/slub.c:2736 [inline] kmem_cache_alloc+0x14c/0x2e8 mm/slub.c:2741 reqsk_alloc include/net/request_sock.h:87 [inline] inet_reqsk_alloc+0x4c/0x238 net/ipv4/tcp_input.c:6236 tcp_conn_request+0x2b0/0xea8 net/ipv4/tcp_input.c:6341 tcp_v4_conn_request+0xe0/0x100 net/ipv4/tcp_ipv4.c:1256 tcp_rcv_state_process+0x384/0x18a8 net/ipv4/tcp_input.c:5926 tcp_v4_do_rcv+0x2f0/0x3e0 net/ipv4/tcp_ipv4.c:1430 tcp_v4_rcv+0x1278/0x1350 net/ipv4/tcp_ipv4.c:1709 ip_local_deliver_finish+0x174/0x3e0 net/ipv4/ip_input.c:216 v1->v2: Change socket_mt6_v1_v2_v3() as well as mentioned by Eric v2->v3: Put the correct fixes tag Fixes: 01555e74bde5 ("netfilter: xt_socket: add XT_SOCKET_RESTORESKMARK flag") Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index e75ef39669c5..575d2153e3b8 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -76,7 +76,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, transparent = nf_sk_is_transparent(sk); if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && - transparent) + transparent && sk_fullsock(sk)) pskb->mark = sk->sk_mark; if (sk != skb->sk) @@ -133,7 +133,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) transparent = nf_sk_is_transparent(sk); if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && - transparent) + transparent && sk_fullsock(sk)) pskb->mark = sk->sk_mark; if (sk != skb->sk) -- cgit v1.2.3 From 48596a8ddc46f96afb6a2cd72787cb15d6bb01fc Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 23 Sep 2017 23:37:40 +0200 Subject: netfilter: ipset: Fix adding an IPv4 range containing more than 2^31 addresses Wrong comparison prevented the hash types to add a range with more than 2^31 addresses but reported as a success. Fixes Netfilter's bugzilla id #1005, reported by Oleg Serditov and Oliver Ford. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_ip.c | 22 ++++++++++++---------- net/netfilter/ipset/ip_set_hash_ipmark.c | 2 +- net/netfilter/ipset/ip_set_hash_ipport.c | 2 +- net/netfilter/ipset/ip_set_hash_ipportip.c | 2 +- net/netfilter/ipset/ip_set_hash_ipportnet.c | 4 ++-- net/netfilter/ipset/ip_set_hash_net.c | 2 +- net/netfilter/ipset/ip_set_hash_netiface.c | 2 +- net/netfilter/ipset/ip_set_hash_netnet.c | 4 ++-- net/netfilter/ipset/ip_set_hash_netport.c | 2 +- net/netfilter/ipset/ip_set_hash_netportnet.c | 4 ++-- 10 files changed, 24 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 20bfbd315f61..613eb212cb48 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -123,13 +123,12 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; ip &= ip_set_hostmask(h->netmask); + e.ip = htonl(ip); + if (e.ip == 0) + return -IPSET_ERR_HASH_ELEM; - if (adt == IPSET_TEST) { - e.ip = htonl(ip); - if (e.ip == 0) - return -IPSET_ERR_HASH_ELEM; + if (adt == IPSET_TEST) return adtfn(set, &e, &ext, &ext, flags); - } ip_to = ip; if (tb[IPSET_ATTR_IP_TO]) { @@ -148,17 +147,20 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); - if (retried) + if (retried) { ip = ntohl(h->next.ip); - for (; !before(ip_to, ip); ip += hosts) { e.ip = htonl(ip); - if (e.ip == 0) - return -IPSET_ERR_HASH_ELEM; + } + for (; ip <= ip_to;) { ret = adtfn(set, &e, &ext, &ext, flags); - if (ret && !ip_set_eexist(ret, flags)) return ret; + ip += hosts; + e.ip = htonl(ip); + if (e.ip == 0) + return 0; + ret = 0; } return ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index b64cf14e8352..f3ba8348cf9d 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -149,7 +149,7 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; !before(ip_to, ip); ip++) { + for (; ip <= ip_to; ip++) { e.ip = htonl(ip); ret = adtfn(set, &e, &ext, &ext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index f438740e6c6a..ddb8039ec1d2 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -178,7 +178,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; !before(ip_to, ip); ip++) { + for (; ip <= ip_to; ip++) { p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; for (; p <= port_to; p++) { diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 6215fb898c50..a7f4d7a85420 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -185,7 +185,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; !before(ip_to, ip); ip++) { + for (; ip <= ip_to; ip++) { p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; for (; p <= port_to; p++) { diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 5ab1b99a53c2..a2f19b9906e9 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -271,7 +271,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; !before(ip_to, ip); ip++) { + for (; ip <= ip_to; ip++) { e.ip = htonl(ip); p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; @@ -281,7 +281,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ip == ntohl(h->next.ip) && p == ntohs(h->next.port) ? ntohl(h->next.ip2) : ip2_from; - while (!after(ip2, ip2_to)) { + while (ip2 <= ip2_to) { e.ip2 = htonl(ip2); ip2_last = ip_set_range_to_cidr(ip2, ip2_to, &cidr); diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 5d9e895452e7..1c67a1761e45 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -193,7 +193,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], } if (retried) ip = ntohl(h->next.ip); - while (!after(ip, ip_to)) { + while (ip <= ip_to) { e.ip = htonl(ip); last = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 44cf11939c91..d417074f1c1a 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -255,7 +255,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - while (!after(ip, ip_to)) { + while (ip <= ip_to) { e.ip = htonl(ip); last = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index db614e13b193..7f9ae2e9645b 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -250,13 +250,13 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip[0]); - while (!after(ip, ip_to)) { + while (ip <= ip_to) { e.ip[0] = htonl(ip); last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); ip2 = (retried && ip == ntohl(h->next.ip[0])) ? ntohl(h->next.ip[1]) : ip2_from; - while (!after(ip2, ip2_to)) { + while (ip2 <= ip2_to) { e.ip[1] = htonl(ip2); last2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]); ret = adtfn(set, &e, &ext, &ext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 54b64b6cd0cd..e6ef382febe4 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -241,7 +241,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - while (!after(ip, ip_to)) { + while (ip <= ip_to) { e.ip = htonl(ip); last = ip_set_range_to_cidr(ip, ip_to, &cidr); e.cidr = cidr - 1; diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index aff846960ac4..8602f2595a1a 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -291,7 +291,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip[0]); - while (!after(ip, ip_to)) { + while (ip <= ip_to) { e.ip[0] = htonl(ip); ip_last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); p = retried && ip == ntohl(h->next.ip[0]) ? ntohs(h->next.port) @@ -301,7 +301,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ip2 = (retried && ip == ntohl(h->next.ip[0]) && p == ntohs(h->next.port)) ? ntohl(h->next.ip[1]) : ip2_from; - while (!after(ip2, ip2_to)) { + while (ip2 <= ip2_to) { e.ip[1] = htonl(ip2); ip2_last = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]); -- cgit v1.2.3 From e23ed762db7ed1950a6408c3be80bc56909ab3d4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 26 Sep 2017 11:57:54 +0200 Subject: netfilter: ipset: pernet ops must be unregistered last Removing the ipset module leaves a small window where one cpu performs module removal while another runs a command like 'ipset flush'. ipset uses net_generic(), unregistering the pernet ops frees this storage area. Fix it by first removing the user-visible api handlers and the pernet ops last. Fixes: 1785e8f473082 ("netfiler: ipset: Add net namespace for ipset") Reported-by: Li Shuang Signed-off-by: Florian Westphal Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index e495b5e484b1..a7f049ff3049 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -2072,25 +2072,28 @@ static struct pernet_operations ip_set_net_ops = { static int __init ip_set_init(void) { - int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); + int ret = register_pernet_subsys(&ip_set_net_ops); + if (ret) { + pr_err("ip_set: cannot register pernet_subsys.\n"); + return ret; + } + + ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); if (ret != 0) { pr_err("ip_set: cannot register with nfnetlink.\n"); + unregister_pernet_subsys(&ip_set_net_ops); return ret; } + ret = nf_register_sockopt(&so_set); if (ret != 0) { pr_err("SO_SET registry failed: %d\n", ret); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); + unregister_pernet_subsys(&ip_set_net_ops); return ret; } - ret = register_pernet_subsys(&ip_set_net_ops); - if (ret) { - pr_err("ip_set: cannot register pernet_subsys.\n"); - nf_unregister_sockopt(&so_set); - nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - return ret; - } + pr_info("ip_set: protocol %u\n", IPSET_PROTOCOL); return 0; } @@ -2098,9 +2101,10 @@ ip_set_init(void) static void __exit ip_set_fini(void) { - unregister_pernet_subsys(&ip_set_net_ops); nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); + + unregister_pernet_subsys(&ip_set_net_ops); pr_debug("these are the famous last words\n"); } -- cgit v1.2.3 From dd269db84908d4d3f7c0efed85bf9d8939fb0b9b Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Wed, 27 Sep 2017 14:25:37 +0200 Subject: xfrm: don't call xfrm_policy_cache_flush under xfrm_state_lock I might be wrong but it doesn't look like xfrm_state_lock is required for xfrm_policy_cache_flush and calling it under this lock triggers both "sleeping function called from invalid context" and "possible circular locking dependency detected" warnings on flush. Fixes: ec30d78c14a8 xfrm: add xdst pcpu cache Signed-off-by: Artem Savkov Acked-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 0dab1cd79ce4..12213477cd3a 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -732,12 +732,12 @@ restart: } } } +out: + spin_unlock_bh(&net->xfrm.xfrm_state_lock); if (cnt) { err = 0; xfrm_policy_cache_flush(); } -out: - spin_unlock_bh(&net->xfrm.xfrm_state_lock); return err; } EXPORT_SYMBOL(xfrm_state_flush); -- cgit v1.2.3 From e5173418ac597cebe9f7a39adf10be470000b518 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Wed, 27 Sep 2017 10:06:27 +0100 Subject: netfilter: ipset: Fix race between dump and swap Fix a race between ip_set_dump_start() and ip_set_swap(). The race is as follows: * Without holding the ref lock, ip_set_swap() checks ref_netlink of the set and it is 0. * ip_set_dump_start() takes a reference on the set. * ip_set_swap() does the swap (even though it now has a non-zero reference count). * ip_set_dump_start() gets the set from ip_set_list again which is now a different set since it has been swapped. * ip_set_dump_start() calls __ip_set_put_netlink() and hits a BUG_ON due to the reference count being 0. Fix this race by extending the critical region in which the ref lock is held to include checking the ref counts. The race can be reproduced with the following script: while :; do ipset destroy hash_ip1 ipset destroy hash_ip2 ipset create hash_ip1 hash:ip family inet hashsize 1024 \ maxelem 500000 ipset create hash_ip2 hash:ip family inet hashsize 300000 \ maxelem 500000 ipset create hash_ip3 hash:ip family inet hashsize 1024 \ maxelem 500000 ipset save & ipset swap hash_ip3 hash_ip2 ipset destroy hash_ip3 wait done Signed-off-by: Ross Lagerwall Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index a7f049ff3049..cf84f7b37cd9 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1191,14 +1191,17 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb, from->family == to->family)) return -IPSET_ERR_TYPE_MISMATCH; - if (from->ref_netlink || to->ref_netlink) + write_lock_bh(&ip_set_ref_lock); + + if (from->ref_netlink || to->ref_netlink) { + write_unlock_bh(&ip_set_ref_lock); return -EBUSY; + } strncpy(from_name, from->name, IPSET_MAXNAMELEN); strncpy(from->name, to->name, IPSET_MAXNAMELEN); strncpy(to->name, from_name, IPSET_MAXNAMELEN); - write_lock_bh(&ip_set_ref_lock); swap(from->ref, to->ref); ip_set(inst, from_id) = to; ip_set(inst, to_id) = from; -- cgit v1.2.3 From 0d18779be13766b33c69cbc26df38383598da373 Mon Sep 17 00:00:00 2001 From: JingPiao Chen Date: Sat, 23 Sep 2017 17:10:44 +0800 Subject: netfilter: nf_tables: fix update chain error # nft add table filter # nft add chain filter c1 # nft rename chain filter c1 c2 Error: Could not process rule: No such file or directory rename chain filter c1 c2 ^^^^^^^^^^^^^^^^^^^^^^^^^^ # nft add chain filter c2 # nft rename chain filter c1 c2 # nft list table filter table ip filter { chain c2 { } chain c2 { } } Fixes: 664b0f8cd8 ("netfilter: nf_tables: add generation mask to chains") Signed-off-by: JingPiao Chen Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 929927171426..f98ca8c6aa59 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1487,8 +1487,8 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, chain2 = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); - if (IS_ERR(chain2)) - return PTR_ERR(chain2); + if (!IS_ERR(chain2)) + return -EEXIST; } if (nla[NFTA_CHAIN_COUNTERS]) { -- cgit v1.2.3 From e6b72ee88a56bcfe63f72e9c30766484c45bec72 Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Tue, 26 Sep 2017 18:35:45 +0200 Subject: netfilter: ebtables: fix race condition in frame_filter_net_init() It is possible for ebt_in_hook to be triggered before ebt_table is assigned resulting in a NULL-pointer dereference. Make sure hooks are registered as the last step. Fixes: aee12a0a3727 ("ebtables: remove nf_hook_register usage") Signed-off-by: Artem Savkov Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtable_broute.c | 4 ++-- net/bridge/netfilter/ebtable_filter.c | 4 ++-- net/bridge/netfilter/ebtable_nat.c | 4 ++-- net/bridge/netfilter/ebtables.c | 17 +++++++++-------- 4 files changed, 15 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 2585b100ebbb..276b60262981 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -65,8 +65,8 @@ static int ebt_broute(struct sk_buff *skb) static int __net_init broute_net_init(struct net *net) { - net->xt.broute_table = ebt_register_table(net, &broute_table, NULL); - return PTR_ERR_OR_ZERO(net->xt.broute_table); + return ebt_register_table(net, &broute_table, NULL, + &net->xt.broute_table); } static void __net_exit broute_net_exit(struct net *net) diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 45a00dbdbcad..c41da5fac84f 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -93,8 +93,8 @@ static const struct nf_hook_ops ebt_ops_filter[] = { static int __net_init frame_filter_net_init(struct net *net) { - net->xt.frame_filter = ebt_register_table(net, &frame_filter, ebt_ops_filter); - return PTR_ERR_OR_ZERO(net->xt.frame_filter); + return ebt_register_table(net, &frame_filter, ebt_ops_filter, + &net->xt.frame_filter); } static void __net_exit frame_filter_net_exit(struct net *net) diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 57cd5bb154e7..08df7406ecb3 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -93,8 +93,8 @@ static const struct nf_hook_ops ebt_ops_nat[] = { static int __net_init frame_nat_net_init(struct net *net) { - net->xt.frame_nat = ebt_register_table(net, &frame_nat, ebt_ops_nat); - return PTR_ERR_OR_ZERO(net->xt.frame_nat); + return ebt_register_table(net, &frame_nat, ebt_ops_nat, + &net->xt.frame_nat); } static void __net_exit frame_nat_net_exit(struct net *net) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 83951f978445..3b3dcf719e07 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1169,9 +1169,8 @@ static void __ebt_unregister_table(struct net *net, struct ebt_table *table) kfree(table); } -struct ebt_table * -ebt_register_table(struct net *net, const struct ebt_table *input_table, - const struct nf_hook_ops *ops) +int ebt_register_table(struct net *net, const struct ebt_table *input_table, + const struct nf_hook_ops *ops, struct ebt_table **res) { struct ebt_table_info *newinfo; struct ebt_table *t, *table; @@ -1183,7 +1182,7 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table, repl->entries == NULL || repl->entries_size == 0 || repl->counters != NULL || input_table->private != NULL) { BUGPRINT("Bad table data for ebt_register_table!!!\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } /* Don't add one table to multiple lists. */ @@ -1252,16 +1251,18 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table, list_add(&table->list, &net->xt.tables[NFPROTO_BRIDGE]); mutex_unlock(&ebt_mutex); + WRITE_ONCE(*res, table); + if (!ops) - return table; + return 0; ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); if (ret) { __ebt_unregister_table(net, table); - return ERR_PTR(ret); + *res = NULL; } - return table; + return ret; free_unlock: mutex_unlock(&ebt_mutex); free_chainstack: @@ -1276,7 +1277,7 @@ free_newinfo: free_table: kfree(table); out: - return ERR_PTR(ret); + return ret; } void ebt_unregister_table(struct net *net, struct ebt_table *table, -- cgit v1.2.3 From e63aaaa6be54c956b9603590ea436b003407bb3e Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 20 Sep 2017 12:31:28 +0530 Subject: netfilter: nf_tables: Release memory obtained by kasprintf Free memory region, if nf_tables_set_alloc_name is not successful. Fixes: 387454901bd6 ("netfilter: nf_tables: Allow set names of up to 255 chars") Signed-off-by: Arvind Yadav Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f98ca8c6aa59..34adedcb239e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2741,8 +2741,10 @@ cont: list_for_each_entry(i, &ctx->table->sets, list) { if (!nft_is_active_next(ctx->net, i)) continue; - if (!strcmp(set->name, i->name)) + if (!strcmp(set->name, i->name)) { + kfree(set->name); return -ENFILE; + } } return 0; } -- cgit v1.2.3 From ad670233c9e1d5feb365d870e30083ef1b889177 Mon Sep 17 00:00:00 2001 From: Peng Xu Date: Tue, 3 Oct 2017 23:21:51 +0300 Subject: nl80211: Define policy for packet pattern attributes Define a policy for packet pattern attributes in order to fix a potential read over the end of the buffer during nla_get_u32() of the NL80211_PKTPAT_OFFSET attribute. Note that the data there can always be read due to SKB allocation (with alignment and struct skb_shared_info at the end), but the data might be uninitialized. This could be used to leak some data from uninitialized vmalloc() memory, but most drivers don't allow an offset (so you'd just get -EINVAL if the data is non-zero) or just allow it with a fixed value - 100 or 128 bytes, so anything above that would get -EINVAL. With brcmfmac the limit is 1500 so (at least) one byte could be obtained. Cc: stable@kernel.org Signed-off-by: Peng Xu Signed-off-by: Jouni Malinen [rewrite description based on SKB allocation knowledge] Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 690874293cfc..d396cb61a280 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -549,6 +549,14 @@ nl80211_nan_srf_policy[NL80211_NAN_SRF_ATTR_MAX + 1] = { [NL80211_NAN_SRF_MAC_ADDRS] = { .type = NLA_NESTED }, }; +/* policy for packet pattern attributes */ +static const struct nla_policy +nl80211_packet_pattern_policy[MAX_NL80211_PKTPAT + 1] = { + [NL80211_PKTPAT_MASK] = { .type = NLA_BINARY, }, + [NL80211_PKTPAT_PATTERN] = { .type = NLA_BINARY, }, + [NL80211_PKTPAT_OFFSET] = { .type = NLA_U32 }, +}; + static int nl80211_prepare_wdev_dump(struct sk_buff *skb, struct netlink_callback *cb, struct cfg80211_registered_device **rdev, @@ -10532,7 +10540,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) u8 *mask_pat; nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, - NULL, info->extack); + nl80211_packet_pattern_policy, + info->extack); err = -EINVAL; if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) @@ -10781,7 +10790,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, rem) { u8 *mask_pat; - nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, NULL, NULL); + nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, + nl80211_packet_pattern_policy, NULL); if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) return -EINVAL; -- cgit v1.2.3 From 5f9bfe0ef622a7bb9707c22ceb4b6451e1e2cb7b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 4 Oct 2017 17:18:27 +0200 Subject: netfilter: nf_tables: do not dump chain counters if not enabled Chain counters are only enabled on demand since 9f08ea848117, skip them when dumping them via netlink. Fixes: 9f08ea848117 ("netfilter: nf_tables: keep chain counters away from hot path") Reported-by: Johny Mattsson Tested-by: Johny Mattsson Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 34adedcb239e..64e1ee091225 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1048,7 +1048,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name)) goto nla_put_failure; - if (nft_dump_stats(skb, nft_base_chain(chain)->stats)) + if (basechain->stats && nft_dump_stats(skb, basechain->stats)) goto nla_put_failure; } -- cgit v1.2.3 From e466af75c074e76107ae1cd5a2823e9c61894ffb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Oct 2017 02:50:07 -0700 Subject: netfilter: x_tables: avoid stack-out-of-bounds read in xt_copy_counters_from_user syzkaller reports an out of bound read in strlcpy(), triggered by xt_copy_counters_from_user() Fix this by using memcpy(), then forcing a zero byte at the last position of the destination, as Florian did for the non COMPAT code. Fixes: d7591f0c41ce ("netfilter: x_tables: introduce and use xt_copy_counters_from_user") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index c83a3b5e1c6c..d8571f414208 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -892,7 +892,7 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len, if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0) return ERR_PTR(-EFAULT); - strlcpy(info->name, compat_tmp.name, sizeof(info->name)); + memcpy(info->name, compat_tmp.name, sizeof(info->name) - 1); info->num_counters = compat_tmp.num_counters; user += sizeof(compat_tmp); } else @@ -905,9 +905,9 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len, if (copy_from_user(info, user, sizeof(*info)) != 0) return ERR_PTR(-EFAULT); - info->name[sizeof(info->name) - 1] = '\0'; user += sizeof(*info); } + info->name[sizeof(info->name) - 1] = '\0'; size = sizeof(struct xt_counters); size *= info->num_counters; -- cgit v1.2.3 From a2d3f3e33853ef52e5f66b41c3e8ee5710aa3305 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 5 Oct 2017 19:03:05 +0200 Subject: ipv6: fix net.ipv6.conf.all.accept_dad behaviour for real Commit 35e015e1f577 ("ipv6: fix net.ipv6.conf.all interface DAD handlers") was intended to affect accept_dad flag handling in such a way that DAD operation and mode on a given interface would be selected according to the maximum value of conf/{all,interface}/accept_dad. However, addrconf_dad_begin() checks for particular cases in which we need to skip DAD, and this check was modified in the wrong way. Namely, it was modified so that, if the accept_dad flag is 0 for the given interface *or* for all interfaces, DAD would be skipped. We have instead to skip DAD if accept_dad is 0 for the given interface *and* for all interfaces. Fixes: 35e015e1f577 ("ipv6: fix net.ipv6.conf.all interface DAD handlers") Acked-by: Stefano Brivio Signed-off-by: Matteo Croce Reported-by: Erik Kline Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 96861c702c06..4a96ebbf8eda 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3820,8 +3820,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) goto out; if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || - dev_net(dev)->ipv6.devconf_all->accept_dad < 1 || - idev->cnf.accept_dad < 1 || + (dev_net(dev)->ipv6.devconf_all->accept_dad < 1 && + idev->cnf.accept_dad < 1) || !(ifp->flags&IFA_F_TENTATIVE) || ifp->flags & IFA_F_NODAD) { bump_id = ifp->flags & IFA_F_TENTATIVE; -- cgit v1.2.3 From 3d0241d57c7b25bb75ac9d7a62753642264fdbce Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Fri, 6 Oct 2017 19:02:35 +0300 Subject: gso: fix payload length when gso_size is zero When gso_size reset to zero for the tail segment in skb_segment(), later in ipv6_gso_segment(), __skb_udp_tunnel_segment() and gre_gso_segment() we will get incorrect results (payload length, pcsum) for that segment. inet_gso_segment() already has a check for gso_size before calculating payload. The issue was found with LTP vxlan & gre tests over ixgbe NIC. Fixes: 07b26c9454a2 ("gso: Support partial splitting at the frag_list pointer") Signed-off-by: Alexey Kodanev Acked-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/gre_offload.c | 2 +- net/ipv4/udp_offload.c | 2 +- net/ipv6/ip6_offload.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 416bb304a281..1859c473b21a 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -86,7 +86,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, greh = (struct gre_base_hdr *)skb_transport_header(skb); pcsum = (__sum16 *)(greh + 1); - if (gso_partial) { + if (gso_partial && skb_is_gso(skb)) { unsigned int partial_adj; /* Adjust checksum to account for the fact that diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 97658bfc1b58..e360d55be555 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -120,7 +120,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, * will be using a length value equal to only one MSS sized * segment instead of the entire frame. */ - if (gso_partial) { + if (gso_partial && skb_is_gso(skb)) { uh->len = htons(skb_shinfo(skb)->gso_size + SKB_GSO_CB(skb)->data_offset + skb->head - (unsigned char *)uh); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index cdb3728faca7..4a87f9428ca5 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -105,7 +105,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, for (skb = segs; skb; skb = skb->next) { ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); - if (gso_partial) + if (gso_partial && skb_is_gso(skb)) payload_len = skb_shinfo(skb)->gso_size + SKB_GSO_CB(skb)->data_offset + skb->head - (unsigned char *)(ipv6h + 1); -- cgit v1.2.3 From 3382605fd8db1ed1fb03f3f1529490133fe3ab08 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Sat, 7 Oct 2017 14:32:49 +0200 Subject: tipc: correct initialization of skb list We change the initialization of the skb transmit buffer queues in the functions tipc_bcast_xmit() and tipc_rcast_xmit() to also initialize their spinlocks. This is needed because we may, during error conditions, need to call skb_queue_purge() on those queues further down the stack. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 7d99029df342..a140dd4a84af 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -233,7 +233,7 @@ static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts, struct sk_buff_head xmitq; int rc = 0; - __skb_queue_head_init(&xmitq); + skb_queue_head_init(&xmitq); tipc_bcast_lock(net); if (tipc_link_bc_peers(l)) rc = tipc_link_xmit(l, pkts, &xmitq); @@ -263,7 +263,7 @@ static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts, u32 dst, selector; selector = msg_link_selector(buf_msg(skb_peek(pkts))); - __skb_queue_head_init(&_pkts); + skb_queue_head_init(&_pkts); list_for_each_entry_safe(n, tmp, &dests->list, list) { dst = n->value; -- cgit v1.2.3 From a9e2971b8cd3ef469de0112ba15778b5b98ad72e Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Sat, 7 Oct 2017 15:07:20 +0200 Subject: tipc: Unclone message at secondary destination lookup When a bundling message is received, the function tipc_link_input() calls function tipc_msg_extract() to unbundle all inner messages of the bundling message before adding them to input queue. The function tipc_msg_extract() just clones all inner skb for all inner messagges from the bundling skb. This means that the skb headroom of an inner message overlaps with the data part of the preceding message in the bundle. If the message in question is a name addressed message, it may be subject to a secondary destination lookup, and eventually be sent out on one of the interfaces again. But, since what is perceived as headroom by the device driver in reality is the last bytes of the preceding message in the bundle, the latter will be overwritten by the MAC addresses of the L2 header. If the preceding message has not yet been consumed by the user, it will evenually be delivered with corrupted contents. This commit fixes this by uncloning all messages passing through the function tipc_msg_lookup_dest(), hence ensuring that the headroom is always valid when the message is passed on. Signed-off-by: Tung Nguyen Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/msg.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 121e59a1d0e7..17146c16ee2d 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -568,6 +568,14 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) msg_set_destnode(msg, dnode); msg_set_destport(msg, dport); *err = TIPC_OK; + + if (!skb_cloned(skb)) + return true; + + /* Unclone buffer in case it was bundled */ + if (pskb_expand_head(skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC)) + return false; + return true; } -- cgit v1.2.3 From 49f817d793d1bcc11d721881aac037b996feef5c Mon Sep 17 00:00:00 2001 From: Lin Zhang Date: Fri, 6 Oct 2017 00:44:03 +0800 Subject: netfilter: SYNPROXY: skip non-tcp packet in {ipv4, ipv6}_synproxy_hook In function {ipv4,ipv6}_synproxy_hook we expect a normal tcp packet, but the real server maybe reply an icmp error packet related to the exist tcp conntrack, so we will access wrong tcp data. Fix it by checking for the protocol field and only process tcp traffic. Signed-off-by: Lin Zhang Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_SYNPROXY.c | 3 ++- net/ipv6/netfilter/ip6t_SYNPROXY.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 811689e523c3..f75fc6b53115 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -330,7 +330,8 @@ static unsigned int ipv4_synproxy_hook(void *priv, if (synproxy == NULL) return NF_ACCEPT; - if (nf_is_loopback_packet(skb)) + if (nf_is_loopback_packet(skb) || + ip_hdr(skb)->protocol != IPPROTO_TCP) return NF_ACCEPT; thoff = ip_hdrlen(skb); diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index a5cd43d75393..437af8c95277 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -353,7 +353,7 @@ static unsigned int ipv6_synproxy_hook(void *priv, nexthdr = ipv6_hdr(skb)->nexthdr; thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off); - if (thoff < 0) + if (thoff < 0 || nexthdr != IPPROTO_TCP) return NF_ACCEPT; th = skb_header_pointer(skb, thoff, sizeof(_th), &_th); -- cgit v1.2.3 From 98589a0998b8b13c4a8fa1ccb0e62751a019faa5 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Mon, 9 Oct 2017 15:27:15 +0300 Subject: netfilter: xt_bpf: Fix XT_BPF_MODE_FD_PINNED mode of 'xt_bpf_info_v1' Commit 2c16d6033264 ("netfilter: xt_bpf: support ebpf") introduced support for attaching an eBPF object by an fd, with the 'bpf_mt_check_v1' ABI expecting the '.fd' to be specified upon each IPT_SO_SET_REPLACE call. However this breaks subsequent iptables calls: # iptables -A INPUT -m bpf --object-pinned /sys/fs/bpf/xxx -j ACCEPT # iptables -A INPUT -s 5.6.7.8 -j ACCEPT iptables: Invalid argument. Run `dmesg' for more information. That's because iptables works by loading existing rules using IPT_SO_GET_ENTRIES to userspace, then issuing IPT_SO_SET_REPLACE with the replacement set. However, the loaded 'xt_bpf_info_v1' has an arbitrary '.fd' number (from the initial "iptables -m bpf" invocation) - so when 2nd invocation occurs, userspace passes a bogus fd number, which leads to 'bpf_mt_check_v1' to fail. One suggested solution [1] was to hack iptables userspace, to perform a "entries fixup" immediatley after IPT_SO_GET_ENTRIES, by opening a new, process-local fd per every 'xt_bpf_info_v1' entry seen. However, in [2] both Pablo Neira Ayuso and Willem de Bruijn suggested to depricate the xt_bpf_info_v1 ABI dealing with pinned ebpf objects. This fix changes the XT_BPF_MODE_FD_PINNED behavior to ignore the given '.fd' and instead perform an in-kernel lookup for the bpf object given the provided '.path'. It also defines an alias for the XT_BPF_MODE_FD_PINNED mode, named XT_BPF_MODE_PATH_PINNED, to better reflect the fact that the user is expected to provide the path of the pinned object. Existing XT_BPF_MODE_FD_ELF behavior (non-pinned fd mode) is preserved. References: [1] https://marc.info/?l=netfilter-devel&m=150564724607440&w=2 [2] https://marc.info/?l=netfilter-devel&m=150575727129880&w=2 Reported-by: Rafael Buchbinder Signed-off-by: Shmulik Ladkani Acked-by: Willem de Bruijn Acked-by: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_bpf.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index 38986a95216c..29123934887b 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -49,6 +50,22 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret) return 0; } +static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret) +{ + mm_segment_t oldfs = get_fs(); + int retval, fd; + + set_fs(KERNEL_DS); + fd = bpf_obj_get_user(path); + set_fs(oldfs); + if (fd < 0) + return fd; + + retval = __bpf_mt_check_fd(fd, ret); + sys_close(fd); + return retval; +} + static int bpf_mt_check(const struct xt_mtchk_param *par) { struct xt_bpf_info *info = par->matchinfo; @@ -66,9 +83,10 @@ static int bpf_mt_check_v1(const struct xt_mtchk_param *par) return __bpf_mt_check_bytecode(info->bpf_program, info->bpf_program_num_elem, &info->filter); - else if (info->mode == XT_BPF_MODE_FD_PINNED || - info->mode == XT_BPF_MODE_FD_ELF) + else if (info->mode == XT_BPF_MODE_FD_ELF) return __bpf_mt_check_fd(info->fd, &info->filter); + else if (info->mode == XT_BPF_MODE_PATH_PINNED) + return __bpf_mt_check_path(info->path, &info->filter); else return -EINVAL; } -- cgit v1.2.3 From 62cf27e52b8c9a39066172ca6b6134cb5eaa9450 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 9 Oct 2017 08:39:43 +0200 Subject: ipv6: Fix traffic triggered IPsec connections. A recent patch removed the dst_free() on the allocated dst_entry in ipv6_blackhole_route(). The dst_free() marked the dst_entry as dead and added it to the gc list. I.e. it was setup for a one time usage. As a result we may now have a blackhole route cached at a socket on some IPsec scenarios. This makes the connection unusable. Fix this by marking the dst_entry directly at allocation time as 'dead', so it is used only once. Fixes: 587fea741134 ("ipv6: mark DST_NOGC and remove the operation of dst_free()") Reported-by: Tobias Brunner Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 26cc9f483b6d..a96d5b385d8f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1325,7 +1325,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori struct dst_entry *new = NULL; rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1, - DST_OBSOLETE_NONE, 0); + DST_OBSOLETE_DEAD, 0); if (rt) { rt6_info_init(rt); -- cgit v1.2.3 From 6c0e7284d89995877740d8a26c3e99a937312a3c Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 9 Oct 2017 08:43:55 +0200 Subject: ipv4: Fix traffic triggered IPsec connections. A recent patch removed the dst_free() on the allocated dst_entry in ipv4_blackhole_route(). The dst_free() marked the dst_entry as dead and added it to the gc list. I.e. it was setup for a one time usage. As a result we may now have a blackhole route cached at a socket on some IPsec scenarios. This makes the connection unusable. Fix this by marking the dst_entry directly at allocation time as 'dead', so it is used only once. Fixes: b838d5e1c5b6 ("ipv4: mark DST_NOGC and remove the operation of dst_free()") Reported-by: Tobias Brunner Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ac6fde5d45f1..3d9f1c2f81c5 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2513,7 +2513,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or struct rtable *ort = (struct rtable *) dst_orig; struct rtable *rt; - rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0); + rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0); if (rt) { struct dst_entry *new = &rt->dst; -- cgit v1.2.3 From 41c87425a1ac9b633e0fcc78eb1f19640c8fb5a0 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 9 Oct 2017 14:14:51 +0200 Subject: netlink: do not set cb_running if dump's start() errs It turns out that multiple places can call netlink_dump(), which means it's still possible to dereference partially initialized values in dump() that were the result of a faulty returned start(). This fixes the issue by calling start() _before_ setting cb_running to true, so that there's no chance at all of hitting the dump() function through any indirect paths. It also moves the call to start() to be when the mutex is held. This has the nice side effect of serializing invocations to start(), which is likely desirable anyway. It also prevents any possible other races that might come out of this logic. In testing this with several different pieces of tricky code to trigger these issues, this commit fixes all avenues that I'm aware of. Signed-off-by: Jason A. Donenfeld Cc: Johannes Berg Reviewed-by: Johannes Berg Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 94c11cf0459d..f34750691c5c 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2266,16 +2266,17 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, cb->min_dump_alloc = control->min_dump_alloc; cb->skb = skb; + if (cb->start) { + ret = cb->start(cb); + if (ret) + goto error_unlock; + } + nlk->cb_running = true; mutex_unlock(nlk->cb_mutex); - ret = 0; - if (cb->start) - ret = cb->start(cb); - - if (!ret) - ret = netlink_dump(sk); + ret = netlink_dump(sk); sock_put(sk); -- cgit v1.2.3 From 996b44fcef8f216ea0b6b6e74468c5a77b5e341f Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 9 Oct 2017 14:52:10 +0200 Subject: udp: fix bcast packet reception The commit bc044e8db796 ("udp: perform source validation for mcast early demux") does not take into account that broadcast packets lands in the same code path and they need different checks for the source address - notably, zero source address are valid for bcast and invalid for mcast. As a result, 2nd and later broadcast packets with 0 source address landing to the same socket are dropped. This breaks dhcp servers. Since we don't have stringent performance requirements for ingress broadcast traffic, fix it by disabling UDP early demux such traffic. Reported-by: Hannes Frederic Sowa Fixes: bc044e8db796 ("udp: perform source validation for mcast early demux") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv4/udp.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5676237d2b0f..e45177ceb0ee 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2240,20 +2240,16 @@ int udp_v4_early_demux(struct sk_buff *skb) iph = ip_hdr(skb); uh = udp_hdr(skb); - if (skb->pkt_type == PACKET_BROADCAST || - skb->pkt_type == PACKET_MULTICAST) { + if (skb->pkt_type == PACKET_MULTICAST) { in_dev = __in_dev_get_rcu(skb->dev); if (!in_dev) return 0; - /* we are supposed to accept bcast packets */ - if (skb->pkt_type == PACKET_MULTICAST) { - ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, - iph->protocol); - if (!ours) - return 0; - } + ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, + iph->protocol); + if (!ours) + return 0; sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, uh->source, iph->saddr, -- cgit v1.2.3