From 9c90c9b3e50e16d03c7f87d63e9db373974781e0 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Mon, 23 May 2022 22:05:24 +0200 Subject: Revert "net: af_key: add check for pfkey_broadcast in function pfkey_process" This reverts commit 4dc2a5a8f6754492180741facf2a8787f2c415d7. A non-zero return value from pfkey_broadcast() does not necessarily mean an error occurred as this function returns -ESRCH when no registered listener received the message. In particular, a call with BROADCAST_PROMISC_ONLY flag and null one_sk argument can never return zero so that this commit in fact prevents processing any PF_KEY message. One visible effect is that racoon daemon fails to find encryption algorithms like aes and refuses to start. Excluding -ESRCH return value would fix this but it's not obvious that we really want to bail out here and most other callers of pfkey_broadcast() also ignore the return value. Also, as pointed out by Steffen Klassert, PF_KEY is kind of deprecated and newer userspace code should use netlink instead so that we should only disturb the code for really important fixes. v2: add a comment explaining why is the return value ignored Signed-off-by: Michal Kubecek Signed-off-by: Steffen Klassert --- net/key/af_key.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 339d95df19d3..d93bde657359 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2826,10 +2826,12 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb void *ext_hdrs[SADB_EXT_MAX]; int err; - err = pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, - BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); - if (err) - return err; + /* Non-zero return value of pfkey_broadcast() does not always signal + * an error and even on an actual error we may still want to process + * the message so rather ignore the return value. + */ + pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, + BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); memset(ext_hdrs, 0, sizeof(ext_hdrs)); err = parse_exthdrs(skb, hdr, ext_hdrs); -- cgit v1.2.3 From 6821ad8770340825f17962cf5ef64ebaffee7fd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Wed, 18 May 2022 14:05:48 -0700 Subject: xfrm: do not set IPv4 DF flag when encapsulating IPv6 frames <= 1280 bytes. One may want to have DF set on large packets to support discovering path mtu and limiting the size of generated packets (hence not setting the XFRM_STATE_NOPMTUDISC tunnel flag), while still supporting networks that are incapable of carrying even minimal sized IPv6 frames (post encapsulation). Having IPv4 Don't Frag bit set on encapsulated IPv6 frames that are not larger than the minimum IPv6 mtu of 1280 isn't useful, because the resulting ICMP Fragmentation Required error isn't actionable (even assuming you receive it) because IPv6 will not drop it's path mtu below 1280 anyway. While the IPv4 stack could prefrag the packets post encap, this requires the ICMP error to be successfully delivered and causes a loss of the original IPv6 frame (thus requiring a retransmit and latency hit). Luckily with IPv4 if we simply don't set the DF flag, we'll just make further fragmenting the packets some other router's problems. We'll still learn the correct IPv4 path mtu through encapsulation of larger IPv6 frames. I'm still not convinced this patch is entirely sufficient to make everything happy... but I don't see how it could possibly make things worse. See also recent: 4ff2980b6bd2 'xfrm: fix tunnel model fragmentation behavior' and friends Cc: Lorenzo Colitti Cc: Eric Dumazet Cc: Lina Wang Cc: Steffen Klassert Signed-off-by: Maciej Zenczykowski Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index d4935b3b9983..555ab35cd119 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -273,6 +273,7 @@ static int xfrm4_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb) */ static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb) { + bool small_ipv6 = (skb->protocol == htons(ETH_P_IPV6)) && (skb->len <= IPV6_MIN_MTU); struct dst_entry *dst = skb_dst(skb); struct iphdr *top_iph; int flags; @@ -303,7 +304,7 @@ static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb) if (flags & XFRM_STATE_NOECN) IP_ECN_clear(top_iph); - top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? + top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) || small_ipv6 ? 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst)); -- cgit v1.2.3 From b3b1a17538d3ef6a9667b2271216fd16d7678ab5 Mon Sep 17 00:00:00 2001 From: liuyacan Date: Wed, 25 May 2022 16:54:08 +0800 Subject: net/smc: set ini->smcrv2.ib_dev_v2 to NULL if SMC-Rv2 is unavailable In the process of checking whether RDMAv2 is available, the current implementation first sets ini->smcrv2.ib_dev_v2, and then allocates smc buf desc and register rmb, but the latter may fail. In this case, the pointer should be reset. Fixes: e49300a6bf62 ("net/smc: add listen processing for SMC-Rv2") Signed-off-by: liuyacan Reviewed-by: Karsten Graul Link: https://lore.kernel.org/r/20220525085408.812273-1-liuyacan@corp.netease.com Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index a201bf29af98..433bb5a7df31 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -2161,6 +2161,7 @@ static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc, not_found: ini->smcr_version &= ~SMC_V2; + ini->smcrv2.ib_dev_v2 = NULL; ini->check_smcrv2 = false; } -- cgit v1.2.3 From ed6cd6a17896561b9f51ab4c0d9bbb29e762b597 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 25 May 2022 00:56:18 +0200 Subject: net, neigh: Set lower cap for neigh_managed_work rearming Yuwei reported that plain reuse of DELAY_PROBE_TIME to rearm work queue in neigh_managed_work is problematic if user explicitly configures the DELAY_PROBE_TIME to 0 for a neighbor table. Such misconfig can then hog CPU to 100% processing the system work queue. Instead, set lower interval bound to HZ which is totally sufficient. Yuwei is additionally looking into making the interval separately configurable from DELAY_PROBE_TIME. Reported-by: Yuwei Wang Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/netdev/797c3c53-ce1b-9f60-e253-cda615788f4a@iogearbox.net Reviewed-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/3b8c5aa906c52c3a8c995d1b2e8ccf650ea7c716.1653432794.git.daniel@iogearbox.net Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 47b6c1f0fdbb..54625287ee5b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1579,7 +1579,7 @@ static void neigh_managed_work(struct work_struct *work) list_for_each_entry(neigh, &tbl->managed_list, managed_list) neigh_event_send_probe(neigh, NULL, false); queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, - NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME)); + max(NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME), HZ)); write_unlock_bh(&tbl->lock); } -- cgit v1.2.3 From 520778042ccca019f3ffa136dd0ca565c486cedd Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 25 May 2022 10:36:38 +0200 Subject: netfilter: nf_tables: disallow non-stateful expression in sets earlier Since 3e135cd499bf ("netfilter: nft_dynset: dynamic stateful expression instantiation"), it is possible to attach stateful expressions to set elements. cd5125d8f518 ("netfilter: nf_tables: split set destruction in deactivate and destroy phase") introduces conditional destruction on the object to accomodate transaction semantics. nft_expr_init() calls expr->ops->init() first, then check for NFT_STATEFUL_EXPR, this stills allows to initialize a non-stateful lookup expressions which points to a set, which might lead to UAF since the set is not properly detached from the set->binding for this case. Anyway, this combination is non-sense from nf_tables perspective. This patch fixes this problem by checking for NFT_STATEFUL_EXPR before expr->ops->init() is called. The reporter provides a KASAN splat and a poc reproducer (similar to those autogenerated by syzbot to report use-after-free errors). It is unknown to me if they are using syzbot or if they use similar automated tool to locate the bug that they are reporting. For the record, this is the KASAN splat. [ 85.431824] ================================================================== [ 85.432901] BUG: KASAN: use-after-free in nf_tables_bind_set+0x81b/0xa20 [ 85.433825] Write of size 8 at addr ffff8880286f0e98 by task poc/776 [ 85.434756] [ 85.434999] CPU: 1 PID: 776 Comm: poc Tainted: G W 5.18.0+ #2 [ 85.436023] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 Fixes: 0b2d8a7b638b ("netfilter: nf_tables: add helper functions for expression handling") Reported-and-tested-by: Aaron Adams Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 12fc9cda4a2c..f296dfe86b62 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2873,27 +2873,31 @@ static struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, err = nf_tables_expr_parse(ctx, nla, &expr_info); if (err < 0) - goto err1; + goto err_expr_parse; + + err = -EOPNOTSUPP; + if (!(expr_info.ops->type->flags & NFT_EXPR_STATEFUL)) + goto err_expr_stateful; err = -ENOMEM; expr = kzalloc(expr_info.ops->size, GFP_KERNEL_ACCOUNT); if (expr == NULL) - goto err2; + goto err_expr_stateful; err = nf_tables_newexpr(ctx, &expr_info, expr); if (err < 0) - goto err3; + goto err_expr_new; return expr; -err3: +err_expr_new: kfree(expr); -err2: +err_expr_stateful: owner = expr_info.ops->type->owner; if (expr_info.ops->type->release_ops) expr_info.ops->type->release_ops(expr_info.ops); module_put(owner); -err1: +err_expr_parse: return ERR_PTR(err); } @@ -5413,9 +5417,6 @@ struct nft_expr *nft_set_elem_expr_alloc(const struct nft_ctx *ctx, return expr; err = -EOPNOTSUPP; - if (!(expr->ops->type->flags & NFT_EXPR_STATEFUL)) - goto err_set_elem_expr; - if (expr->ops->type->flags & NFT_EXPR_GC) { if (set->flags & NFT_SET_TIMEOUT) goto err_set_elem_expr; -- cgit v1.2.3 From 558254b0b602b8605d7246a10cfeb584b1fcabfc Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 24 May 2022 14:50:01 +0200 Subject: netfilter: nft_limit: Clone packet limits' cost value When cloning a packet-based limit expression, copy the cost value as well. Otherwise the new limit is not functional anymore. Fixes: 3b9e2ea6c11bf ("netfilter: nft_limit: move stateful fields out of expression data") Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_limit.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 04ea8b9bf202..981addb2d051 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -213,6 +213,8 @@ static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src struct nft_limit_priv_pkts *priv_dst = nft_expr_priv(dst); struct nft_limit_priv_pkts *priv_src = nft_expr_priv(src); + priv_dst->cost = priv_src->cost; + return nft_limit_clone(&priv_dst->limit, &priv_src->limit); } -- cgit v1.2.3 From ffd219efd9ee1ceccc7ccfa9361fd40705680fc3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 20 May 2022 00:02:03 +0200 Subject: netfilter: nfnetlink: fix warn in nfnetlink_unbind syzbot reports following warn: WARNING: CPU: 0 PID: 3600 at net/netfilter/nfnetlink.c:703 nfnetlink_unbind+0x357/0x3b0 net/netfilter/nfnetlink.c:694 The syzbot generated program does this: socket(AF_NETLINK, SOCK_RAW, NETLINK_NETFILTER) = 3 setsockopt(3, SOL_NETLINK, NETLINK_DROP_MEMBERSHIP, [1], 4) = 0 ... which triggers 'WARN_ON_ONCE(nfnlnet->ctnetlink_listeners == 0)' check. Instead of counting, just enable reporting for every bind request and check if we still have listeners on unbind. While at it, also add the needed bounds check on nfnl_group2type[] access. Reported-by: Reported-by: Fixes: 2794cdb0b97b ("netfilter: nfnetlink: allow to detect if ctnetlink listeners exist") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index ad3bbe34ca88..2f7c477fc9e7 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -45,7 +45,6 @@ MODULE_DESCRIPTION("Netfilter messages via netlink socket"); static unsigned int nfnetlink_pernet_id __read_mostly; struct nfnl_net { - unsigned int ctnetlink_listeners; struct sock *nfnl; }; @@ -673,18 +672,8 @@ static int nfnetlink_bind(struct net *net, int group) #ifdef CONFIG_NF_CONNTRACK_EVENTS if (type == NFNL_SUBSYS_CTNETLINK) { - struct nfnl_net *nfnlnet = nfnl_pernet(net); - nfnl_lock(NFNL_SUBSYS_CTNETLINK); - - if (WARN_ON_ONCE(nfnlnet->ctnetlink_listeners == UINT_MAX)) { - nfnl_unlock(NFNL_SUBSYS_CTNETLINK); - return -EOVERFLOW; - } - - nfnlnet->ctnetlink_listeners++; - if (nfnlnet->ctnetlink_listeners == 1) - WRITE_ONCE(net->ct.ctnetlink_has_listener, true); + WRITE_ONCE(net->ct.ctnetlink_has_listener, true); nfnl_unlock(NFNL_SUBSYS_CTNETLINK); } #endif @@ -694,15 +683,12 @@ static int nfnetlink_bind(struct net *net, int group) static void nfnetlink_unbind(struct net *net, int group) { #ifdef CONFIG_NF_CONNTRACK_EVENTS - int type = nfnl_group2type[group]; - - if (type == NFNL_SUBSYS_CTNETLINK) { - struct nfnl_net *nfnlnet = nfnl_pernet(net); + if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX) + return; + if (nfnl_group2type[group] == NFNL_SUBSYS_CTNETLINK) { nfnl_lock(NFNL_SUBSYS_CTNETLINK); - WARN_ON_ONCE(nfnlnet->ctnetlink_listeners == 0); - nfnlnet->ctnetlink_listeners--; - if (nfnlnet->ctnetlink_listeners == 0) + if (!nfnetlink_has_listeners(net, group)) WRITE_ONCE(net->ct.ctnetlink_has_listener, false); nfnl_unlock(NFNL_SUBSYS_CTNETLINK); } -- cgit v1.2.3 From aeed55a08d0b2bf2e3986b110b04d2a5aca192a6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 20 May 2022 00:02:05 +0200 Subject: netfilter: cttimeout: fix slab-out-of-bounds read in cttimeout_net_exit syzbot reports: BUG: KASAN: slab-out-of-bounds in __list_del_entry_valid+0xcc/0xf0 lib/list_debug.c:42 [..] list_del include/linux/list.h:148 [inline] cttimeout_net_exit+0x211/0x540 net/netfilter/nfnetlink_cttimeout.c:617 No reproducer so far. Looking at recent changes in this area its clear that the free_head must not be at the end of the structure because nf_ct_timeout structure has variable size. Reported-by: Fixes: 78222bacfca9 ("netfilter: cttimeout: decouple unlink and free on netns destruction") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cttimeout.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index f069c24c6146..af15102bc696 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -35,12 +35,13 @@ static unsigned int nfct_timeout_id __read_mostly; struct ctnl_timeout { struct list_head head; + struct list_head free_head; struct rcu_head rcu_head; refcount_t refcnt; char name[CTNL_TIMEOUT_NAME_MAX]; - struct nf_ct_timeout timeout; - struct list_head free_head; + /* must be at the end */ + struct nf_ct_timeout timeout; }; struct nfct_timeout_pernet { -- cgit v1.2.3 From b53c116642502b0c85ecef78bff4f826a7dd4145 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 20 May 2022 00:02:06 +0200 Subject: netfilter: nf_tables: set element extended ACK reporting support Report the element that causes problems via netlink extended ACK for set element commands. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f296dfe86b62..f4f1d0a2da43 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5348,8 +5348,10 @@ static int nf_tables_getsetelem(struct sk_buff *skb, nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_get_set_elem(&ctx, set, attr); - if (err < 0) + if (err < 0) { + NL_SET_BAD_ATTR(extack, attr); break; + } } return err; @@ -6126,8 +6128,10 @@ static int nf_tables_newsetelem(struct sk_buff *skb, nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_add_set_elem(&ctx, set, attr, info->nlh->nlmsg_flags); - if (err < 0) + if (err < 0) { + NL_SET_BAD_ATTR(extack, attr); return err; + } } if (nft_net->validate_state == NFT_VALIDATE_DO) @@ -6397,8 +6401,10 @@ static int nf_tables_delsetelem(struct sk_buff *skb, nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_del_setelem(&ctx, set, attr); - if (err < 0) + if (err < 0) { + NL_SET_BAD_ATTR(extack, attr); break; + } } return err; } -- cgit v1.2.3 From e225c9a5a74b12e9ef8516f30a3db2c7eb866ee1 Mon Sep 17 00:00:00 2001 From: Guangguan Wang Date: Sat, 28 May 2022 14:54:57 +0800 Subject: net/smc: fixes for converting from "struct smc_cdc_tx_pend **" to "struct smc_wr_tx_pend_priv *" "struct smc_cdc_tx_pend **" can not directly convert to "struct smc_wr_tx_pend_priv *". Fixes: 2bced6aefa3d ("net/smc: put slot when connection is killed") Signed-off-by: Guangguan Wang Signed-off-by: David S. Miller --- net/smc/smc_cdc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 5c731f27996e..53f63bfbaf5f 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -82,7 +82,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn, /* abnormal termination */ if (!rc) smc_wr_tx_put_slot(link, - (struct smc_wr_tx_pend_priv *)pend); + (struct smc_wr_tx_pend_priv *)(*pend)); rc = -EPIPE; } return rc; -- cgit v1.2.3 From 11825765291a93d8e7f44230da67b9f607c777bf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 May 2022 14:28:29 -0700 Subject: tcp: fix tcp_mtup_probe_success vs wrong snd_cwnd syzbot got a new report [1] finally pointing to a very old bug, added in initial support for MTU probing. tcp_mtu_probe() has checks about starting an MTU probe if tcp_snd_cwnd(tp) >= 11. But nothing prevents tcp_snd_cwnd(tp) to be reduced later and before the MTU probe succeeds. This bug would lead to potential zero-divides. Debugging added in commit 40570375356c ("tcp: add accessors to read/set tp->snd_cwnd") has paid off :) While we are at it, address potential overflows in this code. [1] WARNING: CPU: 1 PID: 14132 at include/net/tcp.h:1219 tcp_mtup_probe_success+0x366/0x570 net/ipv4/tcp_input.c:2712 Modules linked in: CPU: 1 PID: 14132 Comm: syz-executor.2 Not tainted 5.18.0-syzkaller-07857-gbabf0bb978e3 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:tcp_snd_cwnd_set include/net/tcp.h:1219 [inline] RIP: 0010:tcp_mtup_probe_success+0x366/0x570 net/ipv4/tcp_input.c:2712 Code: 74 08 48 89 ef e8 da 80 17 f9 48 8b 45 00 65 48 ff 80 80 03 00 00 48 83 c4 30 5b 41 5c 41 5d 41 5e 41 5f 5d c3 e8 aa b0 c5 f8 <0f> 0b e9 16 fe ff ff 48 8b 4c 24 08 80 e1 07 38 c1 0f 8c c7 fc ff RSP: 0018:ffffc900079e70f8 EFLAGS: 00010287 RAX: ffffffff88c0f7f6 RBX: ffff8880756e7a80 RCX: 0000000000040000 RDX: ffffc9000c6c4000 RSI: 0000000000031f9e RDI: 0000000000031f9f RBP: 0000000000000000 R08: ffffffff88c0f606 R09: ffffc900079e7520 R10: ffffed101011226d R11: 1ffff1101011226c R12: 1ffff1100eadcf50 R13: ffff8880756e72c0 R14: 1ffff1100eadcf89 R15: dffffc0000000000 FS: 00007f643236e700(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1ab3f1e2a0 CR3: 0000000064fe7000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tcp_clean_rtx_queue+0x223a/0x2da0 net/ipv4/tcp_input.c:3356 tcp_ack+0x1962/0x3c90 net/ipv4/tcp_input.c:3861 tcp_rcv_established+0x7c8/0x1ac0 net/ipv4/tcp_input.c:5973 tcp_v6_do_rcv+0x57b/0x1210 net/ipv6/tcp_ipv6.c:1476 sk_backlog_rcv include/net/sock.h:1061 [inline] __release_sock+0x1d8/0x4c0 net/core/sock.c:2849 release_sock+0x5d/0x1c0 net/core/sock.c:3404 sk_stream_wait_memory+0x700/0xdc0 net/core/stream.c:145 tcp_sendmsg_locked+0x111d/0x3fc0 net/ipv4/tcp.c:1410 tcp_sendmsg+0x2c/0x40 net/ipv4/tcp.c:1448 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] __sys_sendto+0x439/0x5c0 net/socket.c:2119 __do_sys_sendto net/socket.c:2131 [inline] __se_sys_sendto net/socket.c:2127 [inline] __x64_sys_sendto+0xda/0xf0 net/socket.c:2127 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f6431289109 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f643236e168 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007f643139c100 RCX: 00007f6431289109 RDX: 00000000d0d0c2ac RSI: 0000000020000080 RDI: 000000000000000a RBP: 00007f64312e308d R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fff372533af R14: 00007f643236e300 R15: 0000000000022000 Fixes: 5d424d5a674f ("[TCP]: MTU probing") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3231af73e430..2e2a9ece9af2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2706,12 +2706,15 @@ static void tcp_mtup_probe_success(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + u64 val; - /* FIXME: breaks with very large cwnd */ tp->prior_ssthresh = tcp_current_ssthresh(sk); - tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) * - tcp_mss_to_mtu(sk, tp->mss_cache) / - icsk->icsk_mtup.probe_size); + + val = (u64)tcp_snd_cwnd(tp) * tcp_mss_to_mtu(sk, tp->mss_cache); + do_div(val, icsk->icsk_mtup.probe_size); + DEBUG_NET_WARN_ON_ONCE((u32)val != val); + tcp_snd_cwnd_set(tp, max_t(u32, 1U, val)); + tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_jiffies32; tp->snd_ssthresh = tcp_current_ssthresh(sk); -- cgit v1.2.3 From 911799172d2f703cc14e9000c55d124d5e0a91a2 Mon Sep 17 00:00:00 2001 From: keliu Date: Fri, 27 May 2022 06:42:25 +0000 Subject: net: nfc: Directly use ida_alloc()/free() Use ida_alloc()/ida_free() instead of deprecated ida_simple_get()/ida_simple_remove() . Signed-off-by: keliu Signed-off-by: David S. Miller --- net/nfc/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/nfc/core.c b/net/nfc/core.c index 6ff3e10ff8e3..eb2c0959e5b6 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -975,7 +975,7 @@ static void nfc_release(struct device *d) kfree(se); } - ida_simple_remove(&nfc_index_ida, dev->idx); + ida_free(&nfc_index_ida, dev->idx); kfree(dev); } @@ -1066,7 +1066,7 @@ struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops, if (!dev) return NULL; - rc = ida_simple_get(&nfc_index_ida, 0, 0, GFP_KERNEL); + rc = ida_alloc(&nfc_index_ida, GFP_KERNEL); if (rc < 0) goto err_free_dev; dev->idx = rc; -- cgit v1.2.3 From 3a2cd89bfbeb10012eb90857ce641d34f0151c4c Mon Sep 17 00:00:00 2001 From: huhai Date: Thu, 26 May 2022 18:12:13 +0800 Subject: net: ipv4: Avoid bounds check warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following build warning when CONFIG_IPV6 is not set: In function ‘fortify_memcpy_chk’, inlined from ‘tcp_md5_do_add’ at net/ipv4/tcp_ipv4.c:1210:2: ./include/linux/fortify-string.h:328:4: error: call to ‘__write_overflow_field’ declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Werror=attribute-warning] 328 | __write_overflow_field(p_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Suggested-by: Paolo Abeni Signed-off-by: huhai Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220526101213.2392980-1-zhanggenjian@kylinos.cn Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_ipv4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index dac2650f3863..fe8f23b95d32 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1207,8 +1207,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, key->l3index = l3index; key->flags = flags; memcpy(&key->addr, addr, - (family == AF_INET6) ? sizeof(struct in6_addr) : - sizeof(struct in_addr)); + (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6) ? sizeof(struct in6_addr) : + sizeof(struct in_addr)); hlist_add_head_rcu(&key->node, &md5sig->head); return 0; } -- cgit v1.2.3 From 3e0b8f529c10037ae0b369fc892e524eae5a5485 Mon Sep 17 00:00:00 2001 From: Arun Ajith S Date: Mon, 30 May 2022 10:14:14 +0000 Subject: net/ipv6: Expand and rename accept_unsolicited_na to accept_untracked_na RFC 9131 changes default behaviour of handling RX of NA messages when the corresponding entry is absent in the neighbour cache. The current implementation is limited to accept just unsolicited NAs. However, the RFC is more generic where it also accepts solicited NAs. Both types should result in adding a STALE entry for this case. Expand accept_untracked_na behaviour to also accept solicited NAs to be compliant with the RFC and rename the sysctl knob to accept_untracked_na. Fixes: f9a2fb73318e ("net/ipv6: Introduce accept_unsolicited_na knob to implement router-side changes for RFC9131") Signed-off-by: Arun Ajith S Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220530101414.65439-1-aajith@arista.com Signed-off-by: Paolo Abeni --- net/ipv6/addrconf.c | 6 +++--- net/ipv6/ndisc.c | 42 +++++++++++++++++++++++++----------------- 2 files changed, 28 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ca0aa744593e..1b1932502e9e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5586,7 +5586,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_IOAM6_ID] = cnf->ioam6_id; array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide; array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier; - array[DEVCONF_ACCEPT_UNSOLICITED_NA] = cnf->accept_unsolicited_na; + array[DEVCONF_ACCEPT_UNTRACKED_NA] = cnf->accept_untracked_na; } static inline size_t inet6_ifla6_size(void) @@ -7038,8 +7038,8 @@ static const struct ctl_table addrconf_sysctl[] = { .extra2 = (void *)SYSCTL_ONE, }, { - .procname = "accept_unsolicited_na", - .data = &ipv6_devconf.accept_unsolicited_na, + .procname = "accept_untracked_na", + .data = &ipv6_devconf.accept_untracked_na, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 254addad0dd3..b0dfe97ea4ee 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -979,7 +979,7 @@ static void ndisc_recv_na(struct sk_buff *skb) struct inet6_dev *idev = __in6_dev_get(dev); struct inet6_ifaddr *ifp; struct neighbour *neigh; - bool create_neigh; + u8 new_state; if (skb->len < sizeof(struct nd_msg)) { ND_PRINTK(2, warn, "NA: packet too short\n"); @@ -1000,7 +1000,7 @@ static void ndisc_recv_na(struct sk_buff *skb) /* For some 802.11 wireless deployments (and possibly other networks), * there will be a NA proxy and unsolicitd packets are attacks * and thus should not be accepted. - * drop_unsolicited_na takes precedence over accept_unsolicited_na + * drop_unsolicited_na takes precedence over accept_untracked_na */ if (!msg->icmph.icmp6_solicited && idev && idev->cnf.drop_unsolicited_na) @@ -1041,25 +1041,33 @@ static void ndisc_recv_na(struct sk_buff *skb) in6_ifa_put(ifp); return; } + + neigh = neigh_lookup(&nd_tbl, &msg->target, dev); + /* RFC 9131 updates original Neighbour Discovery RFC 4861. - * An unsolicited NA can now create a neighbour cache entry - * on routers if it has Target LL Address option. + * NAs with Target LL Address option without a corresponding + * entry in the neighbour cache can now create a STALE neighbour + * cache entry on routers. + * + * entry accept fwding solicited behaviour + * ------- ------ ------ --------- ---------------------- + * present X X 0 Set state to STALE + * present X X 1 Set state to REACHABLE + * absent 0 X X Do nothing + * absent 1 0 X Do nothing + * absent 1 1 X Add a new STALE entry * - * drop accept fwding behaviour - * ---- ------ ------ ---------------------------------------------- - * 1 X X Drop NA packet and don't pass up the stack - * 0 0 X Pass NA packet up the stack, don't update NC - * 0 1 0 Pass NA packet up the stack, don't update NC - * 0 1 1 Pass NA packet up the stack, and add a STALE - * NC entry * Note that we don't do a (daddr == all-routers-mcast) check. */ - create_neigh = !msg->icmph.icmp6_solicited && lladdr && - idev && idev->cnf.forwarding && - idev->cnf.accept_unsolicited_na; - neigh = __neigh_lookup(&nd_tbl, &msg->target, dev, create_neigh); + new_state = msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE; + if (!neigh && lladdr && + idev && idev->cnf.forwarding && + idev->cnf.accept_untracked_na) { + neigh = neigh_create(&nd_tbl, &msg->target, dev); + new_state = NUD_STALE; + } - if (neigh) { + if (neigh && !IS_ERR(neigh)) { u8 old_flags = neigh->flags; struct net *net = dev_net(dev); @@ -1079,7 +1087,7 @@ static void ndisc_recv_na(struct sk_buff *skb) } ndisc_update(dev, neigh, lladdr, - msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE, + new_state, NEIGH_UPDATE_F_WEAK_OVERRIDE| (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)| NEIGH_UPDATE_F_OVERRIDE_ISROUTER| -- cgit v1.2.3 From fecf31ee395b0295f2d7260aa29946b7605f7c85 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 27 May 2022 09:56:18 +0200 Subject: netfilter: nf_tables: sanitize nft_set_desc_concat_parse() Add several sanity checks for nft_set_desc_concat_parse(): - validate desc->field_count not larger than desc->field_len array. - field length cannot be larger than desc->field_len (ie. U8_MAX) - total length of the concatenation cannot be larger than register array. Joint work with Florian Westphal. Fixes: f3a2181e16f1 ("netfilter: nf_tables: Support for sets with multiple ranged fields") Reported-by: Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f4f1d0a2da43..dcefb5f36b3a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4246,6 +4246,9 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr, u32 len; int err; + if (desc->field_count >= ARRAY_SIZE(desc->field_len)) + return -E2BIG; + err = nla_parse_nested_deprecated(tb, NFTA_SET_FIELD_MAX, attr, nft_concat_policy, NULL); if (err < 0) @@ -4255,9 +4258,8 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr, return -EINVAL; len = ntohl(nla_get_be32(tb[NFTA_SET_FIELD_LEN])); - - if (len * BITS_PER_BYTE / 32 > NFT_REG32_COUNT) - return -E2BIG; + if (!len || len > U8_MAX) + return -EINVAL; desc->field_len[desc->field_count++] = len; @@ -4268,7 +4270,8 @@ static int nft_set_desc_concat(struct nft_set_desc *desc, const struct nlattr *nla) { struct nlattr *attr; - int rem, err; + u32 num_regs = 0; + int rem, err, i; nla_for_each_nested(attr, nla, rem) { if (nla_type(attr) != NFTA_LIST_ELEM) @@ -4279,6 +4282,12 @@ static int nft_set_desc_concat(struct nft_set_desc *desc, return err; } + for (i = 0; i < desc->field_count; i++) + num_regs += DIV_ROUND_UP(desc->field_len[i], sizeof(u32)); + + if (num_regs > NFT_REG32_COUNT) + return -E2BIG; + return 0; } -- cgit v1.2.3 From 3923b1e4406680d57da7e873da77b1683035d83f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 30 May 2022 18:24:05 +0200 Subject: netfilter: nf_tables: hold mutex on netns pre_exit path clean_net() runs in workqueue while walking over the lists, grab mutex. Fixes: 767d1216bff8 ("netfilter: nftables: fix possible UAF over chains from packet path in netns") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index dcefb5f36b3a..f77414e13de1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9896,7 +9896,11 @@ static int __net_init nf_tables_init_net(struct net *net) static void __net_exit nf_tables_pre_exit_net(struct net *net) { + struct nftables_pernet *nft_net = nft_pernet(net); + + mutex_lock(&nft_net->commit_mutex); __nft_release_hooks(net); + mutex_unlock(&nft_net->commit_mutex); } static void __net_exit nf_tables_exit_net(struct net *net) -- cgit v1.2.3 From f9a43007d3f7ba76d5e7f9421094f00f2ef202f8 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 30 May 2022 18:24:06 +0200 Subject: netfilter: nf_tables: double hook unregistration in netns path __nft_release_hooks() is called from pre_netns exit path which unregisters the hooks, then the NETDEV_UNREGISTER event is triggered which unregisters the hooks again. [ 565.221461] WARNING: CPU: 18 PID: 193 at net/netfilter/core.c:495 __nf_unregister_net_hook+0x247/0x270 [...] [ 565.246890] CPU: 18 PID: 193 Comm: kworker/u64:1 Tainted: G E 5.18.0-rc7+ #27 [ 565.253682] Workqueue: netns cleanup_net [ 565.257059] RIP: 0010:__nf_unregister_net_hook+0x247/0x270 [...] [ 565.297120] Call Trace: [ 565.300900] [ 565.304683] nf_tables_flowtable_event+0x16a/0x220 [nf_tables] [ 565.308518] raw_notifier_call_chain+0x63/0x80 [ 565.312386] unregister_netdevice_many+0x54f/0xb50 Unregister and destroy netdev hook from netns pre_exit via kfree_rcu so the NETDEV_UNREGISTER path see unregistered hooks. Fixes: 767d1216bff8 ("netfilter: nftables: fix possible UAF over chains from packet path in netns") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 54 ++++++++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f77414e13de1..746be13438ef 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -222,12 +222,18 @@ err_register: } static void nft_netdev_unregister_hooks(struct net *net, - struct list_head *hook_list) + struct list_head *hook_list, + bool release_netdev) { - struct nft_hook *hook; + struct nft_hook *hook, *next; - list_for_each_entry(hook, hook_list, list) + list_for_each_entry_safe(hook, next, hook_list, list) { nf_unregister_net_hook(net, &hook->ops); + if (release_netdev) { + list_del(&hook->list); + kfree_rcu(hook, rcu); + } + } } static int nf_tables_register_hook(struct net *net, @@ -253,9 +259,10 @@ static int nf_tables_register_hook(struct net *net, return nf_register_net_hook(net, &basechain->ops); } -static void nf_tables_unregister_hook(struct net *net, - const struct nft_table *table, - struct nft_chain *chain) +static void __nf_tables_unregister_hook(struct net *net, + const struct nft_table *table, + struct nft_chain *chain, + bool release_netdev) { struct nft_base_chain *basechain; const struct nf_hook_ops *ops; @@ -270,11 +277,19 @@ static void nf_tables_unregister_hook(struct net *net, return basechain->type->ops_unregister(net, ops); if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) - nft_netdev_unregister_hooks(net, &basechain->hook_list); + nft_netdev_unregister_hooks(net, &basechain->hook_list, + release_netdev); else nf_unregister_net_hook(net, &basechain->ops); } +static void nf_tables_unregister_hook(struct net *net, + const struct nft_table *table, + struct nft_chain *chain) +{ + return __nf_tables_unregister_hook(net, table, chain, false); +} + static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans) { struct nftables_pernet *nft_net = nft_pernet(net); @@ -7307,13 +7322,25 @@ static void nft_unregister_flowtable_hook(struct net *net, FLOW_BLOCK_UNBIND); } -static void nft_unregister_flowtable_net_hooks(struct net *net, - struct list_head *hook_list) +static void __nft_unregister_flowtable_net_hooks(struct net *net, + struct list_head *hook_list, + bool release_netdev) { - struct nft_hook *hook; + struct nft_hook *hook, *next; - list_for_each_entry(hook, hook_list, list) + list_for_each_entry_safe(hook, next, hook_list, list) { nf_unregister_net_hook(net, &hook->ops); + if (release_netdev) { + list_del(&hook->list); + kfree_rcu(hook); + } + } +} + +static void nft_unregister_flowtable_net_hooks(struct net *net, + struct list_head *hook_list) +{ + __nft_unregister_flowtable_net_hooks(net, hook_list, false); } static int nft_register_flowtable_net_hooks(struct net *net, @@ -9755,9 +9782,10 @@ static void __nft_release_hook(struct net *net, struct nft_table *table) struct nft_chain *chain; list_for_each_entry(chain, &table->chains, list) - nf_tables_unregister_hook(net, table, chain); + __nf_tables_unregister_hook(net, table, chain, true); list_for_each_entry(flowtable, &table->flowtables, list) - nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list); + __nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list, + true); } static void __nft_release_hooks(struct net *net) -- cgit v1.2.3 From f1896d45fee90a868dfd0b3cc119e4cc1bbd7ca2 Mon Sep 17 00:00:00 2001 From: wenxu Date: Wed, 25 May 2022 21:25:45 -0400 Subject: netfilter: flowtable: fix missing FLOWI_FLAG_ANYSRC flag The nf_flow_table gets route through ip_route_output_key. If the saddr is not local one, then FLOWI_FLAG_ANYSRC flag should be set. Without this flag, the route lookup for other_dst will fail. Fixes: 3412e1641828 (netfilter: flowtable: nft_flow_route use more data for reverse route) Signed-off-by: wenxu Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_flow_offload.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index a16cf47199b7..20e5f372dd76 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -237,6 +237,7 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, fl.u.ip4.flowi4_iif = this_dst->dev->ifindex; fl.u.ip4.flowi4_tos = RT_TOS(ip_hdr(pkt->skb)->tos); fl.u.ip4.flowi4_mark = pkt->skb->mark; + fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC; break; case NFPROTO_IPV6: fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6; @@ -245,6 +246,7 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, fl.u.ip6.flowi6_iif = this_dst->dev->ifindex; fl.u.ip6.flowlabel = ip6_flowinfo(ipv6_hdr(pkt->skb)); fl.u.ip6.flowi6_mark = pkt->skb->mark; + fl.u.ip6.flowi6_flags = FLOWI_FLAG_ANYSRC; break; } -- cgit v1.2.3 From 97629b237a8cb7ac655c3969b8d5e57300ff6598 Mon Sep 17 00:00:00 2001 From: wenxu Date: Wed, 25 May 2022 21:25:46 -0400 Subject: netfilter: flowtable: fix nft_flow_route source address for nat case For snat and dnat cases, the saddr should be taken from reverse tuple. Fixes: 3412e1641828 (netfilter: flowtable: nft_flow_route use more data for reverse route) Signed-off-by: wenxu Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_flow_offload.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 20e5f372dd76..a25c88bc8b75 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -232,7 +232,7 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, switch (nft_pf(pkt)) { case NFPROTO_IPV4: fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip; - fl.u.ip4.saddr = ct->tuplehash[dir].tuple.dst.u3.ip; + fl.u.ip4.saddr = ct->tuplehash[!dir].tuple.src.u3.ip; fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex; fl.u.ip4.flowi4_iif = this_dst->dev->ifindex; fl.u.ip4.flowi4_tos = RT_TOS(ip_hdr(pkt->skb)->tos); @@ -241,7 +241,7 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, break; case NFPROTO_IPV6: fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6; - fl.u.ip6.saddr = ct->tuplehash[dir].tuple.dst.u3.in6; + fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.src.u3.in6; fl.u.ip6.flowi6_oif = nft_in(pkt)->ifindex; fl.u.ip6.flowi6_iif = this_dst->dev->ifindex; fl.u.ip6.flowlabel = ip6_flowinfo(ipv6_hdr(pkt->skb)); -- cgit v1.2.3 From 0a375c822497ed6ad6b5da0792a12a6f1af10c0b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 May 2022 14:37:13 -0700 Subject: tcp: tcp_rtx_synack() can be called from process context Laurent reported the enclosed report [1] This bug triggers with following coditions: 0) Kernel built with CONFIG_DEBUG_PREEMPT=y 1) A new passive FastOpen TCP socket is created. This FO socket waits for an ACK coming from client to be a complete ESTABLISHED one. 2) A socket operation on this socket goes through lock_sock() release_sock() dance. 3) While the socket is owned by the user in step 2), a retransmit of the SYN is received and stored in socket backlog. 4) At release_sock() time, the socket backlog is processed while in process context. 5) A SYNACK packet is cooked in response of the SYN retransmit. 6) -> tcp_rtx_synack() is called in process context. Before blamed commit, tcp_rtx_synack() was always called from BH handler, from a timer handler. Fix this by using TCP_INC_STATS() & NET_INC_STATS() which do not assume caller is in non preemptible context. [1] BUG: using __this_cpu_add() in preemptible [00000000] code: epollpep/2180 caller is tcp_rtx_synack.part.0+0x36/0xc0 CPU: 10 PID: 2180 Comm: epollpep Tainted: G OE 5.16.0-0.bpo.4-amd64 #1 Debian 5.16.12-1~bpo11+1 Hardware name: Supermicro SYS-5039MC-H8TRF/X11SCD-F, BIOS 1.7 11/23/2021 Call Trace: dump_stack_lvl+0x48/0x5e check_preemption_disabled+0xde/0xe0 tcp_rtx_synack.part.0+0x36/0xc0 tcp_rtx_synack+0x8d/0xa0 ? kmem_cache_alloc+0x2e0/0x3e0 ? apparmor_file_alloc_security+0x3b/0x1f0 inet_rtx_syn_ack+0x16/0x30 tcp_check_req+0x367/0x610 tcp_rcv_state_process+0x91/0xf60 ? get_nohz_timer_target+0x18/0x1a0 ? lock_timer_base+0x61/0x80 ? preempt_count_add+0x68/0xa0 tcp_v4_do_rcv+0xbd/0x270 __release_sock+0x6d/0xb0 release_sock+0x2b/0x90 sock_setsockopt+0x138/0x1140 ? __sys_getsockname+0x7e/0xc0 ? aa_sk_perm+0x3e/0x1a0 __sys_setsockopt+0x198/0x1e0 __x64_sys_setsockopt+0x21/0x30 do_syscall_64+0x38/0xc0 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: 168a8f58059a ("tcp: TCP Fast Open Server - main code path") Signed-off-by: Eric Dumazet Reported-by: Laurent Fasnacht Acked-by: Neal Cardwell Link: https://lore.kernel.org/r/20220530213713.601888-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b4b2284ed4a2..1c054431e358 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -4115,8 +4115,8 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL, NULL); if (!res) { - __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); if (unlikely(tcp_passive_fastopen(sk))) tcp_sk(sk)->total_retrans++; trace_tcp_retransmit_synack(sk, req); -- cgit v1.2.3 From 2965c4cdf7ad9ce0796fac5e57debb9519ea721e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Jun 2022 09:19:36 +0200 Subject: wifi: mac80211: fix use-after-free in chanctx code In ieee80211_vif_use_reserved_context(), when we have an old context and the new context's replace_state is set to IEEE80211_CHANCTX_REPLACE_NONE, we free the old context in ieee80211_vif_use_reserved_reassign(). Therefore, we cannot check the old_ctx anymore, so we should set it to NULL after this point. However, since the new_ctx replace state is clearly not IEEE80211_CHANCTX_REPLACES_OTHER, we're not going to do anything else in this function and can just return to avoid accessing the freed old_ctx. Cc: stable@vger.kernel.org Fixes: 5bcae31d9cb1 ("mac80211: implement multi-vif in-place reservations") Signed-off-by: Johannes Berg Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220601091926.df419d91b165.I17a9b3894ff0b8323ce2afdb153b101124c821e5@changeid --- net/mac80211/chan.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index e3452445b363..d8246e00a10b 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1749,12 +1749,9 @@ int ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata) if (new_ctx->replace_state == IEEE80211_CHANCTX_REPLACE_NONE) { if (old_ctx) - err = ieee80211_vif_use_reserved_reassign(sdata); - else - err = ieee80211_vif_use_reserved_assign(sdata); + return ieee80211_vif_use_reserved_reassign(sdata); - if (err) - return err; + return ieee80211_vif_use_reserved_assign(sdata); } /* -- cgit v1.2.3 From e6652a8ef3e64d953168a95878fe29b934ad78ac Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 31 May 2022 11:45:44 +0300 Subject: net: ping6: Fix ping -6 with interface name When passing interface parameter to ping -6: $ ping -6 ::11:141:84:9 -I eth2 Results in: PING ::11:141:84:10(::11:141:84:10) from ::11:141:84:9 eth2: 56 data bytes ping: sendmsg: Invalid argument ping: sendmsg: Invalid argument Initialize the fl6's outgoing interface (OIF) before triggering ip6_datagram_send_ctl. Don't wipe fl6 after ip6_datagram_send_ctl() as changes in fl6 that may happen in the function are overwritten explicitly. Update comment accordingly. Fixes: 13651224c00b ("net: ping6: support setting basic SOL_IPV6 options via cmsg") Signed-off-by: Aya Levin Reviewed-by: Gal Pressman Reviewed-by: Saeed Mahameed Signed-off-by: Tariq Toukan Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20220531084544.15126-1-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- net/ipv6/ping.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index ff033d16549e..ecf3a553a0dc 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -101,6 +101,9 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.sockc.tsflags = sk->sk_tsflags; ipc6.sockc.mark = sk->sk_mark; + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_oif = oif; + if (msg->msg_controllen) { struct ipv6_txoptions opt = {}; @@ -112,17 +115,14 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return err; /* Changes to txoptions and flow info are not implemented, yet. - * Drop the options, fl6 is wiped below. + * Drop the options. */ ipc6.opt = NULL; } - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_ICMPV6; fl6.saddr = np->saddr; fl6.daddr = *daddr; - fl6.flowi6_oif = oif; fl6.flowi6_mark = ipc6.sockc.mark; fl6.flowi6_uid = sk->sk_uid; fl6.fl6_icmp_type = user_icmph.icmp6_type; -- cgit v1.2.3 From 86360030cc5117596626bef1d937277cd2bebe05 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 31 May 2022 15:10:05 +0300 Subject: net/sched: act_api: fix error code in tcf_ct_flow_table_fill_tuple_ipv6() The tcf_ct_flow_table_fill_tuple_ipv6() function is supposed to return false on failure. It should not return negatives because that means succes/true. Fixes: fcb6aa86532c ("act_ct: Support GRE offload") Signed-off-by: Dan Carpenter Acked-by: Toshiaki Makita Link: https://lore.kernel.org/r/YpYFnbDxFl6tQ3Bn@kili Signed-off-by: Paolo Abeni --- net/sched/act_ct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 8af9d6e5ba61..e013253b10d1 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -548,7 +548,7 @@ tcf_ct_flow_table_fill_tuple_ipv6(struct sk_buff *skb, break; #endif default: - return -1; + return false; } if (ip6h->hop_limit <= 1) -- cgit v1.2.3 From 7d8a3a477b3e25ada8dc71d22048c2ea417209a0 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Mon, 30 May 2022 23:21:58 +0800 Subject: ax25: Fix ax25 session cleanup problems There are session cleanup problems in ax25_release() and ax25_disconnect(). If we setup a session and then disconnect, the disconnected session is still in "LISTENING" state that is shown below. Active AX.25 sockets Dest Source Device State Vr/Vs Send-Q Recv-Q DL9SAU-4 DL9SAU-3 ??? LISTENING 000/000 0 0 DL9SAU-3 DL9SAU-4 ??? LISTENING 000/000 0 0 The first reason is caused by del_timer_sync() in ax25_release(). The timers of ax25 are used for correct session cleanup. If we use ax25_release() to close ax25 sessions and ax25_dev is not null, the del_timer_sync() functions in ax25_release() will execute. As a result, the sessions could not be cleaned up correctly, because the timers have stopped. In order to solve this problem, this patch adds a device_up flag in ax25_dev in order to judge whether the device is up. If there are sessions to be cleaned up, the del_timer_sync() in ax25_release() will not execute. What's more, we add ax25_cb_del() in ax25_kill_by_device(), because the timers have been stopped and there are no functions that could delete ax25_cb if we do not call ax25_release(). Finally, we reorder the position of ax25_list_lock in ax25_cb_del() in order to synchronize among different functions that call ax25_cb_del(). The second reason is caused by improper check in ax25_disconnect(). The incoming ax25 sessions which ax25->sk is null will close heartbeat timer, because the check "if(!ax25->sk || ..)" is satisfied. As a result, the session could not be cleaned up properly. In order to solve this problem, this patch changes the improper check to "if(ax25->sk && ..)" in ax25_disconnect(). What`s more, the ax25_disconnect() may be called twice, which is not necessary. For example, ax25_kill_by_device() calls ax25_disconnect() and sets ax25->state to AX25_STATE_0, but ax25_release() calls ax25_disconnect() again. In order to solve this problem, this patch add a check in ax25_release(). If the flag of ax25->sk equals to SOCK_DEAD, the ax25_disconnect() in ax25_release() should not be executed. Fixes: 82e31755e55f ("ax25: Fix UAF bugs in ax25 timers") Fixes: 8a367e74c012 ("ax25: Fix segfault after sock connection timeout") Reported-and-tested-by: Thomas Osterried Signed-off-by: Duoming Zhou Link: https://lore.kernel.org/r/20220530152158.108619-1-duoming@zju.edu.cn Signed-off-by: Paolo Abeni --- net/ax25/af_ax25.c | 27 +++++++++++++++++---------- net/ax25/ax25_dev.c | 1 + net/ax25/ax25_subr.c | 2 +- 3 files changed, 19 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 116481e4da82..95393bb2760b 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -62,12 +62,12 @@ static void ax25_free_sock(struct sock *sk) */ static void ax25_cb_del(ax25_cb *ax25) { + spin_lock_bh(&ax25_list_lock); if (!hlist_unhashed(&ax25->ax25_node)) { - spin_lock_bh(&ax25_list_lock); hlist_del_init(&ax25->ax25_node); - spin_unlock_bh(&ax25_list_lock); ax25_cb_put(ax25); } + spin_unlock_bh(&ax25_list_lock); } /* @@ -81,6 +81,7 @@ static void ax25_kill_by_device(struct net_device *dev) if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) return; + ax25_dev->device_up = false; spin_lock_bh(&ax25_list_lock); again: @@ -91,6 +92,7 @@ again: spin_unlock_bh(&ax25_list_lock); ax25_disconnect(s, ENETUNREACH); s->ax25_dev = NULL; + ax25_cb_del(s); spin_lock_bh(&ax25_list_lock); goto again; } @@ -103,6 +105,7 @@ again: dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); ax25_dev_put(ax25_dev); } + ax25_cb_del(s); release_sock(sk); spin_lock_bh(&ax25_list_lock); sock_put(sk); @@ -995,9 +998,11 @@ static int ax25_release(struct socket *sock) if (sk->sk_type == SOCK_SEQPACKET) { switch (ax25->state) { case AX25_STATE_0: - release_sock(sk); - ax25_disconnect(ax25, 0); - lock_sock(sk); + if (!sock_flag(ax25->sk, SOCK_DEAD)) { + release_sock(sk); + ax25_disconnect(ax25, 0); + lock_sock(sk); + } ax25_destroy_socket(ax25); break; @@ -1053,11 +1058,13 @@ static int ax25_release(struct socket *sock) ax25_destroy_socket(ax25); } if (ax25_dev) { - del_timer_sync(&ax25->timer); - del_timer_sync(&ax25->t1timer); - del_timer_sync(&ax25->t2timer); - del_timer_sync(&ax25->t3timer); - del_timer_sync(&ax25->idletimer); + if (!ax25_dev->device_up) { + del_timer_sync(&ax25->timer); + del_timer_sync(&ax25->t1timer); + del_timer_sync(&ax25->t2timer); + del_timer_sync(&ax25->t3timer); + del_timer_sync(&ax25->idletimer); + } dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); ax25_dev_put(ax25_dev); } diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index b80fccbac62a..95a76d571c44 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -62,6 +62,7 @@ void ax25_dev_device_up(struct net_device *dev) ax25_dev->dev = dev; dev_hold_track(dev, &ax25_dev->dev_tracker, GFP_ATOMIC); ax25_dev->forward = NULL; + ax25_dev->device_up = true; ax25_dev->values[AX25_VALUES_IPDEFMODE] = AX25_DEF_IPDEFMODE; ax25_dev->values[AX25_VALUES_AXDEFMODE] = AX25_DEF_AXDEFMODE; diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 3a476e4f6cd0..9ff98f46dc6b 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -268,7 +268,7 @@ void ax25_disconnect(ax25_cb *ax25, int reason) del_timer_sync(&ax25->t3timer); del_timer_sync(&ax25->idletimer); } else { - if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY)) + if (ax25->sk && !sock_flag(ax25->sk, SOCK_DESTROY)) ax25_stop_heartbeat(ax25); ax25_stop_t1timer(ax25); ax25_stop_t2timer(ax25); -- cgit v1.2.3 From 7f36f798f89bf32c0164049cb0e3fd1af613d0bb Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Thu, 2 Jun 2022 13:30:53 +0700 Subject: tipc: check attribute length for bearer name syzbot reported uninit-value: ===================================================== BUG: KMSAN: uninit-value in string_nocheck lib/vsprintf.c:644 [inline] BUG: KMSAN: uninit-value in string+0x4f9/0x6f0 lib/vsprintf.c:725 string_nocheck lib/vsprintf.c:644 [inline] string+0x4f9/0x6f0 lib/vsprintf.c:725 vsnprintf+0x2222/0x3650 lib/vsprintf.c:2806 vprintk_store+0x537/0x2150 kernel/printk/printk.c:2158 vprintk_emit+0x28b/0xab0 kernel/printk/printk.c:2256 vprintk_default+0x86/0xa0 kernel/printk/printk.c:2283 vprintk+0x15f/0x180 kernel/printk/printk_safe.c:50 _printk+0x18d/0x1cf kernel/printk/printk.c:2293 tipc_enable_bearer net/tipc/bearer.c:371 [inline] __tipc_nl_bearer_enable+0x2022/0x22a0 net/tipc/bearer.c:1033 tipc_nl_bearer_enable+0x6c/0xb0 net/tipc/bearer.c:1042 genl_family_rcv_msg_doit net/netlink/genetlink.c:731 [inline] - Do sanity check the attribute length for TIPC_NLA_BEARER_NAME. - Do not use 'illegal name' in printing message. Reported-by: syzbot+e820fdc8ce362f2dea51@syzkaller.appspotmail.com Fixes: cb30a63384bc ("tipc: refactor function tipc_enable_bearer()") Acked-by: Jon Maloy Signed-off-by: Hoang Le Link: https://lore.kernel.org/r/20220602063053.5892-1-hoang.h.le@dektech.com.au Signed-off-by: Jakub Kicinski --- net/tipc/bearer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 6d39ca05f249..932c87b98eca 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -259,9 +259,8 @@ static int tipc_enable_bearer(struct net *net, const char *name, u32 i; if (!bearer_name_validate(name, &b_names)) { - errstr = "illegal name"; NL_SET_ERR_MSG(extack, "Illegal name"); - goto rejected; + return res; } if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) { -- cgit v1.2.3 From eb0b39efb7d908e3950f6f76ee6e3cecb86ec489 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Jun 2022 09:18:57 -0700 Subject: net: CONFIG_DEBUG_NET depends on CONFIG_NET It makes little sense to debug networking stacks if networking is not compiled in. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- net/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/Kconfig.debug b/net/Kconfig.debug index a5781cf63b16..e6ae11cc2fb7 100644 --- a/net/Kconfig.debug +++ b/net/Kconfig.debug @@ -20,7 +20,7 @@ config NET_NS_REFCNT_TRACKER config DEBUG_NET bool "Add generic networking debug" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && NET help Enable extra sanity checks in networking. This is mostly used by fuzzers, but is safe to select. -- cgit v1.2.3 From e9d3f80935b6607dcdc5682b00b1d4b28e0a0c5d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Jun 2022 09:18:59 -0700 Subject: net/af_packet: make sure to pull mac header GSO assumes skb->head contains link layer headers. tun device in some case can provide base 14 bytes, regardless of VLAN being used or not. After blamed commit, we can end up setting a network header offset of 18+, we better pull the missing bytes to avoid a posible crash in GSO. syzbot report was: kernel BUG at include/linux/skbuff.h:2699! invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 3601 Comm: syz-executor210 Not tainted 5.18.0-syzkaller-11338-g2c5ca23f7414 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__skb_pull include/linux/skbuff.h:2699 [inline] RIP: 0010:skb_mac_gso_segment+0x48f/0x530 net/core/gro.c:136 Code: 00 48 c7 c7 00 96 d4 8a c6 05 cb d3 45 06 01 e8 26 bb d0 01 e9 2f fd ff ff 49 c7 c4 ea ff ff ff e9 f1 fe ff ff e8 91 84 19 fa <0f> 0b 48 89 df e8 97 44 66 fa e9 7f fd ff ff e8 ad 44 66 fa e9 48 RSP: 0018:ffffc90002e2f4b8 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000000000012 RCX: 0000000000000000 RDX: ffff88805bb58000 RSI: ffffffff8760ed0f RDI: 0000000000000004 RBP: 0000000000005dbc R08: 0000000000000004 R09: 0000000000000fe0 R10: 0000000000000fe4 R11: 0000000000000000 R12: 0000000000000fe0 R13: ffff88807194d780 R14: 1ffff920005c5e9b R15: 0000000000000012 FS: 000055555730f300(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000200015c0 CR3: 0000000071ff8000 CR4: 0000000000350ee0 Call Trace: __skb_gso_segment+0x327/0x6e0 net/core/dev.c:3411 skb_gso_segment include/linux/netdevice.h:4749 [inline] validate_xmit_skb+0x6bc/0xf10 net/core/dev.c:3669 validate_xmit_skb_list+0xbc/0x120 net/core/dev.c:3719 sch_direct_xmit+0x3d1/0xbe0 net/sched/sch_generic.c:327 __dev_xmit_skb net/core/dev.c:3815 [inline] __dev_queue_xmit+0x14a1/0x3a00 net/core/dev.c:4219 packet_snd net/packet/af_packet.c:3071 [inline] packet_sendmsg+0x21cb/0x5550 net/packet/af_packet.c:3102 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:734 ____sys_sendmsg+0x6eb/0x810 net/socket.c:2492 ___sys_sendmsg+0xf3/0x170 net/socket.c:2546 __sys_sendmsg net/socket.c:2575 [inline] __do_sys_sendmsg net/socket.c:2584 [inline] __se_sys_sendmsg net/socket.c:2582 [inline] __x64_sys_sendmsg+0x132/0x220 net/socket.c:2582 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f4b95da06c9 Code: 28 c3 e8 4a 15 00 00 66 2e 0f 1f 84 00 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffd7defc4c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007ffd7defc4f0 RCX: 00007f4b95da06c9 RDX: 0000000000000000 RSI: 0000000020000140 RDI: 0000000000000003 RBP: 0000000000000003 R08: bb1414ac00000050 R09: bb1414ac00000050 R10: 0000000000000004 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffd7defc4e0 R14: 00007ffd7defc4d8 R15: 00007ffd7defc4d4 Fixes: dfed913e8b55 ("net/af_packet: add VLAN support for AF_PACKET SOCK_RAW GSO") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Hangbin Liu Acked-by: Willem de Bruijn Cc: Michael S. Tsirkin Signed-off-by: Jakub Kicinski --- net/packet/af_packet.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 677f9cfa9660..ca6e92a22923 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1935,8 +1935,10 @@ static void packet_parse_headers(struct sk_buff *skb, struct socket *sock) /* Move network header to the right position for VLAN tagged packets */ if (likely(skb->dev->type == ARPHRD_ETHER) && eth_type_vlan(skb->protocol) && - __vlan_get_protocol(skb, skb->protocol, &depth) != 0) - skb_set_network_header(skb, depth); + __vlan_get_protocol(skb, skb->protocol, &depth) != 0) { + if (pskb_may_pull(skb, depth)) + skb_set_network_header(skb, depth); + } skb_probe_transport_header(skb); } -- cgit v1.2.3