summaryrefslogtreecommitdiffstats
path: root/net
Commit message (Collapse)AuthorAgeFilesLines
...
| * | | Merge tag 'ieee802154-for-davem-2020-09-08' of ↵David S. Miller2020-09-081-3/+5
| |\ \ \ | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | git://git.kernel.org/pub/scm/linux/kernel/git/sschmidt/wpan Stefan Schmidt says: ==================== pull-request: ieee802154 for net 2020-09-08 An update from ieee802154 for your *net* tree. A potential memory leak fix for ca8210 from Liu Jian, a check on the return for a register read in adf7242 and finally a user after free fix in the softmac tx function from Eric found by syzkaller. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
| | * | | mac802154: tx: fix use-after-freeEric Dumazet2020-09-081-3/+5
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | syzbot reported a bug in ieee802154_tx() [1] A similar issue in ieee802154_xmit_worker() is also fixed in this patch. [1] BUG: KASAN: use-after-free in ieee802154_tx+0x3d2/0x480 net/mac802154/tx.c:88 Read of size 4 at addr ffff8880251a8c70 by task syz-executor.3/928 CPU: 0 PID: 928 Comm: syz-executor.3 Not tainted 5.9.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x198/0x1fd lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xae/0x497 mm/kasan/report.c:383 __kasan_report mm/kasan/report.c:513 [inline] kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530 ieee802154_tx+0x3d2/0x480 net/mac802154/tx.c:88 ieee802154_subif_start_xmit+0xbe/0xe4 net/mac802154/tx.c:130 __netdev_start_xmit include/linux/netdevice.h:4634 [inline] netdev_start_xmit include/linux/netdevice.h:4648 [inline] dev_direct_xmit+0x4e9/0x6e0 net/core/dev.c:4203 packet_snd net/packet/af_packet.c:2989 [inline] packet_sendmsg+0x2413/0x5290 net/packet/af_packet.c:3014 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:671 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2353 ___sys_sendmsg+0xf3/0x170 net/socket.c:2407 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2440 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45d5b9 Code: 5d b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 2b b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fc98e749c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 000000000002ccc0 RCX: 000000000045d5b9 RDX: 0000000000000000 RSI: 0000000020007780 RDI: 000000000000000b RBP: 000000000118d020 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000118cfec R13: 00007fff690c720f R14: 00007fc98e74a9c0 R15: 000000000118cfec Allocated by task 928: kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48 kasan_set_track mm/kasan/common.c:56 [inline] __kasan_kmalloc.constprop.0+0xbf/0xd0 mm/kasan/common.c:461 slab_post_alloc_hook mm/slab.h:518 [inline] slab_alloc_node mm/slab.c:3254 [inline] kmem_cache_alloc_node+0x136/0x3e0 mm/slab.c:3574 __alloc_skb+0x71/0x550 net/core/skbuff.c:198 alloc_skb include/linux/skbuff.h:1094 [inline] alloc_skb_with_frags+0x92/0x570 net/core/skbuff.c:5771 sock_alloc_send_pskb+0x72a/0x880 net/core/sock.c:2348 packet_alloc_skb net/packet/af_packet.c:2837 [inline] packet_snd net/packet/af_packet.c:2932 [inline] packet_sendmsg+0x19fb/0x5290 net/packet/af_packet.c:3014 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:671 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2353 ___sys_sendmsg+0xf3/0x170 net/socket.c:2407 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2440 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 928: kasan_save_stack+0x1b/0x40 mm/kasan/common.c:48 kasan_set_track+0x1c/0x30 mm/kasan/common.c:56 kasan_set_free_info+0x1b/0x30 mm/kasan/generic.c:355 __kasan_slab_free+0xd8/0x120 mm/kasan/common.c:422 __cache_free mm/slab.c:3418 [inline] kmem_cache_free.part.0+0x74/0x1e0 mm/slab.c:3693 kfree_skbmem+0xef/0x1b0 net/core/skbuff.c:622 __kfree_skb net/core/skbuff.c:679 [inline] consume_skb net/core/skbuff.c:838 [inline] consume_skb+0xcf/0x160 net/core/skbuff.c:832 __dev_kfree_skb_any+0x9c/0xc0 net/core/dev.c:3107 fakelb_hw_xmit+0x20e/0x2a0 drivers/net/ieee802154/fakelb.c:81 drv_xmit_async net/mac802154/driver-ops.h:16 [inline] ieee802154_tx+0x282/0x480 net/mac802154/tx.c:81 ieee802154_subif_start_xmit+0xbe/0xe4 net/mac802154/tx.c:130 __netdev_start_xmit include/linux/netdevice.h:4634 [inline] netdev_start_xmit include/linux/netdevice.h:4648 [inline] dev_direct_xmit+0x4e9/0x6e0 net/core/dev.c:4203 packet_snd net/packet/af_packet.c:2989 [inline] packet_sendmsg+0x2413/0x5290 net/packet/af_packet.c:3014 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:671 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2353 ___sys_sendmsg+0xf3/0x170 net/socket.c:2407 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2440 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The buggy address belongs to the object at ffff8880251a8c00 which belongs to the cache skbuff_head_cache of size 224 The buggy address is located 112 bytes inside of 224-byte region [ffff8880251a8c00, ffff8880251a8ce0) The buggy address belongs to the page: page:0000000062b6a4f1 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x251a8 flags: 0xfffe0000000200(slab) raw: 00fffe0000000200 ffffea0000435c88 ffffea00028b6c08 ffff8880a9055d00 raw: 0000000000000000 ffff8880251a80c0 000000010000000c 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8880251a8b00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880251a8b80: fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc >ffff8880251a8c00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8880251a8c80: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc ffff8880251a8d00: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb Fixes: 409c3b0c5f03 ("mac802154: tx: move stats tx increment") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: syzbot <syzkaller@googlegroups.com> Cc: Alexander Aring <alex.aring@gmail.com> Cc: Stefan Schmidt <stefan@datenfreihafen.org> Cc: linux-wpan@vger.kernel.org Link: https://lore.kernel.org/r/20200908104025.4009085-1-edumazet@google.com Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
| * | | | fib: fix fib_rule_ops indirect call wrappers when CONFIG_IPV6=mBrian Vazquez2020-09-081-1/+1
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | If CONFIG_IPV6=m, the IPV6 functions won't be found by the linker: ld: net/core/fib_rules.o: in function `fib_rules_lookup': fib_rules.c:(.text+0x606): undefined reference to `fib6_rule_match' ld: fib_rules.c:(.text+0x611): undefined reference to `fib6_rule_match' ld: fib_rules.c:(.text+0x68c): undefined reference to `fib6_rule_action' ld: fib_rules.c:(.text+0x693): undefined reference to `fib6_rule_action' ld: fib_rules.c:(.text+0x6aa): undefined reference to `fib6_rule_suppress' ld: fib_rules.c:(.text+0x6bc): undefined reference to `fib6_rule_suppress' make: *** [Makefile:1166: vmlinux] Error 1 Reported-by: Sven Joachim <svenjoac@gmx.de> Fixes: b9aaec8f0be5 ("fib: use indirect call wrappers in the most common fib_rules_ops") Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested Signed-off-by: Brian Vazquez <brianvv@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
| * | | | Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nfDavid S. Miller2020-09-084-32/+66
| |\ \ \ \ | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Pablo Neira Ayuso says: =================== Netfilter fixes for net The following patchset contains Netfilter fixes for net: 1) Allow conntrack entries with l3num == NFPROTO_IPV4 or == NFPROTO_IPV6 only via ctnetlink, from Will McVicker. 2) Batch notifications to userspace to improve netlink socket receive utilization. 3) Restore mark based dump filtering via ctnetlink, from Martin Willi. 4) nf_conncount_init() fails with -EPROTO with CONFIG_IPV6, from Eelco Chaudron. 5) Containers fail to match on meta skuid and skgid, use socket user_ns to retrieve meta skuid and skgid. =================== Signed-off-by: David S. Miller <davem@davemloft.net>
| | * | | | netfilter: nft_meta: use socket user_ns to retrieve skuid and skgidPablo Neira Ayuso2020-09-081-2/+2
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | ... instead of using init_user_ns. Fixes: 96518518cc41 ("netfilter: add nftables") Tested-by: Phil Sutter <phil@nwl.cc> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
| | * | | | netfilter: conntrack: nf_conncount_init is failing with IPv6 disabledEelco Chaudron2020-09-081-0/+2
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The openvswitch module fails initialization when used in a kernel without IPv6 enabled. nf_conncount_init() fails because the ct code unconditionally tries to initialize the netns IPv6 related bit, regardless of the build option. The change below ignores the IPv6 part if not enabled. Note that the corresponding _put() function already has this IPv6 configuration check. Fixes: 11efd5cb04a1 ("openvswitch: Support conntrack zone limit") Signed-off-by: Eelco Chaudron <echaudro@redhat.com> Reviewed-by: Simon Horman <simon.horman@netronome.com> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
| | * | | | netfilter: ctnetlink: fix mark based dump filtering regressionMartin Willi2020-09-081-16/+3
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | conntrack mark based dump filtering may falsely skip entries if a mask is given: If the mask-based check does not filter out the entry, the else-if check is always true and compares the mark without considering the mask. The if/else-if logic seems wrong. Given that the mask during filter setup is implicitly set to 0xffffffff if not specified explicitly, the mark filtering flags seem to just complicate things. Restore the previously used approach by always matching against a zero mask is no filter mark is given. Fixes: cb8aa9a3affb ("netfilter: ctnetlink: add kernel side filtering for dump") Signed-off-by: Martin Willi <martin@strongswan.org> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
| | * | | | netfilter: nf_tables: coalesce multiple notifications into one skbuffPablo Neira Ayuso2020-09-081-13/+57
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | On x86_64, each notification results in one skbuff allocation which consumes at least 768 bytes due to the skbuff overhead. This patch coalesces several notifications into one single skbuff, so each notification consumes at least ~211 bytes, that ~3.5 times less memory consumption. As a result, this is reducing the chances to exhaust the netlink socket receive buffer. Rule of thumb is that each notification batch only contains netlink messages whose report flag is the same, nfnetlink_send() requires this to do appropriate delivery to userspace, either via unicast (echo mode) or multicast (monitor mode). The skbuff control buffer is used to annotate the report flag for later handling at the new coalescing routine. The batch skbuff notification size is NLMSG_GOODSIZE, using a larger skbuff would allow for more socket receiver buffer savings (to amortize the cost of the skbuff even more), however, going over that size might break userspace applications, so let's be conservative and stick to NLMSG_GOODSIZE. Reported-by: Phil Sutter <phil@nwl.cc> Acked-by: Phil Sutter <phil@nwl.cc> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
| | * | | | netfilter: ctnetlink: add a range check for l3/l4 protonumWill McVicker2020-09-081-1/+2
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The indexes to the nf_nat_l[34]protos arrays come from userspace. So check the tuple's family, e.g. l3num, when creating the conntrack in order to prevent an OOB memory access during setup. Here is an example kernel panic on 4.14.180 when userspace passes in an index greater than NFPROTO_NUMPROTO. Internal error: Oops - BUG: 0 [#1] PREEMPT SMP Modules linked in:... Process poc (pid: 5614, stack limit = 0x00000000a3933121) CPU: 4 PID: 5614 Comm: poc Tainted: G S W O 4.14.180-g051355490483 Hardware name: Qualcomm Technologies, Inc. SM8150 V2 PM8150 Google Inc. MSM task: 000000002a3dfffe task.stack: 00000000a3933121 pc : __cfi_check_fail+0x1c/0x24 lr : __cfi_check_fail+0x1c/0x24 ... Call trace: __cfi_check_fail+0x1c/0x24 name_to_dev_t+0x0/0x468 nfnetlink_parse_nat_setup+0x234/0x258 ctnetlink_parse_nat_setup+0x4c/0x228 ctnetlink_new_conntrack+0x590/0xc40 nfnetlink_rcv_msg+0x31c/0x4d4 netlink_rcv_skb+0x100/0x184 nfnetlink_rcv+0xf4/0x180 netlink_unicast+0x360/0x770 netlink_sendmsg+0x5a0/0x6a4 ___sys_sendmsg+0x314/0x46c SyS_sendmsg+0xb4/0x108 el0_svc_naked+0x34/0x38 This crash is not happening since 5.4+, however, ctnetlink still allows for creating entries with unsupported layer 3 protocol number. Fixes: c1d10adb4a521 ("[NETFILTER]: Add ctnetlink port for nf_conntrack") Signed-off-by: Will McVicker <willmcvicker@google.com> [pablo@netfilter.org: rebased original patch on top of nf.git] Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
| * | | | | ipv6: avoid lockdep issue in fib6_del()Eric Dumazet2020-09-081-4/+9
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | syzbot reported twice a lockdep issue in fib6_del() [1] which I think is caused by net->ipv6.fib6_null_entry having a NULL fib6_table pointer. fib6_del() already checks for fib6_null_entry special case, we only need to return earlier. Bug seems to occur very rarely, I have thus chosen a 'bug origin' that makes backports not too complex. [1] WARNING: suspicious RCU usage 5.9.0-rc4-syzkaller #0 Not tainted ----------------------------- net/ipv6/ip6_fib.c:1996 suspicious rcu_dereference_protected() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 4 locks held by syz-executor.5/8095: #0: ffffffff8a7ea708 (rtnl_mutex){+.+.}-{3:3}, at: ppp_release+0x178/0x240 drivers/net/ppp/ppp_generic.c:401 #1: ffff88804c422dd8 (&net->ipv6.fib6_gc_lock){+.-.}-{2:2}, at: spin_trylock_bh include/linux/spinlock.h:414 [inline] #1: ffff88804c422dd8 (&net->ipv6.fib6_gc_lock){+.-.}-{2:2}, at: fib6_run_gc+0x21b/0x2d0 net/ipv6/ip6_fib.c:2312 #2: ffffffff89bd6a40 (rcu_read_lock){....}-{1:2}, at: __fib6_clean_all+0x0/0x290 net/ipv6/ip6_fib.c:2613 #3: ffff8880a82e6430 (&tb->tb6_lock){+.-.}-{2:2}, at: spin_lock_bh include/linux/spinlock.h:359 [inline] #3: ffff8880a82e6430 (&tb->tb6_lock){+.-.}-{2:2}, at: __fib6_clean_all+0x107/0x290 net/ipv6/ip6_fib.c:2245 stack backtrace: CPU: 1 PID: 8095 Comm: syz-executor.5 Not tainted 5.9.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x198/0x1fd lib/dump_stack.c:118 fib6_del+0x12b4/0x1630 net/ipv6/ip6_fib.c:1996 fib6_clean_node+0x39b/0x570 net/ipv6/ip6_fib.c:2180 fib6_walk_continue+0x4aa/0x8e0 net/ipv6/ip6_fib.c:2102 fib6_walk+0x182/0x370 net/ipv6/ip6_fib.c:2150 fib6_clean_tree+0xdb/0x120 net/ipv6/ip6_fib.c:2230 __fib6_clean_all+0x120/0x290 net/ipv6/ip6_fib.c:2246 fib6_clean_all net/ipv6/ip6_fib.c:2257 [inline] fib6_run_gc+0x113/0x2d0 net/ipv6/ip6_fib.c:2320 ndisc_netdev_event+0x217/0x350 net/ipv6/ndisc.c:1805 notifier_call_chain+0xb5/0x200 kernel/notifier.c:83 call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:2033 call_netdevice_notifiers_extack net/core/dev.c:2045 [inline] call_netdevice_notifiers net/core/dev.c:2059 [inline] dev_close_many+0x30b/0x650 net/core/dev.c:1634 rollback_registered_many+0x3a8/0x1210 net/core/dev.c:9261 rollback_registered net/core/dev.c:9329 [inline] unregister_netdevice_queue+0x2dd/0x570 net/core/dev.c:10410 unregister_netdevice include/linux/netdevice.h:2774 [inline] ppp_release+0x216/0x240 drivers/net/ppp/ppp_generic.c:403 __fput+0x285/0x920 fs/file_table.c:281 task_work_run+0xdd/0x190 kernel/task_work.c:141 tracehook_notify_resume include/linux/tracehook.h:188 [inline] exit_to_user_mode_loop kernel/entry/common.c:163 [inline] exit_to_user_mode_prepare+0x1e1/0x200 kernel/entry/common.c:190 syscall_exit_to_user_mode+0x7e/0x2e0 kernel/entry/common.c:265 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 421842edeaf6 ("net/ipv6: Add fib6_null_entry") Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: David Ahern <dsahern@gmail.com> Reviewed-by: David Ahern <dsahern@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
| * | | | | net: dsa: link interfaces with the DSA master to get rid of lockdep warningsVladimir Oltean2020-09-081-2/+16
| |/ / / / | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Since commit 845e0ebb4408 ("net: change addr_list_lock back to static key"), cascaded DSA setups (DSA switch port as DSA master for another DSA switch port) are emitting this lockdep warning: ============================================ WARNING: possible recursive locking detected 5.8.0-rc1-00133-g923e4b5032dd-dirty #208 Not tainted -------------------------------------------- dhcpcd/323 is trying to acquire lock: ffff000066dd4268 (&dsa_master_addr_list_lock_key/1){+...}-{2:2}, at: dev_mc_sync+0x44/0x90 but task is already holding lock: ffff00006608c268 (&dsa_master_addr_list_lock_key/1){+...}-{2:2}, at: dev_mc_sync+0x44/0x90 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&dsa_master_addr_list_lock_key/1); lock(&dsa_master_addr_list_lock_key/1); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by dhcpcd/323: #0: ffffdbd1381dda18 (rtnl_mutex){+.+.}-{3:3}, at: rtnl_lock+0x24/0x30 #1: ffff00006614b268 (_xmit_ETHER){+...}-{2:2}, at: dev_set_rx_mode+0x28/0x48 #2: ffff00006608c268 (&dsa_master_addr_list_lock_key/1){+...}-{2:2}, at: dev_mc_sync+0x44/0x90 stack backtrace: Call trace: dump_backtrace+0x0/0x1e0 show_stack+0x20/0x30 dump_stack+0xec/0x158 __lock_acquire+0xca0/0x2398 lock_acquire+0xe8/0x440 _raw_spin_lock_nested+0x64/0x90 dev_mc_sync+0x44/0x90 dsa_slave_set_rx_mode+0x34/0x50 __dev_set_rx_mode+0x60/0xa0 dev_mc_sync+0x84/0x90 dsa_slave_set_rx_mode+0x34/0x50 __dev_set_rx_mode+0x60/0xa0 dev_set_rx_mode+0x30/0x48 __dev_open+0x10c/0x180 __dev_change_flags+0x170/0x1c8 dev_change_flags+0x2c/0x70 devinet_ioctl+0x774/0x878 inet_ioctl+0x348/0x3b0 sock_do_ioctl+0x50/0x310 sock_ioctl+0x1f8/0x580 ksys_ioctl+0xb0/0xf0 __arm64_sys_ioctl+0x28/0x38 el0_svc_common.constprop.0+0x7c/0x180 do_el0_svc+0x2c/0x98 el0_sync_handler+0x9c/0x1b8 el0_sync+0x158/0x180 Since DSA never made use of the netdev API for describing links between upper devices and lower devices, the dev->lower_level value of a DSA switch interface would be 1, which would warn when it is a DSA master. We can use netdev_upper_dev_link() to describe the relationship between a DSA slave and a DSA master. To be precise, a DSA "slave" (switch port) is an "upper" to a DSA "master" (host port). The relationship is "many uppers to one lower", like in the case of VLAN. So, for that reason, we use the same function as VLAN uses. There might be a chance that somebody will try to take hold of this interface and use it immediately after register_netdev() and before netdev_upper_dev_link(). To avoid that, we do the registration and linkage while holding the RTNL, and we use the RTNL-locked cousin of register_netdev(), which is register_netdevice(). Since this warning was not there when lockdep was using dynamic keys for addr_list_lock, we are blaming the lockdep patch itself. The network stack _has_ been using static lockdep keys before, and it _is_ likely that stacked DSA setups have been triggering these lockdep warnings since forever, however I can't test very old kernels on this particular stacked DSA setup, to ensure I'm not in fact introducing regressions. Fixes: 845e0ebb4408 ("net: change addr_list_lock back to static key") Suggested-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: Vladimir Oltean <olteanv@gmail.com> Reviewed-by: Florian Fainelli <f.fainelli@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
| * | | | Revert "netns: don't disable BHs when locking "nsid_lock""Taehee Yoo2020-09-071-11/+11
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | This reverts commit 8d7e5dee972f1cde2ba96c621f1541fa36e7d4f4. To protect netns id, the nsid_lock is used when netns id is being allocated and removed by peernet2id_alloc() and unhash_nsid(). The nsid_lock can be used in BH context but only spin_lock() is used in this code. Using spin_lock() instead of spin_lock_bh() can result in a deadlock in the following scenario reported by the lockdep. In order to avoid a deadlock, the spin_lock_bh() should be used instead of spin_lock() to acquire nsid_lock. Test commands: ip netns del nst ip netns add nst ip link add veth1 type veth peer name veth2 ip link set veth1 netns nst ip netns exec nst ip link add name br1 type bridge vlan_filtering 1 ip netns exec nst ip link set dev br1 up ip netns exec nst ip link set dev veth1 master br1 ip netns exec nst ip link set dev veth1 up ip netns exec nst ip link add macvlan0 link br1 up type macvlan Splat looks like: [ 33.615860][ T607] WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected [ 33.617194][ T607] 5.9.0-rc1+ #665 Not tainted [ ... ] [ 33.670615][ T607] Chain exists of: [ 33.670615][ T607] &mc->mca_lock --> &bridge_netdev_addr_lock_key --> &net->nsid_lock [ 33.670615][ T607] [ 33.673118][ T607] Possible interrupt unsafe locking scenario: [ 33.673118][ T607] [ 33.674599][ T607] CPU0 CPU1 [ 33.675557][ T607] ---- ---- [ 33.676516][ T607] lock(&net->nsid_lock); [ 33.677306][ T607] local_irq_disable(); [ 33.678517][ T607] lock(&mc->mca_lock); [ 33.679725][ T607] lock(&bridge_netdev_addr_lock_key); [ 33.681166][ T607] <Interrupt> [ 33.681791][ T607] lock(&mc->mca_lock); [ 33.682579][ T607] [ 33.682579][ T607] *** DEADLOCK *** [ ... ] [ 33.922046][ T607] stack backtrace: [ 33.922999][ T607] CPU: 3 PID: 607 Comm: ip Not tainted 5.9.0-rc1+ #665 [ 33.924099][ T607] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 [ 33.925714][ T607] Call Trace: [ 33.926238][ T607] dump_stack+0x78/0xab [ 33.926905][ T607] check_irq_usage+0x70b/0x720 [ 33.927708][ T607] ? iterate_chain_key+0x60/0x60 [ 33.928507][ T607] ? check_path+0x22/0x40 [ 33.929201][ T607] ? check_noncircular+0xcf/0x180 [ 33.930024][ T607] ? __lock_acquire+0x1952/0x1f20 [ 33.930860][ T607] __lock_acquire+0x1952/0x1f20 [ 33.931667][ T607] lock_acquire+0xaf/0x3a0 [ 33.932366][ T607] ? peernet2id_alloc+0x3a/0x170 [ 33.933147][ T607] ? br_port_fill_attrs+0x54c/0x6b0 [bridge] [ 33.934140][ T607] ? br_port_fill_attrs+0x5de/0x6b0 [bridge] [ 33.935113][ T607] ? kvm_sched_clock_read+0x14/0x30 [ 33.935974][ T607] _raw_spin_lock+0x30/0x70 [ 33.936728][ T607] ? peernet2id_alloc+0x3a/0x170 [ 33.937523][ T607] peernet2id_alloc+0x3a/0x170 [ 33.938313][ T607] rtnl_fill_ifinfo+0xb5e/0x1400 [ 33.939091][ T607] rtmsg_ifinfo_build_skb+0x8a/0xf0 [ 33.939953][ T607] rtmsg_ifinfo_event.part.39+0x17/0x50 [ 33.940863][ T607] rtmsg_ifinfo+0x1f/0x30 [ 33.941571][ T607] __dev_notify_flags+0xa5/0xf0 [ 33.942376][ T607] ? __irq_work_queue_local+0x49/0x50 [ 33.943249][ T607] ? irq_work_queue+0x1d/0x30 [ 33.943993][ T607] ? __dev_set_promiscuity+0x7b/0x1a0 [ 33.944878][ T607] __dev_set_promiscuity+0x7b/0x1a0 [ 33.945758][ T607] dev_set_promiscuity+0x1e/0x50 [ 33.946582][ T607] br_port_set_promisc+0x1f/0x40 [bridge] [ 33.947487][ T607] br_manage_promisc+0x8b/0xe0 [bridge] [ 33.948388][ T607] __dev_set_promiscuity+0x123/0x1a0 [ 33.949244][ T607] __dev_set_rx_mode+0x68/0x90 [ 33.950021][ T607] dev_uc_add+0x50/0x60 [ 33.950720][ T607] macvlan_open+0x18e/0x1f0 [macvlan] [ 33.951601][ T607] __dev_open+0xd6/0x170 [ 33.952269][ T607] __dev_change_flags+0x181/0x1d0 [ 33.953056][ T607] rtnl_configure_link+0x2f/0xa0 [ 33.953884][ T607] __rtnl_newlink+0x6b9/0x8e0 [ 33.954665][ T607] ? __lock_acquire+0x95d/0x1f20 [ 33.955450][ T607] ? lock_acquire+0xaf/0x3a0 [ 33.956193][ T607] ? is_bpf_text_address+0x5/0xe0 [ 33.956999][ T607] rtnl_newlink+0x47/0x70 Acked-by: Guillaume Nault <gnault@redhat.com> Fixes: 8d7e5dee972f ("netns: don't disable BHs when locking "nsid_lock"") Reported-by: syzbot+3f960c64a104eaa2c813@syzkaller.appspotmail.com Signed-off-by: Taehee Yoo <ap420073@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
| * | | | act_ife: load meta modules before tcf_idr_check_alloc()Cong Wang2020-09-041-10/+34
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The following deadlock scenario is triggered by syzbot: Thread A: Thread B: tcf_idr_check_alloc() ... populate_metalist() rtnl_unlock() rtnl_lock() ... request_module() tcf_idr_check_alloc() rtnl_lock() At this point, thread A is waiting for thread B to release RTNL lock, while thread B is waiting for thread A to commit the IDR change with tcf_idr_insert() later. Break this deadlock situation by preloading ife modules earlier, before tcf_idr_check_alloc(), this is fine because we only need to load modules we need potentially. Reported-and-tested-by: syzbot+80e32b5d1f9923f8ace6@syzkaller.appspotmail.com Fixes: 0190c1d452a9 ("net: sched: atomically check-allocate action") Cc: Jamal Hadi Salim <jhs@mojatatu.com> Cc: Vlad Buslov <vladbu@mellanox.com> Cc: Jiri Pirko <jiri@resnulli.us> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
* | | | | Merge tag 'nfs-for-5.9-2' of git://git.linux-nfs.org/projects/trondmy/linux-nfsLinus Torvalds2020-09-092-2/+4
|\ \ \ \ \ | |/ / / / |/| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Pull NFS client bugfixes from Trond Myklebust: - Fix an NFS/RDMA resource leak - Fix the error handling during delegation recall - NFSv4.0 needs to return the delegation on a zero-stateid SETATTR - Stop printk reading past end of string * tag 'nfs-for-5.9-2' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: SUNRPC: stop printk reading past end of string NFS: Zero-stateid SETATTR should first return delegation NFSv4.1 handle ERR_DELAY error reclaiming locking state on delegation recall xprtrdma: Release in-flight MRs on disconnect
| * | | | SUNRPC: stop printk reading past end of stringJ. Bruce Fields2020-09-051-2/+2
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Since p points at raw xdr data, there's no guarantee that it's NULL terminated, so we should give a length. And probably escape any special characters too. Reported-by: Zhi Li <yieli@redhat.com> Signed-off-by: J. Bruce Fields <bfields@redhat.com> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
| * | | | xprtrdma: Release in-flight MRs on disconnectChuck Lever2020-08-261-0/+2
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Dan Aloni reports that when a server disconnects abruptly, a few memory regions are left DMA mapped. Over time this leak could pin enough I/O resources to slow or even deadlock an NFS/RDMA client. I found that if a transport disconnects before pending Send and FastReg WRs can be posted, the to-be-registered MRs are stranded on the req's rl_registered list and never released -- since they weren't posted, there's no Send completion to DMA unmap them. Reported-by: Dan Aloni <dan@kernelim.com> Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
* | | | | net/packet: fix overflow in tpacket_rcvOr Cohen2020-09-041-1/+6
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Using tp_reserve to calculate netoff can overflow as tp_reserve is unsigned int and netoff is unsigned short. This may lead to macoff receving a smaller value then sizeof(struct virtio_net_hdr), and if po->has_vnet_hdr is set, an out-of-bounds write will occur when calling virtio_net_hdr_from_skb. The bug is fixed by converting netoff to unsigned int and checking if it exceeds USHRT_MAX. This addresses CVE-2020-14386 Fixes: 8913336a7e8d ("packet: add PACKET_RESERVE sockopt") Signed-off-by: Or Cohen <orcohen@paloaltonetworks.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
* | | | | Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/netLinus Torvalds2020-09-0352-343/+627
|\ \ \ \ \ | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Pull networking fixes from David Miller: 1) Use netif_rx_ni() when necessary in batman-adv stack, from Jussi Kivilinna. 2) Fix loss of RTT samples in rxrpc, from David Howells. 3) Memory leak in hns_nic_dev_probe(), from Dignhao Liu. 4) ravb module cannot be unloaded, fix from Yuusuke Ashizuka. 5) We disable BH for too lokng in sctp_get_port_local(), add a cond_resched() here as well, from Xin Long. 6) Fix memory leak in st95hf_in_send_cmd, from Dinghao Liu. 7) Out of bound access in bpf_raw_tp_link_fill_link_info(), from Yonghong Song. 8) Missing of_node_put() in mt7530 DSA driver, from Sumera Priyadarsini. 9) Fix crash in bnxt_fw_reset_task(), from Michael Chan. 10) Fix geneve tunnel checksumming bug in hns3, from Yi Li. 11) Memory leak in rxkad_verify_response, from Dinghao Liu. 12) In tipc, don't use smp_processor_id() in preemptible context. From Tuong Lien. 13) Fix signedness issue in mlx4 memory allocation, from Shung-Hsi Yu. 14) Missing clk_disable_prepare() in gemini driver, from Dan Carpenter. 15) Fix ABI mismatch between driver and firmware in nfp, from Louis Peens. * git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (110 commits) net/smc: fix sock refcounting in case of termination net/smc: reset sndbuf_desc if freed net/smc: set rx_off for SMCR explicitly net/smc: fix toleration of fake add_link messages tg3: Fix soft lockup when tg3_reset_task() fails. doc: net: dsa: Fix typo in config code sample net: dp83867: Fix WoL SecureOn password nfp: flower: fix ABI mismatch between driver and firmware tipc: fix shutdown() of connectionless socket ipv6: Fix sysctl max for fib_multipath_hash_policy drivers/net/wan/hdlc: Change the default of hard_header_len to 0 net: gemini: Fix another missing clk_disable_unprepare() in probe net: bcmgenet: fix mask check in bcmgenet_validate_flow() amd-xgbe: Add support for new port mode net: usb: dm9601: Add USB ID of Keenetic Plus DSL vhost: fix typo in error message net: ethernet: mlx4: Fix memory allocation in mlx4_buddy_init() pktgen: fix error message with wrong function name net: ethernet: ti: am65-cpsw: fix rmii 100Mbit link mode cxgb4: fix thermal zone device registration ...
| * | | | | net/smc: fix sock refcounting in case of terminationUrsula Braun2020-09-031-7/+8
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | When an ISM device is removed, all its linkgroups are terminated, i.e. all the corresponding connections are killed. Connection killing invokes smc_close_active_abort(), which decreases the sock refcount for certain states to simulate passive closing. And it cancels the close worker and has to give up the sock lock for this timeframe. This opens the door for a passive close worker or a socket close to run in between. In this case smc_close_active_abort() and passive close worker resp. smc_release() might do a sock_put for passive closing. This causes: [ 1323.315943] refcount_t: underflow; use-after-free. [ 1323.316055] WARNING: CPU: 3 PID: 54469 at lib/refcount.c:28 refcount_warn_saturate+0xe8/0x130 [ 1323.316069] Kernel panic - not syncing: panic_on_warn set ... [ 1323.316084] CPU: 3 PID: 54469 Comm: uperf Not tainted 5.9.0-20200826.rc2.git0.46328853ed20.300.fc32.s390x+debug #1 [ 1323.316096] Hardware name: IBM 2964 NC9 702 (z/VM 6.4.0) [ 1323.316108] Call Trace: [ 1323.316125] [<00000000c0d4aae8>] show_stack+0x90/0xf8 [ 1323.316143] [<00000000c15989b0>] dump_stack+0xa8/0xe8 [ 1323.316158] [<00000000c0d8344e>] panic+0x11e/0x288 [ 1323.316173] [<00000000c0d83144>] __warn+0xac/0x158 [ 1323.316187] [<00000000c1597a7a>] report_bug+0xb2/0x130 [ 1323.316201] [<00000000c0d36424>] monitor_event_exception+0x44/0xc0 [ 1323.316219] [<00000000c195c716>] pgm_check_handler+0x1da/0x238 [ 1323.316234] [<00000000c151844c>] refcount_warn_saturate+0xec/0x130 [ 1323.316280] ([<00000000c1518448>] refcount_warn_saturate+0xe8/0x130) [ 1323.316310] [<000003ff801f2e2a>] smc_release+0x192/0x1c8 [smc] [ 1323.316323] [<00000000c169f1fa>] __sock_release+0x5a/0xe0 [ 1323.316334] [<00000000c169f2ac>] sock_close+0x2c/0x40 [ 1323.316350] [<00000000c1086de0>] __fput+0xb8/0x278 [ 1323.316362] [<00000000c0db1e0e>] task_work_run+0x76/0xb8 [ 1323.316393] [<00000000c0d8ab84>] do_exit+0x26c/0x520 [ 1323.316408] [<00000000c0d8af08>] do_group_exit+0x48/0xc0 [ 1323.316421] [<00000000c0d8afa8>] __s390x_sys_exit_group+0x28/0x38 [ 1323.316433] [<00000000c195c32c>] system_call+0xe0/0x2b4 [ 1323.316446] 1 lock held by uperf/54469: [ 1323.316456] #0: 0000000044125e60 (&sb->s_type->i_mutex_key#9){+.+.}-{3:3}, at: __sock_release+0x44/0xe0 The patch rechecks sock state in smc_close_active_abort() after smc_close_cancel_work() to avoid duplicate decrease of sock refcount for the same purpose. Fixes: 611b63a12732 ("net/smc: cancel tx worker in case of socket aborts") Reviewed-by: Karsten Graul <kgraul@linux.ibm.com> Signed-off-by: Ursula Braun <ubraun@linux.ibm.com> Signed-off-by: Karsten Graul <kgraul@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
| * | | | | net/smc: reset sndbuf_desc if freedUrsula Braun2020-09-031-0/+1
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | When an SMC connection is created, and there is a problem to create an RMB or DMB, the previously created send buffer is thrown away as well including buffer descriptor freeing. Make sure the connection no longer references the freed buffer descriptor, otherwise bugs like this are possible: [71556.835148] ============================================================================= [71556.835168] BUG kmalloc-128 (Tainted: G B OE ): Poison overwritten [71556.835172] ----------------------------------------------------------------------------- [71556.835179] INFO: 0x00000000d20894be-0x00000000aaef63e9 @offset=2724. First byte 0x0 instead of 0x6b [71556.835215] INFO: Allocated in __smc_buf_create+0x184/0x578 [smc] age=0 cpu=5 pid=46726 [71556.835234] ___slab_alloc+0x5a4/0x690 [71556.835239] __slab_alloc.constprop.0+0x70/0xb0 [71556.835243] kmem_cache_alloc_trace+0x38e/0x3f8 [71556.835250] __smc_buf_create+0x184/0x578 [smc] [71556.835257] smc_buf_create+0x2e/0xe8 [smc] [71556.835264] smc_listen_work+0x516/0x6a0 [smc] [71556.835275] process_one_work+0x280/0x478 [71556.835280] worker_thread+0x66/0x368 [71556.835287] kthread+0x17a/0x1a0 [71556.835294] ret_from_fork+0x28/0x2c [71556.835301] INFO: Freed in smc_buf_create+0xd8/0xe8 [smc] age=0 cpu=5 pid=46726 [71556.835307] __slab_free+0x246/0x560 [71556.835311] kfree+0x398/0x3f8 [71556.835318] smc_buf_create+0xd8/0xe8 [smc] [71556.835324] smc_listen_work+0x516/0x6a0 [smc] [71556.835328] process_one_work+0x280/0x478 [71556.835332] worker_thread+0x66/0x368 [71556.835337] kthread+0x17a/0x1a0 [71556.835344] ret_from_fork+0x28/0x2c [71556.835348] INFO: Slab 0x00000000a0744551 objects=51 used=51 fp=0x0000000000000000 flags=0x1ffff00000010200 [71556.835352] INFO: Object 0x00000000563480a1 @offset=2688 fp=0x00000000289567b2 [71556.835359] Redzone 000000006783cde2: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ [71556.835363] Redzone 00000000e35b876e: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ [71556.835367] Redzone 0000000023074562: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ [71556.835372] Redzone 00000000b9564b8c: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ [71556.835376] Redzone 00000000810c6362: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ [71556.835380] Redzone 0000000065ef52c3: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ [71556.835384] Redzone 00000000c5dd6984: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ [71556.835388] Redzone 000000004c480f8f: bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ................ [71556.835392] Object 00000000563480a1: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [71556.835397] Object 000000009c479d06: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [71556.835401] Object 000000006e1dce92: 6b 6b 6b 6b 00 00 00 00 6b 6b 6b 6b 6b 6b 6b 6b kkkk....kkkkkkkk [71556.835405] Object 00000000227f7cf8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [71556.835410] Object 000000009a701215: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [71556.835414] Object 000000003731ce76: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [71556.835418] Object 00000000f7085967: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk [71556.835422] Object 0000000007f99927: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5 kkkkkkkkkkkkkkk. [71556.835427] Redzone 00000000579c4913: bb bb bb bb bb bb bb bb ........ [71556.835431] Padding 00000000305aef82: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ [71556.835435] Padding 00000000b1cdd722: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ [71556.835438] Padding 00000000c7568199: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ [71556.835442] Padding 00000000fad4c4d4: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ [71556.835451] CPU: 0 PID: 47939 Comm: kworker/0:15 Tainted: G B OE 5.9.0-rc1uschi+ #54 [71556.835456] Hardware name: IBM 3906 M03 703 (LPAR) [71556.835464] Workqueue: events smc_listen_work [smc] [71556.835470] Call Trace: [71556.835478] [<00000000d5eaeb10>] show_stack+0x90/0xf8 [71556.835493] [<00000000d66fc0f8>] dump_stack+0xa8/0xe8 [71556.835499] [<00000000d61a511c>] check_bytes_and_report+0x104/0x130 [71556.835504] [<00000000d61a57b2>] check_object+0x26a/0x2e0 [71556.835509] [<00000000d61a59bc>] alloc_debug_processing+0x194/0x238 [71556.835514] [<00000000d61a8c14>] ___slab_alloc+0x5a4/0x690 [71556.835519] [<00000000d61a9170>] __slab_alloc.constprop.0+0x70/0xb0 [71556.835524] [<00000000d61aaf66>] kmem_cache_alloc_trace+0x38e/0x3f8 [71556.835530] [<000003ff80549bbc>] __smc_buf_create+0x184/0x578 [smc] [71556.835538] [<000003ff8054a396>] smc_buf_create+0x2e/0xe8 [smc] [71556.835545] [<000003ff80540c16>] smc_listen_work+0x516/0x6a0 [smc] [71556.835549] [<00000000d5f0f448>] process_one_work+0x280/0x478 [71556.835554] [<00000000d5f0f6a6>] worker_thread+0x66/0x368 [71556.835559] [<00000000d5f18692>] kthread+0x17a/0x1a0 [71556.835563] [<00000000d6abf3b8>] ret_from_fork+0x28/0x2c [71556.835569] INFO: lockdep is turned off. [71556.835573] FIX kmalloc-128: Restoring 0x00000000d20894be-0x00000000aaef63e9=0x6b [71556.835577] FIX kmalloc-128: Marking all objects used Fixes: fd7f3a746582 ("net/smc: remove freed buffer from list") Reviewed-by: Karsten Graul <kgraul@linux.ibm.com> Signed-off-by: Ursula Braun <ubraun@linux.ibm.com> Signed-off-by: Karsten Graul <kgraul@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
| * | | | | net/smc: set rx_off for SMCR explicitlyUrsula Braun2020-09-031-0/+2
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | SMC tries to make use of SMCD first. If a problem shows up, it tries to switch to SMCR. If the SMCD initializing problem shows up after the SMCD connection has already been initialized, field rx_off keeps the wrong SMCD value for SMCR, which results in corrupted data at the receiver. This patch adds an explicit (re-)setting of field rx_off to zero if the connection uses SMCR. Fixes: be244f28d22f ("net/smc: add SMC-D support in data transfer") Reviewed-by: Karsten Graul <kgraul@linux.ibm.com> Signed-off-by: Ursula Braun <ubraun@linux.ibm.com> Signed-off-by: Karsten Graul <kgraul@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
| * | | | | net/smc: fix toleration of fake add_link messagesKarsten Graul2020-09-031-1/+14
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Older SMCR implementations had no link failover support and used one link only. Because the handshake protocol requires to try the establishment of a second link the old code sent a fake add_link message and declined any server response afterwards. The current code supports multiple links and inspects the received fake add_link message more closely. To tolerate the fake add_link messages smc_llc_is_local_add_link() needs an improved check of the message to be able to separate between locally enqueued and fake add_link messages. And smc_llc_cli_add_link() needs to check if the provided qp_mtu size is invalid and reject the add_link request in that case. Fixes: c48254fa48e5 ("net/smc: move add link processing for new device into llc layer") Reviewed-by: Ursula Braun <ubraun@linux.ibm.com> Signed-off-by: Karsten Graul <kgraul@linux.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
| * | | | | tipc: fix shutdown() of connectionless socketTetsuo Handa2020-09-021-3/+6
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | syzbot is reporting hung task at nbd_ioctl() [1], for there are two problems regarding TIPC's connectionless socket's shutdown() operation. ---------- #include <fcntl.h> #include <sys/socket.h> #include <sys/ioctl.h> #include <linux/nbd.h> #include <unistd.h> int main(int argc, char *argv[]) { const int fd = open("/dev/nbd0", 3); alarm(5); ioctl(fd, NBD_SET_SOCK, socket(PF_TIPC, SOCK_DGRAM, 0)); ioctl(fd, NBD_DO_IT, 0); /* To be interrupted by SIGALRM. */ return 0; } ---------- One problem is that wait_for_completion() from flush_workqueue() from nbd_start_device_ioctl() from nbd_ioctl() cannot be completed when nbd_start_device_ioctl() received a signal at wait_event_interruptible(), for tipc_shutdown() from kernel_sock_shutdown(SHUT_RDWR) from nbd_mark_nsock_dead() from sock_shutdown() from nbd_start_device_ioctl() is failing to wake up a WQ thread sleeping at wait_woken() from tipc_wait_for_rcvmsg() from sock_recvmsg() from sock_xmit() from nbd_read_stat() from recv_work() scheduled by nbd_start_device() from nbd_start_device_ioctl(). Fix this problem by always invoking sk->sk_state_change() (like inet_shutdown() does) when tipc_shutdown() is called. The other problem is that tipc_wait_for_rcvmsg() cannot return when tipc_shutdown() is called, for tipc_shutdown() sets sk->sk_shutdown to SEND_SHUTDOWN (despite "how" is SHUT_RDWR) while tipc_wait_for_rcvmsg() needs sk->sk_shutdown set to RCV_SHUTDOWN or SHUTDOWN_MASK. Fix this problem by setting sk->sk_shutdown to SHUTDOWN_MASK (like inet_shutdown() does) when the socket is connectionless. [1] https://syzkaller.appspot.com/bug?id=3fe51d307c1f0a845485cf1798aa059d12bf18b2 Reported-by: syzbot <syzbot+e36f41d207137b5d12f7@syzkaller.appspotmail.com> Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Signed-off-by: David S. Miller <davem@davemloft.net>
| * | | | | ipv6: Fix sysctl max for fib_multipath_hash_policyIdo Schimmel2020-09-021-1/+2
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Cited commit added the possible value of '2', but it cannot be set. Fix it by adjusting the maximum value to '2'. This is consistent with the corresponding IPv4 sysctl. Before: # sysctl -w net.ipv6.fib_multipath_hash_policy=2 sysctl: setting key "net.ipv6.fib_multipath_hash_policy": Invalid argument net.ipv6.fib_multipath_hash_policy = 2 # sysctl net.ipv6.fib_multipath_hash_policy net.ipv6.fib_multipath_hash_policy = 0 After: # sysctl -w net.ipv6.fib_multipath_hash_policy=2 net.ipv6.fib_multipath_hash_policy = 2 # sysctl net.ipv6.fib_multipath_hash_policy net.ipv6.fib_multipath_hash_policy = 2 Fixes: d8f74f0975d8 ("ipv6: Support multipath hashing on inner IP pkts") Signed-off-by: Ido Schimmel <idosch@nvidia.com> Reviewed-by: Stephen Suryaputra <ssuryaextr@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
| * | | | | pktgen: fix error message with wrong function nameLeesoo Ahn2020-09-011-1/+1
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Error on calling kthread_create_on_node prints wrong function name, kernel_thread. Fixes: 94dcf29a11b3 ("kthread: use kthread_create_on_node()") Signed-off-by: Leesoo Ahn <dev@ooseel.net> Acked-by: Gustavo A. R. Silva <gustavoars@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
| * | | | | Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nfDavid S. Miller2020-08-3110-59/+54
| |\ \ \ \ \ | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Pablo Neira Ayuso says: ==================== Netfilter fixes for net The following patchset contains Netfilter fixes for net: 1) Do not delete clash entries on reply, let them expire instead, from Florian Westphal. 2) Do not report EAGAIN to nfnetlink, otherwise this enters a busy loop. Update nfnetlink_unicast() to translate EAGAIN to ENOBUFS. 3) Remove repeated words in code comments, from Randy Dunlap. 4) Several patches for the flowtable selftests, from Fabian Frederick. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
| | * | | | | netfilter: conntrack: do not auto-delete clash entries on replyFlorian Westphal2020-08-292-17/+11
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Its possible that we have more than one packet with the same ct tuple simultaneously, e.g. when an application emits n packets on same UDP socket from multiple threads. NAT rules might be applied to those packets. With the right set of rules, n packets will be mapped to m destinations, where at least two packets end up with the same destination. When this happens, the existing clash resolution may merge the skb that is processed after the first has been received with the identical tuple already in hash table. However, its possible that this identical tuple is a NAT_CLASH tuple. In that case the second skb will be sent, but no reply can be received since the reply that is processed first removes the NAT_CLASH tuple. Do not auto-delete, this gives a 1 second window for replies to be passed back to originator. Packets that are coming later (udp stream case) will not be affected: they match the original ct entry, not a NAT_CLASH one. Also prevent NAT_CLASH entries from getting offloaded. Fixes: 6a757c07e51f ("netfilter: conntrack: allow insertion of clashing entries") Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
| | * | | | | netfilter: nfnetlink: nfnetlink_unicast() reports EAGAIN instead of ENOBUFSPablo Neira Ayuso2020-08-284-38/+39
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Frontend callback reports EAGAIN to nfnetlink to retry a command, this is used to signal that module autoloading is required. Unfortunately, nlmsg_unicast() reports EAGAIN in case the receiver socket buffer gets full, so it enters a busy-loop. This patch updates nfnetlink_unicast() to turn EAGAIN into ENOBUFS and to use nlmsg_unicast(). Remove the flags field in nfnetlink_unicast() since this is always MSG_DONTWAIT in the existing code which is exactly what nlmsg_unicast() passes to netlink_unicast() as parameter. Fixes: 96518518cc41 ("netfilter: add nftables") Reported-by: Phil Sutter <phil@nwl.cc> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
| | * | | | | netfilter: delete repeated wordsRandy Dunlap2020-08-284-4/+4
| | | |_|/ / | | |/| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Drop duplicated words in net/netfilter/ and net/ipv4/netfilter/. Signed-off-by: Randy Dunlap <rdunlap@infradead.org> Reviewed-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
| * / | | | tipc: fix using smp_processor_id() in preemptibleTuong Lien2020-08-301-3/+9
| |/ / / / | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The 'this_cpu_ptr()' is used to obtain the AEAD key' TFM on the current CPU for encryption, however the execution can be preemptible since it's actually user-space context, so the 'using smp_processor_id() in preemptible' has been observed. We fix the issue by using the 'get/put_cpu_ptr()' API which consists of a 'preempt_disable()' instead. Fixes: fc1b6d6de220 ("tipc: introduce TIPC encryption & authentication") Acked-by: Jon Maloy <jmaloy@redhat.com> Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au> Signed-off-by: David S. Miller <davem@davemloft.net>
| * | | | net_sched: fix error path in red_init()Cong Wang2020-08-281-16/+4
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | When ->init() fails, ->destroy() is called to clean up. So it is unnecessary to clean up in red_init(), and it would cause some refcount underflow. Fixes: aee9caa03fc3 ("net: sched: sch_red: Add qevents "early_drop" and "mark"") Reported-and-tested-by: syzbot+b33c1cb0a30ebdc8a5f9@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+e5ea5f8a3ecfd4427a1c@syzkaller.appspotmail.com Cc: Petr Machata <petrm@mellanox.com> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> Reviewed-by: Petr Machata <petrm@nvidia.com> Signed-off-by: David S. Miller <davem@davemloft.net>
| * | | | netlink: fix a data race in netlink_rcv_wake()zhudi2020-08-281-1/+1
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The data races were reported by KCSAN: BUG: KCSAN: data-race in netlink_recvmsg / skb_queue_tail write (marked) to 0xffff8c0986e5a8c8 of 8 bytes by interrupt on cpu 3: skb_queue_tail+0xcc/0x120 __netlink_sendskb+0x55/0x80 netlink_broadcast_filtered+0x465/0x7e0 nlmsg_notify+0x8f/0x120 rtnl_notify+0x8e/0xb0 __neigh_notify+0xf2/0x120 neigh_update+0x927/0xde0 arp_process+0x8a3/0xf50 arp_rcv+0x27c/0x3b0 __netif_receive_skb_core+0x181c/0x1840 __netif_receive_skb+0x38/0xf0 netif_receive_skb_internal+0x77/0x1c0 napi_gro_receive+0x1bd/0x1f0 e1000_clean_rx_irq+0x538/0xb20 [e1000] e1000_clean+0x5e4/0x1340 [e1000] net_rx_action+0x310/0x9d0 __do_softirq+0xe8/0x308 irq_exit+0x109/0x110 do_IRQ+0x7f/0xe0 ret_from_intr+0x0/0x1d 0xffffffffffffffff read to 0xffff8c0986e5a8c8 of 8 bytes by task 1463 on cpu 0: netlink_recvmsg+0x40b/0x820 sock_recvmsg+0xc9/0xd0 ___sys_recvmsg+0x1a4/0x3b0 __sys_recvmsg+0x86/0x120 __x64_sys_recvmsg+0x52/0x70 do_syscall_64+0xb5/0x360 entry_SYSCALL_64_after_hwframe+0x65/0xca 0xffffffffffffffff Since the write is under sk_receive_queue->lock but the read is done as lockless. so fix it by using skb_queue_empty_lockless() instead of skb_queue_empty() for the read in netlink_rcv_wake() Signed-off-by: zhudi <zhudi21@huawei.com> Signed-off-by: David S. Miller <davem@davemloft.net>
| * | | | Merge tag 'mac80211-for-davem-2020-08-28' of ↵David S. Miller2020-08-287-86/+192
| |\ \ \ \ | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211 Johannes Berg says: ==================== We have: * fixes for AQL (airtime queue limits) * reduce packet loss detection false positives * a small channel number fix for the 6 GHz band * a fix for 80+80/160 MHz negotiation * an nl80211 attribute (NL80211_ATTR_HE_6GHZ_CAPABILITY) fix * add a missing sanity check for the regulatory code ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
| | * | | | cfg80211: Adjust 6 GHz frequency to channel conversionAmar Singhal2020-08-271-3/+5
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Adjust the 6 GHz frequency to channel conversion function, the other way around was previously handled. Signed-off-by: Amar Singhal <asinghal@codeaurora.org> Link: https://lore.kernel.org/r/1592599921-10607-1-git-send-email-asinghal@codeaurora.org [rewrite commit message, hard-code channel 2] Signed-off-by: Johannes Berg <johannes.berg@intel.com>
| | * | | | mac80211: reduce packet loss event false positivesFelix Fietkau2020-08-272-18/+17
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | When running a large number of packets per second with a high data rate and long A-MPDUs, the packet loss threshold can be reached very quickly when the link conditions change. This frequently shows up as spurious disconnects. Mitigate false positives by using a similar logic for regular stations as the one being used for TDLS, though with a more aggressive timeout. Packet loss events are only reported if no ACK was received for a second. Signed-off-by: Felix Fietkau <nbd@nbd.name> Link: https://lore.kernel.org/r/20200808172542.41628-1-nbd@nbd.name Signed-off-by: Johannes Berg <johannes.berg@intel.com>
| | * | | | cfg80211: regulatory: reject invalid hintsJohannes Berg2020-08-271-0/+3
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Reject invalid hints early in order to not cause a kernel WARN later if they're restored to or similar. Reported-by: syzbot+d451401ffd00a60677ee@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?extid=d451401ffd00a60677ee Link: https://lore.kernel.org/r/20200819084648.13956-1-johannes@sipsolutions.net Signed-off-by: Johannes Berg <johannes.berg@intel.com>
| | * | | | wireless: fix wrong 160/80+80 MHz settingShay Bar2020-08-271-4/+11
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Fix cfg80211_chandef_usable(): consider IEEE80211_VHT_CAP_EXT_NSS_BW when verifying 160/80+80 MHz. Based on: "Table 9-272 — Setting of the Supported Channel Width Set subfield and Extended NSS BW Support subfield at a STA transmitting the VHT Capabilities Information field" From "Draft P802.11REVmd_D3.0.pdf" Signed-off-by: Aviad Brikman <aviad.brikman@celeno.com> Signed-off-by: Shay Bar <shay.bar@celeno.com> Link: https://lore.kernel.org/r/20200826143139.25976-1-shay.bar@celeno.com [reformat the code a bit and use u32_get_bits()] Signed-off-by: Johannes Berg <johannes.berg@intel.com>
| | * | | | mac80211: improve AQL aggregation estimation for low data ratesFelix Fietkau2020-08-271-12/+26
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Links with low data rates use much smaller aggregates and are much more sensitive to latency added by bufferbloat. Tune the assumed aggregation length based on the tx rate duration. Signed-off-by: Felix Fietkau <nbd@nbd.name> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/r/20200821163045.62140-3-nbd@nbd.name Signed-off-by: Johannes Berg <johannes.berg@intel.com>
| | * | | | mac80211: factor out code to look up the average packet length duration for ↵Felix Fietkau2020-08-271-47/+74
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | a rate This will be used to enhance AQL estimated aggregation length Signed-off-by: Felix Fietkau <nbd@nbd.name> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/r/20200821163045.62140-2-nbd@nbd.name Signed-off-by: Johannes Berg <johannes.berg@intel.com>
| | * | | | mac80211: use rate provided via status->rate on ieee80211_tx_status_ext for AQLFelix Fietkau2020-08-273-6/+60
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Since ieee80211_tx_info does not have enough room to encode HE rates, HE drivers use status->rate to provide rate info. Store it in struct sta_info and use it for AQL. Signed-off-by: Felix Fietkau <nbd@nbd.name> Acked-by: Toke Høiland-Jørgensen <toke@redhat.com> Link: https://lore.kernel.org/r/20200821163045.62140-1-nbd@nbd.name Signed-off-by: Johannes Berg <johannes.berg@intel.com>
| | * | | | nl80211: fix NL80211_ATTR_HE_6GHZ_CAPABILITY usageJohannes Berg2020-08-201-1/+1
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | In nl80211_set_station(), we check NL80211_ATTR_HE_6GHZ_CAPABILITY and then use NL80211_ATTR_HE_CAPABILITY, which is clearly wrong. Fix this to use NL80211_ATTR_HE_6GHZ_CAPABILITY as well. Cc: stable@vger.kernel.org Fixes: 43e64bf301fd ("cfg80211: handle 6 GHz capability of new station") Link: https://lore.kernel.org/r/20200805153516.310cef625955.I0abc04dc8abb2c7c005c88ef8fa2d0e3c9fb95c4@changeid Signed-off-by: Johannes Berg <johannes.berg@intel.com>
| * | | | | rxrpc: Fix memory leak in rxkad_verify_response()Dinghao Liu2020-08-271-1/+2
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Fix a memory leak in rxkad_verify_response() whereby the response buffer doesn't get freed if we fail to allocate a ticket buffer. Fixes: ef68622da9cc ("rxrpc: Handle temporary errors better in rxkad security") Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
| * | | | | Merge tag 'rxrpc-fixes-20200820' of ↵David S. Miller2020-08-276-83/+155
| |\ \ \ \ \ | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs David Howells says: ==================== rxrpc, afs: Fix probing issues Here are some fixes for rxrpc and afs to fix issues in the RTT measuring in rxrpc and thence the Volume Location server probing in afs: (1) Move the serial number of a received ACK into a local variable to simplify the next patch. (2) Fix the loss of RTT samples due to extra interposed ACKs causing baseline information to be discarded too early. This is a particular problem for afs when it sends a single very short call to probe a server it hasn't talked to recently. (3) Fix rxrpc_kernel_get_srtt() to indicate whether it actually has seen any valid samples or not. (4) Remove a field that's set/woken, but never read/waited on. (5) Expose the RTT and other probe information through procfs to make debugging of this stuff easier. (6) Fix VL rotation in afs to only use summary information from VL probing and not the probe running state (which gets clobbered when next a probe is issued). (7) Fix VL rotation to actually return the error aggregated from the probe errors. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
| | * | | | | rxrpc: Make rxrpc_kernel_get_srtt() indicate validityDavid Howells2020-08-201-3/+13
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Fix rxrpc_kernel_get_srtt() to indicate the validity of the returned smoothed RTT. If we haven't had any valid samples yet, the SRTT isn't useful. Fixes: c410bf01933e ("rxrpc: Fix the excessive initial retransmission timeout") Signed-off-by: David Howells <dhowells@redhat.com>
| | * | | | | rxrpc: Fix loss of RTT samples due to interposed ACKDavid Howells2020-08-205-71/+132
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The Rx protocol has a mechanism to help generate RTT samples that works by a client transmitting a REQUESTED-type ACK when it receives a DATA packet that has the REQUEST_ACK flag set. The peer, however, may interpose other ACKs before transmitting the REQUESTED-ACK, as can be seen in the following trace excerpt: rxrpc_tx_data: c=00000044 DATA d0b5ece8:00000001 00000001 q=00000001 fl=07 rxrpc_rx_ack: c=00000044 00000001 PNG r=00000000 f=00000002 p=00000000 n=0 rxrpc_rx_ack: c=00000044 00000002 REQ r=00000001 f=00000002 p=00000001 n=0 ... DATA packet 1 (q=xx) has REQUEST_ACK set (bit 1 of fl=xx). The incoming ping (labelled PNG) hard-acks the request DATA packet (f=xx exceeds the sequence number of the DATA packet), causing it to be discarded from the Tx ring. The ACK that was requested (labelled REQ, r=xx references the serial of the DATA packet) comes after the ping, but the sk_buff holding the timestamp has gone and the RTT sample is lost. This is particularly noticeable on RPC calls used to probe the service offered by the peer. A lot of peers end up with an unknown RTT because we only ever sent a single RPC. This confuses the server rotation algorithm. Fix this by caching the information about the outgoing packet in RTT calculations in the rxrpc_call struct rather than looking in the Tx ring. A four-deep buffer is maintained and both REQUEST_ACK-flagged DATA and PING-ACK transmissions are recorded in there. When the appropriate response ACK is received, the buffer is checked for a match and, if found, an RTT sample is recorded. If a received ACK refers to a packet with a later serial number than an entry in the cache, that entry is presumed lost and the entry is made available to record a new transmission. ACKs types other than REQUESTED-type and PING-type cause any matching sample to be cancelled as they don't necessarily represent a useful measurement. If there's no space in the buffer on ping/data transmission, the sample base is discarded. Fixes: 50235c4b5a2f ("rxrpc: Obtain RTT data by requesting ACKs on DATA packets") Signed-off-by: David Howells <dhowells@redhat.com>
| | * | | | | rxrpc: Keep the ACK serial in a var in rxrpc_input_ack()David Howells2020-08-201-10/+11
| | |/ / / / | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Keep the ACK serial number in a variable in rxrpc_input_ack() as it's used frequently. Signed-off-by: David Howells <dhowells@redhat.com>
| * | | | | net: Fix some commentsMiaohe Lin2020-08-274-5/+5
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Fix some comments, including wrong function name, duplicated word and so on. Signed-off-by: Miaohe Lin <linmiaohe@huawei.com> Signed-off-by: David S. Miller <davem@davemloft.net>
| * | | | | net: disable netpoll on fresh napisJakub Kicinski2020-08-262-2/+3
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | napi_disable() makes sure to set the NAPI_STATE_NPSVC bit to prevent netpoll from accessing rings before init is complete. However, the same is not done for fresh napi instances in netif_napi_add(), even though we expect NAPI instances to be added as disabled. This causes crashes during driver reconfiguration (enabling XDP, changing the channel count) - if there is any printk() after netif_napi_add() but before napi_enable(). To ensure memory ordering is correct we need to use RCU accessors. Reported-by: Rob Sherwood <rsher@fb.com> Fixes: 2d8bff12699a ("netpoll: Close race condition between poll_one_napi and napi_disable") Signed-off-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
| * | | | | ipv4: Silence suspicious RCU usage warningIdo Schimmel2020-08-261-1/+2
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | fib_info_notify_update() is always called with RTNL held, but not from an RCU read-side critical section. This leads to the following warning [1] when the FIB table list is traversed with hlist_for_each_entry_rcu(), but without a proper lockdep expression. Since modification of the list is protected by RTNL, silence the warning by adding a lockdep expression which verifies RTNL is held. [1] ============================= WARNING: suspicious RCU usage 5.9.0-rc1-custom-14233-g2f26e122d62f #129 Not tainted ----------------------------- net/ipv4/fib_trie.c:2124 RCU-list traversed in non-reader section!! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by ip/834: #0: ffffffff85a3b6b0 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x49a/0xbd0 stack backtrace: CPU: 0 PID: 834 Comm: ip Not tainted 5.9.0-rc1-custom-14233-g2f26e122d62f #129 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-2.fc32 04/01/2014 Call Trace: dump_stack+0x100/0x184 lockdep_rcu_suspicious+0x143/0x14d fib_info_notify_update+0x8d1/0xa60 __nexthop_replace_notify+0xd2/0x290 rtm_new_nexthop+0x35e2/0x5946 rtnetlink_rcv_msg+0x4f7/0xbd0 netlink_rcv_skb+0x17a/0x480 rtnetlink_rcv+0x22/0x30 netlink_unicast+0x5ae/0x890 netlink_sendmsg+0x98a/0xf40 ____sys_sendmsg+0x879/0xa00 ___sys_sendmsg+0x122/0x190 __sys_sendmsg+0x103/0x1d0 __x64_sys_sendmsg+0x7d/0xb0 do_syscall_64+0x32/0x50 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7fde28c3be57 Code: 0c 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 RSP: 002b:00007ffc09330028 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fde28c3be57 RDX: 0000000000000000 RSI: 00007ffc09330090 RDI: 0000000000000003 RBP: 000000005f45f911 R08: 0000000000000001 R09: 00007ffc0933012c R10: 0000000000000076 R11: 0000000000000246 R12: 0000000000000001 R13: 00007ffc09330290 R14: 00007ffc09330eee R15: 00005610e48ed020 Fixes: 1bff1a0c9bbd ("ipv4: Add function to send route updates") Signed-off-by: Ido Schimmel <idosch@nvidia.com> Reviewed-by: David Ahern <dsahern@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
| * | | | | mptcp: free acked data before waiting for more memoryFlorian Westphal2020-08-261-2/+1
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | After subflow lock is dropped, more wmem might have been made available. This fixes a deadlock in mptcp_connect.sh 'mmap' mode: wmem is exhausted. But as the mptcp socket holds on to already-acked data (for retransmit) no wakeup will occur. Using 'goto restart' calls mptcp_clean_una(sk) which will free pages that have been acked completely in the mean time. Fixes: fb529e62d3f3 ("mptcp: break and restart in case mptcp sndbuf is full") Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: David S. Miller <davem@davemloft.net>