From 71a06f1034b91e15d3ba6b5539c7d3a2d7f13030 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Sat, 17 Dec 2022 00:57:42 +0100 Subject: mac802154: Fix possible double free upon parsing error Commit 4d1c7d87030b ("mac802154: Move an skb free within the rx path") tried to simplify error handling within the receive path by moving the kfree_skb() call at the very end of the top-level function but missed one kfree_skb() called upon frame parsing error. Prevent this possible double free from happening. Fixes: 4d1c7d87030b ("mac802154: Move an skb free within the rx path") Reported-by: Dan Carpenter Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/r/20221216235742.646134-1-miquel.raynal@bootlin.com Signed-off-by: Stefan Schmidt --- net/mac802154/rx.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index c2aae2a6d6a6..97bb4401dd3e 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -213,7 +213,6 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local, ret = ieee802154_parse_frame_start(skb, &hdr); if (ret) { pr_debug("got invalid frame\n"); - kfree_skb(skb); return; } -- cgit v1.2.3 From 5b4a79ba65a1ab479903fff2e604865d229b70a9 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Sat, 21 Jan 2023 13:41:43 +0100 Subject: bpf, sockmap: Don't let sock_map_{close,destroy,unhash} call itself sock_map proto callbacks should never call themselves by design. Protect against bugs like [1] and break out of the recursive loop to avoid a stack overflow in favor of a resource leak. [1] https://lore.kernel.org/all/00000000000073b14905ef2e7401@google.com/ Suggested-by: Eric Dumazet Signed-off-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/r/20230113-sockmap-fix-v2-1-1e0ee7ac2f90@cloudflare.com Signed-off-by: Alexei Starovoitov --- net/core/sock_map.c | 61 +++++++++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 22fa2c5bc6ec..a68a7290a3b2 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -1569,15 +1569,16 @@ void sock_map_unhash(struct sock *sk) psock = sk_psock(sk); if (unlikely(!psock)) { rcu_read_unlock(); - if (sk->sk_prot->unhash) - sk->sk_prot->unhash(sk); - return; + saved_unhash = READ_ONCE(sk->sk_prot)->unhash; + } else { + saved_unhash = psock->saved_unhash; + sock_map_remove_links(sk, psock); + rcu_read_unlock(); } - - saved_unhash = psock->saved_unhash; - sock_map_remove_links(sk, psock); - rcu_read_unlock(); - saved_unhash(sk); + if (WARN_ON_ONCE(saved_unhash == sock_map_unhash)) + return; + if (saved_unhash) + saved_unhash(sk); } EXPORT_SYMBOL_GPL(sock_map_unhash); @@ -1590,17 +1591,18 @@ void sock_map_destroy(struct sock *sk) psock = sk_psock_get(sk); if (unlikely(!psock)) { rcu_read_unlock(); - if (sk->sk_prot->destroy) - sk->sk_prot->destroy(sk); - return; + saved_destroy = READ_ONCE(sk->sk_prot)->destroy; + } else { + saved_destroy = psock->saved_destroy; + sock_map_remove_links(sk, psock); + rcu_read_unlock(); + sk_psock_stop(psock); + sk_psock_put(sk, psock); } - - saved_destroy = psock->saved_destroy; - sock_map_remove_links(sk, psock); - rcu_read_unlock(); - sk_psock_stop(psock); - sk_psock_put(sk, psock); - saved_destroy(sk); + if (WARN_ON_ONCE(saved_destroy == sock_map_destroy)) + return; + if (saved_destroy) + saved_destroy(sk); } EXPORT_SYMBOL_GPL(sock_map_destroy); @@ -1615,16 +1617,21 @@ void sock_map_close(struct sock *sk, long timeout) 
if (unlikely(!psock)) { rcu_read_unlock(); release_sock(sk); - return sk->sk_prot->close(sk, timeout); + saved_close = READ_ONCE(sk->sk_prot)->close; + } else { + saved_close = psock->saved_close; + sock_map_remove_links(sk, psock); + rcu_read_unlock(); + sk_psock_stop(psock); + release_sock(sk); + cancel_work_sync(&psock->work); + sk_psock_put(sk, psock); } - - saved_close = psock->saved_close; - sock_map_remove_links(sk, psock); - rcu_read_unlock(); - sk_psock_stop(psock); - release_sock(sk); - cancel_work_sync(&psock->work); - sk_psock_put(sk, psock); + /* Make sure we do not recurse. This is a bug. + * Leak the socket instead of crashing on a stack overflow. + */ + if (WARN_ON_ONCE(saved_close == sock_map_close)) + return; saved_close(sk, timeout); } EXPORT_SYMBOL_GPL(sock_map_close); -- cgit v1.2.3 From ddce1e091757d0259107c6c0c7262df201de2b66 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Sat, 21 Jan 2023 13:41:44 +0100 Subject: bpf, sockmap: Check for any of tcp_bpf_prots when cloning a listener A listening socket linked to a sockmap has its sk_prot overridden. It points to one of the struct proto variants in tcp_bpf_prots. The variant depends on the socket's family and which sockmap programs are attached. A child socket cloned from a TCP listener initially inherits the listener's sk_prot. But before cloning is finished, we restore the child's proto to the listener's original non-tcp_bpf_prots one. This happens in tcp_create_openreq_child -> tcp_bpf_clone. Today, in tcp_bpf_clone we detect if the child's proto should be restored by checking only for the TCP_BPF_BASE proto variant. This is not correct. The sk_prot of a listening socket linked to a sockmap can point to any variant in tcp_bpf_prots. If the listener's sk_prot happens not to be the TCP_BPF_BASE variant, then tcp_bpf_clone unintentionally leaves the child socket with the inherited sk_prot. This leads to issues like infinite recursion on close [1], because the child state is otherwise not set up for use with tcp_bpf_prot operations. Adjust the check in tcp_bpf_clone to detect all of the tcp_bpf_prots variants. Note that it wouldn't be sufficient to check the socket state when overriding the sk_prot in tcp_bpf_update_proto in order to always use the TCP_BPF_BASE variant for listening sockets. Since commit b8b8315e39ff ("bpf, sockmap: Remove unhash handler for BPF sockmap usage") it is possible for a socket to transition to TCP_LISTEN state while already linked to a sockmap, e.g. connect() -> insert into map -> connect(AF_UNSPEC) -> listen(). [1]: https://lore.kernel.org/all/00000000000073b14905ef2e7401@google.com/ Fixes: e80251555f0b ("tcp_bpf: Don't let child socket inherit parent protocol ops on copy") Reported-by: syzbot+04c21ed96d861dccc5cd@syzkaller.appspotmail.com Signed-off-by: Jakub Sitnicki Acked-by: John Fastabend Link: https://lore.kernel.org/r/20230113-sockmap-fix-v2-2-1e0ee7ac2f90@cloudflare.com Signed-off-by: Alexei Starovoitov --- net/ipv4/tcp_bpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 94aad3870c5f..cf26d65ca389 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -639,10 +640,9 @@ EXPORT_SYMBOL_GPL(tcp_bpf_update_proto); */ void tcp_bpf_clone(const struct sock *sk, struct sock *newsk) { - int family = sk->sk_family == AF_INET6 ?
TCP_BPF_IPV6 : TCP_BPF_IPV4; struct proto *prot = newsk->sk_prot; - if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE]) + if (is_insidevar(prot, tcp_bpf_prots)) newsk->sk_prot = sk->sk_prot_creator; } #endif /* CONFIG_BPF_SYSCALL */ -- cgit v1.2.3 From 7d2c89b325874a35564db5630a459966afab04cc Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 26 Jan 2023 11:06:59 -0800 Subject: skb: Do not mix page pool and page referenced frags in GRO GSO should not merge page pool recycled frames with standard reference counted frames. Traditionally this didn't occur, at least not often. However, as we start looking at adding support for wireless adapters, there is now the potential to mix the two due to A-MSDU repartitioning frames in the receive path. There are possibly other places where this may have occurred; however, I suspect they must be few and far between as we have not seen this issue until now. Fixes: 53e0961da1c7 ("page_pool: add frag page recycling support in page pool") Reported-by: Felix Fietkau Signed-off-by: Alexander Duyck Acked-by: Ilias Apalodimas Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/167475990764.1934330.11960904198087757911.stgit@localhost.localdomain Signed-off-by: Jakub Kicinski --- net/core/gro.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/core/gro.c b/net/core/gro.c index 506f83d715f8..4bac7ea6e025 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -162,6 +162,15 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) struct sk_buff *lp; int segs; + /* Do not splice page pool based packets w/ non-page pool + * packets. This can result in reference count issues as page + * pool pages will not decrement the reference count and will + * instead be immediately returned to the pool or have frag + * count decremented. + */ + if (p->pp_recycle != skb->pp_recycle) + return -ETOOMANYREFS; + /* pairs with WRITE_ONCE() in netif_set_gro_max_size() */ gro_max_size = READ_ONCE(p->dev->gro_max_size); -- cgit v1.2.3 From 14caefcf9837a2be765a566005ad82cd0d2a429f Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Wed, 25 Jan 2023 02:59:44 -0800 Subject: net/rose: Fix to not accept on connected socket If you call listen() and accept() on an already connect()ed rose socket, accept() can successfully connect. This is because, when the peer socket sends data via sendmsg(), the skb (which carries its own sk) is stored in the connected socket's sk->sk_receive_queue, and rose_accept() dequeues that waiting skb. This creates a child socket with the sk of the parent rose socket, which can cause confusion. Fix rose_listen() to return -EINVAL if the socket has already been successfully connected, and add lock_sock to prevent this issue.
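For illustration only (this sketch is not part of the patch; ROSE addressing and the bind()/connect() details are omitted), the userspace sequence that rose_listen() now rejects looks roughly like this:

```c
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>

#ifndef AF_ROSE
#define AF_ROSE 11			/* Amateur Radio X.25 PLP, from linux/socket.h */
#endif

int main(void)
{
	int s = socket(AF_ROSE, SOCK_SEQPACKET, 0);

	if (s < 0)
		return 1;

	/* ... bind() and connect() the socket to a peer here ... */

	/* listen() on the already connected socket is now refused, so a
	 * later accept() can no longer hand back a child that shares the
	 * parent's sk.
	 */
	if (listen(s, 1) < 0)
		perror("listen");	/* expected to fail with EINVAL after this fix */

	close(s);
	return 0;
}
```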
Signed-off-by: Hyunwoo Kim Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20230125105944.GA133314@ubuntu Signed-off-by: Jakub Kicinski --- net/rose/af_rose.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 36fefc3957d7..ca2b17f32670 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -488,6 +488,12 @@ static int rose_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; + lock_sock(sk); + if (sock->state != SS_UNCONNECTED) { + release_sock(sk); + return -EINVAL; + } + if (sk->sk_state != TCP_LISTEN) { struct rose_sock *rose = rose_sk(sk); @@ -497,8 +503,10 @@ static int rose_listen(struct socket *sock, int backlog) memset(rose->dest_digis, 0, AX25_ADDR_LEN * ROSE_MAX_DIGIS); sk->sk_max_ack_backlog = backlog; sk->sk_state = TCP_LISTEN; + release_sock(sk); return 0; } + release_sock(sk); return -EOPNOTSUPP; } -- cgit v1.2.3 From 29de68c2b32ce58d64dea496d281e25ad0f551bd Mon Sep 17 00:00:00 2001 From: Natalia Petrova Date: Wed, 25 Jan 2023 16:48:31 +0300 Subject: net: qrtr: free memory on error path in radix_tree_insert() Function radix_tree_insert() returns errors if the node hasn't been initialized and added to the tree. "kfree(node)" and return value "NULL" of node_get() help to avoid using unclear node in other calls. Found by Linux Verification Center (linuxtesting.org) with SVACE. Cc: # 5.7 Fixes: 0c2204a4ad71 ("net: qrtr: Migrate nameservice to kernel from userspace") Signed-off-by: Natalia Petrova Reviewed-by: Simon Horman Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20230125134831.8090-1-n.petrova@fintech.ru Signed-off-by: Jakub Kicinski --- net/qrtr/ns.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index 1990d496fcfc..e595079c2caf 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -83,7 +83,10 @@ static struct qrtr_node *node_get(unsigned int node_id) node->id = node_id; - radix_tree_insert(&nodes, node_id, node); + if (radix_tree_insert(&nodes, node_id, node)) { + kfree(node); + return NULL; + } return node; } -- cgit v1.2.3 From 60bd1d9008a50cc78c4033a16a6f5d78210d481c Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 26 Jan 2023 14:45:51 +0800 Subject: net: mctp: purge receive queues on sk destruction We may have pending skbs in the receive queue when the sk is being destroyed; add a destructor to purge the queue. MCTP doesn't use the error queue, so only the receive_queue is purged. 
Fixes: 833ef3b91de6 ("mctp: Populate socket implementation") Signed-off-by: Jeremy Kerr Reviewed-by: Pavan Chebbi Link: https://lore.kernel.org/r/20230126064551.464468-1-jk@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- net/mctp/af_mctp.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index 45bbe3e54cc2..3150f3f0c872 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -587,6 +587,11 @@ static void mctp_sk_unhash(struct sock *sk) del_timer_sync(&msk->key_expiry); } +static void mctp_sk_destruct(struct sock *sk) +{ + skb_queue_purge(&sk->sk_receive_queue); +} + static struct proto mctp_proto = { .name = "MCTP", .owner = THIS_MODULE, @@ -623,6 +628,7 @@ static int mctp_pf_create(struct net *net, struct socket *sock, return -ENOMEM; sock_init_data(sock, sk); + sk->sk_destruct = mctp_sk_destruct; rc = 0; if (sk->sk_prot->init) -- cgit v1.2.3 From 611792920925fb088ddccbe2783c7f92fdfb6b64 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Thu, 26 Jan 2023 18:32:50 -0800 Subject: netrom: Fix use-after-free caused by accept on already connected socket If you call listen() and accept() on an already connect()ed AF_NETROM socket, accept() can successfully connect. This is because when the peer socket sends data to sendmsg, the skb with its own sk stored in the connected socket's sk->sk_receive_queue is connected, and nr_accept() dequeues the skb waiting in the sk->sk_receive_queue. As a result, nr_accept() allocates and returns a sock with the sk of the parent AF_NETROM socket. And here use-after-free can happen through complex race conditions: ``` cpu0 cpu1 1. socket_2 = socket(AF_NETROM) . . listen(socket_2) accepted_socket = accept(socket_2) 2. socket_1 = socket(AF_NETROM) nr_create() // sk refcount : 1 connect(socket_1) 3. write(accepted_socket) nr_sendmsg() nr_output() nr_kick() nr_send_iframe() nr_transmit_buffer() nr_route_frame() nr_loopback_queue() nr_loopback_timer() nr_rx_frame() nr_process_rx_frame(sk, skb); // sk : socket_1's sk nr_state3_machine() nr_queue_rx_frame() sock_queue_rcv_skb() sock_queue_rcv_skb_reason() __sock_queue_rcv_skb() __skb_queue_tail(list, skb); // list : socket_1's sk->sk_receive_queue 4. listen(socket_1) nr_listen() uaf_socket = accept(socket_1) nr_accept() skb_dequeue(&sk->sk_receive_queue); 5. close(accepted_socket) nr_release() nr_write_internal(sk, NR_DISCREQ) nr_transmit_buffer() // NR_DISCREQ nr_route_frame() nr_loopback_queue() nr_loopback_timer() nr_rx_frame() // sk : socket_1's sk nr_process_rx_frame() // NR_STATE_3 nr_state3_machine() // NR_DISCREQ nr_disconnect() nr_sk(sk)->state = NR_STATE_0; 6. 
close(socket_1) // sk refcount : 3 nr_release() // NR_STATE_0 sock_put(sk); // sk refcount : 0 sk_free(sk); close(uaf_socket) nr_release() sock_hold(sk); // UAF ``` KASAN report by syzbot: ``` BUG: KASAN: use-after-free in nr_release+0x66/0x460 net/netrom/af_netrom.c:520 Write of size 4 at addr ffff8880235d8080 by task syz-executor564/5128 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd1/0x138 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:306 [inline] print_report+0x15e/0x461 mm/kasan/report.c:417 kasan_report+0xbf/0x1f0 mm/kasan/report.c:517 check_region_inline mm/kasan/generic.c:183 [inline] kasan_check_range+0x141/0x190 mm/kasan/generic.c:189 instrument_atomic_read_write include/linux/instrumented.h:102 [inline] atomic_fetch_add_relaxed include/linux/atomic/atomic-instrumented.h:116 [inline] __refcount_add include/linux/refcount.h:193 [inline] __refcount_inc include/linux/refcount.h:250 [inline] refcount_inc include/linux/refcount.h:267 [inline] sock_hold include/net/sock.h:775 [inline] nr_release+0x66/0x460 net/netrom/af_netrom.c:520 __sock_release+0xcd/0x280 net/socket.c:650 sock_close+0x1c/0x20 net/socket.c:1365 __fput+0x27c/0xa90 fs/file_table.c:320 task_work_run+0x16f/0x270 kernel/task_work.c:179 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0xaa8/0x2950 kernel/exit.c:867 do_group_exit+0xd4/0x2a0 kernel/exit.c:1012 get_signal+0x21c3/0x2450 kernel/signal.c:2859 arch_do_signal_or_restart+0x79/0x5c0 arch/x86/kernel/signal.c:306 exit_to_user_mode_loop kernel/entry/common.c:168 [inline] exit_to_user_mode_prepare+0x15f/0x250 kernel/entry/common.c:203 __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline] syscall_exit_to_user_mode+0x1d/0x50 kernel/entry/common.c:296 do_syscall_64+0x46/0xb0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f6c19e3c9b9 Code: Unable to access opcode bytes at 0x7f6c19e3c98f. 
RSP: 002b:00007fffd4ba2ce8 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 RAX: 0000000000000116 RBX: 0000000000000003 RCX: 00007f6c19e3c9b9 RDX: 0000000000000318 RSI: 00000000200bd000 RDI: 0000000000000006 RBP: 0000000000000003 R08: 000000000000000d R09: 000000000000000d R10: 0000000000000000 R11: 0000000000000246 R12: 000055555566a2c0 R13: 0000000000000011 R14: 0000000000000000 R15: 0000000000000000 Allocated by task 5128: kasan_save_stack+0x22/0x40 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 ____kasan_kmalloc mm/kasan/common.c:371 [inline] ____kasan_kmalloc mm/kasan/common.c:330 [inline] __kasan_kmalloc+0xa3/0xb0 mm/kasan/common.c:380 kasan_kmalloc include/linux/kasan.h:211 [inline] __do_kmalloc_node mm/slab_common.c:968 [inline] __kmalloc+0x5a/0xd0 mm/slab_common.c:981 kmalloc include/linux/slab.h:584 [inline] sk_prot_alloc+0x140/0x290 net/core/sock.c:2038 sk_alloc+0x3a/0x7a0 net/core/sock.c:2091 nr_create+0xb6/0x5f0 net/netrom/af_netrom.c:433 __sock_create+0x359/0x790 net/socket.c:1515 sock_create net/socket.c:1566 [inline] __sys_socket_create net/socket.c:1603 [inline] __sys_socket_create net/socket.c:1588 [inline] __sys_socket+0x133/0x250 net/socket.c:1636 __do_sys_socket net/socket.c:1649 [inline] __se_sys_socket net/socket.c:1647 [inline] __x64_sys_socket+0x73/0xb0 net/socket.c:1647 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Freed by task 5128: kasan_save_stack+0x22/0x40 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 kasan_save_free_info+0x2b/0x40 mm/kasan/generic.c:518 ____kasan_slab_free mm/kasan/common.c:236 [inline] ____kasan_slab_free+0x13b/0x1a0 mm/kasan/common.c:200 kasan_slab_free include/linux/kasan.h:177 [inline] __cache_free mm/slab.c:3394 [inline] __do_kmem_cache_free mm/slab.c:3580 [inline] __kmem_cache_free+0xcd/0x3b0 mm/slab.c:3587 sk_prot_free net/core/sock.c:2074 [inline] __sk_destruct+0x5df/0x750 net/core/sock.c:2166 sk_destruct net/core/sock.c:2181 [inline] __sk_free+0x175/0x460 net/core/sock.c:2192 sk_free+0x7c/0xa0 net/core/sock.c:2203 sock_put include/net/sock.h:1991 [inline] nr_release+0x39e/0x460 net/netrom/af_netrom.c:554 __sock_release+0xcd/0x280 net/socket.c:650 sock_close+0x1c/0x20 net/socket.c:1365 __fput+0x27c/0xa90 fs/file_table.c:320 task_work_run+0x16f/0x270 kernel/task_work.c:179 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0xaa8/0x2950 kernel/exit.c:867 do_group_exit+0xd4/0x2a0 kernel/exit.c:1012 get_signal+0x21c3/0x2450 kernel/signal.c:2859 arch_do_signal_or_restart+0x79/0x5c0 arch/x86/kernel/signal.c:306 exit_to_user_mode_loop kernel/entry/common.c:168 [inline] exit_to_user_mode_prepare+0x15f/0x250 kernel/entry/common.c:203 __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline] syscall_exit_to_user_mode+0x1d/0x50 kernel/entry/common.c:296 do_syscall_64+0x46/0xb0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x63/0xcd ``` To fix this issue, nr_listen() returns -EINVAL for sockets that successfully nr_connect(). Reported-by: syzbot+caa188bdfc1eeafeb418@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Hyunwoo Kim Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. 
Miller --- net/netrom/af_netrom.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 6f7f4392cffb..5a4cb796150f 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -400,6 +400,11 @@ static int nr_listen(struct socket *sock, int backlog) struct sock *sk = sock->sk; lock_sock(sk); + if (sock->state != SS_UNCONNECTED) { + release_sock(sk); + return -EINVAL; + } + if (sk->sk_state != TCP_LISTEN) { memset(&nr_sk(sk)->user_addr, 0, AX25_ADDR_LEN); sk->sk_max_ack_backlog = backlog; -- cgit v1.2.3 From ffe2a22562444720b05bdfeb999c03e810d84cbb Mon Sep 17 00:00:00 2001 From: Pietro Borrello Date: Sat, 28 Jan 2023 16:29:17 +0000 Subject: net/tls: tls_is_tx_ready() checked list_entry tls_is_tx_ready() checks that list_first_entry() does not return NULL. This condition can never happen. For empty lists, list_first_entry() returns the list_entry() of the head, which is a type confusion. Use list_first_entry_or_null() which returns NULL in case of empty lists. Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Signed-off-by: Pietro Borrello Link: https://lore.kernel.org/r/20230128-list-entry-null-check-tls-v1-1-525bbfe6f0d0@diag.uniroma1.it Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 9ed978634125..a83d2b4275fa 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2427,7 +2427,7 @@ static bool tls_is_tx_ready(struct tls_sw_context_tx *ctx) { struct tls_rec *rec; - rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); + rec = list_first_entry_or_null(&ctx->tx_list, struct tls_rec, list); if (!rec) return false; -- cgit v1.2.3 From de5ca4c3852f896cacac2bf259597aab5e17d9e3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 27 Jan 2023 14:40:37 -0800 Subject: net: sched: sch: Bounds check priority Nothing was explicitly bounds checking the priority index used to access clpriop[]. WARN and bail out early if it's pathological. Seen with GCC 13: ../net/sched/sch_htb.c: In function 'htb_activate_prios': ../net/sched/sch_htb.c:437:44: warning: array subscript [0, 31] is outside array bounds of 'struct htb_prio[8]' [-Warray-bounds=] 437 | if (p->inner.clprio[prio].feed.rb_node) | ~~~~~~~~~~~~~~~^~~~~~ ../net/sched/sch_htb.c:131:41: note: while referencing 'clprio' 131 | struct htb_prio clprio[TC_HTB_NUMPRIO]; | ^~~~~~ Cc: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Cc: "David S. 
Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Simon Horman Reviewed-by: Cong Wang Link: https://lore.kernel.org/r/20230127224036.never.561-kees@kernel.org Signed-off-by: Paolo Abeni --- net/sched/sch_htb.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index f46643850df8..cc28e41fb745 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -431,7 +431,10 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl) while (cl->cmode == HTB_MAY_BORROW && p && mask) { m = mask; while (m) { - int prio = ffz(~m); + unsigned int prio = ffz(~m); + + if (WARN_ON_ONCE(prio > ARRAY_SIZE(p->inner.clprio))) + break; m &= ~(1 << prio); if (p->inner.clprio[prio].feed.rb_node) -- cgit v1.2.3 From 2b272bb558f1d3a5aa95ed8a82253786fd1a48ba Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 30 Jan 2023 11:39:29 +0100 Subject: netfilter: br_netfilter: disable sabotage_in hook after first suppression When using a xfrm interface in a bridged setup (the outgoing device is bridged), the incoming packets in the xfrm interface are only tracked in the outgoing direction. $ brctl show bridge name interfaces br_eth1 eth1 $ conntrack -L tcp 115 SYN_SENT src=192... dst=192... [UNREPLIED] ... If br_netfilter is enabled, the first (encrypted) packet is received onR eth1, conntrack hooks are called from br_netfilter emulation which allocates nf_bridge info for this skb. If the packet is for local machine, skb gets passed up the ip stack. The skb passes through ip prerouting a second time. br_netfilter ip_sabotage_in supresses the re-invocation of the hooks. After this, skb gets decrypted in xfrm layer and appears in network stack a second time (after decryption). Then, ip_sabotage_in is called again and suppresses netfilter hook invocation, even though the bridge layer never called them for the plaintext incarnation of the packet. Free the bridge info after the first suppression to avoid this. I was unable to figure out where the regression comes from, as far as i can see br_netfilter always had this problem; i did not expect that skb is looped again with different headers. Fixes: c4b0e771f906 ("netfilter: avoid using skb->nf_bridge directly") Reported-and-tested-by: Wolfgang Nothdurft Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter_hooks.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index f20f4373ff40..9554abcfd5b4 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -871,6 +871,7 @@ static unsigned int ip_sabotage_in(void *priv, if (nf_bridge && !nf_bridge->in_prerouting && !netif_is_l3_master(skb->dev) && !netif_is_l3_slave(skb->dev)) { + nf_bridge_info_free(skb); state->okfn(state->net, state->sk, skb); return NF_STOLEN; } -- cgit v1.2.3 From bd0e06f0def75ba26572a94e5350324474a55562 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 26 Jan 2023 02:35:21 +0100 Subject: Revert "netfilter: conntrack: fix bug in for_each_sctp_chunk" There is no bug. If sch->length == 0, this would result in an infinite loop, but first caller, do_basic_checks(), errors out in this case. After this change, packets with bogus zero-length chunks are no longer detected as invalid, so revert & add comment wrt. 0 length check. 
Fixes: 98ee00774525 ("netfilter: conntrack: fix bug in for_each_sctp_chunk") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_sctp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 945dd40e7077..011d414038ea 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -142,10 +142,11 @@ static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct) } #endif +/* do_basic_checks ensures sch->length > 0, do not use before */ #define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \ for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \ - ((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch))) && \ - (sch)->length; \ + (offset) < (skb)->len && \ + ((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch))); \ (offset) += (ntohs((sch)->length) + 3) & ~3, (count)++) /* Some validity checks to make sure the chunks are fine */ -- cgit v1.2.3 From 8f35ae17ef565a605de5f409e04bcd49a55d7646 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 30 Jan 2023 11:25:33 -0500 Subject: sctp: do not check hb_timer.expires when resetting hb_timer Commit ba6f5e33bdbb ("sctp: avoid refreshing heartbeat timer too often") tried to avoid refreshing the hb_timer too frequently, and it only allows mod_timer when the new expiry is after hb_timer.expires. This means that even if a much shorter interval for the hb timer gets applied, it still has to wait until the current hb timer times out. In sctp_do_8_2_transport_strike(), when a transport enters PF state, it expects to update the hb timer to resend a heartbeat every rto after calling sctp_transport_reset_hb_timer(), which does not work because of the change mentioned above. The frequent hb_timer refresh was caused by sctp_transport_reset_timers() being called in sctp_outq_flush(), and that call was already removed in the commit above. So we don't have to check hb_timer.expires when resetting the hb_timer, as it is no longer called very often. Fixes: ba6f5e33bdbb ("sctp: avoid refreshing heartbeat timer too often") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Link: https://lore.kernel.org/r/d958c06985713ec84049a2d5664879802710179a.1675095933.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/sctp/transport.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/sctp/transport.c b/net/sctp/transport.c index ca1eba95c293..2f66a2006517 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -196,9 +196,7 @@ void sctp_transport_reset_hb_timer(struct sctp_transport *transport) /* When a data chunk is sent, reset the heartbeat interval. */ expires = jiffies + sctp_transport_timeout(transport); - if ((time_before(transport->hb_timer.expires, expires) || - !timer_pending(&transport->hb_timer)) && - !mod_timer(&transport->hb_timer, + if (!mod_timer(&transport->hb_timer, expires + get_random_u32_below(transport->rto))) sctp_transport_hold(transport); } -- cgit v1.2.3 From 876e8ca8366735a604bac86ff7e2732fc9d85d2d Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Mon, 30 Jan 2023 12:51:48 -0800 Subject: net: fix NULL pointer in skb_segment_list Commit 3a1296a38d0c ("net: Support GRO/GSO fraglist chaining.") introduced UDP listified GRO. The segmentation relies on frag_list being untouched when passing through the network stack.
This assumption can be broken sometimes, where frag_list itself gets pulled into linear area, leaving frag_list being NULL. When this happens it can trigger following NULL pointer dereference, and panic the kernel. Reverse the test condition should fix it. [19185.577801][ C1] BUG: kernel NULL pointer dereference, address: ... [19185.663775][ C1] RIP: 0010:skb_segment_list+0x1cc/0x390 ... [19185.834644][ C1] Call Trace: [19185.841730][ C1] [19185.848563][ C1] __udp_gso_segment+0x33e/0x510 [19185.857370][ C1] inet_gso_segment+0x15b/0x3e0 [19185.866059][ C1] skb_mac_gso_segment+0x97/0x110 [19185.874939][ C1] __skb_gso_segment+0xb2/0x160 [19185.883646][ C1] udp_queue_rcv_skb+0xc3/0x1d0 [19185.892319][ C1] udp_unicast_rcv_skb+0x75/0x90 [19185.900979][ C1] ip_protocol_deliver_rcu+0xd2/0x200 [19185.910003][ C1] ip_local_deliver_finish+0x44/0x60 [19185.918757][ C1] __netif_receive_skb_one_core+0x8b/0xa0 [19185.927834][ C1] process_backlog+0x88/0x130 [19185.935840][ C1] __napi_poll+0x27/0x150 [19185.943447][ C1] net_rx_action+0x27e/0x5f0 [19185.951331][ C1] ? mlx5_cq_tasklet_cb+0x70/0x160 [mlx5_core] [19185.960848][ C1] __do_softirq+0xbc/0x25d [19185.968607][ C1] irq_exit_rcu+0x83/0xb0 [19185.976247][ C1] common_interrupt+0x43/0xa0 [19185.984235][ C1] asm_common_interrupt+0x22/0x40 ... [19186.094106][ C1] Fixes: 3a1296a38d0c ("net: Support GRO/GSO fraglist chaining.") Suggested-by: Daniel Borkmann Reviewed-by: Willem de Bruijn Signed-off-by: Yan Zhai Acked-by: Daniel Borkmann Link: https://lore.kernel.org/r/Y9gt5EUizK1UImEP@debian Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4a0eb5593275..a31ff4d83ecc 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4100,7 +4100,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, skb_shinfo(skb)->frag_list = NULL; - do { + while (list_skb) { nskb = list_skb; list_skb = list_skb->next; @@ -4146,8 +4146,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, if (skb_needs_linearize(nskb, features) && __skb_linearize(nskb)) goto err_linearize; - - } while (list_skb); + } skb->truesize = skb->truesize - delta_truesize; skb->data_len = skb->data_len - delta_len; -- cgit v1.2.3 From 23ca0c2c93406bdb1150659e720bda1cec1fad04 Mon Sep 17 00:00:00 2001 From: Thomas Winter Date: Tue, 31 Jan 2023 16:46:45 +1300 Subject: ip/ip6_gre: Fix changing addr gen mode not generating IPv6 link local address For our point-to-point GRE tunnels, they have IN6_ADDR_GEN_MODE_NONE when they are created then we set IN6_ADDR_GEN_MODE_EUI64 when they come up to generate the IPv6 link local address for the interface. Recently we found that they were no longer generating IPv6 addresses. This issue would also have affected SIT tunnels. Commit e5dd729460ca changed the code path so that GRE tunnels generate an IPv6 address based on the tunnel source address. It also changed the code path so GRE tunnels don't call addrconf_addr_gen in addrconf_dev_config which is called by addrconf_sysctl_addr_gen_mode when the IN6_ADDR_GEN_MODE is changed. This patch aims to fix this issue by moving the code in addrconf_notify which calls the addr gen for GRE and SIT into a separate function and calling it in the places that expect the IPv6 address to be generated. The previous addrconf_dev_config is renamed to addrconf_eth_config since it only expected eth type interfaces and follows the addrconf_gre/sit_config format. 
Part of this change means that an attempt will be made to configure the loopback address when changing addr_gen_mode for lo. This should not be a problem because the address should exist anyway, and if it does already exist then no error is produced. Fixes: e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") Signed-off-by: Thomas Winter Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 49 +++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f7a84a4acffc..f4ba68e096ac 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3447,6 +3447,30 @@ static void addrconf_gre_config(struct net_device *dev) } #endif +static void addrconf_init_auto_addrs(struct net_device *dev) +{ + switch (dev->type) { +#if IS_ENABLED(CONFIG_IPV6_SIT) + case ARPHRD_SIT: + addrconf_sit_config(dev); + break; +#endif +#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE) + case ARPHRD_IP6GRE: + case ARPHRD_IPGRE: + addrconf_gre_config(dev); + break; +#endif + case ARPHRD_LOOPBACK: + init_loopback(dev); + break; + + default: + addrconf_dev_config(dev); + break; + } +} + static int fixup_permanent_addr(struct net *net, struct inet6_dev *idev, struct inet6_ifaddr *ifp) @@ -3615,26 +3639,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, run_pending = 1; } - switch (dev->type) { -#if IS_ENABLED(CONFIG_IPV6_SIT) - case ARPHRD_SIT: - addrconf_sit_config(dev); - break; -#endif -#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE) - case ARPHRD_IP6GRE: - case ARPHRD_IPGRE: - addrconf_gre_config(dev); - break; -#endif - case ARPHRD_LOOPBACK: - init_loopback(dev); - break; - - default: - addrconf_dev_config(dev); - break; - } + addrconf_init_auto_addrs(dev); if (!IS_ERR_OR_NULL(idev)) { if (run_pending) @@ -6397,7 +6402,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, if (idev->cnf.addr_gen_mode != new_val) { idev->cnf.addr_gen_mode = new_val; - addrconf_dev_config(idev->dev); + addrconf_init_auto_addrs(idev->dev); } } else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) { struct net_device *dev; @@ -6408,7 +6413,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, if (idev && idev->cnf.addr_gen_mode != new_val) { idev->cnf.addr_gen_mode = new_val; - addrconf_dev_config(idev->dev); + addrconf_init_auto_addrs(idev->dev); } } } -- cgit v1.2.3 From 30e2291f61f93f7132c060190f8360df52644ec1 Mon Sep 17 00:00:00 2001 From: Thomas Winter Date: Tue, 31 Jan 2023 16:46:46 +1300 Subject: ip/ip6_gre: Fix non-point-to-point tunnel not generating IPv6 link local address We recently found that our non-point-to-point tunnels were not generating any IPv6 link local address and instead generating an IPv6 compat address, breaking IPv6 communication on the tunnel. Previously, addrconf_gre_config would always call addrconf_addr_gen and generate an EUI64 link local address for the tunnel. Then commit e5dd729460ca changed the code path so that add_v4_addrs is called, but this only generates a compat IPv6 address for non-point-to-point tunnels. I assume the compat address is specifically for SIT tunnels, so I have kept that only for SIT; GRE tunnels now always generate link local addresses.
Fixes: e5dd729460ca ("ip/ip6_gre: use the same logic as SIT interfaces when computing v6LL address") Signed-off-by: Thomas Winter Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f4ba68e096ac..faa47f9ea73a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3127,17 +3127,17 @@ static void add_v4_addrs(struct inet6_dev *idev) offset = sizeof(struct in6_addr) - 4; memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4); - if (idev->dev->flags&IFF_POINTOPOINT) { + if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) { + scope = IPV6_ADDR_COMPATv4; + plen = 96; + pflags |= RTF_NONEXTHOP; + } else { if (idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_NONE) return; addr.s6_addr32[0] = htonl(0xfe800000); scope = IFA_LINK; plen = 64; - } else { - scope = IPV6_ADDR_COMPATv4; - plen = 96; - pflags |= RTF_NONEXTHOP; } if (addr.s6_addr32[3]) { -- cgit v1.2.3 From d0553680f94c49bbe0e39eb50d033ba563b4212d Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Mon, 6 Sep 2021 17:42:00 +0800 Subject: can: j1939: fix errant WARN_ON_ONCE in j1939_session_deactivate The conclusion "j1939_session_deactivate() should be called with a session ref-count of at least 2" is incorrect. In some concurrent scenarios, j1939_session_deactivate can be called with the session ref-count less than 2. But there is not any problem because it will check the session active state before session putting in j1939_session_deactivate_locked(). Here is the concurrent scenario of the problem reported by syzbot and my reproduction log. cpu0 cpu1 j1939_xtp_rx_eoma j1939_xtp_rx_abort_one j1939_session_get_by_addr [kref == 2] j1939_session_get_by_addr [kref == 3] j1939_session_deactivate [kref == 2] j1939_session_put [kref == 1] j1939_session_completed j1939_session_deactivate WARN_ON_ONCE(kref < 2) ===================================================== WARNING: CPU: 1 PID: 21 at net/can/j1939/transport.c:1088 j1939_session_deactivate+0x5f/0x70 CPU: 1 PID: 21 Comm: ksoftirqd/1 Not tainted 5.14.0-rc7+ #32 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1 04/01/2014 RIP: 0010:j1939_session_deactivate+0x5f/0x70 Call Trace: j1939_session_deactivate_activate_next+0x11/0x28 j1939_xtp_rx_eoma+0x12a/0x180 j1939_tp_recv+0x4a2/0x510 j1939_can_recv+0x226/0x380 can_rcv_filter+0xf8/0x220 can_receive+0x102/0x220 ? process_backlog+0xf0/0x2c0 can_rcv+0x53/0xf0 __netif_receive_skb_one_core+0x67/0x90 ? process_backlog+0x97/0x2c0 __netif_receive_skb+0x22/0x80 Fixes: 0c71437dd50d ("can: j1939: j1939_session_deactivate(): clarify lifetime of session object") Reported-by: syzbot+9981a614060dcee6eeca@syzkaller.appspotmail.com Signed-off-by: Ziyang Xuan Acked-by: Oleksij Rempel Link: https://lore.kernel.org/all/20210906094200.95868-1-william.xuanziyang@huawei.com Signed-off-by: Marc Kleine-Budde --- net/can/j1939/transport.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 5c722b55fe23..fce9b9ebf13f 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1092,10 +1092,6 @@ static bool j1939_session_deactivate(struct j1939_session *session) bool active; j1939_session_list_lock(priv); - /* This function should be called with a session ref-count of at - * least 2. 
- */ - WARN_ON_ONCE(kref_read(&session->kref) < 2); active = j1939_session_deactivate_locked(session); j1939_session_list_unlock(priv); -- cgit v1.2.3 From 3793301cbaa4a62d83e21f685307da7671f812ab Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 31 Jan 2023 11:56:13 +0100 Subject: can: raw: fix CAN FD frame transmissions over CAN XL devices A CAN XL device is always capable to process CAN FD frames. The former check when sending CAN FD frames relied on the existence of a CAN FD device and did not check for a CAN XL device that would be correct too. With this patch the CAN FD feature is enabled automatically when CAN XL is switched on - and CAN FD cannot be switch off while CAN XL is enabled. This precondition also leads to a clean up and reduction of checks in the hot path in raw_rcv() and raw_sendmsg(). Some conditions are reordered to handle simple checks first. changes since v1: https://lore.kernel.org/all/20230131091012.50553-1-socketcan@hartkopp.net - fixed typo: devive -> device changes since v2: https://lore.kernel.org/all/20230131091824.51026-1-socketcan@hartkopp.net/ - reorder checks in if statements to handle simple checks first Fixes: 626332696d75 ("can: raw: add CAN XL support") Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20230131105613.55228-1-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- net/can/raw.c | 47 +++++++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/can/raw.c b/net/can/raw.c index 81071cdb0301..ba86782ba8bb 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -132,8 +132,8 @@ static void raw_rcv(struct sk_buff *oskb, void *data) return; /* make sure to not pass oversized frames to the socket */ - if ((can_is_canfd_skb(oskb) && !ro->fd_frames && !ro->xl_frames) || - (can_is_canxl_skb(oskb) && !ro->xl_frames)) + if ((!ro->fd_frames && can_is_canfd_skb(oskb)) || + (!ro->xl_frames && can_is_canxl_skb(oskb))) return; /* eliminate multiple filter matches for the same skb */ @@ -670,6 +670,11 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, if (copy_from_sockptr(&ro->fd_frames, optval, optlen)) return -EFAULT; + /* Enabling CAN XL includes CAN FD */ + if (ro->xl_frames && !ro->fd_frames) { + ro->fd_frames = ro->xl_frames; + return -EINVAL; + } break; case CAN_RAW_XL_FRAMES: @@ -679,6 +684,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, if (copy_from_sockptr(&ro->xl_frames, optval, optlen)) return -EFAULT; + /* Enabling CAN XL includes CAN FD */ + if (ro->xl_frames) + ro->fd_frames = ro->xl_frames; break; case CAN_RAW_JOIN_FILTERS: @@ -786,6 +794,25 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, return 0; } +static bool raw_bad_txframe(struct raw_sock *ro, struct sk_buff *skb, int mtu) +{ + /* Classical CAN -> no checks for flags and device capabilities */ + if (can_is_can_skb(skb)) + return false; + + /* CAN FD -> needs to be enabled and a CAN FD or CAN XL device */ + if (ro->fd_frames && can_is_canfd_skb(skb) && + (mtu == CANFD_MTU || can_is_canxl_dev_mtu(mtu))) + return false; + + /* CAN XL -> needs to be enabled and a CAN XL device */ + if (ro->xl_frames && can_is_canxl_skb(skb) && + can_is_canxl_dev_mtu(mtu)) + return false; + + return true; +} + static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; @@ -833,20 +860,8 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) goto free_skb; err = 
-EINVAL; - if (ro->xl_frames && can_is_canxl_dev_mtu(dev->mtu)) { - /* CAN XL, CAN FD and Classical CAN */ - if (!can_is_canxl_skb(skb) && !can_is_canfd_skb(skb) && - !can_is_can_skb(skb)) - goto free_skb; - } else if (ro->fd_frames && dev->mtu == CANFD_MTU) { - /* CAN FD and Classical CAN */ - if (!can_is_canfd_skb(skb) && !can_is_can_skb(skb)) - goto free_skb; - } else { - /* Classical CAN */ - if (!can_is_can_skb(skb)) - goto free_skb; - } + if (raw_bad_txframe(ro, skb, dev->mtu)) + goto free_skb; sockcm_init(&sockc, sk); if (msg->msg_controllen) { -- cgit v1.2.3 From 823b2e42720f96f277940c37ea438b7c5ead51a4 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 12 Jan 2023 20:23:47 +0100 Subject: can: isotp: handle wait_event_interruptible() return values When wait_event_interruptible() has been interrupted by a signal the tx.state value might not be ISOTP_IDLE. Force the state machines into idle state to inhibit the timer handlers to continue working. Fixes: 866337865f37 ("can: isotp: fix tx state handling for echo tx processing") Cc: stable@vger.kernel.org Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20230112192347.1944-1-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- net/can/isotp.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/can/isotp.c b/net/can/isotp.c index 608f8c24ae46..dae421f6c901 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1162,6 +1162,10 @@ static int isotp_release(struct socket *sock) /* wait for complete transmission of current pdu */ wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); + /* force state machines to be idle also when a signal occurred */ + so->tx.state = ISOTP_IDLE; + so->rx.state = ISOTP_IDLE; + spin_lock(&isotp_notifier_lock); while (isotp_busy_notifier == so) { spin_unlock(&isotp_notifier_lock); -- cgit v1.2.3 From 4f027cba8216f42a18b544842efab134f8b1f9f4 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 4 Jan 2023 15:57:01 +0100 Subject: can: isotp: split tx timer into transmission and timeout The timer for the transmission of isotp PDUs formerly had two functions: 1. send two consecutive frames with a given time gap 2. monitor the timeouts for flow control frames and the echo frames This led to larger txstate checks and potentially to a problem discovered by syzbot which enabled the panic_on_warn feature while testing. The former 'txtimer' function is split into 'txfrtimer' and 'txtimer' to handle the two above functionalities with separate timer callbacks. The two simplified timers now run in one-shot mode and make the state transitions (especially with isotp_rcv_echo) better understandable. 
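The one-shot re-arm pattern that the split relies on, reduced to a minimal sketch (function and field names here are illustrative, not the actual isotp code):

```c
#include <linux/hrtimer.h>
#include <linux/ktime.h>

/* One-shot style: the callback never returns HRTIMER_RESTART. Whoever
 * needs another shot (in isotp, the echo handler) re-arms the timer
 * with hrtimer_start() instead, which keeps each timer's state
 * transition in exactly one place.
 */
static enum hrtimer_restart tx_gap_timer(struct hrtimer *t)
{
	/* send the next consecutive frame here, then let the separate
	 * timeout timer supervise the echo
	 */
	return HRTIMER_NORESTART;
}

static void tx_gap_setup(struct hrtimer *t)
{
	hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
	t->function = tx_gap_timer;
	/* illustrative 1 ms gap; the real gap comes from the FC frame */
	hrtimer_start(t, ms_to_ktime(1), HRTIMER_MODE_REL_SOFT);
}
```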
Fixes: 866337865f37 ("can: isotp: fix tx state handling for echo tx processing") Reported-by: syzbot+5aed6c3aaba661f5b917@syzkaller.appspotmail.com Cc: stable@vger.kernel.org # >= v6.0 Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20230104145701.2422-1-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- net/can/isotp.c | 65 +++++++++++++++++++++++++-------------------------------- 1 file changed, 29 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/can/isotp.c b/net/can/isotp.c index dae421f6c901..fc81d77724a1 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -140,7 +140,7 @@ struct isotp_sock { canid_t rxid; ktime_t tx_gap; ktime_t lastrxcf_tstamp; - struct hrtimer rxtimer, txtimer; + struct hrtimer rxtimer, txtimer, txfrtimer; struct can_isotp_options opt; struct can_isotp_fc_options rxfc, txfc; struct can_isotp_ll_options ll; @@ -871,7 +871,7 @@ static void isotp_rcv_echo(struct sk_buff *skb, void *data) } /* start timer to send next consecutive frame with correct delay */ - hrtimer_start(&so->txtimer, so->tx_gap, HRTIMER_MODE_REL_SOFT); + hrtimer_start(&so->txfrtimer, so->tx_gap, HRTIMER_MODE_REL_SOFT); } static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer) @@ -879,49 +879,39 @@ static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer) struct isotp_sock *so = container_of(hrtimer, struct isotp_sock, txtimer); struct sock *sk = &so->sk; - enum hrtimer_restart restart = HRTIMER_NORESTART; - switch (so->tx.state) { - case ISOTP_SENDING: + /* don't handle timeouts in IDLE state */ + if (so->tx.state == ISOTP_IDLE) + return HRTIMER_NORESTART; - /* cfecho should be consumed by isotp_rcv_echo() here */ - if (!so->cfecho) { - /* start timeout for unlikely lost echo skb */ - hrtimer_set_expires(&so->txtimer, - ktime_add(ktime_get(), - ktime_set(ISOTP_ECHO_TIMEOUT, 0))); - restart = HRTIMER_RESTART; + /* we did not get any flow control or echo frame in time */ - /* push out the next consecutive frame */ - isotp_send_cframe(so); - break; - } - - /* cfecho has not been cleared in isotp_rcv_echo() */ - pr_notice_once("can-isotp: cfecho %08X timeout\n", so->cfecho); - fallthrough; + /* report 'communication error on send' */ + sk->sk_err = ECOMM; + if (!sock_flag(sk, SOCK_DEAD)) + sk_error_report(sk); - case ISOTP_WAIT_FC: - case ISOTP_WAIT_FIRST_FC: + /* reset tx state */ + so->tx.state = ISOTP_IDLE; + wake_up_interruptible(&so->wait); - /* we did not get any flow control frame in time */ + return HRTIMER_NORESTART; +} - /* report 'communication error on send' */ - sk->sk_err = ECOMM; - if (!sock_flag(sk, SOCK_DEAD)) - sk_error_report(sk); +static enum hrtimer_restart isotp_txfr_timer_handler(struct hrtimer *hrtimer) +{ + struct isotp_sock *so = container_of(hrtimer, struct isotp_sock, + txfrtimer); - /* reset tx state */ - so->tx.state = ISOTP_IDLE; - wake_up_interruptible(&so->wait); - break; + /* start echo timeout handling and cover below protocol error */ + hrtimer_start(&so->txtimer, ktime_set(ISOTP_ECHO_TIMEOUT, 0), + HRTIMER_MODE_REL_SOFT); - default: - WARN_ONCE(1, "can-isotp: tx timer state %08X cfecho %08X\n", - so->tx.state, so->cfecho); - } + /* cfecho should be consumed by isotp_rcv_echo() here */ + if (so->tx.state == ISOTP_SENDING && !so->cfecho) + isotp_send_cframe(so); - return restart; + return HRTIMER_NORESTART; } static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) @@ -1198,6 +1188,7 @@ static int isotp_release(struct socket *sock) } } + 
hrtimer_cancel(&so->txfrtimer); hrtimer_cancel(&so->txtimer); hrtimer_cancel(&so->rxtimer); @@ -1601,6 +1592,8 @@ static int isotp_init(struct sock *sk) so->rxtimer.function = isotp_rx_timer_handler; hrtimer_init(&so->txtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); so->txtimer.function = isotp_tx_timer_handler; + hrtimer_init(&so->txfrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); + so->txfrtimer.function = isotp_txfr_timer_handler; init_waitqueue_head(&so->wait); spin_lock_init(&so->rx_lock); -- cgit v1.2.3 From 0c598aed445eb45b0ee7ba405f7ece99ee349c30 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Thu, 2 Feb 2023 00:02:18 +0300 Subject: net: openvswitch: fix flow memory leak in ovs_flow_cmd_new Syzkaller reports a memory leak of new_flow in ovs_flow_cmd_new() as it is not freed when an allocation of a key fails. BUG: memory leak unreferenced object 0xffff888116668000 (size 632): comm "syz-executor231", pid 1090, jiffies 4294844701 (age 18.871s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000defa3494>] kmem_cache_zalloc include/linux/slab.h:654 [inline] [<00000000defa3494>] ovs_flow_alloc+0x19/0x180 net/openvswitch/flow_table.c:77 [<00000000c67d8873>] ovs_flow_cmd_new+0x1de/0xd40 net/openvswitch/datapath.c:957 [<0000000010a539a8>] genl_family_rcv_msg_doit+0x22d/0x330 net/netlink/genetlink.c:739 [<00000000dff3302d>] genl_family_rcv_msg net/netlink/genetlink.c:783 [inline] [<00000000dff3302d>] genl_rcv_msg+0x328/0x590 net/netlink/genetlink.c:800 [<000000000286dd87>] netlink_rcv_skb+0x153/0x430 net/netlink/af_netlink.c:2515 [<0000000061fed410>] genl_rcv+0x24/0x40 net/netlink/genetlink.c:811 [<000000009dc0f111>] netlink_unicast_kernel net/netlink/af_netlink.c:1313 [inline] [<000000009dc0f111>] netlink_unicast+0x545/0x7f0 net/netlink/af_netlink.c:1339 [<000000004a5ee816>] netlink_sendmsg+0x8e7/0xde0 net/netlink/af_netlink.c:1934 [<00000000482b476f>] sock_sendmsg_nosec net/socket.c:651 [inline] [<00000000482b476f>] sock_sendmsg+0x152/0x190 net/socket.c:671 [<00000000698574ba>] ____sys_sendmsg+0x70a/0x870 net/socket.c:2356 [<00000000d28d9e11>] ___sys_sendmsg+0xf3/0x170 net/socket.c:2410 [<0000000083ba9120>] __sys_sendmsg+0xe5/0x1b0 net/socket.c:2439 [<00000000c00628f8>] do_syscall_64+0x30/0x40 arch/x86/entry/common.c:46 [<000000004abfdcf4>] entry_SYSCALL_64_after_hwframe+0x61/0xc6 To fix this the patch rearranges the goto labels to reflect the order of object allocations and adds appropriate goto statements on the error paths. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. 
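The unwind ordering that the patch restores follows the usual kernel idiom: allocate in order, free in reverse, with each error label undoing only what was set up before the failure. A minimal sketch, with plain kzalloc()/kfree() standing in for the real flow/key allocators (names are illustrative, not taken from the patch):

```c
#include <linux/slab.h>
#include <linux/types.h>

static int example_new(bool parse_ok)
{
	void *flow, *key;
	int err;

	flow = kzalloc(64, GFP_KERNEL);		/* stands in for ovs_flow_alloc() */
	if (!flow)
		return -ENOMEM;

	key = kzalloc(64, GFP_KERNEL);		/* stands in for the key allocation */
	if (!key) {
		err = -ENOMEM;
		goto err_kfree_flow;		/* key failed: free only the flow */
	}

	err = -EINVAL;
	if (!parse_ok)
		goto err_kfree_key;		/* later failure: free key, then flow */

	kfree(key);
	kfree(flow);
	return 0;

err_kfree_key:
	kfree(key);
err_kfree_flow:
	kfree(flow);
	return err;
}
```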
Fixes: 68bb10101e6b ("openvswitch: Fix flow lookup to use unmasked key") Signed-off-by: Fedor Pchelkin Signed-off-by: Alexey Khoroshilov Acked-by: Eelco Chaudron Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230201210218.361970-1-pchelkin@ispras.ru Signed-off-by: Jakub Kicinski --- net/openvswitch/datapath.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index a71795355aec..fcee6012293b 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1004,14 +1004,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) key = kzalloc(sizeof(*key), GFP_KERNEL); if (!key) { error = -ENOMEM; - goto err_kfree_key; + goto err_kfree_flow; } ovs_match_init(&match, key, false, &mask); error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); if (error) - goto err_kfree_flow; + goto err_kfree_key; ovs_flow_mask_key(&new_flow->key, key, true, &mask); @@ -1019,14 +1019,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID], key, log); if (error) - goto err_kfree_flow; + goto err_kfree_key; /* Validate actions. */ error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, &acts, log); if (error) { OVS_NLERR(log, "Flow actions may not be safe on all matching packets."); - goto err_kfree_flow; + goto err_kfree_key; } reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false, @@ -1126,10 +1126,10 @@ err_unlock_ovs: kfree_skb(reply); err_kfree_acts: ovs_nla_free_flow_actions(acts); -err_kfree_flow: - ovs_flow_free(new_flow, false); err_kfree_key: kfree(key); +err_kfree_flow: + ovs_flow_free(new_flow, false); error: return error; } -- cgit v1.2.3