From f15f29fd4779be8a418b66e9d52979bb6d6c2325 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 7 Sep 2023 08:22:33 +0200
Subject: netfilter: nf_tables: disallow rule removal from chain binding

Chain binding only requires the rule addition/insertion command within
the same transaction. Removal of rules from chain bindings within the
same transaction makes no sense, userspace does not utilize this
feature. Replace nft_chain_is_bound() check to nft_chain_binding() in
rule deletion commands. Replace command implies a rule deletion, reject
this command too.

Rule flush command can also safely rely on this nft_chain_binding()
check because unbound chains are not allowed since 62e1e94b246e
("netfilter: nf_tables: reject unbound chain set before commit phase").

Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING")
Reported-by: Kevin Rich <kevinrich1337@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index e429ebba74b3..895c6e4fba97 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1432,7 +1432,7 @@ static int nft_flush_table(struct nft_ctx *ctx)
 		if (!nft_is_active_next(ctx->net, chain))
 			continue;
 
-		if (nft_chain_is_bound(chain))
+		if (nft_chain_binding(chain))
 			continue;
 
 		ctx->chain = chain;
@@ -1477,7 +1477,7 @@ static int nft_flush_table(struct nft_ctx *ctx)
 		if (!nft_is_active_next(ctx->net, chain))
 			continue;
 
-		if (nft_chain_is_bound(chain))
+		if (nft_chain_binding(chain))
 			continue;
 
 		ctx->chain = chain;
@@ -2910,6 +2910,9 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
 		return PTR_ERR(chain);
 	}
 
+	if (nft_chain_binding(chain))
+		return -EOPNOTSUPP;
+
 	nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
 
 	if (nla[NFTA_CHAIN_HOOK]) {
@@ -3971,6 +3974,11 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
 	}
 
 	if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
+		if (nft_chain_binding(chain)) {
+			err = -EOPNOTSUPP;
+			goto err_destroy_flow_rule;
+		}
+
 		err = nft_delrule(&ctx, old_rule);
 		if (err < 0)
 			goto err_destroy_flow_rule;
@@ -4078,7 +4086,7 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
 			NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
 			return PTR_ERR(chain);
 		}
-		if (nft_chain_is_bound(chain))
+		if (nft_chain_binding(chain))
 			return -EOPNOTSUPP;
 	}
 
@@ -4112,7 +4120,7 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
 		list_for_each_entry(chain, &table->chains, list) {
 			if (!nft_is_active_next(net, chain))
 				continue;
-			if (nft_chain_is_bound(chain))
+			if (nft_chain_binding(chain))
 				continue;
 
 			ctx.chain = chain;
@@ -11054,7 +11062,7 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
 	ctx.family = table->family;
 	ctx.table = table;
 	list_for_each_entry(chain, &table->chains, list) {
-		if (nft_chain_is_bound(chain))
+		if (nft_chain_binding(chain))
 			continue;
 
 		ctx.chain = chain;
-- 
cgit v1.2.3


From 96b33300fba880ec0eafcf3d82486f3463b4b6da Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 5 Sep 2023 12:52:24 +0200
Subject: netfilter: nft_set_rbtree: use read spinlock to avoid datapath
 contention

rbtree GC does not modify the datastructure, instead it collects expired
elements and it enqueues a GC transaction. Use a read spinlock instead
to avoid data contention while GC worker is running.

Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_set_rbtree.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index f250b5399344..70491ba98dec 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -622,8 +622,7 @@ static void nft_rbtree_gc(struct work_struct *work)
 	if (!gc)
 		goto done;
 
-	write_lock_bh(&priv->lock);
-	write_seqcount_begin(&priv->count);
+	read_lock_bh(&priv->lock);
 	for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
 
 		/* Ruleset has been updated, try later. */
@@ -673,8 +672,7 @@ dead_elem:
 	gc = nft_trans_gc_catchall(gc, gc_seq);
 
 try_later:
-	write_seqcount_end(&priv->count);
-	write_unlock_bh(&priv->lock);
+	read_unlock_bh(&priv->lock);
 
 	if (gc)
 		nft_trans_gc_queue_async_done(gc);
-- 
cgit v1.2.3


From 4a9e12ea7e70223555ec010bec9f711089ce96f6 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 6 Sep 2023 15:07:53 +0200
Subject: netfilter: nft_set_pipapo: call nft_trans_gc_queue_sync() in catchall
 GC

pipapo needs to enqueue GC transactions for catchall elements through
nft_trans_gc_queue_sync(). Add nft_trans_gc_catchall_sync() and
nft_trans_gc_catchall_async() to handle GC transaction queueing
accordingly.

Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  5 +++--
 net/netfilter/nf_tables_api.c     | 22 +++++++++++++++++++---
 net/netfilter/nft_set_hash.c      |  2 +-
 net/netfilter/nft_set_pipapo.c    |  2 +-
 net/netfilter/nft_set_rbtree.c    |  2 +-
 5 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index dd40c75011d2..a4455f4995ab 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1700,8 +1700,9 @@ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans);
 
 void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv);
 
-struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
-					   unsigned int gc_seq);
+struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc,
+						 unsigned int gc_seq);
+struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc);
 
 void nft_setelem_data_deactivate(const struct net *net,
 				 const struct nft_set *set,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 895c6e4fba97..7b59311931fb 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -9613,8 +9613,9 @@ void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
 	call_rcu(&trans->rcu, nft_trans_gc_trans_free);
 }
 
-struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
-					   unsigned int gc_seq)
+static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+						  unsigned int gc_seq,
+						  bool sync)
 {
 	struct nft_set_elem_catchall *catchall;
 	const struct nft_set *set = gc->set;
@@ -9630,7 +9631,11 @@ struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
 
 		nft_set_elem_dead(ext);
 dead_elem:
-		gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+		if (sync)
+			gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+		else
+			gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+
 		if (!gc)
 			return NULL;
 
@@ -9640,6 +9645,17 @@ dead_elem:
 	return gc;
 }
 
+struct nft_trans_gc *nft_trans_gc_catchall_async(struct nft_trans_gc *gc,
+						 unsigned int gc_seq)
+{
+	return nft_trans_gc_catchall(gc, gc_seq, false);
+}
+
+struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
+{
+	return nft_trans_gc_catchall(gc, 0, true);
+}
+
 static void nf_tables_module_autoload_cleanup(struct net *net)
 {
 	struct nftables_pernet *nft_net = nft_pernet(net);
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 524763659f25..eca20dc60138 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -372,7 +372,7 @@ dead_elem:
 		nft_trans_gc_elem_add(gc, he);
 	}
 
-	gc = nft_trans_gc_catchall(gc, gc_seq);
+	gc = nft_trans_gc_catchall_async(gc, gc_seq);
 
 try_later:
 	/* catchall list iteration requires rcu read side lock. */
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 6af9c9ed4b5c..10b89ac74476 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1610,7 +1610,7 @@ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
 		}
 	}
 
-	gc = nft_trans_gc_catchall(gc, 0);
+	gc = nft_trans_gc_catchall_sync(gc);
 	if (gc) {
 		nft_trans_gc_queue_sync_done(gc);
 		priv->last_gc = jiffies;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 70491ba98dec..487572dcd614 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -669,7 +669,7 @@ dead_elem:
 		nft_trans_gc_elem_add(gc, rbe);
 	}
 
-	gc = nft_trans_gc_catchall(gc, gc_seq);
+	gc = nft_trans_gc_catchall_async(gc, gc_seq);
 
 try_later:
 	read_unlock_bh(&priv->lock);
-- 
cgit v1.2.3


From 6d365eabce3c018a80f6e0379b17df2abb17405e Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 6 Sep 2023 17:22:58 +0200
Subject: netfilter: nft_set_pipapo: stop GC iteration if GC transaction
 allocation fails

nft_trans_gc_queue_sync() enqueues the GC transaction and it allocates a
new one. If this allocation fails, then stop this GC sync run and retry
later.

Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_set_pipapo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 10b89ac74476..c0dcc40de358 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1596,7 +1596,7 @@ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
 
 			gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
 			if (!gc)
-				break;
+				return;
 
 			nft_pipapo_gc_deactivate(net, set, e);
 			pipapo_drop(m, rulemap);
-- 
cgit v1.2.3


From b079155faae94e9b3ab9337e82100a914ebb4e8d Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 8 Sep 2023 01:39:43 +0200
Subject: netfilter: nft_set_hash: try later when GC hits EAGAIN on iteration

Skip GC run if iterator rewinds to the beginning with EAGAIN, otherwise GC
might collect the same element more than once.

Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_set_hash.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index eca20dc60138..2013de934cef 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -338,12 +338,9 @@ static void nft_rhash_gc(struct work_struct *work)
 
 	while ((he = rhashtable_walk_next(&hti))) {
 		if (IS_ERR(he)) {
-			if (PTR_ERR(he) != -EAGAIN) {
-				nft_trans_gc_destroy(gc);
-				gc = NULL;
-				goto try_later;
-			}
-			continue;
+			nft_trans_gc_destroy(gc);
+			gc = NULL;
+			goto try_later;
 		}
 
 		/* Ruleset has been updated, try later. */
-- 
cgit v1.2.3


From 41bc46c12a8053a1b3279a379bd6b5e87b045b85 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 7 Sep 2023 22:06:51 +0200
Subject: bpf: Add override check to kprobe multi link attach

Currently the multi_kprobe link attach does not check error
injection list for programs with bpf_override_return helper
and allows them to attach anywhere. Adding the missing check.

Fixes: 0dcac2725406 ("bpf: Add multi kprobe link")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Reviewed-by: Alan Maguire <alan.maguire@oracle.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/bpf/20230907200652.926951-1-jolsa@kernel.org
---
 kernel/trace/bpf_trace.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index a7264b2c17ad..c1c1af63ced2 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2853,6 +2853,17 @@ static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u3
 	return arr.mods_cnt;
 }
 
+static int addrs_check_error_injection_list(unsigned long *addrs, u32 cnt)
+{
+	u32 i;
+
+	for (i = 0; i < cnt; i++) {
+		if (!within_error_injection_list(addrs[i]))
+			return -EINVAL;
+	}
+	return 0;
+}
+
 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 {
 	struct bpf_kprobe_multi_link *link = NULL;
@@ -2930,6 +2941,11 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
 			goto error;
 	}
 
+	if (prog->kprobe_override && addrs_check_error_injection_list(addrs, cnt)) {
+		err = -EINVAL;
+		goto error;
+	}
+
 	link = kzalloc(sizeof(*link), GFP_KERNEL);
 	if (!link) {
 		err = -ENOMEM;
-- 
cgit v1.2.3


From 7182e56411b9a8b76797ed7b6095fc84be76dfb0 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 7 Sep 2023 22:06:52 +0200
Subject: selftests/bpf: Add kprobe_multi override test

Adding test that tries to attach program with bpf_override_return
helper to function not within error injection list.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230907200652.926951-2-jolsa@kernel.org
---
 .../selftests/bpf/prog_tests/kprobe_multi_test.c   | 37 ++++++++++++++++++++++
 .../selftests/bpf/progs/kprobe_multi_override.c    | 13 ++++++++
 2 files changed, 50 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/kprobe_multi_override.c

diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index 179fe300534f..e05477b210a5 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -3,6 +3,7 @@
 #include "kprobe_multi.skel.h"
 #include "trace_helpers.h"
 #include "kprobe_multi_empty.skel.h"
+#include "kprobe_multi_override.skel.h"
 #include "bpf/libbpf_internal.h"
 #include "bpf/hashmap.h"
 
@@ -453,6 +454,40 @@ cleanup:
 	}
 }
 
+void test_attach_override(void)
+{
+	struct kprobe_multi_override *skel = NULL;
+	struct bpf_link *link = NULL;
+
+	skel = kprobe_multi_override__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load"))
+		goto cleanup;
+
+	/* The test_override calls bpf_override_return so it should fail
+	 * to attach to bpf_fentry_test1 function, which is not on error
+	 * injection list.
+	 */
+	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_override,
+						     "bpf_fentry_test1", NULL);
+	if (!ASSERT_ERR_PTR(link, "override_attached_bpf_fentry_test1")) {
+		bpf_link__destroy(link);
+		goto cleanup;
+	}
+
+	/* The should_fail_bio function is on error injection list,
+	 * attach should succeed.
+	 */
+	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_override,
+						     "should_fail_bio", NULL);
+	if (!ASSERT_OK_PTR(link, "override_attached_should_fail_bio"))
+		goto cleanup;
+
+	bpf_link__destroy(link);
+
+cleanup:
+	kprobe_multi_override__destroy(skel);
+}
+
 void serial_test_kprobe_multi_bench_attach(void)
 {
 	if (test__start_subtest("kernel"))
@@ -480,4 +515,6 @@ void test_kprobe_multi_test(void)
 		test_attach_api_syms();
 	if (test__start_subtest("attach_api_fails"))
 		test_attach_api_fails();
+	if (test__start_subtest("attach_override"))
+		test_attach_override();
 }
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_override.c b/tools/testing/selftests/bpf/progs/kprobe_multi_override.c
new file mode 100644
index 000000000000..28f8487c9059
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_override.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("kprobe.multi")
+int test_override(struct pt_regs *ctx)
+{
+	bpf_override_return(ctx, 123);
+	return 0;
+}
-- 
cgit v1.2.3


From 23a3bfd4ba7acd36abf52b78605f61b21bdac216 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 10 Sep 2023 19:04:45 +0200
Subject: netfilter: nf_tables: disallow element removal on anonymous sets

Anonymous sets need to be populated once at creation and then they are
bound to rule since 938154b93be8 ("netfilter: nf_tables: reject unbound
anonymous set before commit phase"), otherwise transaction reports
EINVAL.

Userspace does not need to delete elements of anonymous sets that are
not yet bound, reject this with EOPNOTSUPP.

From flush command path, skip anonymous sets, they are expected to be
bound already. Otherwise, EINVAL is hit at the end of this transaction
for unbound sets.

Fixes: 96518518cc41 ("netfilter: add nftables")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7b59311931fb..c1e485aee763 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1446,8 +1446,7 @@ static int nft_flush_table(struct nft_ctx *ctx)
 		if (!nft_is_active_next(ctx->net, set))
 			continue;
 
-		if (nft_set_is_anonymous(set) &&
-		    !list_empty(&set->bindings))
+		if (nft_set_is_anonymous(set))
 			continue;
 
 		err = nft_delset(ctx, set);
@@ -7191,8 +7190,10 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
 	if (IS_ERR(set))
 		return PTR_ERR(set);
 
-	if (!list_empty(&set->bindings) &&
-	    (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS)))
+	if (nft_set_is_anonymous(set))
+		return -EOPNOTSUPP;
+
+	if (!list_empty(&set->bindings) && (set->flags & NFT_SET_CONSTANT))
 		return -EBUSY;
 
 	nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
-- 
cgit v1.2.3


From d52b59315bf5e86e83c00bfae47cedd388dad6a8 Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Fri, 8 Sep 2023 21:39:20 +0800
Subject: bpf: Adjust size_index according to the value of KMALLOC_MIN_SIZE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The following warning was reported when running "./test_progs -a
link_api -a linked_list" on a RISC-V QEMU VM:

  ------------[ cut here ]------------
  WARNING: CPU: 3 PID: 261 at kernel/bpf/memalloc.c:342 bpf_mem_refill
  Modules linked in: bpf_testmod(OE)
  CPU: 3 PID: 261 Comm: test_progs- ... 6.5.0-rc5-01743-gdcb152bb8328 #2
  Hardware name: riscv-virtio,qemu (DT)
  epc : bpf_mem_refill+0x1fc/0x206
   ra : irq_work_single+0x68/0x70
  epc : ffffffff801b1bc4 ra : ffffffff8015fe84 sp : ff2000000001be20
   gp : ffffffff82d26138 tp : ff6000008477a800 t0 : 0000000000046600
   t1 : ffffffff812b6ddc t2 : 0000000000000000 s0 : ff2000000001be70
   s1 : ff5ffffffffe8998 a0 : ff5ffffffffe8998 a1 : ff600003fef4b000
   a2 : 000000000000003f a3 : ffffffff80008250 a4 : 0000000000000060
   a5 : 0000000000000080 a6 : 0000000000000000 a7 : 0000000000735049
   s2 : ff5ffffffffe8998 s3 : 0000000000000022 s4 : 0000000000001000
   s5 : 0000000000000007 s6 : ff5ffffffffe8570 s7 : ffffffff82d6bd30
   s8 : 000000000000003f s9 : ffffffff82d2c5e8 s10: 000000000000ffff
   s11: ffffffff82d2c5d8 t3 : ffffffff81ea8f28 t4 : 0000000000000000
   t5 : ff6000008fd28278 t6 : 0000000000040000
  [<ffffffff801b1bc4>] bpf_mem_refill+0x1fc/0x206
  [<ffffffff8015fe84>] irq_work_single+0x68/0x70
  [<ffffffff8015feb4>] irq_work_run_list+0x28/0x36
  [<ffffffff8015fefa>] irq_work_run+0x38/0x66
  [<ffffffff8000828a>] handle_IPI+0x3a/0xb4
  [<ffffffff800a5c3a>] handle_percpu_devid_irq+0xa4/0x1f8
  [<ffffffff8009fafa>] generic_handle_domain_irq+0x28/0x36
  [<ffffffff800ae570>] ipi_mux_process+0xac/0xfa
  [<ffffffff8000a8ea>] sbi_ipi_handle+0x2e/0x88
  [<ffffffff8009fafa>] generic_handle_domain_irq+0x28/0x36
  [<ffffffff807ee70e>] riscv_intc_irq+0x36/0x4e
  [<ffffffff812b5d3a>] handle_riscv_irq+0x54/0x86
  [<ffffffff812b6904>] do_irq+0x66/0x98
  ---[ end trace 0000000000000000 ]---

The warning is due to WARN_ON_ONCE(tgt->unit_size != c->unit_size) in
free_bulk(). The direct reason is that a object is allocated and
freed by bpf_mem_caches with different unit_size.

The root cause is that KMALLOC_MIN_SIZE is 64 and there is no 96-bytes
slab cache in the specific VM. When linked_list test allocates a
72-bytes object through bpf_obj_new(), bpf_global_ma will allocate it
from a bpf_mem_cache with 96-bytes unit_size, but this bpf_mem_cache is
backed by 128-bytes slab cache. When the object is freed, bpf_mem_free()
uses ksize() to choose the corresponding bpf_mem_cache. Because the
object is allocated from 128-bytes slab cache, ksize() returns 128,
bpf_mem_free() chooses a 128-bytes bpf_mem_cache to free the object and
triggers the warning.

A similar warning will also be reported when using CONFIG_SLAB instead
of CONFIG_SLUB in a x86-64 kernel. Because CONFIG_SLUB defines
KMALLOC_MIN_SIZE as 8 but CONFIG_SLAB defines KMALLOC_MIN_SIZE as 32.

An alternative fix is to use kmalloc_size_round() in bpf_mem_alloc() to
choose a bpf_mem_cache which has the same unit_size with the backing
slab cache, but it may introduce performance degradation, so fix the
warning by adjusting the indexes in size_index according to the value of
KMALLOC_MIN_SIZE just like setup_kmalloc_cache_index_table() does.

Fixes: 822fb26bdb55 ("bpf: Add a hint to allocated objects.")
Reported-by: Björn Töpel <bjorn@kernel.org>
Closes: https://lore.kernel.org/bpf/87jztjmmy4.fsf@all.your.base.are.belong.to.us
Signed-off-by: Hou Tao <houtao1@huawei.com>
Link: https://lore.kernel.org/r/20230908133923.2675053-2-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/memalloc.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 9c49ae53deaf..98d9e96fba3c 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -916,3 +916,41 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
 
 	return !ret ? NULL : ret + LLIST_NODE_SZ;
 }
+
+/* Most of the logic is taken from setup_kmalloc_cache_index_table() */
+static __init int bpf_mem_cache_adjust_size(void)
+{
+	unsigned int size, index;
+
+	/* Normally KMALLOC_MIN_SIZE is 8-bytes, but it can be
+	 * up-to 256-bytes.
+	 */
+	size = KMALLOC_MIN_SIZE;
+	if (size <= 192)
+		index = size_index[(size - 1) / 8];
+	else
+		index = fls(size - 1) - 1;
+	for (size = 8; size < KMALLOC_MIN_SIZE && size <= 192; size += 8)
+		size_index[(size - 1) / 8] = index;
+
+	/* The minimal alignment is 64-bytes, so disable 96-bytes cache and
+	 * use 128-bytes cache instead.
+	 */
+	if (KMALLOC_MIN_SIZE >= 64) {
+		index = size_index[(128 - 1) / 8];
+		for (size = 64 + 8; size <= 96; size += 8)
+			size_index[(size - 1) / 8] = index;
+	}
+
+	/* The minimal alignment is 128-bytes, so disable 192-bytes cache and
+	 * use 256-bytes cache instead.
+	 */
+	if (KMALLOC_MIN_SIZE >= 128) {
+		index = fls(256 - 1) - 1;
+		for (size = 128 + 8; size <= 192; size += 8)
+			size_index[(size - 1) / 8] = index;
+	}
+
+	return 0;
+}
+subsys_initcall(bpf_mem_cache_adjust_size);
-- 
cgit v1.2.3


From b1d53958b69312e43c118d4093d8f93d3f6f80af Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Fri, 8 Sep 2023 21:39:21 +0800
Subject: bpf: Don't prefill for unused bpf_mem_cache

When the unit_size of a bpf_mem_cache is unmatched with the object_size
of the underlying slab cache, the bpf_mem_cache will not be used, and
the allocation will be redirected to a bpf_mem_cache with a bigger
unit_size instead, so there is no need to prefill for these
unused bpf_mem_caches.

Signed-off-by: Hou Tao <houtao1@huawei.com>
Link: https://lore.kernel.org/r/20230908133923.2675053-3-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/memalloc.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 98d9e96fba3c..90c1ed8210a2 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -459,8 +459,7 @@ static void notrace irq_work_raise(struct bpf_mem_cache *c)
  * Typical case will be between 11K and 116K closer to 11K.
  * bpf progs can and should share bpf_mem_cache when possible.
  */
-
-static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
+static void init_refill_work(struct bpf_mem_cache *c)
 {
 	init_irq_work(&c->refill_work, bpf_mem_refill);
 	if (c->unit_size <= 256) {
@@ -476,7 +475,10 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
 		c->high_watermark = max(96 * 256 / c->unit_size, 3);
 	}
 	c->batch = max((c->high_watermark - c->low_watermark) / 4 * 3, 1);
+}
 
+static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
+{
 	/* To avoid consuming memory assume that 1st run of bpf
 	 * prog won't be doing more than 4 map_update_elem from
 	 * irq disabled region
@@ -521,6 +523,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 			c->objcg = objcg;
 			c->percpu_size = percpu_size;
 			c->tgt = c;
+			init_refill_work(c);
 			prefill_mem_cache(c, cpu);
 		}
 		ma->cache = pc;
@@ -544,6 +547,15 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 			c->unit_size = sizes[i];
 			c->objcg = objcg;
 			c->tgt = c;
+
+			init_refill_work(c);
+			/* Another bpf_mem_cache will be used when allocating
+			 * c->unit_size in bpf_mem_alloc(), so doesn't prefill
+			 * for the bpf_mem_cache because these free objects will
+			 * never be used.
+			 */
+			if (i != bpf_mem_cache_idx(c->unit_size))
+				continue;
 			prefill_mem_cache(c, cpu);
 		}
 	}
-- 
cgit v1.2.3


From c930472552022bd09aab3cd946ba3f243070d5c7 Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Fri, 8 Sep 2023 21:39:22 +0800
Subject: bpf: Ensure unit_size is matched with slab cache object size

Add extra check in bpf_mem_alloc_init() to ensure the unit_size of
bpf_mem_cache is matched with the object_size of underlying slab cache.
If these two sizes are unmatched, print a warning once and return
-EINVAL in bpf_mem_alloc_init(), so the mismatch can be found early and
the potential issue can be prevented.

Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Hou Tao <houtao1@huawei.com>
Link: https://lore.kernel.org/r/20230908133923.2675053-4-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/memalloc.c | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 90c1ed8210a2..1c22b90e754a 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -486,6 +486,24 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
 	alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false);
 }
 
+static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx)
+{
+	struct llist_node *first;
+	unsigned int obj_size;
+
+	first = c->free_llist.first;
+	if (!first)
+		return 0;
+
+	obj_size = ksize(first);
+	if (obj_size != c->unit_size) {
+		WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n",
+			  idx, obj_size, c->unit_size);
+		return -EINVAL;
+	}
+	return 0;
+}
+
 /* When size != 0 bpf_mem_cache for each cpu.
  * This is typical bpf hash map use case when all elements have equal size.
  *
@@ -496,10 +514,10 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
 int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 {
 	static u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096};
+	int cpu, i, err, unit_size, percpu_size = 0;
 	struct bpf_mem_caches *cc, __percpu *pcc;
 	struct bpf_mem_cache *c, __percpu *pc;
 	struct obj_cgroup *objcg = NULL;
-	int cpu, i, unit_size, percpu_size = 0;
 
 	if (size) {
 		pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL);
@@ -537,6 +555,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 	pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL);
 	if (!pcc)
 		return -ENOMEM;
+	err = 0;
 #ifdef CONFIG_MEMCG_KMEM
 	objcg = get_obj_cgroup_from_current();
 #endif
@@ -557,10 +576,20 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 			if (i != bpf_mem_cache_idx(c->unit_size))
 				continue;
 			prefill_mem_cache(c, cpu);
+			err = check_obj_size(c, i);
+			if (err)
+				goto out;
 		}
 	}
+
+out:
 	ma->caches = pcc;
-	return 0;
+	/* refill_work is either zeroed or initialized, so it is safe to
+	 * call irq_work_sync().
+	 */
+	if (err)
+		bpf_mem_alloc_destroy(ma);
+	return err;
 }
 
 static void drain_mem_cache(struct bpf_mem_cache *c)
-- 
cgit v1.2.3


From f0a42ab5890f749626b35f9fddd8d0704fc89524 Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Fri, 8 Sep 2023 21:39:23 +0800
Subject: selftests/bpf: Test all valid alloc sizes for bpf mem allocator

Add a test to test all possible and valid allocation size for bpf
memory allocator. For each possible allocation size, the test uses
the following two steps to test the alloc and free path:

1) allocate N (N > high_watermark) objects to trigger the refill
   executed in irq_work.
2) free N objects to trigger the freeing executed in irq_work.

Signed-off-by: Hou Tao <houtao1@huawei.com>
Link: https://lore.kernel.org/r/20230908133923.2675053-5-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../testing/selftests/bpf/prog_tests/test_bpf_ma.c |  50 +++++++++
 tools/testing/selftests/bpf/progs/test_bpf_ma.c    | 123 +++++++++++++++++++++
 2 files changed, 173 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_bpf_ma.c

diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c
new file mode 100644
index 000000000000..0cca4e8ae38e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <bpf/btf.h>
+#include <test_progs.h>
+
+#include "test_bpf_ma.skel.h"
+
+void test_test_bpf_ma(void)
+{
+	struct test_bpf_ma *skel;
+	struct btf *btf;
+	int i, err;
+
+	skel = test_bpf_ma__open();
+	if (!ASSERT_OK_PTR(skel, "open"))
+		return;
+
+	btf = bpf_object__btf(skel->obj);
+	if (!ASSERT_OK_PTR(btf, "btf"))
+		goto out;
+
+	for (i = 0; i < ARRAY_SIZE(skel->rodata->data_sizes); i++) {
+		char name[32];
+		int id;
+
+		snprintf(name, sizeof(name), "bin_data_%u", skel->rodata->data_sizes[i]);
+		id = btf__find_by_name_kind(btf, name, BTF_KIND_STRUCT);
+		if (!ASSERT_GT(id, 0, "bin_data"))
+			goto out;
+		skel->rodata->data_btf_ids[i] = id;
+	}
+
+	err = test_bpf_ma__load(skel);
+	if (!ASSERT_OK(err, "load"))
+		goto out;
+
+	err = test_bpf_ma__attach(skel);
+	if (!ASSERT_OK(err, "attach"))
+		goto out;
+
+	skel->bss->pid = getpid();
+	usleep(1);
+	ASSERT_OK(skel->bss->err, "test error");
+out:
+	test_bpf_ma__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c
new file mode 100644
index 000000000000..ecde41ae0fc8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+struct generic_map_value {
+	void *data;
+};
+
+char _license[] SEC("license") = "GPL";
+
+const unsigned int data_sizes[] = {8, 16, 32, 64, 96, 128, 192, 256, 512, 1024, 2048, 4096};
+const volatile unsigned int data_btf_ids[ARRAY_SIZE(data_sizes)] = {};
+
+int err = 0;
+int pid = 0;
+
+#define DEFINE_ARRAY_WITH_KPTR(_size) \
+	struct bin_data_##_size { \
+		char data[_size - sizeof(void *)]; \
+	}; \
+	struct map_value_##_size { \
+		struct bin_data_##_size __kptr * data; \
+		/* To emit BTF info for bin_data_xx */ \
+		struct bin_data_##_size not_used; \
+	}; \
+	struct { \
+		__uint(type, BPF_MAP_TYPE_ARRAY); \
+		__type(key, int); \
+		__type(value, struct map_value_##_size); \
+		__uint(max_entries, 128); \
+	} array_##_size SEC(".maps");
+
+static __always_inline void batch_alloc_free(struct bpf_map *map, unsigned int batch,
+					     unsigned int idx)
+{
+	struct generic_map_value *value;
+	unsigned int i, key;
+	void *old, *new;
+
+	for (i = 0; i < batch; i++) {
+		key = i;
+		value = bpf_map_lookup_elem(map, &key);
+		if (!value) {
+			err = 1;
+			return;
+		}
+		new = bpf_obj_new_impl(data_btf_ids[idx], NULL);
+		if (!new) {
+			err = 2;
+			return;
+		}
+		old = bpf_kptr_xchg(&value->data, new);
+		if (old) {
+			bpf_obj_drop(old);
+			err = 3;
+			return;
+		}
+	}
+	for (i = 0; i < batch; i++) {
+		key = i;
+		value = bpf_map_lookup_elem(map, &key);
+		if (!value) {
+			err = 4;
+			return;
+		}
+		old = bpf_kptr_xchg(&value->data, NULL);
+		if (!old) {
+			err = 5;
+			return;
+		}
+		bpf_obj_drop(old);
+	}
+}
+
+#define CALL_BATCH_ALLOC_FREE(size, batch, idx) \
+	batch_alloc_free((struct bpf_map *)(&array_##size), batch, idx)
+
+DEFINE_ARRAY_WITH_KPTR(8);
+DEFINE_ARRAY_WITH_KPTR(16);
+DEFINE_ARRAY_WITH_KPTR(32);
+DEFINE_ARRAY_WITH_KPTR(64);
+DEFINE_ARRAY_WITH_KPTR(96);
+DEFINE_ARRAY_WITH_KPTR(128);
+DEFINE_ARRAY_WITH_KPTR(192);
+DEFINE_ARRAY_WITH_KPTR(256);
+DEFINE_ARRAY_WITH_KPTR(512);
+DEFINE_ARRAY_WITH_KPTR(1024);
+DEFINE_ARRAY_WITH_KPTR(2048);
+DEFINE_ARRAY_WITH_KPTR(4096);
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int test_bpf_mem_alloc_free(void *ctx)
+{
+	if ((u32)bpf_get_current_pid_tgid() != pid)
+		return 0;
+
+	/* Alloc 128 8-bytes objects in batch to trigger refilling,
+	 * then free 128 8-bytes objects in batch to trigger freeing.
+	 */
+	CALL_BATCH_ALLOC_FREE(8, 128, 0);
+	CALL_BATCH_ALLOC_FREE(16, 128, 1);
+	CALL_BATCH_ALLOC_FREE(32, 128, 2);
+	CALL_BATCH_ALLOC_FREE(64, 128, 3);
+	CALL_BATCH_ALLOC_FREE(96, 128, 4);
+	CALL_BATCH_ALLOC_FREE(128, 128, 5);
+	CALL_BATCH_ALLOC_FREE(192, 128, 6);
+	CALL_BATCH_ALLOC_FREE(256, 128, 7);
+	CALL_BATCH_ALLOC_FREE(512, 64, 8);
+	CALL_BATCH_ALLOC_FREE(1024, 32, 9);
+	CALL_BATCH_ALLOC_FREE(2048, 16, 10);
+	CALL_BATCH_ALLOC_FREE(4096, 8, 11);
+
+	return 0;
+}
-- 
cgit v1.2.3


From 7cb779a6867fea00b4209bcf6de2f178a743247d Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Mon, 11 Sep 2023 12:47:30 -0700
Subject: bpf: Clarify error expectations from bpf_clone_redirect

Commit 151e887d8ff9 ("veth: Fixing transmit return status for dropped
packets") exposed the fact that bpf_clone_redirect is capable of
returning raw NET_XMIT_XXX return codes.

This is in the conflict with its UAPI doc which says the following:
"0 on success, or a negative error in case of failure."

Update the UAPI to reflect the fact that bpf_clone_redirect can
return positive error numbers, but don't explicitly define
their meaning.

Reported-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20230911194731.286342-1-sdf@google.com
---
 include/uapi/linux/bpf.h       | 4 +++-
 tools/include/uapi/linux/bpf.h | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 8790b3962e4b..0448700890f7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1962,7 +1962,9 @@ union bpf_attr {
  * 		performed again, if the helper is used in combination with
  * 		direct packet access.
  * 	Return
- * 		0 on success, or a negative error in case of failure.
+ * 		0 on success, or a negative error in case of failure. Positive
+ * 		error indicates a potential drop or congestion in the target
+ * 		device. The particular positive error codes are not defined.
  *
  * u64 bpf_get_current_pid_tgid(void)
  * 	Description
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 8790b3962e4b..0448700890f7 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1962,7 +1962,9 @@ union bpf_attr {
  * 		performed again, if the helper is used in combination with
  * 		direct packet access.
  * 	Return
- * 		0 on success, or a negative error in case of failure.
+ * 		0 on success, or a negative error in case of failure. Positive
+ * 		error indicates a potential drop or congestion in the target
+ * 		device. The particular positive error codes are not defined.
  *
  * u64 bpf_get_current_pid_tgid(void)
  * 	Description
-- 
cgit v1.2.3


From b772b70b69046c5b76e3f2eda680f692dee5e6d5 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Mon, 11 Sep 2023 12:47:31 -0700
Subject: selftests/bpf: Update bpf_clone_redirect expected return code

Commit 151e887d8ff9 ("veth: Fixing transmit return status for dropped
packets") started propagating proper NET_XMIT_DROP error to the caller
which means it's now possible to get positive error code when calling
bpf_clone_redirect() in this particular test. Update the test to reflect
that.

Reported-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20230911194731.286342-2-sdf@google.com
---
 tools/testing/selftests/bpf/prog_tests/empty_skb.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c
index 3b77d8a422db..261228eb68e8 100644
--- a/tools/testing/selftests/bpf/prog_tests/empty_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c
@@ -24,6 +24,7 @@ void test_empty_skb(void)
 		int *ifindex;
 		int err;
 		int ret;
+		int lwt_egress_ret; /* expected retval at lwt/egress */
 		bool success_on_tc;
 	} tests[] = {
 		/* Empty packets are always rejected. */
@@ -57,6 +58,7 @@ void test_empty_skb(void)
 			.data_size_in = sizeof(eth_hlen),
 			.ifindex = &veth_ifindex,
 			.ret = -ERANGE,
+			.lwt_egress_ret = -ERANGE,
 			.success_on_tc = true,
 		},
 		{
@@ -70,6 +72,7 @@ void test_empty_skb(void)
 			.data_size_in = sizeof(eth_hlen),
 			.ifindex = &ipip_ifindex,
 			.ret = -ERANGE,
+			.lwt_egress_ret = -ERANGE,
 		},
 
 		/* ETH_HLEN+1-sized packet should be redirected. */
@@ -79,6 +82,7 @@ void test_empty_skb(void)
 			.data_in = eth_hlen_pp,
 			.data_size_in = sizeof(eth_hlen_pp),
 			.ifindex = &veth_ifindex,
+			.lwt_egress_ret = 1, /* veth_xmit NET_XMIT_DROP */
 		},
 		{
 			.msg = "ipip ETH_HLEN+1 packet ingress",
@@ -108,8 +112,12 @@ void test_empty_skb(void)
 
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
 		bpf_object__for_each_program(prog, bpf_obj->obj) {
-			char buf[128];
+			bool at_egress = strstr(bpf_program__name(prog), "egress") != NULL;
 			bool at_tc = !strncmp(bpf_program__section_name(prog), "tc", 2);
+			int expected_ret;
+			char buf[128];
+
+			expected_ret = at_egress && !at_tc ? tests[i].lwt_egress_ret : tests[i].ret;
 
 			tattr.data_in = tests[i].data_in;
 			tattr.data_size_in = tests[i].data_size_in;
@@ -128,7 +136,7 @@ void test_empty_skb(void)
 			if (at_tc && tests[i].success_on_tc)
 				ASSERT_GE(bpf_obj->bss->ret, 0, buf);
 			else
-				ASSERT_EQ(bpf_obj->bss->ret, tests[i].ret, buf);
+				ASSERT_EQ(bpf_obj->bss->ret, expected_ret, buf);
 		}
 	}
 
-- 
cgit v1.2.3


From a34a9f1a19afe9c60ca0ea61dfeee63a1c2baac8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Mon, 11 Sep 2023 15:28:14 +0200
Subject: bpf: Avoid deadlock when using queue and stack maps from NMI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sysbot discovered that the queue and stack maps can deadlock if they are
being used from a BPF program that can be called from NMI context (such as
one that is attached to a perf HW counter event). To fix this, add an
in_nmi() check and use raw_spin_trylock() in NMI context, erroring out if
grabbing the lock fails.

Fixes: f1a2e44a3aec ("bpf: add queue and stack maps")
Reported-by: Hsin-Wei Hung <hsinweih@uci.edu>
Tested-by: Hsin-Wei Hung <hsinweih@uci.edu>
Co-developed-by: Hsin-Wei Hung <hsinweih@uci.edu>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/r/20230911132815.717240-1-toke@redhat.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/queue_stack_maps.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index 8d2ddcb7566b..d869f51ea93a 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -98,7 +98,12 @@ static long __queue_map_get(struct bpf_map *map, void *value, bool delete)
 	int err = 0;
 	void *ptr;
 
-	raw_spin_lock_irqsave(&qs->lock, flags);
+	if (in_nmi()) {
+		if (!raw_spin_trylock_irqsave(&qs->lock, flags))
+			return -EBUSY;
+	} else {
+		raw_spin_lock_irqsave(&qs->lock, flags);
+	}
 
 	if (queue_stack_map_is_empty(qs)) {
 		memset(value, 0, qs->map.value_size);
@@ -128,7 +133,12 @@ static long __stack_map_get(struct bpf_map *map, void *value, bool delete)
 	void *ptr;
 	u32 index;
 
-	raw_spin_lock_irqsave(&qs->lock, flags);
+	if (in_nmi()) {
+		if (!raw_spin_trylock_irqsave(&qs->lock, flags))
+			return -EBUSY;
+	} else {
+		raw_spin_lock_irqsave(&qs->lock, flags);
+	}
 
 	if (queue_stack_map_is_empty(qs)) {
 		memset(value, 0, qs->map.value_size);
@@ -193,7 +203,12 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value,
 	if (flags & BPF_NOEXIST || flags > BPF_EXIST)
 		return -EINVAL;
 
-	raw_spin_lock_irqsave(&qs->lock, irq_flags);
+	if (in_nmi()) {
+		if (!raw_spin_trylock_irqsave(&qs->lock, irq_flags))
+			return -EBUSY;
+	} else {
+		raw_spin_lock_irqsave(&qs->lock, irq_flags);
+	}
 
 	if (queue_stack_map_is_full(qs)) {
 		if (!replace) {
-- 
cgit v1.2.3


From 1a49f4195d3498fe458a7f5ff7ec5385da70d92e Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Tue, 12 Sep 2023 03:55:37 +0300
Subject: bpf: Avoid dummy bpf_offload_netdev in __bpf_prog_dev_bound_init

Fix for a bug observable under the following sequence of events:
1. Create a network device that does not support XDP offload.
2. Load a device bound XDP program with BPF_F_XDP_DEV_BOUND_ONLY flag
   (such programs are not offloaded).
3. Load a device bound XDP program with zero flags
   (such programs are offloaded).

At step (2) __bpf_prog_dev_bound_init() associates with device (1)
a dummy bpf_offload_netdev struct with .offdev field set to NULL.
At step (3) __bpf_prog_dev_bound_init() would reuse dummy struct
allocated at step (2).
However, downstream usage of the bpf_offload_netdev assumes that
.offdev field can't be NULL, e.g. in bpf_prog_offload_verifier_prep().

Adjust __bpf_prog_dev_bound_init() to require bpf_offload_netdev
with non-NULL .offdev for offloaded BPF programs.

Fixes: 2b3486bc2d23 ("bpf: Introduce device-bound XDP programs")
Reported-by: syzbot+291100dcb32190ec02a8@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/bpf/000000000000d97f3c060479c4f8@google.com/
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20230912005539.2248244-2-eddyz87@gmail.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 kernel/bpf/offload.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 3e4f2ec1af06..87d6693d8233 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -199,12 +199,14 @@ static int __bpf_prog_dev_bound_init(struct bpf_prog *prog, struct net_device *n
 	offload->netdev = netdev;
 
 	ondev = bpf_offload_find_netdev(offload->netdev);
+	/* When program is offloaded require presence of "true"
+	 * bpf_offload_netdev, avoid the one created for !ondev case below.
+	 */
+	if (bpf_prog_is_offloaded(prog->aux) && (!ondev || !ondev->offdev)) {
+		err = -EINVAL;
+		goto err_free;
+	}
 	if (!ondev) {
-		if (bpf_prog_is_offloaded(prog->aux)) {
-			err = -EINVAL;
-			goto err_free;
-		}
-
 		/* When only binding to the device, explicitly
 		 * create an entry in the hashtable.
 		 */
-- 
cgit v1.2.3


From e4c31164737e9a00de1be6455e2c667ac5478b3c Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Tue, 12 Sep 2023 03:55:38 +0300
Subject: selftests/bpf: Offloaded prog after non-offloaded should not cause
 BUG

Check what happens if non-offloaded dev bound BPF
program is followed by offloaded dev bound program.
Test case adapated from syzbot report [1].

[1] https://lore.kernel.org/bpf/000000000000d97f3c060479c4f8@google.com/

Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20230912005539.2248244-3-eddyz87@gmail.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 .../selftests/bpf/prog_tests/xdp_dev_bound_only.c  | 61 ++++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c b/tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c
new file mode 100644
index 000000000000..7dd18c6d06c6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <net/if.h>
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#define LOCAL_NETNS "xdp_dev_bound_only_netns"
+
+static int load_dummy_prog(char *name, __u32 ifindex, __u32 flags)
+{
+	struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN() };
+	LIBBPF_OPTS(bpf_prog_load_opts, opts);
+
+	opts.prog_flags = flags;
+	opts.prog_ifindex = ifindex;
+	return bpf_prog_load(BPF_PROG_TYPE_XDP, name, "GPL", insns, ARRAY_SIZE(insns), &opts);
+}
+
+/* A test case for bpf_offload_netdev->offload handling bug:
+ * - create a veth device (does not support offload);
+ * - create a device bound XDP program with BPF_F_XDP_DEV_BOUND_ONLY flag
+ *   (such programs are not offloaded);
+ * - create a device bound XDP program without flags (such programs are offloaded).
+ * This might lead to 'BUG: kernel NULL pointer dereference'.
+ */
+void test_xdp_dev_bound_only_offdev(void)
+{
+	struct nstoken *tok = NULL;
+	__u32 ifindex;
+	int fd1 = -1;
+	int fd2 = -1;
+
+	SYS(out, "ip netns add " LOCAL_NETNS);
+	tok = open_netns(LOCAL_NETNS);
+	if (!ASSERT_OK_PTR(tok, "open_netns"))
+		goto out;
+	SYS(out, "ip link add eth42 type veth");
+	ifindex = if_nametoindex("eth42");
+	if (!ASSERT_NEQ(ifindex, 0, "if_nametoindex")) {
+		perror("if_nametoindex");
+		goto out;
+	}
+	fd1 = load_dummy_prog("dummy1", ifindex, BPF_F_XDP_DEV_BOUND_ONLY);
+	if (!ASSERT_GE(fd1, 0, "load_dummy_prog #1")) {
+		perror("load_dummy_prog #1");
+		goto out;
+	}
+	/* Program with ifindex is considered offloaded, however veth
+	 * does not support offload => error should be reported.
+	 */
+	fd2 = load_dummy_prog("dummy2", ifindex, 0);
+	ASSERT_EQ(fd2, -EINVAL, "load_dummy_prog #2 (offloaded)");
+
+out:
+	close(fd1);
+	close(fd2);
+	close_netns(tok);
+	/* eth42 was added inside netns, removing the netns will
+	 * also remove eth42 veth pair.
+	 */
+	SYS_NOFAIL("ip netns del " LOCAL_NETNS);
+}
-- 
cgit v1.2.3


From 4eb94a7793074f799b1f558471019e9a21fa9546 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 11 Sep 2023 22:59:28 -0700
Subject: selftests/bpf: ensure all CI arches set CONFIG_BPF_KPROBE_OVERRIDE=y

Turns out CONFIG_BPF_KPROBE_OVERRIDE=y is only enabled in x86-64 CI, but
is not set on aarch64, causing CI failures ([0]).

Move CONFIG_BPF_KPROBE_OVERRIDE=y to arch-agnostic CI config.

  [0] https://github.com/kernel-patches/bpf/actions/runs/6122324047/job/16618390535

Fixes: 7182e56411b9 ("selftests/bpf: Add kprobe_multi override test")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20230912055928.1704269-1-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/config        | 1 +
 tools/testing/selftests/bpf/config.x86_64 | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 1c7584e8dd9e..e41eb33b2704 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -4,6 +4,7 @@ CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
 CONFIG_BPF=y
 CONFIG_BPF_EVENTS=y
 CONFIG_BPF_JIT=y
+CONFIG_BPF_KPROBE_OVERRIDE=y
 CONFIG_BPF_LIRC_MODE2=y
 CONFIG_BPF_LSM=y
 CONFIG_BPF_STREAM_PARSER=y
diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64
index b650b2e617b8..2e70a6048278 100644
--- a/tools/testing/selftests/bpf/config.x86_64
+++ b/tools/testing/selftests/bpf/config.x86_64
@@ -20,7 +20,6 @@ CONFIG_BLK_DEV_THROTTLING=y
 CONFIG_BONDING=y
 CONFIG_BOOTTIME_TRACING=y
 CONFIG_BPF_JIT_ALWAYS_ON=y
-CONFIG_BPF_KPROBE_OVERRIDE=y
 CONFIG_BPF_PRELOAD=y
 CONFIG_BPF_PRELOAD_UMD=y
 CONFIG_BPFILTER=y
-- 
cgit v1.2.3


From a8f12572860ad8ba659d96eee9cf09e181f6ebcc Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Fri, 8 Sep 2023 18:33:35 +0200
Subject: bpf: Fix a erroneous check after snprintf()

snprintf() does not return negative error code on error, it returns the
number of characters which *would* be generated for the given input.

Fix the error handling check.

Fixes: 57539b1c0ac2 ("bpf: Enable annotating trusted nested pointers")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/393bdebc87b22563c08ace094defa7160eb7a6c0.1694190795.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/btf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 1095bbe29859..8090d7fb11ef 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -8501,7 +8501,7 @@ bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
 	tname = btf_name_by_offset(btf, walk_type->name_off);
 
 	ret = snprintf(safe_tname, sizeof(safe_tname), "%s%s", tname, suffix);
-	if (ret < 0)
+	if (ret >= sizeof(safe_tname))
 		return false;
 
 	safe_id = btf_find_by_name_kind(btf, safe_tname, BTF_INFO_KIND(walk_type->info));
-- 
cgit v1.2.3


From d128860dbb29cafc3c65ca2d22082745a32829dd Mon Sep 17 00:00:00 2001
From: Artem Savkov <asavkov@redhat.com>
Date: Tue, 12 Sep 2023 14:06:31 +0200
Subject: selftests/bpf: fix unpriv_disabled check in test_verifier

Commit 1d56ade032a49 changed the function get_unpriv_disabled() to
return its results as a bool instead of updating a global variable, but
test_verifier was not updated to keep in line with these changes. Thus
unpriv_disabled is always false in test_verifier and unprivileged tests
are not properly skipped on systems with unprivileged bpf disabled.

Fixes: 1d56ade032a49 ("selftests/bpf: Unprivileged tests for test_loader.c")
Signed-off-by: Artem Savkov <asavkov@redhat.com>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20230912120631.213139-1-asavkov@redhat.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_verifier.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 31f1c935cd07..98107e0452d3 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -1880,7 +1880,7 @@ int main(int argc, char **argv)
 		}
 	}
 
-	get_unpriv_disabled();
+	unpriv_disabled = get_unpriv_disabled();
 	if (unpriv && unpriv_disabled) {
 		printf("Cannot run as unprivileged user with sysctl %s.\n",
 		       UNPRIV_SYSCTL);
-- 
cgit v1.2.3


From 214bfd267f4929722b374b43fda456c21cd6f016 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 11 Sep 2023 23:08:12 -0700
Subject: bpf, cgroup: fix multiple kernel-doc warnings

Fix missing or extra function parameter kernel-doc warnings
in cgroup.c:

kernel/bpf/cgroup.c:1359: warning: Excess function parameter 'type' description in '__cgroup_bpf_run_filter_skb'
kernel/bpf/cgroup.c:1359: warning: Function parameter or member 'atype' not described in '__cgroup_bpf_run_filter_skb'
kernel/bpf/cgroup.c:1439: warning: Excess function parameter 'type' description in '__cgroup_bpf_run_filter_sk'
kernel/bpf/cgroup.c:1439: warning: Function parameter or member 'atype' not described in '__cgroup_bpf_run_filter_sk'
kernel/bpf/cgroup.c:1467: warning: Excess function parameter 'type' description in '__cgroup_bpf_run_filter_sock_addr'
kernel/bpf/cgroup.c:1467: warning: Function parameter or member 'atype' not described in '__cgroup_bpf_run_filter_sock_addr'
kernel/bpf/cgroup.c:1512: warning: Excess function parameter 'type' description in '__cgroup_bpf_run_filter_sock_ops'
kernel/bpf/cgroup.c:1512: warning: Function parameter or member 'atype' not described in '__cgroup_bpf_run_filter_sock_ops'
kernel/bpf/cgroup.c:1685: warning: Excess function parameter 'type' description in '__cgroup_bpf_run_filter_sysctl'
kernel/bpf/cgroup.c:1685: warning: Function parameter or member 'atype' not described in '__cgroup_bpf_run_filter_sysctl'
kernel/bpf/cgroup.c:795: warning: Excess function parameter 'type' description in '__cgroup_bpf_replace'
kernel/bpf/cgroup.c:795: warning: Function parameter or member 'new_prog' not described in '__cgroup_bpf_replace'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: bpf@vger.kernel.org
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20230912060812.1715-1-rdunlap@infradead.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/cgroup.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 5b2741aa0d9b..03b3d4492980 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -785,7 +785,8 @@ found:
  *                          to descendants
  * @cgrp: The cgroup which descendants to traverse
  * @link: A link for which to replace BPF program
- * @type: Type of attach operation
+ * @new_prog: &struct bpf_prog for the target BPF program with its refcnt
+ *            incremented
  *
  * Must be called with cgroup_mutex held.
  */
@@ -1334,7 +1335,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
  * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
  * @sk: The socket sending or receiving traffic
  * @skb: The skb that is being sent or received
- * @type: The type of program to be executed
+ * @atype: The type of program to be executed
  *
  * If no socket is passed, or the socket is not of type INET or INET6,
  * this function does nothing and returns 0.
@@ -1424,7 +1425,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
 /**
  * __cgroup_bpf_run_filter_sk() - Run a program on a sock
  * @sk: sock structure to manipulate
- * @type: The type of program to be executed
+ * @atype: The type of program to be executed
  *
  * socket is passed is expected to be of type INET or INET6.
  *
@@ -1449,7 +1450,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
  *                                       provided by user sockaddr
  * @sk: sock struct that will use sockaddr
  * @uaddr: sockaddr struct provided by user
- * @type: The type of program to be executed
+ * @atype: The type of program to be executed
  * @t_ctx: Pointer to attach type specific context
  * @flags: Pointer to u32 which contains higher bits of BPF program
  *         return value (OR'ed together).
@@ -1496,7 +1497,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
  * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
  * sk with connection information (IP addresses, etc.) May not contain
  * cgroup info if it is a req sock.
- * @type: The type of program to be executed
+ * @atype: The type of program to be executed
  *
  * socket passed is expected to be of type INET or INET6.
  *
@@ -1670,7 +1671,7 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
  * @ppos: value-result argument: value is position at which read from or write
  *	to sysctl is happening, result is new position if program overrode it,
  *	initial value otherwise
- * @type: type of program to be executed
+ * @atype: type of program to be executed
  *
  * Program is run when sysctl is being accessed, either read or written, and
  * can allow or deny such access.
-- 
cgit v1.2.3


From 8a19edd4fa6f5b22d5a35bb7c8bb3e7c571a74d4 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 13 Sep 2023 13:47:11 +0200
Subject: selftests/bpf: Fix kprobe_multi_test/attach_override test

We need to deny the attach_override test for arm64, denying the
whole kprobe_multi_test suite. Also making attach_override static.

Fixes: 7182e56411b9 ("selftests/bpf: Add kprobe_multi override test")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20230913114711.499829-1-jolsa@kernel.org
---
 tools/testing/selftests/bpf/DENYLIST.aarch64               | 10 ++--------
 tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c |  2 +-
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
index 7f768d335698..3babaf3eee5c 100644
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -1,14 +1,8 @@
 bpf_cookie/multi_kprobe_attach_api               # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
 bpf_cookie/multi_kprobe_link_api                 # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
 fexit_sleep                                      # The test never returns. The remaining tests cannot start.
-kprobe_multi_bench_attach                        # bpf_program__attach_kprobe_multi_opts unexpected error: -95
-kprobe_multi_test/attach_api_addrs               # bpf_program__attach_kprobe_multi_opts unexpected error: -95
-kprobe_multi_test/attach_api_pattern             # bpf_program__attach_kprobe_multi_opts unexpected error: -95
-kprobe_multi_test/attach_api_syms                # bpf_program__attach_kprobe_multi_opts unexpected error: -95
-kprobe_multi_test/bench_attach                   # bpf_program__attach_kprobe_multi_opts unexpected error: -95
-kprobe_multi_test/link_api_addrs                 # link_fd unexpected link_fd: actual -95 < expected 0
-kprobe_multi_test/link_api_syms                  # link_fd unexpected link_fd: actual -95 < expected 0
-kprobe_multi_test/skel_api                       # libbpf: failed to load BPF skeleton 'kprobe_multi': -3
+kprobe_multi_bench_attach                        # needs CONFIG_FPROBE
+kprobe_multi_test                                # needs CONFIG_FPROBE
 module_attach                                    # prog 'kprobe_multi': failed to auto-attach: -95
 fentry_test/fentry_many_args                     # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524
 fexit_test/fexit_many_args                       # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index e05477b210a5..4041cfa670eb 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -454,7 +454,7 @@ cleanup:
 	}
 }
 
-void test_attach_override(void)
+static void test_attach_override(void)
 {
 	struct kprobe_multi_override *skel = NULL;
 	struct bpf_link *link = NULL;
-- 
cgit v1.2.3


From 4908d5af16676b9d2901830551c2af911e452524 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 12 Sep 2023 10:56:07 +0200
Subject: netfilter: conntrack: fix extension size table

The size table is incorrect due to copypaste error,
this reserves more size than needed.

TSTAMP reserved 32 instead of 16 bytes.
TIMEOUT reserved 16 instead of 8 bytes.

Fixes: 5f31edc0676b ("netfilter: conntrack: move extension sizes into core")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_extend.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 0b513f7bf9f3..dd62cc12e775 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -40,10 +40,10 @@ static const u8 nf_ct_ext_type_len[NF_CT_EXT_NUM] = {
 	[NF_CT_EXT_ECACHE] = sizeof(struct nf_conntrack_ecache),
 #endif
 #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
-	[NF_CT_EXT_TSTAMP] = sizeof(struct nf_conn_acct),
+	[NF_CT_EXT_TSTAMP] = sizeof(struct nf_conn_tstamp),
 #endif
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-	[NF_CT_EXT_TIMEOUT] = sizeof(struct nf_conn_tstamp),
+	[NF_CT_EXT_TIMEOUT] = sizeof(struct nf_conn_timeout),
 #endif
 #ifdef CONFIG_NF_CONNTRACK_LABELS
 	[NF_CT_EXT_LABELS] = sizeof(struct nf_conn_labels),
-- 
cgit v1.2.3


From 7fb818f248cff996180b7cdcdcb86b6b4f6e44e2 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Wed, 13 Sep 2023 15:51:36 +0200
Subject: netfilter: nf_tables: Fix entries val in rule reset audit log

The value in idx and the number of rules handled in that particular
__nf_tables_dump_rules() call is not identical. The former is a cursor
to pick up from if multiple netlink messages are needed, so its value is
ever increasing. Fixing this is not just a matter of subtracting s_idx
from it, though: When resetting rules in multiple chains,
__nf_tables_dump_rules() is called for each and cb->args[0] is not
adjusted in between. Introduce a dedicated counter to record the number
of rules reset in this call in a less confusing way.

While being at it, prevent the direct return upon buffer exhaustion: Any
rules previously dumped into that skb would evade audit logging
otherwise.

Fixes: 9b5ba5c9c5109 ("netfilter: nf_tables: Unbreak audit log reset")
Signed-off-by: Phil Sutter <phil@nwl.cc>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c1e485aee763..d819b4d42962 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3451,6 +3451,8 @@ static int __nf_tables_dump_rules(struct sk_buff *skb,
 	struct net *net = sock_net(skb->sk);
 	const struct nft_rule *rule, *prule;
 	unsigned int s_idx = cb->args[0];
+	unsigned int entries = 0;
+	int ret = 0;
 	u64 handle;
 
 	prule = NULL;
@@ -3473,9 +3475,11 @@ static int __nf_tables_dump_rules(struct sk_buff *skb,
 					NFT_MSG_NEWRULE,
 					NLM_F_MULTI | NLM_F_APPEND,
 					table->family,
-					table, chain, rule, handle, reset) < 0)
-			return 1;
-
+					table, chain, rule, handle, reset) < 0) {
+			ret = 1;
+			break;
+		}
+		entries++;
 		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
 cont:
 		prule = rule;
@@ -3483,10 +3487,10 @@ cont_skip:
 		(*idx)++;
 	}
 
-	if (reset && *idx)
-		audit_log_rule_reset(table, cb->seq, *idx);
+	if (reset && entries)
+		audit_log_rule_reset(table, cb->seq, entries);
 
-	return 0;
+	return ret;
 }
 
 static int nf_tables_dump_rules(struct sk_buff *skb,
-- 
cgit v1.2.3


From e8dbde59ca3fe925d0105bfb380e8429928b16dd Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Wed, 13 Sep 2023 15:51:37 +0200
Subject: selftests: netfilter: Test nf_tables audit logging

Compare NETFILTER_CFG type audit logs emitted from kernel upon ruleset
modifications against expected output.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 tools/testing/selftests/netfilter/.gitignore      |   1 +
 tools/testing/selftests/netfilter/Makefile        |   4 +-
 tools/testing/selftests/netfilter/audit_logread.c | 165 ++++++++++++++++++++++
 tools/testing/selftests/netfilter/config          |   1 +
 tools/testing/selftests/netfilter/nft_audit.sh    | 108 ++++++++++++++
 5 files changed, 277 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/netfilter/audit_logread.c
 create mode 100755 tools/testing/selftests/netfilter/nft_audit.sh

diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/netfilter/.gitignore
index 4cb887b57413..4b2928e1c19d 100644
--- a/tools/testing/selftests/netfilter/.gitignore
+++ b/tools/testing/selftests/netfilter/.gitignore
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 nf-queue
 connect_close
+audit_logread
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index 3686bfa6c58d..321db8850da0 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -6,13 +6,13 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
 	nft_concat_range.sh nft_conntrack_helper.sh \
 	nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
 	ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
-	conntrack_vrf.sh nft_synproxy.sh rpath.sh
+	conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh
 
 HOSTPKG_CONFIG := pkg-config
 
 CFLAGS += $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null)
 LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)
 
-TEST_GEN_FILES =  nf-queue connect_close
+TEST_GEN_FILES =  nf-queue connect_close audit_logread
 
 include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/audit_logread.c b/tools/testing/selftests/netfilter/audit_logread.c
new file mode 100644
index 000000000000..a0a880fc2d9d
--- /dev/null
+++ b/tools/testing/selftests/netfilter/audit_logread.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <linux/audit.h>
+#include <linux/netlink.h>
+
+static int fd;
+
+#define MAX_AUDIT_MESSAGE_LENGTH	8970
+struct audit_message {
+	struct nlmsghdr nlh;
+	union {
+		struct audit_status s;
+		char data[MAX_AUDIT_MESSAGE_LENGTH];
+	} u;
+};
+
+int audit_recv(int fd, struct audit_message *rep)
+{
+	struct sockaddr_nl addr;
+	socklen_t addrlen = sizeof(addr);
+	int ret;
+
+	do {
+		ret = recvfrom(fd, rep, sizeof(*rep), 0,
+			       (struct sockaddr *)&addr, &addrlen);
+	} while (ret < 0 && errno == EINTR);
+
+	if (ret < 0 ||
+	    addrlen != sizeof(addr) ||
+	    addr.nl_pid != 0 ||
+	    rep->nlh.nlmsg_type == NLMSG_ERROR) /* short-cut for now */
+		return -1;
+
+	return ret;
+}
+
+int audit_send(int fd, uint16_t type, uint32_t key, uint32_t val)
+{
+	static int seq = 0;
+	struct audit_message msg = {
+		.nlh = {
+			.nlmsg_len   = NLMSG_SPACE(sizeof(msg.u.s)),
+			.nlmsg_type  = type,
+			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+			.nlmsg_seq   = ++seq,
+		},
+		.u.s = {
+			.mask    = key,
+			.enabled = key == AUDIT_STATUS_ENABLED ? val : 0,
+			.pid     = key == AUDIT_STATUS_PID ? val : 0,
+		}
+	};
+	struct sockaddr_nl addr = {
+		.nl_family = AF_NETLINK,
+	};
+	int ret;
+
+	do {
+		ret = sendto(fd, &msg, msg.nlh.nlmsg_len, 0,
+			     (struct sockaddr *)&addr, sizeof(addr));
+	} while (ret < 0 && errno == EINTR);
+
+	if (ret != (int)msg.nlh.nlmsg_len)
+		return -1;
+	return 0;
+}
+
+int audit_set(int fd, uint32_t key, uint32_t val)
+{
+	struct audit_message rep = { 0 };
+	int ret;
+
+	ret = audit_send(fd, AUDIT_SET, key, val);
+	if (ret)
+		return ret;
+
+	ret = audit_recv(fd, &rep);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+int readlog(int fd)
+{
+	struct audit_message rep = { 0 };
+	int ret = audit_recv(fd, &rep);
+	const char *sep = "";
+	char *k, *v;
+
+	if (ret < 0)
+		return ret;
+
+	if (rep.nlh.nlmsg_type != AUDIT_NETFILTER_CFG)
+		return 0;
+
+	/* skip the initial "audit(...): " part */
+	strtok(rep.u.data, " ");
+
+	while ((k = strtok(NULL, "="))) {
+		v = strtok(NULL, " ");
+
+		/* these vary and/or are uninteresting, ignore */
+		if (!strcmp(k, "pid") ||
+		    !strcmp(k, "comm") ||
+		    !strcmp(k, "subj"))
+			continue;
+
+		/* strip the varying sequence number */
+		if (!strcmp(k, "table"))
+			*strchrnul(v, ':') = '\0';
+
+		printf("%s%s=%s", sep, k, v);
+		sep = " ";
+	}
+	if (*sep) {
+		printf("\n");
+		fflush(stdout);
+	}
+	return 0;
+}
+
+void cleanup(int sig)
+{
+	audit_set(fd, AUDIT_STATUS_ENABLED, 0);
+	close(fd);
+	if (sig)
+		exit(0);
+}
+
+int main(int argc, char **argv)
+{
+	struct sigaction act = {
+		.sa_handler = cleanup,
+	};
+
+	fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_AUDIT);
+	if (fd < 0) {
+		perror("Can't open netlink socket");
+		return -1;
+	}
+
+	if (sigaction(SIGTERM, &act, NULL) < 0 ||
+	    sigaction(SIGINT, &act, NULL) < 0) {
+		perror("Can't set signal handler");
+		close(fd);
+		return -1;
+	}
+
+	audit_set(fd, AUDIT_STATUS_ENABLED, 1);
+	audit_set(fd, AUDIT_STATUS_PID, getpid());
+
+	while (1)
+		readlog(fd);
+}
diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config
index 4faf2ce021d9..7c42b1b2c69b 100644
--- a/tools/testing/selftests/netfilter/config
+++ b/tools/testing/selftests/netfilter/config
@@ -6,3 +6,4 @@ CONFIG_NFT_REDIR=m
 CONFIG_NFT_MASQ=m
 CONFIG_NFT_FLOW_OFFLOAD=m
 CONFIG_NF_CT_NETLINK=m
+CONFIG_AUDIT=y
diff --git a/tools/testing/selftests/netfilter/nft_audit.sh b/tools/testing/selftests/netfilter/nft_audit.sh
new file mode 100755
index 000000000000..83c271b1c735
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_audit.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that audit logs generated for nft commands are as expected.
+
+SKIP_RC=4
+RC=0
+
+nft --version >/dev/null 2>&1 || {
+	echo "SKIP: missing nft tool"
+	exit $SKIP_RC
+}
+
+logfile=$(mktemp)
+echo "logging into $logfile"
+./audit_logread >"$logfile" &
+logread_pid=$!
+trap 'kill $logread_pid; rm -f $logfile' EXIT
+exec 3<"$logfile"
+
+do_test() { # (cmd, log)
+	echo -n "testing for cmd: $1 ... "
+	cat <&3 >/dev/null
+	$1 >/dev/null || exit 1
+	sleep 0.1
+	res=$(diff -a -u <(echo "$2") - <&3)
+	[ $? -eq 0 ] && { echo "OK"; return; }
+	echo "FAIL"
+	echo "$res"
+	((RC++))
+}
+
+nft flush ruleset
+
+for table in t1 t2; do
+	do_test "nft add table $table" \
+	"table=$table family=2 entries=1 op=nft_register_table"
+
+	do_test "nft add chain $table c1" \
+	"table=$table family=2 entries=1 op=nft_register_chain"
+
+	do_test "nft add chain $table c2; add chain $table c3" \
+	"table=$table family=2 entries=2 op=nft_register_chain"
+
+	cmd="add rule $table c1 counter"
+
+	do_test "nft $cmd" \
+	"table=$table family=2 entries=1 op=nft_register_rule"
+
+	do_test "nft $cmd; $cmd" \
+	"table=$table family=2 entries=2 op=nft_register_rule"
+
+	cmd=""
+	sep=""
+	for chain in c2 c3; do
+		for i in {1..3}; do
+			cmd+="$sep add rule $table $chain counter"
+			sep=";"
+		done
+	done
+	do_test "nft $cmd" \
+	"table=$table family=2 entries=6 op=nft_register_rule"
+done
+
+do_test 'nft reset rules t1 c2' \
+'table=t1 family=2 entries=3 op=nft_reset_rule'
+
+do_test 'nft reset rules table t1' \
+'table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule'
+
+do_test 'nft reset rules' \
+'table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule'
+
+for ((i = 0; i < 500; i++)); do
+	echo "add rule t2 c3 counter accept comment \"rule $i\""
+done | do_test 'nft -f -' \
+'table=t2 family=2 entries=500 op=nft_register_rule'
+
+do_test 'nft reset rules t2 c3' \
+'table=t2 family=2 entries=189 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=126 op=nft_reset_rule'
+
+do_test 'nft reset rules t2' \
+'table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=186 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=129 op=nft_reset_rule'
+
+do_test 'nft reset rules' \
+'table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=180 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=135 op=nft_reset_rule'
+
+exit $RC
-- 
cgit v1.2.3


From f037fc9905ffa6fa19b89bfbc86946798cede071 Mon Sep 17 00:00:00 2001
From: Jinjie Ruan <ruanjinjie@huawei.com>
Date: Tue, 12 Sep 2023 19:03:06 +0800
Subject: net: microchip: sparx5: Fix memory leak for
 vcap_api_rule_add_keyvalue_test()

Inject fault while probing kunit-example-test.ko, the field which
is allocated by kzalloc in vcap_rule_add_key() of
vcap_rule_add_key_bit/u32/u128() is not freed, and it cause
the memory leaks below.

unreferenced object 0xffff0276c14b7240 (size 64):
  comm "kunit_try_catch", pid 284, jiffies 4294894220 (age 920.072s)
  hex dump (first 32 bytes):
    28 3c 61 82 00 80 ff ff 28 3c 61 82 00 80 ff ff  (<a.....(<a.....
    67 00 00 00 00 00 00 00 00 01 37 2b af ab ff ff  g.........7+....
  backtrace:
    [<0000000028f08898>] slab_post_alloc_hook+0xb8/0x368
    [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290
    [<000000004620684a>] kmalloc_trace+0x40/0x164
    [<0000000059ad6bcd>] vcap_rule_add_key+0x104/0x180
    [<00000000ff8002d3>] vcap_api_rule_add_keyvalue_test+0x100/0xba8
    [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac
    [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<0000000026284079>] kthread+0x124/0x130
    [<0000000024d4a996>] ret_from_fork+0x10/0x20
unreferenced object 0xffff0276c14b7280 (size 64):
  comm "kunit_try_catch", pid 284, jiffies 4294894221 (age 920.068s)
  hex dump (first 32 bytes):
    28 3c 61 82 00 80 ff ff 28 3c 61 82 00 80 ff ff  (<a.....(<a.....
    67 00 00 00 00 00 00 00 01 01 37 2b af ab ff ff  g.........7+....
  backtrace:
    [<0000000028f08898>] slab_post_alloc_hook+0xb8/0x368
    [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290
    [<000000004620684a>] kmalloc_trace+0x40/0x164
    [<0000000059ad6bcd>] vcap_rule_add_key+0x104/0x180
    [<00000000f5ac9dc7>] vcap_api_rule_add_keyvalue_test+0x168/0xba8
    [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac
    [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<0000000026284079>] kthread+0x124/0x130
    [<0000000024d4a996>] ret_from_fork+0x10/0x20
unreferenced object 0xffff0276c14b72c0 (size 64):
  comm "kunit_try_catch", pid 284, jiffies 4294894221 (age 920.068s)
  hex dump (first 32 bytes):
    28 3c 61 82 00 80 ff ff 28 3c 61 82 00 80 ff ff  (<a.....(<a.....
    67 00 00 00 00 00 00 00 00 00 37 2b af ab ff ff  g.........7+....
  backtrace:
    [<0000000028f08898>] slab_post_alloc_hook+0xb8/0x368
    [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290
    [<000000004620684a>] kmalloc_trace+0x40/0x164
    [<0000000059ad6bcd>] vcap_rule_add_key+0x104/0x180
    [<00000000c918ae7f>] vcap_api_rule_add_keyvalue_test+0x1d0/0xba8
    [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac
    [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<0000000026284079>] kthread+0x124/0x130
    [<0000000024d4a996>] ret_from_fork+0x10/0x20
unreferenced object 0xffff0276c14b7300 (size 64):
  comm "kunit_try_catch", pid 284, jiffies 4294894221 (age 920.084s)
  hex dump (first 32 bytes):
    28 3c 61 82 00 80 ff ff 28 3c 61 82 00 80 ff ff  (<a.....(<a.....
    7d 00 00 00 01 00 00 00 32 54 76 98 ab ff 00 ff  }.......2Tv.....
  backtrace:
    [<0000000028f08898>] slab_post_alloc_hook+0xb8/0x368
    [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290
    [<000000004620684a>] kmalloc_trace+0x40/0x164
    [<0000000059ad6bcd>] vcap_rule_add_key+0x104/0x180
    [<0000000003352814>] vcap_api_rule_add_keyvalue_test+0x240/0xba8
    [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac
    [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<0000000026284079>] kthread+0x124/0x130
    [<0000000024d4a996>] ret_from_fork+0x10/0x20
unreferenced object 0xffff0276c14b7340 (size 64):
  comm "kunit_try_catch", pid 284, jiffies 4294894221 (age 920.084s)
  hex dump (first 32 bytes):
    28 3c 61 82 00 80 ff ff 28 3c 61 82 00 80 ff ff  (<a.....(<a.....
    51 00 00 00 07 00 00 00 17 26 35 44 63 62 71 00  Q........&5Dcbq.
  backtrace:
    [<0000000028f08898>] slab_post_alloc_hook+0xb8/0x368
    [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290
    [<000000004620684a>] kmalloc_trace+0x40/0x164
    [<0000000059ad6bcd>] vcap_rule_add_key+0x104/0x180
    [<000000001516f109>] vcap_api_rule_add_keyvalue_test+0x2cc/0xba8
    [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac
    [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<0000000026284079>] kthread+0x124/0x130
    [<0000000024d4a996>] ret_from_fork+0x10/0x20

Fixes: c956b9b318d9 ("net: microchip: sparx5: Adding KUNIT tests of key/action values in VCAP API")
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c
index c07f25e791c7..2fb0b8cf2b0c 100644
--- a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c
+++ b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c
@@ -995,6 +995,16 @@ static void vcap_api_encode_rule_actionset_test(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, (u32)0x00000000, actwords[11]);
 }
 
+static void vcap_free_ckf(struct vcap_rule *rule)
+{
+	struct vcap_client_keyfield *ckf, *next_ckf;
+
+	list_for_each_entry_safe(ckf, next_ckf, &rule->keyfields, ctrl.list) {
+		list_del(&ckf->ctrl.list);
+		kfree(ckf);
+	}
+}
+
 static void vcap_api_rule_add_keyvalue_test(struct kunit *test)
 {
 	struct vcap_admin admin = {
@@ -1027,6 +1037,7 @@ static void vcap_api_rule_add_keyvalue_test(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, VCAP_FIELD_BIT, kf->ctrl.type);
 	KUNIT_EXPECT_EQ(test, 0x0, kf->data.u1.value);
 	KUNIT_EXPECT_EQ(test, 0x1, kf->data.u1.mask);
+	vcap_free_ckf(rule);
 
 	INIT_LIST_HEAD(&rule->keyfields);
 	ret = vcap_rule_add_key_bit(rule, VCAP_KF_LOOKUP_FIRST_IS, VCAP_BIT_1);
@@ -1039,6 +1050,7 @@ static void vcap_api_rule_add_keyvalue_test(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, VCAP_FIELD_BIT, kf->ctrl.type);
 	KUNIT_EXPECT_EQ(test, 0x1, kf->data.u1.value);
 	KUNIT_EXPECT_EQ(test, 0x1, kf->data.u1.mask);
+	vcap_free_ckf(rule);
 
 	INIT_LIST_HEAD(&rule->keyfields);
 	ret = vcap_rule_add_key_bit(rule, VCAP_KF_LOOKUP_FIRST_IS,
@@ -1052,6 +1064,7 @@ static void vcap_api_rule_add_keyvalue_test(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, VCAP_FIELD_BIT, kf->ctrl.type);
 	KUNIT_EXPECT_EQ(test, 0x0, kf->data.u1.value);
 	KUNIT_EXPECT_EQ(test, 0x0, kf->data.u1.mask);
+	vcap_free_ckf(rule);
 
 	INIT_LIST_HEAD(&rule->keyfields);
 	ret = vcap_rule_add_key_u32(rule, VCAP_KF_TYPE, 0x98765432, 0xff00ffab);
@@ -1064,6 +1077,7 @@ static void vcap_api_rule_add_keyvalue_test(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, VCAP_FIELD_U32, kf->ctrl.type);
 	KUNIT_EXPECT_EQ(test, 0x98765432, kf->data.u32.value);
 	KUNIT_EXPECT_EQ(test, 0xff00ffab, kf->data.u32.mask);
+	vcap_free_ckf(rule);
 
 	INIT_LIST_HEAD(&rule->keyfields);
 	ret = vcap_rule_add_key_u128(rule, VCAP_KF_L3_IP6_SIP, &dip);
@@ -1078,6 +1092,7 @@ static void vcap_api_rule_add_keyvalue_test(struct kunit *test)
 		KUNIT_EXPECT_EQ(test, dip.value[idx], kf->data.u128.value[idx]);
 	for (idx = 0; idx < ARRAY_SIZE(dip.mask); ++idx)
 		KUNIT_EXPECT_EQ(test, dip.mask[idx], kf->data.u128.mask[idx]);
+	vcap_free_ckf(rule);
 }
 
 static void vcap_api_rule_add_actionvalue_test(struct kunit *test)
-- 
cgit v1.2.3


From 39d0ccc185315408e7cecfcaf06d167927b51052 Mon Sep 17 00:00:00 2001
From: Jinjie Ruan <ruanjinjie@huawei.com>
Date: Tue, 12 Sep 2023 19:03:07 +0800
Subject: net: microchip: sparx5: Fix memory leak for
 vcap_api_rule_add_actionvalue_test()

Inject fault while probing kunit-example-test.ko, the field which
is allocated by kzalloc in vcap_rule_add_action() of
vcap_rule_add_action_bit/u32() is not freed, and it cause
the memory leaks below.

unreferenced object 0xffff0276c496b300 (size 64):
  comm "kunit_try_catch", pid 286, jiffies 4294894224 (age 920.072s)
  hex dump (first 32 bytes):
    68 3c 62 82 00 80 ff ff 68 3c 62 82 00 80 ff ff  h<b.....h<b.....
    3c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  <...............
  backtrace:
    [<0000000028f08898>] slab_post_alloc_hook+0xb8/0x368
    [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290
    [<000000004620684a>] kmalloc_trace+0x40/0x164
    [<000000008b41c84d>] vcap_rule_add_action+0x104/0x178
    [<00000000ae66c16c>] vcap_api_rule_add_actionvalue_test+0xa4/0x990
    [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac
    [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<0000000026284079>] kthread+0x124/0x130
    [<0000000024d4a996>] ret_from_fork+0x10/0x20
unreferenced object 0xffff0276c496b2c0 (size 64):
  comm "kunit_try_catch", pid 286, jiffies 4294894224 (age 920.072s)
  hex dump (first 32 bytes):
    68 3c 62 82 00 80 ff ff 68 3c 62 82 00 80 ff ff  h<b.....h<b.....
    3c 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00  <...............
  backtrace:
    [<0000000028f08898>] slab_post_alloc_hook+0xb8/0x368
    [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290
    [<000000004620684a>] kmalloc_trace+0x40/0x164
    [<000000008b41c84d>] vcap_rule_add_action+0x104/0x178
    [<00000000607782aa>] vcap_api_rule_add_actionvalue_test+0x100/0x990
    [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac
    [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<0000000026284079>] kthread+0x124/0x130
    [<0000000024d4a996>] ret_from_fork+0x10/0x20
unreferenced object 0xffff0276c496b280 (size 64):
  comm "kunit_try_catch", pid 286, jiffies 4294894224 (age 920.072s)
  hex dump (first 32 bytes):
    68 3c 62 82 00 80 ff ff 68 3c 62 82 00 80 ff ff  h<b.....h<b.....
    3c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  <...............
  backtrace:
    [<0000000028f08898>] slab_post_alloc_hook+0xb8/0x368
    [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290
    [<000000004620684a>] kmalloc_trace+0x40/0x164
    [<000000008b41c84d>] vcap_rule_add_action+0x104/0x178
    [<000000004e640602>] vcap_api_rule_add_actionvalue_test+0x15c/0x990
    [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac
    [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<0000000026284079>] kthread+0x124/0x130
    [<0000000024d4a996>] ret_from_fork+0x10/0x20
unreferenced object 0xffff0276c496b240 (size 64):
  comm "kunit_try_catch", pid 286, jiffies 4294894224 (age 920.092s)
  hex dump (first 32 bytes):
    68 3c 62 82 00 80 ff ff 68 3c 62 82 00 80 ff ff  h<b.....h<b.....
    5a 00 00 00 01 00 00 00 32 54 76 98 00 00 00 00  Z.......2Tv.....
  backtrace:
    [<0000000028f08898>] slab_post_alloc_hook+0xb8/0x368
    [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290
    [<000000004620684a>] kmalloc_trace+0x40/0x164
    [<000000008b41c84d>] vcap_rule_add_action+0x104/0x178
    [<0000000011141bf8>] vcap_api_rule_add_actionvalue_test+0x1bc/0x990
    [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac
    [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<0000000026284079>] kthread+0x124/0x130
    [<0000000024d4a996>] ret_from_fork+0x10/0x20
unreferenced object 0xffff0276c496b200 (size 64):
  comm "kunit_try_catch", pid 286, jiffies 4294894224 (age 920.092s)
  hex dump (first 32 bytes):
    68 3c 62 82 00 80 ff ff 68 3c 62 82 00 80 ff ff  h<b.....h<b.....
    28 00 00 00 01 00 00 00 dd cc bb aa 00 00 00 00  (...............
  backtrace:
    [<0000000028f08898>] slab_post_alloc_hook+0xb8/0x368
    [<00000000514b9b37>] __kmem_cache_alloc_node+0x174/0x290
    [<000000004620684a>] kmalloc_trace+0x40/0x164
    [<000000008b41c84d>] vcap_rule_add_action+0x104/0x178
    [<00000000d5ed3088>] vcap_api_rule_add_actionvalue_test+0x22c/0x990
    [<00000000fcc5326c>] kunit_try_run_case+0x50/0xac
    [<00000000f5f45b20>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<0000000026284079>] kthread+0x124/0x130
    [<0000000024d4a996>] ret_from_fork+0x10/0x20

Fixes: c956b9b318d9 ("net: microchip: sparx5: Adding KUNIT tests of key/action values in VCAP API")
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c
index 2fb0b8cf2b0c..f268383a7570 100644
--- a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c
+++ b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c
@@ -1095,6 +1095,17 @@ static void vcap_api_rule_add_keyvalue_test(struct kunit *test)
 	vcap_free_ckf(rule);
 }
 
+static void vcap_free_caf(struct vcap_rule *rule)
+{
+	struct vcap_client_actionfield *caf, *next_caf;
+
+	list_for_each_entry_safe(caf, next_caf,
+				 &rule->actionfields, ctrl.list) {
+		list_del(&caf->ctrl.list);
+		kfree(caf);
+	}
+}
+
 static void vcap_api_rule_add_actionvalue_test(struct kunit *test)
 {
 	struct vcap_admin admin = {
@@ -1120,6 +1131,7 @@ static void vcap_api_rule_add_actionvalue_test(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, VCAP_AF_POLICE_ENA, af->ctrl.action);
 	KUNIT_EXPECT_EQ(test, VCAP_FIELD_BIT, af->ctrl.type);
 	KUNIT_EXPECT_EQ(test, 0x0, af->data.u1.value);
+	vcap_free_caf(rule);
 
 	INIT_LIST_HEAD(&rule->actionfields);
 	ret = vcap_rule_add_action_bit(rule, VCAP_AF_POLICE_ENA, VCAP_BIT_1);
@@ -1131,6 +1143,7 @@ static void vcap_api_rule_add_actionvalue_test(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, VCAP_AF_POLICE_ENA, af->ctrl.action);
 	KUNIT_EXPECT_EQ(test, VCAP_FIELD_BIT, af->ctrl.type);
 	KUNIT_EXPECT_EQ(test, 0x1, af->data.u1.value);
+	vcap_free_caf(rule);
 
 	INIT_LIST_HEAD(&rule->actionfields);
 	ret = vcap_rule_add_action_bit(rule, VCAP_AF_POLICE_ENA, VCAP_BIT_ANY);
@@ -1142,6 +1155,7 @@ static void vcap_api_rule_add_actionvalue_test(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, VCAP_AF_POLICE_ENA, af->ctrl.action);
 	KUNIT_EXPECT_EQ(test, VCAP_FIELD_BIT, af->ctrl.type);
 	KUNIT_EXPECT_EQ(test, 0x0, af->data.u1.value);
+	vcap_free_caf(rule);
 
 	INIT_LIST_HEAD(&rule->actionfields);
 	ret = vcap_rule_add_action_u32(rule, VCAP_AF_TYPE, 0x98765432);
@@ -1153,6 +1167,7 @@ static void vcap_api_rule_add_actionvalue_test(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, VCAP_AF_TYPE, af->ctrl.action);
 	KUNIT_EXPECT_EQ(test, VCAP_FIELD_U32, af->ctrl.type);
 	KUNIT_EXPECT_EQ(test, 0x98765432, af->data.u32.value);
+	vcap_free_caf(rule);
 
 	INIT_LIST_HEAD(&rule->actionfields);
 	ret = vcap_rule_add_action_u32(rule, VCAP_AF_MASK_MODE, 0xaabbccdd);
@@ -1164,6 +1179,7 @@ static void vcap_api_rule_add_actionvalue_test(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, VCAP_AF_MASK_MODE, af->ctrl.action);
 	KUNIT_EXPECT_EQ(test, VCAP_FIELD_U32, af->ctrl.type);
 	KUNIT_EXPECT_EQ(test, 0xaabbccdd, af->data.u32.value);
+	vcap_free_caf(rule);
 }
 
 static void vcap_api_rule_find_keyset_basic_test(struct kunit *test)
-- 
cgit v1.2.3


From 89e3af0277388f32d56915a6715c735e4afae5d6 Mon Sep 17 00:00:00 2001
From: Jinjie Ruan <ruanjinjie@huawei.com>
Date: Tue, 12 Sep 2023 19:03:08 +0800
Subject: net: microchip: sparx5: Fix possible memory leak in
 vcap_api_encode_rule_test()

Inject fault while probing kunit-example-test.ko, the duprule which
is allocated in vcap_dup_rule() and the vcap enabled port which
is allocated in vcap_enable() of vcap_enable_lookups in
vcap_api_encode_rule_test() is not freed, and it cause the memory
leaks below.

Use vcap_enable_lookups() with false arg to free the vcap enabled
port as other drivers do it. And use vcap_del_rule() to
free the duprule.

unreferenced object 0xffff677a0278bb00 (size 64):
  comm "kunit_try_catch", pid 388, jiffies 4294895987 (age 1101.840s)
  hex dump (first 32 bytes):
    18 bd a5 82 00 80 ff ff 18 bd a5 82 00 80 ff ff  ................
    40 fe c8 0e be c6 ff ff 00 00 00 00 00 00 00 00  @...............
  backtrace:
    [<000000007d53023a>] slab_post_alloc_hook+0xb8/0x368
    [<0000000076e3f654>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000034d76721>] kmalloc_trace+0x40/0x164
    [<00000000013380a5>] vcap_enable_lookups+0x1c8/0x70c
    [<00000000bbec496b>] vcap_api_encode_rule_test+0x2f8/0xb18
    [<000000002c2bfb7b>] kunit_try_run_case+0x50/0xac
    [<00000000ff74642b>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<000000004af845ca>] kthread+0x124/0x130
    [<0000000038a000ca>] ret_from_fork+0x10/0x20
unreferenced object 0xffff677a027803c0 (size 192):
  comm "kunit_try_catch", pid 388, jiffies 4294895988 (age 1101.836s)
  hex dump (first 32 bytes):
    00 12 7a 00 05 00 00 00 0a 00 00 00 64 00 00 00  ..z.........d...
    00 00 00 00 00 00 00 00 d8 03 78 02 7a 67 ff ff  ..........x.zg..
  backtrace:
    [<000000007d53023a>] slab_post_alloc_hook+0xb8/0x368
    [<0000000076e3f654>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000034d76721>] kmalloc_trace+0x40/0x164
    [<00000000c1010131>] vcap_dup_rule+0x34/0x14c
    [<00000000d43c54a4>] vcap_add_rule+0x29c/0x32c
    [<0000000073f1c26d>] vcap_api_encode_rule_test+0x304/0xb18
    [<000000002c2bfb7b>] kunit_try_run_case+0x50/0xac
    [<00000000ff74642b>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<000000004af845ca>] kthread+0x124/0x130
    [<0000000038a000ca>] ret_from_fork+0x10/0x20

Fixes: c956b9b318d9 ("net: microchip: sparx5: Adding KUNIT tests of key/action values in VCAP API")
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c
index f268383a7570..8c61a5dbce55 100644
--- a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c
+++ b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c
@@ -1439,6 +1439,10 @@ static void vcap_api_encode_rule_test(struct kunit *test)
 	ret = list_empty(&is2_admin.rules);
 	KUNIT_EXPECT_EQ(test, false, ret);
 	KUNIT_EXPECT_EQ(test, 0, ret);
+
+	vcap_enable_lookups(&test_vctrl, &test_netdev, 0, 0,
+			    rule->cookie, false);
+
 	vcap_free_rule(rule);
 
 	/* Check that the rule has been freed: tricky to access since this
@@ -1449,6 +1453,8 @@ static void vcap_api_encode_rule_test(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, true, ret);
 	ret = list_empty(&rule->actionfields);
 	KUNIT_EXPECT_EQ(test, true, ret);
+
+	vcap_del_rule(&test_vctrl, &test_netdev, id);
 }
 
 static void vcap_api_set_rule_counter_test(struct kunit *test)
-- 
cgit v1.2.3


From 20146fa73ab8db2ab9f4916bbaf4610646787a09 Mon Sep 17 00:00:00 2001
From: Jinjie Ruan <ruanjinjie@huawei.com>
Date: Tue, 12 Sep 2023 19:03:09 +0800
Subject: net: microchip: sparx5: Fix possible memory leaks in
 test_vcap_xn_rule_creator()

Inject fault while probing kunit-example-test.ko, the rule which
is allocated by kzalloc in vcap_alloc_rule(), the field which is
allocated by kzalloc in vcap_rule_add_action() and
vcap_rule_add_key() is not freed, and it cause the memory leaks
below. Use vcap_free_rule() to free them as other drivers do it.

And since the return rule of test_vcap_xn_rule_creator() is not
used, remove it and switch to void.

unreferenced object 0xffff058383334240 (size 192):
  comm "kunit_try_catch", pid 309, jiffies 4294894222 (age 639.800s)
  hex dump (first 32 bytes):
    10 27 00 00 04 00 00 00 14 00 00 00 90 01 00 00  .'..............
    00 00 00 00 00 00 00 00 00 81 93 84 83 05 ff ff  ................
  backtrace:
    [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368
    [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000061886991>] kmalloc_trace+0x40/0x164
    [<00000000648fefae>] vcap_alloc_rule+0x17c/0x26c
    [<000000004da16164>] test_vcap_xn_rule_creator.constprop.43+0xac/0x328
    [<00000000231b1097>] vcap_api_rule_insert_in_order_test+0xcc/0x184
    [<00000000548b559e>] kunit_try_run_case+0x50/0xac
    [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<00000000e646f120>] kthread+0x124/0x130
    [<000000005257599e>] ret_from_fork+0x10/0x20
unreferenced object 0xffff0583849380c0 (size 64):
  comm "kunit_try_catch", pid 309, jiffies 4294894222 (age 639.800s)
  hex dump (first 32 bytes):
    40 81 93 84 83 05 ff ff 68 42 33 83 83 05 ff ff  @.......hB3.....
    22 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00  "...............
  backtrace:
    [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368
    [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000061886991>] kmalloc_trace+0x40/0x164
    [<00000000ee41df9e>] vcap_rule_add_action+0x104/0x178
    [<000000001cc1bb38>] test_vcap_xn_rule_creator.constprop.43+0xd8/0x328
    [<00000000231b1097>] vcap_api_rule_insert_in_order_test+0xcc/0x184
    [<00000000548b559e>] kunit_try_run_case+0x50/0xac
    [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<00000000e646f120>] kthread+0x124/0x130
    [<000000005257599e>] ret_from_fork+0x10/0x20
unreferenced object 0xffff058384938100 (size 64):
  comm "kunit_try_catch", pid 309, jiffies 4294894222 (age 639.800s)
  hex dump (first 32 bytes):
    80 81 93 84 83 05 ff ff 58 42 33 83 83 05 ff ff  ........XB3.....
    7d 00 00 00 01 00 00 00 02 00 00 00 ff 00 00 00  }...............
  backtrace:
    [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368
    [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000061886991>] kmalloc_trace+0x40/0x164
    [<0000000043c78991>] vcap_rule_add_key+0x104/0x180
    [<00000000ba73cfbe>] vcap_add_type_keyfield+0xfc/0x128
    [<000000002b00f7df>] vcap_val_rule+0x274/0x3e8
    [<00000000e67d2ff5>] test_vcap_xn_rule_creator.constprop.43+0xf0/0x328
    [<00000000231b1097>] vcap_api_rule_insert_in_order_test+0xcc/0x184
    [<00000000548b559e>] kunit_try_run_case+0x50/0xac
    [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<00000000e646f120>] kthread+0x124/0x130
    [<000000005257599e>] ret_from_fork+0x10/0x20

unreferenced object 0xffff0583833b6240 (size 192):
  comm "kunit_try_catch", pid 311, jiffies 4294894225 (age 639.844s)
  hex dump (first 32 bytes):
    10 27 00 00 04 00 00 00 1e 00 00 00 2c 01 00 00  .'..........,...
    00 00 00 00 00 00 00 00 40 91 8f 84 83 05 ff ff  ........@.......
  backtrace:
    [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368
    [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000061886991>] kmalloc_trace+0x40/0x164
    [<00000000648fefae>] vcap_alloc_rule+0x17c/0x26c
    [<000000004da16164>] test_vcap_xn_rule_creator.constprop.43+0xac/0x328
    [<00000000509de3f4>] vcap_api_rule_insert_reverse_order_test+0x10c/0x654
    [<00000000548b559e>] kunit_try_run_case+0x50/0xac
    [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<00000000e646f120>] kthread+0x124/0x130
    [<000000005257599e>] ret_from_fork+0x10/0x20
unreferenced object 0xffff0583848f9100 (size 64):
  comm "kunit_try_catch", pid 311, jiffies 4294894225 (age 639.844s)
  hex dump (first 32 bytes):
    80 91 8f 84 83 05 ff ff 68 62 3b 83 83 05 ff ff  ........hb;.....
    22 00 00 00 01 00 00 00 00 00 00 00 a5 b4 ff ff  "...............
  backtrace:
    [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368
    [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000061886991>] kmalloc_trace+0x40/0x164
    [<00000000ee41df9e>] vcap_rule_add_action+0x104/0x178
    [<000000001cc1bb38>] test_vcap_xn_rule_creator.constprop.43+0xd8/0x328
    [<00000000509de3f4>] vcap_api_rule_insert_reverse_order_test+0x10c/0x654
    [<00000000548b559e>] kunit_try_run_case+0x50/0xac
    [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<00000000e646f120>] kthread+0x124/0x130
    [<000000005257599e>] ret_from_fork+0x10/0x20
unreferenced object 0xffff0583848f9140 (size 64):
  comm "kunit_try_catch", pid 311, jiffies 4294894225 (age 639.844s)
  hex dump (first 32 bytes):
    c0 91 8f 84 83 05 ff ff 58 62 3b 83 83 05 ff ff  ........Xb;.....
    7d 00 00 00 01 00 00 00 02 00 00 00 ff 00 00 00  }...............
  backtrace:
    [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368
    [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000061886991>] kmalloc_trace+0x40/0x164
    [<0000000043c78991>] vcap_rule_add_key+0x104/0x180
    [<00000000ba73cfbe>] vcap_add_type_keyfield+0xfc/0x128
    [<000000002b00f7df>] vcap_val_rule+0x274/0x3e8
    [<00000000e67d2ff5>] test_vcap_xn_rule_creator.constprop.43+0xf0/0x328
    [<00000000509de3f4>] vcap_api_rule_insert_reverse_order_test+0x10c/0x654
    [<00000000548b559e>] kunit_try_run_case+0x50/0xac
    [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<00000000e646f120>] kthread+0x124/0x130
    [<000000005257599e>] ret_from_fork+0x10/0x20

unreferenced object 0xffff05838264e0c0 (size 192):
  comm "kunit_try_catch", pid 313, jiffies 4294894230 (age 639.864s)
  hex dump (first 32 bytes):
    10 27 00 00 04 00 00 00 0a 00 00 00 f4 01 00 00  .'..............
    00 00 00 00 00 00 00 00 40 3a 97 84 83 05 ff ff  ........@:......
  backtrace:
    [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368
    [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000061886991>] kmalloc_trace+0x40/0x164
    [<00000000648fefae>] vcap_alloc_rule+0x17c/0x26c
    [<000000004da16164>] test_vcap_xn_rule_creator.constprop.43+0xac/0x328
    [<00000000a29794d8>] vcap_api_rule_remove_at_end_test+0xbc/0xb48
    [<00000000548b559e>] kunit_try_run_case+0x50/0xac
    [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<00000000e646f120>] kthread+0x124/0x130
    [<000000005257599e>] ret_from_fork+0x10/0x20
unreferenced object 0xffff058384973a80 (size 64):
  comm "kunit_try_catch", pid 313, jiffies 4294894230 (age 639.864s)
  hex dump (first 32 bytes):
    e8 e0 64 82 83 05 ff ff e8 e0 64 82 83 05 ff ff  ..d.......d.....
    22 00 00 00 01 00 00 00 00 00 00 00 00 80 ff ff  "...............
  backtrace:
    [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368
    [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000061886991>] kmalloc_trace+0x40/0x164
    [<00000000ee41df9e>] vcap_rule_add_action+0x104/0x178
    [<000000001cc1bb38>] test_vcap_xn_rule_creator.constprop.43+0xd8/0x328
    [<00000000a29794d8>] vcap_api_rule_remove_at_end_test+0xbc/0xb48
    [<00000000548b559e>] kunit_try_run_case+0x50/0xac
    [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<00000000e646f120>] kthread+0x124/0x130
    [<000000005257599e>] ret_from_fork+0x10/0x20
unreferenced object 0xffff058384973a40 (size 64):
  comm "kunit_try_catch", pid 313, jiffies 4294894230 (age 639.880s)
  hex dump (first 32 bytes):
    80 39 97 84 83 05 ff ff d8 e0 64 82 83 05 ff ff  .9........d.....
    7d 00 00 00 00 00 00 00 00 01 00 00 00 00 00 00  }...............
  backtrace:
    [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368
    [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000061886991>] kmalloc_trace+0x40/0x164
    [<0000000043c78991>] vcap_rule_add_key+0x104/0x180
    [<0000000094335477>] vcap_add_type_keyfield+0xbc/0x128
    [<000000002b00f7df>] vcap_val_rule+0x274/0x3e8
    [<00000000e67d2ff5>] test_vcap_xn_rule_creator.constprop.43+0xf0/0x328
    [<00000000a29794d8>] vcap_api_rule_remove_at_end_test+0xbc/0xb48
    [<00000000548b559e>] kunit_try_run_case+0x50/0xac
    [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<00000000e646f120>] kthread+0x124/0x130
    [<000000005257599e>] ret_from_fork+0x10/0x20

unreferenced object 0xffff0583832fa240 (size 192):
  comm "kunit_try_catch", pid 315, jiffies 4294894233 (age 639.920s)
  hex dump (first 32 bytes):
    10 27 00 00 04 00 00 00 14 00 00 00 90 01 00 00  .'..............
    00 00 00 00 00 00 00 00 00 a1 8b 84 83 05 ff ff  ................
  backtrace:
    [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368
    [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000061886991>] kmalloc_trace+0x40/0x164
    [<00000000648fefae>] vcap_alloc_rule+0x17c/0x26c
    [<000000004da16164>] test_vcap_xn_rule_creator.constprop.43+0xac/0x328
    [<00000000be638a45>] vcap_api_rule_remove_in_middle_test+0xc4/0xb80
    [<00000000548b559e>] kunit_try_run_case+0x50/0xac
    [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<00000000e646f120>] kthread+0x124/0x130
    [<000000005257599e>] ret_from_fork+0x10/0x20
unreferenced object 0xffff0583848ba0c0 (size 64):
  comm "kunit_try_catch", pid 315, jiffies 4294894233 (age 639.920s)
  hex dump (first 32 bytes):
    40 a1 8b 84 83 05 ff ff 68 a2 2f 83 83 05 ff ff  @.......h./.....
    22 00 00 00 01 00 00 00 00 00 00 00 00 80 ff ff  "...............
  backtrace:
    [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368
    [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000061886991>] kmalloc_trace+0x40/0x164
    [<00000000ee41df9e>] vcap_rule_add_action+0x104/0x178
    [<000000001cc1bb38>] test_vcap_xn_rule_creator.constprop.43+0xd8/0x328
    [<00000000be638a45>] vcap_api_rule_remove_in_middle_test+0xc4/0xb80
    [<00000000548b559e>] kunit_try_run_case+0x50/0xac
    [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<00000000e646f120>] kthread+0x124/0x130
    [<000000005257599e>] ret_from_fork+0x10/0x20
unreferenced object 0xffff0583848ba100 (size 64):
  comm "kunit_try_catch", pid 315, jiffies 4294894233 (age 639.920s)
  hex dump (first 32 bytes):
    80 a1 8b 84 83 05 ff ff 58 a2 2f 83 83 05 ff ff  ........X./.....
    7d 00 00 00 01 00 00 00 02 00 00 00 ff 00 00 00  }...............
  backtrace:
    [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368
    [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000061886991>] kmalloc_trace+0x40/0x164
    [<0000000043c78991>] vcap_rule_add_key+0x104/0x180
    [<00000000ba73cfbe>] vcap_add_type_keyfield+0xfc/0x128
    [<000000002b00f7df>] vcap_val_rule+0x274/0x3e8
    [<00000000e67d2ff5>] test_vcap_xn_rule_creator.constprop.43+0xf0/0x328
    [<00000000be638a45>] vcap_api_rule_remove_in_middle_test+0xc4/0xb80
    [<00000000548b559e>] kunit_try_run_case+0x50/0xac
    [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<00000000e646f120>] kthread+0x124/0x130
    [<000000005257599e>] ret_from_fork+0x10/0x20

unreferenced object 0xffff0583827d2180 (size 192):
  comm "kunit_try_catch", pid 317, jiffies 4294894238 (age 639.956s)
  hex dump (first 32 bytes):
    10 27 00 00 04 00 00 00 14 00 00 00 90 01 00 00  .'..............
    00 00 00 00 00 00 00 00 00 e1 06 83 83 05 ff ff  ................
  backtrace:
    [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368
    [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000061886991>] kmalloc_trace+0x40/0x164
    [<00000000648fefae>] vcap_alloc_rule+0x17c/0x26c
    [<000000004da16164>] test_vcap_xn_rule_creator.constprop.43+0xac/0x328
    [<00000000e1ed8350>] vcap_api_rule_remove_in_front_test+0x144/0x6c0
    [<00000000548b559e>] kunit_try_run_case+0x50/0xac
    [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<00000000e646f120>] kthread+0x124/0x130
    [<000000005257599e>] ret_from_fork+0x10/0x20
unreferenced object 0xffff05838306e0c0 (size 64):
  comm "kunit_try_catch", pid 317, jiffies 4294894238 (age 639.956s)
  hex dump (first 32 bytes):
    40 e1 06 83 83 05 ff ff a8 21 7d 82 83 05 ff ff  @........!}.....
    22 00 00 00 01 00 00 00 00 00 00 00 00 80 ff ff  "...............
  backtrace:
    [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368
    [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000061886991>] kmalloc_trace+0x40/0x164
    [<00000000ee41df9e>] vcap_rule_add_action+0x104/0x178
    [<000000001cc1bb38>] test_vcap_xn_rule_creator.constprop.43+0xd8/0x328
    [<00000000e1ed8350>] vcap_api_rule_remove_in_front_test+0x144/0x6c0
    [<00000000548b559e>] kunit_try_run_case+0x50/0xac
    [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<00000000e646f120>] kthread+0x124/0x130
    [<000000005257599e>] ret_from_fork+0x10/0x20
unreferenced object 0xffff05838306e180 (size 64):
  comm "kunit_try_catch", pid 317, jiffies 4294894238 (age 639.968s)
  hex dump (first 32 bytes):
    98 21 7d 82 83 05 ff ff 00 e1 06 83 83 05 ff ff  .!}.............
    67 00 00 00 00 00 00 00 01 01 00 00 ff 00 00 00  g...............
  backtrace:
    [<000000008585a8f7>] slab_post_alloc_hook+0xb8/0x368
    [<00000000795eba12>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000061886991>] kmalloc_trace+0x40/0x164
    [<0000000043c78991>] vcap_rule_add_key+0x104/0x180
    [<000000006ce4945d>] test_add_def_fields+0x84/0x8c
    [<00000000507e0ab6>] vcap_val_rule+0x294/0x3e8
    [<00000000e67d2ff5>] test_vcap_xn_rule_creator.constprop.43+0xf0/0x328
    [<00000000e1ed8350>] vcap_api_rule_remove_in_front_test+0x144/0x6c0
    [<00000000548b559e>] kunit_try_run_case+0x50/0xac
    [<00000000663f0105>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<00000000e646f120>] kthread+0x124/0x130
    [<000000005257599e>] ret_from_fork+0x10/0x20

Fixes: dccc30cc4906 ("net: microchip: sparx5: Add KUNIT test of counters and sorted rules")
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202309090950.uOTEKQq3-lkp@intel.com/
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c
index 8c61a5dbce55..99f04a53a442 100644
--- a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c
+++ b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c
@@ -243,10 +243,9 @@ static void vcap_test_api_init(struct vcap_admin *admin)
 }
 
 /* Helper function to create a rule of a specific size */
-static struct vcap_rule *
-test_vcap_xn_rule_creator(struct kunit *test, int cid, enum vcap_user user,
-			  u16 priority,
-			  int id, int size, int expected_addr)
+static void test_vcap_xn_rule_creator(struct kunit *test, int cid,
+				      enum vcap_user user, u16 priority,
+				      int id, int size, int expected_addr)
 {
 	struct vcap_rule *rule;
 	struct vcap_rule_internal *ri;
@@ -311,7 +310,7 @@ test_vcap_xn_rule_creator(struct kunit *test, int cid, enum vcap_user user,
 	ret = vcap_add_rule(rule);
 	KUNIT_EXPECT_EQ(test, 0, ret);
 	KUNIT_EXPECT_EQ(test, expected_addr, ri->addr);
-	return rule;
+	vcap_free_rule(rule);
 }
 
 /* Prepare testing rule deletion */
-- 
cgit v1.2.3


From 2a2dffd911d4139258b828b9c5056cb64b826758 Mon Sep 17 00:00:00 2001
From: Jinjie Ruan <ruanjinjie@huawei.com>
Date: Tue, 12 Sep 2023 19:03:10 +0800
Subject: net: microchip: sparx5: Fix possible memory leaks in vcap_api_kunit

Inject fault while probing kunit-example-test.ko, the duprule which
is allocated by kzalloc in vcap_dup_rule() of
test_vcap_xn_rule_creator() is not freed, and it cause the memory leaks
below. Use vcap_del_rule() to free them as other functions do it.

unreferenced object 0xffff6eb4846f6180 (size 192):
  comm "kunit_try_catch", pid 405, jiffies 4294895522 (age 880.004s)
  hex dump (first 32 bytes):
    10 27 00 00 04 00 00 00 0a 00 00 00 f4 01 00 00  .'..............
    00 00 00 00 00 00 00 00 98 61 6f 84 b4 6e ff ff  .........ao..n..
  backtrace:
    [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368
    [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000046ef1b64>] kmalloc_trace+0x40/0x164
    [<000000008565145b>] vcap_dup_rule+0x38/0x210
    [<00000000bd9e1f12>] vcap_add_rule+0x29c/0x32c
    [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330
    [<00000000d2ac4ccb>] vcap_api_rule_insert_in_order_test+0xa4/0x114
    [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac
    [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<0000000058a88b6b>] kthread+0x124/0x130
    [<00000000891cf28a>] ret_from_fork+0x10/0x20
unreferenced object 0xffff6eb4846f6240 (size 192):
  comm "kunit_try_catch", pid 405, jiffies 4294895524 (age 879.996s)
  hex dump (first 32 bytes):
    10 27 00 00 04 00 00 00 14 00 00 00 90 01 00 00  .'..............
    00 00 00 00 00 00 00 00 58 62 6f 84 b4 6e ff ff  ........Xbo..n..
  backtrace:
    [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368
    [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000046ef1b64>] kmalloc_trace+0x40/0x164
    [<000000008565145b>] vcap_dup_rule+0x38/0x210
    [<00000000bd9e1f12>] vcap_add_rule+0x29c/0x32c
    [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330
    [<0000000052e6ad35>] vcap_api_rule_insert_in_order_test+0xbc/0x114
    [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac
    [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<0000000058a88b6b>] kthread+0x124/0x130
    [<00000000891cf28a>] ret_from_fork+0x10/0x20
unreferenced object 0xffff6eb4846f6300 (size 192):
  comm "kunit_try_catch", pid 405, jiffies 4294895524 (age 879.996s)
  hex dump (first 32 bytes):
    10 27 00 00 04 00 00 00 1e 00 00 00 2c 01 00 00  .'..........,...
    00 00 00 00 00 00 00 00 18 63 6f 84 b4 6e ff ff  .........co..n..
  backtrace:
    [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368
    [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000046ef1b64>] kmalloc_trace+0x40/0x164
    [<000000008565145b>] vcap_dup_rule+0x38/0x210
    [<00000000bd9e1f12>] vcap_add_rule+0x29c/0x32c
    [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330
    [<000000001b0895d4>] vcap_api_rule_insert_in_order_test+0xd4/0x114
    [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac
    [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<0000000058a88b6b>] kthread+0x124/0x130
    [<00000000891cf28a>] ret_from_fork+0x10/0x20
unreferenced object 0xffff6eb4846f63c0 (size 192):
  comm "kunit_try_catch", pid 405, jiffies 4294895524 (age 880.012s)
  hex dump (first 32 bytes):
    10 27 00 00 04 00 00 00 28 00 00 00 c8 00 00 00  .'......(.......
    00 00 00 00 00 00 00 00 d8 63 6f 84 b4 6e ff ff  .........co..n..
  backtrace:
    [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368
    [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000046ef1b64>] kmalloc_trace+0x40/0x164
    [<000000008565145b>] vcap_dup_rule+0x38/0x210
    [<00000000bd9e1f12>] vcap_add_rule+0x29c/0x32c
    [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330
    [<00000000134c151f>] vcap_api_rule_insert_in_order_test+0xec/0x114
    [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac
    [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<0000000058a88b6b>] kthread+0x124/0x130
    [<00000000891cf28a>] ret_from_fork+0x10/0x20
unreferenced object 0xffff6eb4845fc180 (size 192):
  comm "kunit_try_catch", pid 407, jiffies 4294895527 (age 880.000s)
  hex dump (first 32 bytes):
    10 27 00 00 04 00 00 00 14 00 00 00 c8 00 00 00  .'..............
    00 00 00 00 00 00 00 00 98 c1 5f 84 b4 6e ff ff  .........._..n..
  backtrace:
    [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368
    [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000046ef1b64>] kmalloc_trace+0x40/0x164
    [<000000008565145b>] vcap_dup_rule+0x38/0x210
    [<00000000bd9e1f12>] vcap_add_rule+0x29c/0x32c
    [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330
    [<00000000fa5f64d3>] vcap_api_rule_insert_reverse_order_test+0xc8/0x600
    [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac
    [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<0000000058a88b6b>] kthread+0x124/0x130
    [<00000000891cf28a>] ret_from_fork+0x10/0x20
unreferenced object 0xffff6eb4845fc240 (size 192):
  comm "kunit_try_catch", pid 407, jiffies 4294895527 (age 880.000s)
  hex dump (first 32 bytes):
    10 27 00 00 04 00 00 00 1e 00 00 00 2c 01 00 00  .'..........,...
    00 00 00 00 00 00 00 00 58 c2 5f 84 b4 6e ff ff  ........X._..n..
  backtrace:
    [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368
    [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000046ef1b64>] kmalloc_trace+0x40/0x164
    [<000000008565145b>] vcap_dup_rule+0x38/0x210
    [<00000000453dcd80>] vcap_add_rule+0x134/0x32c
    [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330
    [<00000000a7db42de>] vcap_api_rule_insert_reverse_order_test+0x108/0x600
    [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac
    [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<0000000058a88b6b>] kthread+0x124/0x130
    [<00000000891cf28a>] ret_from_fork+0x10/0x20
unreferenced object 0xffff6eb4845fc300 (size 192):
  comm "kunit_try_catch", pid 407, jiffies 4294895527 (age 880.000s)
  hex dump (first 32 bytes):
    10 27 00 00 04 00 00 00 28 00 00 00 90 01 00 00  .'......(.......
    00 00 00 00 00 00 00 00 18 c3 5f 84 b4 6e ff ff  .........._..n..
  backtrace:
    [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368
    [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000046ef1b64>] kmalloc_trace+0x40/0x164
    [<000000008565145b>] vcap_dup_rule+0x38/0x210
    [<00000000453dcd80>] vcap_add_rule+0x134/0x32c
    [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330
    [<00000000ea416c94>] vcap_api_rule_insert_reverse_order_test+0x150/0x600
    [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac
    [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<0000000058a88b6b>] kthread+0x124/0x130
    [<00000000891cf28a>] ret_from_fork+0x10/0x20
unreferenced object 0xffff6eb4845fc3c0 (size 192):
  comm "kunit_try_catch", pid 407, jiffies 4294895527 (age 880.020s)
  hex dump (first 32 bytes):
    10 27 00 00 04 00 00 00 32 00 00 00 f4 01 00 00  .'......2.......
    00 00 00 00 00 00 00 00 d8 c3 5f 84 b4 6e ff ff  .........._..n..
  backtrace:
    [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368
    [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000046ef1b64>] kmalloc_trace+0x40/0x164
    [<000000008565145b>] vcap_dup_rule+0x38/0x210
    [<00000000453dcd80>] vcap_add_rule+0x134/0x32c
    [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330
    [<00000000764a39b4>] vcap_api_rule_insert_reverse_order_test+0x198/0x600
    [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac
    [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<0000000058a88b6b>] kthread+0x124/0x130
    [<00000000891cf28a>] ret_from_fork+0x10/0x20
unreferenced object 0xffff6eb484cd4240 (size 192):
  comm "kunit_try_catch", pid 413, jiffies 4294895543 (age 879.956s)
  hex dump (first 32 bytes):
    10 27 00 00 04 00 00 00 1e 00 00 00 2c 01 00 00  .'..........,...
    00 00 00 00 00 00 00 00 58 42 cd 84 b4 6e ff ff  ........XB...n..
  backtrace:
    [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368
    [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000046ef1b64>] kmalloc_trace+0x40/0x164
    [<000000008565145b>] vcap_dup_rule+0x38/0x210
    [<00000000bd9e1f12>] vcap_add_rule+0x29c/0x32c
    [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330
    [<0000000023976dd4>] vcap_api_rule_remove_in_front_test+0x158/0x658
    [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac
    [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<0000000058a88b6b>] kthread+0x124/0x130
    [<00000000891cf28a>] ret_from_fork+0x10/0x20
unreferenced object 0xffff6eb484cd4300 (size 192):
  comm "kunit_try_catch", pid 413, jiffies 4294895543 (age 879.956s)
  hex dump (first 32 bytes):
    10 27 00 00 04 00 00 00 28 00 00 00 c8 00 00 00  .'......(.......
    00 00 00 00 00 00 00 00 18 43 cd 84 b4 6e ff ff  .........C...n..
  backtrace:
    [<00000000f1b5b86e>] slab_post_alloc_hook+0xb8/0x368
    [<00000000c56cdd9a>] __kmem_cache_alloc_node+0x174/0x290
    [<0000000046ef1b64>] kmalloc_trace+0x40/0x164
    [<000000008565145b>] vcap_dup_rule+0x38/0x210
    [<00000000bd9e1f12>] vcap_add_rule+0x29c/0x32c
    [<0000000070a539b1>] test_vcap_xn_rule_creator.constprop.43+0x120/0x330
    [<000000000b4760ff>] vcap_api_rule_remove_in_front_test+0x170/0x658
    [<000000000f88f9cb>] kunit_try_run_case+0x50/0xac
    [<00000000e848de5a>] kunit_generic_run_threadfn_adapter+0x20/0x2c
    [<0000000058a88b6b>] kthread+0x124/0x130
    [<00000000891cf28a>] ret_from_fork+0x10/0x20

Fixes: dccc30cc4906 ("net: microchip: sparx5: Add KUNIT test of counters and sorted rules")
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c
index 99f04a53a442..fe4e166de8a0 100644
--- a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c
+++ b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c
@@ -1597,6 +1597,11 @@ static void vcap_api_rule_insert_in_order_test(struct kunit *test)
 	test_vcap_xn_rule_creator(test, 10000, VCAP_USER_QOS, 20, 400, 6, 774);
 	test_vcap_xn_rule_creator(test, 10000, VCAP_USER_QOS, 30, 300, 3, 771);
 	test_vcap_xn_rule_creator(test, 10000, VCAP_USER_QOS, 40, 200, 2, 768);
+
+	vcap_del_rule(&test_vctrl, &test_netdev, 200);
+	vcap_del_rule(&test_vctrl, &test_netdev, 300);
+	vcap_del_rule(&test_vctrl, &test_netdev, 400);
+	vcap_del_rule(&test_vctrl, &test_netdev, 500);
 }
 
 static void vcap_api_rule_insert_reverse_order_test(struct kunit *test)
@@ -1655,6 +1660,11 @@ static void vcap_api_rule_insert_reverse_order_test(struct kunit *test)
 		++idx;
 	}
 	KUNIT_EXPECT_EQ(test, 768, admin.last_used_addr);
+
+	vcap_del_rule(&test_vctrl, &test_netdev, 500);
+	vcap_del_rule(&test_vctrl, &test_netdev, 400);
+	vcap_del_rule(&test_vctrl, &test_netdev, 300);
+	vcap_del_rule(&test_vctrl, &test_netdev, 200);
 }
 
 static void vcap_api_rule_remove_at_end_test(struct kunit *test)
@@ -1855,6 +1865,9 @@ static void vcap_api_rule_remove_in_front_test(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, 786, test_init_start);
 	KUNIT_EXPECT_EQ(test, 8, test_init_count);
 	KUNIT_EXPECT_EQ(test, 794, admin.last_used_addr);
+
+	vcap_del_rule(&test_vctrl, &test_netdev, 200);
+	vcap_del_rule(&test_vctrl, &test_netdev, 300);
 }
 
 static struct kunit_case vcap_api_rule_remove_test_cases[] = {
-- 
cgit v1.2.3


From c326ca98446e0ae4fee43a40acf79412b74cfedb Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Tue, 12 Sep 2023 16:16:25 +0200
Subject: selftests: tls: swap the TX and RX sockets in some tests

tls.sendmsg_large and tls.sendmsg_multiple are trying to send through
the self->cfd socket (only configured with TLS_RX) and to receive through
the self->fd socket (only configured with TLS_TX), so they're not using
kTLS at all. Swap the sockets.

Fixes: 7f657d5bf507 ("selftests: tls: add selftests for TLS sockets")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/tls.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 297d972558fb..464853a7f982 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -613,11 +613,11 @@ TEST_F(tls, sendmsg_large)
 
 		msg.msg_iov = &vec;
 		msg.msg_iovlen = 1;
-		EXPECT_EQ(sendmsg(self->cfd, &msg, 0), send_len);
+		EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len);
 	}
 
 	while (recvs++ < sends) {
-		EXPECT_NE(recv(self->fd, mem, send_len, 0), -1);
+		EXPECT_NE(recv(self->cfd, mem, send_len, 0), -1);
 	}
 
 	free(mem);
@@ -646,9 +646,9 @@ TEST_F(tls, sendmsg_multiple)
 	msg.msg_iov = vec;
 	msg.msg_iovlen = iov_len;
 
-	EXPECT_EQ(sendmsg(self->cfd, &msg, 0), total_len);
+	EXPECT_EQ(sendmsg(self->fd, &msg, 0), total_len);
 	buf = malloc(total_len);
-	EXPECT_NE(recv(self->fd, buf, total_len, 0), -1);
+	EXPECT_NE(recv(self->cfd, buf, total_len, 0), -1);
 	for (i = 0; i < iov_len; i++) {
 		EXPECT_EQ(memcmp(test_strs[i], buf + len_cmp,
 				 strlen(test_strs[i])),
-- 
cgit v1.2.3


From a8f367f7e131e76713b2949b168ac97f671fce7a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 12 Sep 2023 20:54:51 +0200
Subject: net: ti: icssg-prueth: add PTP dependency

The driver can now use PTP if enabled but fails to link built-in
if PTP is a loadable module:

aarch64-linux-ld: drivers/net/ethernet/ti/icssg/icss_iep.o: in function `icss_iep_get_ptp_clock_idx':
icss_iep.c:(.text+0x200): undefined reference to `ptp_clock_index'

Add the usual dependency to avoid this.

Fixes: 186734c158865 ("net: ti: icssg-prueth: add packet timestamping and ptp support")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: MD Danish Anwar <danishanwar@ti.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index 88b5b1b47779..0a3346650e03 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -199,6 +199,7 @@ config TI_ICSSG_PRUETH
 
 config TI_ICSS_IEP
 	tristate "TI PRU ICSS IEP driver"
+	depends on PTP_1588_CLOCK_OPTIONAL
 	depends on TI_PRUSS
 	default TI_PRUSS
 	help
-- 
cgit v1.2.3


From 75ad80ed88a182ab2ad5513e448cf07b403af5c3 Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Wed, 13 Sep 2023 09:39:05 +0300
Subject: net/core: Fix ETH_P_1588 flow dissector

When a PTP ethernet raw frame with a size of more than 256 bytes followed
by a 0xff pattern is sent to __skb_flow_dissect, nhoff value calculation
is wrong. For example: hdr->message_length takes the wrong value (0xffff)
and it does not replicate real header length. In this case, 'nhoff' value
was overridden and the PTP header was badly dissected. This leads to a
kernel crash.

net/core: flow_dissector
net/core flow dissector nhoff = 0x0000000e
net/core flow dissector hdr->message_length = 0x0000ffff
net/core flow dissector nhoff = 0x0001000d (u16 overflow)
...
skb linear:   00000000: 00 a0 c9 00 00 00 00 a0 c9 00 00 00 88
skb frag:     00000000: f7 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff

Using the size of the ptp_header struct will allow the corrected
calculation of the nhoff value.

net/core flow dissector nhoff = 0x0000000e
net/core flow dissector nhoff = 0x00000030 (sizeof ptp_header)
...
skb linear:   00000000: 00 a0 c9 00 00 00 00 a0 c9 00 00 00 88 f7 ff ff
skb linear:   00000010: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
skb linear:   00000020: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
skb frag:     00000000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff

Kernel trace:
[   74.984279] ------------[ cut here ]------------
[   74.989471] kernel BUG at include/linux/skbuff.h:2440!
[   74.995237] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
[   75.001098] CPU: 4 PID: 0 Comm: swapper/4 Tainted: G     U            5.15.85-intel-ese-standard-lts #1
[   75.011629] Hardware name: Intel Corporation A-Island (CPU:AlderLake)/A-Island (ID:06), BIOS SB_ADLP.01.01.00.01.03.008.D-6A9D9E73-dirty Mar 30 2023
[   75.026507] RIP: 0010:eth_type_trans+0xd0/0x130
[   75.031594] Code: 03 88 47 78 eb c7 8b 47 68 2b 47 6c 48 8b 97 c0 00 00 00 83 f8 01 7e 1b 48 85 d2 74 06 66 83 3a ff 74 09 b8 00 04 00 00 eb ab <0f> 0b b8 00 01 00 00 eb a2 48 85 ff 74 eb 48 8d 54 24 06 31 f6 b9
[   75.052612] RSP: 0018:ffff9948c0228de0 EFLAGS: 00010297
[   75.058473] RAX: 00000000000003f2 RBX: ffff8e47047dc300 RCX: 0000000000001003
[   75.066462] RDX: ffff8e4e8c9ea040 RSI: ffff8e4704e0a000 RDI: ffff8e47047dc300
[   75.074458] RBP: ffff8e4704e2acc0 R08: 00000000000003f3 R09: 0000000000000800
[   75.082466] R10: 000000000000000d R11: ffff9948c0228dec R12: ffff8e4715e4e010
[   75.090461] R13: ffff9948c0545018 R14: 0000000000000001 R15: 0000000000000800
[   75.098464] FS:  0000000000000000(0000) GS:ffff8e4e8fb00000(0000) knlGS:0000000000000000
[   75.107530] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   75.113982] CR2: 00007f5eb35934a0 CR3: 0000000150e0a002 CR4: 0000000000770ee0
[   75.121980] PKRU: 55555554
[   75.125035] Call Trace:
[   75.127792]  <IRQ>
[   75.130063]  ? eth_get_headlen+0xa4/0xc0
[   75.134472]  igc_process_skb_fields+0xcd/0x150
[   75.139461]  igc_poll+0xc80/0x17b0
[   75.143272]  __napi_poll+0x27/0x170
[   75.147192]  net_rx_action+0x234/0x280
[   75.151409]  __do_softirq+0xef/0x2f4
[   75.155424]  irq_exit_rcu+0xc7/0x110
[   75.159432]  common_interrupt+0xb8/0xd0
[   75.163748]  </IRQ>
[   75.166112]  <TASK>
[   75.168473]  asm_common_interrupt+0x22/0x40
[   75.173175] RIP: 0010:cpuidle_enter_state+0xe2/0x350
[   75.178749] Code: 85 c0 0f 8f 04 02 00 00 31 ff e8 39 6c 67 ff 45 84 ff 74 12 9c 58 f6 c4 02 0f 85 50 02 00 00 31 ff e8 52 b0 6d ff fb 45 85 f6 <0f> 88 b1 00 00 00 49 63 ce 4c 2b 2c 24 48 89 c8 48 6b d1 68 48 c1
[   75.199757] RSP: 0018:ffff9948c013bea8 EFLAGS: 00000202
[   75.205614] RAX: ffff8e4e8fb00000 RBX: ffffb948bfd23900 RCX: 000000000000001f
[   75.213619] RDX: 0000000000000004 RSI: ffffffff94206161 RDI: ffffffff94212e20
[   75.221620] RBP: 0000000000000004 R08: 000000117568973a R09: 0000000000000001
[   75.229622] R10: 000000000000afc8 R11: ffff8e4e8fb29ce4 R12: ffffffff945ae980
[   75.237628] R13: 000000117568973a R14: 0000000000000004 R15: 0000000000000000
[   75.245635]  ? cpuidle_enter_state+0xc7/0x350
[   75.250518]  cpuidle_enter+0x29/0x40
[   75.254539]  do_idle+0x1d9/0x260
[   75.258166]  cpu_startup_entry+0x19/0x20
[   75.262582]  secondary_startup_64_no_verify+0xc2/0xcb
[   75.268259]  </TASK>
[   75.270721] Modules linked in: 8021q snd_sof_pci_intel_tgl snd_sof_intel_hda_common tpm_crb snd_soc_hdac_hda snd_sof_intel_hda snd_hda_ext_core snd_sof_pci snd_sof snd_sof_xtensa_dsp snd_soc_acpi_intel_match snd_soc_acpi snd_soc_core snd_compress iTCO_wdt ac97_bus intel_pmc_bxt mei_hdcp iTCO_vendor_support snd_hda_codec_hdmi pmt_telemetry intel_pmc_core pmt_class snd_hda_intel x86_pkg_temp_thermal snd_intel_dspcfg snd_hda_codec snd_hda_core kvm_intel snd_pcm snd_timer kvm snd mei_me soundcore tpm_tis irqbypass i2c_i801 mei tpm_tis_core pcspkr intel_rapl_msr tpm i2c_smbus intel_pmt thermal sch_fq_codel uio uhid i915 drm_buddy video drm_display_helper drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm fuse configfs
[   75.342736] ---[ end trace 3785f9f360400e3a ]---
[   75.347913] RIP: 0010:eth_type_trans+0xd0/0x130
[   75.352984] Code: 03 88 47 78 eb c7 8b 47 68 2b 47 6c 48 8b 97 c0 00 00 00 83 f8 01 7e 1b 48 85 d2 74 06 66 83 3a ff 74 09 b8 00 04 00 00 eb ab <0f> 0b b8 00 01 00 00 eb a2 48 85 ff 74 eb 48 8d 54 24 06 31 f6 b9
[   75.373994] RSP: 0018:ffff9948c0228de0 EFLAGS: 00010297
[   75.379860] RAX: 00000000000003f2 RBX: ffff8e47047dc300 RCX: 0000000000001003
[   75.387856] RDX: ffff8e4e8c9ea040 RSI: ffff8e4704e0a000 RDI: ffff8e47047dc300
[   75.395864] RBP: ffff8e4704e2acc0 R08: 00000000000003f3 R09: 0000000000000800
[   75.403857] R10: 000000000000000d R11: ffff9948c0228dec R12: ffff8e4715e4e010
[   75.411863] R13: ffff9948c0545018 R14: 0000000000000001 R15: 0000000000000800
[   75.419875] FS:  0000000000000000(0000) GS:ffff8e4e8fb00000(0000) knlGS:0000000000000000
[   75.428946] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   75.435403] CR2: 00007f5eb35934a0 CR3: 0000000150e0a002 CR4: 0000000000770ee0
[   75.443410] PKRU: 55555554
[   75.446477] Kernel panic - not syncing: Fatal exception in interrupt
[   75.453738] Kernel Offset: 0x11c00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
[   75.465794] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---

Fixes: 4f1cc51f3488 ("net: flow_dissector: Parse PTP L2 packet header")
Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/flow_dissector.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index b3b3af0e7844..272f09251343 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1446,7 +1446,7 @@ proto_again:
 			break;
 		}
 
-		nhoff += ntohs(hdr->message_length);
+		nhoff += sizeof(struct ptp_header);
 		fdret = FLOW_DISSECT_RET_OUT_GOOD;
 		break;
 	}
-- 
cgit v1.2.3


From e0b65f9b81fef180cf5f103adecbe5505c961153 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Wed, 13 Sep 2023 08:26:47 +0300
Subject: net: thunderbolt: Fix TCPv6 GSO checksum calculation

Alex reported that running ssh over IPv6 does not work with
Thunderbolt/USB4 networking driver. The reason for that is that driver
should call skb_is_gso() before calling skb_is_gso_v6(), and it should
not return false after calculates the checksum successfully. This probably
was a copy paste error from the original driver where it was done properly.

Reported-by: Alex Balcanquall <alex@alexbal.com>
Fixes: e69b6c02b4c3 ("net: Add support for networking over Thunderbolt cable")
Cc: stable@vger.kernel.org
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/thunderbolt/main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/thunderbolt/main.c b/drivers/net/thunderbolt/main.c
index 0c1e8970ee58..0a53ec293d04 100644
--- a/drivers/net/thunderbolt/main.c
+++ b/drivers/net/thunderbolt/main.c
@@ -1049,12 +1049,11 @@ static bool tbnet_xmit_csum_and_map(struct tbnet *net, struct sk_buff *skb,
 		*tucso = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
 					    ip_hdr(skb)->daddr, 0,
 					    ip_hdr(skb)->protocol, 0);
-	} else if (skb_is_gso_v6(skb)) {
+	} else if (skb_is_gso(skb) && skb_is_gso_v6(skb)) {
 		tucso = dest + ((void *)&(tcp_hdr(skb)->check) - data);
 		*tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 					  &ipv6_hdr(skb)->daddr, 0,
 					  IPPROTO_TCP, 0);
-		return false;
 	} else if (protocol == htons(ETH_P_IPV6)) {
 		tucso = dest + skb_checksum_start_offset(skb) + skb->csum_offset;
 		*tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
-- 
cgit v1.2.3


From 350db8a59eb392bf42e62b6b2a37d56b5833012b Mon Sep 17 00:00:00 2001
From: Shinas Rasheed <srasheed@marvell.com>
Date: Wed, 13 Sep 2023 01:41:56 -0700
Subject: octeon_ep: fix tx dma unmap len values in SG

Lengths of SG pointers are kept in the following order in
the SG entries in hardware.
 63      48|47     32|31     16|15       0
 -----------------------------------------
 |  Len 0  |  Len 1  |  Len 2  |  Len 3  |
 -----------------------------------------
 |                Ptr 0                  |
 -----------------------------------------
 |                Ptr 1                  |
 -----------------------------------------
 |                Ptr 2                  |
 -----------------------------------------
 |                Ptr 3                  |
 -----------------------------------------
Dma pointers have to be unmapped based on their
respective lengths given in this format.

Fixes: 37d79d059606 ("octeon_ep: add Tx/Rx processing and interrupt support")
Signed-off-by: Shinas Rasheed <srasheed@marvell.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeon_ep/octep_main.c |  8 ++++----
 drivers/net/ethernet/marvell/octeon_ep/octep_tx.c   |  8 ++++----
 drivers/net/ethernet/marvell/octeon_ep/octep_tx.h   | 16 +++++++++++++++-
 3 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
index 4424de2ffd70..dbc518ff8276 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
@@ -734,13 +734,13 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
 dma_map_sg_err:
 	if (si > 0) {
 		dma_unmap_single(iq->dev, sglist[0].dma_ptr[0],
-				 sglist[0].len[0], DMA_TO_DEVICE);
-		sglist[0].len[0] = 0;
+				 sglist[0].len[3], DMA_TO_DEVICE);
+		sglist[0].len[3] = 0;
 	}
 	while (si > 1) {
 		dma_unmap_page(iq->dev, sglist[si >> 2].dma_ptr[si & 3],
-			       sglist[si >> 2].len[si & 3], DMA_TO_DEVICE);
-		sglist[si >> 2].len[si & 3] = 0;
+			       sglist[si >> 2].len[3 - (si & 3)], DMA_TO_DEVICE);
+		sglist[si >> 2].len[3 - (si & 3)] = 0;
 		si--;
 	}
 	tx_buffer->gather = 0;
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c
index 5a520d37bea0..d0adb82d65c3 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c
@@ -69,12 +69,12 @@ int octep_iq_process_completions(struct octep_iq *iq, u16 budget)
 		compl_sg++;
 
 		dma_unmap_single(iq->dev, tx_buffer->sglist[0].dma_ptr[0],
-				 tx_buffer->sglist[0].len[0], DMA_TO_DEVICE);
+				 tx_buffer->sglist[0].len[3], DMA_TO_DEVICE);
 
 		i = 1; /* entry 0 is main skb, unmapped above */
 		while (frags--) {
 			dma_unmap_page(iq->dev, tx_buffer->sglist[i >> 2].dma_ptr[i & 3],
-				       tx_buffer->sglist[i >> 2].len[i & 3], DMA_TO_DEVICE);
+				       tx_buffer->sglist[i >> 2].len[3 - (i & 3)], DMA_TO_DEVICE);
 			i++;
 		}
 
@@ -131,13 +131,13 @@ static void octep_iq_free_pending(struct octep_iq *iq)
 
 		dma_unmap_single(iq->dev,
 				 tx_buffer->sglist[0].dma_ptr[0],
-				 tx_buffer->sglist[0].len[0],
+				 tx_buffer->sglist[0].len[3],
 				 DMA_TO_DEVICE);
 
 		i = 1; /* entry 0 is main skb, unmapped above */
 		while (frags--) {
 			dma_unmap_page(iq->dev, tx_buffer->sglist[i >> 2].dma_ptr[i & 3],
-				       tx_buffer->sglist[i >> 2].len[i & 3], DMA_TO_DEVICE);
+				       tx_buffer->sglist[i >> 2].len[3 - (i & 3)], DMA_TO_DEVICE);
 			i++;
 		}
 
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h
index 2ef57980eb47..21e75ff9f5e7 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h
@@ -17,7 +17,21 @@
 #define TX_BUFTYPE_NET_SG        2
 #define NUM_TX_BUFTYPES          3
 
-/* Hardware format for Scatter/Gather list */
+/* Hardware format for Scatter/Gather list
+ *
+ * 63      48|47     32|31     16|15       0
+ * -----------------------------------------
+ * |  Len 0  |  Len 1  |  Len 2  |  Len 3  |
+ * -----------------------------------------
+ * |                Ptr 0                  |
+ * -----------------------------------------
+ * |                Ptr 1                  |
+ * -----------------------------------------
+ * |                Ptr 2                  |
+ * -----------------------------------------
+ * |                Ptr 3                  |
+ * -----------------------------------------
+ */
 struct octep_tx_sglist_desc {
 	u16 len[4];
 	dma_addr_t dma_ptr[4];
-- 
cgit v1.2.3


From c8de44b577eb540e8bfea55afe1d0904bb571b7a Mon Sep 17 00:00:00 2001
From: Radoslaw Tyl <radoslawx.tyl@intel.com>
Date: Mon, 7 Aug 2023 14:59:40 +0200
Subject: iavf: do not process adminq tasks when __IAVF_IN_REMOVE_TASK is set

Prevent schedule operations for adminq during device remove and when
__IAVF_IN_REMOVE_TASK flag is set. Currently, the iavf_down function
adds operations for adminq that shouldn't be processed when the device
is in the __IAVF_REMOVE state.

Reproduction:

echo 4 > /sys/bus/pci/devices/0000:17:00.0/sriov_numvfs
ip link set dev ens1f0 vf 0 trust on
ip link set dev ens1f0 vf 1 trust on
ip link set dev ens1f0 vf 2 trust on
ip link set dev ens1f0 vf 3 trust on

ip link set dev ens1f0 vf 0 mac 00:22:33:44:55:66
ip link set dev ens1f0 vf 1 mac 00:22:33:44:55:67
ip link set dev ens1f0 vf 2 mac 00:22:33:44:55:68
ip link set dev ens1f0 vf 3 mac 00:22:33:44:55:69

echo 0000:17:02.0 > /sys/bus/pci/devices/0000\:17\:02.0/driver/unbind
echo 0000:17:02.1 > /sys/bus/pci/devices/0000\:17\:02.1/driver/unbind
echo 0000:17:02.2 > /sys/bus/pci/devices/0000\:17\:02.2/driver/unbind
echo 0000:17:02.3 > /sys/bus/pci/devices/0000\:17\:02.3/driver/unbind
sleep 10
echo 0000:17:02.0 > /sys/bus/pci/drivers/iavf/bind
echo 0000:17:02.1 > /sys/bus/pci/drivers/iavf/bind
echo 0000:17:02.2 > /sys/bus/pci/drivers/iavf/bind
echo 0000:17:02.3 > /sys/bus/pci/drivers/iavf/bind

modprobe vfio-pci
echo 8086 154c > /sys/bus/pci/drivers/vfio-pci/new_id

qemu-system-x86_64 -accel kvm -m 4096 -cpu host \
-drive file=centos9.qcow2,if=none,id=virtio-disk0 \
-device virtio-blk-pci,drive=virtio-disk0,bootindex=0 -smp 4 \
-device vfio-pci,host=17:02.0 -net none \
-device vfio-pci,host=17:02.1 -net none \
-device vfio-pci,host=17:02.2 -net none \
-device vfio-pci,host=17:02.3 -net none \
-daemonize -vnc :5

Current result:
There is a probability that the mac of VF in guest is inconsistent with
it in host

Expected result:
When passthrough NIC VF to guest, the VF in guest should always get
the same mac as it in host.

Fixes: 14756b2ae265 ("iavf: Fix __IAVF_RESETTING state usage")
Signed-off-by: Radoslaw Tyl <radoslawx.tyl@intel.com>
Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/iavf/iavf_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 7b300c86ceda..b23ca9d80189 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -1421,7 +1421,8 @@ void iavf_down(struct iavf_adapter *adapter)
 	iavf_clear_fdir_filters(adapter);
 	iavf_clear_adv_rss_conf(adapter);
 
-	if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)) {
+	if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) &&
+	    !(test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))) {
 		/* cancel any current operation */
 		adapter->current_op = VIRTCHNL_OP_UNKNOWN;
 		/* Schedule operations to close down the HW. Don't wait
-- 
cgit v1.2.3


From ed4cad33df9e272feaa6698b33359b29c2929564 Mon Sep 17 00:00:00 2001
From: Petr Oros <poros@redhat.com>
Date: Thu, 7 Sep 2023 17:02:50 +0200
Subject: iavf: add iavf_schedule_aq_request() helper

Add helper for set iavf aq request AVF_FLAG_AQ_* and immediately
schedule watchdog_task. Helper will be used in cases where it is
necessary to run aq requests asap

Signed-off-by: Petr Oros <poros@redhat.com>
Co-developed-by: Michal Schmidt <mschmidt@redhat.com>
Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
Co-developed-by: Ivan Vecera <ivecera@redhat.com>
Signed-off-by: Ivan Vecera <ivecera@redhat.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/iavf/iavf.h         |  2 +-
 drivers/net/ethernet/intel/iavf/iavf_ethtool.c |  2 +-
 drivers/net/ethernet/intel/iavf/iavf_main.c    | 10 ++++------
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
index 85fba85fbb23..e110ba346185 100644
--- a/drivers/net/ethernet/intel/iavf/iavf.h
+++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -521,7 +521,7 @@ void iavf_down(struct iavf_adapter *adapter);
 int iavf_process_config(struct iavf_adapter *adapter);
 int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter);
 void iavf_schedule_reset(struct iavf_adapter *adapter, u64 flags);
-void iavf_schedule_request_stats(struct iavf_adapter *adapter);
+void iavf_schedule_aq_request(struct iavf_adapter *adapter, u64 flags);
 void iavf_schedule_finish_config(struct iavf_adapter *adapter);
 void iavf_reset(struct iavf_adapter *adapter);
 void iavf_set_ethtool_ops(struct net_device *netdev);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index a34303ad057d..90397293525f 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -362,7 +362,7 @@ static void iavf_get_ethtool_stats(struct net_device *netdev,
 	unsigned int i;
 
 	/* Explicitly request stats refresh */
-	iavf_schedule_request_stats(adapter);
+	iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_REQUEST_STATS);
 
 	iavf_add_ethtool_stats(&data, adapter, iavf_gstrings_stats);
 
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index b23ca9d80189..4b02a8cd77e9 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -314,15 +314,13 @@ void iavf_schedule_reset(struct iavf_adapter *adapter, u64 flags)
 }
 
 /**
- * iavf_schedule_request_stats - Set the flags and schedule statistics request
+ * iavf_schedule_aq_request - Set the flags and schedule aq request
  * @adapter: board private structure
- *
- * Sets IAVF_FLAG_AQ_REQUEST_STATS flag so iavf_watchdog_task() will explicitly
- * request and refresh ethtool stats
+ * @flags: requested aq flags
  **/
-void iavf_schedule_request_stats(struct iavf_adapter *adapter)
+void iavf_schedule_aq_request(struct iavf_adapter *adapter, u64 flags)
 {
-	adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_STATS;
+	adapter->aq_required |= flags;
 	mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0);
 }
 
-- 
cgit v1.2.3


From 5f3d319a248654a805bafc9e7094bcea47dac6c7 Mon Sep 17 00:00:00 2001
From: Petr Oros <poros@redhat.com>
Date: Thu, 7 Sep 2023 17:02:51 +0200
Subject: iavf: schedule a request immediately after add/delete vlan

When the iavf driver wants to reconfigure the VLAN filters
(iavf_add_vlan, iavf_del_vlan), it sets a flag in
aq_required:
  adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER;
or:
  adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER;

This is later processed by the watchdog_task, but it runs periodically
every 2 seconds, so it can be a long time before it processes the request.

In the worst case, the interface is unable to receive traffic for more
than 2 seconds for no objective reason.

Fixes: 5eae00c57f5e ("i40evf: main driver core")
Signed-off-by: Petr Oros <poros@redhat.com>
Co-developed-by: Michal Schmidt <mschmidt@redhat.com>
Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
Co-developed-by: Ivan Vecera <ivecera@redhat.com>
Signed-off-by: Ivan Vecera <ivecera@redhat.com>
Reviewed-by: Ahmed Zaki <ahmed.zaki@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/iavf/iavf_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 4b02a8cd77e9..6a2e6d64bc3a 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -821,7 +821,7 @@ iavf_vlan_filter *iavf_add_vlan(struct iavf_adapter *adapter,
 		list_add_tail(&f->list, &adapter->vlan_filter_list);
 		f->state = IAVF_VLAN_ADD;
 		adapter->num_vlan_filters++;
-		adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER;
+		iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_ADD_VLAN_FILTER);
 	}
 
 clearout:
@@ -843,7 +843,7 @@ static void iavf_del_vlan(struct iavf_adapter *adapter, struct iavf_vlan vlan)
 	f = iavf_find_vlan(adapter, vlan);
 	if (f) {
 		f->state = IAVF_VLAN_REMOVE;
-		adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER;
+		iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_DEL_VLAN_FILTER);
 	}
 
 	spin_unlock_bh(&adapter->mac_vlan_list_lock);
-- 
cgit v1.2.3


From d0d362ffa33da4acdcf7aee2116ceef8c8fef658 Mon Sep 17 00:00:00 2001
From: Ivan Vecera <ivecera@redhat.com>
Date: Thu, 7 Sep 2023 17:44:57 +0200
Subject: i40e: Fix VF VLAN offloading when port VLAN is configured

If port VLAN is configured on a VF then any other VLANs on top of this VF
are broken.

During i40e_ndo_set_vf_port_vlan() call the i40e driver reset the VF and
iavf driver asks PF (using VIRTCHNL_OP_GET_VF_RESOURCES) for VF capabilities
but this reset occurs too early, prior setting of vf->info.pvid field
and because this field can be zero during i40e_vc_get_vf_resources_msg()
then VIRTCHNL_VF_OFFLOAD_VLAN capability is reported to iavf driver.

This is wrong because iavf driver should not report VLAN offloading
capability when port VLAN is configured as i40e does not support QinQ
offloading.

Fix the issue by moving VF reset after setting of vf->port_vlan_id
field.

Without this patch:
$ echo 1 > /sys/class/net/enp2s0f0/device/sriov_numvfs
$ ip link set enp2s0f0 vf 0 vlan 3
$ ip link set enp2s0f0v0 up
$ ip link add link enp2s0f0v0 name vlan4 type vlan id 4
$ ip link set vlan4 up
...
$ ethtool -k enp2s0f0v0 | grep vlan-offload
rx-vlan-offload: on
tx-vlan-offload: on
$ dmesg -l err | grep iavf
[1292500.742914] iavf 0000:02:02.0: Failed to add VLAN filter, error IAVF_ERR_INVALID_QP_ID

With this patch:
$ echo 1 > /sys/class/net/enp2s0f0/device/sriov_numvfs
$ ip link set enp2s0f0 vf 0 vlan 3
$ ip link set enp2s0f0v0 up
$ ip link add link enp2s0f0v0 name vlan4 type vlan id 4
$ ip link set vlan4 up
...
$ ethtool -k enp2s0f0v0 | grep vlan-offload
rx-vlan-offload: off [requested on]
tx-vlan-offload: off [requested on]
$ dmesg -l err | grep iavf

Fixes: f9b4b6278d51 ("i40e: Reset the VF upon conflicting VLAN configuration")
Signed-off-by: Ivan Vecera <ivecera@redhat.com>
Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 8ea1a238dcef..d3d6415553ed 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -4475,9 +4475,7 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
 		goto error_pvid;
 
 	i40e_vlan_stripping_enable(vsi);
-	i40e_vc_reset_vf(vf, true);
-	/* During reset the VF got a new VSI, so refresh a pointer. */
-	vsi = pf->vsi[vf->lan_vsi_idx];
+
 	/* Locked once because multiple functions below iterate list */
 	spin_lock_bh(&vsi->mac_filter_hash_lock);
 
@@ -4563,6 +4561,10 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
 	 */
 	vf->port_vlan_id = le16_to_cpu(vsi->info.pvid);
 
+	i40e_vc_reset_vf(vf, true);
+	/* During reset the VF got a new VSI, so refresh a pointer. */
+	vsi = pf->vsi[vf->lan_vsi_idx];
+
 	ret = i40e_config_vf_promiscuous_mode(vf, vsi->id, allmulti, alluni);
 	if (ret) {
 		dev_err(&pf->pdev->dev, "Unable to config vf promiscuous mode\n");
-- 
cgit v1.2.3


From 837723b22a63cfbff584655b009b9d488d0e9087 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Wed, 30 Aug 2023 03:07:43 +0200
Subject: netfilter, bpf: Adjust timeouts of non-confirmed CTs in
 bpf_ct_insert_entry()

bpf_nf testcase fails on s390x: bpf_skb_ct_lookup() cannot find the entry
that was added by bpf_ct_insert_entry() within the same BPF function.

The reason is that this entry is deleted by nf_ct_gc_expired().

The CT timeout starts ticking after the CT confirmation; therefore
nf_conn.timeout is initially set to the timeout value, and
__nf_conntrack_confirm() sets it to the deadline value.

bpf_ct_insert_entry() sets IPS_CONFIRMED_BIT, but does not adjust the
timeout, making its value meaningless and causing false positives.

Fix the problem by making bpf_ct_insert_entry() adjust the timeout,
like __nf_conntrack_confirm().

Fixes: 2cdaa3eefed8 ("netfilter: conntrack: restore IPS_CONFIRMED out of nf_conntrack_hash_check_insert()")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Florian Westphal <fw@strlen.de>
Link: https://lore.kernel.org/bpf/20230830011128.1415752-3-iii@linux.ibm.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/netfilter/nf_conntrack_bpf.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index c7a6114091ae..b21799d468d2 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -381,6 +381,8 @@ __bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
 	struct nf_conn *nfct = (struct nf_conn *)nfct_i;
 	int err;
 
+	if (!nf_ct_is_confirmed(nfct))
+		nfct->timeout += nfct_time_stamp;
 	nfct->status |= IPS_CONFIRMED;
 	err = nf_conntrack_hash_check_insert(nfct);
 	if (err < 0) {
-- 
cgit v1.2.3


From dca7acd84e93f2881e3f63465bbb5d89a40b5d17 Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Wed, 13 Sep 2023 21:59:43 +0800
Subject: bpf: Skip unit_size checking for global per-cpu allocator

For global per-cpu allocator, the size of free object in free list
doesn't match with unit_size and now there is no way to get the size of
per-cpu pointer saved in free object, so just skip the checking.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Closes: https://lore.kernel.org/bpf/20230913133436.0eeec4cb@canb.auug.org.au/
Signed-off-by: Hou Tao <houtao1@huawei.com>
Tested-by: Biju Das <biju.das.jz@bp.renesas.com>
Link: https://lore.kernel.org/r/20230913135943.3137292-1-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/memalloc.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 1c22b90e754a..cf1941516643 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -491,6 +491,13 @@ static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx)
 	struct llist_node *first;
 	unsigned int obj_size;
 
+	/* For per-cpu allocator, the size of free objects in free list doesn't
+	 * match with unit_size and now there is no way to get the size of
+	 * per-cpu pointer saved in free object, so just skip the checking.
+	 */
+	if (c->percpu_size)
+		return 0;
+
 	first = c->free_llist.first;
 	if (!first)
 		return 0;
-- 
cgit v1.2.3


From 57eb5e1c5c57972c95e8efab6bc81b87161b0b07 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Fri, 15 Sep 2023 12:14:20 +0200
Subject: bpf: Fix uprobe_multi get_pid_task error path

Dan reported Smatch static checker warning due to missing error
value set in uprobe multi link's get_pid_task error path.

Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Closes: https://lore.kernel.org/bpf/c5ffa7c0-6b06-40d5-aca2-63833b5cd9af@moroto.mountain/
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Reviewed-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20230915101420.1193800-1-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/trace/bpf_trace.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index c1c1af63ced2..868008f56fec 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -3223,8 +3223,10 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
 		rcu_read_lock();
 		task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
 		rcu_read_unlock();
-		if (!task)
+		if (!task) {
+			err = -ESRCH;
 			goto error_path_put;
+		}
 	}
 
 	err = -ENOMEM;
-- 
cgit v1.2.3


From 8f908db77782630c45ba29dac35c434b5ce0b730 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Fri, 15 Sep 2023 10:34:27 -0700
Subject: bpf: Fix BTF_ID symbol generation collision

Marcus and Satya reported an issue where BTF_ID macro generates same
symbol in separate objects and that breaks final vmlinux link.

ld.lld: error: ld-temp.o <inline asm>:14577:1: symbol
'__BTF_ID__struct__cgroup__624' is already defined

This can be triggered under specific configs when __COUNTER__ happens to
be the same for the same symbol in two different translation units,
which is already quite unlikely to happen.

Add __LINE__ number suffix to make BTF_ID symbol more unique, which is
not a complete fix, but it would help for now and meanwhile we can work
on better solution as suggested by Andrii.

Cc: stable@vger.kernel.org
Reported-by: Satya Durga Srinivasu Prabhala <quic_satyap@quicinc.com>
Reported-by: Marcus Seyfarth <m.seyfarth@gmail.com>
Closes: https://github.com/ClangBuiltLinux/linux/issues/1913
Debugged-by: Nathan Chancellor <nathan@kernel.org>
Link: https://lore.kernel.org/bpf/CAEf4Bzb5KQ2_LmhN769ifMeSJaWfebccUasQOfQKaOd0nQ51tw@mail.gmail.com/
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Link: https://lore.kernel.org/r/20230915-bpf_collision-v3-1-263fc519c21f@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/btf_ids.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index a3462a9b8e18..a9cb10b0e2e9 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -49,7 +49,7 @@ word							\
 	____BTF_ID(symbol, word)
 
 #define __ID(prefix) \
-	__PASTE(prefix, __COUNTER__)
+	__PASTE(__PASTE(prefix, __COUNTER__), __LINE__)
 
 /*
  * The BTF_ID defines unique symbol for each ID pointing
-- 
cgit v1.2.3


From c0bb9fb0e52a64601d38b3739b729d9138d4c8a1 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Fri, 15 Sep 2023 10:34:28 -0700
Subject: bpf: Fix BTF_ID symbol generation collision in tools/

Marcus and Satya reported an issue where BTF_ID macro generates same
symbol in separate objects and that breaks final vmlinux link.

  ld.lld: error: ld-temp.o <inline asm>:14577:1: symbol
  '__BTF_ID__struct__cgroup__624' is already defined

This can be triggered under specific configs when __COUNTER__ happens to
be the same for the same symbol in two different translation units,
which is already quite unlikely to happen.

Add __LINE__ number suffix to make BTF_ID symbol more unique, which is
not a complete fix, but it would help for now and meanwhile we can work
on better solution as suggested by Andrii.

Cc: stable@vger.kernel.org
Reported-by: Satya Durga Srinivasu Prabhala <quic_satyap@quicinc.com>
Reported-by: Marcus Seyfarth <m.seyfarth@gmail.com>
Closes: https://github.com/ClangBuiltLinux/linux/issues/1913
Debugged-by: Nathan Chancellor <nathan@kernel.org>
Co-developed-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/bpf/CAEf4Bzb5KQ2_LmhN769ifMeSJaWfebccUasQOfQKaOd0nQ51tw@mail.gmail.com/
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Link: https://lore.kernel.org/r/20230915-bpf_collision-v3-2-263fc519c21f@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/include/linux/btf_ids.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h
index 71e54b1e3796..2f882d5cb30f 100644
--- a/tools/include/linux/btf_ids.h
+++ b/tools/include/linux/btf_ids.h
@@ -38,7 +38,7 @@ asm(							\
 	____BTF_ID(symbol)
 
 #define __ID(prefix) \
-	__PASTE(prefix, __COUNTER__)
+	__PASTE(__PASTE(prefix, __COUNTER__), __LINE__)
 
 /*
  * The BTF_ID defines unique symbol for each ID pointing
-- 
cgit v1.2.3


From 8f6b846b0a86c3cbae8a25b772651cfc2270ad0a Mon Sep 17 00:00:00 2001
From: David Christensen <drc@linux.vnet.ibm.com>
Date: Thu, 14 Sep 2023 18:02:52 -0400
Subject: ionic: fix 16bit math issue when PAGE_SIZE >= 64KB

The ionic device supports a maximum buffer length of 16 bits (see
ionic_rxq_desc or ionic_rxq_sg_elem).  When adding new buffers to
the receive rings, the function ionic_rx_fill() uses 16bit math when
calculating the number of pages to allocate for an RX descriptor,
given the interface's MTU setting. If the system PAGE_SIZE >= 64KB,
and the buf_info->page_offset is 0, the remain_len value will never
decrement from the original MTU value and the frag_len value will
always be 0, causing additional pages to be allocated as scatter-
gather elements unnecessarily.

A similar math issue exists in ionic_rx_frags(), but no failures
have been observed here since a 64KB page should not normally
require any scatter-gather elements at any legal Ethernet MTU size.

Fixes: 4b0a7539a372 ("ionic: implement Rx page reuse")
Signed-off-by: David Christensen <drc@linux.vnet.ibm.com>
Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_dev.h  |  1 +
 drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 10 +++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index 6aac98bcb9f4..aae4131f146a 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -187,6 +187,7 @@ typedef void (*ionic_desc_cb)(struct ionic_queue *q,
 			      struct ionic_desc_info *desc_info,
 			      struct ionic_cq_info *cq_info, void *cb_arg);
 
+#define IONIC_MAX_BUF_LEN			((u16)-1)
 #define IONIC_PAGE_SIZE				PAGE_SIZE
 #define IONIC_PAGE_SPLIT_SZ			(PAGE_SIZE / 2)
 #define IONIC_PAGE_GFP_MASK			(GFP_ATOMIC | __GFP_NOWARN |\
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 26798fc635db..44466e8c5d77 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -207,7 +207,8 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
 			return NULL;
 		}
 
-		frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset);
+		frag_len = min_t(u16, len, min_t(u32, IONIC_MAX_BUF_LEN,
+						 IONIC_PAGE_SIZE - buf_info->page_offset));
 		len -= frag_len;
 
 		dma_sync_single_for_cpu(dev,
@@ -452,7 +453,8 @@ void ionic_rx_fill(struct ionic_queue *q)
 
 		/* fill main descriptor - buf[0] */
 		desc->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset);
-		frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset);
+		frag_len = min_t(u16, len, min_t(u32, IONIC_MAX_BUF_LEN,
+						 IONIC_PAGE_SIZE - buf_info->page_offset));
 		desc->len = cpu_to_le16(frag_len);
 		remain_len -= frag_len;
 		buf_info++;
@@ -471,7 +473,9 @@ void ionic_rx_fill(struct ionic_queue *q)
 			}
 
 			sg_elem->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset);
-			frag_len = min_t(u16, remain_len, IONIC_PAGE_SIZE - buf_info->page_offset);
+			frag_len = min_t(u16, remain_len, min_t(u32, IONIC_MAX_BUF_LEN,
+								IONIC_PAGE_SIZE -
+								buf_info->page_offset));
 			sg_elem->len = cpu_to_le16(frag_len);
 			remain_len -= frag_len;
 			buf_info++;
-- 
cgit v1.2.3


From aabb4af9bb29f8532e19c872b48ad1e7fd208617 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 13 Sep 2023 14:09:57 +0300
Subject: net: core: Use the bitmap API to allocate bitmaps

Use bitmap_zalloc() and bitmap_free() instead of hand-writing them.
It is less verbose and it improves the type checking and semantic.

While at it, add missing header inclusion (should be bitops.h,
but with the above change it becomes bitmap.h).

Suggested-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://lore.kernel.org/r/20230911154534.4174265-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index ccff2b6ef958..85df22f05c38 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -69,7 +69,7 @@
  */
 
 #include <linux/uaccess.h>
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
 #include <linux/capability.h>
 #include <linux/cpu.h>
 #include <linux/types.h>
@@ -1080,7 +1080,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
 			return -EINVAL;
 
 		/* Use one page as a bit array of possible slots */
-		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
+		inuse = bitmap_zalloc(max_netdevices, GFP_ATOMIC);
 		if (!inuse)
 			return -ENOMEM;
 
@@ -1109,7 +1109,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
 		}
 
 		i = find_first_zero_bit(inuse, max_netdevices);
-		free_page((unsigned long) inuse);
+		bitmap_free(inuse);
 	}
 
 	snprintf(buf, IFNAMSIZ, name, i);
-- 
cgit v1.2.3


From cb47b1f679c4d83a5fa5f1852e472f844e41a3da Mon Sep 17 00:00:00 2001
From: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Date: Wed, 13 Sep 2023 11:06:15 -0700
Subject: igc: Fix infinite initialization loop with early XDP redirect

When an XDP redirect happens before the link is ready, that
transmission will not finish and will timeout, causing an adapter
reset. If the redirects do not stop, the adapter will not stop
resetting.

Wait for the driver to signal that there's a carrier before allowing
transmissions to proceed.

Previous code was relying that when __IGC_DOWN is cleared, the NIC is
ready to transmit as all the queues are ready, what happens is that
the carrier presence will only be signaled later, after the watchdog
workqueue has a chance to run. And during this interval (between
clearing __IGC_DOWN and the watchdog running) if any transmission
happens the timeout is emitted (detected by igc_tx_timeout()) which
causes the reset, with the potential for the infinite loop.

Fixes: 4ff320361092 ("igc: Add support for XDP_REDIRECT action")
Reported-by: Ferenc Fejes <ferenc.fejes@ericsson.com>
Closes: https://lore.kernel.org/netdev/0caf33cf6adb3a5bf137eeaa20e89b167c9986d5.camel@ericsson.com/
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Tested-by: Ferenc Fejes <ferenc.fejes@ericsson.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Naama Meir <naamax.meir@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 293b45717683..98de34d0ce07 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -6491,7 +6491,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames,
 	struct igc_ring *ring;
 	int i, drops;
 
-	if (unlikely(test_bit(__IGC_DOWN, &adapter->state)))
+	if (unlikely(!netif_carrier_ok(dev)))
 		return -ENETDOWN;
 
 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
-- 
cgit v1.2.3


From 0113d9c9d1ccc07f5a3710dac4aa24b6d711278c Mon Sep 17 00:00:00 2001
From: Kyle Zeng <zengyhkyle@gmail.com>
Date: Thu, 14 Sep 2023 22:12:57 -0700
Subject: ipv4: fix null-deref in ipv4_link_failure

Currently, we assume the skb is associated with a device before calling
__ip_options_compile, which is not always the case if it is re-routed by
ipvs.
When skb->dev is NULL, dev_net(skb->dev) will become null-dereference.
This patch adds a check for the edge case and switch to use the net_device
from the rtable when skb->dev is NULL.

Fixes: ed0de45a1008 ("ipv4: recompile ip options in ipv4_link_failure")
Suggested-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Kyle Zeng <zengyhkyle@gmail.com>
Cc: Stephen Suryaputra <ssuryaextr@gmail.com>
Cc: Vadim Fedorenko <vfedorenko@novek.ru>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 66f419e7f9a7..a57062283219 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1213,6 +1213,7 @@ EXPORT_INDIRECT_CALLABLE(ipv4_dst_check);
 
 static void ipv4_send_dest_unreach(struct sk_buff *skb)
 {
+	struct net_device *dev;
 	struct ip_options opt;
 	int res;
 
@@ -1230,7 +1231,8 @@ static void ipv4_send_dest_unreach(struct sk_buff *skb)
 		opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
 
 		rcu_read_lock();
-		res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
+		dev = skb->dev ? skb->dev : skb_rtable(skb)->dst.dev;
+		res = __ip_options_compile(dev_net(dev), &opt, skb, NULL);
 		rcu_read_unlock();
 
 		if (res)
-- 
cgit v1.2.3


From f4f82c52a0ead5ab363d207d06f81b967d09ffb8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 15 Sep 2023 17:11:11 +0000
Subject: scsi: iscsi_tcp: restrict to TCP sockets

Nothing prevents iscsi_sw_tcp_conn_bind() to receive file descriptor
pointing to non TCP socket (af_unix for example).

Return -EINVAL if this is attempted, instead of crashing the kernel.

Fixes: 7ba247138907 ("[SCSI] open-iscsi/linux-iscsi-5 Initiator: Initiator code")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Lee Duncan <lduncan@suse.com>
Cc: Chris Leech <cleech@redhat.com>
Cc: Mike Christie <michael.christie@oracle.com>
Cc: "James E.J. Bottomley" <jejb@linux.ibm.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: open-iscsi@googlegroups.com
Cc: linux-scsi@vger.kernel.org
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/scsi/iscsi_tcp.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 9ab8555180a3..8e14cea15f98 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -724,6 +724,10 @@ iscsi_sw_tcp_conn_bind(struct iscsi_cls_session *cls_session,
 		return -EEXIST;
 	}
 
+	err = -EINVAL;
+	if (!sk_is_tcp(sock->sk))
+		goto free_socket;
+
 	err = iscsi_conn_bind(cls_session, cls_conn, is_leading);
 	if (err)
 		goto free_socket;
-- 
cgit v1.2.3


From 3780bb29311eccb7a1c9641032a112eed237f7e3 Mon Sep 17 00:00:00 2001
From: Johnathan Mantey <johnathanx.mantey@intel.com>
Date: Fri, 15 Sep 2023 09:12:35 -0700
Subject: ncsi: Propagate carrier gain/loss events to the NCSI controller

Report the carrier/no-carrier state for the network interface
shared between the BMC and the passthrough channel. Without this
functionality the BMC is unable to reconfigure the NIC in the event
of a re-cabling to a different subnet.

Signed-off-by: Johnathan Mantey <johnathanx.mantey@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ncsi/ncsi-aen.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index 62fb1031763d..f8854bff286c 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -89,6 +89,11 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
 	if ((had_link == has_link) || chained)
 		return 0;
 
+	if (had_link)
+		netif_carrier_off(ndp->ndev.dev);
+	else
+		netif_carrier_on(ndp->ndev.dev);
+
 	if (!ndp->multi_package && !nc->package->multi_channel) {
 		if (had_link) {
 			ndp->flags |= NCSI_DEV_RESHUFFLE;
-- 
cgit v1.2.3


From 6af289746a636f71f4c0535a9801774118486c7a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 15 Sep 2023 19:00:35 +0000
Subject: dccp: fix dccp_v4_err()/dccp_v6_err() again

dh->dccph_x is the 9th byte (offset 8) in "struct dccp_hdr",
not in the "byte 7" as Jann claimed.

We need to make sure the ICMP messages are big enough,
using more standard ways (no more assumptions).

syzbot reported:
BUG: KMSAN: uninit-value in pskb_may_pull_reason include/linux/skbuff.h:2667 [inline]
BUG: KMSAN: uninit-value in pskb_may_pull include/linux/skbuff.h:2681 [inline]
BUG: KMSAN: uninit-value in dccp_v6_err+0x426/0x1aa0 net/dccp/ipv6.c:94
pskb_may_pull_reason include/linux/skbuff.h:2667 [inline]
pskb_may_pull include/linux/skbuff.h:2681 [inline]
dccp_v6_err+0x426/0x1aa0 net/dccp/ipv6.c:94
icmpv6_notify+0x4c7/0x880 net/ipv6/icmp.c:867
icmpv6_rcv+0x19d5/0x30d0
ip6_protocol_deliver_rcu+0xda6/0x2a60 net/ipv6/ip6_input.c:438
ip6_input_finish net/ipv6/ip6_input.c:483 [inline]
NF_HOOK include/linux/netfilter.h:304 [inline]
ip6_input+0x15d/0x430 net/ipv6/ip6_input.c:492
ip6_mc_input+0xa7e/0xc80 net/ipv6/ip6_input.c:586
dst_input include/net/dst.h:468 [inline]
ip6_rcv_finish+0x5db/0x870 net/ipv6/ip6_input.c:79
NF_HOOK include/linux/netfilter.h:304 [inline]
ipv6_rcv+0xda/0x390 net/ipv6/ip6_input.c:310
__netif_receive_skb_one_core net/core/dev.c:5523 [inline]
__netif_receive_skb+0x1a6/0x5a0 net/core/dev.c:5637
netif_receive_skb_internal net/core/dev.c:5723 [inline]
netif_receive_skb+0x58/0x660 net/core/dev.c:5782
tun_rx_batched+0x83b/0x920
tun_get_user+0x564c/0x6940 drivers/net/tun.c:2002
tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048
call_write_iter include/linux/fs.h:1985 [inline]
new_sync_write fs/read_write.c:491 [inline]
vfs_write+0x8ef/0x15c0 fs/read_write.c:584
ksys_write+0x20f/0x4c0 fs/read_write.c:637
__do_sys_write fs/read_write.c:649 [inline]
__se_sys_write fs/read_write.c:646 [inline]
__x64_sys_write+0x93/0xd0 fs/read_write.c:646
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

Uninit was created at:
slab_post_alloc_hook+0x12f/0xb70 mm/slab.h:767
slab_alloc_node mm/slub.c:3478 [inline]
kmem_cache_alloc_node+0x577/0xa80 mm/slub.c:3523
kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:559
__alloc_skb+0x318/0x740 net/core/skbuff.c:650
alloc_skb include/linux/skbuff.h:1286 [inline]
alloc_skb_with_frags+0xc8/0xbd0 net/core/skbuff.c:6313
sock_alloc_send_pskb+0xa80/0xbf0 net/core/sock.c:2795
tun_alloc_skb drivers/net/tun.c:1531 [inline]
tun_get_user+0x23cf/0x6940 drivers/net/tun.c:1846
tun_chr_write_iter+0x3af/0x5d0 drivers/net/tun.c:2048
call_write_iter include/linux/fs.h:1985 [inline]
new_sync_write fs/read_write.c:491 [inline]
vfs_write+0x8ef/0x15c0 fs/read_write.c:584
ksys_write+0x20f/0x4c0 fs/read_write.c:637
__do_sys_write fs/read_write.c:649 [inline]
__se_sys_write fs/read_write.c:646 [inline]
__x64_sys_write+0x93/0xd0 fs/read_write.c:646
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

CPU: 0 PID: 4995 Comm: syz-executor153 Not tainted 6.6.0-rc1-syzkaller-00014-ga747acc0b752 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/04/2023

Fixes: 977ad86c2a1b ("dccp: Fix out of bounds access in DCCP error handler")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Jann Horn <jannh@google.com>
Reviewed-by: Jann Horn <jannh@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ipv4.c | 9 ++-------
 net/dccp/ipv6.c | 9 ++-------
 2 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 8f56e8723c73..69453b936bd5 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -254,13 +254,8 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info)
 	int err;
 	struct net *net = dev_net(skb->dev);
 
-	/* For the first __dccp_basic_hdr_len() check, we only need dh->dccph_x,
-	 * which is in byte 7 of the dccp header.
-	 * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us.
-	 *
-	 * Later on, we want to access the sequence number fields, which are
-	 * beyond 8 bytes, so we have to pskb_may_pull() ourselves.
-	 */
+	if (!pskb_may_pull(skb, offset + sizeof(*dh)))
+		return -EINVAL;
 	dh = (struct dccp_hdr *)(skb->data + offset);
 	if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh)))
 		return -EINVAL;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 33f6ccf6ba77..c693a570682f 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -83,13 +83,8 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	__u64 seq;
 	struct net *net = dev_net(skb->dev);
 
-	/* For the first __dccp_basic_hdr_len() check, we only need dh->dccph_x,
-	 * which is in byte 7 of the dccp header.
-	 * Our caller (icmpv6_notify()) already pulled 8 bytes for us.
-	 *
-	 * Later on, we want to access the sequence number fields, which are
-	 * beyond 8 bytes, so we have to pskb_may_pull() ourselves.
-	 */
+	if (!pskb_may_pull(skb, offset + sizeof(*dh)))
+		return -EINVAL;
 	dh = (struct dccp_hdr *)(skb->data + offset);
 	if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh)))
 		return -EINVAL;
-- 
cgit v1.2.3


From 295de650d3aaf9e50258465c5f1c84b465d836f6 Mon Sep 17 00:00:00 2001
From: Lukasz Majewski <lukma@denx.de>
Date: Fri, 15 Sep 2023 20:10:02 +0200
Subject: net: hsr: Properly parse HSRv1 supervisor frames.

While adding support for parsing the redbox supervision frames, the
author added `pull_size' and `total_pull_size' to track the amount of
bytes that were pulled from the skb during while parsing the skb so it
can be reverted/ pushed back at the end.
In the process probably copy&paste error occurred and for the HSRv1 case
the ethhdr was used instead of the hsr_tag. Later the hsr_tag was used
instead of hsr_sup_tag. The later error didn't matter because both
structs have the size so HSRv0 was still working. It broke however HSRv1
parsing because struct ethhdr is larger than struct hsr_tag.

Reinstate the old pulling flow and pull first ethhdr, hsr_tag in v1 case
followed by hsr_sup_tag.

[bigeasy: commit message]

Fixes: eafaa88b3eb7 ("net: hsr: Add support for redbox supervision frames")'
Suggested-by: Tristram.Ha@microchip.com
Signed-off-by: Lukasz Majewski <lukma@denx.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_framereg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index b77f1189d19d..6d14d935ee82 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -288,13 +288,13 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
 
 	/* And leave the HSR tag. */
 	if (ethhdr->h_proto == htons(ETH_P_HSR)) {
-		pull_size = sizeof(struct ethhdr);
+		pull_size = sizeof(struct hsr_tag);
 		skb_pull(skb, pull_size);
 		total_pull_size += pull_size;
 	}
 
 	/* And leave the HSR sup tag. */
-	pull_size = sizeof(struct hsr_tag);
+	pull_size = sizeof(struct hsr_sup_tag);
 	skb_pull(skb, pull_size);
 	total_pull_size += pull_size;
 
-- 
cgit v1.2.3


From fbd825fcd7dd4c11d4c48c3d0adc248a4a0ce90b Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 15 Sep 2023 20:10:03 +0200
Subject: net: hsr: Add __packed to struct hsr_sup_tlv.

Struct hsr_sup_tlv describes HW layout and therefore it needs a __packed
attribute to ensure the compiler does not add any padding.
Due to the size and __packed attribute of the structs that use
hsr_sup_tlv it has no functional impact.

Add __packed to struct hsr_sup_tlv.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_main.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 6851e33df7d1..18e01791ad79 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -83,7 +83,7 @@ struct hsr_vlan_ethhdr {
 struct hsr_sup_tlv {
 	u8		HSR_TLV_type;
 	u8		HSR_TLV_length;
-};
+} __packed;
 
 /* HSR/PRP Supervision Frame data types.
  * Field names as defined in the IEC:2010 standard for HSR.
-- 
cgit v1.2.3


From 5c3ce539a11185268aff3bb30d2fad8c7fa42f86 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 15 Sep 2023 20:10:04 +0200
Subject: selftests: hsr: Use `let' properly.

The timeout in the while loop is never subtracted due wrong usage of
`let' leading to an endless loop if the former condition never gets
true.

Put the statement for let in quotes so it is parsed as a single
statement.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/hsr/hsr_ping.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh
index df9143538708..183f4a0f19dd 100755
--- a/tools/testing/selftests/net/hsr/hsr_ping.sh
+++ b/tools/testing/selftests/net/hsr/hsr_ping.sh
@@ -197,7 +197,7 @@ do
 		break
 	fi
 	sleep 1
-	let WAIT = WAIT - 1
+	let "WAIT = WAIT - 1"
 done
 
 # Just a safety delay in case the above check didn't handle it.
-- 
cgit v1.2.3


From d53f23fe164c24335d001cf725599a95e6fdf92d Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 15 Sep 2023 20:10:05 +0200
Subject: selftests: hsr: Reorder the testsuite.

Move the code and group into functions so it will be easier to extend
the test to HSRv1 so that both versions are covered.

Move the ping/test part into do_complete_ping_test() and the interface
setup into setup_hsr_interfaces().

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/hsr/hsr_ping.sh | 255 ++++++++++++++--------------
 1 file changed, 132 insertions(+), 123 deletions(-)

diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh
index 183f4a0f19dd..d4613b7b7188 100755
--- a/tools/testing/selftests/net/hsr/hsr_ping.sh
+++ b/tools/testing/selftests/net/hsr/hsr_ping.sh
@@ -41,61 +41,6 @@ cleanup()
 	done
 }
 
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
-	echo "SKIP: Could not run test without ip tool"
-	exit $ksft_skip
-fi
-
-trap cleanup EXIT
-
-for i in "$ns1" "$ns2" "$ns3" ;do
-	ip netns add $i || exit $ksft_skip
-	ip -net $i link set lo up
-done
-
-echo "INFO: preparing interfaces."
-# Three HSR nodes. Each node has one link to each of its neighbour, two links in total.
-#
-#    ns1eth1 ----- ns2eth1
-#      hsr1         hsr2
-#    ns1eth2       ns2eth2
-#       |            |
-#    ns3eth1      ns3eth2
-#           \    /
-#            hsr3
-#
-# Interfaces
-ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
-ip link add ns1eth2 netns "$ns1" type veth peer name ns3eth1 netns "$ns3"
-ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2"
-
-# HSRv0.
-ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version 0 proto 0
-ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version 0 proto 0
-ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version 0 proto 0
-
-# IP for HSR
-ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1
-ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad
-ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2
-ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad
-ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3
-ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad
-
-# All Links up
-ip -net "$ns1" link set ns1eth1 up
-ip -net "$ns1" link set ns1eth2 up
-ip -net "$ns1" link set hsr1 up
-
-ip -net "$ns2" link set ns2eth1 up
-ip -net "$ns2" link set ns2eth2 up
-ip -net "$ns2" link set hsr2 up
-
-ip -net "$ns3" link set ns3eth1 up
-ip -net "$ns3" link set ns3eth2 up
-ip -net "$ns3" link set hsr3 up
-
 # $1: IP address
 is_v6()
 {
@@ -164,93 +109,157 @@ stop_if_error()
 	fi
 }
 
-
-echo "INFO: Initial validation ping."
-# Each node has to be able each one.
-do_ping "$ns1" 100.64.0.2
-do_ping "$ns2" 100.64.0.1
-do_ping "$ns3" 100.64.0.1
-stop_if_error "Initial validation failed."
-
-do_ping "$ns1" 100.64.0.3
-do_ping "$ns2" 100.64.0.3
-do_ping "$ns3" 100.64.0.2
-
-do_ping "$ns1" dead:beef:1::2
-do_ping "$ns1" dead:beef:1::3
-do_ping "$ns2" dead:beef:1::1
-do_ping "$ns2" dead:beef:1::2
-do_ping "$ns3" dead:beef:1::1
-do_ping "$ns3" dead:beef:1::2
-
-stop_if_error "Initial validation failed."
+do_complete_ping_test()
+{
+	echo "INFO: Initial validation ping."
+	# Each node has to be able each one.
+	do_ping "$ns1" 100.64.0.2
+	do_ping "$ns2" 100.64.0.1
+	do_ping "$ns3" 100.64.0.1
+	stop_if_error "Initial validation failed."
+
+	do_ping "$ns1" 100.64.0.3
+	do_ping "$ns2" 100.64.0.3
+	do_ping "$ns3" 100.64.0.2
+
+	do_ping "$ns1" dead:beef:1::2
+	do_ping "$ns1" dead:beef:1::3
+	do_ping "$ns2" dead:beef:1::1
+	do_ping "$ns2" dead:beef:1::2
+	do_ping "$ns3" dead:beef:1::1
+	do_ping "$ns3" dead:beef:1::2
+
+	stop_if_error "Initial validation failed."
 
 # Wait until supervisor all supervision frames have been processed and the node
 # entries have been merged. Otherwise duplicate frames will be observed which is
 # valid at this stage.
-WAIT=5
-while [ ${WAIT} -gt 0 ]
-do
-	grep 00:00:00:00:00:00 /sys/kernel/debug/hsr/hsr*/node_table
-	if [ $? -ne 0 ]
-	then
-		break
-	fi
-	sleep 1
-	let "WAIT = WAIT - 1"
-done
+	WAIT=5
+	while [ ${WAIT} -gt 0 ]
+	do
+		grep 00:00:00:00:00:00 /sys/kernel/debug/hsr/hsr*/node_table
+		if [ $? -ne 0 ]
+		then
+			break
+		fi
+		sleep 1
+		let "WAIT = WAIT - 1"
+	done
 
 # Just a safety delay in case the above check didn't handle it.
-sleep 1
+	sleep 1
+
+	echo "INFO: Longer ping test."
+	do_ping_long "$ns1" 100.64.0.2
+	do_ping_long "$ns1" dead:beef:1::2
+	do_ping_long "$ns1" 100.64.0.3
+	do_ping_long "$ns1" dead:beef:1::3
+
+	stop_if_error "Longer ping test failed."
+
+	do_ping_long "$ns2" 100.64.0.1
+	do_ping_long "$ns2" dead:beef:1::1
+	do_ping_long "$ns2" 100.64.0.3
+	do_ping_long "$ns2" dead:beef:1::2
+	stop_if_error "Longer ping test failed."
+
+	do_ping_long "$ns3" 100.64.0.1
+	do_ping_long "$ns3" dead:beef:1::1
+	do_ping_long "$ns3" 100.64.0.2
+	do_ping_long "$ns3" dead:beef:1::2
+	stop_if_error "Longer ping test failed."
+
+	echo "INFO: Cutting one link."
+	do_ping_long "$ns1" 100.64.0.3 &
 
-echo "INFO: Longer ping test."
-do_ping_long "$ns1" 100.64.0.2
-do_ping_long "$ns1" dead:beef:1::2
-do_ping_long "$ns1" 100.64.0.3
-do_ping_long "$ns1" dead:beef:1::3
+	sleep 3
+	ip -net "$ns3" link set ns3eth1 down
+	wait
 
-stop_if_error "Longer ping test failed."
+	ip -net "$ns3" link set ns3eth1 up
 
-do_ping_long "$ns2" 100.64.0.1
-do_ping_long "$ns2" dead:beef:1::1
-do_ping_long "$ns2" 100.64.0.3
-do_ping_long "$ns2" dead:beef:1::2
-stop_if_error "Longer ping test failed."
+	stop_if_error "Failed with one link down."
 
-do_ping_long "$ns3" 100.64.0.1
-do_ping_long "$ns3" dead:beef:1::1
-do_ping_long "$ns3" 100.64.0.2
-do_ping_long "$ns3" dead:beef:1::2
-stop_if_error "Longer ping test failed."
+	echo "INFO: Delay the link and drop a few packages."
+	tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms
+	tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25%
 
-echo "INFO: Cutting one link."
-do_ping_long "$ns1" 100.64.0.3 &
+	do_ping_long "$ns1" 100.64.0.2
+	do_ping_long "$ns1" 100.64.0.3
 
-sleep 3
-ip -net "$ns3" link set ns3eth1 down
-wait
+	stop_if_error "Failed with delay and packetloss."
 
-ip -net "$ns3" link set ns3eth1 up
+	do_ping_long "$ns2" 100.64.0.1
+	do_ping_long "$ns2" 100.64.0.3
 
-stop_if_error "Failed with one link down."
+	stop_if_error "Failed with delay and packetloss."
 
-echo "INFO: Delay the link and drop a few packages."
-tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms
-tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25%
+	do_ping_long "$ns3" 100.64.0.1
+	do_ping_long "$ns3" 100.64.0.2
+	stop_if_error "Failed with delay and packetloss."
 
-do_ping_long "$ns1" 100.64.0.2
-do_ping_long "$ns1" 100.64.0.3
+	echo "INFO: All good."
+}
+
+setup_hsr_interfaces()
+{
+	echo "INFO: preparing interfaces."
+# Three HSR nodes. Each node has one link to each of its neighbour, two links in total.
+#
+#    ns1eth1 ----- ns2eth1
+#      hsr1         hsr2
+#    ns1eth2       ns2eth2
+#       |            |
+#    ns3eth1      ns3eth2
+#           \    /
+#            hsr3
+#
+	# Interfaces
+	ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
+	ip link add ns1eth2 netns "$ns1" type veth peer name ns3eth1 netns "$ns3"
+	ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2"
+
+	# HSRv0.
+	ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version 0 proto 0
+	ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version 0 proto 0
+	ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version 0 proto 0
+
+	# IP for HSR
+	ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1
+	ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad
+	ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2
+	ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad
+	ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3
+	ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad
+
+	# All Links up
+	ip -net "$ns1" link set ns1eth1 up
+	ip -net "$ns1" link set ns1eth2 up
+	ip -net "$ns1" link set hsr1 up
+
+	ip -net "$ns2" link set ns2eth1 up
+	ip -net "$ns2" link set ns2eth2 up
+	ip -net "$ns2" link set hsr2 up
+
+	ip -net "$ns3" link set ns3eth1 up
+	ip -net "$ns3" link set ns3eth2 up
+	ip -net "$ns3" link set hsr3 up
+}
 
-stop_if_error "Failed with delay and packetloss."
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
 
-do_ping_long "$ns2" 100.64.0.1
-do_ping_long "$ns2" 100.64.0.3
+trap cleanup EXIT
 
-stop_if_error "Failed with delay and packetloss."
+for i in "$ns1" "$ns2" "$ns3" ;do
+	ip netns add $i || exit $ksft_skip
+	ip -net $i link set lo up
+done
 
-do_ping_long "$ns3" 100.64.0.1
-do_ping_long "$ns3" 100.64.0.2
-stop_if_error "Failed with delay and packetloss."
+setup_hsr_interfaces
+do_complete_ping_test
 
-echo "INFO: All good."
 exit $ret
-- 
cgit v1.2.3


From b0e9c3b5fdafbe60e7a82be69439f95e06a4de39 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 15 Sep 2023 20:10:06 +0200
Subject: selftests: hsr: Extend the testsuite to also cover HSRv1.

The testsuite already has simply tests for HSRv0. The testuite would
have been able to notice the v1 breakage if it was there at the time.

Extend the testsuite to also cover HSRv1.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/hsr/hsr_ping.sh | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh
index d4613b7b7188..1c6457e54625 100755
--- a/tools/testing/selftests/net/hsr/hsr_ping.sh
+++ b/tools/testing/selftests/net/hsr/hsr_ping.sh
@@ -203,7 +203,9 @@ do_complete_ping_test()
 
 setup_hsr_interfaces()
 {
-	echo "INFO: preparing interfaces."
+	local HSRv="$1"
+
+	echo "INFO: preparing interfaces for HSRv${HSRv}."
 # Three HSR nodes. Each node has one link to each of its neighbour, two links in total.
 #
 #    ns1eth1 ----- ns2eth1
@@ -219,10 +221,10 @@ setup_hsr_interfaces()
 	ip link add ns1eth2 netns "$ns1" type veth peer name ns3eth1 netns "$ns3"
 	ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2"
 
-	# HSRv0.
-	ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version 0 proto 0
-	ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version 0 proto 0
-	ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version 0 proto 0
+	# HSRv0/1
+	ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version $HSRv proto 0
+	ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version $HSRv proto 0
+	ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version $HSRv proto 0
 
 	# IP for HSR
 	ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1
@@ -259,7 +261,16 @@ for i in "$ns1" "$ns2" "$ns3" ;do
 	ip -net $i link set lo up
 done
 
-setup_hsr_interfaces
+setup_hsr_interfaces 0
+do_complete_ping_test
+cleanup
+
+for i in "$ns1" "$ns2" "$ns3" ;do
+	ip netns add $i || exit $ksft_skip
+	ip -net $i link set lo up
+done
+
+setup_hsr_interfaces 1
 do_complete_ping_test
 
 exit $ret
-- 
cgit v1.2.3


From ea852c17f5382a0a52041cfbd9a4451ae0fa1a38 Mon Sep 17 00:00:00 2001
From: Gerhard Engleder <gerhard@engleder-embedded.com>
Date: Fri, 15 Sep 2023 23:01:24 +0200
Subject: tsnep: Fix NAPI scheduling

According to the NAPI documentation networking/napi.rst, drivers which
have to mask interrupts explicitly should use the napi_schedule_prep()
and __napi_schedule() calls.

No problem seen so far with current implementation. Nevertheless, let's
align the implementation with documentation.

Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/engleder/tsnep_main.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
index f61bd89734c5..0cdf0de555ed 100644
--- a/drivers/net/ethernet/engleder/tsnep_main.c
+++ b/drivers/net/ethernet/engleder/tsnep_main.c
@@ -87,8 +87,11 @@ static irqreturn_t tsnep_irq(int irq, void *arg)
 
 	/* handle TX/RX queue 0 interrupt */
 	if ((active & adapter->queue[0].irq_mask) != 0) {
-		tsnep_disable_irq(adapter, adapter->queue[0].irq_mask);
-		napi_schedule(&adapter->queue[0].napi);
+		if (napi_schedule_prep(&adapter->queue[0].napi)) {
+			tsnep_disable_irq(adapter, adapter->queue[0].irq_mask);
+			/* schedule after masking to avoid races */
+			__napi_schedule(&adapter->queue[0].napi);
+		}
 	}
 
 	return IRQ_HANDLED;
@@ -99,8 +102,11 @@ static irqreturn_t tsnep_irq_txrx(int irq, void *arg)
 	struct tsnep_queue *queue = arg;
 
 	/* handle TX/RX queue interrupt */
-	tsnep_disable_irq(queue->adapter, queue->irq_mask);
-	napi_schedule(&queue->napi);
+	if (napi_schedule_prep(&queue->napi)) {
+		tsnep_disable_irq(queue->adapter, queue->irq_mask);
+		/* schedule after masking to avoid races */
+		__napi_schedule(&queue->napi);
+	}
 
 	return IRQ_HANDLED;
 }
-- 
cgit v1.2.3


From a7f991953d73dd50c4c23b5437c0139960e1fad4 Mon Sep 17 00:00:00 2001
From: Gerhard Engleder <gerhard@engleder-embedded.com>
Date: Fri, 15 Sep 2023 23:01:25 +0200
Subject: tsnep: Fix ethtool channels

According to the NAPI documentation networking/napi.rst, for the ethtool
API a channel is a IRQ/NAPI which services queues of a given type.

tsnep uses a single IRQ/NAPI instance for every TX/RX queue pair.
Therefore, combined channels shall be returned instead of separate tx/rx
channels.

Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/engleder/tsnep_ethtool.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/engleder/tsnep_ethtool.c b/drivers/net/ethernet/engleder/tsnep_ethtool.c
index 716815dad7d2..65ec1abc9442 100644
--- a/drivers/net/ethernet/engleder/tsnep_ethtool.c
+++ b/drivers/net/ethernet/engleder/tsnep_ethtool.c
@@ -300,10 +300,8 @@ static void tsnep_ethtool_get_channels(struct net_device *netdev,
 {
 	struct tsnep_adapter *adapter = netdev_priv(netdev);
 
-	ch->max_rx = adapter->num_rx_queues;
-	ch->max_tx = adapter->num_tx_queues;
-	ch->rx_count = adapter->num_rx_queues;
-	ch->tx_count = adapter->num_tx_queues;
+	ch->max_combined = adapter->num_queues;
+	ch->combined_count = adapter->num_queues;
 }
 
 static int tsnep_ethtool_get_ts_info(struct net_device *netdev,
-- 
cgit v1.2.3


From 46589db3817bd8b523701274885984b5a5dda7d1 Mon Sep 17 00:00:00 2001
From: Gerhard Engleder <gerhard@engleder-embedded.com>
Date: Fri, 15 Sep 2023 23:01:26 +0200
Subject: tsnep: Fix NAPI polling with budget 0

According to the NAPI documentation networking/napi.rst, Rx specific
APIs like page pool and XDP cannot be used at all when budget is 0.
skb Tx processing should happen regardless of the budget.

Stop NAPI polling after Tx processing and skip Rx processing if budget
is 0.

Signed-off-by: Gerhard Engleder <gerhard@engleder-embedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/engleder/tsnep_main.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
index 0cdf0de555ed..8b992dc9bb52 100644
--- a/drivers/net/ethernet/engleder/tsnep_main.c
+++ b/drivers/net/ethernet/engleder/tsnep_main.c
@@ -1734,6 +1734,10 @@ static int tsnep_poll(struct napi_struct *napi, int budget)
 	if (queue->tx)
 		complete = tsnep_tx_poll(queue->tx, budget);
 
+	/* handle case where we are called by netpoll with a budget of 0 */
+	if (unlikely(budget <= 0))
+		return budget;
+
 	if (queue->rx) {
 		done = queue->rx->xsk_pool ?
 		       tsnep_rx_poll_zc(queue->rx, napi, budget) :
-- 
cgit v1.2.3


From 6bec041147a2a64a490d1f813e8a004443061b38 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Sat, 16 Sep 2023 12:52:45 +0200
Subject: mptcp: fix bogus receive window shrinkage with multiple subflows

In case multiple subflows race to update the mptcp-level receive
window, the subflow losing the race should use the window value
provided by the "winning" subflow to update it's own tcp-level
rcv_wnd.

To such goal, the current code bogusly uses the mptcp-level rcv_wnd
value as observed before the update attempt. On unlucky circumstances
that may lead to TCP-level window shrinkage, and stall the other end.

Address the issue feeding to the rcv wnd update the correct value.

Fixes: f3589be0c420 ("mptcp: never shrink offered window")
Cc: stable@vger.kernel.org
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/427
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index c254accb14de..cd15ec73073e 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1269,12 +1269,13 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
 
 			if (rcv_wnd == rcv_wnd_old)
 				break;
-			if (before64(rcv_wnd_new, rcv_wnd)) {
+
+			rcv_wnd_old = rcv_wnd;
+			if (before64(rcv_wnd_new, rcv_wnd_old)) {
 				MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICTUPDATE);
 				goto raise_win;
 			}
 			MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT);
-			rcv_wnd_old = rcv_wnd;
 		}
 		return;
 	}
-- 
cgit v1.2.3


From d5fbeff1ab812b6c473b6924bee8748469462e2c Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Sat, 16 Sep 2023 12:52:46 +0200
Subject: mptcp: move __mptcp_error_report in protocol.c

This will simplify the next patch ("mptcp: process pending subflow error
on close").

No functional change intended.

Cc: stable@vger.kernel.org # v5.12+
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 36 ++++++++++++++++++++++++++++++++++++
 net/mptcp/subflow.c  | 36 ------------------------------------
 2 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index a7fc16f5175d..915860027b1a 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -770,6 +770,42 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
 	return moved;
 }
 
+void __mptcp_error_report(struct sock *sk)
+{
+	struct mptcp_subflow_context *subflow;
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	mptcp_for_each_subflow(msk, subflow) {
+		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+		int err = sock_error(ssk);
+		int ssk_state;
+
+		if (!err)
+			continue;
+
+		/* only propagate errors on fallen-back sockets or
+		 * on MPC connect
+		 */
+		if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
+			continue;
+
+		/* We need to propagate only transition to CLOSE state.
+		 * Orphaned socket will see such state change via
+		 * subflow_sched_work_if_closed() and that path will properly
+		 * destroy the msk as needed.
+		 */
+		ssk_state = inet_sk_state_load(ssk);
+		if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
+			inet_sk_state_store(sk, ssk_state);
+		WRITE_ONCE(sk->sk_err, -err);
+
+		/* This barrier is coupled with smp_rmb() in mptcp_poll() */
+		smp_wmb();
+		sk_error_report(sk);
+		break;
+	}
+}
+
 /* In most cases we will be able to lock the mptcp socket.  If its already
  * owned, we need to defer to the work queue to avoid ABBA deadlock.
  */
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 9bf3c7bc1762..2f40c23fdb0d 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1362,42 +1362,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
 	*full_space = mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
 }
 
-void __mptcp_error_report(struct sock *sk)
-{
-	struct mptcp_subflow_context *subflow;
-	struct mptcp_sock *msk = mptcp_sk(sk);
-
-	mptcp_for_each_subflow(msk, subflow) {
-		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-		int err = sock_error(ssk);
-		int ssk_state;
-
-		if (!err)
-			continue;
-
-		/* only propagate errors on fallen-back sockets or
-		 * on MPC connect
-		 */
-		if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
-			continue;
-
-		/* We need to propagate only transition to CLOSE state.
-		 * Orphaned socket will see such state change via
-		 * subflow_sched_work_if_closed() and that path will properly
-		 * destroy the msk as needed.
-		 */
-		ssk_state = inet_sk_state_load(ssk);
-		if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
-			inet_sk_state_store(sk, ssk_state);
-		WRITE_ONCE(sk->sk_err, -err);
-
-		/* This barrier is coupled with smp_rmb() in mptcp_poll() */
-		smp_wmb();
-		sk_error_report(sk);
-		break;
-	}
-}
-
 static void subflow_error_report(struct sock *ssk)
 {
 	struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
-- 
cgit v1.2.3


From 9f1a98813b4b686482e5ef3c9d998581cace0ba6 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Sat, 16 Sep 2023 12:52:47 +0200
Subject: mptcp: process pending subflow error on close

On incoming TCP reset, subflow closing could happen before error
propagation. That in turn could cause the socket error being ignored,
and a missing socket state transition, as reported by Daire-Byrne.

Address the issues explicitly checking for subflow socket error at
close time. To avoid code duplication, factor-out of __mptcp_error_report()
a new helper implementing the relevant bits.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/429
Fixes: 15cc10453398 ("mptcp: deliver ssk errors to msk")
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 63 ++++++++++++++++++++++++++++------------------------
 1 file changed, 34 insertions(+), 29 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 915860027b1a..1c96b8da71df 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -770,40 +770,44 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
 	return moved;
 }
 
-void __mptcp_error_report(struct sock *sk)
+static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk)
 {
-	struct mptcp_subflow_context *subflow;
-	struct mptcp_sock *msk = mptcp_sk(sk);
+	int err = sock_error(ssk);
+	int ssk_state;
 
-	mptcp_for_each_subflow(msk, subflow) {
-		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-		int err = sock_error(ssk);
-		int ssk_state;
+	if (!err)
+		return false;
 
-		if (!err)
-			continue;
+	/* only propagate errors on fallen-back sockets or
+	 * on MPC connect
+	 */
+	if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(mptcp_sk(sk)))
+		return false;
 
-		/* only propagate errors on fallen-back sockets or
-		 * on MPC connect
-		 */
-		if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
-			continue;
+	/* We need to propagate only transition to CLOSE state.
+	 * Orphaned socket will see such state change via
+	 * subflow_sched_work_if_closed() and that path will properly
+	 * destroy the msk as needed.
+	 */
+	ssk_state = inet_sk_state_load(ssk);
+	if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
+		inet_sk_state_store(sk, ssk_state);
+	WRITE_ONCE(sk->sk_err, -err);
 
-		/* We need to propagate only transition to CLOSE state.
-		 * Orphaned socket will see such state change via
-		 * subflow_sched_work_if_closed() and that path will properly
-		 * destroy the msk as needed.
-		 */
-		ssk_state = inet_sk_state_load(ssk);
-		if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
-			inet_sk_state_store(sk, ssk_state);
-		WRITE_ONCE(sk->sk_err, -err);
-
-		/* This barrier is coupled with smp_rmb() in mptcp_poll() */
-		smp_wmb();
-		sk_error_report(sk);
-		break;
-	}
+	/* This barrier is coupled with smp_rmb() in mptcp_poll() */
+	smp_wmb();
+	sk_error_report(sk);
+	return true;
+}
+
+void __mptcp_error_report(struct sock *sk)
+{
+	struct mptcp_subflow_context *subflow;
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	mptcp_for_each_subflow(msk, subflow)
+		if (__mptcp_subflow_error_report(sk, mptcp_subflow_tcp_sock(subflow)))
+			break;
 }
 
 /* In most cases we will be able to lock the mptcp socket.  If its already
@@ -2428,6 +2432,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 	}
 
 out_release:
+	__mptcp_subflow_error_report(sk, ssk);
 	release_sock(ssk);
 
 	sock_put(ssk);
-- 
cgit v1.2.3


From f6909dc1c1f4452879278128012da6c76bc186a5 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Sat, 16 Sep 2023 12:52:48 +0200
Subject: mptcp: rename timer related helper to less confusing names

The msk socket uses to different timeout to track close related
events and retransmissions. The existing helpers do not indicate
clearly which timer they actually touch, making the related code
quite confusing.

Change the existing helpers name to avoid such confusion. No
functional change intended.

This patch is linked to the next one ("mptcp: fix dangling connection
hang-up"). The two patches are supposed to be backported together.

Cc: stable@vger.kernel.org # v5.11+
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 42 +++++++++++++++++++++---------------------
 net/mptcp/protocol.h |  2 +-
 net/mptcp/subflow.c  |  2 +-
 3 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 1c96b8da71df..c8f38f303a90 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -405,7 +405,7 @@ drop:
 	return false;
 }
 
-static void mptcp_stop_timer(struct sock *sk)
+static void mptcp_stop_rtx_timer(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
@@ -911,12 +911,12 @@ static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list
 	}
 }
 
-static bool mptcp_timer_pending(struct sock *sk)
+static bool mptcp_rtx_timer_pending(struct sock *sk)
 {
 	return timer_pending(&inet_csk(sk)->icsk_retransmit_timer);
 }
 
-static void mptcp_reset_timer(struct sock *sk)
+static void mptcp_reset_rtx_timer(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	unsigned long tout;
@@ -1050,10 +1050,10 @@ static void __mptcp_clean_una(struct sock *sk)
 out:
 	if (snd_una == READ_ONCE(msk->snd_nxt) &&
 	    snd_una == READ_ONCE(msk->write_seq)) {
-		if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
-			mptcp_stop_timer(sk);
+		if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
+			mptcp_stop_rtx_timer(sk);
 	} else {
-		mptcp_reset_timer(sk);
+		mptcp_reset_rtx_timer(sk);
 	}
 }
 
@@ -1626,8 +1626,8 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 		mptcp_push_release(ssk, &info);
 
 	/* ensure the rtx timer is running */
-	if (!mptcp_timer_pending(sk))
-		mptcp_reset_timer(sk);
+	if (!mptcp_rtx_timer_pending(sk))
+		mptcp_reset_rtx_timer(sk);
 	if (do_check_data_fin)
 		mptcp_check_send_data_fin(sk);
 }
@@ -1690,8 +1690,8 @@ out:
 	if (copied) {
 		tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
 			 info.size_goal);
-		if (!mptcp_timer_pending(sk))
-			mptcp_reset_timer(sk);
+		if (!mptcp_rtx_timer_pending(sk))
+			mptcp_reset_rtx_timer(sk);
 
 		if (msk->snd_data_fin_enable &&
 		    msk->snd_nxt + 1 == msk->write_seq)
@@ -2260,7 +2260,7 @@ static void mptcp_retransmit_timer(struct timer_list *t)
 	sock_put(sk);
 }
 
-static void mptcp_timeout_timer(struct timer_list *t)
+static void mptcp_tout_timer(struct timer_list *t)
 {
 	struct sock *sk = from_timer(sk, t, sk_timer);
 
@@ -2629,14 +2629,14 @@ static void __mptcp_retrans(struct sock *sk)
 reset_timer:
 	mptcp_check_and_set_pending(sk);
 
-	if (!mptcp_timer_pending(sk))
-		mptcp_reset_timer(sk);
+	if (!mptcp_rtx_timer_pending(sk))
+		mptcp_reset_rtx_timer(sk);
 }
 
 /* schedule the timeout timer for the relevant event: either close timeout
  * or mp_fail timeout. The close timeout takes precedence on the mp_fail one
  */
-void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout)
+void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
 {
 	struct sock *sk = (struct sock *)msk;
 	unsigned long timeout, close_timeout;
@@ -2669,7 +2669,7 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
 	WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0);
 	unlock_sock_fast(ssk, slow);
 
-	mptcp_reset_timeout(msk, 0);
+	mptcp_reset_tout_timer(msk, 0);
 }
 
 static void mptcp_do_fastclose(struct sock *sk)
@@ -2758,7 +2758,7 @@ static void __mptcp_init_sock(struct sock *sk)
 
 	/* re-use the csk retrans timer for MPTCP-level retrans */
 	timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
-	timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0);
+	timer_setup(&sk->sk_timer, mptcp_tout_timer, 0);
 }
 
 static void mptcp_ca_reset(struct sock *sk)
@@ -2849,8 +2849,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
 		} else {
 			pr_debug("Sending DATA_FIN on subflow %p", ssk);
 			tcp_send_ack(ssk);
-			if (!mptcp_timer_pending(sk))
-				mptcp_reset_timer(sk);
+			if (!mptcp_rtx_timer_pending(sk))
+				mptcp_reset_rtx_timer(sk);
 		}
 		break;
 	}
@@ -2933,7 +2933,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
 
 	might_sleep();
 
-	mptcp_stop_timer(sk);
+	mptcp_stop_rtx_timer(sk);
 	sk_stop_timer(sk, &sk->sk_timer);
 	msk->pm.status = 0;
 	mptcp_release_sched(msk);
@@ -3053,7 +3053,7 @@ cleanup:
 		__mptcp_destroy_sock(sk);
 		do_cancel_work = true;
 	} else {
-		mptcp_reset_timeout(msk, 0);
+		mptcp_reset_tout_timer(msk, 0);
 	}
 
 	return do_cancel_work;
@@ -3116,7 +3116,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
 	mptcp_check_listen_stop(sk);
 	inet_sk_state_store(sk, TCP_CLOSE);
 
-	mptcp_stop_timer(sk);
+	mptcp_stop_rtx_timer(sk);
 	sk_stop_timer(sk, &sk->sk_timer);
 
 	if (msk->token)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 7254b3562575..5e2026815c8e 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -718,7 +718,7 @@ void mptcp_get_options(const struct sk_buff *skb,
 
 void mptcp_finish_connect(struct sock *sk);
 void __mptcp_set_connected(struct sock *sk);
-void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout);
+void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout);
 static inline bool mptcp_is_fully_established(struct sock *sk)
 {
 	return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 2f40c23fdb0d..433f290984c8 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1226,7 +1226,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
 	WRITE_ONCE(subflow->fail_tout, fail_tout);
 	tcp_send_ack(ssk);
 
-	mptcp_reset_timeout(msk, subflow->fail_tout);
+	mptcp_reset_tout_timer(msk, subflow->fail_tout);
 }
 
 static bool subflow_check_data_avail(struct sock *ssk)
-- 
cgit v1.2.3


From 27e5ccc2d5a50ed61bb73153edb1066104b108b3 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Sat, 16 Sep 2023 12:52:49 +0200
Subject: mptcp: fix dangling connection hang-up

According to RFC 8684 section 3.3:

  A connection is not closed unless [...] or an implementation-specific
  connection-level send timeout.

Currently the MPTCP protocol does not implement such timeout, and
connection timing-out at the TCP-level never move to close state.

Introduces a catch-up condition at subflow close time to move the
MPTCP socket to close, too.

That additionally allows removing similar existing inside the worker.

Finally, allow some additional timeout for plain ESTABLISHED mptcp
sockets, as the protocol allows creating new subflows even at that
point and making the connection functional again.

This issue is actually present since the beginning, but it is basically
impossible to solve without a long chain of functional pre-requisites
topped by commit bbd49d114d57 ("mptcp: consolidate transition to
TCP_CLOSE in mptcp_do_fastclose()"). When backporting this current
patch, please also backport this other commit as well.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/430
Fixes: e16163b6e2b7 ("mptcp: refactor shutdown and close")
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 86 +++++++++++++++++++++++++---------------------------
 net/mptcp/protocol.h | 22 ++++++++++++++
 net/mptcp/subflow.c  |  1 +
 3 files changed, 65 insertions(+), 44 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index c8f38f303a90..e252539b1e19 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -892,6 +892,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
 	mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
 	mptcp_sockopt_sync_locked(msk, ssk);
 	mptcp_subflow_joined(msk, ssk);
+	mptcp_stop_tout_timer(sk);
 	return true;
 }
 
@@ -2369,18 +2370,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 	bool dispose_it, need_push = false;
 
 	/* If the first subflow moved to a close state before accept, e.g. due
-	 * to an incoming reset, mptcp either:
-	 * - if either the subflow or the msk are dead, destroy the context
-	 *   (the subflow socket is deleted by inet_child_forget) and the msk
-	 * - otherwise do nothing at the moment and take action at accept and/or
-	 *   listener shutdown - user-space must be able to accept() the closed
-	 *   socket.
+	 * to an incoming reset or listener shutdown, the subflow socket is
+	 * already deleted by inet_child_forget() and the mptcp socket can't
+	 * survive too.
 	 */
-	if (msk->in_accept_queue && msk->first == ssk) {
-		if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD))
-			return;
-
+	if (msk->in_accept_queue && msk->first == ssk &&
+	    (sock_flag(sk, SOCK_DEAD) || sock_flag(ssk, SOCK_DEAD))) {
 		/* ensure later check in mptcp_worker() will dispose the msk */
+		mptcp_set_close_tout(sk, tcp_jiffies32 - (TCP_TIMEWAIT_LEN + 1));
 		sock_set_flag(sk, SOCK_DEAD);
 		lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
 		mptcp_subflow_drop_ctx(ssk);
@@ -2443,6 +2440,22 @@ out_release:
 out:
 	if (need_push)
 		__mptcp_push_pending(sk, 0);
+
+	/* Catch every 'all subflows closed' scenario, including peers silently
+	 * closing them, e.g. due to timeout.
+	 * For established sockets, allow an additional timeout before closing,
+	 * as the protocol can still create more subflows.
+	 */
+	if (list_is_singular(&msk->conn_list) && msk->first &&
+	    inet_sk_state_load(msk->first) == TCP_CLOSE) {
+		if (sk->sk_state != TCP_ESTABLISHED ||
+		    msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) {
+			inet_sk_state_store(sk, TCP_CLOSE);
+			mptcp_close_wake_up(sk);
+		} else {
+			mptcp_start_tout_timer(sk);
+		}
+	}
 }
 
 void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
@@ -2486,23 +2499,14 @@ static void __mptcp_close_subflow(struct sock *sk)
 
 }
 
-static bool mptcp_should_close(const struct sock *sk)
+static bool mptcp_close_tout_expired(const struct sock *sk)
 {
-	s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp;
-	struct mptcp_subflow_context *subflow;
-
-	if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue)
-		return true;
+	if (!inet_csk(sk)->icsk_mtup.probe_timestamp ||
+	    sk->sk_state == TCP_CLOSE)
+		return false;
 
-	/* if all subflows are in closed status don't bother with additional
-	 * timeout
-	 */
-	mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
-		if (inet_sk_state_load(mptcp_subflow_tcp_sock(subflow)) !=
-		    TCP_CLOSE)
-			return false;
-	}
-	return true;
+	return time_after32(tcp_jiffies32,
+		  inet_csk(sk)->icsk_mtup.probe_timestamp + TCP_TIMEWAIT_LEN);
 }
 
 static void mptcp_check_fastclose(struct mptcp_sock *msk)
@@ -2641,15 +2645,16 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
 	struct sock *sk = (struct sock *)msk;
 	unsigned long timeout, close_timeout;
 
-	if (!fail_tout && !sock_flag(sk, SOCK_DEAD))
+	if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp)
 		return;
 
-	close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN;
+	close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies +
+			TCP_TIMEWAIT_LEN;
 
 	/* the close timeout takes precedence on the fail one, and here at least one of
 	 * them is active
 	 */
-	timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout;
+	timeout = inet_csk(sk)->icsk_mtup.probe_timestamp ? close_timeout : fail_tout;
 
 	sk_reset_timer(sk, &sk->sk_timer, timeout);
 }
@@ -2668,8 +2673,6 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
 	mptcp_subflow_reset(ssk);
 	WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0);
 	unlock_sock_fast(ssk, slow);
-
-	mptcp_reset_tout_timer(msk, 0);
 }
 
 static void mptcp_do_fastclose(struct sock *sk)
@@ -2706,18 +2709,14 @@ static void mptcp_worker(struct work_struct *work)
 	if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
 		__mptcp_close_subflow(sk);
 
-	/* There is no point in keeping around an orphaned sk timedout or
-	 * closed, but we need the msk around to reply to incoming DATA_FIN,
-	 * even if it is orphaned and in FIN_WAIT2 state
-	 */
-	if (sock_flag(sk, SOCK_DEAD)) {
-		if (mptcp_should_close(sk))
-			mptcp_do_fastclose(sk);
+	if (mptcp_close_tout_expired(sk)) {
+		mptcp_do_fastclose(sk);
+		mptcp_close_wake_up(sk);
+	}
 
-		if (sk->sk_state == TCP_CLOSE) {
-			__mptcp_destroy_sock(sk);
-			goto unlock;
-		}
+	if (sock_flag(sk, SOCK_DEAD) && sk->sk_state == TCP_CLOSE) {
+		__mptcp_destroy_sock(sk);
+		goto unlock;
 	}
 
 	if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
@@ -3016,7 +3015,6 @@ bool __mptcp_close(struct sock *sk, long timeout)
 
 cleanup:
 	/* orphan all the subflows */
-	inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
 	mptcp_for_each_subflow(msk, subflow) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 		bool slow = lock_sock_fast_nested(ssk);
@@ -3053,7 +3051,7 @@ cleanup:
 		__mptcp_destroy_sock(sk);
 		do_cancel_work = true;
 	} else {
-		mptcp_reset_tout_timer(msk, 0);
+		mptcp_start_tout_timer(sk);
 	}
 
 	return do_cancel_work;
@@ -3117,7 +3115,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
 	inet_sk_state_store(sk, TCP_CLOSE);
 
 	mptcp_stop_rtx_timer(sk);
-	sk_stop_timer(sk, &sk->sk_timer);
+	mptcp_stop_tout_timer(sk);
 
 	if (msk->token)
 		mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 5e2026815c8e..ed61d6850cce 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -719,6 +719,28 @@ void mptcp_get_options(const struct sk_buff *skb,
 void mptcp_finish_connect(struct sock *sk);
 void __mptcp_set_connected(struct sock *sk);
 void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout);
+
+static inline void mptcp_stop_tout_timer(struct sock *sk)
+{
+	if (!inet_csk(sk)->icsk_mtup.probe_timestamp)
+		return;
+
+	sk_stop_timer(sk, &sk->sk_timer);
+	inet_csk(sk)->icsk_mtup.probe_timestamp = 0;
+}
+
+static inline void mptcp_set_close_tout(struct sock *sk, unsigned long tout)
+{
+	/* avoid 0 timestamp, as that means no close timeout */
+	inet_csk(sk)->icsk_mtup.probe_timestamp = tout ? : 1;
+}
+
+static inline void mptcp_start_tout_timer(struct sock *sk)
+{
+	mptcp_set_close_tout(sk, tcp_jiffies32);
+	mptcp_reset_tout_timer(mptcp_sk(sk), 0);
+}
+
 static inline bool mptcp_is_fully_established(struct sock *sk)
 {
 	return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 433f290984c8..918c1a235790 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1552,6 +1552,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
 	mptcp_sock_graft(ssk, sk->sk_socket);
 	iput(SOCK_INODE(sf));
 	WRITE_ONCE(msk->allow_infinite_fallback, false);
+	mptcp_stop_tout_timer(sk);
 	return 0;
 
 failed_unlink:
-- 
cgit v1.2.3


From 418f438a2db67503fe00cef5bbd64fd1f891e145 Mon Sep 17 00:00:00 2001
From: Peter Lafreniere <peter@n8pjl.ca>
Date: Sun, 17 Sep 2023 15:29:58 +0000
Subject: Documentation: netdev: fix dead link in ax25.rst

http://linux-ax25.org has been down for nearly a year. Its official
replacement is https://linux-ax25.in-berlin.de.

Update the documentation to point there instead. And acknowledge that
while the linux-hams list isn't entirely dead, it isn't what most would
call 'active'. Remove that word.

Link: https://marc.info/?m=166792551600315
Signed-off-by: Peter Lafreniere <peter@n8pjl.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ax25.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/ax25.rst b/Documentation/networking/ax25.rst
index f060cfb1445a..605e72c6c877 100644
--- a/Documentation/networking/ax25.rst
+++ b/Documentation/networking/ax25.rst
@@ -7,9 +7,9 @@ AX.25
 To use the amateur radio protocols within Linux you will need to get a
 suitable copy of the AX.25 Utilities. More detailed information about
 AX.25, NET/ROM and ROSE, associated programs and utilities can be
-found on http://www.linux-ax25.org.
+found on https://linux-ax25.in-berlin.de.
 
-There is an active mailing list for discussing Linux amateur radio matters
+There is a mailing list for discussing Linux amateur radio matters
 called linux-hams@vger.kernel.org. To subscribe to it, send a message to
 majordomo@vger.kernel.org with the words "subscribe linux-hams" in the body
 of the message, the subject field is ignored.  You don't need to be
-- 
cgit v1.2.3


From 1943f2b0ac5a9fde718c18c1f4ab8332c8b5cd60 Mon Sep 17 00:00:00 2001
From: Peter Lafreniere <peter@n8pjl.ca>
Date: Sun, 17 Sep 2023 15:30:10 +0000
Subject: MAINTAINERS: Update link for linux-ax25.org

http://linux-ax25.org has been down for nearly a year. Its official
replacement is https://linux-ax25.in-berlin.de. Update all links to the
new URL.

Link: https://marc.info/?m=166792551600315
Signed-off-by: Peter Lafreniere <peter@n8pjl.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index bf0f54c24f81..c155eb535906 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3344,7 +3344,7 @@ AX.25 NETWORK LAYER
 M:	Ralf Baechle <ralf@linux-mips.org>
 L:	linux-hams@vger.kernel.org
 S:	Maintained
-W:	http://www.linux-ax25.org/
+W:	https://linux-ax25.in-berlin.de
 F:	include/net/ax25.h
 F:	include/uapi/linux/ax25.h
 F:	net/ax25/
@@ -14756,7 +14756,7 @@ NETROM NETWORK LAYER
 M:	Ralf Baechle <ralf@linux-mips.org>
 L:	linux-hams@vger.kernel.org
 S:	Maintained
-W:	http://www.linux-ax25.org/
+W:	https://linux-ax25.in-berlin.de
 F:	include/net/netrom.h
 F:	include/uapi/linux/netrom.h
 F:	net/netrom/
@@ -18607,7 +18607,7 @@ ROSE NETWORK LAYER
 M:	Ralf Baechle <ralf@linux-mips.org>
 L:	linux-hams@vger.kernel.org
 S:	Maintained
-W:	http://www.linux-ax25.org/
+W:	https://linux-ax25.in-berlin.de
 F:	include/net/rose.h
 F:	include/uapi/linux/rose.h
 F:	net/rose/
-- 
cgit v1.2.3


From 71273c46a34823e54af7828df67e5983ba85d6c2 Mon Sep 17 00:00:00 2001
From: Peter Lafreniere <peter@n8pjl.ca>
Date: Sun, 17 Sep 2023 15:30:21 +0000
Subject: ax25: Kconfig: Update link for linux-ax25.org

http://linux-ax25.org has been down for nearly a year. Its official
replacement is https://linux-ax25.in-berlin.de. Change all references to
the old site in the ax25 Kconfig to its replacement.

Link: https://marc.info/?m=166792551600315
Signed-off-by: Peter Lafreniere <peter@n8pjl.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ax25/Kconfig | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/net/ax25/Kconfig b/net/ax25/Kconfig
index d3a9843a043d..fdb666607f10 100644
--- a/net/ax25/Kconfig
+++ b/net/ax25/Kconfig
@@ -10,7 +10,7 @@ menuconfig HAMRADIO
 	  If you want to connect your Linux box to an amateur radio, answer Y
 	  here. You want to read <https://www.tapr.org/>
 	  and more specifically about AX.25 on Linux
-	  <http://www.linux-ax25.org/>.
+	  <https://linux-ax25.in-berlin.de>.
 
 	  Note that the answer to this question won't directly affect the
 	  kernel: saying N will just cause the configurator to skip all
@@ -61,7 +61,7 @@ config AX25_DAMA_SLAVE
 	  configuration. Linux cannot yet act as a DAMA server.  This option
 	  only compiles DAMA slave support into the kernel.  It still needs to
 	  be enabled at runtime.  For more about DAMA see
-	  <http://www.linux-ax25.org>.  If unsure, say Y.
+	  <https://linux-ax25.in-berlin.de>.  If unsure, say Y.
 
 # placeholder until implemented
 config AX25_DAMA_MASTER
@@ -87,9 +87,9 @@ config NETROM
 	  A comprehensive listing of all the software for Linux amateur radio
 	  users as well as information about how to configure an AX.25 port is
 	  contained in the Linux Ham Wiki, available from
-	  <http://www.linux-ax25.org>. You also might want to check out the
-	  file <file:Documentation/networking/ax25.rst>. More information about
-	  digital amateur radio in general is on the WWW at
+	  <https://linux-ax25.in-berlin.de>. You also might want to check out
+	  the file <file:Documentation/networking/ax25.rst>. More information
+	  about digital amateur radio in general is on the WWW at
 	  <https://www.tapr.org/>.
 
 	  To compile this driver as a module, choose M here: the
@@ -106,9 +106,9 @@ config ROSE
 	  A comprehensive listing of all the software for Linux amateur radio
 	  users as well as information about how to configure an AX.25 port is
 	  contained in the Linux Ham Wiki, available from
-	  <http://www.linux-ax25.org>.  You also might want to check out the
-	  file <file:Documentation/networking/ax25.rst>. More information about
-	  digital amateur radio in general is on the WWW at
+	  <https://linux-ax25.in-berlin.de>.  You also might want to check out
+	  the file <file:Documentation/networking/ax25.rst>. More information
+	  about digital amateur radio in general is on the WWW at
 	  <https://www.tapr.org/>.
 
 	  To compile this driver as a module, choose M here: the
-- 
cgit v1.2.3


From 8070274b472e2e9f5f67a990f5e697634c415708 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang@kernel.org>
Date: Mon, 18 Sep 2023 00:53:28 +0800
Subject: net: stmmac: fix incorrect rxq|txq_stats reference
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 133466c3bbe1 ("net: stmmac: use per-queue 64 bit statistics
where necessary") caused one regression as found by Uwe, the backtrace
looks like:

	INFO: trying to register non-static key.
	The code is fine but needs lockdep annotation, or maybe
	you didn't initialize this object before use?
	turning off the locking correctness validator.
	CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.5.0-rc1-00449-g133466c3bbe1-dirty #21
	Hardware name: STM32 (Device Tree Support)
	 unwind_backtrace from show_stack+0x18/0x1c
	 show_stack from dump_stack_lvl+0x60/0x90
	 dump_stack_lvl from register_lock_class+0x98c/0x99c
	 register_lock_class from __lock_acquire+0x74/0x293c
	 __lock_acquire from lock_acquire+0x134/0x398
	 lock_acquire from stmmac_get_stats64+0x2ac/0x2fc
	 stmmac_get_stats64 from dev_get_stats+0x44/0x130
	 dev_get_stats from rtnl_fill_stats+0x38/0x120
	 rtnl_fill_stats from rtnl_fill_ifinfo+0x834/0x17f4
	 rtnl_fill_ifinfo from rtmsg_ifinfo_build_skb+0xc0/0x144
	 rtmsg_ifinfo_build_skb from rtmsg_ifinfo+0x50/0x88
	 rtmsg_ifinfo from __dev_notify_flags+0xc0/0xec
	 __dev_notify_flags from dev_change_flags+0x50/0x5c
	 dev_change_flags from ip_auto_config+0x2f4/0x1260
	 ip_auto_config from do_one_initcall+0x70/0x35c
	 do_one_initcall from kernel_init_freeable+0x2ac/0x308
	 kernel_init_freeable from kernel_init+0x1c/0x138
	 kernel_init from ret_from_fork+0x14/0x2c

The reason is the rxq|txq_stats structures are not what expected
because stmmac_open() -> __stmmac_open() the structure is overwritten
by "memcpy(&priv->dma_conf, dma_conf, sizeof(*dma_conf));"
This causes the well initialized syncp member of rxq|txq_stats is
overwritten unexpectedly as pointed out by Johannes and Uwe.

Fix this issue by moving rxq|txq_stats back to stmmac_extra_stats. For
SMP cache friendly, we also mark stmmac_txq_stats and stmmac_rxq_stats
as ____cacheline_aligned_in_smp.

Fixes: 133466c3bbe1 ("net: stmmac: use per-queue 64 bit statistics where necessary")
Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
Reported-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Tested-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20230917165328.3403-1-jszhang@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/stmicro/stmmac/common.h       |   7 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c  |  16 +--
 drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c   |  16 +--
 drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c    |  16 +--
 drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c |  16 +--
 drivers/net/ethernet/stmicro/stmmac/stmmac.h       |   2 -
 .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c   |  32 +++---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  | 125 +++++++++++----------
 8 files changed, 120 insertions(+), 110 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 403cb397d4d3..1e996c29043d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -70,7 +70,7 @@ struct stmmac_txq_stats {
 	u64 tx_tso_frames;
 	u64 tx_tso_nfrags;
 	struct u64_stats_sync syncp;
-};
+} ____cacheline_aligned_in_smp;
 
 struct stmmac_rxq_stats {
 	u64 rx_bytes;
@@ -79,7 +79,7 @@ struct stmmac_rxq_stats {
 	u64 rx_normal_irq_n;
 	u64 napi_poll;
 	struct u64_stats_sync syncp;
-};
+} ____cacheline_aligned_in_smp;
 
 /* Extra statistic and debug information exposed by ethtool */
 struct stmmac_extra_stats {
@@ -202,6 +202,9 @@ struct stmmac_extra_stats {
 	unsigned long mtl_est_hlbf;
 	unsigned long mtl_est_btre;
 	unsigned long mtl_est_btrlm;
+	/* per queue statistics */
+	struct stmmac_txq_stats txq_stats[MTL_MAX_TX_QUEUES];
+	struct stmmac_rxq_stats rxq_stats[MTL_MAX_RX_QUEUES];
 	unsigned long rx_dropped;
 	unsigned long rx_errors;
 	unsigned long tx_dropped;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 01e77368eef1..465ff1fd4785 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -441,8 +441,8 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,
 				     struct stmmac_extra_stats *x, u32 chan,
 				     u32 dir)
 {
-	struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan];
-	struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan];
+	struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan];
+	struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan];
 	int ret = 0;
 	u32 v;
 
@@ -455,9 +455,9 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,
 
 	if (v & EMAC_TX_INT) {
 		ret |= handle_tx;
-		u64_stats_update_begin(&tx_q->txq_stats.syncp);
-		tx_q->txq_stats.tx_normal_irq_n++;
-		u64_stats_update_end(&tx_q->txq_stats.syncp);
+		u64_stats_update_begin(&txq_stats->syncp);
+		txq_stats->tx_normal_irq_n++;
+		u64_stats_update_end(&txq_stats->syncp);
 	}
 
 	if (v & EMAC_TX_DMA_STOP_INT)
@@ -479,9 +479,9 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,
 
 	if (v & EMAC_RX_INT) {
 		ret |= handle_rx;
-		u64_stats_update_begin(&rx_q->rxq_stats.syncp);
-		rx_q->rxq_stats.rx_normal_irq_n++;
-		u64_stats_update_end(&rx_q->rxq_stats.syncp);
+		u64_stats_update_begin(&rxq_stats->syncp);
+		rxq_stats->rx_normal_irq_n++;
+		u64_stats_update_end(&rxq_stats->syncp);
 	}
 
 	if (v & EMAC_RX_BUF_UA_INT)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
index 980e5f8a37ec..9470d3fd2ded 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
@@ -171,8 +171,8 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
 	const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
 	u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(dwmac4_addrs, chan));
 	u32 intr_en = readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan));
-	struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan];
-	struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan];
+	struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan];
+	struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan];
 	int ret = 0;
 
 	if (dir == DMA_DIR_RX)
@@ -201,15 +201,15 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
 	}
 	/* TX/RX NORMAL interrupts */
 	if (likely(intr_status & DMA_CHAN_STATUS_RI)) {
-		u64_stats_update_begin(&rx_q->rxq_stats.syncp);
-		rx_q->rxq_stats.rx_normal_irq_n++;
-		u64_stats_update_end(&rx_q->rxq_stats.syncp);
+		u64_stats_update_begin(&rxq_stats->syncp);
+		rxq_stats->rx_normal_irq_n++;
+		u64_stats_update_end(&rxq_stats->syncp);
 		ret |= handle_rx;
 	}
 	if (likely(intr_status & DMA_CHAN_STATUS_TI)) {
-		u64_stats_update_begin(&tx_q->txq_stats.syncp);
-		tx_q->txq_stats.tx_normal_irq_n++;
-		u64_stats_update_end(&tx_q->txq_stats.syncp);
+		u64_stats_update_begin(&txq_stats->syncp);
+		txq_stats->tx_normal_irq_n++;
+		u64_stats_update_end(&txq_stats->syncp);
 		ret |= handle_tx;
 	}
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
index aaa09b16b016..7907d62d3437 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
@@ -162,8 +162,8 @@ static void show_rx_process_state(unsigned int status)
 int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
 			struct stmmac_extra_stats *x, u32 chan, u32 dir)
 {
-	struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan];
-	struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan];
+	struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan];
+	struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan];
 	int ret = 0;
 	/* read the status register (CSR5) */
 	u32 intr_status = readl(ioaddr + DMA_STATUS);
@@ -215,16 +215,16 @@ int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
 			u32 value = readl(ioaddr + DMA_INTR_ENA);
 			/* to schedule NAPI on real RIE event. */
 			if (likely(value & DMA_INTR_ENA_RIE)) {
-				u64_stats_update_begin(&rx_q->rxq_stats.syncp);
-				rx_q->rxq_stats.rx_normal_irq_n++;
-				u64_stats_update_end(&rx_q->rxq_stats.syncp);
+				u64_stats_update_begin(&rxq_stats->syncp);
+				rxq_stats->rx_normal_irq_n++;
+				u64_stats_update_end(&rxq_stats->syncp);
 				ret |= handle_rx;
 			}
 		}
 		if (likely(intr_status & DMA_STATUS_TI)) {
-			u64_stats_update_begin(&tx_q->txq_stats.syncp);
-			tx_q->txq_stats.tx_normal_irq_n++;
-			u64_stats_update_end(&tx_q->txq_stats.syncp);
+			u64_stats_update_begin(&txq_stats->syncp);
+			txq_stats->tx_normal_irq_n++;
+			u64_stats_update_end(&txq_stats->syncp);
 			ret |= handle_tx;
 		}
 		if (unlikely(intr_status & DMA_STATUS_ERI))
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index fa69d64a8694..3cde695fec91 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -337,8 +337,8 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv,
 				  struct stmmac_extra_stats *x, u32 chan,
 				  u32 dir)
 {
-	struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[chan];
-	struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[chan];
+	struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan];
+	struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan];
 	u32 intr_status = readl(ioaddr + XGMAC_DMA_CH_STATUS(chan));
 	u32 intr_en = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan));
 	int ret = 0;
@@ -367,15 +367,15 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv,
 	/* TX/RX NORMAL interrupts */
 	if (likely(intr_status & XGMAC_NIS)) {
 		if (likely(intr_status & XGMAC_RI)) {
-			u64_stats_update_begin(&rx_q->rxq_stats.syncp);
-			rx_q->rxq_stats.rx_normal_irq_n++;
-			u64_stats_update_end(&rx_q->rxq_stats.syncp);
+			u64_stats_update_begin(&rxq_stats->syncp);
+			rxq_stats->rx_normal_irq_n++;
+			u64_stats_update_end(&rxq_stats->syncp);
 			ret |= handle_rx;
 		}
 		if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) {
-			u64_stats_update_begin(&tx_q->txq_stats.syncp);
-			tx_q->txq_stats.tx_normal_irq_n++;
-			u64_stats_update_end(&tx_q->txq_stats.syncp);
+			u64_stats_update_begin(&txq_stats->syncp);
+			txq_stats->tx_normal_irq_n++;
+			u64_stats_update_end(&txq_stats->syncp);
 			ret |= handle_tx;
 		}
 	}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 3401e888a9f6..cd7a9768de5f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -78,7 +78,6 @@ struct stmmac_tx_queue {
 	dma_addr_t dma_tx_phy;
 	dma_addr_t tx_tail_addr;
 	u32 mss;
-	struct stmmac_txq_stats txq_stats;
 };
 
 struct stmmac_rx_buffer {
@@ -123,7 +122,6 @@ struct stmmac_rx_queue {
 		unsigned int len;
 		unsigned int error;
 	} state;
-	struct stmmac_rxq_stats rxq_stats;
 };
 
 struct stmmac_channel {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index b7ac7abecdd3..6aa5c0556d22 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -548,14 +548,14 @@ static void stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data)
 
 	pos = data;
 	for (q = 0; q < tx_cnt; q++) {
-		struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[q];
+		struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[q];
 		struct stmmac_txq_stats snapshot;
 
 		data = pos;
 		do {
-			start = u64_stats_fetch_begin(&tx_q->txq_stats.syncp);
-			snapshot = tx_q->txq_stats;
-		} while (u64_stats_fetch_retry(&tx_q->txq_stats.syncp, start));
+			start = u64_stats_fetch_begin(&txq_stats->syncp);
+			snapshot = *txq_stats;
+		} while (u64_stats_fetch_retry(&txq_stats->syncp, start));
 
 		p = (char *)&snapshot + offsetof(struct stmmac_txq_stats, tx_pkt_n);
 		for (stat = 0; stat < STMMAC_TXQ_STATS; stat++) {
@@ -566,14 +566,14 @@ static void stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data)
 
 	pos = data;
 	for (q = 0; q < rx_cnt; q++) {
-		struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[q];
+		struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[q];
 		struct stmmac_rxq_stats snapshot;
 
 		data = pos;
 		do {
-			start = u64_stats_fetch_begin(&rx_q->rxq_stats.syncp);
-			snapshot = rx_q->rxq_stats;
-		} while (u64_stats_fetch_retry(&rx_q->rxq_stats.syncp, start));
+			start = u64_stats_fetch_begin(&rxq_stats->syncp);
+			snapshot = *rxq_stats;
+		} while (u64_stats_fetch_retry(&rxq_stats->syncp, start));
 
 		p = (char *)&snapshot + offsetof(struct stmmac_rxq_stats, rx_pkt_n);
 		for (stat = 0; stat < STMMAC_RXQ_STATS; stat++) {
@@ -637,14 +637,14 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
 
 	pos = j;
 	for (i = 0; i < rx_queues_count; i++) {
-		struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[i];
+		struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[i];
 		struct stmmac_rxq_stats snapshot;
 
 		j = pos;
 		do {
-			start = u64_stats_fetch_begin(&rx_q->rxq_stats.syncp);
-			snapshot = rx_q->rxq_stats;
-		} while (u64_stats_fetch_retry(&rx_q->rxq_stats.syncp, start));
+			start = u64_stats_fetch_begin(&rxq_stats->syncp);
+			snapshot = *rxq_stats;
+		} while (u64_stats_fetch_retry(&rxq_stats->syncp, start));
 
 		data[j++] += snapshot.rx_pkt_n;
 		data[j++] += snapshot.rx_normal_irq_n;
@@ -654,14 +654,14 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
 
 	pos = j;
 	for (i = 0; i < tx_queues_count; i++) {
-		struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[i];
+		struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[i];
 		struct stmmac_txq_stats snapshot;
 
 		j = pos;
 		do {
-			start = u64_stats_fetch_begin(&tx_q->txq_stats.syncp);
-			snapshot = tx_q->txq_stats;
-		} while (u64_stats_fetch_retry(&tx_q->txq_stats.syncp, start));
+			start = u64_stats_fetch_begin(&txq_stats->syncp);
+			snapshot = *txq_stats;
+		} while (u64_stats_fetch_retry(&txq_stats->syncp, start));
 
 		data[j++] += snapshot.tx_pkt_n;
 		data[j++] += snapshot.tx_normal_irq_n;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 2206789802bf..83c567a89a46 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2426,6 +2426,7 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
 {
 	struct netdev_queue *nq = netdev_get_tx_queue(priv->dev, queue);
 	struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue];
+	struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[queue];
 	struct xsk_buff_pool *pool = tx_q->xsk_pool;
 	unsigned int entry = tx_q->cur_tx;
 	struct dma_desc *tx_desc = NULL;
@@ -2505,9 +2506,9 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
 		tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, priv->dma_conf.dma_tx_size);
 		entry = tx_q->cur_tx;
 	}
-	flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
-	tx_q->txq_stats.tx_set_ic_bit += tx_set_ic_bit;
-	u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
+	flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
+	txq_stats->tx_set_ic_bit += tx_set_ic_bit;
+	u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
 
 	if (tx_desc) {
 		stmmac_flush_tx_descriptors(priv, queue);
@@ -2547,6 +2548,7 @@ static void stmmac_bump_dma_threshold(struct stmmac_priv *priv, u32 chan)
 static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 {
 	struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue];
+	struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[queue];
 	unsigned int bytes_compl = 0, pkts_compl = 0;
 	unsigned int entry, xmits = 0, count = 0;
 	u32 tx_packets = 0, tx_errors = 0;
@@ -2706,11 +2708,11 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 	if (tx_q->dirty_tx != tx_q->cur_tx)
 		stmmac_tx_timer_arm(priv, queue);
 
-	flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
-	tx_q->txq_stats.tx_packets += tx_packets;
-	tx_q->txq_stats.tx_pkt_n += tx_packets;
-	tx_q->txq_stats.tx_clean++;
-	u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
+	flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
+	txq_stats->tx_packets += tx_packets;
+	txq_stats->tx_pkt_n += tx_packets;
+	txq_stats->tx_clean++;
+	u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
 
 	priv->xstats.tx_errors += tx_errors;
 
@@ -4114,6 +4116,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	int nfrags = skb_shinfo(skb)->nr_frags;
 	u32 queue = skb_get_queue_mapping(skb);
 	unsigned int first_entry, tx_packets;
+	struct stmmac_txq_stats *txq_stats;
 	int tmp_pay_len = 0, first_tx;
 	struct stmmac_tx_queue *tx_q;
 	bool has_vlan, set_ic;
@@ -4124,6 +4127,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	int i;
 
 	tx_q = &priv->dma_conf.tx_queue[queue];
+	txq_stats = &priv->xstats.txq_stats[queue];
 	first_tx = tx_q->cur_tx;
 
 	/* Compute header lengths */
@@ -4282,13 +4286,13 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 		netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue));
 	}
 
-	flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
-	tx_q->txq_stats.tx_bytes += skb->len;
-	tx_q->txq_stats.tx_tso_frames++;
-	tx_q->txq_stats.tx_tso_nfrags += nfrags;
+	flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
+	txq_stats->tx_bytes += skb->len;
+	txq_stats->tx_tso_frames++;
+	txq_stats->tx_tso_nfrags += nfrags;
 	if (set_ic)
-		tx_q->txq_stats.tx_set_ic_bit++;
-	u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
+		txq_stats->tx_set_ic_bit++;
+	u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
 
 	if (priv->sarc_type)
 		stmmac_set_desc_sarc(priv, first, priv->sarc_type);
@@ -4359,6 +4363,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	u32 queue = skb_get_queue_mapping(skb);
 	int nfrags = skb_shinfo(skb)->nr_frags;
 	int gso = skb_shinfo(skb)->gso_type;
+	struct stmmac_txq_stats *txq_stats;
 	struct dma_edesc *tbs_desc = NULL;
 	struct dma_desc *desc, *first;
 	struct stmmac_tx_queue *tx_q;
@@ -4368,6 +4373,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	dma_addr_t des;
 
 	tx_q = &priv->dma_conf.tx_queue[queue];
+	txq_stats = &priv->xstats.txq_stats[queue];
 	first_tx = tx_q->cur_tx;
 
 	if (priv->tx_path_in_lpi_mode && priv->eee_sw_timer_en)
@@ -4519,11 +4525,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 		netif_tx_stop_queue(netdev_get_tx_queue(priv->dev, queue));
 	}
 
-	flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
-	tx_q->txq_stats.tx_bytes += skb->len;
+	flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
+	txq_stats->tx_bytes += skb->len;
 	if (set_ic)
-		tx_q->txq_stats.tx_set_ic_bit++;
-	u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
+		txq_stats->tx_set_ic_bit++;
+	u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
 
 	if (priv->sarc_type)
 		stmmac_set_desc_sarc(priv, first, priv->sarc_type);
@@ -4730,6 +4736,7 @@ static unsigned int stmmac_rx_buf2_len(struct stmmac_priv *priv,
 static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue,
 				struct xdp_frame *xdpf, bool dma_map)
 {
+	struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[queue];
 	struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue];
 	unsigned int entry = tx_q->cur_tx;
 	struct dma_desc *tx_desc;
@@ -4789,9 +4796,9 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue,
 		unsigned long flags;
 		tx_q->tx_count_frames = 0;
 		stmmac_set_tx_ic(priv, tx_desc);
-		flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
-		tx_q->txq_stats.tx_set_ic_bit++;
-		u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
+		flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
+		txq_stats->tx_set_ic_bit++;
+		u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
 	}
 
 	stmmac_enable_dma_transmission(priv, priv->ioaddr);
@@ -4936,7 +4943,7 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue,
 				   struct dma_desc *p, struct dma_desc *np,
 				   struct xdp_buff *xdp)
 {
-	struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[queue];
+	struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[queue];
 	struct stmmac_channel *ch = &priv->channel[queue];
 	unsigned int len = xdp->data_end - xdp->data;
 	enum pkt_hash_types hash_type;
@@ -4966,10 +4973,10 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue,
 	skb_record_rx_queue(skb, queue);
 	napi_gro_receive(&ch->rxtx_napi, skb);
 
-	flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp);
-	rx_q->rxq_stats.rx_pkt_n++;
-	rx_q->rxq_stats.rx_bytes += len;
-	u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags);
+	flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp);
+	rxq_stats->rx_pkt_n++;
+	rxq_stats->rx_bytes += len;
+	u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags);
 }
 
 static bool stmmac_rx_refill_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
@@ -5042,6 +5049,7 @@ static struct stmmac_xdp_buff *xsk_buff_to_stmmac_ctx(struct xdp_buff *xdp)
 
 static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue)
 {
+	struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[queue];
 	struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[queue];
 	unsigned int count = 0, error = 0, len = 0;
 	int dirty = stmmac_rx_dirty(priv, queue);
@@ -5205,9 +5213,9 @@ read_again:
 
 	stmmac_finalize_xdp_rx(priv, xdp_status);
 
-	flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp);
-	rx_q->rxq_stats.rx_pkt_n += count;
-	u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags);
+	flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp);
+	rxq_stats->rx_pkt_n += count;
+	u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags);
 
 	priv->xstats.rx_dropped += rx_dropped;
 	priv->xstats.rx_errors += rx_errors;
@@ -5235,6 +5243,7 @@ read_again:
 static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 {
 	u32 rx_errors = 0, rx_dropped = 0, rx_bytes = 0, rx_packets = 0;
+	struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[queue];
 	struct stmmac_rx_queue *rx_q = &priv->dma_conf.rx_queue[queue];
 	struct stmmac_channel *ch = &priv->channel[queue];
 	unsigned int count = 0, error = 0, len = 0;
@@ -5496,11 +5505,11 @@ drain_data:
 
 	stmmac_rx_refill(priv, queue);
 
-	flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp);
-	rx_q->rxq_stats.rx_packets += rx_packets;
-	rx_q->rxq_stats.rx_bytes += rx_bytes;
-	rx_q->rxq_stats.rx_pkt_n += count;
-	u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags);
+	flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp);
+	rxq_stats->rx_packets += rx_packets;
+	rxq_stats->rx_bytes += rx_bytes;
+	rxq_stats->rx_pkt_n += count;
+	u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags);
 
 	priv->xstats.rx_dropped += rx_dropped;
 	priv->xstats.rx_errors += rx_errors;
@@ -5513,15 +5522,15 @@ static int stmmac_napi_poll_rx(struct napi_struct *napi, int budget)
 	struct stmmac_channel *ch =
 		container_of(napi, struct stmmac_channel, rx_napi);
 	struct stmmac_priv *priv = ch->priv_data;
-	struct stmmac_rx_queue *rx_q;
+	struct stmmac_rxq_stats *rxq_stats;
 	u32 chan = ch->index;
 	unsigned long flags;
 	int work_done;
 
-	rx_q = &priv->dma_conf.rx_queue[chan];
-	flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp);
-	rx_q->rxq_stats.napi_poll++;
-	u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags);
+	rxq_stats = &priv->xstats.rxq_stats[chan];
+	flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp);
+	rxq_stats->napi_poll++;
+	u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags);
 
 	work_done = stmmac_rx(priv, budget, chan);
 	if (work_done < budget && napi_complete_done(napi, work_done)) {
@@ -5540,15 +5549,15 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
 	struct stmmac_channel *ch =
 		container_of(napi, struct stmmac_channel, tx_napi);
 	struct stmmac_priv *priv = ch->priv_data;
-	struct stmmac_tx_queue *tx_q;
+	struct stmmac_txq_stats *txq_stats;
 	u32 chan = ch->index;
 	unsigned long flags;
 	int work_done;
 
-	tx_q = &priv->dma_conf.tx_queue[chan];
-	flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
-	tx_q->txq_stats.napi_poll++;
-	u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
+	txq_stats = &priv->xstats.txq_stats[chan];
+	flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
+	txq_stats->napi_poll++;
+	u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
 
 	work_done = stmmac_tx_clean(priv, budget, chan);
 	work_done = min(work_done, budget);
@@ -5570,20 +5579,20 @@ static int stmmac_napi_poll_rxtx(struct napi_struct *napi, int budget)
 		container_of(napi, struct stmmac_channel, rxtx_napi);
 	struct stmmac_priv *priv = ch->priv_data;
 	int rx_done, tx_done, rxtx_done;
-	struct stmmac_rx_queue *rx_q;
-	struct stmmac_tx_queue *tx_q;
+	struct stmmac_rxq_stats *rxq_stats;
+	struct stmmac_txq_stats *txq_stats;
 	u32 chan = ch->index;
 	unsigned long flags;
 
-	rx_q = &priv->dma_conf.rx_queue[chan];
-	flags = u64_stats_update_begin_irqsave(&rx_q->rxq_stats.syncp);
-	rx_q->rxq_stats.napi_poll++;
-	u64_stats_update_end_irqrestore(&rx_q->rxq_stats.syncp, flags);
+	rxq_stats = &priv->xstats.rxq_stats[chan];
+	flags = u64_stats_update_begin_irqsave(&rxq_stats->syncp);
+	rxq_stats->napi_poll++;
+	u64_stats_update_end_irqrestore(&rxq_stats->syncp, flags);
 
-	tx_q = &priv->dma_conf.tx_queue[chan];
-	flags = u64_stats_update_begin_irqsave(&tx_q->txq_stats.syncp);
-	tx_q->txq_stats.napi_poll++;
-	u64_stats_update_end_irqrestore(&tx_q->txq_stats.syncp, flags);
+	txq_stats = &priv->xstats.txq_stats[chan];
+	flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
+	txq_stats->napi_poll++;
+	u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
 
 	tx_done = stmmac_tx_clean(priv, budget, chan);
 	tx_done = min(tx_done, budget);
@@ -6926,7 +6935,7 @@ static void stmmac_get_stats64(struct net_device *dev, struct rtnl_link_stats64
 	int q;
 
 	for (q = 0; q < tx_cnt; q++) {
-		struct stmmac_txq_stats *txq_stats = &priv->dma_conf.tx_queue[q].txq_stats;
+		struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[q];
 		u64 tx_packets;
 		u64 tx_bytes;
 
@@ -6941,7 +6950,7 @@ static void stmmac_get_stats64(struct net_device *dev, struct rtnl_link_stats64
 	}
 
 	for (q = 0; q < rx_cnt; q++) {
-		struct stmmac_rxq_stats *rxq_stats = &priv->dma_conf.rx_queue[q].rxq_stats;
+		struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[q];
 		u64 rx_packets;
 		u64 rx_bytes;
 
@@ -7342,9 +7351,9 @@ int stmmac_dvr_probe(struct device *device,
 	priv->dev = ndev;
 
 	for (i = 0; i < MTL_MAX_RX_QUEUES; i++)
-		u64_stats_init(&priv->dma_conf.rx_queue[i].rxq_stats.syncp);
+		u64_stats_init(&priv->xstats.rxq_stats[i].syncp);
 	for (i = 0; i < MTL_MAX_TX_QUEUES; i++)
-		u64_stats_init(&priv->dma_conf.tx_queue[i].txq_stats.syncp);
+		u64_stats_init(&priv->xstats.txq_stats[i].syncp);
 
 	stmmac_set_ethtool_ops(ndev);
 	priv->pause = pause;
-- 
cgit v1.2.3


From bd3caddf299a640efb66c6022efed7fe744db626 Mon Sep 17 00:00:00 2001
From: Jie Wang <wangjie125@huawei.com>
Date: Mon, 18 Sep 2023 15:48:36 +0800
Subject: net: hns3: add cmdq check for vf periodic service task

When the vf cmdq is disabled, there is no need to keep these task running.
So this patch skip these task when the cmdq is disabled.

Fixes: ff200099d271 ("net: hns3: remove unnecessary work in hclgevf_main")
Signed-off-by: Jie Wang <wangjie125@huawei.com>
Signed-off-by: Jijie Shao <shaojijie@huawei.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 7a2f9233d695..a4d68fb216fb 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -1855,7 +1855,8 @@ static void hclgevf_periodic_service_task(struct hclgevf_dev *hdev)
 	unsigned long delta = round_jiffies_relative(HZ);
 	struct hnae3_handle *handle = &hdev->nic;
 
-	if (test_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state))
+	if (test_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state) ||
+	    test_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state))
 		return;
 
 	if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) {
-- 
cgit v1.2.3


From f9f651261130cdcb7adc9a3e365b356bc2749ab3 Mon Sep 17 00:00:00 2001
From: Jie Wang <wangjie125@huawei.com>
Date: Mon, 18 Sep 2023 15:48:37 +0800
Subject: net: hns3: fix GRE checksum offload issue

The device_version V3 hardware can't offload the checksum for IP in GRE
packets, but can do it for NvGRE. So default to disable the checksum and
GSO offload for GRE, but keep the ability to enable it when only using
NvGRE.

Fixes: 76ad4f0ee747 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC")
Signed-off-by: Jie Wang <wangjie125@huawei.com>
Signed-off-by: Jijie Shao <shaojijie@huawei.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index b4895c7b3efd..cf50368441b7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -3353,6 +3353,15 @@ static void hns3_set_default_feature(struct net_device *netdev)
 		  NETIF_F_HW_TC);
 
 	netdev->hw_enc_features |= netdev->vlan_features | NETIF_F_TSO_MANGLEID;
+
+	/* The device_version V3 hardware can't offload the checksum for IP in
+	 * GRE packets, but can do it for NvGRE. So default to disable the
+	 * checksum and GSO offload for GRE.
+	 */
+	if (ae_dev->dev_version > HNAE3_DEVICE_VERSION_V2) {
+		netdev->features &= ~NETIF_F_GSO_GRE;
+		netdev->features &= ~NETIF_F_GSO_GRE_CSUM;
+	}
 }
 
 static int hns3_alloc_buffer(struct hns3_enet_ring *ring,
-- 
cgit v1.2.3


From f2ed304922a55690529bcca59678dd92d7466ce8 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Mon, 18 Sep 2023 15:48:38 +0800
Subject: net: hns3: only enable unicast promisc when mac table full

Currently, the driver will enable unicast promisc for the function
once configure mac address fail. It's unreasonable when the failure
is caused by using same mac address with other functions. So only
enable unicast promisc when mac table full.

Fixes: c631c696823c ("net: hns3: refactor the promisc mode setting")
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Jijie Shao <shaojijie@huawei.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 8ca368424436..c0d03283775f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -8824,7 +8824,7 @@ static void hclge_update_overflow_flags(struct hclge_vport *vport,
 	if (mac_type == HCLGE_MAC_ADDR_UC) {
 		if (is_all_added)
 			vport->overflow_promisc_flags &= ~HNAE3_OVERFLOW_UPE;
-		else
+		else if (hclge_is_umv_space_full(vport, true))
 			vport->overflow_promisc_flags |= HNAE3_OVERFLOW_UPE;
 	} else {
 		if (is_all_added)
-- 
cgit v1.2.3


From 1a7be66e4685b8541546222c305cce9710718a88 Mon Sep 17 00:00:00 2001
From: Jijie Shao <shaojijie@huawei.com>
Date: Mon, 18 Sep 2023 15:48:39 +0800
Subject: net: hns3: fix fail to delete tc flower rules during reset issue

Firmware does not respond driver commands during reset
Therefore, rule will fail to delete while the firmware is resetting

So, if failed to delete rule, set rule state to TO_DEL,
and the rule will be deleted when periodic task being scheduled.

Fixes: 0205ec041ec6 ("net: hns3: add support for hw tc offload of tc flower")
Signed-off-by: Jijie Shao <shaojijie@huawei.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index c0d03283775f..2bd77871f3bf 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -7348,6 +7348,12 @@ static int hclge_del_cls_flower(struct hnae3_handle *handle,
 	ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, rule->location,
 				   NULL, false);
 	if (ret) {
+		/* if tcam config fail, set rule state to TO_DEL,
+		 * so the rule will be deleted when periodic
+		 * task being scheduled.
+		 */
+		hclge_update_fd_list(hdev, HCLGE_FD_TO_DEL, rule->location, NULL);
+		set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state);
 		spin_unlock_bh(&hdev->fd_rule_lock);
 		return ret;
 	}
-- 
cgit v1.2.3


From 0770063096d5da4a8e467b6e73c1646a75589628 Mon Sep 17 00:00:00 2001
From: Jie Wang <wangjie125@huawei.com>
Date: Mon, 18 Sep 2023 15:48:40 +0800
Subject: net: hns3: add 5ms delay before clear firmware reset irq source

Currently the reset process in hns3 and firmware watchdog init process is
asynchronous. we think firmware watchdog initialization is completed
before hns3 clear the firmware interrupt source. However, firmware
initialization may not complete early.

so we add delay before hns3 clear firmware interrupt source and 5 ms delay
is enough to avoid second firmware reset interrupt.

Fixes: c1a81619d73a ("net: hns3: Add mailbox interrupt handling to PF driver")
Signed-off-by: Jie Wang <wangjie125@huawei.com>
Signed-off-by: Jijie Shao <shaojijie@huawei.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 2bd77871f3bf..c42574e29747 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3564,9 +3564,14 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
 static void hclge_clear_event_cause(struct hclge_dev *hdev, u32 event_type,
 				    u32 regclr)
 {
+#define HCLGE_IMP_RESET_DELAY		5
+
 	switch (event_type) {
 	case HCLGE_VECTOR0_EVENT_PTP:
 	case HCLGE_VECTOR0_EVENT_RST:
+		if (regclr == BIT(HCLGE_VECTOR0_IMPRESET_INT_B))
+			mdelay(HCLGE_IMP_RESET_DELAY);
+
 		hclge_write_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG, regclr);
 		break;
 	case HCLGE_VECTOR0_EVENT_MBX:
-- 
cgit v1.2.3


From 44bdb313da57322c9b3c108eb66981c6ec6509f4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 18 Sep 2023 09:13:51 +0000
Subject: net: bridge: use DEV_STATS_INC()

syzbot/KCSAN reported data-races in br_handle_frame_finish() [1]
This function can run from multiple cpus without mutual exclusion.

Adopt SMP safe DEV_STATS_INC() to update dev->stats fields.

Handles updates to dev->stats.tx_dropped while we are at it.

[1]
BUG: KCSAN: data-race in br_handle_frame_finish / br_handle_frame_finish

read-write to 0xffff8881374b2178 of 8 bytes by interrupt on cpu 1:
br_handle_frame_finish+0xd4f/0xef0 net/bridge/br_input.c:189
br_nf_hook_thresh+0x1ed/0x220
br_nf_pre_routing_finish_ipv6+0x50f/0x540
NF_HOOK include/linux/netfilter.h:304 [inline]
br_nf_pre_routing_ipv6+0x1e3/0x2a0 net/bridge/br_netfilter_ipv6.c:178
br_nf_pre_routing+0x526/0xba0 net/bridge/br_netfilter_hooks.c:508
nf_hook_entry_hookfn include/linux/netfilter.h:144 [inline]
nf_hook_bridge_pre net/bridge/br_input.c:272 [inline]
br_handle_frame+0x4c9/0x940 net/bridge/br_input.c:417
__netif_receive_skb_core+0xa8a/0x21e0 net/core/dev.c:5417
__netif_receive_skb_one_core net/core/dev.c:5521 [inline]
__netif_receive_skb+0x57/0x1b0 net/core/dev.c:5637
process_backlog+0x21f/0x380 net/core/dev.c:5965
__napi_poll+0x60/0x3b0 net/core/dev.c:6527
napi_poll net/core/dev.c:6594 [inline]
net_rx_action+0x32b/0x750 net/core/dev.c:6727
__do_softirq+0xc1/0x265 kernel/softirq.c:553
run_ksoftirqd+0x17/0x20 kernel/softirq.c:921
smpboot_thread_fn+0x30a/0x4a0 kernel/smpboot.c:164
kthread+0x1d7/0x210 kernel/kthread.c:388
ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147
ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304

read-write to 0xffff8881374b2178 of 8 bytes by interrupt on cpu 0:
br_handle_frame_finish+0xd4f/0xef0 net/bridge/br_input.c:189
br_nf_hook_thresh+0x1ed/0x220
br_nf_pre_routing_finish_ipv6+0x50f/0x540
NF_HOOK include/linux/netfilter.h:304 [inline]
br_nf_pre_routing_ipv6+0x1e3/0x2a0 net/bridge/br_netfilter_ipv6.c:178
br_nf_pre_routing+0x526/0xba0 net/bridge/br_netfilter_hooks.c:508
nf_hook_entry_hookfn include/linux/netfilter.h:144 [inline]
nf_hook_bridge_pre net/bridge/br_input.c:272 [inline]
br_handle_frame+0x4c9/0x940 net/bridge/br_input.c:417
__netif_receive_skb_core+0xa8a/0x21e0 net/core/dev.c:5417
__netif_receive_skb_one_core net/core/dev.c:5521 [inline]
__netif_receive_skb+0x57/0x1b0 net/core/dev.c:5637
process_backlog+0x21f/0x380 net/core/dev.c:5965
__napi_poll+0x60/0x3b0 net/core/dev.c:6527
napi_poll net/core/dev.c:6594 [inline]
net_rx_action+0x32b/0x750 net/core/dev.c:6727
__do_softirq+0xc1/0x265 kernel/softirq.c:553
do_softirq+0x5e/0x90 kernel/softirq.c:454
__local_bh_enable_ip+0x64/0x70 kernel/softirq.c:381
__raw_spin_unlock_bh include/linux/spinlock_api_smp.h:167 [inline]
_raw_spin_unlock_bh+0x36/0x40 kernel/locking/spinlock.c:210
spin_unlock_bh include/linux/spinlock.h:396 [inline]
batadv_tt_local_purge+0x1a8/0x1f0 net/batman-adv/translation-table.c:1356
batadv_tt_purge+0x2b/0x630 net/batman-adv/translation-table.c:3560
process_one_work kernel/workqueue.c:2630 [inline]
process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2703
worker_thread+0x525/0x730 kernel/workqueue.c:2784
kthread+0x1d7/0x210 kernel/kthread.c:388
ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147
ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304

value changed: 0x00000000000d7190 -> 0x00000000000d7191

Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 14848 Comm: kworker/u4:11 Not tainted 6.6.0-rc1-syzkaller-00236-gad8a69f361b9 #0

Fixes: 1c29fc4989bc ("[BRIDGE]: keep track of received multicast packets")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Roopa Prabhu <roopa@nvidia.com>
Cc: Nikolay Aleksandrov <razor@blackwall.org>
Cc: bridge@lists.linux-foundation.org
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://lore.kernel.org/r/20230918091351.1356153-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/bridge/br_forward.c | 4 ++--
 net/bridge/br_input.c   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 9d7bc8b96b53..7431f89e897b 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -124,7 +124,7 @@ static int deliver_clone(const struct net_bridge_port *prev,
 
 	skb = skb_clone(skb, GFP_ATOMIC);
 	if (!skb) {
-		dev->stats.tx_dropped++;
+		DEV_STATS_INC(dev, tx_dropped);
 		return -ENOMEM;
 	}
 
@@ -268,7 +268,7 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb,
 
 	skb = skb_copy(skb, GFP_ATOMIC);
 	if (!skb) {
-		dev->stats.tx_dropped++;
+		DEV_STATS_INC(dev, tx_dropped);
 		return;
 	}
 
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index c34a0b0901b0..c729528b5e85 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -181,12 +181,12 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
 			if ((mdst && mdst->host_joined) ||
 			    br_multicast_is_router(brmctx, skb)) {
 				local_rcv = true;
-				br->dev->stats.multicast++;
+				DEV_STATS_INC(br->dev, multicast);
 			}
 			mcast_hit = true;
 		} else {
 			local_rcv = true;
-			br->dev->stats.multicast++;
+			DEV_STATS_INC(br->dev, multicast);
 		}
 		break;
 	case BR_PKT_UNICAST:
-- 
cgit v1.2.3


From 492032760127251e5540a5716a70996bacf2a3fd Mon Sep 17 00:00:00 2001
From: Ziyang Xuan <william.xuanziyang@huawei.com>
Date: Mon, 18 Sep 2023 20:30:11 +0800
Subject: team: fix null-ptr-deref when team device type is changed

Get a null-ptr-deref bug as follows with reproducer [1].

BUG: kernel NULL pointer dereference, address: 0000000000000228
...
RIP: 0010:vlan_dev_hard_header+0x35/0x140 [8021q]
...
Call Trace:
 <TASK>
 ? __die+0x24/0x70
 ? page_fault_oops+0x82/0x150
 ? exc_page_fault+0x69/0x150
 ? asm_exc_page_fault+0x26/0x30
 ? vlan_dev_hard_header+0x35/0x140 [8021q]
 ? vlan_dev_hard_header+0x8e/0x140 [8021q]
 neigh_connected_output+0xb2/0x100
 ip6_finish_output2+0x1cb/0x520
 ? nf_hook_slow+0x43/0xc0
 ? ip6_mtu+0x46/0x80
 ip6_finish_output+0x2a/0xb0
 mld_sendpack+0x18f/0x250
 mld_ifc_work+0x39/0x160
 process_one_work+0x1e6/0x3f0
 worker_thread+0x4d/0x2f0
 ? __pfx_worker_thread+0x10/0x10
 kthread+0xe5/0x120
 ? __pfx_kthread+0x10/0x10
 ret_from_fork+0x34/0x50
 ? __pfx_kthread+0x10/0x10
 ret_from_fork_asm+0x1b/0x30

[1]
$ teamd -t team0 -d -c '{"runner": {"name": "loadbalance"}}'
$ ip link add name t-dummy type dummy
$ ip link add link t-dummy name t-dummy.100 type vlan id 100
$ ip link add name t-nlmon type nlmon
$ ip link set t-nlmon master team0
$ ip link set t-nlmon nomaster
$ ip link set t-dummy up
$ ip link set team0 up
$ ip link set t-dummy.100 down
$ ip link set t-dummy.100 master team0

When enslave a vlan device to team device and team device type is changed
from non-ether to ether, header_ops of team device is changed to
vlan_header_ops. That is incorrect and will trigger null-ptr-deref
for vlan->real_dev in vlan_dev_hard_header() because team device is not
a vlan device.

Cache eth_header_ops in team_setup(), then assign cached header_ops to
header_ops of team net device when its type is changed from non-ether
to ether to fix the bug.

Fixes: 1d76efe1577b ("team: add support for non-ethernet devices")
Suggested-by: Hangbin Liu <liuhangbin@gmail.com>
Reviewed-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20230918123011.1884401-1-william.xuanziyang@huawei.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/team/team.c | 10 +++++++++-
 include/linux/if_team.h |  2 ++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index e8b94580194e..508d9a392ab1 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2115,7 +2115,12 @@ static const struct ethtool_ops team_ethtool_ops = {
 static void team_setup_by_port(struct net_device *dev,
 			       struct net_device *port_dev)
 {
-	dev->header_ops	= port_dev->header_ops;
+	struct team *team = netdev_priv(dev);
+
+	if (port_dev->type == ARPHRD_ETHER)
+		dev->header_ops	= team->header_ops_cache;
+	else
+		dev->header_ops	= port_dev->header_ops;
 	dev->type = port_dev->type;
 	dev->hard_header_len = port_dev->hard_header_len;
 	dev->needed_headroom = port_dev->needed_headroom;
@@ -2162,8 +2167,11 @@ static int team_dev_type_check_change(struct net_device *dev,
 
 static void team_setup(struct net_device *dev)
 {
+	struct team *team = netdev_priv(dev);
+
 	ether_setup(dev);
 	dev->max_mtu = ETH_MAX_MTU;
+	team->header_ops_cache = dev->header_ops;
 
 	dev->netdev_ops = &team_netdev_ops;
 	dev->ethtool_ops = &team_ethtool_ops;
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 1b9b15a492fa..cdc684e04a2f 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -189,6 +189,8 @@ struct team {
 	struct net_device *dev; /* associated netdevice */
 	struct team_pcpu_stats __percpu *pcpu_stats;
 
+	const struct header_ops *header_ops_cache;
+
 	struct mutex lock; /* used for overall locking, e.g. port lists write */
 
 	/*
-- 
cgit v1.2.3


From f1d95df0f31048f1c59092648997686e3f7d9478 Mon Sep 17 00:00:00 2001
From: Artem Chernyshev <artem.chernyshev@red-soft.ru>
Date: Mon, 18 Sep 2023 16:56:23 +0300
Subject: net: rds: Fix possible NULL-pointer dereference

In rds_rdma_cm_event_handler_cmn() check, if conn pointer exists
before dereferencing it as rdma_set_service_type() argument

Found by Linux Verification Center (linuxtesting.org) with SVACE.

Fixes: fd261ce6a30e ("rds: rdma: update rdma transport for tos")
Signed-off-by: Artem Chernyshev <artem.chernyshev@red-soft.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/rdma_transport.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index d36f3f6b4351..b15cf316b23a 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -86,11 +86,13 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
 		break;
 
 	case RDMA_CM_EVENT_ADDR_RESOLVED:
-		rdma_set_service_type(cm_id, conn->c_tos);
-		rdma_set_min_rnr_timer(cm_id, IB_RNR_TIMER_000_32);
-		/* XXX do we need to clean up if this fails? */
-		ret = rdma_resolve_route(cm_id,
-					 RDS_RDMA_RESOLVE_TIMEOUT_MS);
+		if (conn) {
+			rdma_set_service_type(cm_id, conn->c_tos);
+			rdma_set_min_rnr_timer(cm_id, IB_RNR_TIMER_000_32);
+			/* XXX do we need to clean up if this fails? */
+			ret = rdma_resolve_route(cm_id,
+						 RDS_RDMA_RESOLVE_TIMEOUT_MS);
+		}
 		break;
 
 	case RDMA_CM_EVENT_ROUTE_RESOLVED:
-- 
cgit v1.2.3


From 4e4b1798cc90e376b8b61d0098b4093898a32227 Mon Sep 17 00:00:00 2001
From: Benjamin Poirier <bpoirier@nvidia.com>
Date: Mon, 18 Sep 2023 11:40:15 -0400
Subject: vxlan: Add missing entries to vxlan_get_size()

There are some attributes added by vxlan_fill_info() which are not
accounted for in vxlan_get_size(). Add them.

I didn't find a way to trigger an actual problem from this miscalculation
since there is usually extra space in netlink size calculations like
if_nlmsg_size(); but maybe I just didn't search long enough.

Fixes: 3511494ce2f3 ("vxlan: Group Policy extension")
Fixes: e1e5314de08b ("vxlan: implement GPE")
Fixes: 0ace2ca89cbd ("vxlan: Use checksum partial with remote checksum offload")
Fixes: f9c4bb0b245c ("vxlan: vni filtering support on collect metadata device")
Signed-off-by: Benjamin Poirier <bpoirier@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan/vxlan_core.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index e463f59e95c2..5b5597073b00 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -4331,6 +4331,10 @@ static size_t vxlan_get_size(const struct net_device *dev)
 		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */
 		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */
 		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LOCALBYPASS */
+		nla_total_size(0) + /* IFLA_VXLAN_GBP */
+		nla_total_size(0) + /* IFLA_VXLAN_GPE */
+		nla_total_size(0) + /* IFLA_VXLAN_REMCSUM_NOPARTIAL */
+		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_VNIFILTER */
 		0;
 }
 
-- 
cgit v1.2.3


From c9bd26513b3a11b3adb3c2ed8a31a01a87173ff1 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 15 Sep 2023 15:18:11 +0200
Subject: netfilter: nf_tables: disable toggling dormant table state more than
 once

nft -f -<<EOF
add table ip t
add table ip t { flags dormant; }
add chain ip t c { type filter hook input priority 0; }
add table ip t
EOF

Triggers a splat from nf core on next table delete because we lose
track of right hook register state:

WARNING: CPU: 2 PID: 1597 at net/netfilter/core.c:501 __nf_unregister_net_hook
RIP: 0010:__nf_unregister_net_hook+0x41b/0x570
 nf_unregister_net_hook+0xb4/0xf0
 __nf_tables_unregister_hook+0x160/0x1d0
[..]

The above should have table in *active* state, but in fact no
hooks were registered.

Reject on/off/on games rather than attempting to fix this.

Fixes: 179d9ba5559a ("netfilter: nf_tables: fix table flag updates")
Reported-by: "Lee, Cherie-Anne" <cherie.lee@starlabs.sg>
Cc: Bing-Jhong Billy Jheng <billy@starlabs.sg>
Cc: info@starlabs.sg
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/netfilter/nf_tables_api.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d819b4d42962..a3680638ec60 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1219,6 +1219,10 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
 	     flags & NFT_TABLE_F_OWNER))
 		return -EOPNOTSUPP;
 
+	/* No dormant off/on/off/on games in single transaction */
+	if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
+		return -EINVAL;
+
 	trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
 				sizeof(struct nft_trans_table));
 	if (trans == NULL)
-- 
cgit v1.2.3


From cf5000a7787cbc10341091d37245a42c119d26c5 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 19 Sep 2023 15:36:13 +0200
Subject: netfilter: nf_tables: fix memleak when more than 255 elements expired

When more than 255 elements expired we're supposed to switch to a new gc
container structure.

This never happens: u8 type will wrap before reaching the boundary
and nft_trans_gc_space() always returns true.

This means we recycle the initial gc container structure and
lose track of the elements that came before.

While at it, don't deref 'gc' after we've passed it to call_rcu.

Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane")
Reported-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/net/netfilter/nf_tables.h |  2 +-
 net/netfilter/nf_tables_api.c     | 10 ++++++++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index a4455f4995ab..7c816359d5a9 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1682,7 +1682,7 @@ struct nft_trans_gc {
 	struct net		*net;
 	struct nft_set		*set;
 	u32			seq;
-	u8			count;
+	u16			count;
 	void			*priv[NFT_TRANS_GC_BATCHCOUNT];
 	struct rcu_head		rcu;
 };
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index a3680638ec60..4356189360fb 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -9579,12 +9579,15 @@ static int nft_trans_gc_space(struct nft_trans_gc *trans)
 struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
 					      unsigned int gc_seq, gfp_t gfp)
 {
+	struct nft_set *set;
+
 	if (nft_trans_gc_space(gc))
 		return gc;
 
+	set = gc->set;
 	nft_trans_gc_queue_work(gc);
 
-	return nft_trans_gc_alloc(gc->set, gc_seq, gfp);
+	return nft_trans_gc_alloc(set, gc_seq, gfp);
 }
 
 void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
@@ -9599,15 +9602,18 @@ void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
 
 struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp)
 {
+	struct nft_set *set;
+
 	if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)))
 		return NULL;
 
 	if (nft_trans_gc_space(gc))
 		return gc;
 
+	set = gc->set;
 	call_rcu(&gc->rcu, nft_trans_gc_trans_free);
 
-	return nft_trans_gc_alloc(gc->set, 0, gfp);
+	return nft_trans_gc_alloc(set, 0, gfp);
 }
 
 void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
-- 
cgit v1.2.3


From 7433b6d2afd512d04398c73aa984d1e285be125b Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@netfilter.org>
Date: Tue, 19 Sep 2023 20:04:45 +0200
Subject: netfilter: ipset: Fix race between IPSET_CMD_CREATE and
 IPSET_CMD_SWAP

Kyle Zeng reported that there is a race between IPSET_CMD_ADD and IPSET_CMD_SWAP
in netfilter/ip_set, which can lead to the invocation of `__ip_set_put` on a
wrong `set`, triggering the `BUG_ON(set->ref == 0);` check in it.

The race is caused by using the wrong reference counter, i.e. the ref counter instead
of ref_netlink.

Fixes: 24e227896bbf ("netfilter: ipset: Add schedule point in call_ad().")
Reported-by: Kyle Zeng <zengyhkyle@gmail.com>
Closes: https://lore.kernel.org/netfilter-devel/ZPZqetxOmH+w%2Fmyc@westworld/#r
Tested-by: Kyle Zeng <zengyhkyle@gmail.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/netfilter/ipset/ip_set_core.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index e564b5174261..35d2f9c9ada0 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -682,6 +682,14 @@ __ip_set_put(struct ip_set *set)
 /* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need
  * a separate reference counter
  */
+static void
+__ip_set_get_netlink(struct ip_set *set)
+{
+	write_lock_bh(&ip_set_ref_lock);
+	set->ref_netlink++;
+	write_unlock_bh(&ip_set_ref_lock);
+}
+
 static void
 __ip_set_put_netlink(struct ip_set *set)
 {
@@ -1693,11 +1701,11 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 
 	do {
 		if (retried) {
-			__ip_set_get(set);
+			__ip_set_get_netlink(set);
 			nfnl_unlock(NFNL_SUBSYS_IPSET);
 			cond_resched();
 			nfnl_lock(NFNL_SUBSYS_IPSET);
-			__ip_set_put(set);
+			__ip_set_put_netlink(set);
 		}
 
 		ip_set_lock(set);
-- 
cgit v1.2.3


From 22b6e7f3d6d51ff2716480f3d8f3098d90d69165 Mon Sep 17 00:00:00 2001
From: Cai Huoqing <cai.huoqing@linux.dev>
Date: Tue, 19 Sep 2023 10:27:15 +0800
Subject: net: hinic: Fix warning-hinic_set_vlan_fliter() warn: variable
 dereferenced before check 'hwdev'

'hwdev' is checked too late and hwdev will not be NULL, so remove the check

Fixes: 2acf960e3be6 ("net: hinic: Add support for configuration of rx-vlan-filter by ethtool")
Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Closes: https://lore.kernel.org/r/202309112354.pikZCmyk-lkp@intel.com/
Signed-off-by: Cai Huoqing <cai.huoqing@linux.dev>
Reviewed-by: Vadim Fedorenko <vadim.fedorenko@linux.dev>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/hinic_port.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c
index 9406237c461e..f81a43d2cdfc 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c
@@ -456,9 +456,6 @@ int hinic_set_vlan_fliter(struct hinic_dev *nic_dev, u32 en)
 	u16 out_size = sizeof(vlan_filter);
 	int err;
 
-	if (!hwdev)
-		return -EINVAL;
-
 	vlan_filter.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
 	vlan_filter.enable = en;
 
-- 
cgit v1.2.3


From 4a0f07d71b0483cc08c03cefa7c85749e187c214 Mon Sep 17 00:00:00 2001
From: Jinjie Ruan <ruanjinjie@huawei.com>
Date: Tue, 19 Sep 2023 18:44:06 +0800
Subject: net/handshake: Fix memory leak in __sock_create() and
 sock_alloc_file()

When making CONFIG_DEBUG_KMEMLEAK=y and CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN=y,
modprobe handshake-test and then rmmmod handshake-test, the below memory
leak is detected.

The struct socket_alloc which is allocated by alloc_inode_sb() in
__sock_create() is not freed. And the struct dentry which is allocated
by __d_alloc() in sock_alloc_file() is not freed.

Since fput() will call file->f_op->release() which is sock_close() here and
it will call __sock_release(). and fput() will call dput(dentry) to free
the struct dentry. So replace sock_release() with fput() to fix the
below memory leak. After applying this patch, the following memory leak is
never detected.

unreferenced object 0xffff888109165840 (size 768):
  comm "kunit_try_catch", pid 1852, jiffies 4294685807 (age 976.262s)
  hex dump (first 32 bytes):
    01 00 00 00 01 00 5a 5a 20 00 00 00 00 00 00 00  ......ZZ .......
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff8397993f>] sock_alloc_inode+0x1f/0x1b0
    [<ffffffff81a2cb5b>] alloc_inode+0x5b/0x1a0
    [<ffffffff81a32bed>] new_inode_pseudo+0xd/0x70
    [<ffffffff8397889c>] sock_alloc+0x3c/0x260
    [<ffffffff83979b46>] __sock_create+0x66/0x3d0
    [<ffffffffa0209ba2>] 0xffffffffa0209ba2
    [<ffffffff829cf03a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
    [<ffffffff81236fc6>] kthread+0x2b6/0x380
    [<ffffffff81096afd>] ret_from_fork+0x2d/0x70
    [<ffffffff81003511>] ret_from_fork_asm+0x11/0x20
unreferenced object 0xffff88810f472008 (size 192):
  comm "kunit_try_catch", pid 1852, jiffies 4294685808 (age 976.261s)
  hex dump (first 32 bytes):
    00 00 50 40 02 00 00 00 00 00 00 00 00 00 00 00  ..P@............
    00 00 00 00 00 00 00 00 08 20 47 0f 81 88 ff ff  ......... G.....
  backtrace:
    [<ffffffff81a1ff11>] __d_alloc+0x31/0x8a0
    [<ffffffff81a2910e>] d_alloc_pseudo+0xe/0x50
    [<ffffffff819d549e>] alloc_file_pseudo+0xce/0x210
    [<ffffffff83978582>] sock_alloc_file+0x42/0x1b0
    [<ffffffffa0209bbb>] 0xffffffffa0209bbb
    [<ffffffff829cf03a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
    [<ffffffff81236fc6>] kthread+0x2b6/0x380
    [<ffffffff81096afd>] ret_from_fork+0x2d/0x70
    [<ffffffff81003511>] ret_from_fork_asm+0x11/0x20
unreferenced object 0xffff88810958e580 (size 224):
  comm "kunit_try_catch", pid 1852, jiffies 4294685808 (age 976.261s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 03 00 2e 08 01 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff819d4b90>] alloc_empty_file+0x50/0x160
    [<ffffffff819d4cf9>] alloc_file+0x59/0x730
    [<ffffffff819d5524>] alloc_file_pseudo+0x154/0x210
    [<ffffffff83978582>] sock_alloc_file+0x42/0x1b0
    [<ffffffffa0209bbb>] 0xffffffffa0209bbb
    [<ffffffff829cf03a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
    [<ffffffff81236fc6>] kthread+0x2b6/0x380
    [<ffffffff81096afd>] ret_from_fork+0x2d/0x70
    [<ffffffff81003511>] ret_from_fork_asm+0x11/0x20
unreferenced object 0xffff88810926dc88 (size 192):
  comm "kunit_try_catch", pid 1854, jiffies 4294685809 (age 976.271s)
  hex dump (first 32 bytes):
    00 00 50 40 02 00 00 00 00 00 00 00 00 00 00 00  ..P@............
    00 00 00 00 00 00 00 00 88 dc 26 09 81 88 ff ff  ..........&.....
  backtrace:
    [<ffffffff81a1ff11>] __d_alloc+0x31/0x8a0
    [<ffffffff81a2910e>] d_alloc_pseudo+0xe/0x50
    [<ffffffff819d549e>] alloc_file_pseudo+0xce/0x210
    [<ffffffff83978582>] sock_alloc_file+0x42/0x1b0
    [<ffffffffa0208fdc>] 0xffffffffa0208fdc
    [<ffffffff829cf03a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
    [<ffffffff81236fc6>] kthread+0x2b6/0x380
    [<ffffffff81096afd>] ret_from_fork+0x2d/0x70
    [<ffffffff81003511>] ret_from_fork_asm+0x11/0x20
unreferenced object 0xffff88810a241380 (size 224):
  comm "kunit_try_catch", pid 1854, jiffies 4294685809 (age 976.271s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 03 00 2e 08 01 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff819d4b90>] alloc_empty_file+0x50/0x160
    [<ffffffff819d4cf9>] alloc_file+0x59/0x730
    [<ffffffff819d5524>] alloc_file_pseudo+0x154/0x210
    [<ffffffff83978582>] sock_alloc_file+0x42/0x1b0
    [<ffffffffa0208fdc>] 0xffffffffa0208fdc
    [<ffffffff829cf03a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
    [<ffffffff81236fc6>] kthread+0x2b6/0x380
    [<ffffffff81096afd>] ret_from_fork+0x2d/0x70
    [<ffffffff81003511>] ret_from_fork_asm+0x11/0x20
unreferenced object 0xffff888109165040 (size 768):
  comm "kunit_try_catch", pid 1856, jiffies 4294685811 (age 976.269s)
  hex dump (first 32 bytes):
    01 00 00 00 01 00 5a 5a 20 00 00 00 00 00 00 00  ......ZZ .......
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff8397993f>] sock_alloc_inode+0x1f/0x1b0
    [<ffffffff81a2cb5b>] alloc_inode+0x5b/0x1a0
    [<ffffffff81a32bed>] new_inode_pseudo+0xd/0x70
    [<ffffffff8397889c>] sock_alloc+0x3c/0x260
    [<ffffffff83979b46>] __sock_create+0x66/0x3d0
    [<ffffffffa0208860>] 0xffffffffa0208860
    [<ffffffff829cf03a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
    [<ffffffff81236fc6>] kthread+0x2b6/0x380
    [<ffffffff81096afd>] ret_from_fork+0x2d/0x70
    [<ffffffff81003511>] ret_from_fork_asm+0x11/0x20
unreferenced object 0xffff88810926d568 (size 192):
  comm "kunit_try_catch", pid 1856, jiffies 4294685811 (age 976.269s)
  hex dump (first 32 bytes):
    00 00 50 40 02 00 00 00 00 00 00 00 00 00 00 00  ..P@............
    00 00 00 00 00 00 00 00 68 d5 26 09 81 88 ff ff  ........h.&.....
  backtrace:
    [<ffffffff81a1ff11>] __d_alloc+0x31/0x8a0
    [<ffffffff81a2910e>] d_alloc_pseudo+0xe/0x50
    [<ffffffff819d549e>] alloc_file_pseudo+0xce/0x210
    [<ffffffff83978582>] sock_alloc_file+0x42/0x1b0
    [<ffffffffa0208879>] 0xffffffffa0208879
    [<ffffffff829cf03a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
    [<ffffffff81236fc6>] kthread+0x2b6/0x380
    [<ffffffff81096afd>] ret_from_fork+0x2d/0x70
    [<ffffffff81003511>] ret_from_fork_asm+0x11/0x20
unreferenced object 0xffff88810a240580 (size 224):
  comm "kunit_try_catch", pid 1856, jiffies 4294685811 (age 976.347s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 03 00 2e 08 01 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff819d4b90>] alloc_empty_file+0x50/0x160
    [<ffffffff819d4cf9>] alloc_file+0x59/0x730
    [<ffffffff819d5524>] alloc_file_pseudo+0x154/0x210
    [<ffffffff83978582>] sock_alloc_file+0x42/0x1b0
    [<ffffffffa0208879>] 0xffffffffa0208879
    [<ffffffff829cf03a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
    [<ffffffff81236fc6>] kthread+0x2b6/0x380
    [<ffffffff81096afd>] ret_from_fork+0x2d/0x70
    [<ffffffff81003511>] ret_from_fork_asm+0x11/0x20
unreferenced object 0xffff888109164c40 (size 768):
  comm "kunit_try_catch", pid 1858, jiffies 4294685816 (age 976.342s)
  hex dump (first 32 bytes):
    01 00 00 00 01 00 5a 5a 20 00 00 00 00 00 00 00  ......ZZ .......
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff8397993f>] sock_alloc_inode+0x1f/0x1b0
    [<ffffffff81a2cb5b>] alloc_inode+0x5b/0x1a0
    [<ffffffff81a32bed>] new_inode_pseudo+0xd/0x70
    [<ffffffff8397889c>] sock_alloc+0x3c/0x260
    [<ffffffff83979b46>] __sock_create+0x66/0x3d0
    [<ffffffffa0208541>] 0xffffffffa0208541
    [<ffffffff829cf03a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
    [<ffffffff81236fc6>] kthread+0x2b6/0x380
    [<ffffffff81096afd>] ret_from_fork+0x2d/0x70
    [<ffffffff81003511>] ret_from_fork_asm+0x11/0x20
unreferenced object 0xffff88810926cd18 (size 192):
  comm "kunit_try_catch", pid 1858, jiffies 4294685816 (age 976.342s)
  hex dump (first 32 bytes):
    00 00 50 40 02 00 00 00 00 00 00 00 00 00 00 00  ..P@............
    00 00 00 00 00 00 00 00 18 cd 26 09 81 88 ff ff  ..........&.....
  backtrace:
    [<ffffffff81a1ff11>] __d_alloc+0x31/0x8a0
    [<ffffffff81a2910e>] d_alloc_pseudo+0xe/0x50
    [<ffffffff819d549e>] alloc_file_pseudo+0xce/0x210
    [<ffffffff83978582>] sock_alloc_file+0x42/0x1b0
    [<ffffffffa020855a>] 0xffffffffa020855a
    [<ffffffff829cf03a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
    [<ffffffff81236fc6>] kthread+0x2b6/0x380
    [<ffffffff81096afd>] ret_from_fork+0x2d/0x70
    [<ffffffff81003511>] ret_from_fork_asm+0x11/0x20
unreferenced object 0xffff88810a240200 (size 224):
  comm "kunit_try_catch", pid 1858, jiffies 4294685816 (age 976.342s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 03 00 2e 08 01 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff819d4b90>] alloc_empty_file+0x50/0x160
    [<ffffffff819d4cf9>] alloc_file+0x59/0x730
    [<ffffffff819d5524>] alloc_file_pseudo+0x154/0x210
    [<ffffffff83978582>] sock_alloc_file+0x42/0x1b0
    [<ffffffffa020855a>] 0xffffffffa020855a
    [<ffffffff829cf03a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
    [<ffffffff81236fc6>] kthread+0x2b6/0x380
    [<ffffffff81096afd>] ret_from_fork+0x2d/0x70
    [<ffffffff81003511>] ret_from_fork_asm+0x11/0x20
unreferenced object 0xffff888109164840 (size 768):
  comm "kunit_try_catch", pid 1860, jiffies 4294685817 (age 976.416s)
  hex dump (first 32 bytes):
    01 00 00 00 01 00 5a 5a 20 00 00 00 00 00 00 00  ......ZZ .......
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff8397993f>] sock_alloc_inode+0x1f/0x1b0
    [<ffffffff81a2cb5b>] alloc_inode+0x5b/0x1a0
    [<ffffffff81a32bed>] new_inode_pseudo+0xd/0x70
    [<ffffffff8397889c>] sock_alloc+0x3c/0x260
    [<ffffffff83979b46>] __sock_create+0x66/0x3d0
    [<ffffffffa02093e2>] 0xffffffffa02093e2
    [<ffffffff829cf03a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
    [<ffffffff81236fc6>] kthread+0x2b6/0x380
    [<ffffffff81096afd>] ret_from_fork+0x2d/0x70
    [<ffffffff81003511>] ret_from_fork_asm+0x11/0x20
unreferenced object 0xffff88810926cab8 (size 192):
  comm "kunit_try_catch", pid 1860, jiffies 4294685817 (age 976.416s)
  hex dump (first 32 bytes):
    00 00 50 40 02 00 00 00 00 00 00 00 00 00 00 00  ..P@............
    00 00 00 00 00 00 00 00 b8 ca 26 09 81 88 ff ff  ..........&.....
  backtrace:
    [<ffffffff81a1ff11>] __d_alloc+0x31/0x8a0
    [<ffffffff81a2910e>] d_alloc_pseudo+0xe/0x50
    [<ffffffff819d549e>] alloc_file_pseudo+0xce/0x210
    [<ffffffff83978582>] sock_alloc_file+0x42/0x1b0
    [<ffffffffa02093fb>] 0xffffffffa02093fb
    [<ffffffff829cf03a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
    [<ffffffff81236fc6>] kthread+0x2b6/0x380
    [<ffffffff81096afd>] ret_from_fork+0x2d/0x70
    [<ffffffff81003511>] ret_from_fork_asm+0x11/0x20
unreferenced object 0xffff88810a240040 (size 224):
  comm "kunit_try_catch", pid 1860, jiffies 4294685817 (age 976.416s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 03 00 2e 08 01 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff819d4b90>] alloc_empty_file+0x50/0x160
    [<ffffffff819d4cf9>] alloc_file+0x59/0x730
    [<ffffffff819d5524>] alloc_file_pseudo+0x154/0x210
    [<ffffffff83978582>] sock_alloc_file+0x42/0x1b0
    [<ffffffffa02093fb>] 0xffffffffa02093fb
    [<ffffffff829cf03a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
    [<ffffffff81236fc6>] kthread+0x2b6/0x380
    [<ffffffff81096afd>] ret_from_fork+0x2d/0x70
    [<ffffffff81003511>] ret_from_fork_asm+0x11/0x20
unreferenced object 0xffff888109166440 (size 768):
  comm "kunit_try_catch", pid 1862, jiffies 4294685819 (age 976.489s)
  hex dump (first 32 bytes):
    01 00 00 00 01 00 5a 5a 20 00 00 00 00 00 00 00  ......ZZ .......
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff8397993f>] sock_alloc_inode+0x1f/0x1b0
    [<ffffffff81a2cb5b>] alloc_inode+0x5b/0x1a0
    [<ffffffff81a32bed>] new_inode_pseudo+0xd/0x70
    [<ffffffff8397889c>] sock_alloc+0x3c/0x260
    [<ffffffff83979b46>] __sock_create+0x66/0x3d0
    [<ffffffffa02097c1>] 0xffffffffa02097c1
    [<ffffffff829cf03a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
    [<ffffffff81236fc6>] kthread+0x2b6/0x380
    [<ffffffff81096afd>] ret_from_fork+0x2d/0x70
    [<ffffffff81003511>] ret_from_fork_asm+0x11/0x20
unreferenced object 0xffff88810926c398 (size 192):
  comm "kunit_try_catch", pid 1862, jiffies 4294685819 (age 976.489s)
  hex dump (first 32 bytes):
    00 00 50 40 02 00 00 00 00 00 00 00 00 00 00 00  ..P@............
    00 00 00 00 00 00 00 00 98 c3 26 09 81 88 ff ff  ..........&.....
  backtrace:
    [<ffffffff81a1ff11>] __d_alloc+0x31/0x8a0
    [<ffffffff81a2910e>] d_alloc_pseudo+0xe/0x50
    [<ffffffff819d549e>] alloc_file_pseudo+0xce/0x210
    [<ffffffff83978582>] sock_alloc_file+0x42/0x1b0
    [<ffffffffa02097da>] 0xffffffffa02097da
    [<ffffffff829cf03a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
    [<ffffffff81236fc6>] kthread+0x2b6/0x380
    [<ffffffff81096afd>] ret_from_fork+0x2d/0x70
    [<ffffffff81003511>] ret_from_fork_asm+0x11/0x20
unreferenced object 0xffff888107e0b8c0 (size 224):
  comm "kunit_try_catch", pid 1862, jiffies 4294685819 (age 976.489s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 03 00 2e 08 01 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff819d4b90>] alloc_empty_file+0x50/0x160
    [<ffffffff819d4cf9>] alloc_file+0x59/0x730
    [<ffffffff819d5524>] alloc_file_pseudo+0x154/0x210
    [<ffffffff83978582>] sock_alloc_file+0x42/0x1b0
    [<ffffffffa02097da>] 0xffffffffa02097da
    [<ffffffff829cf03a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
    [<ffffffff81236fc6>] kthread+0x2b6/0x380
    [<ffffffff81096afd>] ret_from_fork+0x2d/0x70
    [<ffffffff81003511>] ret_from_fork_asm+0x11/0x20
unreferenced object 0xffff888109164440 (size 768):
  comm "kunit_try_catch", pid 1864, jiffies 4294685821 (age 976.487s)
  hex dump (first 32 bytes):
    01 00 00 00 01 00 5a 5a 20 00 00 00 00 00 00 00  ......ZZ .......
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff8397993f>] sock_alloc_inode+0x1f/0x1b0
    [<ffffffff81a2cb5b>] alloc_inode+0x5b/0x1a0
    [<ffffffff81a32bed>] new_inode_pseudo+0xd/0x70
    [<ffffffff8397889c>] sock_alloc+0x3c/0x260
    [<ffffffff83979b46>] __sock_create+0x66/0x3d0
    [<ffffffffa020824e>] 0xffffffffa020824e
    [<ffffffff829cf03a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
    [<ffffffff81236fc6>] kthread+0x2b6/0x380
    [<ffffffff81096afd>] ret_from_fork+0x2d/0x70
    [<ffffffff81003511>] ret_from_fork_asm+0x11/0x20
unreferenced object 0xffff88810f4cf698 (size 192):
  comm "kunit_try_catch", pid 1864, jiffies 4294685821 (age 976.501s)
  hex dump (first 32 bytes):
    00 00 50 40 02 00 00 00 00 00 00 00 00 00 00 00  ..P@............
    00 00 00 00 00 00 00 00 98 f6 4c 0f 81 88 ff ff  ..........L.....
  backtrace:
    [<ffffffff81a1ff11>] __d_alloc+0x31/0x8a0
    [<ffffffff81a2910e>] d_alloc_pseudo+0xe/0x50
    [<ffffffff819d549e>] alloc_file_pseudo+0xce/0x210
    [<ffffffff83978582>] sock_alloc_file+0x42/0x1b0
    [<ffffffffa0208267>] 0xffffffffa0208267
    [<ffffffff829cf03a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
    [<ffffffff81236fc6>] kthread+0x2b6/0x380
    [<ffffffff81096afd>] ret_from_fork+0x2d/0x70
    [<ffffffff81003511>] ret_from_fork_asm+0x11/0x20
unreferenced object 0xffff888107e0b000 (size 224):
  comm "kunit_try_catch", pid 1864, jiffies 4294685821 (age 976.501s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 03 00 2e 08 01 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff819d4b90>] alloc_empty_file+0x50/0x160
    [<ffffffff819d4cf9>] alloc_file+0x59/0x730
    [<ffffffff819d5524>] alloc_file_pseudo+0x154/0x210
    [<ffffffff83978582>] sock_alloc_file+0x42/0x1b0
    [<ffffffffa0208267>] 0xffffffffa0208267
    [<ffffffff829cf03a>] kunit_generic_run_threadfn_adapter+0x4a/0x90
    [<ffffffff81236fc6>] kthread+0x2b6/0x380
    [<ffffffff81096afd>] ret_from_fork+0x2d/0x70
    [<ffffffff81003511>] ret_from_fork_asm+0x11/0x20

Fixes: 88232ec1ec5e ("net/handshake: Add Kunit tests for the handshake consumer API")
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/handshake/handshake-test.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/net/handshake/handshake-test.c b/net/handshake/handshake-test.c
index 6d37bab35c8f..16ed7bfd29e4 100644
--- a/net/handshake/handshake-test.c
+++ b/net/handshake/handshake-test.c
@@ -235,7 +235,7 @@ static void handshake_req_submit_test4(struct kunit *test)
 	KUNIT_EXPECT_PTR_EQ(test, req, result);
 
 	handshake_req_cancel(sock->sk);
-	sock_release(sock);
+	fput(filp);
 }
 
 static void handshake_req_submit_test5(struct kunit *test)
@@ -272,7 +272,7 @@ static void handshake_req_submit_test5(struct kunit *test)
 	/* Assert */
 	KUNIT_EXPECT_EQ(test, err, -EAGAIN);
 
-	sock_release(sock);
+	fput(filp);
 	hn->hn_pending = saved;
 }
 
@@ -306,7 +306,7 @@ static void handshake_req_submit_test6(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, err, -EBUSY);
 
 	handshake_req_cancel(sock->sk);
-	sock_release(sock);
+	fput(filp);
 }
 
 static void handshake_req_cancel_test1(struct kunit *test)
@@ -340,7 +340,7 @@ static void handshake_req_cancel_test1(struct kunit *test)
 	/* Assert */
 	KUNIT_EXPECT_TRUE(test, result);
 
-	sock_release(sock);
+	fput(filp);
 }
 
 static void handshake_req_cancel_test2(struct kunit *test)
@@ -382,7 +382,7 @@ static void handshake_req_cancel_test2(struct kunit *test)
 	/* Assert */
 	KUNIT_EXPECT_TRUE(test, result);
 
-	sock_release(sock);
+	fput(filp);
 }
 
 static void handshake_req_cancel_test3(struct kunit *test)
@@ -427,7 +427,7 @@ static void handshake_req_cancel_test3(struct kunit *test)
 	/* Assert */
 	KUNIT_EXPECT_FALSE(test, result);
 
-	sock_release(sock);
+	fput(filp);
 }
 
 static struct handshake_req *handshake_req_destroy_test;
@@ -471,7 +471,7 @@ static void handshake_req_destroy_test1(struct kunit *test)
 	handshake_req_cancel(sock->sk);
 
 	/* Act */
-	sock_release(sock);
+	fput(filp);
 
 	/* Assert */
 	KUNIT_EXPECT_PTR_EQ(test, handshake_req_destroy_test, req);
-- 
cgit v1.2.3


From 6f411fb5ca9419090bee6a0a46425e0a5060b734 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 18 Sep 2023 17:36:09 +0200
Subject: net: ena: Flush XDP packets on error.

xdp_do_flush() should be invoked before leaving the NAPI poll function
after a XDP-redirect. This is not the case if the driver leaves via
the error path (after having a redirect in one of its previous
iterations).

Invoke xdp_do_flush() also in the error path.

Cc: Arthur Kiyanovski <akiyano@amazon.com>
Cc: David Arinzon <darinzon@amazon.com>
Cc: Noam Dagan <ndagan@amazon.com>
Cc: Saeed Bishara <saeedb@amazon.com>
Cc: Shay Agroskin <shayagr@amazon.com>
Fixes: a318c70ad152b ("net: ena: introduce XDP redirect implementation")
Acked-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index ad32ca81f7ef..f955bde10cf9 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -1833,6 +1833,9 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
 	return work_done;
 
 error:
+	if (xdp_flags & ENA_XDP_REDIRECT)
+		xdp_do_flush();
+
 	adapter = netdev_priv(rx_ring->netdev);
 
 	if (rc == -ENOSPC) {
-- 
cgit v1.2.3


From edc0140cc3b7b91874ebe70eb7d2a851e8817ccc Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 18 Sep 2023 17:36:10 +0200
Subject: bnxt_en: Flush XDP for bnxt_poll_nitroa0()'s NAPI

bnxt_poll_nitroa0() invokes bnxt_rx_pkt() which can run a XDP program
which in turn can return XDP_REDIRECT. bnxt_rx_pkt() is also used by
__bnxt_poll_work() which flushes (xdp_do_flush()) the packets after each
round. bnxt_poll_nitroa0() lacks this feature.
xdp_do_flush() should be invoked before leaving the NAPI callback.

Invoke xdp_do_flush() after a redirect in bnxt_poll_nitroa0() NAPI.

Cc: Michael Chan <michael.chan@broadcom.com>
Fixes: f18c2b77b2e4e ("bnxt_en: optimized XDP_REDIRECT support")
Reviewed-by: Andy Gospodarek <gospo@broadcom.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Michael Chan <michael.chan@broadcom.com>
Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 5cc0dbe12132..7551aa8068f8 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2614,6 +2614,7 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget)
 	struct rx_cmp_ext *rxcmp1;
 	u32 cp_cons, tmp_raw_cons;
 	u32 raw_cons = cpr->cp_raw_cons;
+	bool flush_xdp = false;
 	u32 rx_pkts = 0;
 	u8 event = 0;
 
@@ -2648,6 +2649,8 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget)
 				rx_pkts++;
 			else if (rc == -EBUSY)	/* partial completion */
 				break;
+			if (event & BNXT_REDIRECT_EVENT)
+				flush_xdp = true;
 		} else if (unlikely(TX_CMP_TYPE(txcmp) ==
 				    CMPL_BASE_TYPE_HWRM_DONE)) {
 			bnxt_hwrm_handler(bp, txcmp);
@@ -2667,6 +2670,8 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget)
 
 	if (event & BNXT_AGG_EVENT)
 		bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod);
+	if (flush_xdp)
+		xdp_do_flush();
 
 	if (!bnxt_has_work(bp, cpr) && rx_pkts < budget) {
 		napi_complete_done(napi, rx_pkts);
-- 
cgit v1.2.3


From 70b2b6892645e58ed6f051dad7f8d1083f0ad553 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 18 Sep 2023 17:36:11 +0200
Subject: octeontx2-pf: Do xdp_do_flush() after redirects.

xdp_do_flush() should be invoked before leaving the NAPI poll function
if XDP-redirect has been performed.

Invoke xdp_do_flush() before leaving NAPI.

Cc: Geetha sowjanya <gakula@marvell.com>
Cc: Subbaraya Sundeep <sbhatta@marvell.com>
Cc: Sunil Goutham <sgoutham@marvell.com>
Cc: hariprasad <hkelam@marvell.com>
Fixes: 06059a1a9a4a5 ("octeontx2-pf: Add XDP support to netdev PF")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Acked-by: Geethasowjanya Akula <gakula@marvell.com>
Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 .../net/ethernet/marvell/octeontx2/nic/otx2_txrx.c    | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
index e77d43848955..53b2a4ef5298 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
@@ -29,7 +29,8 @@
 static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf,
 				     struct bpf_prog *prog,
 				     struct nix_cqe_rx_s *cqe,
-				     struct otx2_cq_queue *cq);
+				     struct otx2_cq_queue *cq,
+				     bool *need_xdp_flush);
 
 static int otx2_nix_cq_op_status(struct otx2_nic *pfvf,
 				 struct otx2_cq_queue *cq)
@@ -337,7 +338,7 @@ static bool otx2_check_rcv_errors(struct otx2_nic *pfvf,
 static void otx2_rcv_pkt_handler(struct otx2_nic *pfvf,
 				 struct napi_struct *napi,
 				 struct otx2_cq_queue *cq,
-				 struct nix_cqe_rx_s *cqe)
+				 struct nix_cqe_rx_s *cqe, bool *need_xdp_flush)
 {
 	struct nix_rx_parse_s *parse = &cqe->parse;
 	struct nix_rx_sg_s *sg = &cqe->sg;
@@ -353,7 +354,7 @@ static void otx2_rcv_pkt_handler(struct otx2_nic *pfvf,
 	}
 
 	if (pfvf->xdp_prog)
-		if (otx2_xdp_rcv_pkt_handler(pfvf, pfvf->xdp_prog, cqe, cq))
+		if (otx2_xdp_rcv_pkt_handler(pfvf, pfvf->xdp_prog, cqe, cq, need_xdp_flush))
 			return;
 
 	skb = napi_get_frags(napi);
@@ -388,6 +389,7 @@ static int otx2_rx_napi_handler(struct otx2_nic *pfvf,
 				struct napi_struct *napi,
 				struct otx2_cq_queue *cq, int budget)
 {
+	bool need_xdp_flush = false;
 	struct nix_cqe_rx_s *cqe;
 	int processed_cqe = 0;
 
@@ -409,13 +411,15 @@ process_cqe:
 		cq->cq_head++;
 		cq->cq_head &= (cq->cqe_cnt - 1);
 
-		otx2_rcv_pkt_handler(pfvf, napi, cq, cqe);
+		otx2_rcv_pkt_handler(pfvf, napi, cq, cqe, &need_xdp_flush);
 
 		cqe->hdr.cqe_type = NIX_XQE_TYPE_INVALID;
 		cqe->sg.seg_addr = 0x00;
 		processed_cqe++;
 		cq->pend_cqe--;
 	}
+	if (need_xdp_flush)
+		xdp_do_flush();
 
 	/* Free CQEs to HW */
 	otx2_write64(pfvf, NIX_LF_CQ_OP_DOOR,
@@ -1354,7 +1358,8 @@ bool otx2_xdp_sq_append_pkt(struct otx2_nic *pfvf, u64 iova, int len, u16 qidx)
 static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf,
 				     struct bpf_prog *prog,
 				     struct nix_cqe_rx_s *cqe,
-				     struct otx2_cq_queue *cq)
+				     struct otx2_cq_queue *cq,
+				     bool *need_xdp_flush)
 {
 	unsigned char *hard_start, *data;
 	int qidx = cq->cq_idx;
@@ -1391,8 +1396,10 @@ static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf,
 
 		otx2_dma_unmap_page(pfvf, iova, pfvf->rbsize,
 				    DMA_FROM_DEVICE);
-		if (!err)
+		if (!err) {
+			*need_xdp_flush = true;
 			return true;
+		}
 		put_page(page);
 		break;
 	default:
-- 
cgit v1.2.3


From 1703b2e0de653b459ca6230be32ce7f2ea0ae7ee Mon Sep 17 00:00:00 2001
From: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
Date: Tue, 19 Sep 2023 10:03:31 -0700
Subject: igc: Expose tx-usecs coalesce setting to user

When users attempt to obtain the coalesce setting using the
ethtool command, current code always returns 0 for tx-usecs.
This is because I225/6 always uses a queue pair setting, hence
tx_coalesce_usecs does not return a value during the
igc_ethtool_get_coalesce() callback process. The pair queue
condition checking in igc_ethtool_get_coalesce() is removed by
this patch so that the user gets information of the value of tx-usecs.

Even if i225/6 is using queue pair setting, there is no harm in
notifying the user of the tx-usecs. The implementation of the current
code may have previously been a copy of the legacy code i210.
Since I225 has the queue pair setting enabled, tx-usecs will always adhere
to the user-set rx-usecs value. An error message will appear when the user
attempts to set the tx-usecs value for the input parameters because,
by default, they should only set the rx-usecs value.

This patch also adds the helper function to get the
previous rx coalesce value similar to tx coalesce.

How to test:
User can get the coalesce value using ethtool command.

Example command:
Get: ethtool -c <interface>

Previous output:

rx-usecs: 3
rx-frames: n/a
rx-usecs-irq: n/a
rx-frames-irq: n/a

tx-usecs: 0
tx-frames: n/a
tx-usecs-irq: n/a
tx-frames-irq: n/a

New output:

rx-usecs: 3
rx-frames: n/a
rx-usecs-irq: n/a
rx-frames-irq: n/a

tx-usecs: 3
tx-frames: n/a
tx-usecs-irq: n/a
tx-frames-irq: n/a

Fixes: 8c5ad0dae93c ("igc: Add ethtool support")
Signed-off-by: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
Tested-by: Naama Meir <naamax.meir@linux.intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Link: https://lore.kernel.org/r/20230919170331.1581031-1-anthony.l.nguyen@intel.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 31 +++++++++++++++++-----------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 93bce729be76..7ab6dd58e400 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -868,6 +868,18 @@ static void igc_ethtool_get_stats(struct net_device *netdev,
 	spin_unlock(&adapter->stats64_lock);
 }
 
+static int igc_ethtool_get_previous_rx_coalesce(struct igc_adapter *adapter)
+{
+	return (adapter->rx_itr_setting <= 3) ?
+		adapter->rx_itr_setting : adapter->rx_itr_setting >> 2;
+}
+
+static int igc_ethtool_get_previous_tx_coalesce(struct igc_adapter *adapter)
+{
+	return (adapter->tx_itr_setting <= 3) ?
+		adapter->tx_itr_setting : adapter->tx_itr_setting >> 2;
+}
+
 static int igc_ethtool_get_coalesce(struct net_device *netdev,
 				    struct ethtool_coalesce *ec,
 				    struct kernel_ethtool_coalesce *kernel_coal,
@@ -875,17 +887,8 @@ static int igc_ethtool_get_coalesce(struct net_device *netdev,
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 
-	if (adapter->rx_itr_setting <= 3)
-		ec->rx_coalesce_usecs = adapter->rx_itr_setting;
-	else
-		ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2;
-
-	if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS)) {
-		if (adapter->tx_itr_setting <= 3)
-			ec->tx_coalesce_usecs = adapter->tx_itr_setting;
-		else
-			ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2;
-	}
+	ec->rx_coalesce_usecs = igc_ethtool_get_previous_rx_coalesce(adapter);
+	ec->tx_coalesce_usecs = igc_ethtool_get_previous_tx_coalesce(adapter);
 
 	return 0;
 }
@@ -910,8 +913,12 @@ static int igc_ethtool_set_coalesce(struct net_device *netdev,
 	    ec->tx_coalesce_usecs == 2)
 		return -EINVAL;
 
-	if ((adapter->flags & IGC_FLAG_QUEUE_PAIRS) && ec->tx_coalesce_usecs)
+	if ((adapter->flags & IGC_FLAG_QUEUE_PAIRS) &&
+	    ec->tx_coalesce_usecs != igc_ethtool_get_previous_tx_coalesce(adapter)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Queue Pair mode enabled, both Rx and Tx coalescing controlled by rx-usecs");
 		return -EINVAL;
+	}
 
 	/* If ITR is disabled, disable DMAC */
 	if (ec->rx_coalesce_usecs == 0) {
-- 
cgit v1.2.3


From fc21f08375dbf654bd1fda748261955de580ac14 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree.xilinx@gmail.com>
Date: Tue, 19 Sep 2023 19:39:49 +0100
Subject: sfc: handle error pointers returned by
 rhashtable_lookup_get_insert_fast()

Several places in TC offload code assumed that the return from
 rhashtable_lookup_get_insert_fast() was always either NULL or a valid
 pointer to an existing entry, but in fact that function can return an
 error pointer.  In that case, perform the usual cleanup of the newly
 created entry, then pass up the error, rather than attempting to take a
 reference on the old entry.

Fixes: d902e1a737d4 ("sfc: bare bones TC offload on EF100")
Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Link: https://lore.kernel.org/r/20230919183949.59392-1-edward.cree@amd.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/sfc/tc.c               | 21 ++++++++++++++++++---
 drivers/net/ethernet/sfc/tc_conntrack.c     |  7 ++++++-
 drivers/net/ethernet/sfc/tc_counters.c      |  2 ++
 drivers/net/ethernet/sfc/tc_encap_actions.c |  4 ++++
 4 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c
index 047322b04d4f..834f000ba1c4 100644
--- a/drivers/net/ethernet/sfc/tc.c
+++ b/drivers/net/ethernet/sfc/tc.c
@@ -136,6 +136,8 @@ static struct efx_tc_mac_pedit_action *efx_tc_flower_get_mac(struct efx_nic *efx
 	if (old) {
 		/* don't need our new entry */
 		kfree(ped);
+		if (IS_ERR(old)) /* oh dear, it's actually an error */
+			return ERR_CAST(old);
 		if (!refcount_inc_not_zero(&old->ref))
 			return ERR_PTR(-EAGAIN);
 		/* existing entry found, ref taken */
@@ -602,6 +604,8 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
 		kfree(encap);
 		if (pseudo) /* don't need our new pseudo either */
 			efx_tc_flower_release_encap_match(efx, pseudo);
+		if (IS_ERR(old)) /* oh dear, it's actually an error */
+			return PTR_ERR(old);
 		/* check old and new em_types are compatible */
 		switch (old->type) {
 		case EFX_TC_EM_DIRECT:
@@ -700,6 +704,8 @@ static struct efx_tc_recirc_id *efx_tc_get_recirc_id(struct efx_nic *efx,
 	if (old) {
 		/* don't need our new entry */
 		kfree(rid);
+		if (IS_ERR(old)) /* oh dear, it's actually an error */
+			return ERR_CAST(old);
 		if (!refcount_inc_not_zero(&old->ref))
 			return ERR_PTR(-EAGAIN);
 		/* existing entry found */
@@ -1482,7 +1488,10 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
 	old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht,
 						&rule->linkage,
 						efx_tc_match_action_ht_params);
-	if (old) {
+	if (IS_ERR(old)) {
+		rc = PTR_ERR(old);
+		goto release;
+	} else if (old) {
 		netif_dbg(efx, drv, efx->net_dev,
 			  "Ignoring already-offloaded rule (cookie %lx)\n",
 			  tc->cookie);
@@ -1697,7 +1706,10 @@ static int efx_tc_flower_replace_lhs(struct efx_nic *efx,
 	old = rhashtable_lookup_get_insert_fast(&efx->tc->lhs_rule_ht,
 						&rule->linkage,
 						efx_tc_lhs_rule_ht_params);
-	if (old) {
+	if (IS_ERR(old)) {
+		rc = PTR_ERR(old);
+		goto release;
+	} else if (old) {
 		netif_dbg(efx, drv, efx->net_dev,
 			  "Already offloaded rule (cookie %lx)\n", tc->cookie);
 		rc = -EEXIST;
@@ -1858,7 +1870,10 @@ static int efx_tc_flower_replace(struct efx_nic *efx,
 	old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht,
 						&rule->linkage,
 						efx_tc_match_action_ht_params);
-	if (old) {
+	if (IS_ERR(old)) {
+		rc = PTR_ERR(old);
+		goto release;
+	} else if (old) {
 		netif_dbg(efx, drv, efx->net_dev,
 			  "Already offloaded rule (cookie %lx)\n", tc->cookie);
 		NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
diff --git a/drivers/net/ethernet/sfc/tc_conntrack.c b/drivers/net/ethernet/sfc/tc_conntrack.c
index 8e06bfbcbea1..44bb57670340 100644
--- a/drivers/net/ethernet/sfc/tc_conntrack.c
+++ b/drivers/net/ethernet/sfc/tc_conntrack.c
@@ -298,7 +298,10 @@ static int efx_tc_ct_replace(struct efx_tc_ct_zone *ct_zone,
 	old = rhashtable_lookup_get_insert_fast(&efx->tc->ct_ht,
 						&conn->linkage,
 						efx_tc_ct_ht_params);
-	if (old) {
+	if (IS_ERR(old)) {
+		rc = PTR_ERR(old);
+		goto release;
+	} else if (old) {
 		netif_dbg(efx, drv, efx->net_dev,
 			  "Already offloaded conntrack (cookie %lx)\n", tc->cookie);
 		rc = -EEXIST;
@@ -482,6 +485,8 @@ struct efx_tc_ct_zone *efx_tc_ct_register_zone(struct efx_nic *efx, u16 zone,
 	if (old) {
 		/* don't need our new entry */
 		kfree(ct_zone);
+		if (IS_ERR(old)) /* oh dear, it's actually an error */
+			return ERR_CAST(old);
 		if (!refcount_inc_not_zero(&old->ref))
 			return ERR_PTR(-EAGAIN);
 		/* existing entry found */
diff --git a/drivers/net/ethernet/sfc/tc_counters.c b/drivers/net/ethernet/sfc/tc_counters.c
index 0fafb47ea082..c44088424323 100644
--- a/drivers/net/ethernet/sfc/tc_counters.c
+++ b/drivers/net/ethernet/sfc/tc_counters.c
@@ -236,6 +236,8 @@ struct efx_tc_counter_index *efx_tc_flower_get_counter_index(
 	if (old) {
 		/* don't need our new entry */
 		kfree(ctr);
+		if (IS_ERR(old)) /* oh dear, it's actually an error */
+			return ERR_CAST(old);
 		if (!refcount_inc_not_zero(&old->ref))
 			return ERR_PTR(-EAGAIN);
 		/* existing entry found */
diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.c b/drivers/net/ethernet/sfc/tc_encap_actions.c
index 7e8bcdb222ad..87443f9dfd22 100644
--- a/drivers/net/ethernet/sfc/tc_encap_actions.c
+++ b/drivers/net/ethernet/sfc/tc_encap_actions.c
@@ -132,6 +132,8 @@ static int efx_bind_neigh(struct efx_nic *efx,
 		/* don't need our new entry */
 		put_net_track(neigh->net, &neigh->ns_tracker);
 		kfree(neigh);
+		if (IS_ERR(old)) /* oh dear, it's actually an error */
+			return PTR_ERR(old);
 		if (!refcount_inc_not_zero(&old->ref))
 			return -EAGAIN;
 		/* existing entry found, ref taken */
@@ -640,6 +642,8 @@ struct efx_tc_encap_action *efx_tc_flower_create_encap_md(
 	if (old) {
 		/* don't need our new entry */
 		kfree(encap);
+		if (IS_ERR(old)) /* oh dear, it's actually an error */
+			return ERR_CAST(old);
 		if (!refcount_inc_not_zero(&old->ref))
 			return ERR_PTR(-EAGAIN);
 		/* existing entry found, ref taken */
-- 
cgit v1.2.3