From 444c8263151afc06c01ac8ddcd1204624a7d4bb3 Mon Sep 17 00:00:00 2001
From: Yue Haibing <yuehaibing@huawei.com>
Date: Tue, 21 Aug 2018 14:03:04 +0000
Subject: netfilter: conntrack: remove duplicated include from
 nf_conntrack_proto_udp.c

Remove duplicated include.

Fixes: c779e849608a ("netfilter: conntrack: remove get_timeout() indirection")
Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_proto_udp.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 7a1b8988a931..9272a2c525a8 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -393,4 +393,3 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
 };
 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6);
 #endif
-#include <net/netfilter/nf_conntrack_timeout.h>
-- 
cgit v1.2.3


From c1dc2912059901f97345d9e10c96b841215fdc0f Mon Sep 17 00:00:00 2001
From: Martin Willi <martin@strongswan.org>
Date: Wed, 22 Aug 2018 10:27:17 +0200
Subject: netfilter: xt_cluster: add dependency on conntrack module

The cluster match requires conntrack for matching packets. If the
netns does not have conntrack hooks registered, the match does not
work at all.

Implicitly load the conntrack hook for the family, exactly as many
other extensions do. This ensures that the match works even if the
hooks have not been registered by other means.

Signed-off-by: Martin Willi <martin@strongswan.org>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_cluster.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index dfbdbb2fc0ed..51d0c257e7a5 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -125,6 +125,7 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par)
 static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par)
 {
 	struct xt_cluster_match_info *info = par->matchinfo;
+	int ret;
 
 	if (info->total_nodes > XT_CLUSTER_NODES_MAX) {
 		pr_info_ratelimited("you have exceeded the maximum number of cluster nodes (%u > %u)\n",
@@ -135,7 +136,17 @@ static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par)
 		pr_info_ratelimited("node mask cannot exceed total number of nodes\n");
 		return -EDOM;
 	}
-	return 0;
+
+	ret = nf_ct_netns_get(par->net, par->family);
+	if (ret < 0)
+		pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+				    par->family);
+	return ret;
+}
+
+static void xt_cluster_mt_destroy(const struct xt_mtdtor_param *par)
+{
+	nf_ct_netns_put(par->net, par->family);
 }
 
 static struct xt_match xt_cluster_match __read_mostly = {
@@ -144,6 +155,7 @@ static struct xt_match xt_cluster_match __read_mostly = {
 	.match		= xt_cluster_mt,
 	.checkentry	= xt_cluster_mt_checkentry,
 	.matchsize	= sizeof(struct xt_cluster_match_info),
+	.destroy	= xt_cluster_mt_destroy,
 	.me		= THIS_MODULE,
 };
 
-- 
cgit v1.2.3


From 10568f6c5761db24249c610c94d6e44d5505a0ba Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 22 Aug 2018 11:33:27 +0200
Subject: netfilter: xt_checksum: ignore gso skbs

Satish Patel reports a skb_warn_bad_offload() splat caused
by -j CHECKSUM rules:

-A POSTROUTING -p tcp -m tcp --sport 80 -j CHECKSUM

The CHECKSUM target has never worked with GSO skbs, and the above rule
makes no sense as kernel will handle checksum updates on transmit.

Unfortunately, there are 3rd party tools that install such rules, so we
cannot reject this from the config plane without potential breakage.

Amend Kconfig text to clarify that the CHECKSUM target is only useful
in virtualized environments, where old dhcp clients that use AF_PACKET
used to discard UDP packets with a 'bad' header checksum and add a
one-time warning in case such rule isn't restricted to UDP.

v2: check IP6T_F_PROTO flag before cmp (Michal Kubecek)

Reported-by: Satish Patel <satish.txt@gmail.com>
Reported-by: Markos Chandras <markos.chandras@suse.com>
Reported-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/Kconfig       | 12 ++++++------
 net/netfilter/xt_CHECKSUM.c | 22 +++++++++++++++++++++-
 2 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 71709c104081..f61c306de1d0 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -771,13 +771,13 @@ config NETFILTER_XT_TARGET_CHECKSUM
 	depends on NETFILTER_ADVANCED
 	---help---
 	  This option adds a `CHECKSUM' target, which can be used in the iptables mangle
-	  table.
+	  table to work around buggy DHCP clients in virtualized environments.
 
-	  You can use this target to compute and fill in the checksum in
-	  a packet that lacks a checksum.  This is particularly useful,
-	  if you need to work around old applications such as dhcp clients,
-	  that do not work well with checksum offloads, but don't want to disable
-	  checksum offload in your device.
+	  Some old DHCP clients drop packets because they are not aware
+	  that the checksum would normally be offloaded to hardware and
+	  thus should be considered valid.
+	  This target can be used to fill in the checksum using iptables
+	  when such packets are sent via a virtual network device.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
diff --git a/net/netfilter/xt_CHECKSUM.c b/net/netfilter/xt_CHECKSUM.c
index 9f4151ec3e06..6c7aa6a0a0d2 100644
--- a/net/netfilter/xt_CHECKSUM.c
+++ b/net/netfilter/xt_CHECKSUM.c
@@ -16,6 +16,9 @@
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_CHECKSUM.h>
 
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Michael S. Tsirkin <mst@redhat.com>");
 MODULE_DESCRIPTION("Xtables: checksum modification");
@@ -25,7 +28,7 @@ MODULE_ALIAS("ip6t_CHECKSUM");
 static unsigned int
 checksum_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
-	if (skb->ip_summed == CHECKSUM_PARTIAL)
+	if (skb->ip_summed == CHECKSUM_PARTIAL && !skb_is_gso(skb))
 		skb_checksum_help(skb);
 
 	return XT_CONTINUE;
@@ -34,6 +37,8 @@ checksum_tg(struct sk_buff *skb, const struct xt_action_param *par)
 static int checksum_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct xt_CHECKSUM_info *einfo = par->targinfo;
+	const struct ip6t_ip6 *i6 = par->entryinfo;
+	const struct ipt_ip *i4 = par->entryinfo;
 
 	if (einfo->operation & ~XT_CHECKSUM_OP_FILL) {
 		pr_info_ratelimited("unsupported CHECKSUM operation %x\n",
@@ -43,6 +48,21 @@ static int checksum_tg_check(const struct xt_tgchk_param *par)
 	if (!einfo->operation)
 		return -EINVAL;
 
+	switch (par->family) {
+	case NFPROTO_IPV4:
+		if (i4->proto == IPPROTO_UDP &&
+		    (i4->invflags & XT_INV_PROTO) == 0)
+			return 0;
+		break;
+	case NFPROTO_IPV6:
+		if ((i6->flags & IP6T_F_PROTO) &&
+		    i6->proto == IPPROTO_UDP &&
+		    (i6->invflags & XT_INV_PROTO) == 0)
+			return 0;
+		break;
+	}
+
+	pr_warn_once("CHECKSUM should be avoided.  If really needed, restrict with \"-p udp\" and only use in OUTPUT\n");
 	return 0;
 }
 
-- 
cgit v1.2.3


From ef39078d6342deaddacdd550c4197421bd83fb76 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 24 Aug 2018 08:43:35 +0200
Subject: netfilter: conntrack: place 'new' timeout in first location too

tcp, sctp and dccp trackers re-use the userspace ctnetlink states
to index their timeout arrays, which means timeout[0] is never
used.  Copy the 'new' state (syn-sent, dccp-request, ..) to 0 as well
so external users can simply read it off timeouts[0] without need to
differentiate dccp/sctp/tcp and udp/icmp/gre/generic.

The alternative is to map all array accesses to 'i - 1', but that
is a much more intrusive change.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_proto_dccp.c | 7 +++++++
 net/netfilter/nf_conntrack_proto_sctp.c | 7 +++++++
 net/netfilter/nf_conntrack_proto_tcp.c  | 7 +++++++
 3 files changed, 21 insertions(+)

diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 8c58f96b59e7..b81f70039828 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -697,6 +697,8 @@ static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[],
 			timeouts[i] = ntohl(nla_get_be32(tb[i])) * HZ;
 		}
 	}
+
+	timeouts[CTA_TIMEOUT_DCCP_UNSPEC] = timeouts[CTA_TIMEOUT_DCCP_REQUEST];
 	return 0;
 }
 
@@ -827,6 +829,11 @@ static int dccp_init_net(struct net *net, u_int16_t proto)
 		dn->dccp_timeout[CT_DCCP_CLOSEREQ]	= 64 * HZ;
 		dn->dccp_timeout[CT_DCCP_CLOSING]	= 64 * HZ;
 		dn->dccp_timeout[CT_DCCP_TIMEWAIT]	= 2 * DCCP_MSL;
+
+		/* timeouts[0] is unused, make it same as SYN_SENT so
+		 * ->timeouts[0] contains 'new' timeout, like udp or icmp.
+		 */
+		dn->dccp_timeout[CT_DCCP_NONE] = dn->dccp_timeout[CT_DCCP_REQUEST];
 	}
 
 	return dccp_kmemdup_sysctl_table(net, pn, dn);
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 8d1e085fc14a..5eddfd32b852 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -613,6 +613,8 @@ static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[],
 			timeouts[i] = ntohl(nla_get_be32(tb[i])) * HZ;
 		}
 	}
+
+	timeouts[CTA_TIMEOUT_SCTP_UNSPEC] = timeouts[CTA_TIMEOUT_SCTP_CLOSED];
 	return 0;
 }
 
@@ -743,6 +745,11 @@ static int sctp_init_net(struct net *net, u_int16_t proto)
 
 		for (i = 0; i < SCTP_CONNTRACK_MAX; i++)
 			sn->timeouts[i] = sctp_timeouts[i];
+
+		/* timeouts[0] is unused, init it so ->timeouts[0] contains
+		 * 'new' timeout, like udp or icmp.
+		 */
+		sn->timeouts[0] = sctp_timeouts[SCTP_CONNTRACK_CLOSED];
 	}
 
 	return sctp_kmemdup_sysctl_table(pn, sn);
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index d80d322b9d8b..3e2dc56a96c3 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1301,6 +1301,7 @@ static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
 		timeouts[TCP_CONNTRACK_SYN_SENT] =
 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
 	}
+
 	if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
 		timeouts[TCP_CONNTRACK_SYN_RECV] =
 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
@@ -1341,6 +1342,8 @@ static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
 		timeouts[TCP_CONNTRACK_UNACK] =
 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
 	}
+
+	timeouts[CTA_TIMEOUT_TCP_UNSPEC] = timeouts[CTA_TIMEOUT_TCP_SYN_SENT];
 	return 0;
 }
 
@@ -1518,6 +1521,10 @@ static int tcp_init_net(struct net *net, u_int16_t proto)
 		for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
 			tn->timeouts[i] = tcp_timeouts[i];
 
+		/* timeouts[0] is unused, make it same as SYN_SENT so
+		 * ->timeouts[0] contains 'new' timeout, like udp or icmp.
+		 */
+		tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT];
 		tn->tcp_loose = nf_ct_tcp_loose;
 		tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
 		tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
-- 
cgit v1.2.3


From 0434ccdcf883e53ec7156a6843943e940dc1feb8 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 24 Aug 2018 08:43:36 +0200
Subject: netfilter: nf_tables: rework ct timeout set support

Using a private template is problematic:

1. We can't assign both a zone and a timeout policy
   (zone assigns a conntrack template, so we hit problem 1)
2. Using a template needs to take care of ct refcount, else we'll
   eventually free the private template due to ->use underflow.

This patch reworks template policy to instead work with existing conntrack.

As long as such conntrack has not yet been placed into the hash table
(unconfirmed) we can still add the timeout extension.

The only caveat is that we now need to update/correct ct->timeout to
reflect the initial/new state, otherwise the conntrack entry retains the
default 'new' timeout.

Side effect of this change is that setting the policy must
now occur from chains that are evaluated *after* the conntrack lookup
has taken place.

No released kernel contains the timeout policy feature yet, so this change
should be ok.

Changes since v2:
 - don't handle 'ct is confirmed case'
 - after previous patch, no need to special-case tcp/dccp/sctp timeout
   anymore

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_timeout.h |  2 +-
 net/netfilter/nft_ct.c                       | 59 ++++++++++++++--------------
 2 files changed, 30 insertions(+), 31 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h
index d5f62cc6c2ae..3394d75e1c80 100644
--- a/include/net/netfilter/nf_conntrack_timeout.h
+++ b/include/net/netfilter/nf_conntrack_timeout.h
@@ -30,7 +30,7 @@ struct nf_conn_timeout {
 };
 
 static inline unsigned int *
-nf_ct_timeout_data(struct nf_conn_timeout *t)
+nf_ct_timeout_data(const struct nf_conn_timeout *t)
 {
 	struct nf_ct_timeout *timeout;
 
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 26a8baebd072..5dd87748afa8 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -799,7 +799,7 @@ err:
 }
 
 struct nft_ct_timeout_obj {
-	struct nf_conn		*tmpl;
+	struct nf_ct_timeout    *timeout;
 	u8			l4proto;
 };
 
@@ -809,26 +809,42 @@ static void nft_ct_timeout_obj_eval(struct nft_object *obj,
 {
 	const struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
 	struct nf_conn *ct = (struct nf_conn *)skb_nfct(pkt->skb);
-	struct sk_buff *skb = pkt->skb;
+	struct nf_conn_timeout *timeout;
+	const unsigned int *values;
+
+	if (priv->l4proto != pkt->tprot)
+		return;
 
-	if (ct ||
-	    priv->l4proto != pkt->tprot)
+	if (!ct || nf_ct_is_template(ct) || nf_ct_is_confirmed(ct))
 		return;
 
-	nf_ct_set(skb, priv->tmpl, IP_CT_NEW);
+	timeout = nf_ct_timeout_find(ct);
+	if (!timeout) {
+		timeout = nf_ct_timeout_ext_add(ct, priv->timeout, GFP_ATOMIC);
+		if (!timeout) {
+			regs->verdict.code = NF_DROP;
+			return;
+		}
+	}
+
+	rcu_assign_pointer(timeout->timeout, priv->timeout);
+
+	/* adjust the timeout as per 'new' state. ct is unconfirmed,
+	 * so the current timestamp must not be added.
+	 */
+	values = nf_ct_timeout_data(timeout);
+	if (values)
+		nf_ct_refresh(ct, pkt->skb, values[0]);
 }
 
 static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx,
 				   const struct nlattr * const tb[],
 				   struct nft_object *obj)
 {
-	const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
 	struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
 	const struct nf_conntrack_l4proto *l4proto;
-	struct nf_conn_timeout *timeout_ext;
 	struct nf_ct_timeout *timeout;
 	int l3num = ctx->family;
-	struct nf_conn *tmpl;
 	__u8 l4num;
 	int ret;
 
@@ -863,28 +879,14 @@ static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx,
 
 	timeout->l3num = l3num;
 	timeout->l4proto = l4proto;
-	tmpl = nf_ct_tmpl_alloc(ctx->net, zone, GFP_ATOMIC);
-	if (!tmpl) {
-		ret = -ENOMEM;
-		goto err_free_timeout;
-	}
-
-	timeout_ext = nf_ct_timeout_ext_add(tmpl, timeout, GFP_ATOMIC);
-	if (!timeout_ext) {
-		ret = -ENOMEM;
-		goto err_free_tmpl;
-	}
 
 	ret = nf_ct_netns_get(ctx->net, ctx->family);
 	if (ret < 0)
-		goto err_free_tmpl;
-
-	priv->tmpl = tmpl;
+		goto err_free_timeout;
 
+	priv->timeout = timeout;
 	return 0;
 
-err_free_tmpl:
-	nf_ct_tmpl_free(tmpl);
 err_free_timeout:
 	kfree(timeout);
 err_proto_put:
@@ -896,22 +898,19 @@ static void nft_ct_timeout_obj_destroy(const struct nft_ctx *ctx,
 				       struct nft_object *obj)
 {
 	struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
-	struct nf_conn_timeout *t = nf_ct_timeout_find(priv->tmpl);
-	struct nf_ct_timeout *timeout;
+	struct nf_ct_timeout *timeout = priv->timeout;
 
-	timeout = rcu_dereference_raw(t->timeout);
 	nf_ct_untimeout(ctx->net, timeout);
 	nf_ct_l4proto_put(timeout->l4proto);
 	nf_ct_netns_put(ctx->net, ctx->family);
-	nf_ct_tmpl_free(priv->tmpl);
+	kfree(priv->timeout);
 }
 
 static int nft_ct_timeout_obj_dump(struct sk_buff *skb,
 				   struct nft_object *obj, bool reset)
 {
 	const struct nft_ct_timeout_obj *priv = nft_obj_data(obj);
-	const struct nf_conn_timeout *t = nf_ct_timeout_find(priv->tmpl);
-	const struct nf_ct_timeout *timeout = rcu_dereference_raw(t->timeout);
+	const struct nf_ct_timeout *timeout = priv->timeout;
 	struct nlattr *nest_params;
 	int ret;
 
-- 
cgit v1.2.3


From e0758412208960be9de11e6d2350c81ffd88410f Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 25 Aug 2018 01:14:46 +0200
Subject: netfilter: kconfig: nat related expression depend on nftables core

NF_TABLES_IPV4 is now boolean so it is possible to set

NF_TABLES=m
NF_TABLES_IPV4=y
NFT_CHAIN_NAT_IPV4=y

which causes:
nft_chain_nat_ipv4.c:(.text+0x6d): undefined reference to `nft_do_chain'

Wrap NFT_CHAIN_NAT_IPV4 and related nat expressions with NF_TABLES to
restore the dependency.

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Fixes: 02c7b25e5f54 ("netfilter: nf_tables: build-in filter chain type")
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/Kconfig | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index d9504adc47b3..184bf2e0a1ed 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -106,6 +106,10 @@ config NF_NAT_IPV4
 
 if NF_NAT_IPV4
 
+config NF_NAT_MASQUERADE_IPV4
+	bool
+
+if NF_TABLES
 config NFT_CHAIN_NAT_IPV4
 	depends on NF_TABLES_IPV4
 	tristate "IPv4 nf_tables nat chain support"
@@ -115,9 +119,6 @@ config NFT_CHAIN_NAT_IPV4
 	  packet transformations such as the source, destination address and
 	  source and destination ports.
 
-config NF_NAT_MASQUERADE_IPV4
-	bool
-
 config NFT_MASQ_IPV4
 	tristate "IPv4 masquerading support for nf_tables"
 	depends on NF_TABLES_IPV4
@@ -135,6 +136,7 @@ config NFT_REDIR_IPV4
 	help
 	  This is the expression that provides IPv4 redirect support for
 	  nf_tables.
+endif # NF_TABLES
 
 config NF_NAT_SNMP_BASIC
 	tristate "Basic SNMP-ALG support"
-- 
cgit v1.2.3


From 7acfda539c0b9636a58bfee56abfb3aeee806d96 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Sun, 26 Aug 2018 02:35:44 +0900
Subject: netfilter: nf_tables: release chain in flushing set

When element of verdict map is deleted, the delete routine should
release chain. however, flush element of verdict map routine doesn't
release chain.

test commands:
   %nft add table ip filter
   %nft add chain ip filter c1
   %nft add map ip filter map1 { type ipv4_addr : verdict \; }
   %nft add element ip filter map1 { 1 : jump c1 }
   %nft flush map ip filter map1
   %nft flush ruleset

splat looks like:
[ 4895.170899] kernel BUG at net/netfilter/nf_tables_api.c:1415!
[ 4895.178114] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
[ 4895.178880] CPU: 0 PID: 1670 Comm: nft Not tainted 4.18.0+ #55
[ 4895.178880] RIP: 0010:nf_tables_chain_destroy.isra.28+0x39/0x220 [nf_tables]
[ 4895.178880] Code: fc ff df 53 48 89 fb 48 83 c7 50 48 89 fa 48 c1 ea 03 0f b6 04 02 84 c0 74 09 3c 03 7f 05 e8 3e 4c 25 e1 8b 43 50 85 c0 74 02 <0f> 0b 48 89 da 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 80 3c 02
[ 4895.228342] RSP: 0018:ffff88010b98f4c0 EFLAGS: 00010202
[ 4895.234841] RAX: 0000000000000001 RBX: ffff8801131c6968 RCX: ffff8801146585b0
[ 4895.234841] RDX: 1ffff10022638d37 RSI: ffff8801191a9348 RDI: ffff8801131c69b8
[ 4895.234841] RBP: ffff8801146585a8 R08: 1ffff1002323526a R09: 0000000000000000
[ 4895.234841] R10: 0000000000000000 R11: 0000000000000000 R12: dead000000000200
[ 4895.234841] R13: dead000000000100 R14: ffffffffa3638af8 R15: dffffc0000000000
[ 4895.234841] FS:  00007f6d188e6700(0000) GS:ffff88011b600000(0000) knlGS:0000000000000000
[ 4895.234841] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 4895.234841] CR2: 00007ffe72b8df88 CR3: 000000010e2d4000 CR4: 00000000001006f0
[ 4895.234841] Call Trace:
[ 4895.234841]  nf_tables_commit+0x2704/0x2c70 [nf_tables]
[ 4895.234841]  ? nfnetlink_rcv_batch+0xa4f/0x11b0 [nfnetlink]
[ 4895.234841]  ? nf_tables_setelem_notify.constprop.48+0x1a0/0x1a0 [nf_tables]
[ 4895.323824]  ? __lock_is_held+0x9d/0x130
[ 4895.323824]  ? kasan_unpoison_shadow+0x30/0x40
[ 4895.333299]  ? kasan_kmalloc+0xa9/0xc0
[ 4895.333299]  ? kmem_cache_alloc_trace+0x2c0/0x310
[ 4895.333299]  ? nfnetlink_rcv_batch+0xa4f/0x11b0 [nfnetlink]
[ 4895.333299]  nfnetlink_rcv_batch+0xdb9/0x11b0 [nfnetlink]
[ 4895.333299]  ? debug_show_all_locks+0x290/0x290
[ 4895.333299]  ? nfnetlink_net_init+0x150/0x150 [nfnetlink]
[ 4895.333299]  ? sched_clock_cpu+0xe5/0x170
[ 4895.333299]  ? sched_clock_local+0xff/0x130
[ 4895.333299]  ? sched_clock_cpu+0xe5/0x170
[ 4895.333299]  ? find_held_lock+0x39/0x1b0
[ 4895.333299]  ? sched_clock_local+0xff/0x130
[ 4895.333299]  ? memset+0x1f/0x40
[ 4895.333299]  ? nla_parse+0x33/0x260
[ 4895.333299]  ? ns_capable_common+0x6e/0x110
[ 4895.333299]  nfnetlink_rcv+0x2c0/0x310 [nfnetlink]
[ ... ]

Fixes: 591054469b3e ("netfilter: nf_tables: revisit chain/object refcounting from elements")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 1dca5683f59f..2cfb173cd0b2 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4637,6 +4637,7 @@ static int nft_flush_set(const struct nft_ctx *ctx,
 	}
 	set->ndeact++;
 
+	nft_set_elem_deactivate(ctx->net, set, elem);
 	nft_trans_elem_set(trans) = set;
 	nft_trans_elem(trans) = *elem;
 	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
-- 
cgit v1.2.3


From 3a3539cd36327c6f9e0ffd9f3fd3dea7ff8b3567 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Wed, 5 Sep 2018 12:16:00 +0200
Subject: mlxsw: spectrum_buffers: Set up a dedicated pool for BUM traffic

MC-aware mode was recently enabled by mlxsw on Spectrum switches in
commit 7b8195306694 ("mlxsw: spectrum: Configure MC-aware mode on mlxsw
ports"). Unfortunately, testing has shown that the fix is incomplete and
in the presented form actually makes the problem even worse, because any
amount of MC traffic causes UC disruption.

The reason for this is that currently, mlxsw configures the MC-specific
TCs (8..15) to map to pool 0. It also configures a maximum buffer size
of 0, but for MC traffic that maximum is disregarded and not part of the
quota. Therefore MC traffic is always admitted to the egress buffer.

Fix the configuration by directing the MC TCs into pool 15, which is
dedicated to MC traffic and recognized as such by the silicon.

Fixes: 7b8195306694 ("mlxsw: spectrum: Configure MC-aware mode on mlxsw ports")
Signed-off-by: Petr Machata <petrm@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
index 4327487553c5..3589432d1643 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
@@ -337,14 +337,14 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = {
 	MLXSW_SP_SB_CM(1500, 9, 0),
 	MLXSW_SP_SB_CM(1500, 9, 0),
 	MLXSW_SP_SB_CM(1500, 9, 0),
-	MLXSW_SP_SB_CM(0, 0, 0),
-	MLXSW_SP_SB_CM(0, 0, 0),
-	MLXSW_SP_SB_CM(0, 0, 0),
-	MLXSW_SP_SB_CM(0, 0, 0),
-	MLXSW_SP_SB_CM(0, 0, 0),
-	MLXSW_SP_SB_CM(0, 0, 0),
-	MLXSW_SP_SB_CM(0, 0, 0),
-	MLXSW_SP_SB_CM(0, 0, 0),
+	MLXSW_SP_SB_CM(0, 140000, 15),
+	MLXSW_SP_SB_CM(0, 140000, 15),
+	MLXSW_SP_SB_CM(0, 140000, 15),
+	MLXSW_SP_SB_CM(0, 140000, 15),
+	MLXSW_SP_SB_CM(0, 140000, 15),
+	MLXSW_SP_SB_CM(0, 140000, 15),
+	MLXSW_SP_SB_CM(0, 140000, 15),
+	MLXSW_SP_SB_CM(0, 140000, 15),
 	MLXSW_SP_SB_CM(1, 0xff, 0),
 };
 
-- 
cgit v1.2.3


From 9d7f19dc4673fbafebfcbf30eb90e09fa7d1c037 Mon Sep 17 00:00:00 2001
From: Petr Oros <poros@redhat.com>
Date: Wed, 5 Sep 2018 14:37:45 +0200
Subject: be2net: Fix memory leak in be_cmd_get_profile_config()

DMA allocated memory is lost in be_cmd_get_profile_config() when we
call it with non-NULL port_res parameter.

Signed-off-by: Petr Oros <poros@redhat.com>
Reviewed-by: Ivan Vecera <ivecera@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/emulex/benet/be_cmds.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index ff92ab1daeb8..1e9d882c04ef 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -4500,7 +4500,7 @@ int be_cmd_get_profile_config(struct be_adapter *adapter,
 				port_res->max_vfs += le16_to_cpu(pcie->num_vfs);
 			}
 		}
-		return status;
+		goto err;
 	}
 
 	pcie = be_get_pcie_desc(resp->func_param, desc_count,
-- 
cgit v1.2.3


From e65a9e480e91ddf9e15155454d370cead64689c8 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Wed, 5 Sep 2018 15:23:18 +0200
Subject: net: qca_spi: Fix race condition in spi transfers

With performance optimization the spi transfer and messages of basic
register operations like qcaspi_read_register moved into the private
driver structure. But they weren't protected against mutual access
(e.g. between driver kthread and ethtool). So dumping the QCA7000
registers via ethtool during network traffic could make spi_sync
hang forever, because the completion in spi_message is overwritten.

So revert the optimization completely.

Fixes: 291ab06ecf676 ("net: qualcomm: new Ethernet over SPI driver for QCA700")
Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/qca_7k.c  |  76 +++++++++++-----------
 drivers/net/ethernet/qualcomm/qca_spi.c | 110 +++++++++++++++++---------------
 drivers/net/ethernet/qualcomm/qca_spi.h |   5 --
 3 files changed, 93 insertions(+), 98 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/qca_7k.c b/drivers/net/ethernet/qualcomm/qca_7k.c
index ffe7a16bdfc8..6c8543fb90c0 100644
--- a/drivers/net/ethernet/qualcomm/qca_7k.c
+++ b/drivers/net/ethernet/qualcomm/qca_7k.c
@@ -45,34 +45,33 @@ qcaspi_read_register(struct qcaspi *qca, u16 reg, u16 *result)
 {
 	__be16 rx_data;
 	__be16 tx_data;
-	struct spi_transfer *transfer;
-	struct spi_message *msg;
+	struct spi_transfer transfer[2];
+	struct spi_message msg;
 	int ret;
 
+	memset(transfer, 0, sizeof(transfer));
+
+	spi_message_init(&msg);
+
 	tx_data = cpu_to_be16(QCA7K_SPI_READ | QCA7K_SPI_INTERNAL | reg);
+	*result = 0;
+
+	transfer[0].tx_buf = &tx_data;
+	transfer[0].len = QCASPI_CMD_LEN;
+	transfer[1].rx_buf = &rx_data;
+	transfer[1].len = QCASPI_CMD_LEN;
+
+	spi_message_add_tail(&transfer[0], &msg);
 
 	if (qca->legacy_mode) {
-		msg = &qca->spi_msg1;
-		transfer = &qca->spi_xfer1;
-		transfer->tx_buf = &tx_data;
-		transfer->rx_buf = NULL;
-		transfer->len = QCASPI_CMD_LEN;
-		spi_sync(qca->spi_dev, msg);
-	} else {
-		msg = &qca->spi_msg2;
-		transfer = &qca->spi_xfer2[0];
-		transfer->tx_buf = &tx_data;
-		transfer->rx_buf = NULL;
-		transfer->len = QCASPI_CMD_LEN;
-		transfer = &qca->spi_xfer2[1];
+		spi_sync(qca->spi_dev, &msg);
+		spi_message_init(&msg);
 	}
-	transfer->tx_buf = NULL;
-	transfer->rx_buf = &rx_data;
-	transfer->len = QCASPI_CMD_LEN;
-	ret = spi_sync(qca->spi_dev, msg);
+	spi_message_add_tail(&transfer[1], &msg);
+	ret = spi_sync(qca->spi_dev, &msg);
 
 	if (!ret)
-		ret = msg->status;
+		ret = msg.status;
 
 	if (ret)
 		qcaspi_spi_error(qca);
@@ -86,35 +85,32 @@ int
 qcaspi_write_register(struct qcaspi *qca, u16 reg, u16 value)
 {
 	__be16 tx_data[2];
-	struct spi_transfer *transfer;
-	struct spi_message *msg;
+	struct spi_transfer transfer[2];
+	struct spi_message msg;
 	int ret;
 
+	memset(&transfer, 0, sizeof(transfer));
+
+	spi_message_init(&msg);
+
 	tx_data[0] = cpu_to_be16(QCA7K_SPI_WRITE | QCA7K_SPI_INTERNAL | reg);
 	tx_data[1] = cpu_to_be16(value);
 
+	transfer[0].tx_buf = &tx_data[0];
+	transfer[0].len = QCASPI_CMD_LEN;
+	transfer[1].tx_buf = &tx_data[1];
+	transfer[1].len = QCASPI_CMD_LEN;
+
+	spi_message_add_tail(&transfer[0], &msg);
 	if (qca->legacy_mode) {
-		msg = &qca->spi_msg1;
-		transfer = &qca->spi_xfer1;
-		transfer->tx_buf = &tx_data[0];
-		transfer->rx_buf = NULL;
-		transfer->len = QCASPI_CMD_LEN;
-		spi_sync(qca->spi_dev, msg);
-	} else {
-		msg = &qca->spi_msg2;
-		transfer = &qca->spi_xfer2[0];
-		transfer->tx_buf = &tx_data[0];
-		transfer->rx_buf = NULL;
-		transfer->len = QCASPI_CMD_LEN;
-		transfer = &qca->spi_xfer2[1];
+		spi_sync(qca->spi_dev, &msg);
+		spi_message_init(&msg);
 	}
-	transfer->tx_buf = &tx_data[1];
-	transfer->rx_buf = NULL;
-	transfer->len = QCASPI_CMD_LEN;
-	ret = spi_sync(qca->spi_dev, msg);
+	spi_message_add_tail(&transfer[1], &msg);
+	ret = spi_sync(qca->spi_dev, &msg);
 
 	if (!ret)
-		ret = msg->status;
+		ret = msg.status;
 
 	if (ret)
 		qcaspi_spi_error(qca);
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index 206f0266463e..66b775d462fd 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -99,22 +99,24 @@ static u32
 qcaspi_write_burst(struct qcaspi *qca, u8 *src, u32 len)
 {
 	__be16 cmd;
-	struct spi_message *msg = &qca->spi_msg2;
-	struct spi_transfer *transfer = &qca->spi_xfer2[0];
+	struct spi_message msg;
+	struct spi_transfer transfer[2];
 	int ret;
 
+	memset(&transfer, 0, sizeof(transfer));
+	spi_message_init(&msg);
+
 	cmd = cpu_to_be16(QCA7K_SPI_WRITE | QCA7K_SPI_EXTERNAL);
-	transfer->tx_buf = &cmd;
-	transfer->rx_buf = NULL;
-	transfer->len = QCASPI_CMD_LEN;
-	transfer = &qca->spi_xfer2[1];
-	transfer->tx_buf = src;
-	transfer->rx_buf = NULL;
-	transfer->len = len;
+	transfer[0].tx_buf = &cmd;
+	transfer[0].len = QCASPI_CMD_LEN;
+	transfer[1].tx_buf = src;
+	transfer[1].len = len;
 
-	ret = spi_sync(qca->spi_dev, msg);
+	spi_message_add_tail(&transfer[0], &msg);
+	spi_message_add_tail(&transfer[1], &msg);
+	ret = spi_sync(qca->spi_dev, &msg);
 
-	if (ret || (msg->actual_length != QCASPI_CMD_LEN + len)) {
+	if (ret || (msg.actual_length != QCASPI_CMD_LEN + len)) {
 		qcaspi_spi_error(qca);
 		return 0;
 	}
@@ -125,17 +127,20 @@ qcaspi_write_burst(struct qcaspi *qca, u8 *src, u32 len)
 static u32
 qcaspi_write_legacy(struct qcaspi *qca, u8 *src, u32 len)
 {
-	struct spi_message *msg = &qca->spi_msg1;
-	struct spi_transfer *transfer = &qca->spi_xfer1;
+	struct spi_message msg;
+	struct spi_transfer transfer;
 	int ret;
 
-	transfer->tx_buf = src;
-	transfer->rx_buf = NULL;
-	transfer->len = len;
+	memset(&transfer, 0, sizeof(transfer));
+	spi_message_init(&msg);
+
+	transfer.tx_buf = src;
+	transfer.len = len;
 
-	ret = spi_sync(qca->spi_dev, msg);
+	spi_message_add_tail(&transfer, &msg);
+	ret = spi_sync(qca->spi_dev, &msg);
 
-	if (ret || (msg->actual_length != len)) {
+	if (ret || (msg.actual_length != len)) {
 		qcaspi_spi_error(qca);
 		return 0;
 	}
@@ -146,23 +151,25 @@ qcaspi_write_legacy(struct qcaspi *qca, u8 *src, u32 len)
 static u32
 qcaspi_read_burst(struct qcaspi *qca, u8 *dst, u32 len)
 {
-	struct spi_message *msg = &qca->spi_msg2;
+	struct spi_message msg;
 	__be16 cmd;
-	struct spi_transfer *transfer = &qca->spi_xfer2[0];
+	struct spi_transfer transfer[2];
 	int ret;
 
+	memset(&transfer, 0, sizeof(transfer));
+	spi_message_init(&msg);
+
 	cmd = cpu_to_be16(QCA7K_SPI_READ | QCA7K_SPI_EXTERNAL);
-	transfer->tx_buf = &cmd;
-	transfer->rx_buf = NULL;
-	transfer->len = QCASPI_CMD_LEN;
-	transfer = &qca->spi_xfer2[1];
-	transfer->tx_buf = NULL;
-	transfer->rx_buf = dst;
-	transfer->len = len;
+	transfer[0].tx_buf = &cmd;
+	transfer[0].len = QCASPI_CMD_LEN;
+	transfer[1].rx_buf = dst;
+	transfer[1].len = len;
 
-	ret = spi_sync(qca->spi_dev, msg);
+	spi_message_add_tail(&transfer[0], &msg);
+	spi_message_add_tail(&transfer[1], &msg);
+	ret = spi_sync(qca->spi_dev, &msg);
 
-	if (ret || (msg->actual_length != QCASPI_CMD_LEN + len)) {
+	if (ret || (msg.actual_length != QCASPI_CMD_LEN + len)) {
 		qcaspi_spi_error(qca);
 		return 0;
 	}
@@ -173,17 +180,20 @@ qcaspi_read_burst(struct qcaspi *qca, u8 *dst, u32 len)
 static u32
 qcaspi_read_legacy(struct qcaspi *qca, u8 *dst, u32 len)
 {
-	struct spi_message *msg = &qca->spi_msg1;
-	struct spi_transfer *transfer = &qca->spi_xfer1;
+	struct spi_message msg;
+	struct spi_transfer transfer;
 	int ret;
 
-	transfer->tx_buf = NULL;
-	transfer->rx_buf = dst;
-	transfer->len = len;
+	memset(&transfer, 0, sizeof(transfer));
+	spi_message_init(&msg);
 
-	ret = spi_sync(qca->spi_dev, msg);
+	transfer.rx_buf = dst;
+	transfer.len = len;
 
-	if (ret || (msg->actual_length != len)) {
+	spi_message_add_tail(&transfer, &msg);
+	ret = spi_sync(qca->spi_dev, &msg);
+
+	if (ret || (msg.actual_length != len)) {
 		qcaspi_spi_error(qca);
 		return 0;
 	}
@@ -195,19 +205,23 @@ static int
 qcaspi_tx_cmd(struct qcaspi *qca, u16 cmd)
 {
 	__be16 tx_data;
-	struct spi_message *msg = &qca->spi_msg1;
-	struct spi_transfer *transfer = &qca->spi_xfer1;
+	struct spi_message msg;
+	struct spi_transfer transfer;
 	int ret;
 
+	memset(&transfer, 0, sizeof(transfer));
+
+	spi_message_init(&msg);
+
 	tx_data = cpu_to_be16(cmd);
-	transfer->len = sizeof(tx_data);
-	transfer->tx_buf = &tx_data;
-	transfer->rx_buf = NULL;
+	transfer.len = sizeof(cmd);
+	transfer.tx_buf = &tx_data;
+	spi_message_add_tail(&transfer, &msg);
 
-	ret = spi_sync(qca->spi_dev, msg);
+	ret = spi_sync(qca->spi_dev, &msg);
 
 	if (!ret)
-		ret = msg->status;
+		ret = msg.status;
 
 	if (ret)
 		qcaspi_spi_error(qca);
@@ -835,16 +849,6 @@ qcaspi_netdev_setup(struct net_device *dev)
 	qca = netdev_priv(dev);
 	memset(qca, 0, sizeof(struct qcaspi));
 
-	memset(&qca->spi_xfer1, 0, sizeof(struct spi_transfer));
-	memset(&qca->spi_xfer2, 0, sizeof(struct spi_transfer) * 2);
-
-	spi_message_init(&qca->spi_msg1);
-	spi_message_add_tail(&qca->spi_xfer1, &qca->spi_msg1);
-
-	spi_message_init(&qca->spi_msg2);
-	spi_message_add_tail(&qca->spi_xfer2[0], &qca->spi_msg2);
-	spi_message_add_tail(&qca->spi_xfer2[1], &qca->spi_msg2);
-
 	memset(&qca->txr, 0, sizeof(qca->txr));
 	qca->txr.count = TX_RING_MAX_LEN;
 }
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.h b/drivers/net/ethernet/qualcomm/qca_spi.h
index fc4beb1b32d1..fc0e98726b36 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.h
+++ b/drivers/net/ethernet/qualcomm/qca_spi.h
@@ -83,11 +83,6 @@ struct qcaspi {
 	struct tx_ring txr;
 	struct qcaspi_stats stats;
 
-	struct spi_message spi_msg1;
-	struct spi_message spi_msg2;
-	struct spi_transfer spi_xfer1;
-	struct spi_transfer spi_xfer2[2];
-
 	u8 *rx_buffer;
 	u32 buffer_size;
 	u8 sync;
-- 
cgit v1.2.3


From 76d5581c870454be5f1f1a106c57985902e7ea20 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Sun, 5 Aug 2018 09:19:33 +0300
Subject: net/mlx5: Fix use-after-free in self-healing flow

When the mlx5 health mechanism detects a problem while the driver
is in the middle of init_one or remove_one, the driver needs to prevent
the health mechanism from scheduling future work; if future work
is scheduled, there is a problem with use-after-free: the system WQ
tries to run the work item (which has been freed) at the scheduled
future time.

Prevent this by disabling work item scheduling in the health mechanism
when the driver is in the middle of init_one() or remove_one().

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Reviewed-by: Feras Daoud <ferasda@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/health.c | 10 +++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/main.c   |  6 +++---
 include/linux/mlx5/driver.h                      |  2 +-
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index d39b0b7011b2..9f39aeca863f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -331,9 +331,17 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
 	add_timer(&health->timer);
 }
 
-void mlx5_stop_health_poll(struct mlx5_core_dev *dev)
+void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
 {
 	struct mlx5_core_health *health = &dev->priv.health;
+	unsigned long flags;
+
+	if (disable_health) {
+		spin_lock_irqsave(&health->wq_lock, flags);
+		set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+		set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
+		spin_unlock_irqrestore(&health->wq_lock, flags);
+	}
 
 	del_timer_sync(&health->timer);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index cf3e4a659052..739aad0a0b35 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1286,7 +1286,7 @@ err_cleanup_once:
 		mlx5_cleanup_once(dev);
 
 err_stop_poll:
-	mlx5_stop_health_poll(dev);
+	mlx5_stop_health_poll(dev, boot);
 	if (mlx5_cmd_teardown_hca(dev)) {
 		dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
 		goto out_err;
@@ -1346,7 +1346,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	mlx5_free_irq_vectors(dev);
 	if (cleanup)
 		mlx5_cleanup_once(dev);
-	mlx5_stop_health_poll(dev);
+	mlx5_stop_health_poll(dev, cleanup);
 	err = mlx5_cmd_teardown_hca(dev);
 	if (err) {
 		dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
@@ -1608,7 +1608,7 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
 	 * with the HCA, so the health polll is no longer needed.
 	 */
 	mlx5_drain_health_wq(dev);
-	mlx5_stop_health_poll(dev);
+	mlx5_stop_health_poll(dev, false);
 
 	ret = mlx5_cmd_force_teardown_hca(dev);
 	if (ret) {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 7a452716de4b..aa65f58c6610 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1052,7 +1052,7 @@ int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
 void mlx5_health_cleanup(struct mlx5_core_dev *dev);
 int mlx5_health_init(struct mlx5_core_dev *dev);
 void mlx5_start_health_poll(struct mlx5_core_dev *dev);
-void mlx5_stop_health_poll(struct mlx5_core_dev *dev);
+void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health);
 void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
 void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
 void mlx5_drain_health_recovery(struct mlx5_core_dev *dev);
-- 
cgit v1.2.3


From 5df816e7f43f1297c40021ef17ec6e722b45c82f Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Tue, 7 Aug 2018 09:59:03 +0300
Subject: net/mlx5: Fix debugfs cleanup in the device init/remove flow

When initializing the device (procedure init_one), the driver
calls mlx5_pci_init to perform pci initialization. As part of this
initialization, mlx5_pci_init creates a debugfs directory.
If this creation fails, init_one aborts, returning failure to
the caller (which is the probe method caller).

The main reason for such a failure to occur is if the debugfs
directory already exists. This can happen if the last time
mlx5_pci_close was called, debugfs_remove (silently) failed due
to the debugfs directory not being empty.

Guarantee that such a debugfs_remove failure will not occur by
instead calling debugfs_remove_recursive in procedure mlx5_pci_close.

Fixes: 59211bd3b632 ("net/mlx5: Split the load/unload flow into hardware and software flows")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 739aad0a0b35..b5e9f664fc66 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -878,8 +878,10 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 	priv->numa_node = dev_to_node(&dev->pdev->dev);
 
 	priv->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), mlx5_debugfs_root);
-	if (!priv->dbg_root)
+	if (!priv->dbg_root) {
+		dev_err(&pdev->dev, "Cannot create debugfs dir, aborting\n");
 		return -ENOMEM;
+	}
 
 	err = mlx5_pci_enable_device(dev);
 	if (err) {
@@ -928,7 +930,7 @@ static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 	pci_clear_master(dev->pdev);
 	release_bar(dev->pdev);
 	mlx5_pci_disable_device(dev);
-	debugfs_remove(priv->dbg_root);
+	debugfs_remove_recursive(priv->dbg_root);
 }
 
 static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
-- 
cgit v1.2.3


From 8d71e818506718e8d7032ce824b5c74a17d4f7a5 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Tue, 21 Aug 2018 16:04:41 +0300
Subject: net/mlx5: Use u16 for Work Queue buffer fragment size

Minimal stride size is 16.
Hence, the number of strides in a fragment (of PAGE_SIZE)
is <= PAGE_SIZE / 16 <= 4K.

u16 is sufficient to represent this.

Fixes: 388ca8be0037 ("IB/mlx5: Implement fragmented completion queue (CQ)")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/wq.c | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/wq.h | 2 +-
 include/linux/mlx5/driver.h                  | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index c8c315eb5128..d838af9539b1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -39,9 +39,9 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)
 	return (u32)wq->fbc.sz_m1 + 1;
 }
 
-u32 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq)
+u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq)
 {
-	return (u32)wq->fbc.frag_sz_m1 + 1;
+	return wq->fbc.frag_sz_m1 + 1;
 }
 
 u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index 2bd4c3184eba..3a1a170bb2d7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -80,7 +80,7 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		       void *wqc, struct mlx5_wq_cyc *wq,
 		       struct mlx5_wq_ctrl *wq_ctrl);
 u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
-u32 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq);
+u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq);
 
 int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		      void *qpc, struct mlx5_wq_qp *wq,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index aa65f58c6610..3a1258fd8ac3 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -362,7 +362,7 @@ struct mlx5_frag_buf {
 struct mlx5_frag_buf_ctrl {
 	struct mlx5_frag_buf	frag_buf;
 	u32			sz_m1;
-	u32			frag_sz_m1;
+	u16			frag_sz_m1;
 	u32			strides_offset;
 	u8			log_sz;
 	u8			log_stride;
-- 
cgit v1.2.3


From a09036221092989b88c55d24d1f12ceb1d7d361f Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Tue, 21 Aug 2018 16:07:58 +0300
Subject: net/mlx5: Use u16 for Work Queue buffer strides offset

Minimal stride size is 16.
Hence, the number of strides in a fragment (of PAGE_SIZE)
is <= PAGE_SIZE / 16 <= 4K.

u16 is sufficient to represent this.

Fixes: d7037ad73daa ("net/mlx5: Fix QP fragmented buffer allocation")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/wq.c | 2 +-
 include/linux/mlx5/driver.h                  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index d838af9539b1..68e7f8df2a6d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -138,7 +138,7 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		      void *qpc, struct mlx5_wq_qp *wq,
 		      struct mlx5_wq_ctrl *wq_ctrl)
 {
-	u32 sq_strides_offset;
+	u16 sq_strides_offset;
 	u32 rq_pg_remainder;
 	int err;
 
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 3a1258fd8ac3..66d94b4557cf 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -363,7 +363,7 @@ struct mlx5_frag_buf_ctrl {
 	struct mlx5_frag_buf	frag_buf;
 	u32			sz_m1;
 	u16			frag_sz_m1;
-	u32			strides_offset;
+	u16			strides_offset;
 	u8			log_sz;
 	u8			log_stride;
 	u8			log_frag_strides;
@@ -995,7 +995,7 @@ static inline u32 mlx5_base_mkey(const u32 key)
 }
 
 static inline void mlx5_fill_fbc_offset(u8 log_stride, u8 log_sz,
-					u32 strides_offset,
+					u16 strides_offset,
 					struct mlx5_frag_buf_ctrl *fbc)
 {
 	fbc->log_stride = log_stride;
-- 
cgit v1.2.3


From c88a026e01219488e745f4f0267fd76c2bb68421 Mon Sep 17 00:00:00 2001
From: Raed Salem <raeds@mellanox.com>
Date: Tue, 21 Aug 2018 15:22:42 +0300
Subject: net/mlx5: E-Switch, Fix memory leak when creating switchdev mode FDB
 tables

The memory allocated for the slow path table flow group input structure
was not freed upon successful return, fix that.

Fixes: 1967ce6ea5c8 ("net/mlx5: E-Switch, Refactor fast path FDB table creation in switchdev mode")
Signed-off-by: Raed Salem <raeds@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index f72b5c9dcfe9..3028e8d90920 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -663,6 +663,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
 	if (err)
 		goto miss_rule_err;
 
+	kvfree(flow_group_in);
 	return 0;
 
 miss_rule_err:
-- 
cgit v1.2.3


From 071304772fc747d5df13c51f1cf48a4b922a5e0d Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@mellanox.com>
Date: Sun, 19 Aug 2018 08:56:09 +0300
Subject: net/mlx5: Fix not releasing read lock when adding flow rules

If building match list fg fails and we never jumped to
search_again_locked label then the function returned without
unlocking the read lock.

Fixes: bd71b08ec2ee ("net/mlx5: Support multiple updates of steering rules in parallel")
Signed-off-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index f418541af7cf..384b560f2a93 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1726,6 +1726,8 @@ search_again_locked:
 	if (err) {
 		if (take_write)
 			up_write_ref_node(&ft->node);
+		else
+			up_read_ref_node(&ft->node);
 		return ERR_PTR(err);
 	}
 
-- 
cgit v1.2.3


From df7ddb2396cd162e64aaff9401be05e31e438961 Mon Sep 17 00:00:00 2001
From: Daniel Jurgens <danielj@mellanox.com>
Date: Mon, 27 Aug 2018 09:09:46 -0500
Subject: net/mlx5: Consider PCI domain in search for next dev

The PCI BDF is not unique. PCI domain must also be considered when
searching for the next physical device during lag setup. Example below:

mlx5_core 0000:01:00.0: MLX5E: StrdRq(1) RqSz(8) StrdSz(128) RxCqeCmprss(0)
mlx5_core 0000:01:00.1: MLX5E: StrdRq(1) RqSz(8) StrdSz(128) RxCqeCmprss(0)
mlx5_core 0001:01:00.0: MLX5E: StrdRq(1) RqSz(8) StrdSz(128) RxCqeCmprss(0)
mlx5_core 0001:01:00.1: MLX5E: StrdRq(1) RqSz(8) StrdSz(128) RxCqeCmprss(0)

Signed-off-by: Daniel Jurgens <danielj@mellanox.com>
Reviewed-by: Aviv Heller <avivh@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/dev.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index b994b80d5714..ada723bd91b6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -391,16 +391,17 @@ void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol)
 		}
 }
 
-static u16 mlx5_gen_pci_id(struct mlx5_core_dev *dev)
+static u32 mlx5_gen_pci_id(struct mlx5_core_dev *dev)
 {
-	return (u16)((dev->pdev->bus->number << 8) |
+	return (u32)((pci_domain_nr(dev->pdev->bus) << 16) |
+		     (dev->pdev->bus->number << 8) |
 		     PCI_SLOT(dev->pdev->devfn));
 }
 
 /* Must be called with intf_mutex held */
 struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
 {
-	u16 pci_id = mlx5_gen_pci_id(dev);
+	u32 pci_id = mlx5_gen_pci_id(dev);
 	struct mlx5_core_dev *res = NULL;
 	struct mlx5_core_dev *tmp_dev;
 	struct mlx5_priv *priv;
-- 
cgit v1.2.3


From 47bc94b82291e007da61ee1b3d18c77871f3e158 Mon Sep 17 00:00:00 2001
From: Huy Nguyen <huyn@mellanox.com>
Date: Wed, 15 Aug 2018 11:08:48 -0500
Subject: net/mlx5: Check for error in mlx5_attach_interface

Currently, mlx5_attach_interface does not check for error
after calling intf->attach or intf->add. When these two calls
fails, the client is not initialized and will cause issues such as
kernel panic on invalid address in the teardown path (mlx5_detach_interface)

Fixes: 737a234bb638 ("net/mlx5: Introduce attach/detach to interface API")
Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/dev.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index ada723bd91b6..37ba7c78859d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -132,11 +132,11 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
 	delayed_event_start(priv);
 
 	dev_ctx->context = intf->add(dev);
-	set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
-	if (intf->attach)
-		set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
-
 	if (dev_ctx->context) {
+		set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
+		if (intf->attach)
+			set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
+
 		spin_lock_irq(&priv->ctx_lock);
 		list_add_tail(&dev_ctx->list, &priv->ctx_list);
 
@@ -211,12 +211,17 @@ static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv
 	if (intf->attach) {
 		if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
 			goto out;
-		intf->attach(dev, dev_ctx->context);
+		if (intf->attach(dev, dev_ctx->context))
+			goto out;
+
 		set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
 	} else {
 		if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
 			goto out;
 		dev_ctx->context = intf->add(dev);
+		if (!dev_ctx->context)
+			goto out;
+
 		set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
 	}
 
-- 
cgit v1.2.3


From fc433829f9a29530d492f0eb20804ac5e6967204 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Fri, 24 Aug 2018 12:24:10 -0700
Subject: net/mlx5e: Ethtool steering, fix udp source port value

Copy and paste bug was introduced in the offending patch.
We need to write udp source port value into the headers value and not
headers criteria "mask".

Fixes: 142644f8a1f8 ("net/mlx5e: Ethtool steering flow parsing refactoring")
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index 75bb981e00b7..41cde926cdab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -191,7 +191,7 @@ set_udp(void *headers_c, void *headers_v, __be16 psrc_m, __be16 psrc_v,
 {
 	if (psrc_m) {
 		MLX5E_FTE_SET(headers_c, udp_sport, 0xffff);
-		MLX5E_FTE_SET(headers_c, udp_sport, ntohs(psrc_v));
+		MLX5E_FTE_SET(headers_v, udp_sport, ntohs(psrc_v));
 	}
 
 	if (pdst_m) {
-- 
cgit v1.2.3


From ad9421e36a77056a4f095d49b9605e80b4d216ed Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@mellanox.com>
Date: Mon, 20 Aug 2018 11:43:03 +0300
Subject: net/mlx5: Fix possible deadlock from lockdep when adding fte to fg

This is a false positive report due to incorrect nested lock
annotations as we lock multiple fgs with the same subclass.
Instead of locking all fgs only lock the one being used as was
done before.

Fixes: bd71b08ec2ee ("net/mlx5: Support multiple updates of steering rules in parallel")
Signed-off-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 74 +++++++++++------------
 1 file changed, 37 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 384b560f2a93..37d114c668b7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1578,6 +1578,33 @@ static u64 matched_fgs_get_version(struct list_head *match_head)
 	return version;
 }
 
+static struct fs_fte *
+lookup_fte_locked(struct mlx5_flow_group *g,
+		  u32 *match_value,
+		  bool take_write)
+{
+	struct fs_fte *fte_tmp;
+
+	if (take_write)
+		nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
+	else
+		nested_down_read_ref_node(&g->node, FS_LOCK_PARENT);
+	fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, match_value,
+					 rhash_fte);
+	if (!fte_tmp || !tree_get_node(&fte_tmp->node)) {
+		fte_tmp = NULL;
+		goto out;
+	}
+
+	nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
+out:
+	if (take_write)
+		up_write_ref_node(&g->node);
+	else
+		up_read_ref_node(&g->node);
+	return fte_tmp;
+}
+
 static struct mlx5_flow_handle *
 try_add_to_existing_fg(struct mlx5_flow_table *ft,
 		       struct list_head *match_head,
@@ -1600,10 +1627,6 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
 	if (IS_ERR(fte))
 		return  ERR_PTR(-ENOMEM);
 
-	list_for_each_entry(iter, match_head, list) {
-		nested_down_read_ref_node(&iter->g->node, FS_LOCK_PARENT);
-	}
-
 search_again_locked:
 	version = matched_fgs_get_version(match_head);
 	/* Try to find a fg that already contains a matching fte */
@@ -1611,20 +1634,9 @@ search_again_locked:
 		struct fs_fte *fte_tmp;
 
 		g = iter->g;
-		fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, spec->match_value,
-						 rhash_fte);
-		if (!fte_tmp || !tree_get_node(&fte_tmp->node))
+		fte_tmp = lookup_fte_locked(g, spec->match_value, take_write);
+		if (!fte_tmp)
 			continue;
-
-		nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
-		if (!take_write) {
-			list_for_each_entry(iter, match_head, list)
-				up_read_ref_node(&iter->g->node);
-		} else {
-			list_for_each_entry(iter, match_head, list)
-				up_write_ref_node(&iter->g->node);
-		}
-
 		rule = add_rule_fg(g, spec->match_value,
 				   flow_act, dest, dest_num, fte_tmp);
 		up_write_ref_node(&fte_tmp->node);
@@ -1633,19 +1645,6 @@ search_again_locked:
 		return rule;
 	}
 
-	/* No group with matching fte found. Try to add a new fte to any
-	 * matching fg.
-	 */
-
-	if (!take_write) {
-		list_for_each_entry(iter, match_head, list)
-			up_read_ref_node(&iter->g->node);
-		list_for_each_entry(iter, match_head, list)
-			nested_down_write_ref_node(&iter->g->node,
-						   FS_LOCK_PARENT);
-		take_write = true;
-	}
-
 	/* Check the ft version, for case that new flow group
 	 * was added while the fgs weren't locked
 	 */
@@ -1657,27 +1656,30 @@ search_again_locked:
 	/* Check the fgs version, for case the new FTE with the
 	 * same values was added while the fgs weren't locked
 	 */
-	if (version != matched_fgs_get_version(match_head))
+	if (version != matched_fgs_get_version(match_head)) {
+		take_write = true;
 		goto search_again_locked;
+	}
 
 	list_for_each_entry(iter, match_head, list) {
 		g = iter->g;
 
 		if (!g->node.active)
 			continue;
+
+		nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
+
 		err = insert_fte(g, fte);
 		if (err) {
+			up_write_ref_node(&g->node);
 			if (err == -ENOSPC)
 				continue;
-			list_for_each_entry(iter, match_head, list)
-				up_write_ref_node(&iter->g->node);
 			kmem_cache_free(steering->ftes_cache, fte);
 			return ERR_PTR(err);
 		}
 
 		nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
-		list_for_each_entry(iter, match_head, list)
-			up_write_ref_node(&iter->g->node);
+		up_write_ref_node(&g->node);
 		rule = add_rule_fg(g, spec->match_value,
 				   flow_act, dest, dest_num, fte);
 		up_write_ref_node(&fte->node);
@@ -1686,8 +1688,6 @@ search_again_locked:
 	}
 	rule = ERR_PTR(-ENOENT);
 out:
-	list_for_each_entry(iter, match_head, list)
-		up_write_ref_node(&iter->g->node);
 	kmem_cache_free(steering->ftes_cache, fte);
 	return rule;
 }
-- 
cgit v1.2.3


From 0a3b8b2b215f9e84b82ae97df71292ccfd92b1e7 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 3 Sep 2018 19:12:41 -0700
Subject: tipc: orphan sock in tipc_release()

Before we unlock the sock in tipc_release(), we have to
detach sk->sk_socket from sk, otherwise a parallel
tipc_sk_fill_sock_diag() could stil read it after we
free this socket.

Fixes: c30b70deb5f4 ("tipc: implement socket diagnostics for AF_TIPC")
Reported-and-tested-by: syzbot+48804b87c16588ad491d@syzkaller.appspotmail.com
Cc: Jon Maloy <jon.maloy@ericsson.com>
Cc: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index ab7a2a7178f7..a0ff8bffc96b 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -576,6 +576,7 @@ static int tipc_release(struct socket *sock)
 	sk_stop_timer(sk, &sk->sk_timer);
 	tipc_sk_remove(tsk);
 
+	sock_orphan(sk);
 	/* Reject any messages that accumulated in backlog queue */
 	release_sock(sk);
 	tipc_dest_list_purge(&tsk->cong_links);
-- 
cgit v1.2.3


From ee28bb56ac5b4c0c08ef10d33cc7adb749bbf4c6 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Tue, 4 Sep 2018 19:00:19 +0200
Subject: net/sched: fix memory leak in act_tunnel_key_init()

If users try to install act_tunnel_key 'set' rules with duplicate values
of 'index', the tunnel metadata are allocated, but never released. Then,
kmemleak complains as follows:

 # tc a a a tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 index 111
 # echo clear > /sys/kernel/debug/kmemleak
 # tc a a a tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 index 111
 Error: TC IDR already exists.
 We have an error talking to the kernel
 # echo scan > /sys/kernel/debug/kmemleak
 # cat /sys/kernel/debug/kmemleak
 unreferenced object 0xffff8800574e6c80 (size 256):
   comm "tc", pid 5617, jiffies 4298118009 (age 57.990s)
   hex dump (first 32 bytes):
     00 00 00 00 00 00 00 00 00 1c e8 b0 ff ff ff ff  ................
     81 24 c2 ad ff ff ff ff 00 00 00 00 00 00 00 00  .$..............
   backtrace:
     [<00000000b7afbf4e>] tunnel_key_init+0x8a5/0x1800 [act_tunnel_key]
     [<000000007d98fccd>] tcf_action_init_1+0x698/0xac0
     [<0000000099b8f7cc>] tcf_action_init+0x15c/0x590
     [<00000000dc60eebe>] tc_ctl_action+0x336/0x5c2
     [<000000002f5a2f7d>] rtnetlink_rcv_msg+0x357/0x8e0
     [<000000000bfe7575>] netlink_rcv_skb+0x124/0x350
     [<00000000edab656f>] netlink_unicast+0x40f/0x5d0
     [<00000000b322cdcb>] netlink_sendmsg+0x6e8/0xba0
     [<0000000063d9d490>] sock_sendmsg+0xb3/0xf0
     [<00000000f0d3315a>] ___sys_sendmsg+0x654/0x960
     [<00000000c06cbd42>] __sys_sendmsg+0xd3/0x170
     [<00000000ce72e4b0>] do_syscall_64+0xa5/0x470
     [<000000005caa2d97>] entry_SYSCALL_64_after_hwframe+0x49/0xbe
     [<00000000fac1b476>] 0xffffffffffffffff

This problem theoretically happens also in case users attempt to setup a
geneve rule having wrong configuration data, or when the kernel fails to
allocate 'params_new'. Ensure that tunnel_key_init() releases the tunnel
metadata also in the above conditions.

Addresses-Coverity-ID: 1373974 ("Resource leak")
Fixes: d0f6dd8a914f4 ("net/sched: Introduce act_tunnel_key")
Fixes: 0ed5269f9e41f ("net/sched: add tunnel option support to act_tunnel_key")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_tunnel_key.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 420759153d5f..28d58bbc953e 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -317,7 +317,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 						  &metadata->u.tun_info,
 						  opts_len, extack);
 			if (ret < 0)
-				goto err_out;
+				goto release_tun_meta;
 		}
 
 		metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX;
@@ -333,23 +333,24 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 				     &act_tunnel_key_ops, bind, true);
 		if (ret) {
 			NL_SET_ERR_MSG(extack, "Cannot create TC IDR");
-			goto err_out;
+			goto release_tun_meta;
 		}
 
 		ret = ACT_P_CREATED;
 	} else if (!ovr) {
-		tcf_idr_release(*a, bind);
 		NL_SET_ERR_MSG(extack, "TC IDR already exists");
-		return -EEXIST;
+		ret = -EEXIST;
+		goto release_tun_meta;
 	}
 
 	t = to_tunnel_key(*a);
 
 	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
 	if (unlikely(!params_new)) {
-		tcf_idr_release(*a, bind);
 		NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters");
-		return -ENOMEM;
+		ret = -ENOMEM;
+		exists = true;
+		goto release_tun_meta;
 	}
 	params_new->tcft_action = parm->t_action;
 	params_new->tcft_enc_metadata = metadata;
@@ -367,6 +368,9 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 
 	return ret;
 
+release_tun_meta:
+	dst_release(&metadata->dst);
+
 err_out:
 	if (exists)
 		tcf_idr_release(*a, bind);
-- 
cgit v1.2.3


From 222440996d6daf635bed6cb35041be22ede3e8a0 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Wed, 5 Sep 2018 16:55:10 +0200
Subject: net/af_iucv: drop inbound packets with invalid flags

Inbound packets may have any combination of flag bits set in their iucv
header. If we don't know how to handle a specific combination, drop the
skb instead of leaking it.

To clarify what error is returned in this case, replace the hard-coded
0 with the corresponding macro.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/iucv/af_iucv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index a21d8ed0a325..01000c14417f 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -2155,8 +2155,8 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
 	struct sock *sk;
 	struct iucv_sock *iucv;
 	struct af_iucv_trans_hdr *trans_hdr;
+	int err = NET_RX_SUCCESS;
 	char nullstring[8];
-	int err = 0;
 
 	if (skb->len < (ETH_HLEN + sizeof(struct af_iucv_trans_hdr))) {
 		WARN_ONCE(1, "AF_IUCV too short skb, len=%d, min=%d",
@@ -2254,7 +2254,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
 		err = afiucv_hs_callback_rx(sk, skb);
 		break;
 	default:
-		;
+		kfree_skb(skb);
 	}
 
 	return err;
-- 
cgit v1.2.3


From b2f543949acd1ba64313fdad9e672ef47550d773 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Wed, 5 Sep 2018 16:55:11 +0200
Subject: net/af_iucv: fix skb handling on HiperTransport xmit error

When sending an skb, afiucv_hs_send() bails out on various error
conditions. But currently the caller has no way of telling whether the
skb was freed or not - resulting in potentially either
a) leaked skbs from iucv_send_ctrl(), or
b) double-free's from iucv_sock_sendmsg().

As dev_queue_xmit() will always consume the skb (even on error), be
consistent and also free the skb from all other error paths. This way
callers no longer need to care about managing the skb.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/iucv/af_iucv.c | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 01000c14417f..e2f16a0173a9 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -351,20 +351,28 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
 		memcpy(&phs_hdr->iucv_hdr, imsg, sizeof(struct iucv_message));
 
 	skb->dev = iucv->hs_dev;
-	if (!skb->dev)
-		return -ENODEV;
-	if (!(skb->dev->flags & IFF_UP) || !netif_carrier_ok(skb->dev))
-		return -ENETDOWN;
+	if (!skb->dev) {
+		err = -ENODEV;
+		goto err_free;
+	}
+	if (!(skb->dev->flags & IFF_UP) || !netif_carrier_ok(skb->dev)) {
+		err = -ENETDOWN;
+		goto err_free;
+	}
 	if (skb->len > skb->dev->mtu) {
-		if (sock->sk_type == SOCK_SEQPACKET)
-			return -EMSGSIZE;
-		else
-			skb_trim(skb, skb->dev->mtu);
+		if (sock->sk_type == SOCK_SEQPACKET) {
+			err = -EMSGSIZE;
+			goto err_free;
+		}
+		skb_trim(skb, skb->dev->mtu);
 	}
 	skb->protocol = cpu_to_be16(ETH_P_AF_IUCV);
 	nskb = skb_clone(skb, GFP_ATOMIC);
-	if (!nskb)
-		return -ENOMEM;
+	if (!nskb) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+
 	skb_queue_tail(&iucv->send_skb_q, nskb);
 	err = dev_queue_xmit(skb);
 	if (net_xmit_eval(err)) {
@@ -375,6 +383,10 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
 		WARN_ON(atomic_read(&iucv->msg_recv) < 0);
 	}
 	return net_xmit_eval(err);
+
+err_free:
+	kfree_skb(skb);
+	return err;
 }
 
 static struct sock *__iucv_get_sock_by_name(char *nm)
@@ -1167,7 +1179,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 		err = afiucv_hs_send(&txmsg, sk, skb, 0);
 		if (err) {
 			atomic_dec(&iucv->msg_sent);
-			goto fail;
+			goto out;
 		}
 	} else { /* Classic VM IUCV transport */
 		skb_queue_tail(&iucv->send_skb_q, skb);
-- 
cgit v1.2.3


From b7f41565546d393747fd554f9526c1187c6bf652 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Wed, 5 Sep 2018 16:55:12 +0200
Subject: net/iucv: declare iucv_path_table_empty() as static

Fixes a compile warning.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/iucv/iucv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 8f7ef167c45a..eb502c6290c2 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -1874,7 +1874,7 @@ static void iucv_pm_complete(struct device *dev)
  * Returns 0 if there are still iucv pathes defined
  *	   1 if there are no iucv pathes defined
  */
-int iucv_path_table_empty(void)
+static int iucv_path_table_empty(void)
 {
 	int i;
 
-- 
cgit v1.2.3


From 8f5c5fcf353302374b36232d6885c1a3b579e5ca Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 4 Sep 2018 14:54:55 -0700
Subject: tipc: call start and done ops directly in __tipc_nl_compat_dumpit()

__tipc_nl_compat_dumpit() uses a netlink_callback on stack,
so the only way to align it with other ->dumpit() call path
is calling tipc_dump_start() and tipc_dump_done() directly
inside it. Otherwise ->dumpit() would always get NULL from
cb->args[].

But tipc_dump_start() uses sock_net(cb->skb->sk) to retrieve
net pointer, the cb->skb here doesn't set skb->sk, the net pointer
is saved in msg->net instead, so introduce a helper function
__tipc_dump_start() to pass in msg->net.

Ying pointed out cb->args[0...3] are already used by other
callbacks on this call path, so we can't use cb->args[0] any
more, use cb->args[4] instead.

Fixes: 9a07efa9aea2 ("tipc: switch to rhashtable iterator")
Reported-and-tested-by: syzbot+e93a2c41f91b8e2c7d9b@syzkaller.appspotmail.com
Cc: Jon Maloy <jon.maloy@ericsson.com>
Cc: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/netlink_compat.c |  2 ++
 net/tipc/socket.c         | 17 +++++++++++------
 net/tipc/socket.h         |  1 +
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index a2f76743c73a..82f665728382 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -185,6 +185,7 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
 		return -ENOMEM;
 
 	buf->sk = msg->dst_sk;
+	__tipc_dump_start(&cb, msg->net);
 
 	do {
 		int rem;
@@ -216,6 +217,7 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
 	err = 0;
 
 err_out:
+	tipc_dump_done(&cb);
 	kfree_skb(buf);
 
 	if (err == -EMSGSIZE) {
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index a0ff8bffc96b..3f03ddd0e35b 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -3230,7 +3230,7 @@ int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
 				       struct netlink_callback *cb,
 				       struct tipc_sock *tsk))
 {
-	struct rhashtable_iter *iter = (void *)cb->args[0];
+	struct rhashtable_iter *iter = (void *)cb->args[4];
 	struct tipc_sock *tsk;
 	int err;
 
@@ -3266,8 +3266,14 @@ EXPORT_SYMBOL(tipc_nl_sk_walk);
 
 int tipc_dump_start(struct netlink_callback *cb)
 {
-	struct rhashtable_iter *iter = (void *)cb->args[0];
-	struct net *net = sock_net(cb->skb->sk);
+	return __tipc_dump_start(cb, sock_net(cb->skb->sk));
+}
+EXPORT_SYMBOL(tipc_dump_start);
+
+int __tipc_dump_start(struct netlink_callback *cb, struct net *net)
+{
+	/* tipc_nl_name_table_dump() uses cb->args[0...3]. */
+	struct rhashtable_iter *iter = (void *)cb->args[4];
 	struct tipc_net *tn = tipc_net(net);
 
 	if (!iter) {
@@ -3275,17 +3281,16 @@ int tipc_dump_start(struct netlink_callback *cb)
 		if (!iter)
 			return -ENOMEM;
 
-		cb->args[0] = (long)iter;
+		cb->args[4] = (long)iter;
 	}
 
 	rhashtable_walk_enter(&tn->sk_rht, iter);
 	return 0;
 }
-EXPORT_SYMBOL(tipc_dump_start);
 
 int tipc_dump_done(struct netlink_callback *cb)
 {
-	struct rhashtable_iter *hti = (void *)cb->args[0];
+	struct rhashtable_iter *hti = (void *)cb->args[4];
 
 	rhashtable_walk_exit(hti);
 	kfree(hti);
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index d43032e26532..5e575f205afe 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -69,5 +69,6 @@ int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
 				       struct netlink_callback *cb,
 				       struct tipc_sock *tsk));
 int tipc_dump_start(struct netlink_callback *cb);
+int __tipc_dump_start(struct netlink_callback *cb, struct net *net);
 int tipc_dump_done(struct netlink_callback *cb);
 #endif
-- 
cgit v1.2.3


From f74dd480cf4e31e12971c58a1d832044db945670 Mon Sep 17 00:00:00 2001
From: "Maciej S. Szmigiero" <mail@maciej.szmigiero.name>
Date: Fri, 7 Sep 2018 20:15:22 +0200
Subject: r8169: set TxConfig register after TX / RX is enabled, just like
 RxConfig

Commit 3559d81e76bf ("r8169: simplify rtl_hw_start_8169") changed order of
two register writes:
1) Caused RxConfig to be written before TX / RX is enabled,
2) Caused TxConfig to be written before TX / RX is enabled.

At least on XIDs 10000000 ("RTL8169sb/8110sb") and
18000000 ("RTL8169sc/8110sc") such writes are ignored by the chip, leaving
values in these registers intact.

Change 1) was reverted by
commit 05212ba8132b42 ("r8169: set RxConfig after tx/rx is enabled for RTL8169sb/8110sb devices"),
however change 2) wasn't.

In practice, this caused TxConfig's "InterFrameGap time" and "Max DMA Burst
Size per Tx DMA Burst" bits to be zero dramatically reducing TX performance
(in my tests it dropped from around 500Mbps to around 50Mbps).

This patch fixes the issue by moving TxConfig register write a bit later in
the code so it happens after TX / RX is already enabled.

Fixes: 05212ba8132b42 ("r8169: set RxConfig after tx/rx is enabled for RTL8169sb/8110sb devices")
Signed-off-by: Maciej S. Szmigiero <mail@maciej.szmigiero.name>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index b08d51bf7a20..a1f37d58e2fe 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -4634,13 +4634,13 @@ static void rtl_hw_start(struct  rtl8169_private *tp)
 
 	rtl_set_rx_max_size(tp);
 	rtl_set_rx_tx_desc_registers(tp);
-	rtl_set_tx_config_registers(tp);
 	RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 
 	/* Initially a 10 us delay. Turned it into a PCI commit. - FR */
 	RTL_R8(tp, IntrMask);
 	RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
 	rtl_init_rxcfg(tp);
+	rtl_set_tx_config_registers(tp);
 
 	rtl_set_rx_mode(tp->dev);
 	/* no early-rx interrupts */
-- 
cgit v1.2.3


From 8edfe2e992b75aee3da9316e9697c531194c2f53 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Fri, 7 Sep 2018 14:21:30 +0200
Subject: xen/netfront: fix waiting for xenbus state change

Commit 822fb18a82aba ("xen-netfront: wait xenbus state change when load
module manually") added a new wait queue to wait on for a state change
when the module is loaded manually. Unfortunately there is no wakeup
anywhere to stop that waiting.

Instead of introducing a new wait queue rename the existing
module_unload_q to module_wq and use it for both purposes (loading and
unloading).

As any state change of the backend might be intended to stop waiting
do the wake_up_all() in any case when netback_changed() is called.

Fixes: 822fb18a82aba ("xen-netfront: wait xenbus state change when load module manually")
Cc: <stable@vger.kernel.org> #4.18
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/xen-netfront.c | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 73f596a90c69..9407acbd19a9 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -87,8 +87,7 @@ struct netfront_cb {
 /* IRQ name is queue name with "-tx" or "-rx" appended */
 #define IRQ_NAME_SIZE (QUEUE_NAME_SIZE + 3)
 
-static DECLARE_WAIT_QUEUE_HEAD(module_load_q);
-static DECLARE_WAIT_QUEUE_HEAD(module_unload_q);
+static DECLARE_WAIT_QUEUE_HEAD(module_wq);
 
 struct netfront_stats {
 	u64			packets;
@@ -1332,11 +1331,11 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
 	netif_carrier_off(netdev);
 
 	xenbus_switch_state(dev, XenbusStateInitialising);
-	wait_event(module_load_q,
-			   xenbus_read_driver_state(dev->otherend) !=
-			   XenbusStateClosed &&
-			   xenbus_read_driver_state(dev->otherend) !=
-			   XenbusStateUnknown);
+	wait_event(module_wq,
+		   xenbus_read_driver_state(dev->otherend) !=
+		   XenbusStateClosed &&
+		   xenbus_read_driver_state(dev->otherend) !=
+		   XenbusStateUnknown);
 	return netdev;
 
  exit:
@@ -2010,15 +2009,14 @@ static void netback_changed(struct xenbus_device *dev,
 
 	dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state));
 
+	wake_up_all(&module_wq);
+
 	switch (backend_state) {
 	case XenbusStateInitialising:
 	case XenbusStateInitialised:
 	case XenbusStateReconfiguring:
 	case XenbusStateReconfigured:
-		break;
-
 	case XenbusStateUnknown:
-		wake_up_all(&module_unload_q);
 		break;
 
 	case XenbusStateInitWait:
@@ -2034,12 +2032,10 @@ static void netback_changed(struct xenbus_device *dev,
 		break;
 
 	case XenbusStateClosed:
-		wake_up_all(&module_unload_q);
 		if (dev->state == XenbusStateClosed)
 			break;
 		/* Missed the backend's CLOSING state -- fallthrough */
 	case XenbusStateClosing:
-		wake_up_all(&module_unload_q);
 		xenbus_frontend_closed(dev);
 		break;
 	}
@@ -2147,14 +2143,14 @@ static int xennet_remove(struct xenbus_device *dev)
 
 	if (xenbus_read_driver_state(dev->otherend) != XenbusStateClosed) {
 		xenbus_switch_state(dev, XenbusStateClosing);
-		wait_event(module_unload_q,
+		wait_event(module_wq,
 			   xenbus_read_driver_state(dev->otherend) ==
 			   XenbusStateClosing ||
 			   xenbus_read_driver_state(dev->otherend) ==
 			   XenbusStateUnknown);
 
 		xenbus_switch_state(dev, XenbusStateClosed);
-		wait_event(module_unload_q,
+		wait_event(module_wq,
 			   xenbus_read_driver_state(dev->otherend) ==
 			   XenbusStateClosed ||
 			   xenbus_read_driver_state(dev->otherend) ==
-- 
cgit v1.2.3


From a162c3511410b50f09c002fea56fea2153b679d0 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Thu, 6 Sep 2018 14:50:16 -0700
Subject: net_sched: properly cancel netlink dump on failure

When nla_put*() fails after nla_nest_start(), we need
to call nla_nest_cancel() to cancel the message, otherwise
we end up calling nla_nest_end() like a success.

Fixes: 0ed5269f9e41 ("net/sched: add tunnel option support to act_tunnel_key")
Cc: Davide Caratti <dcaratti@redhat.com>
Cc: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_tunnel_key.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 28d58bbc953e..681f6f04e7da 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -412,8 +412,10 @@ static int tunnel_key_geneve_opts_dump(struct sk_buff *skb,
 		    nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE,
 			       opt->type) ||
 		    nla_put(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA,
-			    opt->length * 4, opt + 1))
+			    opt->length * 4, opt + 1)) {
+			nla_nest_cancel(skb, start);
 			return -EMSGSIZE;
+		}
 
 		len -= sizeof(struct geneve_opt) + opt->length * 4;
 		src += sizeof(struct geneve_opt) + opt->length * 4;
@@ -427,7 +429,7 @@ static int tunnel_key_opts_dump(struct sk_buff *skb,
 				const struct ip_tunnel_info *info)
 {
 	struct nlattr *start;
-	int err;
+	int err = -EINVAL;
 
 	if (!info->options_len)
 		return 0;
@@ -439,9 +441,11 @@ static int tunnel_key_opts_dump(struct sk_buff *skb,
 	if (info->key.tun_flags & TUNNEL_GENEVE_OPT) {
 		err = tunnel_key_geneve_opts_dump(skb, info);
 		if (err)
-			return err;
+			goto err_out;
 	} else {
-		return -EINVAL;
+err_out:
+		nla_nest_cancel(skb, start);
+		return err;
 	}
 
 	nla_nest_end(skb, start);
-- 
cgit v1.2.3


From 5cf4a8532c992bb22a9ecd5f6d93f873f4eaccc2 Mon Sep 17 00:00:00 2001
From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Date: Thu, 6 Sep 2018 15:54:59 +0200
Subject: tcp: really ignore MSG_ZEROCOPY if no SO_ZEROCOPY

According to the documentation in msg_zerocopy.rst, the SO_ZEROCOPY
flag was introduced because send(2) ignores unknown message flags and
any legacy application which was accidentally passing the equivalent of
MSG_ZEROCOPY earlier should not see any new behaviour.

Before commit f214f915e7db ("tcp: enable MSG_ZEROCOPY"), a send(2) call
which passed the equivalent of MSG_ZEROCOPY without setting SO_ZEROCOPY
would succeed.  However, after that commit, it fails with -ENOBUFS.  So
it appears that the SO_ZEROCOPY flag fails to fulfill its intended
purpose.  Fix it.

Fixes: f214f915e7db ("tcp: enable MSG_ZEROCOPY")
Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 3 ---
 net/ipv4/tcp.c    | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c996c09d095f..b2c807f67aba 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -939,9 +939,6 @@ struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
 
 	WARN_ON_ONCE(!in_task());
 
-	if (!sock_flag(sk, SOCK_ZEROCOPY))
-		return NULL;
-
 	skb = sock_omalloc(sk, 0, GFP_KERNEL);
 	if (!skb)
 		return NULL;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b8af2fec5ad5..10c6246396cc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1185,7 +1185,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 
 	flags = msg->msg_flags;
 
-	if (flags & MSG_ZEROCOPY && size) {
+	if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) {
 		if (sk->sk_state != TCP_ESTABLISHED) {
 			err = -EINVAL;
 			goto out_err;
-- 
cgit v1.2.3


From 772ed869f535b4ec2b134645c951ff22de4d3f79 Mon Sep 17 00:00:00 2001
From: Netanel Belgazal <netanel@amazon.com>
Date: Sun, 9 Sep 2018 08:15:20 +0000
Subject: net: ena: fix surprise unplug NULL dereference kernel crash

Starting with driver version 1.5.0, in case of a surprise device
unplug, there is a race caused by invoking ena_destroy_device()
from two different places. As a result, the readless register might
be accessed after it was destroyed.

Signed-off-by: Netanel Belgazal <netanel@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index c673ac2df65b..170830b807fe 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -3409,12 +3409,12 @@ static void ena_remove(struct pci_dev *pdev)
 		netdev->rx_cpu_rmap = NULL;
 	}
 #endif /* CONFIG_RFS_ACCEL */
-
-	unregister_netdev(netdev);
 	del_timer_sync(&adapter->timer_service);
 
 	cancel_work_sync(&adapter->reset_task);
 
+	unregister_netdev(netdev);
+
 	/* Reset the device only if the device is running. */
 	if (test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
 		ena_com_dev_reset(ena_dev, adapter->reset_reason);
-- 
cgit v1.2.3


From ef5b0771d247379c90c8bf1332ff32f7f74bff7f Mon Sep 17 00:00:00 2001
From: Netanel Belgazal <netanel@amazon.com>
Date: Sun, 9 Sep 2018 08:15:21 +0000
Subject: net: ena: fix driver when PAGE_SIZE == 64kB

The buffer length field in the ena rx descriptor is 16 bit, and the
current driver passes a full page in each ena rx descriptor.
When PAGE_SIZE equals 64kB or more, the buffer length field becomes
zero.
To solve this issue, limit the ena Rx descriptor to use 16kB even
when allocating 64kB kernel pages. This change would not impact ena
device functionality, as 16kB is still larger than maximum MTU.

Signed-off-by: Netanel Belgazal <netanel@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 10 +++++-----
 drivers/net/ethernet/amazon/ena/ena_netdev.h | 11 +++++++++++
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 170830b807fe..69e684fd2787 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -461,7 +461,7 @@ static inline int ena_alloc_rx_page(struct ena_ring *rx_ring,
 		return -ENOMEM;
 	}
 
-	dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE,
+	dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE,
 			   DMA_FROM_DEVICE);
 	if (unlikely(dma_mapping_error(rx_ring->dev, dma))) {
 		u64_stats_update_begin(&rx_ring->syncp);
@@ -478,7 +478,7 @@ static inline int ena_alloc_rx_page(struct ena_ring *rx_ring,
 	rx_info->page_offset = 0;
 	ena_buf = &rx_info->ena_buf;
 	ena_buf->paddr = dma;
-	ena_buf->len = PAGE_SIZE;
+	ena_buf->len = ENA_PAGE_SIZE;
 
 	return 0;
 }
@@ -495,7 +495,7 @@ static void ena_free_rx_page(struct ena_ring *rx_ring,
 		return;
 	}
 
-	dma_unmap_page(rx_ring->dev, ena_buf->paddr, PAGE_SIZE,
+	dma_unmap_page(rx_ring->dev, ena_buf->paddr, ENA_PAGE_SIZE,
 		       DMA_FROM_DEVICE);
 
 	__free_page(page);
@@ -916,10 +916,10 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
 	do {
 		dma_unmap_page(rx_ring->dev,
 			       dma_unmap_addr(&rx_info->ena_buf, paddr),
-			       PAGE_SIZE, DMA_FROM_DEVICE);
+			       ENA_PAGE_SIZE, DMA_FROM_DEVICE);
 
 		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page,
-				rx_info->page_offset, len, PAGE_SIZE);
+				rx_info->page_offset, len, ENA_PAGE_SIZE);
 
 		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
 			  "rx skb updated. len %d. data_len %d\n",
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index f1972b5ab650..7c7ae56c52cf 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -355,4 +355,15 @@ void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf);
 
 int ena_get_sset_count(struct net_device *netdev, int sset);
 
+/* The ENA buffer length fields is 16 bit long. So when PAGE_SIZE == 64kB the
+ * driver passas 0.
+ * Since the max packet size the ENA handles is ~9kB limit the buffer length to
+ * 16kB.
+ */
+#if PAGE_SIZE > SZ_16K
+#define ENA_PAGE_SIZE SZ_16K
+#else
+#define ENA_PAGE_SIZE PAGE_SIZE
+#endif
+
 #endif /* !(ENA_H) */
-- 
cgit v1.2.3


From cfa324a514233b28a6934de619183eee941f02d7 Mon Sep 17 00:00:00 2001
From: Netanel Belgazal <netanel@amazon.com>
Date: Sun, 9 Sep 2018 08:15:22 +0000
Subject: net: ena: fix device destruction to gracefully free resources

When ena_destroy_device() is called from ena_suspend(), the device is
still reachable from the driver. Therefore, the driver can send a command
to the device to free all resources.
However, in all other cases of calling ena_destroy_device(), the device is
potentially in an error state and unreachable from the driver. In these
cases the driver must not send commands to the device.

The current implementation does not request resource freeing from the
device even when possible. We add the graceful parameter to
ena_destroy_device() to enable resource freeing when possible, and
use it in ena_suspend().

Signed-off-by: Netanel Belgazal <netanel@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 69e684fd2787..035d47d2179a 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -76,7 +76,7 @@ MODULE_DEVICE_TABLE(pci, ena_pci_tbl);
 
 static int ena_rss_init_default(struct ena_adapter *adapter);
 static void check_for_admin_com_state(struct ena_adapter *adapter);
-static void ena_destroy_device(struct ena_adapter *adapter);
+static void ena_destroy_device(struct ena_adapter *adapter, bool graceful);
 static int ena_restore_device(struct ena_adapter *adapter);
 
 static void ena_tx_timeout(struct net_device *dev)
@@ -1900,7 +1900,7 @@ static int ena_close(struct net_device *netdev)
 			  "Destroy failure, restarting device\n");
 		ena_dump_stats_to_dmesg(adapter);
 		/* rtnl lock already obtained in dev_ioctl() layer */
-		ena_destroy_device(adapter);
+		ena_destroy_device(adapter, false);
 		ena_restore_device(adapter);
 	}
 
@@ -2550,7 +2550,7 @@ err_disable_msix:
 	return rc;
 }
 
-static void ena_destroy_device(struct ena_adapter *adapter)
+static void ena_destroy_device(struct ena_adapter *adapter, bool graceful)
 {
 	struct net_device *netdev = adapter->netdev;
 	struct ena_com_dev *ena_dev = adapter->ena_dev;
@@ -2563,7 +2563,8 @@ static void ena_destroy_device(struct ena_adapter *adapter)
 	dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
 	adapter->dev_up_before_reset = dev_up;
 
-	ena_com_set_admin_running_state(ena_dev, false);
+	if (!graceful)
+		ena_com_set_admin_running_state(ena_dev, false);
 
 	if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
 		ena_down(adapter);
@@ -2665,7 +2666,7 @@ static void ena_fw_reset_device(struct work_struct *work)
 		return;
 	}
 	rtnl_lock();
-	ena_destroy_device(adapter);
+	ena_destroy_device(adapter, false);
 	ena_restore_device(adapter);
 	rtnl_unlock();
 }
@@ -3467,7 +3468,7 @@ static int ena_suspend(struct pci_dev *pdev,  pm_message_t state)
 			"ignoring device reset request as the device is being suspended\n");
 		clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
 	}
-	ena_destroy_device(adapter);
+	ena_destroy_device(adapter, true);
 	rtnl_unlock();
 	return 0;
 }
-- 
cgit v1.2.3


From fe870c77efdf8682252545cbd3d29800d8379efc Mon Sep 17 00:00:00 2001
From: Netanel Belgazal <netanel@amazon.com>
Date: Sun, 9 Sep 2018 08:15:23 +0000
Subject: net: ena: fix potential double ena_destroy_device()

ena_destroy_device() can potentially be called twice.
To avoid this, check that the device is running and
only then proceed destroying it.

Signed-off-by: Netanel Belgazal <netanel@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 035d47d2179a..a68c2a8d4da2 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -2556,6 +2556,9 @@ static void ena_destroy_device(struct ena_adapter *adapter, bool graceful)
 	struct ena_com_dev *ena_dev = adapter->ena_dev;
 	bool dev_up;
 
+	if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
+		return;
+
 	netif_carrier_off(netdev);
 
 	del_timer_sync(&adapter->timer_service);
@@ -2592,6 +2595,7 @@ static void ena_destroy_device(struct ena_adapter *adapter, bool graceful)
 	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
 
 	clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+	clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
 }
 
 static int ena_restore_device(struct ena_adapter *adapter)
@@ -2636,6 +2640,7 @@ static int ena_restore_device(struct ena_adapter *adapter)
 		}
 	}
 
+	set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
 	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
 	dev_err(&pdev->dev, "Device reset completed successfully\n");
 
-- 
cgit v1.2.3


From 944b28aa2982b4590d4d4dfc777cf85135dca2c0 Mon Sep 17 00:00:00 2001
From: Netanel Belgazal <netanel@amazon.com>
Date: Sun, 9 Sep 2018 08:15:24 +0000
Subject: net: ena: fix missing lock during device destruction

acquire the rtnl_lock during device destruction to avoid
using partially destroyed device.

ena_remove() shares almost the same logic as ena_destroy_device(),
so use ena_destroy_device() and avoid duplications.

Signed-off-by: Netanel Belgazal <netanel@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index a68c2a8d4da2..b9ce2a6a87ed 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -3421,24 +3421,18 @@ static void ena_remove(struct pci_dev *pdev)
 
 	unregister_netdev(netdev);
 
-	/* Reset the device only if the device is running. */
+	/* If the device is running then we want to make sure the device will be
+	 * reset to make sure no more events will be issued by the device.
+	 */
 	if (test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
-		ena_com_dev_reset(ena_dev, adapter->reset_reason);
-
-	ena_free_mgmnt_irq(adapter);
+		set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
 
-	ena_disable_msix(adapter);
+	rtnl_lock();
+	ena_destroy_device(adapter, true);
+	rtnl_unlock();
 
 	free_netdev(netdev);
 
-	ena_com_mmio_reg_read_request_destroy(ena_dev);
-
-	ena_com_abort_admin_commands(ena_dev);
-
-	ena_com_wait_for_abort_completion(ena_dev);
-
-	ena_com_admin_destroy(ena_dev);
-
 	ena_com_rss_destroy(ena_dev);
 
 	ena_com_delete_debug_area(ena_dev);
-- 
cgit v1.2.3


From 28abf4e9c9201eda5c4d29ea609d07e877b464b8 Mon Sep 17 00:00:00 2001
From: Netanel Belgazal <netanel@amazon.com>
Date: Sun, 9 Sep 2018 08:15:25 +0000
Subject: net: ena: fix missing calls to READ_ONCE

Add READ_ONCE calls where necessary (for example when iterating
over a memory field that gets updated by the hardware).

Signed-off-by: Netanel Belgazal <netanel@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_com.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 17f12c18d225..c37deef3bcf1 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -459,7 +459,7 @@ static void ena_com_handle_admin_completion(struct ena_com_admin_queue *admin_qu
 	cqe = &admin_queue->cq.entries[head_masked];
 
 	/* Go over all the completions */
-	while ((cqe->acq_common_descriptor.flags &
+	while ((READ_ONCE(cqe->acq_common_descriptor.flags) &
 			ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) {
 		/* Do not read the rest of the completion entry before the
 		 * phase bit was validated
@@ -637,7 +637,7 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset)
 
 	mmiowb();
 	for (i = 0; i < timeout; i++) {
-		if (read_resp->req_id == mmio_read->seq_num)
+		if (READ_ONCE(read_resp->req_id) == mmio_read->seq_num)
 			break;
 
 		udelay(1);
@@ -1796,8 +1796,8 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data)
 	aenq_common = &aenq_e->aenq_common_desc;
 
 	/* Go over all the events */
-	while ((aenq_common->flags & ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) ==
-	       phase) {
+	while ((READ_ONCE(aenq_common->flags) &
+		ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) {
 		pr_debug("AENQ! Group[%x] Syndrom[%x] timestamp: [%llus]\n",
 			 aenq_common->group, aenq_common->syndrom,
 			 (u64)aenq_common->timestamp_low +
-- 
cgit v1.2.3


From 37dff155dcf57f6c08bf1641c5ddf9abd45f2b1f Mon Sep 17 00:00:00 2001
From: Netanel Belgazal <netanel@amazon.com>
Date: Sun, 9 Sep 2018 08:15:26 +0000
Subject: net: ena: fix incorrect usage of memory barriers

Added memory barriers where they were missing to support multiple
architectures, and removed redundant ones.

As part of removing the redundant memory barriers and improving
performance, we moved to more relaxed versions of memory barriers,
as well as to the more relaxed version of writel - writel_relaxed,
while maintaining correctness.

Signed-off-by: Netanel Belgazal <netanel@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_com.c     | 16 +++++++-------
 drivers/net/ethernet/amazon/ena/ena_eth_com.c |  6 ++++++
 drivers/net/ethernet/amazon/ena/ena_eth_com.h |  8 ++-----
 drivers/net/ethernet/amazon/ena/ena_netdev.c  | 30 ++++++++++-----------------
 4 files changed, 26 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index c37deef3bcf1..7635c38e77dd 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -464,7 +464,7 @@ static void ena_com_handle_admin_completion(struct ena_com_admin_queue *admin_qu
 		/* Do not read the rest of the completion entry before the
 		 * phase bit was validated
 		 */
-		rmb();
+		dma_rmb();
 		ena_com_handle_single_admin_completion(admin_queue, cqe);
 
 		head_masked++;
@@ -627,15 +627,8 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset)
 	mmio_read_reg |= mmio_read->seq_num &
 			ENA_REGS_MMIO_REG_READ_REQ_ID_MASK;
 
-	/* make sure read_resp->req_id get updated before the hw can write
-	 * there
-	 */
-	wmb();
-
-	writel_relaxed(mmio_read_reg,
-		       ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF);
+	writel(mmio_read_reg, ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF);
 
-	mmiowb();
 	for (i = 0; i < timeout; i++) {
 		if (READ_ONCE(read_resp->req_id) == mmio_read->seq_num)
 			break;
@@ -1798,6 +1791,11 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data)
 	/* Go over all the events */
 	while ((READ_ONCE(aenq_common->flags) &
 		ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) {
+		/* Make sure the phase bit (ownership) is as expected before
+		 * reading the rest of the descriptor.
+		 */
+		dma_rmb();
+
 		pr_debug("AENQ! Group[%x] Syndrom[%x] timestamp: [%llus]\n",
 			 aenq_common->group, aenq_common->syndrom,
 			 (u64)aenq_common->timestamp_low +
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
index ea149c134e15..1c682b76190f 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
@@ -51,6 +51,11 @@ static inline struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc(
 	if (desc_phase != expected_phase)
 		return NULL;
 
+	/* Make sure we read the rest of the descriptor after the phase bit
+	 * has been read
+	 */
+	dma_rmb();
+
 	return cdesc;
 }
 
@@ -493,6 +498,7 @@ int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id)
 	if (cdesc_phase != expected_phase)
 		return -EAGAIN;
 
+	dma_rmb();
 	if (unlikely(cdesc->req_id >= io_cq->q_depth)) {
 		pr_err("Invalid req id %d\n", cdesc->req_id);
 		return -EINVAL;
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
index 6fdc753d9483..2f7657227cfe 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
@@ -107,8 +107,7 @@ static inline int ena_com_sq_empty_space(struct ena_com_io_sq *io_sq)
 	return io_sq->q_depth - 1 - cnt;
 }
 
-static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq,
-					    bool relaxed)
+static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
 {
 	u16 tail;
 
@@ -117,10 +116,7 @@ static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq,
 	pr_debug("write submission queue doorbell for queue: %d tail: %d\n",
 		 io_sq->qid, tail);
 
-	if (relaxed)
-		writel_relaxed(tail, io_sq->db_addr);
-	else
-		writel(tail, io_sq->db_addr);
+	writel(tail, io_sq->db_addr);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index b9ce2a6a87ed..29b5774dd32d 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -551,14 +551,9 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
 			    rx_ring->qid, i, num);
 	}
 
-	if (likely(i)) {
-		/* Add memory barrier to make sure the desc were written before
-		 * issue a doorbell
-		 */
-		wmb();
-		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq, true);
-		mmiowb();
-	}
+	/* ena_com_write_sq_doorbell issues a wmb() */
+	if (likely(i))
+		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
 
 	rx_ring->next_to_use = next_to_use;
 
@@ -2112,12 +2107,6 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
 		tx_ring->ring_size);
 
-	/* This WMB is aimed to:
-	 * 1 - perform smp barrier before reading next_to_completion
-	 * 2 - make sure the desc were written before trigger DB
-	 */
-	wmb();
-
 	/* stop the queue when no more space available, the packet can have up
 	 * to sgl_size + 2. one for the meta descriptor and one for header
 	 * (if the header is larger than tx_max_header_size).
@@ -2136,10 +2125,11 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		 * stop the queue but meanwhile clean_tx_irq updates
 		 * next_to_completion and terminates.
 		 * The queue will remain stopped forever.
-		 * To solve this issue this function perform rmb, check
-		 * the wakeup condition and wake up the queue if needed.
+		 * To solve this issue add a mb() to make sure that
+		 * netif_tx_stop_queue() write is vissible before checking if
+		 * there is additional space in the queue.
 		 */
-		smp_rmb();
+		smp_mb();
 
 		if (ena_com_sq_empty_space(tx_ring->ena_com_io_sq)
 				> ENA_TX_WAKEUP_THRESH) {
@@ -2151,8 +2141,10 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	if (netif_xmit_stopped(txq) || !skb->xmit_more) {
-		/* trigger the dma engine */
-		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq, false);
+		/* trigger the dma engine. ena_com_write_sq_doorbell()
+		 * has a mb
+		 */
+		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
 		u64_stats_update_begin(&tx_ring->syncp);
 		tx_ring->tx_stats.doorbells++;
 		u64_stats_update_end(&tx_ring->syncp);
-- 
cgit v1.2.3


From 52ea992cfac357b73180d5c051dca43bc8d20c2a Mon Sep 17 00:00:00 2001
From: Vakul Garg <vakul.garg@nxp.com>
Date: Thu, 6 Sep 2018 21:41:40 +0530
Subject: net/tls: Set count of SG entries if sk_alloc_sg returns -ENOSPC

tls_sw_sendmsg() allocates plaintext and encrypted SG entries using
function sk_alloc_sg(). In case the number of SG entries hit
MAX_SKB_FRAGS, sk_alloc_sg() returns -ENOSPC and sets the variable for
current SG index to '0'. This leads to calling of function
tls_push_record() with 'sg_encrypted_num_elem = 0' and later causes
kernel crash. To fix this, set the number of SG elements to the number
of elements in plaintext/encrypted SG arrays in case sk_alloc_sg()
returns -ENOSPC.

Fixes: 3c4d7559159b ("tls: kernel TLS support")
Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tls/tls_sw.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 52fbe727d7c1..e28a6ff25d96 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -125,6 +125,9 @@ static int alloc_encrypted_sg(struct sock *sk, int len)
 			 &ctx->sg_encrypted_num_elem,
 			 &ctx->sg_encrypted_size, 0);
 
+	if (rc == -ENOSPC)
+		ctx->sg_encrypted_num_elem = ARRAY_SIZE(ctx->sg_encrypted_data);
+
 	return rc;
 }
 
@@ -138,6 +141,9 @@ static int alloc_plaintext_sg(struct sock *sk, int len)
 			 &ctx->sg_plaintext_num_elem, &ctx->sg_plaintext_size,
 			 tls_ctx->pending_open_record_frags);
 
+	if (rc == -ENOSPC)
+		ctx->sg_plaintext_num_elem = ARRAY_SIZE(ctx->sg_plaintext_data);
+
 	return rc;
 }
 
-- 
cgit v1.2.3


From 5d407b071dc369c26a38398326ee2be53651cfe4 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Mon, 10 Sep 2018 02:47:05 +0900
Subject: ip: frags: fix crash in ip_do_fragment()

A kernel crash occurrs when defragmented packet is fragmented
in ip_do_fragment().
In defragment routine, skb_orphan() is called and
skb->ip_defrag_offset is set. but skb->sk and
skb->ip_defrag_offset are same union member. so that
frag->sk is not NULL.
Hence crash occurrs in skb->sk check routine in ip_do_fragment() when
defragmented packet is fragmented.

test commands:
   %iptables -t nat -I POSTROUTING -j MASQUERADE
   %hping3 192.168.4.2 -s 1000 -p 2000 -d 60000

splat looks like:
[  261.069429] kernel BUG at net/ipv4/ip_output.c:636!
[  261.075753] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
[  261.083854] CPU: 1 PID: 1349 Comm: hping3 Not tainted 4.19.0-rc2+ #3
[  261.100977] RIP: 0010:ip_do_fragment+0x1613/0x2600
[  261.106945] Code: e8 e2 38 e3 fe 4c 8b 44 24 18 48 8b 74 24 08 e9 92 f6 ff ff 80 3c 02 00 0f 85 da 07 00 00 48 8b b5 d0 00 00 00 e9 25 f6 ff ff <0f> 0b 0f 0b 44 8b 54 24 58 4c 8b 4c 24 18 4c 8b 5c 24 60 4c 8b 6c
[  261.127015] RSP: 0018:ffff8801031cf2c0 EFLAGS: 00010202
[  261.134156] RAX: 1ffff1002297537b RBX: ffffed0020639e6e RCX: 0000000000000004
[  261.142156] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff880114ba9bd8
[  261.150157] RBP: ffff880114ba8a40 R08: ffffed0022975395 R09: ffffed0022975395
[  261.158157] R10: 0000000000000001 R11: ffffed0022975394 R12: ffff880114ba9ca4
[  261.166159] R13: 0000000000000010 R14: ffff880114ba9bc0 R15: dffffc0000000000
[  261.174169] FS:  00007fbae2199700(0000) GS:ffff88011b400000(0000) knlGS:0000000000000000
[  261.183012] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  261.189013] CR2: 00005579244fe000 CR3: 0000000119bf4000 CR4: 00000000001006e0
[  261.198158] Call Trace:
[  261.199018]  ? dst_output+0x180/0x180
[  261.205011]  ? save_trace+0x300/0x300
[  261.209018]  ? ip_copy_metadata+0xb00/0xb00
[  261.213034]  ? sched_clock_local+0xd4/0x140
[  261.218158]  ? kill_l4proto+0x120/0x120 [nf_conntrack]
[  261.223014]  ? rt_cpu_seq_stop+0x10/0x10
[  261.227014]  ? find_held_lock+0x39/0x1c0
[  261.233008]  ip_finish_output+0x51d/0xb50
[  261.237006]  ? ip_fragment.constprop.56+0x220/0x220
[  261.243011]  ? nf_ct_l4proto_register_one+0x5b0/0x5b0 [nf_conntrack]
[  261.250152]  ? rcu_is_watching+0x77/0x120
[  261.255010]  ? nf_nat_ipv4_out+0x1e/0x2b0 [nf_nat_ipv4]
[  261.261033]  ? nf_hook_slow+0xb1/0x160
[  261.265007]  ip_output+0x1c7/0x710
[  261.269005]  ? ip_mc_output+0x13f0/0x13f0
[  261.273002]  ? __local_bh_enable_ip+0xe9/0x1b0
[  261.278152]  ? ip_fragment.constprop.56+0x220/0x220
[  261.282996]  ? nf_hook_slow+0xb1/0x160
[  261.287007]  raw_sendmsg+0x21f9/0x4420
[  261.291008]  ? dst_output+0x180/0x180
[  261.297003]  ? sched_clock_cpu+0x126/0x170
[  261.301003]  ? find_held_lock+0x39/0x1c0
[  261.306155]  ? stop_critical_timings+0x420/0x420
[  261.311004]  ? check_flags.part.36+0x450/0x450
[  261.315005]  ? _raw_spin_unlock_irq+0x29/0x40
[  261.320995]  ? _raw_spin_unlock_irq+0x29/0x40
[  261.326142]  ? cyc2ns_read_end+0x10/0x10
[  261.330139]  ? raw_bind+0x280/0x280
[  261.334138]  ? sched_clock_cpu+0x126/0x170
[  261.338995]  ? check_flags.part.36+0x450/0x450
[  261.342991]  ? __lock_acquire+0x4500/0x4500
[  261.348994]  ? inet_sendmsg+0x11c/0x500
[  261.352989]  ? dst_output+0x180/0x180
[  261.357012]  inet_sendmsg+0x11c/0x500
[ ... ]

v2:
 - clear skb->sk at reassembly routine.(Eric Dumarzet)

Fixes: fa0f527358bd ("ip: use rb trees for IP frag queue.")
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_fragment.c                  | 1 +
 net/ipv6/netfilter/nf_conntrack_reasm.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 88281fbce88c..e7227128df2c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -599,6 +599,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 			nextp = &fp->next;
 			fp->prev = NULL;
 			memset(&fp->rbnode, 0, sizeof(fp->rbnode));
+			fp->sk = NULL;
 			head->data_len += fp->len;
 			head->len += fp->len;
 			if (head->ip_summed != fp->ip_summed)
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 2a14d8b65924..8f68a518d9db 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -445,6 +445,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev,  struct net_devic
 		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
+		fp->sk = NULL;
 	}
 	sub_frag_mem_limit(fq->q.net, head->truesize);
 
-- 
cgit v1.2.3


From 3ebb17446b954b7d39264564ec3f7522d502e785 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 7 Sep 2018 02:02:45 +0000
Subject: ethernet: renesas: convert to SPDX identifiers

This patch updates license to use SPDX-License-Identifier
instead of verbose license text.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/Kconfig    | 1 +
 drivers/net/ethernet/renesas/Makefile   | 1 +
 drivers/net/ethernet/renesas/ravb_ptp.c | 6 +-----
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/renesas/Kconfig b/drivers/net/ethernet/renesas/Kconfig
index f3f7477043ce..bb0ebdfd4459 100644
--- a/drivers/net/ethernet/renesas/Kconfig
+++ b/drivers/net/ethernet/renesas/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Renesas device configuration
 #
diff --git a/drivers/net/ethernet/renesas/Makefile b/drivers/net/ethernet/renesas/Makefile
index a05102a7df02..f21ab8c02af0 100644
--- a/drivers/net/ethernet/renesas/Makefile
+++ b/drivers/net/ethernet/renesas/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the Renesas device drivers.
 #
diff --git a/drivers/net/ethernet/renesas/ravb_ptp.c b/drivers/net/ethernet/renesas/ravb_ptp.c
index eede70ec37f8..0721b5c35d91 100644
--- a/drivers/net/ethernet/renesas/ravb_ptp.c
+++ b/drivers/net/ethernet/renesas/ravb_ptp.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /* PTP 1588 clock using the Renesas Ethernet AVB
  *
  * Copyright (C) 2013-2015 Renesas Electronics Corporation
  * Copyright (C) 2015 Renesas Solutions Corp.
  * Copyright (C) 2015-2016 Cogent Embedded, Inc. <source@cogentembedded.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
  */
 
 #include "ravb.h"
-- 
cgit v1.2.3


From 7c5cca3588545e7f255171e28e0dd6e384ebb91d Mon Sep 17 00:00:00 2001
From: Kristian Evensen <kristian.evensen@gmail.com>
Date: Sat, 8 Sep 2018 13:50:48 +0200
Subject: qmi_wwan: Support dynamic config on Quectel EP06
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Quectel EP06 (and EM06/EG06) supports dynamic configuration of USB
interfaces, without the device changing VID/PID or configuration number.
When the configuration is updated and interfaces are added/removed, the
interface numbers change. This means that the current code for matching
EP06 does not work.

This patch removes the current EP06 interface number match, and replaces
it with a match on class, subclass and protocol. Unfortunately, matching
on those three alone is not enough, as the diag interface exports the
same values as QMI. The other serial interfaces + adb export different
values and do not match.

The diag interface only has two endpoints, while the QMI interface has
three. I have therefore added a check for number of interfaces, and we
ignore the interface if the number of endpoints equals two.

Signed-off-by: Kristian Evensen <kristian.evensen@gmail.com>
Acked-by: Bjørn Mork <bjorn@mork.no>
Acked-by: Dan Williams <dcbw@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/qmi_wwan.c | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index cb0cc30c3d6a..e3270deecec2 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -967,6 +967,13 @@ static const struct usb_device_id products[] = {
 		USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x581d, USB_CLASS_VENDOR_SPEC, 1, 7),
 		.driver_info = (unsigned long)&qmi_wwan_info,
 	},
+	{	/* Quectel EP06/EG06/EM06 */
+		USB_DEVICE_AND_INTERFACE_INFO(0x2c7c, 0x0306,
+					      USB_CLASS_VENDOR_SPEC,
+					      USB_SUBCLASS_VENDOR_SPEC,
+					      0xff),
+		.driver_info	    = (unsigned long)&qmi_wwan_info_quirk_dtr,
+	},
 
 	/* 3. Combined interface devices matching on interface number */
 	{QMI_FIXED_INTF(0x0408, 0xea42, 4)},	/* Yota / Megafon M100-1 */
@@ -1255,7 +1262,6 @@ static const struct usb_device_id products[] = {
 	{QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)},	/* Quectel EC21 Mini PCIe */
 	{QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)},	/* Quectel EG91 */
 	{QMI_FIXED_INTF(0x2c7c, 0x0296, 4)},	/* Quectel BG96 */
-	{QMI_QUIRK_SET_DTR(0x2c7c, 0x0306, 4)},	/* Quectel EP06 Mini PCIe */
 
 	/* 4. Gobi 1000 devices */
 	{QMI_GOBI1K_DEVICE(0x05c6, 0x9212)},	/* Acer Gobi Modem Device */
@@ -1331,6 +1337,19 @@ static bool quectel_ec20_detected(struct usb_interface *intf)
 	return false;
 }
 
+static bool quectel_ep06_diag_detected(struct usb_interface *intf)
+{
+	struct usb_device *dev = interface_to_usbdev(intf);
+	struct usb_interface_descriptor intf_desc = intf->cur_altsetting->desc;
+
+	if (le16_to_cpu(dev->descriptor.idVendor) == 0x2c7c &&
+	    le16_to_cpu(dev->descriptor.idProduct) == 0x0306 &&
+	    intf_desc.bNumEndpoints == 2)
+		return true;
+
+	return false;
+}
+
 static int qmi_wwan_probe(struct usb_interface *intf,
 			  const struct usb_device_id *prod)
 {
@@ -1365,6 +1384,15 @@ static int qmi_wwan_probe(struct usb_interface *intf,
 		return -ENODEV;
 	}
 
+	/* Quectel EP06/EM06/EG06 supports dynamic interface configuration, so
+	 * we need to match on class/subclass/protocol. These values are
+	 * identical for the diagnostic- and QMI-interface, but bNumEndpoints is
+	 * different. Ignore the current interface if the number of endpoints
+	 * the number for the diag interface (two).
+	 */
+	if (quectel_ep06_diag_detected(intf))
+		return -ENODEV;
+
 	return usbnet_probe(intf, id);
 }
 
-- 
cgit v1.2.3


From f94e63801ab2791ed64c409d0f751f6a0c953ead Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 24 Aug 2018 23:22:08 +0200
Subject: netfilter: conntrack: reset tcp maxwin on re-register

Doug Smythies says:
  Sometimes it is desirable to temporarily disable, or clear,
  the iptables rule set on a computer being controlled via a
  secure shell session (SSH). While unwise on an internet facing
  computer, I also do it often on non-internet accessible computers
  while testing. Recently, this has become problematic, with the
  SSH session being dropped upon re-load of the rule set.

The problem is that when all rules are deleted, conntrack hooks get
unregistered.

In case the rules are re-added later, its possible that tcp window
has moved far enough so that all packets are considered invalid (out of
window) until entry expires (which can take forever, default
established timeout is 5 days).

Fix this by clearing maxwin of existing tcp connections on register.

v2: don't touch entries on hook removal.
v3: remove obsolete expiry check.

Reported-by: Doug Smythies <dsmythies@telus.net>
Fixes: 4d3a57f23dec59 ("netfilter: conntrack: do not enable connection tracking unless needed")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_proto.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 9f14b0df6960..51c5d7eec0a3 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -776,9 +776,26 @@ static const struct nf_hook_ops ipv6_conntrack_ops[] = {
 };
 #endif
 
+static int nf_ct_tcp_fixup(struct nf_conn *ct, void *_nfproto)
+{
+	u8 nfproto = (unsigned long)_nfproto;
+
+	if (nf_ct_l3num(ct) != nfproto)
+		return 0;
+
+	if (nf_ct_protonum(ct) == IPPROTO_TCP &&
+	    ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED) {
+		ct->proto.tcp.seen[0].td_maxwin = 0;
+		ct->proto.tcp.seen[1].td_maxwin = 0;
+	}
+
+	return 0;
+}
+
 static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
 {
 	struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+	bool fixup_needed = false;
 	int err = 0;
 
 	mutex_lock(&nf_ct_proto_mutex);
@@ -798,6 +815,8 @@ static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
 					    ARRAY_SIZE(ipv4_conntrack_ops));
 		if (err)
 			cnet->users4 = 0;
+		else
+			fixup_needed = true;
 		break;
 #if IS_ENABLED(CONFIG_IPV6)
 	case NFPROTO_IPV6:
@@ -814,6 +833,8 @@ static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
 					    ARRAY_SIZE(ipv6_conntrack_ops));
 		if (err)
 			cnet->users6 = 0;
+		else
+			fixup_needed = true;
 		break;
 #endif
 	default:
@@ -822,6 +843,11 @@ static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
 	}
  out_unlock:
 	mutex_unlock(&nf_ct_proto_mutex);
+
+	if (fixup_needed)
+		nf_ct_iterate_cleanup_net(net, nf_ct_tcp_fixup,
+					  (void *)(unsigned long)nfproto, 0, 0);
+
 	return err;
 }
 
-- 
cgit v1.2.3


From a874752a10da113f513980e28f562d946d3f829d Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 31 Aug 2018 12:36:01 +0200
Subject: netfilter: conntrack: timeout interface depend on
 CONFIG_NF_CONNTRACK_TIMEOUT

Now that cttimeout support for nft_ct is in place, these should depend
on CONFIG_NF_CONNTRACK_TIMEOUT otherwise we can crash when dumping the
policy if this option is not enabled.

[   71.600121] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
[...]
[   71.600141] CPU: 3 PID: 7612 Comm: nft Not tainted 4.18.0+ #246
[...]
[   71.600188] Call Trace:
[   71.600201]  ? nft_ct_timeout_obj_dump+0xc6/0xf0 [nft_ct]

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_proto_dccp.c    | 12 ++++++------
 net/netfilter/nf_conntrack_proto_generic.c |  8 ++++----
 net/netfilter/nf_conntrack_proto_gre.c     |  8 ++++----
 net/netfilter/nf_conntrack_proto_icmp.c    |  8 ++++----
 net/netfilter/nf_conntrack_proto_icmpv6.c  |  8 ++++----
 net/netfilter/nf_conntrack_proto_sctp.c    | 14 +++++++-------
 net/netfilter/nf_conntrack_proto_tcp.c     | 12 ++++++------
 net/netfilter/nf_conntrack_proto_udp.c     | 20 ++++++++++----------
 8 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index b81f70039828..f3f91ed2c21a 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -675,7 +675,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
 }
 #endif
 
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_cttimeout.h>
@@ -728,7 +728,7 @@ dccp_timeout_nla_policy[CTA_TIMEOUT_DCCP_MAX+1] = {
 	[CTA_TIMEOUT_DCCP_CLOSING]	= { .type = NLA_U32 },
 	[CTA_TIMEOUT_DCCP_TIMEWAIT]	= { .type = NLA_U32 },
 };
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
 #ifdef CONFIG_SYSCTL
 /* template, data assigned later */
@@ -863,7 +863,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = {
 	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
 	.nla_policy		= nf_ct_port_nla_policy,
 #endif
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	.ctnl_timeout		= {
 		.nlattr_to_obj	= dccp_timeout_nlattr_to_obj,
 		.obj_to_nlattr	= dccp_timeout_obj_to_nlattr,
@@ -871,7 +871,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = {
 		.obj_size	= sizeof(unsigned int) * CT_DCCP_MAX,
 		.nla_policy	= dccp_timeout_nla_policy,
 	},
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 	.init_net		= dccp_init_net,
 	.get_net_proto		= dccp_get_net_proto,
 };
@@ -896,7 +896,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = {
 	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
 	.nla_policy		= nf_ct_port_nla_policy,
 #endif
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	.ctnl_timeout		= {
 		.nlattr_to_obj	= dccp_timeout_nlattr_to_obj,
 		.obj_to_nlattr	= dccp_timeout_obj_to_nlattr,
@@ -904,7 +904,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = {
 		.obj_size	= sizeof(unsigned int) * CT_DCCP_MAX,
 		.nla_policy	= dccp_timeout_nla_policy,
 	},
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 	.init_net		= dccp_init_net,
 	.get_net_proto		= dccp_get_net_proto,
 };
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index ac4a0b296dcd..1df3244ecd07 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -70,7 +70,7 @@ static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
 	return ret;
 }
 
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_cttimeout.h>
@@ -113,7 +113,7 @@ static const struct nla_policy
 generic_timeout_nla_policy[CTA_TIMEOUT_GENERIC_MAX+1] = {
 	[CTA_TIMEOUT_GENERIC_TIMEOUT]	= { .type = NLA_U32 },
 };
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
 #ifdef CONFIG_SYSCTL
 static struct ctl_table generic_sysctl_table[] = {
@@ -164,7 +164,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
 	.pkt_to_tuple		= generic_pkt_to_tuple,
 	.packet			= generic_packet,
 	.new			= generic_new,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	.ctnl_timeout		= {
 		.nlattr_to_obj	= generic_timeout_nlattr_to_obj,
 		.obj_to_nlattr	= generic_timeout_obj_to_nlattr,
@@ -172,7 +172,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
 		.obj_size	= sizeof(unsigned int),
 		.nla_policy	= generic_timeout_nla_policy,
 	},
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 	.init_net		= generic_init_net,
 	.get_net_proto		= generic_get_net_proto,
 };
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index d1632252bf5b..650eb4fba2c5 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -285,7 +285,7 @@ static void gre_destroy(struct nf_conn *ct)
 		nf_ct_gre_keymap_destroy(master);
 }
 
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_cttimeout.h>
@@ -334,7 +334,7 @@ gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = {
 	[CTA_TIMEOUT_GRE_UNREPLIED]	= { .type = NLA_U32 },
 	[CTA_TIMEOUT_GRE_REPLIED]	= { .type = NLA_U32 },
 };
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
 static int gre_init_net(struct net *net, u_int16_t proto)
 {
@@ -367,7 +367,7 @@ static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = {
 	.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
 	.nla_policy	 = nf_ct_port_nla_policy,
 #endif
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	.ctnl_timeout    = {
 		.nlattr_to_obj	= gre_timeout_nlattr_to_obj,
 		.obj_to_nlattr	= gre_timeout_obj_to_nlattr,
@@ -375,7 +375,7 @@ static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = {
 		.obj_size	= sizeof(unsigned int) * GRE_CT_MAX,
 		.nla_policy	= gre_timeout_nla_policy,
 	},
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 	.net_id		= &proto_gre_net_id,
 	.init_net	= gre_init_net,
 };
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index 036670b38282..43c7e1a217b9 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -273,7 +273,7 @@ static unsigned int icmp_nlattr_tuple_size(void)
 }
 #endif
 
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_cttimeout.h>
@@ -313,7 +313,7 @@ static const struct nla_policy
 icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = {
 	[CTA_TIMEOUT_ICMP_TIMEOUT]	= { .type = NLA_U32 },
 };
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
 #ifdef CONFIG_SYSCTL
 static struct ctl_table icmp_sysctl_table[] = {
@@ -374,7 +374,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
 	.nlattr_to_tuple	= icmp_nlattr_to_tuple,
 	.nla_policy		= icmp_nla_policy,
 #endif
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	.ctnl_timeout		= {
 		.nlattr_to_obj	= icmp_timeout_nlattr_to_obj,
 		.obj_to_nlattr	= icmp_timeout_obj_to_nlattr,
@@ -382,7 +382,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
 		.obj_size	= sizeof(unsigned int),
 		.nla_policy	= icmp_timeout_nla_policy,
 	},
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 	.init_net		= icmp_init_net,
 	.get_net_proto		= icmp_get_net_proto,
 };
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index bed07b998a10..97e40f77d678 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -274,7 +274,7 @@ static unsigned int icmpv6_nlattr_tuple_size(void)
 }
 #endif
 
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_cttimeout.h>
@@ -314,7 +314,7 @@ static const struct nla_policy
 icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = {
 	[CTA_TIMEOUT_ICMPV6_TIMEOUT]	= { .type = NLA_U32 },
 };
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
 #ifdef CONFIG_SYSCTL
 static struct ctl_table icmpv6_sysctl_table[] = {
@@ -373,7 +373,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
 	.nlattr_to_tuple	= icmpv6_nlattr_to_tuple,
 	.nla_policy		= icmpv6_nla_policy,
 #endif
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	.ctnl_timeout		= {
 		.nlattr_to_obj	= icmpv6_timeout_nlattr_to_obj,
 		.obj_to_nlattr	= icmpv6_timeout_obj_to_nlattr,
@@ -381,7 +381,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
 		.obj_size	= sizeof(unsigned int),
 		.nla_policy	= icmpv6_timeout_nla_policy,
 	},
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 	.init_net		= icmpv6_init_net,
 	.get_net_proto		= icmpv6_get_net_proto,
 };
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 5eddfd32b852..e4d738d34cd0 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -591,7 +591,7 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
 }
 #endif
 
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_cttimeout.h>
@@ -646,7 +646,7 @@ sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = {
 	[CTA_TIMEOUT_SCTP_HEARTBEAT_SENT]	= { .type = NLA_U32 },
 	[CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED]	= { .type = NLA_U32 },
 };
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
 
 #ifdef CONFIG_SYSCTL
@@ -780,7 +780,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = {
 	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
 	.nla_policy		= nf_ct_port_nla_policy,
 #endif
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	.ctnl_timeout		= {
 		.nlattr_to_obj	= sctp_timeout_nlattr_to_obj,
 		.obj_to_nlattr	= sctp_timeout_obj_to_nlattr,
@@ -788,7 +788,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = {
 		.obj_size	= sizeof(unsigned int) * SCTP_CONNTRACK_MAX,
 		.nla_policy	= sctp_timeout_nla_policy,
 	},
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 	.init_net		= sctp_init_net,
 	.get_net_proto		= sctp_get_net_proto,
 };
@@ -813,7 +813,8 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = {
 	.nlattr_tuple_size	= nf_ct_port_nlattr_tuple_size,
 	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
 	.nla_policy		= nf_ct_port_nla_policy,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	.ctnl_timeout		= {
 		.nlattr_to_obj	= sctp_timeout_nlattr_to_obj,
 		.obj_to_nlattr	= sctp_timeout_obj_to_nlattr,
@@ -821,8 +822,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = {
 		.obj_size	= sizeof(unsigned int) * SCTP_CONNTRACK_MAX,
 		.nla_policy	= sctp_timeout_nla_policy,
 	},
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
-#endif
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 	.init_net		= sctp_init_net,
 	.get_net_proto		= sctp_get_net_proto,
 };
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 3e2dc56a96c3..b4bdf9eda7b7 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1279,7 +1279,7 @@ static unsigned int tcp_nlattr_tuple_size(void)
 }
 #endif
 
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_cttimeout.h>
@@ -1394,7 +1394,7 @@ static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
 	[CTA_TIMEOUT_TCP_RETRANS]	= { .type = NLA_U32 },
 	[CTA_TIMEOUT_TCP_UNACK]		= { .type = NLA_U32 },
 };
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
 #ifdef CONFIG_SYSCTL
 static struct ctl_table tcp_sysctl_table[] = {
@@ -1558,7 +1558,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
 	.nlattr_size		= TCP_NLATTR_SIZE,
 	.nla_policy		= nf_ct_port_nla_policy,
 #endif
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	.ctnl_timeout		= {
 		.nlattr_to_obj	= tcp_timeout_nlattr_to_obj,
 		.obj_to_nlattr	= tcp_timeout_obj_to_nlattr,
@@ -1567,7 +1567,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
 					TCP_CONNTRACK_TIMEOUT_MAX,
 		.nla_policy	= tcp_timeout_nla_policy,
 	},
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 	.init_net		= tcp_init_net,
 	.get_net_proto		= tcp_get_net_proto,
 };
@@ -1593,7 +1593,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
 	.nlattr_tuple_size	= tcp_nlattr_tuple_size,
 	.nla_policy		= nf_ct_port_nla_policy,
 #endif
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	.ctnl_timeout		= {
 		.nlattr_to_obj	= tcp_timeout_nlattr_to_obj,
 		.obj_to_nlattr	= tcp_timeout_obj_to_nlattr,
@@ -1602,7 +1602,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
 					TCP_CONNTRACK_TIMEOUT_MAX,
 		.nla_policy	= tcp_timeout_nla_policy,
 	},
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 	.init_net		= tcp_init_net,
 	.get_net_proto		= tcp_get_net_proto,
 };
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 9272a2c525a8..3065fb8ef91b 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -171,7 +171,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
 	return NF_ACCEPT;
 }
 
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_cttimeout.h>
@@ -221,7 +221,7 @@ udp_timeout_nla_policy[CTA_TIMEOUT_UDP_MAX+1] = {
        [CTA_TIMEOUT_UDP_UNREPLIED]	= { .type = NLA_U32 },
        [CTA_TIMEOUT_UDP_REPLIED]	= { .type = NLA_U32 },
 };
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
 #ifdef CONFIG_SYSCTL
 static struct ctl_table udp_sysctl_table[] = {
@@ -292,7 +292,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 =
 	.nlattr_tuple_size	= nf_ct_port_nlattr_tuple_size,
 	.nla_policy		= nf_ct_port_nla_policy,
 #endif
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	.ctnl_timeout		= {
 		.nlattr_to_obj	= udp_timeout_nlattr_to_obj,
 		.obj_to_nlattr	= udp_timeout_obj_to_nlattr,
@@ -300,7 +300,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 =
 		.obj_size	= sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX,
 		.nla_policy	= udp_timeout_nla_policy,
 	},
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 	.init_net		= udp_init_net,
 	.get_net_proto		= udp_get_net_proto,
 };
@@ -321,7 +321,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 =
 	.nlattr_tuple_size	= nf_ct_port_nlattr_tuple_size,
 	.nla_policy		= nf_ct_port_nla_policy,
 #endif
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	.ctnl_timeout		= {
 		.nlattr_to_obj	= udp_timeout_nlattr_to_obj,
 		.obj_to_nlattr	= udp_timeout_obj_to_nlattr,
@@ -329,7 +329,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 =
 		.obj_size	= sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX,
 		.nla_policy	= udp_timeout_nla_policy,
 	},
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 	.init_net		= udp_init_net,
 	.get_net_proto		= udp_get_net_proto,
 };
@@ -350,7 +350,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 =
 	.nlattr_tuple_size	= nf_ct_port_nlattr_tuple_size,
 	.nla_policy		= nf_ct_port_nla_policy,
 #endif
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	.ctnl_timeout		= {
 		.nlattr_to_obj	= udp_timeout_nlattr_to_obj,
 		.obj_to_nlattr	= udp_timeout_obj_to_nlattr,
@@ -358,7 +358,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 =
 		.obj_size	= sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX,
 		.nla_policy	= udp_timeout_nla_policy,
 	},
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 	.init_net		= udp_init_net,
 	.get_net_proto		= udp_get_net_proto,
 };
@@ -379,7 +379,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
 	.nlattr_tuple_size	= nf_ct_port_nlattr_tuple_size,
 	.nla_policy		= nf_ct_port_nla_policy,
 #endif
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	.ctnl_timeout		= {
 		.nlattr_to_obj	= udp_timeout_nlattr_to_obj,
 		.obj_to_nlattr	= udp_timeout_obj_to_nlattr,
@@ -387,7 +387,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 =
 		.obj_size	= sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX,
 		.nla_policy	= udp_timeout_nla_policy,
 	},
-#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 	.init_net		= udp_init_net,
 	.get_net_proto		= udp_get_net_proto,
 };
-- 
cgit v1.2.3


From 99e25d071fca91eb90ffa2f51240547a69137bde Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 3 Sep 2018 13:53:22 +0200
Subject: netfilter: cttimeout: ctnl_timeout_find_get() returns incorrect
 pointer to type

Compiler did not catch incorrect typing in the rcu hook assignment.

 % nfct add timeout test-tcp inet tcp established 100 close 10 close_wait 10
 % iptables -I OUTPUT -t raw -p tcp -j CT --timeout test-tcp
 dmesg - xt_CT: Timeout policy `test-tcp' can only be used by L3 protocol number 25000

The CT target bails out with incorrect layer 3 protocol number.

Fixes: 6c1fd7dc489d ("netfilter: cttimeout: decouple timeout policy from nfnetlink_cttimeout object")
Reported-by: Harsha Sharma <harshasharmaiitr@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_cttimeout.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index d46a236cdf31..a30f8ba4b89a 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -489,8 +489,8 @@ err:
 	return err;
 }
 
-static struct ctnl_timeout *
-ctnl_timeout_find_get(struct net *net, const char *name)
+static struct nf_ct_timeout *ctnl_timeout_find_get(struct net *net,
+						   const char *name)
 {
 	struct ctnl_timeout *timeout, *matching = NULL;
 
@@ -509,7 +509,7 @@ ctnl_timeout_find_get(struct net *net, const char *name)
 		break;
 	}
 err:
-	return matching;
+	return matching ? &matching->timeout : NULL;
 }
 
 static void ctnl_timeout_put(struct nf_ct_timeout *t)
-- 
cgit v1.2.3


From ad18d7bf68a3da860ebb62a59c449804a6d237b4 Mon Sep 17 00:00:00 2001
From: Michal 'vorner' Vaner <michal.vaner@avast.com>
Date: Tue, 4 Sep 2018 13:25:44 +0200
Subject: netfilter: nfnetlink_queue: Solve the NFQUEUE/conntrack clash for
 NF_REPEAT

NF_REPEAT places the packet at the beginning of the iptables chain
instead of accepting or rejecting it right away. The packet however will
reach the end of the chain and continue to the end of iptables
eventually, so it needs the same handling as NF_ACCEPT and NF_DROP.

Fixes: 368982cd7d1b ("netfilter: nfnetlink_queue: resolve clash for unconfirmed conntracks")
Signed-off-by: Michal 'vorner' Vaner <michal.vaner@avast.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_queue.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index ea4ba551abb2..d33094f4ec41 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -233,6 +233,7 @@ static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 	int err;
 
 	if (verdict == NF_ACCEPT ||
+	    verdict == NF_REPEAT ||
 	    verdict == NF_STOP) {
 		rcu_read_lock();
 		ct_hook = rcu_dereference(nf_ct_hook);
-- 
cgit v1.2.3


From 1286df269f498165061e0cf8092ca212545dbb5a Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Wed, 5 Sep 2018 11:41:31 -0700
Subject: netfilter: xt_hashlimit: use s->file instead of s->private

After switching to the new procfs API, it is supposed to
retrieve the private pointer from PDE_DATA(file_inode(s->file)),
s->private is no longer referred.

Fixes: 1cd671827290 ("netfilter/x_tables: switch to proc_create_seq_private")
Reported-by: Sami Farin <hvtaifwkbgefbaei@gmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Tested-by: Sami Farin <hvtaifwkbgefbaei@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_hashlimit.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 9b16402f29af..3e7d259e5d8d 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -1057,7 +1057,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
 static void *dl_seq_start(struct seq_file *s, loff_t *pos)
 	__acquires(htable->lock)
 {
-	struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private));
+	struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
 	unsigned int *bucket;
 
 	spin_lock_bh(&htable->lock);
@@ -1074,7 +1074,7 @@ static void *dl_seq_start(struct seq_file *s, loff_t *pos)
 
 static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
 {
-	struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private));
+	struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
 	unsigned int *bucket = v;
 
 	*pos = ++(*bucket);
@@ -1088,7 +1088,7 @@ static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
 static void dl_seq_stop(struct seq_file *s, void *v)
 	__releases(htable->lock)
 {
-	struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private));
+	struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
 	unsigned int *bucket = v;
 
 	if (!IS_ERR(bucket))
@@ -1130,7 +1130,7 @@ static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family,
 static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family,
 			       struct seq_file *s)
 {
-	struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->private));
+	struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->file));
 
 	spin_lock(&ent->lock);
 	/* recalculate to show accurate numbers */
@@ -1145,7 +1145,7 @@ static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family,
 static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
 			       struct seq_file *s)
 {
-	struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->private));
+	struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->file));
 
 	spin_lock(&ent->lock);
 	/* recalculate to show accurate numbers */
@@ -1160,7 +1160,7 @@ static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
 static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
 			    struct seq_file *s)
 {
-	struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->private));
+	struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->file));
 
 	spin_lock(&ent->lock);
 	/* recalculate to show accurate numbers */
@@ -1174,7 +1174,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
 
 static int dl_seq_show_v2(struct seq_file *s, void *v)
 {
-	struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private));
+	struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
 	unsigned int *bucket = (unsigned int *)v;
 	struct dsthash_ent *ent;
 
@@ -1188,7 +1188,7 @@ static int dl_seq_show_v2(struct seq_file *s, void *v)
 
 static int dl_seq_show_v1(struct seq_file *s, void *v)
 {
-	struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private));
+	struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
 	unsigned int *bucket = v;
 	struct dsthash_ent *ent;
 
@@ -1202,7 +1202,7 @@ static int dl_seq_show_v1(struct seq_file *s, void *v)
 
 static int dl_seq_show(struct seq_file *s, void *v)
 {
-	struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->private));
+	struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
 	unsigned int *bucket = v;
 	struct dsthash_ent *ent;
 
-- 
cgit v1.2.3


From 2d946e5bcdabc1deef72d01bc92a2801c71d6d8d Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Sun, 9 Sep 2018 21:26:23 +0200
Subject: MIPS: lantiq: dma: add dev pointer

dma_zalloc_coherent() now crashes if no dev pointer is given.
Add a dev pointer to the ltq_dma_channel structure and fill it in the
driver using it.

This fixes a bug introduced in kernel 4.19.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/mips/include/asm/mach-lantiq/xway/xway_dma.h | 1 +
 arch/mips/lantiq/xway/dma.c                       | 4 ++--
 drivers/net/ethernet/lantiq_etop.c                | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h b/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h
index 4901833498f7..8441b2698e64 100644
--- a/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h
+++ b/arch/mips/include/asm/mach-lantiq/xway/xway_dma.h
@@ -40,6 +40,7 @@ struct ltq_dma_channel {
 	int desc;			/* the current descriptor */
 	struct ltq_dma_desc *desc_base; /* the descriptor base */
 	int phys;			/* physical addr */
+	struct device *dev;
 };
 
 enum {
diff --git a/arch/mips/lantiq/xway/dma.c b/arch/mips/lantiq/xway/dma.c
index 4b9fbb6744ad..664f2f7f55c1 100644
--- a/arch/mips/lantiq/xway/dma.c
+++ b/arch/mips/lantiq/xway/dma.c
@@ -130,7 +130,7 @@ ltq_dma_alloc(struct ltq_dma_channel *ch)
 	unsigned long flags;
 
 	ch->desc = 0;
-	ch->desc_base = dma_zalloc_coherent(NULL,
+	ch->desc_base = dma_zalloc_coherent(ch->dev,
 				LTQ_DESC_NUM * LTQ_DESC_SIZE,
 				&ch->phys, GFP_ATOMIC);
 
@@ -182,7 +182,7 @@ ltq_dma_free(struct ltq_dma_channel *ch)
 	if (!ch->desc_base)
 		return;
 	ltq_dma_close(ch);
-	dma_free_coherent(NULL, LTQ_DESC_NUM * LTQ_DESC_SIZE,
+	dma_free_coherent(ch->dev, LTQ_DESC_NUM * LTQ_DESC_SIZE,
 		ch->desc_base, ch->phys);
 }
 EXPORT_SYMBOL_GPL(ltq_dma_free);
diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index 7a637b51c7d2..e08301d833e2 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c
@@ -274,6 +274,7 @@ ltq_etop_hw_init(struct net_device *dev)
 		struct ltq_etop_chan *ch = &priv->ch[i];
 
 		ch->idx = ch->dma.nr = i;
+		ch->dma.dev = &priv->pdev->dev;
 
 		if (IS_TX(i)) {
 			ltq_dma_alloc_tx(&ch->dma);
-- 
cgit v1.2.3


From 0297c1c2eadb5bd996a873b87597af3b91c0d4ba Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Sun, 9 Sep 2018 19:12:12 -0400
Subject: tcp: rate limit synflood warnings further

Convert pr_info to net_info_ratelimited to limit the total number of
synflood warnings.

Commit 946cedccbd73 ("tcp: Change possible SYN flooding messages")
rate limits synflood warnings to one per listener.

Workloads that open many listener sockets can still see a high rate of
log messages. Syzkaller is one frequent example.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4c2dd9f863f7..4cf2f7bb2802 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6367,8 +6367,8 @@ static bool tcp_syn_flood_action(const struct sock *sk,
 	if (!queue->synflood_warned &&
 	    net->ipv4.sysctl_tcp_syncookies != 2 &&
 	    xchg(&queue->synflood_warned, 1) == 0)
-		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
-			proto, ntohs(tcp_hdr(skb)->dest), msg);
+		net_info_ratelimited("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
+				     proto, ntohs(tcp_hdr(skb)->dest), msg);
 
 	return want_cookie;
 }
-- 
cgit v1.2.3


From 5a64506b5c2c3cdb29d817723205330378075448 Mon Sep 17 00:00:00 2001
From: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Date: Mon, 10 Sep 2018 22:19:47 +0800
Subject: erspan: return PACKET_REJECT when the appropriate tunnel is not found

If erspan tunnel hasn't been established, we'd better send icmp port
unreachable message after receive erspan packets.

Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN")
Cc: William Tu <u9012063@gmail.com>
Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Acked-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index ae714aecc31c..85a714d36b66 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -328,6 +328,8 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
 		return PACKET_RCVD;
 	}
+	return PACKET_REJECT;
+
 drop:
 	kfree_skb(skb);
 	return PACKET_RCVD;
-- 
cgit v1.2.3


From 51dc63e3911fbb1f0a7a32da2fe56253e2040ea4 Mon Sep 17 00:00:00 2001
From: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Date: Mon, 10 Sep 2018 22:19:48 +0800
Subject: erspan: fix error handling for erspan tunnel

When processing icmp unreachable message for erspan tunnel, tunnel id
should be erspan_net_id instead of ipgre_net_id.

Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN")
Cc: William Tu <u9012063@gmail.com>
Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Acked-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 85a714d36b66..8cce0e9ea08c 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -178,6 +178,9 @@ static void ipgre_err(struct sk_buff *skb, u32 info,
 
 	if (tpi->proto == htons(ETH_P_TEB))
 		itn = net_generic(net, gre_tap_net_id);
+	else if (tpi->proto == htons(ETH_P_ERSPAN) ||
+		 tpi->proto == htons(ETH_P_ERSPAN2))
+		itn = net_generic(net, erspan_net_id);
 	else
 		itn = net_generic(net, ipgre_net_id);
 
-- 
cgit v1.2.3


From 6ad569019999300afd8e614d296fdc356550b77f Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Tue, 11 Sep 2018 01:51:43 +0800
Subject: r8169: Clear RTL_FLAG_TASK_*_PENDING when clearing
 RTL_FLAG_TASK_ENABLED

After system suspend, sometimes the r8169 doesn't work when ethernet
cable gets pluggued.

This issue happens because rtl_reset_work() doesn't get called from
rtl8169_runtime_resume(), after system suspend.

In rtl_task(), RTL_FLAG_TASK_* only gets cleared if this condition is
met:
if (!netif_running(dev) ||
    !test_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags))
    ...

If RTL_FLAG_TASK_ENABLED was cleared during system suspend while
RTL_FLAG_TASK_RESET_PENDING was set, the next rtl_schedule_task() won't
schedule task as the flag is still there.

So in addition to clearing RTL_FLAG_TASK_ENABLED, also clears other
flags.

Cc: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index a1f37d58e2fe..1d8631303b53 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -631,7 +631,7 @@ struct rtl8169_tc_offsets {
 };
 
 enum rtl_flag {
-	RTL_FLAG_TASK_ENABLED,
+	RTL_FLAG_TASK_ENABLED = 0,
 	RTL_FLAG_TASK_SLOW_PENDING,
 	RTL_FLAG_TASK_RESET_PENDING,
 	RTL_FLAG_MAX
@@ -6655,7 +6655,8 @@ static int rtl8169_close(struct net_device *dev)
 	rtl8169_update_counters(tp);
 
 	rtl_lock_work(tp);
-	clear_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags);
+	/* Clear all task flags */
+	bitmap_zero(tp->wk.flags, RTL_FLAG_MAX);
 
 	rtl8169_down(dev);
 	rtl_unlock_work(tp);
@@ -6838,7 +6839,9 @@ static void rtl8169_net_suspend(struct net_device *dev)
 
 	rtl_lock_work(tp);
 	napi_disable(&tp->napi);
-	clear_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags);
+	/* Clear all task flags */
+	bitmap_zero(tp->wk.flags, RTL_FLAG_MAX);
+
 	rtl_unlock_work(tp);
 
 	rtl_pll_power_down(tp);
-- 
cgit v1.2.3


From cc4dfb7f70a344f24c1c71e298deea0771dadcb2 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 10 Sep 2018 18:27:26 -0700
Subject: rds: fix two RCU related problems

When a rds sock is bound, it is inserted into the bind_hash_table
which is protected by RCU. But when releasing rds sock, after it
is removed from this hash table, it is freed immediately without
respecting RCU grace period. This could cause some use-after-free
as reported by syzbot.

Mark the rds sock with SOCK_RCU_FREE before inserting it into the
bind_hash_table, so that it would be always freed after a RCU grace
period.

The other problem is in rds_find_bound(), the rds sock could be
freed in between rhashtable_lookup_fast() and rds_sock_addref(),
so we need to extend RCU read lock protection in rds_find_bound()
to close this race condition.

Reported-and-tested-by: syzbot+8967084bcac563795dc6@syzkaller.appspotmail.com
Reported-by: syzbot+93a5839deb355537440f@syzkaller.appspotmail.com
Cc: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Cc: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Cc: rds-devel@oss.oracle.com
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oarcle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/bind.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/rds/bind.c b/net/rds/bind.c
index 3ab55784b637..762d2c6788a3 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -76,11 +76,13 @@ struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port,
 	struct rds_sock *rs;
 
 	__rds_create_bind_key(key, addr, port, scope_id);
-	rs = rhashtable_lookup_fast(&bind_hash_table, key, ht_parms);
+	rcu_read_lock();
+	rs = rhashtable_lookup(&bind_hash_table, key, ht_parms);
 	if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
 		rds_sock_addref(rs);
 	else
 		rs = NULL;
+	rcu_read_unlock();
 
 	rdsdebug("returning rs %p for %pI6c:%u\n", rs, addr,
 		 ntohs(port));
@@ -235,6 +237,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		goto out;
 	}
 
+	sock_set_flag(sk, SOCK_RCU_FREE);
 	ret = rds_add_bound(rs, binding_addr, &port, scope_id);
 	if (ret)
 		goto out;
-- 
cgit v1.2.3


From 778b1ac737494cec156f17c80da44664c1f77cf6 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Wed, 12 Sep 2018 15:31:32 +0200
Subject: s390/qeth: indicate error when netdev allocation fails

Bailing out on allocation error is nice, but we also need to tell the
ccwgroup core that creating the qeth groupdev failed.

Fixes: d3d1b205e89f ("s390/qeth: allocate netdevice early")
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core_main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 49f64eb3eab0..6b24face21d5 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5768,8 +5768,10 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev)
 	qeth_update_from_chp_desc(card);
 
 	card->dev = qeth_alloc_netdev(card);
-	if (!card->dev)
+	if (!card->dev) {
+		rc = -ENOMEM;
 		goto err_card;
+	}
 
 	qeth_determine_capabilities(card);
 	enforced_disc = qeth_enforce_discipline(card);
-- 
cgit v1.2.3


From 04db741d0df02fdb9ea4ddca32615153407dcf7f Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Wed, 12 Sep 2018 15:31:33 +0200
Subject: s390/qeth: switch on SG by default for IQD devices

Scatter-gather transmit brings a nice performance boost. Considering the
rather large MTU sizes at play, it's also totally the Right Thing To Do.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 6b24face21d5..b60055e9cb1a 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5706,6 +5706,8 @@ static struct net_device *qeth_alloc_netdev(struct qeth_card *card)
 		dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 		dev->hw_features |= NETIF_F_SG;
 		dev->vlan_features |= NETIF_F_SG;
+		if (IS_IQD(card))
+			dev->features |= NETIF_F_SG;
 	}
 
 	return dev;
-- 
cgit v1.2.3


From aec45e857c5538664edb76a60dd452e3265f37d1 Mon Sep 17 00:00:00 2001
From: Wenjia Zhang <wenjia@linux.ibm.com>
Date: Wed, 12 Sep 2018 15:31:34 +0200
Subject: s390/qeth: use vzalloc for QUERY OAT buffer

qeth_query_oat_command() currently allocates the kernel buffer for
the SIOC_QETH_QUERY_OAT ioctl with kzalloc. So on systems with
fragmented memory, large allocations may fail (eg. the qethqoat tool by
default uses 132KB).

Solve this issue by using vzalloc, backing the allocation with
non-contiguous memory.

Signed-off-by: Wenjia Zhang <wenjia@linux.ibm.com>
Reviewed-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core_main.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index b60055e9cb1a..de8282420f96 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -25,6 +25,7 @@
 #include <linux/netdevice.h>
 #include <linux/netdev_features.h>
 #include <linux/skbuff.h>
+#include <linux/vmalloc.h>
 
 #include <net/iucv/af_iucv.h>
 #include <net/dsfield.h>
@@ -4699,7 +4700,7 @@ static int qeth_query_oat_command(struct qeth_card *card, char __user *udata)
 
 	priv.buffer_len = oat_data.buffer_len;
 	priv.response_len = 0;
-	priv.buffer =  kzalloc(oat_data.buffer_len, GFP_KERNEL);
+	priv.buffer = vzalloc(oat_data.buffer_len);
 	if (!priv.buffer) {
 		rc = -ENOMEM;
 		goto out;
@@ -4740,7 +4741,7 @@ static int qeth_query_oat_command(struct qeth_card *card, char __user *udata)
 			rc = -EFAULT;
 
 out_free:
-	kfree(priv.buffer);
+	vfree(priv.buffer);
 out:
 	return rc;
 }
-- 
cgit v1.2.3


From 0ac1487c4b2de383b91ecad1be561b8f7a2c15f4 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Wed, 12 Sep 2018 15:31:35 +0200
Subject: s390/qeth: don't dump past end of unknown HW header

For inbound data with an unsupported HW header format, only dump the
actual HW header. We have no idea how much payload follows it, and what
it contains. Worst case, we dump past the end of the Inbound Buffer and
access whatever is located next in memory.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_l2_main.c | 2 +-
 drivers/s390/net/qeth_l3_main.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 710fa74892ae..b5e38531733f 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -423,7 +423,7 @@ static int qeth_l2_process_inbound_buffer(struct qeth_card *card,
 		default:
 			dev_kfree_skb_any(skb);
 			QETH_CARD_TEXT(card, 3, "inbunkno");
-			QETH_DBF_HEX(CTRL, 3, hdr, QETH_DBF_CTRL_LEN);
+			QETH_DBF_HEX(CTRL, 3, hdr, sizeof(*hdr));
 			continue;
 		}
 		work_done++;
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 7175086677fb..ada258c01a08 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1390,7 +1390,7 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
 		default:
 			dev_kfree_skb_any(skb);
 			QETH_CARD_TEXT(card, 3, "inbunkno");
-			QETH_DBF_HEX(CTRL, 3, hdr, QETH_DBF_CTRL_LEN);
+			QETH_DBF_HEX(CTRL, 3, hdr, sizeof(*hdr));
 			continue;
 		}
 		work_done++;
-- 
cgit v1.2.3


From 12a78b026f870c575d3a98998b25084aac5b3c61 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 11 Sep 2018 15:12:17 -0700
Subject: tipc: check return value of __tipc_dump_start()

When __tipc_dump_start() fails with running out of memory,
we have no reason to continue, especially we should avoid
calling tipc_dump_done().

Fixes: 8f5c5fcf3533 ("tipc: call start and done ops directly in __tipc_nl_compat_dumpit()")
Reported-and-tested-by: syzbot+3f8324abccfbf8c74a9f@syzkaller.appspotmail.com
Cc: Jon Maloy <jon.maloy@ericsson.com>
Cc: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/netlink_compat.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 82f665728382..6376467e78f8 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -185,7 +185,10 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
 		return -ENOMEM;
 
 	buf->sk = msg->dst_sk;
-	__tipc_dump_start(&cb, msg->net);
+	if (__tipc_dump_start(&cb, msg->net)) {
+		kfree_skb(buf);
+		return -ENOMEM;
+	}
 
 	do {
 		int rem;
-- 
cgit v1.2.3


From db191db813722297be36ffce2862e0f2b0e54d82 Mon Sep 17 00:00:00 2001
From: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Date: Tue, 11 Sep 2018 06:38:44 -0700
Subject: nfp: flower: fix vlan match by checking both vlan id and vlan pcp

Previously we only checked if the vlan id field is present when trying
to match a vlan tag. The vlan id and vlan pcp field should be treated
independently.

Fixes: 5571e8c9f241 ("nfp: extend flower matching capabilities")
Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/flower/main.h    |  1 +
 drivers/net/ethernet/netronome/nfp/flower/match.c   |  2 +-
 drivers/net/ethernet/netronome/nfp/flower/offload.c | 11 +++++++++++
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 85f8209bf007..81d941ab895c 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -70,6 +70,7 @@ struct nfp_app;
 #define NFP_FL_FEATS_GENEVE		BIT(0)
 #define NFP_FL_NBI_MTU_SETTING		BIT(1)
 #define NFP_FL_FEATS_GENEVE_OPT		BIT(2)
+#define NFP_FL_FEATS_VLAN_PCP		BIT(3)
 #define NFP_FL_FEATS_LAG		BIT(31)
 
 struct nfp_fl_mask_id {
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index a0c72f277faa..17acb8cc6044 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -56,7 +56,7 @@ nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *frame,
 						      FLOW_DISSECTOR_KEY_VLAN,
 						      target);
 		/* Populate the tci field. */
-		if (flow_vlan->vlan_id) {
+		if (flow_vlan->vlan_id || flow_vlan->vlan_priority) {
 			tmp_tci = FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
 					     flow_vlan->vlan_priority) |
 				  FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 2edab01c3beb..bd19624f10cf 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -192,6 +192,17 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
 		key_size += sizeof(struct nfp_flower_mac_mpls);
 	}
 
+	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_dissector_key_vlan *flow_vlan;
+
+		flow_vlan = skb_flow_dissector_target(flow->dissector,
+						      FLOW_DISSECTOR_KEY_VLAN,
+						      flow->mask);
+		if (!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_PCP) &&
+		    flow_vlan->vlan_priority)
+			return -EOPNOTSUPP;
+	}
+
 	if (dissector_uses_key(flow->dissector,
 			       FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
 		struct flow_dissector_key_ipv4_addrs *mask_ipv4 = NULL;
-- 
cgit v1.2.3


From 224de549f0beca58fb95c0b8da9cb2bfa8c6cc12 Mon Sep 17 00:00:00 2001
From: Louis Peens <louis.peens@netronome.com>
Date: Tue, 11 Sep 2018 06:38:45 -0700
Subject: nfp: flower: reject tunnel encap with ipv6 outer headers for
 offloading

This fixes a bug where ipv6 tunnels would report that it is
getting offloaded to hardware but would actually be rejected
by hardware.

Fixes: b27d6a95a70d ("nfp: compile flower vxlan tunnel set actions")
Signed-off-by: Louis Peens <louis.peens@netronome.com>
Reviewed-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/flower/action.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index 9044496803e6..46ba0cf257c6 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -52,6 +52,7 @@
 #define NFP_FL_TUNNEL_CSUM			cpu_to_be16(0x01)
 #define NFP_FL_TUNNEL_KEY			cpu_to_be16(0x04)
 #define NFP_FL_TUNNEL_GENEVE_OPT		cpu_to_be16(0x0800)
+#define NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS	IP_TUNNEL_INFO_TX
 #define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS	(NFP_FL_TUNNEL_CSUM | \
 						 NFP_FL_TUNNEL_KEY | \
 						 NFP_FL_TUNNEL_GENEVE_OPT)
@@ -741,11 +742,16 @@ nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a,
 		nfp_fl_push_vlan(psh_v, a);
 		*a_len += sizeof(struct nfp_fl_push_vlan);
 	} else if (is_tcf_tunnel_set(a)) {
+		struct ip_tunnel_info *ip_tun = tcf_tunnel_info(a);
 		struct nfp_repr *repr = netdev_priv(netdev);
+
 		*tun_type = nfp_fl_get_tun_from_act_l4_port(repr->app, a);
 		if (*tun_type == NFP_FL_TUNNEL_NONE)
 			return -EOPNOTSUPP;
 
+		if (ip_tun->mode & ~NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS)
+			return -EOPNOTSUPP;
+
 		/* Pre-tunnel action is required for tunnel encap.
 		 * This checks for next hop entries on NFP.
 		 * If none, the packet falls back before applying other actions.
-- 
cgit v1.2.3