From 30ca1aa536211f5ac3de0173513a7a99a98a97f3 Mon Sep 17 00:00:00 2001
From: Dedy Lansky <dlansky@codeaurora.org>
Date: Sun, 29 Jul 2018 14:59:16 +0300
Subject: cfg80211/mac80211: make ieee80211_send_layer2_update a public
 function

Make ieee80211_send_layer2_update() a common function so other drivers
can re-use it.

Signed-off-by: Dedy Lansky <dlansky@codeaurora.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 9a850973e09a..4f57f770f602 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4732,6 +4732,17 @@ static inline const u8 *cfg80211_find_ext_ie(u8 ext_eid, const u8 *ies, int len)
 const u8 *cfg80211_find_vendor_ie(unsigned int oui, int oui_type,
 				  const u8 *ies, int len);
 
+/**
+ * cfg80211_send_layer2_update - send layer 2 update frame
+ *
+ * @dev: network device
+ * @addr: STA MAC address
+ *
+ * Wireless drivers can use this function to update forwarding tables in bridge
+ * devices upon STA association.
+ */
+void cfg80211_send_layer2_update(struct net_device *dev, const u8 *addr);
+
 /**
  * DOC: Regulatory enforcement infrastructure
  *
-- 
cgit v1.2.3


From 21a5d4c3a45ca608477a083096cfbce76e449a0c Mon Sep 17 00:00:00 2001
From: Manikanta Pubbisetty <mpubbise@codeaurora.org>
Date: Wed, 11 Jul 2018 00:12:53 +0530
Subject: mac80211: add stop/start logic for software TXQs

Sometimes, it is required to stop the transmissions momentarily and
resume it later; stopping the txqs becomes very critical in scenarios where
the packet transmission has to be ceased completely. For example, during
the hardware restart, during off channel operations,
when initiating CSA(upon detecting a radar on the DFS channel), etc.

The TX queue stop/start logic in mac80211 works well in stopping the TX
when drivers make use of netdev queues, i.e, when Qdiscs in network layer
take care of traffic scheduling. Since the devices implementing
wake_tx_queue can run without Qdiscs, packets will be handed to mac80211
directly without queueing them in the netdev queues.

Also, mac80211 does not invoke any of the
netif_stop_*/netif_wake_* APIs if wake_tx_queue is implemented.
Since the queues are not stopped in this case, transmissions can continue
and this will impact negatively on the operation of the wireless device.

For example,
During hardware restart, we stop the netdev queues so that packets are
not sent to the driver. Since ath10k implements wake_tx_queue,
TX queues will not be stopped and packets might reach the hardware while
it is restarting; this can make hardware unresponsive and the only
possible option for recovery is to reboot the entire system.

There is another problem to this, it is observed that the packets
were sent on the DFS channel for a prolonged duration after radar
detection impacting the channel closing time.

We can still invoke netif stop/wake APIs when wake_tx_queue is implemented
but this could lead to packet drops in network layer; adding stop/start
logic for software TXQs in mac80211 instead makes more sense; the change
proposed adds the same in mac80211.

Signed-off-by: Manikanta Pubbisetty <mpubbise@codeaurora.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 5790f55c241d..e248f5fe5b19 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1504,6 +1504,8 @@ enum ieee80211_vif_flags {
  * @drv_priv: data area for driver use, will always be aligned to
  *	sizeof(void \*).
  * @txq: the multicast data TX queue (if driver uses the TXQ abstraction)
+ * @txqs_stopped: per AC flag to indicate that intermediate TXQs are stopped,
+ *	protected by fq->lock.
  */
 struct ieee80211_vif {
 	enum nl80211_iftype type;
@@ -1528,6 +1530,8 @@ struct ieee80211_vif {
 
 	unsigned int probe_req_reg;
 
+	bool txqs_stopped[IEEE80211_NUM_ACS];
+
 	/* must be last */
 	u8 drv_priv[0] __aligned(sizeof(void *));
 };
-- 
cgit v1.2.3


From 9cf0a0b4b64ae103cf0e7dfaa72b44ecda24c0eb Mon Sep 17 00:00:00 2001
From: Alexei Avshalom Lazar <ailizaro@codeaurora.org>
Date: Mon, 13 Aug 2018 15:33:00 +0300
Subject: cfg80211: Add support for 60GHz band channels 5 and 6

The current support in the 60GHz band is for channels 1-4.
Add support for channels 5 and 6.
This requires enlarging ieee80211_channel.center_freq from u16 to u32.

Signed-off-by: Alexei Avshalom Lazar <ailizaro@codeaurora.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 4f57f770f602..46c4cbf54903 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -149,7 +149,7 @@ enum ieee80211_channel_flags {
  */
 struct ieee80211_channel {
 	enum nl80211_band band;
-	u16 center_freq;
+	u32 center_freq;
 	u16 hw_value;
 	u32 flags;
 	int max_antenna_gain;
-- 
cgit v1.2.3


From 996d5b4db4b191f2676cf8775565cab8a5e2753b Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Wed, 11 Jul 2018 14:02:24 -0700
Subject: 9p: Use a slab for allocating requests

Replace the custom batch allocation with a slab.  Use an IDR to store
pointers to the active requests instead of an array.  We don't try to
handle P9_NOTAG specially; the IDR will happily shrink all the way back
once the TVERSION call has completed.

Link: http://lkml.kernel.org/r/20180711210225.19730-6-willy@infradead.org
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Cc: Eric Van Hensbergen <ericvh@gmail.com>
Cc: Ron Minnich <rminnich@sandia.gov>
Cc: Latchesar Ionkov <lucho@ionkov.net>
Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
---
 include/net/9p/client.h | 51 +++++++++----------------------------------------
 1 file changed, 9 insertions(+), 42 deletions(-)

(limited to 'include/net')

diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 0fa0fbab33b0..a4dc42c53d18 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -64,22 +64,15 @@ enum p9_trans_status {
 
 /**
  * enum p9_req_status_t - status of a request
- * @REQ_STATUS_IDLE: request slot unused
  * @REQ_STATUS_ALLOC: request has been allocated but not sent
  * @REQ_STATUS_UNSENT: request waiting to be sent
  * @REQ_STATUS_SENT: request sent to server
  * @REQ_STATUS_RCVD: response received from server
  * @REQ_STATUS_FLSHD: request has been flushed
  * @REQ_STATUS_ERROR: request encountered an error on the client side
- *
- * The @REQ_STATUS_IDLE state is used to mark a request slot as unused
- * but use is actually tracked by the idpool structure which handles tag
- * id allocation.
- *
  */
 
 enum p9_req_status_t {
-	REQ_STATUS_IDLE,
 	REQ_STATUS_ALLOC,
 	REQ_STATUS_UNSENT,
 	REQ_STATUS_SENT,
@@ -92,24 +85,12 @@ enum p9_req_status_t {
  * struct p9_req_t - request slots
  * @status: status of this request slot
  * @t_err: transport error
- * @flush_tag: tag of request being flushed (for flush requests)
  * @wq: wait_queue for the client to block on for this request
  * @tc: the request fcall structure
  * @rc: the response fcall structure
  * @aux: transport specific data (provided for trans_fd migration)
  * @req_list: link for higher level objects to chain requests
- *
- * Transport use an array to track outstanding requests
- * instead of a list.  While this may incurr overhead during initial
- * allocation or expansion, it makes request lookup much easier as the
- * tag id is a index into an array.  (We use tag+1 so that we can accommodate
- * the -1 tag for the T_VERSION request).
- * This also has the nice effect of only having to allocate wait_queues
- * once, instead of constantly allocating and freeing them.  Its possible
- * other resources could benefit from this scheme as well.
- *
  */
-
 struct p9_req_t {
 	int status;
 	int t_err;
@@ -117,40 +98,26 @@ struct p9_req_t {
 	struct p9_fcall *tc;
 	struct p9_fcall *rc;
 	void *aux;
-
 	struct list_head req_list;
 };
 
 /**
  * struct p9_client - per client instance state
- * @lock: protect @fidlist
+ * @lock: protect @fids and @reqs
  * @msize: maximum data size negotiated by protocol
- * @dotu: extension flags negotiated by protocol
  * @proto_version: 9P protocol version to use
  * @trans_mod: module API instantiated with this client
+ * @status: connection state
  * @trans: tranport instance state and API
  * @fids: All active FID handles
- * @tagpool - transaction id accounting for session
- * @reqs - 2D array of requests
- * @max_tag - current maximum tag id allocated
- * @name - node name used as client id
+ * @reqs: All active requests.
+ * @name: node name used as client id
  *
  * The client structure is used to keep track of various per-client
  * state that has been instantiated.
- * In order to minimize per-transaction overhead we use a
- * simple array to lookup requests instead of a hash table
- * or linked list.  In order to support larger number of
- * transactions, we make this a 2D array, allocating new rows
- * when we need to grow the total number of the transactions.
- *
- * Each row is 256 requests and we'll support up to 256 rows for
- * a total of 64k concurrent requests per session.
- *
- * Bugs: duplicated data and potentially unnecessary elements.
  */
-
 struct p9_client {
-	spinlock_t lock; /* protect client structure */
+	spinlock_t lock;
 	unsigned int msize;
 	unsigned char proto_version;
 	struct p9_trans_module *trans_mod;
@@ -170,10 +137,7 @@ struct p9_client {
 	} trans_opts;
 
 	struct idr fids;
-
-	struct p9_idpool *tagpool;
-	struct p9_req_t *reqs[P9_ROW_MAXTAG];
-	int max_tag;
+	struct idr reqs;
 
 	char name[__NEW_UTS_LEN + 1];
 };
@@ -279,4 +243,7 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *, const char *, u64 *);
 int p9_client_xattrcreate(struct p9_fid *, const char *, u64, int);
 int p9_client_readlink(struct p9_fid *fid, char **target);
 
+int p9_client_init(void);
+void p9_client_exit(void);
+
 #endif /* NET_9P_CLIENT_H */
-- 
cgit v1.2.3


From 6348b903d79119a8157aace08ab99521f5dba139 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Wed, 11 Jul 2018 14:02:25 -0700
Subject: 9p: Remove p9_idpool

There are no more users left of the p9_idpool; delete it.

Link: http://lkml.kernel.org/r/20180711210225.19730-7-willy@infradead.org
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Cc: Eric Van Hensbergen <ericvh@gmail.com>
Cc: Ron Minnich <rminnich@sandia.gov>
Cc: Latchesar Ionkov <lucho@ionkov.net>
Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
---
 include/net/9p/9p.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include/net')

diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index b8eb51a661e5..e23896116d9a 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -561,16 +561,8 @@ struct p9_fcall {
 	u8 *sdata;
 };
 
-struct p9_idpool;
-
 int p9_errstr2errno(char *errstr, int len);
 
-struct p9_idpool *p9_idpool_create(void);
-void p9_idpool_destroy(struct p9_idpool *);
-int p9_idpool_get(struct p9_idpool *p);
-void p9_idpool_put(int id, struct p9_idpool *p);
-int p9_idpool_check(int id, struct p9_idpool *p);
-
 int p9_error_init(void);
 int p9_trans_fd_init(void);
 void p9_trans_fd_exit(void);
-- 
cgit v1.2.3


From b0d1beeff2a97a0cf1965ea8f1d13b8973f22582 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= <bjorn.topel@intel.com>
Date: Tue, 28 Aug 2018 14:44:25 +0200
Subject: xdp: implement convert_to_xdp_frame for MEM_TYPE_ZERO_COPY
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit adds proper MEM_TYPE_ZERO_COPY support for
convert_to_xdp_frame. Converting a MEM_TYPE_ZERO_COPY xdp_buff to an
xdp_frame is done by transforming the MEM_TYPE_ZERO_COPY buffer into a
MEM_TYPE_PAGE_ORDER0 frame. This is costly, and in the future it might
make sense to implement a more sophisticated thread-safe alloc/free
scheme for MEM_TYPE_ZERO_COPY, so that no allocation and copy is
required in the fast-path.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/net/xdp.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/xdp.h b/include/net/xdp.h
index 76b95256c266..0d5c6fb4b2e2 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -91,6 +91,8 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame)
 	frame->dev_rx = NULL;
 }
 
+struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp);
+
 /* Convert xdp_buff to xdp_frame */
 static inline
 struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
@@ -99,9 +101,8 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
 	int metasize;
 	int headroom;
 
-	/* TODO: implement clone, copy, use "native" MEM_TYPE */
 	if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY)
-		return NULL;
+		return xdp_convert_zc_to_xdp_frame(xdp);
 
 	/* Assure headroom is available for storing info */
 	headroom = xdp->data - xdp->data_hard_start;
-- 
cgit v1.2.3


From dce5bd6140a436e3348f6d13a1efb6e6c5a89acd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= <bjorn.topel@intel.com>
Date: Tue, 28 Aug 2018 14:44:26 +0200
Subject: xdp: export xdp_rxq_info_unreg_mem_model
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Export __xdp_rxq_info_unreg_mem_model as xdp_rxq_info_unreg_mem_model,
so it can be used from netdev drivers. Also, add additional checks for
the memory type.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/net/xdp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/xdp.h b/include/net/xdp.h
index 0d5c6fb4b2e2..0f25b3675c5c 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -136,6 +136,7 @@ void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
 bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
 int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
 			       enum xdp_mem_type type, void *allocator);
+void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq);
 
 /* Drivers not supporting XDP metadata can use this helper, which
  * rejects any room expansion for metadata as a result.
-- 
cgit v1.2.3


From 902540342096af8a13351f6a22bfdd7a8e19ffd2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= <bjorn.topel@intel.com>
Date: Tue, 28 Aug 2018 14:44:27 +0200
Subject: xsk: expose xdp_umem_get_{data,dma} to drivers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the xdp_umem_get_{data,dma} functions to include/net/xdp_sock.h,
so that the upcoming zero-copy implementation in the Ethernet drivers
can utilize them.

Also, supply some dummy function implementations for
CONFIG_XDP_SOCKETS=n configs.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/net/xdp_sock.h | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

(limited to 'include/net')

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 7161856bcf9c..56994ad1ab40 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -79,6 +79,16 @@ void xsk_umem_discard_addr(struct xdp_umem *umem);
 void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
 bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len);
 void xsk_umem_consume_tx_done(struct xdp_umem *umem);
+
+static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
+{
+	return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1));
+}
+
+static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
+{
+	return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1));
+}
 #else
 static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
@@ -98,6 +108,39 @@ static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
 {
 	return false;
 }
+
+static inline u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
+{
+	return NULL;
+}
+
+static inline void xsk_umem_discard_addr(struct xdp_umem *umem)
+{
+}
+
+static inline void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
+{
+}
+
+static inline bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma,
+				       u32 *len)
+{
+	return false;
+}
+
+static inline void xsk_umem_consume_tx_done(struct xdp_umem *umem)
+{
+}
+
+static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
+{
+	return NULL;
+}
+
+static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
+{
+	return 0;
+}
 #endif /* CONFIG_XDP_SOCKETS */
 
 #endif /* _LINUX_XDP_SOCK_H */
-- 
cgit v1.2.3


From 6fce10f70461c14079d5d44aa2b25c693f4d9221 Mon Sep 17 00:00:00 2001
From: Michal Kubecek <mkubecek@suse.cz>
Date: Tue, 28 Aug 2018 18:51:58 +0200
Subject: genetlink: constify genl_err_attr() argument

genl_err_attr() sets netlink_ext_ack::bad_attr which is a pointer to const
struct nlattr so make the attr argument also const.

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/genetlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index decf6012a401..aa2e5888f18d 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -112,7 +112,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net)
 #define GENL_SET_ERR_MSG(info, msg) NL_SET_ERR_MSG((info)->extack, msg)
 
 static inline int genl_err_attr(struct genl_info *info, int err,
-				struct nlattr *attr)
+				const struct nlattr *attr)
 {
 	info->extack->bad_attr = attr;
 
-- 
cgit v1.2.3


From 93ee30f3e8b412c5fc2d2f7d9d002529d9a209ad Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Fri, 31 Aug 2018 13:40:02 +0200
Subject: xsk: i40e: get rid of useless struct xdp_umem_props

This commit gets rid of the structure xdp_umem_props. It was there to
be able to break a dependency at one point, but this is no longer
needed. The values in the struct are instead stored directly in the
xdp_umem structure. This simplifies the xsk code as well as af_xdp
zero-copy drivers and as a bonus gets rid of one internal header file.

The i40e driver is also adapted to the new interface in this commit.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/xdp_sock.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'include/net')

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 56994ad1ab40..932ca0dad6f3 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -16,11 +16,6 @@
 struct net_device;
 struct xsk_queue;
 
-struct xdp_umem_props {
-	u64 chunk_mask;
-	u64 size;
-};
-
 struct xdp_umem_page {
 	void *addr;
 	dma_addr_t dma;
@@ -30,7 +25,8 @@ struct xdp_umem {
 	struct xsk_queue *fq;
 	struct xsk_queue *cq;
 	struct xdp_umem_page *pages;
-	struct xdp_umem_props props;
+	u64 chunk_mask;
+	u64 size;
 	u32 headroom;
 	u32 chunk_size_nohr;
 	struct user_struct *user;
-- 
cgit v1.2.3


From f061b48c1787e6fece2190e27da6878f4f1796d0 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Wed, 29 Aug 2018 10:15:35 -0700
Subject: Revert "net: sched: act: add extack for lookup callback"

This reverts commit 331a9295de23 ("net: sched: act: add extack for lookup callback").

This extack is never used after 6 months... In fact, it can be just
set in the caller, right after ->lookup().

Cc: Alexander Aring <aring@mojatatu.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 970303448c90..c6f195b3c706 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -85,8 +85,7 @@ struct tc_action_ops {
 		       struct tcf_result *); /* called under RCU BH lock*/
 	int     (*dump)(struct sk_buff *, struct tc_action *, int, int);
 	void	(*cleanup)(struct tc_action *);
-	int     (*lookup)(struct net *net, struct tc_action **a, u32 index,
-			  struct netlink_ext_ack *extack);
+	int     (*lookup)(struct net *net, struct tc_action **a, u32 index);
 	int     (*init)(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **act, int ovr,
 			int bind, bool rtnl_held,
-- 
cgit v1.2.3


From 94524d8fc965a7a0facdef6d1b01d5ef6d71a802 Mon Sep 17 00:00:00 2001
From: Vakul Garg <vakul.garg@nxp.com>
Date: Wed, 29 Aug 2018 15:26:55 +0530
Subject: net/tls: Add support for async decryption of tls records

When tls records are decrypted using asynchronous acclerators such as
NXP CAAM engine, the crypto apis return -EINPROGRESS. Presently, on
getting -EINPROGRESS, the tls record processing stops till the time the
crypto accelerator finishes off and returns the result. This incurs a
context switch and is not an efficient way of accessing the crypto
accelerators. Crypto accelerators work efficient when they are queued
with multiple crypto jobs without having to wait for the previous ones
to complete.

The patch submits multiple crypto requests without having to wait for
for previous ones to complete. This has been implemented for records
which are decrypted in zero-copy mode. At the end of recvmsg(), we wait
for all the asynchronous decryption requests to complete.

The references to records which have been sent for async decryption are
dropped. For cases where record decryption is not possible in zero-copy
mode, asynchronous decryption is not used and we wait for decryption
crypto api to complete.

For crypto requests executing in async fashion, the memory for
aead_request, sglists and skb etc is freed from the decryption
completion handler. The decryption completion handler wakesup the
sleeping user context when recvmsg() flags that it has done sending
all the decryption requests and there are no more decryption requests
pending to be completed.

Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
Reviewed-by: Dave Watson <davejwatson@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/net')

diff --git a/include/net/tls.h b/include/net/tls.h
index d5c683e8bb22..cd0a65bd92f9 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -124,6 +124,12 @@ struct tls_sw_context_rx {
 	struct sk_buff *recv_pkt;
 	u8 control;
 	bool decrypted;
+	atomic_t decrypt_pending;
+	bool async_notify;
+};
+
+struct decrypt_req_ctx {
+	struct sock *sk;
 };
 
 struct tls_record_info {
-- 
cgit v1.2.3


From adf8ed01e4fdd254efead978d633718ab01a7d5c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 31 Aug 2018 11:31:08 +0300
Subject: mac80211: add an optional TXQ for other PS-buffered frames

Some drivers may want to also use the TXQ abstraction with
non-data packets that need powersave buffering, so add a
hardware flag to allow this.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index e248f5fe5b19..03e1dfd311f7 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -101,8 +101,9 @@
  * Drivers indicate that they use this model by implementing the .wake_tx_queue
  * driver operation.
  *
- * Intermediate queues (struct ieee80211_txq) are kept per-sta per-tid, with a
- * single per-vif queue for multicast data frames.
+ * Intermediate queues (struct ieee80211_txq) are kept per-sta per-tid, with
+ * another per-sta for non-data/non-mgmt and bufferable management frames, and
+ * a single per-vif queue for multicast data frames.
  *
  * The driver is expected to initialize its private per-queue data for stations
  * and interfaces in the .add_interface and .sta_add ops.
@@ -1843,7 +1844,8 @@ struct ieee80211_sta_rates {
  *	unlimited.
  * @support_p2p_ps: indicates whether the STA supports P2P PS mechanism or not.
  * @max_rc_amsdu_len: Maximum A-MSDU size in bytes recommended by rate control.
- * @txq: per-TID data TX queues (if driver uses the TXQ abstraction)
+ * @txq: per-TID data TX queues (if driver uses the TXQ abstraction); note that
+ *	the last entry (%IEEE80211_NUM_TIDS) is used for non-data frames
  */
 struct ieee80211_sta {
 	u32 supp_rates[NUM_NL80211_BANDS];
@@ -1884,7 +1886,7 @@ struct ieee80211_sta {
 	bool support_p2p_ps;
 	u16 max_rc_amsdu_len;
 
-	struct ieee80211_txq *txq[IEEE80211_NUM_TIDS];
+	struct ieee80211_txq *txq[IEEE80211_NUM_TIDS + 1];
 
 	/* must be last */
 	u8 drv_priv[0] __aligned(sizeof(void *));
@@ -1918,7 +1920,8 @@ struct ieee80211_tx_control {
  *
  * @vif: &struct ieee80211_vif pointer from the add_interface callback.
  * @sta: station table entry, %NULL for per-vif queue
- * @tid: the TID for this queue (unused for per-vif queue)
+ * @tid: the TID for this queue (unused for per-vif queue),
+ *	%IEEE80211_NUM_TIDS for non-data (if enabled)
  * @ac: the AC for this queue
  * @drv_priv: driver private area, sized by hw->txq_data_size
  *
@@ -2131,6 +2134,9 @@ struct ieee80211_txq {
  * @IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP: The driver (or firmware) doesn't
  *	support QoS NDP for AP probing - that's most likely a driver bug.
  *
+ * @IEEE80211_HW_BUFF_MMPDU_TXQ: use the TXQ for bufferable MMPDUs, this of
+ *	course requires the driver to use TXQs to start with.
+ *
  * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
  */
 enum ieee80211_hw_flags {
@@ -2176,6 +2182,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA,
 	IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP,
 	IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP,
+	IEEE80211_HW_BUFF_MMPDU_TXQ,
 
 	/* keep last, obviously */
 	NUM_IEEE80211_HW_FLAGS
-- 
cgit v1.2.3


From 244eb9ae797385c2ed244f6bdf0534fcaa6f0d33 Mon Sep 17 00:00:00 2001
From: Shaul Triebitz <shaul.triebitz@intel.com>
Date: Fri, 31 Aug 2018 11:31:14 +0300
Subject: cfg80211: add he_capabilities (ext) IE to AP settings

Same as for HT and VHT.
This helps the lower level to know whether the AP supports HE.

Signed-off-by: Shaul Triebitz <shaul.triebitz@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 33c2a1d2a8d2..9f3ed79c39d7 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -849,6 +849,7 @@ struct cfg80211_bitrate_mask {
  * @beacon_rate: bitrate to be used for beacons
  * @ht_cap: HT capabilities (or %NULL if HT isn't enabled)
  * @vht_cap: VHT capabilities (or %NULL if VHT isn't enabled)
+ * @he_cap: HE capabilities (or %NULL if HE isn't enabled)
  * @ht_required: stations must support HT
  * @vht_required: stations must support VHT
  */
@@ -874,6 +875,7 @@ struct cfg80211_ap_settings {
 
 	const struct ieee80211_ht_cap *ht_cap;
 	const struct ieee80211_vht_cap *vht_cap;
+	const struct ieee80211_he_cap_elem *he_cap;
 	bool ht_required, vht_required;
 };
 
-- 
cgit v1.2.3


From 09b4a4faf9d037990ac4f8110dd944b27b42d5df Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 31 Aug 2018 11:31:17 +0300
Subject: mac80211: introduce capability flags for VHT EXT NSS support

Depending on whether or not rate control supports selecting
rates depending on the bandwidth, we can use VHT extended
NSS support. In essence, this is dot11VHTExtendedNSSBWCapable
from the spec, since depending on that we'll need to parse
the bandwidth.

If needed, also set/clear the VHT Capability Element bit for
this capability so that we don't advertise it erroneously or
don't advertise it when we actually use it.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 03e1dfd311f7..00e2e9909d45 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2137,6 +2137,12 @@ struct ieee80211_txq {
  * @IEEE80211_HW_BUFF_MMPDU_TXQ: use the TXQ for bufferable MMPDUs, this of
  *	course requires the driver to use TXQs to start with.
  *
+ * @IEEE80211_HW_SUPPORTS_VHT_EXT_NSS_BW: (Hardware) rate control supports VHT
+ *	extended NSS BW (dot11VHTExtendedNSSBWCapable). This flag will be set if
+ *	the selected rate control algorithm sets %RATE_CTRL_CAPA_VHT_EXT_NSS_BW
+ *	but if the rate control is built-in then it must be set by the driver.
+ *	See also the documentation for that flag.
+ *
  * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
  */
 enum ieee80211_hw_flags {
@@ -2183,6 +2189,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP,
 	IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP,
 	IEEE80211_HW_BUFF_MMPDU_TXQ,
+	IEEE80211_HW_SUPPORTS_VHT_EXT_NSS_BW,
 
 	/* keep last, obviously */
 	NUM_IEEE80211_HW_FLAGS
@@ -5655,7 +5662,22 @@ struct ieee80211_tx_rate_control {
 	bool bss;
 };
 
+/**
+ * enum rate_control_capabilities - rate control capabilities
+ */
+enum rate_control_capabilities {
+	/**
+	 * @RATE_CTRL_CAPA_VHT_EXT_NSS_BW:
+	 * Support for extended NSS BW support (dot11VHTExtendedNSSCapable)
+	 * Note that this is only looked at if the minimum number of chains
+	 * that the AP uses is < the number of TX chains the hardware has,
+	 * otherwise the NSS difference doesn't bother us.
+	 */
+	RATE_CTRL_CAPA_VHT_EXT_NSS_BW = BIT(0),
+};
+
 struct rate_control_ops {
+	unsigned long capa;
 	const char *name;
 	void *(*alloc)(struct ieee80211_hw *hw, struct dentry *debugfsdir);
 	void (*free)(void *priv);
-- 
cgit v1.2.3


From 70e53669c4c41b0fc043cb0bcb518b53428edf64 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Wed, 8 Aug 2018 18:40:01 +0800
Subject: mac80211: Store sk_pacing_shift in ieee80211_hw

Make it possibly for drivers to adjust the default skb_pacing_shift
by storing it in the hardware struct.

Signed-off-by: Wen Gong <wgong@codeaurora.org>
[adjust commit log, move & adjust comment]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 00e2e9909d45..f8247d2658ac 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2308,6 +2308,10 @@ enum ieee80211_hw_flags {
  *	supported by HW.
  * @max_nan_de_entries: maximum number of NAN DE functions supported by the
  *	device.
+ *
+ * @tx_sk_pacing_shift: Pacing shift to set on TCP sockets when frames from
+ *	them are encountered. The default should typically not be changed,
+ *	unless the driver has good reasons for needing more buffers.
  */
 struct ieee80211_hw {
 	struct ieee80211_conf conf;
@@ -2343,6 +2347,7 @@ struct ieee80211_hw {
 	u8 n_cipher_schemes;
 	const struct ieee80211_cipher_scheme *cipher_schemes;
 	u8 max_nan_de_entries;
+	u8 tx_sk_pacing_shift;
 };
 
 static inline bool _ieee80211_hw_check(struct ieee80211_hw *hw,
-- 
cgit v1.2.3


From d1332e7be25088383527e3de325930bea64780cb Mon Sep 17 00:00:00 2001
From: Shaul Triebitz <shaul.triebitz@intel.com>
Date: Fri, 31 Aug 2018 11:31:20 +0300
Subject: mac80211: support radiotap L-SIG data

As before with HE, the data needs to be provided by the
driver in the skb head, since there's not enough space
in the skb CB.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Shaul Triebitz <shaul.triebitz@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/ieee80211_radiotap.h | 15 +++++++++++++++
 include/net/mac80211.h           |  2 ++
 2 files changed, 17 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index feef706e1158..80d543902b8b 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -75,6 +75,7 @@ enum ieee80211_radiotap_presence {
 	IEEE80211_RADIOTAP_TIMESTAMP = 22,
 	IEEE80211_RADIOTAP_HE = 23,
 	IEEE80211_RADIOTAP_HE_MU = 24,
+	IEEE80211_RADIOTAP_LSIG = 27,
 
 	/* valid in every it_present bitmap, even vendor namespaces */
 	IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE = 29,
@@ -325,6 +326,20 @@ enum ieee80211_radiotap_he_mu_bits {
 	IEEE80211_RADIOTAP_HE_MU_FLAGS2_CH2_CTR_26T_RU		= 0x0800,
 };
 
+enum ieee80211_radiotap_lsig_data1 {
+	IEEE80211_RADIOTAP_LSIG_DATA1_RATE_KNOWN		= 0x0001,
+	IEEE80211_RADIOTAP_LSIG_DATA1_LENGTH_KNOWN		= 0x0002,
+};
+
+enum ieee80211_radiotap_lsig_data2 {
+	IEEE80211_RADIOTAP_LSIG_DATA2_RATE			= 0x000f,
+	IEEE80211_RADIOTAP_LSIG_DATA2_LENGTH			= 0xfff0,
+};
+
+struct ieee80211_radiotap_lsig {
+	__le16 data1, data2;
+};
+
 /**
  * ieee80211_get_radiotap_len - get radiotap header length
  */
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index f8247d2658ac..3cc1ca17a1a8 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1141,6 +1141,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
  *	from the RX info data, so leave those zeroed when building this data)
  * @RX_FLAG_RADIOTAP_HE_MU: HE MU radiotap data is present
  *	(&struct ieee80211_radiotap_he_mu)
+ * @RX_FLAG_RADIOTAP_LSIG: L-SIG radiotap data is present
  */
 enum mac80211_rx_flags {
 	RX_FLAG_MMIC_ERROR		= BIT(0),
@@ -1171,6 +1172,7 @@ enum mac80211_rx_flags {
 	RX_FLAG_AMPDU_EOF_BIT_KNOWN	= BIT(25),
 	RX_FLAG_RADIOTAP_HE		= BIT(26),
 	RX_FLAG_RADIOTAP_HE_MU		= BIT(27),
+	RX_FLAG_RADIOTAP_LSIG		= BIT(28),
 };
 
 /**
-- 
cgit v1.2.3


From 62872a9b9a106f00360193f428451c321ec2e823 Mon Sep 17 00:00:00 2001
From: Alexander Wetzel <alexander@wetzel-home.de>
Date: Fri, 31 Aug 2018 15:00:38 +0200
Subject: mac80211: Fix PTK rekey freezes and clear text leak

Rekeying PTK keys without "Extended Key ID for Individually Addressed
Frames" did use a procedure not suitable to replace in-use keys and
could caused the following issues:

 1) Freeze caused by incoming frames:
    If the local STA installed the key prior to the remote STA we still
    had the old key active in the hardware when mac80211 switched over
    to the new key.
    Therefore there was a window where the card could hand over frames
    decoded with the old key to mac80211 and bump the new PN (IV) value
    to an incorrect high number. When it happened the local replay
    detection silently started to drop all frames sent with the new key.

 2) Freeze caused by outgoing frames:
    If mac80211 was providing the PN (IV) and handed over a clear text
    frame for encryption to the hardware prior to a key change the
    driver/card could have processed the queued frame after switching
    to the new key. This bumped the PN value on the remote STA to an
    incorrect high number, tricking the remote STA to discard all frames
    we sent later.

 3) Freeze caused by RX aggregation reorder buffer:
    An aggregation session started with the old key and ending after the
    switch to the new key also bumped the PN to an incorrect high number,
    freezing the connection quite similar to 1).

 4) Freeze caused by repeating lost frames in an aggregation session:
    A driver could repeat a lost frame and encrypt it with the new key
    while in a TX aggregation session without updating the PN for the
    new key. This also could freeze connections similar to 2).

 5) Clear text leak:
    Removing encryption offload from the card cleared the encryption
    offload flag only after the card had deleted the key and we did not
    stop TX during the rekey. The driver/card could therefore get
    unencrypted frames from mac80211 while no longer be instructed to
    encrypt them.

To prevent those issues the key install logic has been changed:
 - Mac80211 divers known to be able to rekey PTK0 keys have to set
   @NL80211_EXT_FEATURE_CAN_REPLACE_PTK0,
 - mac80211 stops queuing frames depending on the key during the replace
 - the key is first replaced in the hardware and after that in mac80211
 - and mac80211 stops/blocks new aggregation sessions during the rekey.

For drivers not setting
@NL80211_EXT_FEATURE_CAN_REPLACE_PTK0 the user space must avoid PTK
rekeys if "Extended Key ID for Individually Addressed Frames" is not
being used. Rekeys for mac80211 drivers without this flag will generate a
warning and use an extra call to ieee80211_flush_queues() to both
highlight and try to prevent the issues with not updated drivers.

The core of the fix changes the key install procedure from:
 - atomic switch over to the new key in mac80211
 - remove the old key in the hardware (stops encryption offloading, fall
   back to software encryption with a potential clear text packet leak
   in between)
 - delete the inactive old key in mac80211
 - enable hardware encryption offloading for the new key
to:
 - if it's a PTK mark the old key as tainted to drop TX frames with the
   outgoing key
 - replace the key in hardware with the new one
 - atomic switch over to the new (not marked as tainted) key in
   mac80211 (which also resumes TX)
 - delete the inactive old key in mac80211

With the new sequence the hardware will be unable to decrypt frames
encrypted with the old key prior to switching to the new key in mac80211
and thus prevent PNs from packets decrypted with the old key to be
accounted against the new key.

For that to work the drivers have to provide a clear boundary.
Mac80211 drivers setting @NL80211_EXT_FEATURE_CAN_REPLACE_PTK0 confirm
to provide it and mac80211 will then be able to correctly rekey in-use
PTK keys with those drivers.

The mac80211 requirements for drivers to set the flag have been added to
the "Hardware crypto acceleration" documentation section. It drills down
to:
The drivers must not hand over frames decrypted with the old key to
mac80211 once the call to set_key() with %DISABLE_KEY has been
completed. It's allowed to either drop or continue to use the old key
for any outgoing frames which are already in the queues, but it must not
send out any of them unencrypted or encrypted with the new key.

Even with the new boundary in place aggregation sessions with the
reorder buffer are problematic:
RX aggregation session started prior and completed after the rekey could
still dump frames received with the old key at mac80211 after it
switched over to the new key. This is side stepped by stopping all (RX
and TX) aggregation sessions when replacing a PTK key and hardware key
offloading.
Stopping TX aggregation sessions avoids the need to get
the PNs (IVs) updated in frames prepared for the old key and
(re)transmitted after the switch to the new key. As a bonus it improves
the compatibility when the remote STA is not handling rekeys as it
should.

When using software crypto aggregation sessions are not stopped.
Mac80211 won't be able to decode the dangerous frames and discard them
without special handling.

Signed-off-by: Alexander Wetzel <alexander@wetzel-home.de>
[trim overly long rekey warning]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 3cc1ca17a1a8..8c26d2d36cbe 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2531,6 +2531,19 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb);
  * The set_default_unicast_key() call updates the default WEP key index
  * configured to the hardware for WEP encryption type. This is required
  * for devices that support offload of data packets (e.g. ARP responses).
+ *
+ * Mac80211 drivers should set the @NL80211_EXT_FEATURE_CAN_REPLACE_PTK0 flag
+ * when they are able to replace in-use PTK keys according to to following
+ * requirements:
+ * 1) They do not hand over frames decrypted with the old key to
+      mac80211 once the call to set_key() with command %DISABLE_KEY has been
+      completed when also setting @IEEE80211_KEY_FLAG_GENERATE_IV for any key,
+   2) either drop or continue to use the old key for any outgoing frames queued
+      at the time of the key deletion (including re-transmits),
+   3) never send out a frame queued prior to the set_key() %SET_KEY command
+      encrypted with the new key and
+   4) never send out a frame unencrypted when it should be encrypted.
+   Mac80211 will not queue any new frames for a deleted key to the driver.
  */
 
 /**
-- 
cgit v1.2.3


From c3d1f8752802b2e1fb12c73bee50035bc125bc54 Mon Sep 17 00:00:00 2001
From: Shaul Triebitz <shaul.triebitz@intel.com>
Date: Wed, 5 Sep 2018 08:06:06 +0300
Subject: mac80211: support reporting 0-length PSDU in radiotap

For certain sounding frames, it may be useful to report them
to userspace even though they don't have a PSDU in order to
determine the PHY parameters (e.g. VHT rate/stream config.)
Add support for this to mac80211.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Shaul Triebitz <shaul.triebitz@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/ieee80211_radiotap.h | 6 ++++++
 include/net/mac80211.h           | 7 +++++++
 2 files changed, 13 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index 80d543902b8b..8014153bdd49 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -75,6 +75,7 @@ enum ieee80211_radiotap_presence {
 	IEEE80211_RADIOTAP_TIMESTAMP = 22,
 	IEEE80211_RADIOTAP_HE = 23,
 	IEEE80211_RADIOTAP_HE_MU = 24,
+	IEEE80211_RADIOTAP_ZERO_LEN_PSDU = 26,
 	IEEE80211_RADIOTAP_LSIG = 27,
 
 	/* valid in every it_present bitmap, even vendor namespaces */
@@ -340,6 +341,11 @@ struct ieee80211_radiotap_lsig {
 	__le16 data1, data2;
 };
 
+enum ieee80211_radiotap_zero_len_psdu_type {
+	IEEE80211_RADIOTAP_ZERO_LEN_PSDU_SOUNDING		= 0,
+	IEEE80211_RADIOTAP_ZERO_LEN_PSDU_VENDOR			= 0xff,
+};
+
 /**
  * ieee80211_get_radiotap_len - get radiotap header length
  */
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 8c26d2d36cbe..50bf598abdfd 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1142,6 +1142,10 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
  * @RX_FLAG_RADIOTAP_HE_MU: HE MU radiotap data is present
  *	(&struct ieee80211_radiotap_he_mu)
  * @RX_FLAG_RADIOTAP_LSIG: L-SIG radiotap data is present
+ * @RX_FLAG_NO_PSDU: use the frame only for radiotap reporting, with
+ *	the "0-length PSDU" field included there.  The value for it is
+ *	in &struct ieee80211_rx_status.  Note that if this value isn't
+ *	known the frame shouldn't be reported.
  */
 enum mac80211_rx_flags {
 	RX_FLAG_MMIC_ERROR		= BIT(0),
@@ -1173,6 +1177,7 @@ enum mac80211_rx_flags {
 	RX_FLAG_RADIOTAP_HE		= BIT(26),
 	RX_FLAG_RADIOTAP_HE_MU		= BIT(27),
 	RX_FLAG_RADIOTAP_LSIG		= BIT(28),
+	RX_FLAG_NO_PSDU			= BIT(29),
 };
 
 /**
@@ -1245,6 +1250,7 @@ enum mac80211_rx_encoding {
  * @ampdu_reference: A-MPDU reference number, must be a different value for
  *	each A-MPDU but the same for each subframe within one A-MPDU
  * @ampdu_delimiter_crc: A-MPDU delimiter CRC
+ * @zero_length_psdu_type: radiotap type of the 0-length PSDU
  */
 struct ieee80211_rx_status {
 	u64 mactime;
@@ -1265,6 +1271,7 @@ struct ieee80211_rx_status {
 	u8 chains;
 	s8 chain_signal[IEEE80211_MAX_CHAINS];
 	u8 ampdu_delimiter_crc;
+	u8 zero_length_psdu_type;
 };
 
 /**
-- 
cgit v1.2.3


From 0eeb2b674f05ccb5162a1d68c0b8ae81e25fd972 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Wed, 5 Sep 2018 08:06:09 +0300
Subject: mac80211: add an option for station management TXQ

We have a TXQ abstraction for non-data packets that need
powersave buffering. Since the AP cannot sleep, in case
of station we can use this TXQ for all management frames,
regardless if they are bufferable. Add HW flag to allow
that.

Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 50bf598abdfd..fe71cec8ba42 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2152,6 +2152,10 @@ struct ieee80211_txq {
  *	but if the rate control is built-in then it must be set by the driver.
  *	See also the documentation for that flag.
  *
+ * @IEEE80211_HW_STA_MMPDU_TXQ: use the extra non-TID per-station TXQ for all
+ *	MMPDUs on station interfaces. This of course requires the driver to use
+ *	TXQs to start with.
+ *
  * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
  */
 enum ieee80211_hw_flags {
@@ -2199,6 +2203,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP,
 	IEEE80211_HW_BUFF_MMPDU_TXQ,
 	IEEE80211_HW_SUPPORTS_VHT_EXT_NSS_BW,
+	IEEE80211_HW_STA_MMPDU_TXQ,
 
 	/* keep last, obviously */
 	NUM_IEEE80211_HW_FLAGS
-- 
cgit v1.2.3


From edba6bdad6fef787c0363e8a1e7d91e8d6a10129 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Wed, 5 Sep 2018 08:06:10 +0300
Subject: mac80211: allow AMSDU size limitation per-TID

Some drivers may have AMSDU size limitation per TID, due to
HW constrains. Add an option to set this limit.

Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index fe71cec8ba42..28da9e27ea70 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1853,6 +1853,7 @@ struct ieee80211_sta_rates {
  *	unlimited.
  * @support_p2p_ps: indicates whether the STA supports P2P PS mechanism or not.
  * @max_rc_amsdu_len: Maximum A-MSDU size in bytes recommended by rate control.
+ * @max_tid_amsdu_len: Maximum A-MSDU size in bytes for this TID
  * @txq: per-TID data TX queues (if driver uses the TXQ abstraction); note that
  *	the last entry (%IEEE80211_NUM_TIDS) is used for non-data frames
  */
@@ -1894,6 +1895,7 @@ struct ieee80211_sta {
 	u16 max_amsdu_len;
 	bool support_p2p_ps;
 	u16 max_rc_amsdu_len;
+	u16 max_tid_amsdu_len[IEEE80211_NUM_TIDS];
 
 	struct ieee80211_txq *txq[IEEE80211_NUM_TIDS + 1];
 
-- 
cgit v1.2.3


From 9739fe29a207ffff55361a3047e7780ebddccdb2 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Wed, 5 Sep 2018 08:06:11 +0300
Subject: mac80211: add an option for drivers to check if packets can be
 aggregated

Some hardwares have limitations on the packets' type in AMSDU.
Add an optional driver callback to determine if two skbs can
be used in the same AMSDU or not.

Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 28da9e27ea70..c4fadbafbf21 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3594,6 +3594,10 @@ enum ieee80211_reconfig_type {
  * @del_nan_func: Remove a NAN function. The driver must call
  *	ieee80211_nan_func_terminated() with
  *	NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST reason code upon removal.
+ * @can_aggregate_in_amsdu: Called in order to determine if HW supports
+ *	aggregating two specific frames in the same A-MSDU. The relation
+ *	between the skbs should be symmetric and transitive. Note that while
+ *	skb is always a real frame, head may or may not be an A-MSDU.
  */
 struct ieee80211_ops {
 	void (*tx)(struct ieee80211_hw *hw,
@@ -3876,6 +3880,9 @@ struct ieee80211_ops {
 	void (*del_nan_func)(struct ieee80211_hw *hw,
 			    struct ieee80211_vif *vif,
 			    u8 instance_id);
+	bool (*can_aggregate_in_amsdu)(struct ieee80211_hw *hw,
+				       struct sk_buff *head,
+				       struct sk_buff *skb);
 };
 
 /**
-- 
cgit v1.2.3


From c383edc42403b0bca31cbaabafd44dd58afb202f Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian@brauner.io>
Date: Tue, 4 Sep 2018 21:53:47 +0200
Subject: rtnetlink: add rtnl_get_net_ns_capable()

get_target_net() will be used in follow-up patches in ipv{4,6} codepaths to
retrieve network namespaces based on network namespace identifiers. So
remove the static declaration and export in the rtnetlink header. Also,
rename it to rtnl_get_net_ns_capable() to make it obvious what this
function is doing.
Export rtnl_get_net_ns_capable() so it can be used when ipv6 is built as
a module.

Signed-off-by: Christian Brauner <christian@brauner.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/rtnetlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 0bbaa5488423..cf26e5aacac4 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -165,6 +165,7 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm);
 
 int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
 			struct netlink_ext_ack *exterr);
+struct net *rtnl_get_net_ns_capable(struct sock *sk, int netnsid);
 
 #define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind)
 
-- 
cgit v1.2.3


From 523adb6cc10b48655c0abe556505240741425b49 Mon Sep 17 00:00:00 2001
From: Dominique Martinet <dominique.martinet@cea.fr>
Date: Mon, 30 Jul 2018 05:55:19 +0000
Subject: 9p: embed fcall in req to round down buffer allocs

'msize' is often a power of two, or at least page-aligned, so avoiding
an overhead of two dozen bytes for each allocation will help the
allocator do its work and reduce memory fragmentation.

Link: http://lkml.kernel.org/r/1533825236-22896-1-git-send-email-asmadeus@codewreck.org
Suggested-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
Reviewed-by: Greg Kurz <groug@kaod.org>
Acked-by: Jun Piao <piaojun@huawei.com>
Cc: Matthew Wilcox <willy@infradead.org>
---
 include/net/9p/client.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index a4dc42c53d18..c2671d40bb6b 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -95,8 +95,8 @@ struct p9_req_t {
 	int status;
 	int t_err;
 	wait_queue_head_t wq;
-	struct p9_fcall *tc;
-	struct p9_fcall *rc;
+	struct p9_fcall tc;
+	struct p9_fcall rc;
 	void *aux;
 	struct list_head req_list;
 };
@@ -230,6 +230,7 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
 				kgid_t gid, struct p9_qid *);
 int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status);
 int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl);
+void p9_fcall_fini(struct p9_fcall *fc);
 struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
 void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status);
 
-- 
cgit v1.2.3


From 91a76be37ff89795526c452a6799576b03bec501 Mon Sep 17 00:00:00 2001
From: Dominique Martinet <dominique.martinet@cea.fr>
Date: Mon, 30 Jul 2018 15:14:37 +0900
Subject: 9p: add a per-client fcall kmem_cache

Having a specific cache for the fcall allocations helps speed up
end-to-end latency.

The caches will automatically be merged if there are multiple caches
of items with the same size so we do not need to try to share a cache
between different clients of the same size.

Since the msize is negotiated with the server, only allocate the cache
after that negotiation has happened - previous allocations or
allocations of different sizes (e.g. zero-copy fcall) are made with
kmalloc directly.

Some figures on two beefy VMs with Connect-IB (sriov) / trans=rdma,
with ior running 32 processes in parallel doing small 32 bytes IOs:
 - no alloc (4.18-rc7 request cache): 65.4k req/s
 - non-power of two alloc, no patch: 61.6k req/s
 - power of two alloc, no patch: 62.2k req/s
 - non-power of two alloc, with patch: 64.7k req/s
 - power of two alloc, with patch: 65.1k req/s

Link: http://lkml.kernel.org/r/1532943263-24378-2-git-send-email-asmadeus@codewreck.org
Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
Acked-by: Jun Piao <piaojun@huawei.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Greg Kurz <groug@kaod.org>
---
 include/net/9p/9p.h     | 4 ++++
 include/net/9p/client.h | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include/net')

diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index e23896116d9a..beede1e1a919 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -336,6 +336,9 @@ enum p9_qid_t {
 #define P9_NOFID	(u32)(~0)
 #define P9_MAXWELEM	16
 
+/* Minimal header size: size[4] type[1] tag[2] */
+#define P9_HDRSZ	7
+
 /* ample room for Twrite/Rread header */
 #define P9_IOHDRSZ	24
 
@@ -558,6 +561,7 @@ struct p9_fcall {
 	size_t offset;
 	size_t capacity;
 
+	struct kmem_cache *cache;
 	u8 *sdata;
 };
 
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index c2671d40bb6b..735f3979d559 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -123,6 +123,7 @@ struct p9_client {
 	struct p9_trans_module *trans_mod;
 	enum p9_trans_status status;
 	void *trans;
+	struct kmem_cache *fcall_cache;
 
 	union {
 		struct {
-- 
cgit v1.2.3


From 728356dedeff8ef999cb436c71333ef4ac51a81c Mon Sep 17 00:00:00 2001
From: Tomas Bortoli <tomasbortoli@gmail.com>
Date: Tue, 14 Aug 2018 19:43:42 +0200
Subject: 9p: Add refcount to p9_req_t

To avoid use-after-free(s), use a refcount to keep track of the
usable references to any instantiated struct p9_req_t.

This commit adds p9_req_put(), p9_req_get() and p9_req_try_get() as
wrappers to kref_put(), kref_get() and kref_get_unless_zero().
These are used by the client and the transports to keep track of
valid requests' references.

p9_free_req() is added back and used as callback by kref_put().

Add SLAB_TYPESAFE_BY_RCU as it ensures that the memory freed by
kmem_cache_free() will not be reused for another type until the rcu
synchronisation period is over, so an address gotten under rcu read
lock is safe to inc_ref() without corrupting random memory while
the lock is held.

Link: http://lkml.kernel.org/r/1535626341-20693-1-git-send-email-asmadeus@codewreck.org
Co-developed-by: Dominique Martinet <dominique.martinet@cea.fr>
Signed-off-by: Tomas Bortoli <tomasbortoli@gmail.com>
Reported-by: syzbot+467050c1ce275af2a5b8@syzkaller.appspotmail.com
Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
---
 include/net/9p/client.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/net')

diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 735f3979d559..947a570307a6 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -94,6 +94,7 @@ enum p9_req_status_t {
 struct p9_req_t {
 	int status;
 	int t_err;
+	struct kref refcount;
 	wait_queue_head_t wq;
 	struct p9_fcall tc;
 	struct p9_fcall rc;
@@ -233,6 +234,19 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status);
 int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl);
 void p9_fcall_fini(struct p9_fcall *fc);
 struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
+
+static inline void p9_req_get(struct p9_req_t *r)
+{
+	kref_get(&r->refcount);
+}
+
+static inline int p9_req_try_get(struct p9_req_t *r)
+{
+	return kref_get_unless_zero(&r->refcount);
+}
+
+int p9_req_put(struct p9_req_t *r);
+
 void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status);
 
 int p9_parse_header(struct p9_fcall *, int32_t *, int8_t *, int16_t *, int);
-- 
cgit v1.2.3


From 0153167aebd0808fb90031dba07d4e696557474c Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 10 Sep 2018 09:11:28 -0700
Subject: net/ipv6: Remove rt6i_prefsrc

After the conversion to fib6_info, rt6i_prefsrc has a single user that
reads the value and otherwise it is only set. The one reader can be
converted to use rt->from so rt6i_prefsrc can be removed, reducing
rt6_info by another 20 bytes.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 3d4930528db0..c7496663f99a 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -182,7 +182,6 @@ struct rt6_info {
 	struct in6_addr			rt6i_gateway;
 	struct inet6_dev		*rt6i_idev;
 	u32				rt6i_flags;
-	struct rt6key			rt6i_prefsrc;
 
 	struct list_head		rt6i_uncached;
 	struct uncached_list		*rt6i_uncached_list;
-- 
cgit v1.2.3


From aea890b8b2e071bb75043353581f2197a2f13160 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 29 Jul 2018 16:22:13 -0700
Subject: sch_htb: Remove local SKB queue handling code.

Instead, adjust __qdisc_enqueue_tail() such that HTB can use it
instead.

The only other caller of __qdisc_enqueue_tail() is
qdisc_enqueue_tail() so we can move the backlog and return value
handling (which HTB doesn't need/want) to the latter.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index a6d00093f35e..bc8f6b0b6610 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -828,8 +828,8 @@ static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh)
 	qh->qlen = 0;
 }
 
-static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
-				       struct qdisc_skb_head *qh)
+static inline void __qdisc_enqueue_tail(struct sk_buff *skb,
+					struct qdisc_skb_head *qh)
 {
 	struct sk_buff *last = qh->tail;
 
@@ -842,14 +842,13 @@ static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
 		qh->head = skb;
 	}
 	qh->qlen++;
-	qdisc_qstats_backlog_inc(sch, skb);
-
-	return NET_XMIT_SUCCESS;
 }
 
 static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch)
 {
-	return __qdisc_enqueue_tail(skb, sch, &sch->q);
+	__qdisc_enqueue_tail(skb, &sch->q);
+	qdisc_qstats_backlog_inc(sch, skb);
+	return NET_XMIT_SUCCESS;
 }
 
 static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh)
-- 
cgit v1.2.3


From 596977300ab5c5d5d85f7950dd7f299f8322e533 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 29 Jul 2018 16:33:28 -0700
Subject: sch_netem: Move private queue handler to generic location.

By hand copies of SKB list handlers do not belong in individual packet
schedulers.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index bc8f6b0b6610..fdaa5506e6f7 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -851,6 +851,17 @@ static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch)
 	return NET_XMIT_SUCCESS;
 }
 
+static inline void __qdisc_enqueue_head(struct sk_buff *skb,
+					struct qdisc_skb_head *qh)
+{
+	skb->next = qh->head;
+
+	if (!qh->head)
+		qh->tail = skb;
+	qh->head = skb;
+	qh->qlen++;
+}
+
 static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh)
 {
 	struct sk_buff *skb = qh->head;
-- 
cgit v1.2.3


From 86c55361e569400b6286f30283a9c143a18c20d9 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Fri, 7 Sep 2018 17:22:21 +0300
Subject: net: sched: cls_flower: dump offload count value

Change flower in_hw_count type to fixed-size u32 and dump it as
TCA_FLOWER_IN_HW_COUNT. This change is necessary to properly test shared
blocks and re-offload functionality.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index fdaa5506e6f7..d326fd553b58 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -362,7 +362,7 @@ static inline void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
 }
 
 static inline void
-tc_cls_offload_cnt_update(struct tcf_block *block, unsigned int *cnt,
+tc_cls_offload_cnt_update(struct tcf_block *block, u32 *cnt,
 			  u32 *flags, bool add)
 {
 	if (add) {
-- 
cgit v1.2.3


From 67edf21e5adfd336f2ff08668eb09850943666d3 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 10 Sep 2018 17:21:42 -0700
Subject: scsi: libcxgbi: fib6_ino reference in rt6_info is rcu protected

The fib6_info reference in rt6_info is rcu protected. Add a helper
to extract prefsrc from and update cxgbi_check_route6 to use it.

Fixes: 0153167aebd0 ("net/ipv6: Remove rt6i_prefsrc")
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index c7496663f99a..f06e968f1992 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -412,6 +412,25 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
 	     struct nl_info *info, struct netlink_ext_ack *extack);
 int fib6_del(struct fib6_info *rt, struct nl_info *info);
 
+static inline
+void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr)
+{
+	const struct fib6_info *from;
+
+	rcu_read_lock();
+
+	from = rcu_dereference(rt->from);
+	if (from) {
+		*addr = from->fib6_prefsrc.addr;
+	} else {
+		struct in6_addr in6_zero = {};
+
+		*addr = in6_zero;
+	}
+
+	rcu_read_unlock();
+}
+
 static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i)
 {
 	return f6i->fib6_nh.nh_dev;
-- 
cgit v1.2.3


From 7969119293f5aa3b51040ae81a80e87c7b979b2d Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Sun, 9 Sep 2018 22:16:43 +0200
Subject: net: dsa: Add Lantiq / Intel GSWIP tag support

This handles the tag added by the PMAC on the VRX200 SoC line.

The GSWIP uses internally a GSWIP special tag which is located after the
Ethernet header. The PMAC which connects the GSWIP to the CPU converts
this special tag used by the GSWIP into the PMAC special tag which is
added in front of the Ethernet header.

This was tested with GSWIP 2.1 found in the VRX200 SoCs, other GSWIP
versions use slightly different PMAC special tags.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 461e8a7661b7..23690c44e167 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -35,6 +35,7 @@ enum dsa_tag_protocol {
 	DSA_TAG_PROTO_BRCM_PREPEND,
 	DSA_TAG_PROTO_DSA,
 	DSA_TAG_PROTO_EDSA,
+	DSA_TAG_PROTO_GSWIP,
 	DSA_TAG_PROTO_KSZ,
 	DSA_TAG_PROTO_LAN9303,
 	DSA_TAG_PROTO_MTK,
-- 
cgit v1.2.3


From 9708d2b5b7c648e8e0a40d11e8cea12f6277f33c Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 11 Sep 2018 11:42:06 -0700
Subject: llc: avoid blocking in llc_sap_close()

llc_sap_close() is called by llc_sap_put() which
could be called in BH context in llc_rcv(). We can't
block in BH.

There is no reason to block it here, kfree_rcu() should
be sufficient.

Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/llc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/llc.h b/include/net/llc.h
index 890a87318014..df282d9b4017 100644
--- a/include/net/llc.h
+++ b/include/net/llc.h
@@ -66,6 +66,7 @@ struct llc_sap {
 	int sk_count;
 	struct hlist_nulls_head sk_laddr_hash[LLC_SK_LADDR_HASH_ENTRIES];
 	struct hlist_head sk_dev_hash[LLC_SK_DEV_HASH_ENTRIES];
+	struct rcu_head rcu;
 };
 
 static inline
-- 
cgit v1.2.3


From e4a2a3048ed93f0c354ad837f1d45fc8d389d538 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Wed, 12 Sep 2018 11:16:59 +0800
Subject: net: sock: introduce SOCK_XDP

This patch introduces a new sock flag - SOCK_XDP. This will be used
for notifying the upper layer that XDP program is attached on the
lower socket, and requires for extra headroom.

TUN will be the first user.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 433f45fc2d68..38cae35f6e16 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -800,6 +800,7 @@ enum sock_flags {
 	SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */
 	SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */
 	SOCK_TXTIME,
+	SOCK_XDP, /* XDP is attached */
 };
 
 #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
-- 
cgit v1.2.3


From 293681f149a8dc4c9df2c09b2c4e873d474be5d4 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 13 Sep 2018 21:32:23 +0800
Subject: vxlan: Remove duplicated include from vxlan.h

Remove duplicated include.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/vxlan.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index b99a02ae3934..7ef15179f263 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -5,7 +5,6 @@
 #include <linux/if_vlan.h>
 #include <net/udp_tunnel.h>
 #include <net/dst_metadata.h>
-#include <net/udp_tunnel.h>
 
 /* VXLAN protocol (RFC 7348) header:
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-- 
cgit v1.2.3


From d58e468b1112dcd1d5193c0a89ff9f98b5a3e8b9 Mon Sep 17 00:00:00 2001
From: Petar Penkov <ppenkov@google.com>
Date: Fri, 14 Sep 2018 07:46:18 -0700
Subject: flow_dissector: implements flow dissector BPF hook

Adds a hook for programs of type BPF_PROG_TYPE_FLOW_DISSECTOR and
attach type BPF_FLOW_DISSECTOR that is executed in the flow dissector
path. The BPF program is per-network namespace.

Signed-off-by: Petar Penkov <ppenkov@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/net/net_namespace.h |  3 +++
 include/net/sch_generic.h   | 12 +++++++++---
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 9b5fdc50519a..99d4148e0f90 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -43,6 +43,7 @@ struct ctl_table_header;
 struct net_generic;
 struct uevent_sock;
 struct netns_ipvs;
+struct bpf_prog;
 
 
 #define NETDEV_HASHBITS    8
@@ -145,6 +146,8 @@ struct net {
 #endif
 	struct net_generic __rcu	*gen;
 
+	struct bpf_prog __rcu	*flow_dissector_prog;
+
 	/* Note : following structs are cache line aligned */
 #ifdef CONFIG_XFRM
 	struct netns_xfrm	xfrm;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index a6d00093f35e..1b81ba85fd2d 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -19,6 +19,7 @@ struct Qdisc_ops;
 struct qdisc_walker;
 struct tcf_walker;
 struct module;
+struct bpf_flow_keys;
 
 typedef int tc_setup_cb_t(enum tc_setup_type type,
 			  void *type_data, void *cb_priv);
@@ -307,9 +308,14 @@ struct tcf_proto {
 };
 
 struct qdisc_skb_cb {
-	unsigned int		pkt_len;
-	u16			slave_dev_queue_mapping;
-	u16			tc_classid;
+	union {
+		struct {
+			unsigned int		pkt_len;
+			u16			slave_dev_queue_mapping;
+			u16			tc_classid;
+		};
+		struct bpf_flow_keys *flow_keys;
+	};
 #define QDISC_CB_PRIV_LEN 20
 	unsigned char		data[QDISC_CB_PRIV_LEN];
 };
-- 
cgit v1.2.3


From cd5125d8f51882279f50506bb9c7e5e89dc9bef3 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 29 Aug 2018 14:41:30 +0200
Subject: netfilter: nf_tables: split set destruction in deactivate and destroy
 phase

Splits unbind_set into destroy_set and unbinding operation.

Unbinding removes set from lists (so new transaction would not
find it anymore) but keeps memory allocated (so packet path continues
to work).

Rebind function is added to allow unrolling in case transaction
that wants to remove set is aborted.

Destroy function is added to free the memory, but this could occur
outside of transaction in the future.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 0f39ac487012..2c33958f3e7a 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -470,6 +470,9 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
 		       struct nft_set_binding *binding);
 void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
 			  struct nft_set_binding *binding);
+void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set,
+			  struct nft_set_binding *binding);
+void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set);
 
 /**
  *	enum nft_set_extensions - set extension type IDs
@@ -724,7 +727,9 @@ struct nft_expr_type {
  *	@eval: Expression evaluation function
  *	@size: full expression size, including private data size
  *	@init: initialization function
- *	@destroy: destruction function
+ *	@activate: activate expression in the next generation
+ *	@deactivate: deactivate expression in next generation
+ *	@destroy: destruction function, called after synchronize_rcu
  *	@dump: function to dump parameters
  *	@type: expression type
  *	@validate: validate expression, called during loop detection
-- 
cgit v1.2.3


From 0935d558840099b3679c67bb7468dc78fcbad940 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 29 Aug 2018 14:41:32 +0200
Subject: netfilter: nf_tables: asynchronous release

Release the committed transaction log from a work queue, moving
expensive synchronize_rcu out of the locked section and providing
opportunity to batch this.

On my test machine this cuts runtime of nft-test.py in half.
Based on earlier patch from Pablo Neira Ayuso.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 2c33958f3e7a..841835a387e1 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1298,12 +1298,14 @@ static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
  *
  *	@list: used internally
  *	@msg_type: message type
+ *	@put_net: ctx->net needs to be put
  *	@ctx: transaction context
  *	@data: internal information related to the transaction
  */
 struct nft_trans {
 	struct list_head		list;
 	int				msg_type;
+	bool				put_net;
 	struct nft_ctx			ctx;
 	char				data[0];
 };
-- 
cgit v1.2.3


From 7a3dd8c8979ce48b99cb0e9b7435a97f0716138a Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 14 Sep 2018 13:01:46 -0700
Subject: tls: async support causes out-of-bounds access in crypto APIs

When async support was added it needed to access the sk from the async
callback to report errors up the stack. The patch tried to use space
after the aead request struct by directly setting the reqsize field in
aead_request. This is an internal field that should not be used
outside the crypto APIs. It is used by the crypto code to define extra
space for private structures used in the crypto context. Users of the
API then use crypto_aead_reqsize() and add the returned amount of
bytes to the end of the request memory allocation before posting the
request to encrypt/decrypt APIs.

So this breaks (with general protection fault and KASAN error, if
enabled) because the request sent to decrypt is shorter than required
causing the crypto API out-of-bounds errors. Also it seems unlikely the
sk is even valid by the time it gets to the callback because of memset
in crypto layer.

Anyways, fix this by holding the sk in the skb->sk field when the
callback is set up and because the skb is already passed through to
the callback handler via void* we can access it in the handler. Then
in the handler we need to be careful to NULL the pointer again before
kfree_skb. I added comments on both the setup (in tls_do_decryption)
and when we clear it from the crypto callback handler
tls_decrypt_done(). After this selftests pass again and fixes KASAN
errors/warnings.

Fixes: 94524d8fc965 ("net/tls: Add support for async decryption of tls records")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Reviewed-by: Vakul Garg <Vakul.garg@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tls.h b/include/net/tls.h
index cd0a65bd92f9..8630d28bd951 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -128,10 +128,6 @@ struct tls_sw_context_rx {
 	bool async_notify;
 };
 
-struct decrypt_req_ctx {
-	struct sock *sk;
-};
-
 struct tls_record_info {
 	struct list_head list;
 	u32 end_seq;
-- 
cgit v1.2.3


From 568b742a9d9888aca876b6ad9fa45490f18bee0a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 17 Sep 2018 11:57:28 +0200
Subject: netlink: add NLA_REJECT policy type

In some situations some netlink attributes may be used for output
only (kernel->userspace) or may be reserved for future use. It's
then helpful to be able to prevent userspace from using them in
messages sent to the kernel, since they'd otherwise be ignored and
any future will become impossible if this happens.

Add NLA_REJECT to the policy which does nothing but reject (with
EINVAL) validation of any messages containing this attribute.
Allow for returning a specific extended ACK error message in the
validation_data pointer.

While at it clear up the documentation a bit - the NLA_BITFIELD32
documentation was added to the list of len field descriptions.

Also, use NL_SET_BAD_ATTR() in one place where it's open-coded.

The specific case I have in mind now is a shared nested attribute
containing request/response data, and it would be pointless and
potentially confusing to have userspace include response data in
the messages that actually contain a request.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 0c154f98e987..b318b0a9f6c3 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -180,6 +180,7 @@ enum {
 	NLA_S32,
 	NLA_S64,
 	NLA_BITFIELD32,
+	NLA_REJECT,
 	__NLA_TYPE_MAX,
 };
 
@@ -208,9 +209,19 @@ enum {
  *    NLA_MSECS            Leaving the length field zero will verify the
  *                         given type fits, using it verifies minimum length
  *                         just like "All other"
- *    NLA_BITFIELD32      A 32-bit bitmap/bitselector attribute
+ *    NLA_BITFIELD32       Unused
+ *    NLA_REJECT           Unused
  *    All other            Minimum length of attribute payload
  *
+ * Meaning of `validation_data' field:
+ *    NLA_BITFIELD32       This is a 32-bit bitmap/bitselector attribute and
+ *                         validation data must point to a u32 value of valid
+ *                         flags
+ *    NLA_REJECT           This attribute is always rejected and validation data
+ *                         may point to a string to report as the error instead
+ *                         of the generic one in extended ACK.
+ *    All other            Unused
+ *
  * Example:
  * static const struct nla_policy my_policy[ATTR_MAX+1] = {
  * 	[ATTR_FOO] = { .type = NLA_U16 },
-- 
cgit v1.2.3


From b60b87fc2996240e298529a46e122ef62ef9c27f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 17 Sep 2018 11:57:29 +0200
Subject: netlink: add ethernet address policy types

Commonly, ethernet addresses are just using a policy of
	{ .len = ETH_ALEN }
which leaves userspace free to send more data than it should,
which may hide bugs.

Introduce NLA_EXACT_LEN which checks for exact size, rejecting
the attribute if it's not exactly that length. Also add
NLA_EXACT_LEN_WARN which requires the minimum length and will
warn on longer attributes, for backward compatibility.

Use these to define NLA_POLICY_ETH_ADDR (new strict policy) and
NLA_POLICY_ETH_ADDR_COMPAT (compatible policy with warning);
these are used like this:

    static const struct nla_policy <name>[...] = {
        [NL_ATTR_NAME] = NLA_POLICY_ETH_ADDR,
        ...
    };

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/net')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index b318b0a9f6c3..318b1ded3833 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -181,6 +181,8 @@ enum {
 	NLA_S64,
 	NLA_BITFIELD32,
 	NLA_REJECT,
+	NLA_EXACT_LEN,
+	NLA_EXACT_LEN_WARN,
 	__NLA_TYPE_MAX,
 };
 
@@ -211,6 +213,10 @@ enum {
  *                         just like "All other"
  *    NLA_BITFIELD32       Unused
  *    NLA_REJECT           Unused
+ *    NLA_EXACT_LEN        Attribute must have exactly this length, otherwise
+ *                         it is rejected.
+ *    NLA_EXACT_LEN_WARN   Attribute should have exactly this length, a warning
+ *                         is logged if it is longer, shorter is rejected.
  *    All other            Minimum length of attribute payload
  *
  * Meaning of `validation_data' field:
@@ -236,6 +242,13 @@ struct nla_policy {
 	void            *validation_data;
 };
 
+#define NLA_POLICY_EXACT_LEN(_len)	{ .type = NLA_EXACT_LEN, .len = _len }
+#define NLA_POLICY_EXACT_LEN_WARN(_len)	{ .type = NLA_EXACT_LEN_WARN, \
+					  .len = _len }
+
+#define NLA_POLICY_ETH_ADDR		NLA_POLICY_EXACT_LEN(ETH_ALEN)
+#define NLA_POLICY_ETH_ADDR_COMPAT	NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN)
+
 /**
  * struct nl_info - netlink source information
  * @nlh: Netlink message header of original request
-- 
cgit v1.2.3


From 93e66024b0249cec81e91328c55a754efd3192e0 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 12 Sep 2018 15:19:07 +0200
Subject: netfilter: conntrack: pass nf_hook_state to packet and error handlers

nf_hook_state contains all the hook meta-information: netns, protocol family,
hook location, and so on.

Instead of only passing selected information, pass a pointer to entire
structure.

This will allow to merge the error and the packet handlers and remove
the ->new() function in followup patches.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_core.h    | 3 +--
 include/net/netfilter/nf_conntrack_l4proto.h | 7 ++++---
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 2a3e0974a6af..afc9b3620473 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -20,8 +20,7 @@
 /* This header is used to share core functionality between the
    standalone connection tracking module, and the compatibility layer's use
    of connection tracking. */
-unsigned int nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
-			     struct sk_buff *skb);
+unsigned int nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state);
 
 int nf_conntrack_init_net(struct net *net);
 void nf_conntrack_cleanup_net(struct net *net);
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 8465263b297d..a857a0adfb31 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -45,7 +45,8 @@ struct nf_conntrack_l4proto {
 	int (*packet)(struct nf_conn *ct,
 		      const struct sk_buff *skb,
 		      unsigned int dataoff,
-		      enum ip_conntrack_info ctinfo);
+		      enum ip_conntrack_info ctinfo,
+		      const struct nf_hook_state *state);
 
 	/* Called when a new connection for this protocol found;
 	 * returns TRUE if it's OK.  If so, packet() called next. */
@@ -55,9 +56,9 @@ struct nf_conntrack_l4proto {
 	/* Called when a conntrack entry is destroyed */
 	void (*destroy)(struct nf_conn *ct);
 
-	int (*error)(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
+	int (*error)(struct nf_conn *tmpl, struct sk_buff *skb,
 		     unsigned int dataoff,
-		     u_int8_t pf, unsigned int hooknum);
+		     const struct nf_hook_state *state);
 
 	/* called by gc worker if table is full */
 	bool (*can_early_drop)(const struct nf_conn *ct);
-- 
cgit v1.2.3


From 9976fc6e6edbb0372f084a2ae8c1b8103b3bff1d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 12 Sep 2018 15:19:08 +0200
Subject: netfilter: conntrack: remove the l4proto->new() function

->new() gets invoked after ->error() and before ->packet() if
a conntrack lookup has found no result for the tuple.

We can fold it into ->packet() -- the packet() implementations
can check if the conntrack is confirmed (new) or not
(already in hash).

If its unconfirmed, the conntrack isn't in the hash yet so current
skb created a new conntrack entry.

Only relevant side effect -- if packet() doesn't return NF_ACCEPT
but -NF_ACCEPT (or drop), while the conntrack was just created,
then the newly allocated conntrack is freed right away, rather than not
created in the first place.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index a857a0adfb31..016958e67fcc 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -48,11 +48,6 @@ struct nf_conntrack_l4proto {
 		      enum ip_conntrack_info ctinfo,
 		      const struct nf_hook_state *state);
 
-	/* Called when a new connection for this protocol found;
-	 * returns TRUE if it's OK.  If so, packet() called next. */
-	bool (*new)(struct nf_conn *ct, const struct sk_buff *skb,
-		    unsigned int dataoff);
-
 	/* Called when a conntrack entry is destroyed */
 	void (*destroy)(struct nf_conn *ct);
 
-- 
cgit v1.2.3


From 83d213fd9d1a56108584cd812333462caa39a747 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 12 Sep 2018 15:19:09 +0200
Subject: netfilter: conntrack: deconstify packet callback skb pointer

Only two protocols need the ->error() function: icmp and icmpv6.
This is because icmp error mssages might be RELATED to an existing
connection (e.g. PMTUD, port unreachable and the like), and their
->error() handlers do this.

The error callback is already optional, so remove it for
udp and call them from ->packet() instead.

As the error() callback can call checksum functions that write to
skb->csum*, the const qualifier has to be removed as well.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 016958e67fcc..39f0c84f71b9 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -43,7 +43,7 @@ struct nf_conntrack_l4proto {
 
 	/* Returns verdict for packet, or -1 for invalid. */
 	int (*packet)(struct nf_conn *ct,
-		      const struct sk_buff *skb,
+		      struct sk_buff *skb,
 		      unsigned int dataoff,
 		      enum ip_conntrack_info ctinfo,
 		      const struct nf_hook_state *state);
-- 
cgit v1.2.3


From 6fe78fa484a5dad030b24e33e0cedc5d5bbd0fde Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 12 Sep 2018 15:19:11 +0200
Subject: netfilter: conntrack: remove error callback and handle icmp from core

icmp(v6) are the only two layer four protocols that need the error()
callback (to handle icmp errors that are related to an established
connections, e.g. packet too big, port unreachable and the like).

Remove the error callback and handle these two special cases from the core.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 39f0c84f71b9..7fdb4b95bba4 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -51,10 +51,6 @@ struct nf_conntrack_l4proto {
 	/* Called when a conntrack entry is destroyed */
 	void (*destroy)(struct nf_conn *ct);
 
-	int (*error)(struct nf_conn *tmpl, struct sk_buff *skb,
-		     unsigned int dataoff,
-		     const struct nf_hook_state *state);
-
 	/* called by gc worker if table is full */
 	bool (*can_early_drop)(const struct nf_conn *ct);
 
@@ -97,6 +93,15 @@ struct nf_conntrack_l4proto {
 	struct module *me;
 };
 
+int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
+			      struct sk_buff *skb,
+			      unsigned int dataoff,
+			      const struct nf_hook_state *state);
+
+int nf_conntrack_icmpv6_error(struct nf_conn *tmpl,
+			      struct sk_buff *skb,
+			      unsigned int dataoff,
+			      const struct nf_hook_state *state);
 /* Existing built-in generic protocol */
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic;
 
-- 
cgit v1.2.3


From ca2ca6e1c04e64413f5fb9a5d54fb8b0bdd86467 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 12 Sep 2018 15:19:12 +0200
Subject: netfilter: conntrack: remove unused proto arg from netns init
 functions

Its unused, next patch will remove l4proto->l3proto number to simplify
l4 protocol demuxer lookup.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 7fdb4b95bba4..420823a8648f 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -84,7 +84,7 @@ struct nf_conntrack_l4proto {
 #endif
 	unsigned int	*net_id;
 	/* Init l4proto pernet data */
-	int (*init_net)(struct net *net, u_int16_t proto);
+	int (*init_net)(struct net *net);
 
 	/* Return the per-net protocol part. */
 	struct nf_proto_net *(*get_net_proto)(struct net *net);
-- 
cgit v1.2.3


From dd2934a95701576203b2f61e8ded4e4a2f9183ea Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 17 Sep 2018 12:02:54 +0200
Subject: netfilter: conntrack: remove l3->l4 mapping information

l4 protocols are demuxed by l3num, l4num pair.

However, almost all l4 trackers are l3 agnostic.

Only exceptions are:
 - gre, icmp (ipv4 only)
 - icmpv6 (ipv6 only)

This commit gets rid of the l3 mapping, l4 trackers can now be looked up
by their IPPROTO_XXX value alone, which gets rid of the additional l3
indirection.

For icmp, ipcmp6 and gre, add a check on state->pf and
return -NF_ACCEPT in case we're asked to track e.g. icmpv6-in-ipv4,
this seems more fitting than using the generic tracker.

Additionally we can kill the 2nd l4proto definitions that were needed
for v4/v6 split -- they are now the same so we can use single l4proto
struct for each protocol, rather than two.

The EXPORT_SYMBOLs can be removed as all these object files are
part of nf_conntrack with no external references.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/ipv4/nf_conntrack_ipv4.h | 13 +++++--------
 include/net/netfilter/ipv6/nf_conntrack_ipv6.h | 13 -------------
 include/net/netfilter/nf_conntrack_l4proto.h   |  9 ++-------
 3 files changed, 7 insertions(+), 28 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
index c84b51682f08..135ee702c7b0 100644
--- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
@@ -10,20 +10,17 @@
 #ifndef _NF_CONNTRACK_IPV4_H
 #define _NF_CONNTRACK_IPV4_H
 
-extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4;
-extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp;
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
 #ifdef CONFIG_NF_CT_PROTO_DCCP
-extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp;
 #endif
 #ifdef CONFIG_NF_CT_PROTO_SCTP
-extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp;
 #endif
 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
-extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4;
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite;
 #endif
 
-int nf_conntrack_ipv4_compat_init(void);
-void nf_conntrack_ipv4_compat_fini(void);
-
 #endif /*_NF_CONNTRACK_IPV4_H*/
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index effa8dfba68c..7b3c873f8839 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -2,20 +2,7 @@
 #ifndef _NF_CONNTRACK_IPV6_H
 #define _NF_CONNTRACK_IPV6_H
 
-extern const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6;
-
-extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6;
-extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6;
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
-#ifdef CONFIG_NF_CT_PROTO_DCCP
-extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6;
-#endif
-#ifdef CONFIG_NF_CT_PROTO_SCTP
-extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6;
-#endif
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6;
-#endif
 
 #include <linux/sysctl.h>
 extern struct ctl_table nf_ct_ipv6_sysctl_table[];
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 420823a8648f..d838a93430a1 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -18,9 +18,6 @@
 struct seq_file;
 
 struct nf_conntrack_l4proto {
-	/* L3 Protocol number. */
-	u_int16_t l3proto;
-
 	/* L4 Protocol number. */
 	u_int8_t l4proto;
 
@@ -107,11 +104,9 @@ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic;
 
 #define MAX_NF_CT_PROTO 256
 
-const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto,
-						  u_int8_t l4proto);
+const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto);
 
-const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto,
-						    u_int8_t l4proto);
+const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u8 l4proto);
 void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p);
 
 /* Protocol pernet registration. */
-- 
cgit v1.2.3


From 93185c80a5f748620f5652e492f2a1c8d89db593 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 12 Sep 2018 15:19:14 +0200
Subject: netfilter: conntrack: clamp l4proto array size at largers supported
 protocol

All higher l4proto numbers are handled by the generic tracker; the
l4proto lookup function already returns generic one in case the l4proto
number exceeds max size.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index d838a93430a1..eed04af9b75e 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -102,7 +102,7 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl,
 /* Existing built-in generic protocol */
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic;
 
-#define MAX_NF_CT_PROTO 256
+#define MAX_NF_CT_PROTO IPPROTO_UDPLITE
 
 const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto);
 
-- 
cgit v1.2.3


From 78f2756c5fc0bf17560766dbc5aaa1e4a7ba66e4 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 20 Sep 2018 13:50:47 -0700
Subject: net/ipv4: Move device validation to helper

Move the device matching check in __fib_validate_source to a helper and
export it for use by netfilter modules. Code move only; no functional
change intended.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 69c91d1934c1..f7c109e37298 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -373,6 +373,7 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net,
 extern const struct nla_policy rtm_ipv4_policy[];
 void ip_fib_init(void);
 __be32 fib_compute_spec_dst(struct sk_buff *skb);
+bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev);
 int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 			u8 tos, int oif, struct net_device *dev,
 			struct in_device *idev, u32 *itag);
-- 
cgit v1.2.3


From a42055e8d2c30d4decfc13ce943d09c7b9dad221 Mon Sep 17 00:00:00 2001
From: Vakul Garg <vakul.garg@nxp.com>
Date: Fri, 21 Sep 2018 09:46:13 +0530
Subject: net/tls: Add support for async encryption of records for performance

In current implementation, tls records are encrypted & transmitted
serially. Till the time the previously submitted user data is encrypted,
the implementation waits and on finish starts transmitting the record.
This approach of encrypt-one record at a time is inefficient when
asynchronous crypto accelerators are used. For each record, there are
overheads of interrupts, driver softIRQ scheduling etc. Also the crypto
accelerator sits idle most of time while an encrypted record's pages are
handed over to tcp stack for transmission.

This patch enables encryption of multiple records in parallel when an
async capable crypto accelerator is present in system. This is achieved
by allowing the user space application to send more data using sendmsg()
even while previously issued data is being processed by crypto
accelerator. This requires returning the control back to user space
application after submitting encryption request to accelerator. This
also means that zero-copy mode of encryption cannot be used with async
accelerator as we must be done with user space application buffer before
returning from sendmsg().

There can be multiple records in flight to/from the accelerator. Each of
the record is represented by 'struct tls_rec'. This is used to store the
memory pages for the record.

After the records are encrypted, they are added in a linked list called
tx_ready_list which contains encrypted tls records sorted as per tls
sequence number. The records from tx_ready_list are transmitted using a
newly introduced function called tls_tx_records(). The tx_ready_list is
polled for any record ready to be transmitted in sendmsg(), sendpage()
after initiating encryption of new tls records. This achieves parallel
encryption and transmission of records when async accelerator is
present.

There could be situation when crypto accelerator completes encryption
later than polling of tx_ready_list by sendmsg()/sendpage(). Therefore
we need a deferred work context to be able to transmit records from
tx_ready_list. The deferred work context gets scheduled if applications
are not sending much data through the socket. If the applications issue
sendmsg()/sendpage() in quick succession, then the scheduling of
tx_work_handler gets cancelled as the tx_ready_list would be polled from
application's context itself. This saves scheduling overhead of deferred
work.

The patch also brings some side benefit. We are able to get rid of the
concept of CLOSED record. This is because the records once closed are
either encrypted and then placed into tx_ready_list or if encryption
fails, the socket error is set. This simplifies the kernel tls
sendpath. However since tls_device.c is still using macros, accessory
functions for CLOSED records have been retained.

Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h | 70 +++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 58 insertions(+), 12 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tls.h b/include/net/tls.h
index 9f3c4ea9ad6f..3aa73e2d8823 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -41,7 +41,7 @@
 #include <linux/tcp.h>
 #include <net/tcp.h>
 #include <net/strparser.h>
-
+#include <crypto/aead.h>
 #include <uapi/linux/tls.h>
 
 
@@ -93,24 +93,47 @@ enum {
 	TLS_NUM_CONFIG,
 };
 
-struct tls_sw_context_tx {
-	struct crypto_aead *aead_send;
-	struct crypto_wait async_wait;
-
-	char aad_space[TLS_AAD_SPACE_SIZE];
-
-	unsigned int sg_plaintext_size;
-	int sg_plaintext_num_elem;
+/* TLS records are maintained in 'struct tls_rec'. It stores the memory pages
+ * allocated or mapped for each TLS record. After encryption, the records are
+ * stores in a linked list.
+ */
+struct tls_rec {
+	struct list_head list;
+	int tx_flags;
 	struct scatterlist sg_plaintext_data[MAX_SKB_FRAGS];
-
-	unsigned int sg_encrypted_size;
-	int sg_encrypted_num_elem;
 	struct scatterlist sg_encrypted_data[MAX_SKB_FRAGS];
 
 	/* AAD | sg_plaintext_data | sg_tag */
 	struct scatterlist sg_aead_in[2];
 	/* AAD | sg_encrypted_data (data contain overhead for hdr&iv&tag) */
 	struct scatterlist sg_aead_out[2];
+
+	unsigned int sg_plaintext_size;
+	unsigned int sg_encrypted_size;
+	int sg_plaintext_num_elem;
+	int sg_encrypted_num_elem;
+
+	char aad_space[TLS_AAD_SPACE_SIZE];
+	struct aead_request aead_req;
+	u8 aead_req_ctx[];
+};
+
+struct tx_work {
+	struct delayed_work work;
+	struct sock *sk;
+};
+
+struct tls_sw_context_tx {
+	struct crypto_aead *aead_send;
+	struct crypto_wait async_wait;
+	struct tx_work tx_work;
+	struct tls_rec *open_rec;
+	struct list_head tx_ready_list;
+	atomic_t encrypt_pending;
+	int async_notify;
+
+#define BIT_TX_SCHEDULED	0
+	unsigned long tx_bitmask;
 };
 
 struct tls_sw_context_rx {
@@ -197,6 +220,8 @@ struct tls_context {
 
 	struct scatterlist *partially_sent_record;
 	u16 partially_sent_offset;
+	u64 tx_seq_number;	/* Next TLS seqnum to be transmitted */
+
 	unsigned long flags;
 	bool in_tcp_sendpages;
 
@@ -261,6 +286,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
 void tls_device_sk_destruct(struct sock *sk);
 void tls_device_init(void);
 void tls_device_cleanup(void);
+int tls_tx_records(struct sock *sk, int flags);
 
 struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
 				       u32 seq, u64 *p_record_sn);
@@ -279,6 +305,9 @@ void tls_sk_destruct(struct sock *sk, struct tls_context *ctx);
 int tls_push_sg(struct sock *sk, struct tls_context *ctx,
 		struct scatterlist *sg, u16 first_offset,
 		int flags);
+int tls_push_partial_record(struct sock *sk, struct tls_context *ctx,
+			    int flags);
+
 int tls_push_pending_closed_record(struct sock *sk, struct tls_context *ctx,
 				   int flags, long *timeo);
 
@@ -312,6 +341,23 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
 	return tls_ctx->pending_open_record_frags;
 }
 
+static inline bool is_tx_ready(struct tls_context *tls_ctx,
+			       struct tls_sw_context_tx *ctx)
+{
+	struct tls_rec *rec;
+	u64 seq;
+
+	rec = list_first_entry(&ctx->tx_ready_list, struct tls_rec, list);
+	if (!rec)
+		return false;
+
+	seq = be64_to_cpup((const __be64 *)&rec->aad_space);
+	if (seq == tls_ctx->tx_seq_number)
+		return true;
+	else
+		return false;
+}
+
 struct sk_buff *
 tls_validate_xmit_skb(struct sock *sk, struct net_device *dev,
 		      struct sk_buff *skb);
-- 
cgit v1.2.3


From 72b0094f918294e6cb8cf5c3b4520d928fbb1a57 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 21 Sep 2018 08:51:46 -0700
Subject: tcp: switch tcp_clock_ns() to CLOCK_TAI base

TCP pacing is either implemented in sch_fq or internally.
We have the goal of being able to offload pacing on the NICS.

TCP will soon provide per skb skb->tstamp as early departure time.

Like ETF in commit 25db26a91364 ("net/sched: Introduce the ETF Qdisc")
we chose CLOCK_T as the clock base, so that TCP and pacers can share
a common clock, to get better RTT samples (without pacing artificially
inflating these samples).

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 770917d0caa7..c6f0bc1dc678 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -732,7 +732,7 @@ void tcp_send_window_probe(struct sock *sk);
 
 static inline u64 tcp_clock_ns(void)
 {
-	return local_clock();
+	return ktime_get_tai_ns();
 }
 
 static inline u64 tcp_clock_us(void)
-- 
cgit v1.2.3


From 2fd66ffba50716fc5ab481c48db643af3bda2276 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 21 Sep 2018 08:51:47 -0700
Subject: tcp: introduce tcp_skb_timestamp_us() helper

There are few places where TCP reads skb->skb_mstamp expecting
a value in usec unit.

skb->tstamp (aka skb->skb_mstamp) will soon store CLOCK_TAI nsec value.

Add tcp_skb_timestamp_us() to provide proper conversion when needed.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index c6f0bc1dc678..0ca5ea10dc06 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -774,6 +774,12 @@ static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
 	return div_u64(skb->skb_mstamp, USEC_PER_SEC / TCP_TS_HZ);
 }
 
+/* provide the departure time in us unit */
+static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb)
+{
+	return skb->skb_mstamp;
+}
+
 
 #define tcp_flag_byte(th) (((u_int8_t *)th)[13])
 
@@ -1940,7 +1946,7 @@ static inline s64 tcp_rto_delta_us(const struct sock *sk)
 {
 	const struct sk_buff *skb = tcp_rtx_queue_head(sk);
 	u32 rto = inet_csk(sk)->icsk_rto;
-	u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto);
+	u64 rto_time_stamp_us = tcp_skb_timestamp_us(skb) + jiffies_to_usecs(rto);
 
 	return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp;
 }
-- 
cgit v1.2.3


From 9799ccb0e984a5c1311b22a212e7ff96e8b736de Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 21 Sep 2018 08:51:49 -0700
Subject: tcp: add tcp_wstamp_ns socket field

TCP will soon provide earliest departure time on TX skbs.
It needs to track this in a new variable.

tcp_mstamp_refresh() needs to update this variable, and
became too big to stay an inline.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0ca5ea10dc06..370198fdc65d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -752,17 +752,7 @@ static inline u32 tcp_time_stamp_raw(void)
 	return div_u64(tcp_clock_ns(), NSEC_PER_SEC / TCP_TS_HZ);
 }
 
-
-/* Refresh 1us clock of a TCP socket,
- * ensuring monotically increasing values.
- */
-static inline void tcp_mstamp_refresh(struct tcp_sock *tp)
-{
-	u64 val = tcp_clock_us();
-
-	if (val > tp->tcp_mstamp)
-		tp->tcp_mstamp = val;
-}
+void tcp_mstamp_refresh(struct tcp_sock *tp);
 
 static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
 {
-- 
cgit v1.2.3


From d3edd06ea8ea9e03de6567fda80b8be57e21a537 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 21 Sep 2018 08:51:50 -0700
Subject: tcp: provide earliest departure time in skb->tstamp

Switch internal TCP skb->skb_mstamp to skb->skb_mstamp_ns,
from usec units to nsec units.

Do not clear skb->tstamp before entering IP stacks in TX,
so that qdisc or devices can implement pacing based on the
earliest departure time instead of socket sk->sk_pacing_rate

Packets are fed with tcp_wstamp_ns, and following patch
will update tcp_wstamp_ns when both TCP and sch_fq switch to
the earliest departure time mechanism.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 370198fdc65d..ff15d8e0d525 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -761,13 +761,13 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
 
 static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
 {
-	return div_u64(skb->skb_mstamp, USEC_PER_SEC / TCP_TS_HZ);
+	return div_u64(skb->skb_mstamp_ns, NSEC_PER_SEC / TCP_TS_HZ);
 }
 
 /* provide the departure time in us unit */
 static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb)
 {
-	return skb->skb_mstamp;
+	return div_u64(skb->skb_mstamp_ns, NSEC_PER_USEC);
 }
 
 
@@ -813,7 +813,7 @@ struct tcp_skb_cb {
 #define TCPCB_SACKED_RETRANS	0x02	/* SKB retransmitted		*/
 #define TCPCB_LOST		0x04	/* SKB is lost			*/
 #define TCPCB_TAGBITS		0x07	/* All tag bits			*/
-#define TCPCB_REPAIRED		0x10	/* SKB repaired (no skb_mstamp)	*/
+#define TCPCB_REPAIRED		0x10	/* SKB repaired (no skb_mstamp_ns)	*/
 #define TCPCB_EVER_RETRANS	0x80	/* Ever retransmitted frame	*/
 #define TCPCB_RETRANS		(TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS| \
 				TCPCB_REPAIRED)
-- 
cgit v1.2.3


From 5e111210a44301304f9054e995bf33f69b6de76f Mon Sep 17 00:00:00 2001
From: Eelco Chaudron <echaudro@redhat.com>
Date: Fri, 21 Sep 2018 07:13:54 -0400
Subject: net/core: Add new basic hardware counter

Add a new hardware specific basic counter, TCA_STATS_BASIC_HW. This can
be used to count packets/bytes processed by hardware offload.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/gen_stats.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index 883bb9085f15..946bd53a9f81 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -44,6 +44,10 @@ void __gnet_stats_copy_basic(const seqcount_t *running,
 			     struct gnet_stats_basic_packed *bstats,
 			     struct gnet_stats_basic_cpu __percpu *cpu,
 			     struct gnet_stats_basic_packed *b);
+int gnet_stats_copy_basic_hw(const seqcount_t *running,
+			     struct gnet_dump *d,
+			     struct gnet_stats_basic_cpu __percpu *cpu,
+			     struct gnet_stats_basic_packed *b);
 int gnet_stats_copy_rate_est(struct gnet_dump *d,
 			     struct net_rate_estimator __rcu **ptr);
 int gnet_stats_copy_queue(struct gnet_dump *d,
-- 
cgit v1.2.3


From 28169abadb08333eb607621faa3a1dd7109e0d45 Mon Sep 17 00:00:00 2001
From: Eelco Chaudron <echaudro@redhat.com>
Date: Fri, 21 Sep 2018 07:14:02 -0400
Subject: net/sched: Add hardware specific counters to TC actions

Add additional counters that will store the bytes/packets processed by
hardware. These will be exported through the netlink interface for
displaying by the iproute2 tc tool

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h | 8 +++++---
 include/net/pkt_cls.h | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index c6f195b3c706..1ddff3360592 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -31,10 +31,12 @@ struct tc_action {
 	int				tcfa_action;
 	struct tcf_t			tcfa_tm;
 	struct gnet_stats_basic_packed	tcfa_bstats;
+	struct gnet_stats_basic_packed	tcfa_bstats_hw;
 	struct gnet_stats_queue		tcfa_qstats;
 	struct net_rate_estimator __rcu *tcfa_rate_est;
 	spinlock_t			tcfa_lock;
 	struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+	struct gnet_stats_basic_cpu __percpu *cpu_bstats_hw;
 	struct gnet_stats_queue __percpu *cpu_qstats;
 	struct tc_cookie	__rcu *act_cookie;
 	struct tcf_chain	*goto_chain;
@@ -94,7 +96,7 @@ struct tc_action_ops {
 			struct netlink_callback *, int,
 			const struct tc_action_ops *,
 			struct netlink_ext_ack *);
-	void	(*stats_update)(struct tc_action *, u64, u32, u64);
+	void	(*stats_update)(struct tc_action *, u64, u32, u64, bool);
 	size_t  (*get_fill_size)(const struct tc_action *act);
 	struct net_device *(*get_dev)(const struct tc_action *a);
 	void	(*put_dev)(struct net_device *dev);
@@ -182,13 +184,13 @@ int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
 #endif /* CONFIG_NET_CLS_ACT */
 
 static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes,
-					   u64 packets, u64 lastuse)
+					   u64 packets, u64 lastuse, bool hw)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	if (!a->ops->stats_update)
 		return;
 
-	a->ops->stats_update(a, bytes, packets, lastuse);
+	a->ops->stats_update(a, bytes, packets, lastuse, hw);
 #endif
 }
 
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 75a3f3fdb359..bbfe27f86d5f 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -318,7 +318,7 @@ tcf_exts_stats_update(const struct tcf_exts *exts,
 	for (i = 0; i < exts->nr_actions; i++) {
 		struct tc_action *a = exts->actions[i];
 
-		tcf_action_stats_update(a, bytes, packets, lastuse);
+		tcf_action_stats_update(a, bytes, packets, lastuse, true);
 	}
 
 	preempt_enable();
-- 
cgit v1.2.3


From fc6e8073f304010605f834cb2eb8c07c46461c9d Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Sat, 22 Sep 2018 21:26:20 -0700
Subject: neighbour: send netlink notification if NTF_ROUTER changes

send netlink notification if neigh_update results in NTF_ROUTER
change and if NEIGH_UPDATE_F_ISROUTER is on. Also move the
NTF_ROUTER change function into a helper.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/neighbour.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/net')

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 6c1eecd56a4d..0874f7fcd859 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -544,4 +544,19 @@ static inline void neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
 		*notify = 1;
 	}
 }
+
+static inline void neigh_update_is_router(struct neighbour *neigh, u32 flags,
+					  int *notify)
+{
+	u8 ndm_flags = 0;
+
+	ndm_flags |= (flags & NEIGH_UPDATE_F_ISROUTER) ? NTF_ROUTER : 0;
+	if ((neigh->flags ^ ndm_flags) & NTF_ROUTER) {
+		if (ndm_flags & NTF_ROUTER)
+			neigh->flags |= NTF_ROUTER;
+		else
+			neigh->flags &= ~NTF_ROUTER;
+		*notify = 1;
+	}
+}
 #endif
-- 
cgit v1.2.3


From 9932a29ab1be1427a2ccbdf852a0f131f2849685 Mon Sep 17 00:00:00 2001
From: Vakul Garg <vakul.garg@nxp.com>
Date: Mon, 24 Sep 2018 15:35:56 +0530
Subject: net/tls: Fixed race condition in async encryption

On processors with multi-engine crypto accelerators, it is possible that
multiple records get encrypted in parallel and their encryption
completion is notified to different cpus in multicore processor. This
leads to the situation where tls_encrypt_done() starts executing in
parallel on different cores. In current implementation, encrypted
records are queued to tx_ready_list in tls_encrypt_done(). This requires
addition to linked list 'tx_ready_list' to be protected. As
tls_decrypt_done() could be executing in irq content, it is not possible
to protect linked list addition operation using a lock.

To fix the problem, we remove linked list addition operation from the
irq context. We do tx_ready_list addition/removal operation from
application context only and get rid of possible multiple access to
the linked list. Before starting encryption on the record, we add it to
the tail of tx_ready_list. To prevent tls_tx_records() from transmitting
it, we mark the record with a new flag 'tx_ready' in 'struct tls_rec'.
When record encryption gets completed, tls_encrypt_done() has to only
update the 'tx_ready' flag to true & linked list add operation is not
required.

The changed logic brings some other side benefits. Since the records
are always submitted in tls sequence number order for encryption, the
tx_ready_list always remains sorted and addition of new records to it
does not have to traverse the linked list.

Lastly, we renamed tx_ready_list in 'struct tls_sw_context_tx' to
'tx_list'. This is because now, the some of the records at the tail are
not ready to transmit.

Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption")
Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tls.h b/include/net/tls.h
index 3aa73e2d8823..1615fb5ea114 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -99,6 +99,7 @@ enum {
  */
 struct tls_rec {
 	struct list_head list;
+	int tx_ready;
 	int tx_flags;
 	struct scatterlist sg_plaintext_data[MAX_SKB_FRAGS];
 	struct scatterlist sg_encrypted_data[MAX_SKB_FRAGS];
@@ -128,7 +129,7 @@ struct tls_sw_context_tx {
 	struct crypto_wait async_wait;
 	struct tx_work tx_work;
 	struct tls_rec *open_rec;
-	struct list_head tx_ready_list;
+	struct list_head tx_list;
 	atomic_t encrypt_pending;
 	int async_notify;
 
@@ -220,7 +221,6 @@ struct tls_context {
 
 	struct scatterlist *partially_sent_record;
 	u16 partially_sent_offset;
-	u64 tx_seq_number;	/* Next TLS seqnum to be transmitted */
 
 	unsigned long flags;
 	bool in_tcp_sendpages;
@@ -341,21 +341,15 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
 	return tls_ctx->pending_open_record_frags;
 }
 
-static inline bool is_tx_ready(struct tls_context *tls_ctx,
-			       struct tls_sw_context_tx *ctx)
+static inline bool is_tx_ready(struct tls_sw_context_tx *ctx)
 {
 	struct tls_rec *rec;
-	u64 seq;
 
-	rec = list_first_entry(&ctx->tx_ready_list, struct tls_rec, list);
+	rec = list_first_entry(&ctx->tx_list, struct tls_rec, list);
 	if (!rec)
 		return false;
 
-	seq = be64_to_cpup((const __be64 *)&rec->aad_space);
-	if (seq == tls_ctx->tx_seq_number)
-		return true;
-	else
-		return false;
+	return READ_ONCE(rec->tx_ready);
 }
 
 struct sk_buff *
-- 
cgit v1.2.3


From f5bd91388e26557f64ca999e0006038c7a919308 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Fri, 7 Sep 2018 10:18:46 +0200
Subject: net: xsk: add a simple buffer reuse queue

XSK UMEM is strongly single producer single consumer so reuse of
frames is challenging.  Add a simple "stash" of FILL packets to
reuse for drivers to optionally make use of.  This is useful
when driver has to free (ndo_stop) or resize a ring with an active
AF_XDP ZC socket.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/net/xdp_sock.h | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

(limited to 'include/net')

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 932ca0dad6f3..70a115bea4f4 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -21,6 +21,12 @@ struct xdp_umem_page {
 	dma_addr_t dma;
 };
 
+struct xdp_umem_fq_reuse {
+	u32 nentries;
+	u32 length;
+	u64 handles[];
+};
+
 struct xdp_umem {
 	struct xsk_queue *fq;
 	struct xsk_queue *cq;
@@ -37,6 +43,7 @@ struct xdp_umem {
 	struct page **pgs;
 	u32 npgs;
 	struct net_device *dev;
+	struct xdp_umem_fq_reuse *fq_reuse;
 	u16 queue_id;
 	bool zc;
 	spinlock_t xsk_list_lock;
@@ -75,6 +82,10 @@ void xsk_umem_discard_addr(struct xdp_umem *umem);
 void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
 bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len);
 void xsk_umem_consume_tx_done(struct xdp_umem *umem);
+struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries);
+struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
+					  struct xdp_umem_fq_reuse *newq);
+void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq);
 
 static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
 {
@@ -85,6 +96,35 @@ static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
 {
 	return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1));
 }
+
+/* Reuse-queue aware version of FILL queue helpers */
+static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
+{
+	struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
+
+	if (!rq->length)
+		return xsk_umem_peek_addr(umem, addr);
+
+	*addr = rq->handles[rq->length - 1];
+	return addr;
+}
+
+static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem)
+{
+	struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
+
+	if (!rq->length)
+		xsk_umem_discard_addr(umem);
+	else
+		rq->length--;
+}
+
+static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
+{
+	struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
+
+	rq->handles[rq->length++] = addr;
+}
 #else
 static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
@@ -128,6 +168,21 @@ static inline void xsk_umem_consume_tx_done(struct xdp_umem *umem)
 {
 }
 
+static inline struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries)
+{
+	return NULL;
+}
+
+static inline struct xdp_umem_fq_reuse *xsk_reuseq_swap(
+	struct xdp_umem *umem,
+	struct xdp_umem_fq_reuse *newq)
+{
+	return NULL;
+}
+static inline void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq)
+{
+}
+
 static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
 {
 	return NULL;
@@ -137,6 +192,20 @@ static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
 {
 	return 0;
 }
+
+static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
+{
+	return NULL;
+}
+
+static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem)
+{
+}
+
+static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
+{
+}
+
 #endif /* CONFIG_XDP_SOCKETS */
 
 #endif /* _LINUX_XDP_SOCK_H */
-- 
cgit v1.2.3


From 86bd446b5cebd783187ea3772ff258210de77d99 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 24 Sep 2018 19:22:50 +0300
Subject: net: sched: rename qdisc_destroy() to qdisc_put()

Current implementation of qdisc_destroy() decrements Qdisc reference
counter and only actually destroy Qdisc if reference counter value reached
zero. Rename qdisc_destroy() to qdisc_put() in order for it to better
describe the way in which this function currently implemented and used.

Extract code that deallocates Qdisc into new private qdisc_destroy()
function. It is intended to be shared between regular qdisc_put() and its
unlocked version that is introduced in next patch in this series.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index d326fd553b58..fadb1a4d4ee8 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -554,7 +554,7 @@ void dev_deactivate_many(struct list_head *head);
 struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
 			      struct Qdisc *qdisc);
 void qdisc_reset(struct Qdisc *qdisc);
-void qdisc_destroy(struct Qdisc *qdisc);
+void qdisc_put(struct Qdisc *qdisc);
 void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n,
 			       unsigned int len);
 struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
-- 
cgit v1.2.3


From 3a7d0d07a386716b459b00783b11a8211cefcc0f Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 24 Sep 2018 19:22:51 +0300
Subject: net: sched: extend Qdisc with rcu

Currently, Qdisc API functions assume that users have rtnl lock taken. To
implement rtnl unlocked classifiers update interface, Qdisc API must be
extended with functions that do not require rtnl lock.

Extend Qdisc structure with rcu. Implement special version of put function
qdisc_put_unlocked() that is called without rtnl lock taken. This function
only takes rtnl lock if Qdisc reference counter reached zero and is
intended to be used as optimization.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h   | 1 +
 include/net/sch_generic.h | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/net')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 7dc769e5452b..a16fbe9a2a67 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -102,6 +102,7 @@ int qdisc_set_default(const char *id);
 void qdisc_hash_add(struct Qdisc *q, bool invisible);
 void qdisc_hash_del(struct Qdisc *q);
 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle);
+struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle);
 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
 					struct nlattr *tab,
 					struct netlink_ext_ack *extack);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index fadb1a4d4ee8..091b40c198ff 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -105,6 +105,7 @@ struct Qdisc {
 
 	spinlock_t		busylock ____cacheline_aligned_in_smp;
 	spinlock_t		seqlock;
+	struct rcu_head		rcu;
 };
 
 static inline void qdisc_refcount_inc(struct Qdisc *qdisc)
@@ -555,6 +556,7 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
 			      struct Qdisc *qdisc);
 void qdisc_reset(struct Qdisc *qdisc);
 void qdisc_put(struct Qdisc *qdisc);
+void qdisc_put_unlocked(struct Qdisc *qdisc);
 void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n,
 			       unsigned int len);
 struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
-- 
cgit v1.2.3


From 9d7e82cec35c027756ec97e274f878251f271181 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 24 Sep 2018 19:22:52 +0300
Subject: net: sched: add helper function to take reference to Qdisc

Implement function to take reference to Qdisc that relies on rcu read lock
instead of rtnl mutex. Function only takes reference to Qdisc if reference
counter isn't zero. Intended to be used by unlocked cls API.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 091b40c198ff..43b17f82d8ee 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -115,6 +115,19 @@ static inline void qdisc_refcount_inc(struct Qdisc *qdisc)
 	refcount_inc(&qdisc->refcnt);
 }
 
+/* Intended to be used by unlocked users, when concurrent qdisc release is
+ * possible.
+ */
+
+static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc)
+{
+	if (qdisc->flags & TCQ_F_BUILTIN)
+		return qdisc;
+	if (refcount_inc_not_zero(&qdisc->refcnt))
+		return qdisc;
+	return NULL;
+}
+
 static inline bool qdisc_is_running(struct Qdisc *qdisc)
 {
 	if (qdisc->flags & TCQ_F_NOLOCK)
-- 
cgit v1.2.3


From cfebd7e242d7193a9901222b3e667788810d98c1 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 24 Sep 2018 19:22:54 +0300
Subject: net: sched: change tcf block reference counter type to refcount_t

As a preparation for removing rtnl lock dependency from rules update path,
change tcf block reference counter type to refcount_t to allow modification
by concurrent users.

In block put function perform decrement and check reference counter once to
accommodate concurrent modification by unlocked users. After this change
tcf_chain_put at the end of block put function is called with
block->refcnt==0 and will deallocate block after the last chain is
released, so there is no need to manually deallocate block in this case.
However, if block reference counter reached 0 and there are no chains to
release, block must still be deallocated manually.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 43b17f82d8ee..4a86f4d33f07 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -345,7 +345,7 @@ struct tcf_chain {
 struct tcf_block {
 	struct list_head chain_list;
 	u32 index; /* block index for shared blocks */
-	unsigned int refcnt;
+	refcount_t refcnt;
 	struct net *net;
 	struct Qdisc *q;
 	struct list_head cb_list;
-- 
cgit v1.2.3


From 0607e439943bd150e53eed2979f9c69aa61c37ce Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 24 Sep 2018 19:22:57 +0300
Subject: net: sched: implement tcf_block_refcnt_{get|put}()

Implement get/put function for blocks that only take/release the reference
and perform deallocation. These functions are intended to be used by
unlocked rules update path to always hold reference to block while working
with it. They use on new fine-grained locking mechanisms introduced in
previous patches in this set, instead of relying on global protection
provided by rtnl lock.

Extract code that is common with tcf_block_detach_ext() into common
function __tcf_block_put().

Extend tcf_block with rcu to allow safe deallocation when it is accessed
concurrently.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 4a86f4d33f07..7a6b71ee5433 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -357,6 +357,7 @@ struct tcf_block {
 		struct tcf_chain *chain;
 		struct list_head filter_chain_list;
 	} chain0;
+	struct rcu_head rcu;
 };
 
 static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
-- 
cgit v1.2.3


From 0bcbf6518456f63038a290bd359237d31f6f8ac3 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 23 Sep 2018 11:59:13 -0700
Subject: cfg80211: fix reg_query_regdb_wmm kernel-doc

Drop @ptr from kernel-doc for function reg_query_regdb_wmm().
This function parameter was recently removed so update the
kernel-doc to match that and remove the kernel-doc warnings.

Removes 109 occurrences of this warning message:
../include/net/cfg80211.h:4869: warning: Excess function parameter 'ptr' description in 'reg_query_regdb_wmm'

Fixes: 38cb87ee47fb ("cfg80211: make wmm_rule part of the reg_rule structure")

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Stanislaw Gruszka <sgruszka@redhat.com>
Cc: Johannes Berg <johannes.berg@intel.com>
Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: linux-wireless@vger.kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 8ebabc9873d1..4de121e24ce5 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4852,8 +4852,6 @@ const char *reg_initiator_name(enum nl80211_reg_initiator initiator);
  *
  * @alpha2: the ISO/IEC 3166 alpha2 wmm rule to be queried.
  * @freq: the freqency(in MHz) to be queried.
- * @ptr: pointer where the regdb wmm data is to be stored (or %NULL if
- *	irrelevant). This can be used later for deduplication.
  * @rule: pointer to store the wmm rule from the regulatory db.
  *
  * Self-managed wireless drivers can use this function to  query
-- 
cgit v1.2.3


From cd11d11286cba88aab5b1da1c83ee36e5b5cefb7 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Wed, 26 Sep 2018 18:29:06 +0200
Subject: net/af_iucv: locate IUCV header via skb_network_header()

This patch attempts to untangle the TX and RX code in qeth from
af_iucv's respective HiperTransport path:
On the TX side, pointing skb_network_header() at the IUCV header
means that qeth_l3_fill_af_iucv_hdr() no longer needs a magical offset
to access the header.
On the RX side, qeth pulls the (fake) L2 header off the skb like any
normal ethernet driver would. This makes working with the IUCV header
in af_iucv easier, since we no longer have to assume a fixed skb layout.

While at it, replace the open-coded length checks in af_iucv's RX path
with pskb_may_pull().

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/iucv/af_iucv.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/net')

diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h
index f4c21b5a1242..14a490246be9 100644
--- a/include/net/iucv/af_iucv.h
+++ b/include/net/iucv/af_iucv.h
@@ -80,6 +80,11 @@ struct af_iucv_trans_hdr {
 	u8 pad;                          /* total 104 bytes */
 } __packed;
 
+static inline struct af_iucv_trans_hdr *iucv_trans_hdr(struct sk_buff *skb)
+{
+	return (struct af_iucv_trans_hdr *)skb_network_header(skb);
+}
+
 enum iucv_tx_notify {
 	/* transmission of skb is completed and was successful */
 	TX_NOTIFY_OK = 0,
-- 
cgit v1.2.3


From d4859d749aa7090ffb743d15648adb962a1baeae Mon Sep 17 00:00:00 2001
From: Mahesh Bandewar <maheshb@google.com>
Date: Mon, 24 Sep 2018 14:40:11 -0700
Subject: bonding: avoid possible dead-lock

Syzkaller reported this on a slightly older kernel but it's still
applicable to the current kernel -

======================================================
WARNING: possible circular locking dependency detected
4.18.0-next-20180823+ #46 Not tainted
------------------------------------------------------
syz-executor4/26841 is trying to acquire lock:
00000000dd41ef48 ((wq_completion)bond_dev->name){+.+.}, at: flush_workqueue+0x2db/0x1e10 kernel/workqueue.c:2652

but task is already holding lock:
00000000768ab431 (rtnl_mutex){+.+.}, at: rtnl_lock net/core/rtnetlink.c:77 [inline]
00000000768ab431 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x412/0xc30 net/core/rtnetlink.c:4708

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 (rtnl_mutex){+.+.}:
       __mutex_lock_common kernel/locking/mutex.c:925 [inline]
       __mutex_lock+0x171/0x1700 kernel/locking/mutex.c:1073
       mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:1088
       rtnl_lock+0x17/0x20 net/core/rtnetlink.c:77
       bond_netdev_notify drivers/net/bonding/bond_main.c:1310 [inline]
       bond_netdev_notify_work+0x44/0xd0 drivers/net/bonding/bond_main.c:1320
       process_one_work+0xc73/0x1aa0 kernel/workqueue.c:2153
       worker_thread+0x189/0x13c0 kernel/workqueue.c:2296
       kthread+0x35a/0x420 kernel/kthread.c:246
       ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415

-> #1 ((work_completion)(&(&nnw->work)->work)){+.+.}:
       process_one_work+0xc0b/0x1aa0 kernel/workqueue.c:2129
       worker_thread+0x189/0x13c0 kernel/workqueue.c:2296
       kthread+0x35a/0x420 kernel/kthread.c:246
       ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415

-> #0 ((wq_completion)bond_dev->name){+.+.}:
       lock_acquire+0x1e4/0x4f0 kernel/locking/lockdep.c:3901
       flush_workqueue+0x30a/0x1e10 kernel/workqueue.c:2655
       drain_workqueue+0x2a9/0x640 kernel/workqueue.c:2820
       destroy_workqueue+0xc6/0x9d0 kernel/workqueue.c:4155
       __alloc_workqueue_key+0xef9/0x1190 kernel/workqueue.c:4138
       bond_init+0x269/0x940 drivers/net/bonding/bond_main.c:4734
       register_netdevice+0x337/0x1100 net/core/dev.c:8410
       bond_newlink+0x49/0xa0 drivers/net/bonding/bond_netlink.c:453
       rtnl_newlink+0xef4/0x1d50 net/core/rtnetlink.c:3099
       rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4711
       netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454
       rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4729
       netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
       netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343
       netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908
       sock_sendmsg_nosec net/socket.c:622 [inline]
       sock_sendmsg+0xd5/0x120 net/socket.c:632
       ___sys_sendmsg+0x7fd/0x930 net/socket.c:2115
       __sys_sendmsg+0x11d/0x290 net/socket.c:2153
       __do_sys_sendmsg net/socket.c:2162 [inline]
       __se_sys_sendmsg net/socket.c:2160 [inline]
       __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2160
       do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
       entry_SYSCALL_64_after_hwframe+0x49/0xbe

other info that might help us debug this:

Chain exists of:
  (wq_completion)bond_dev->name --> (work_completion)(&(&nnw->work)->work) --> rtnl_mutex

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(rtnl_mutex);
                               lock((work_completion)(&(&nnw->work)->work));
                               lock(rtnl_mutex);
  lock((wq_completion)bond_dev->name);

 *** DEADLOCK ***

1 lock held by syz-executor4/26841:

stack backtrace:
CPU: 1 PID: 26841 Comm: syz-executor4 Not tainted 4.18.0-next-20180823+ #46
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113
 print_circular_bug.isra.34.cold.55+0x1bd/0x27d kernel/locking/lockdep.c:1222
 check_prev_add kernel/locking/lockdep.c:1862 [inline]
 check_prevs_add kernel/locking/lockdep.c:1975 [inline]
 validate_chain kernel/locking/lockdep.c:2416 [inline]
 __lock_acquire+0x3449/0x5020 kernel/locking/lockdep.c:3412
 lock_acquire+0x1e4/0x4f0 kernel/locking/lockdep.c:3901
 flush_workqueue+0x30a/0x1e10 kernel/workqueue.c:2655
 drain_workqueue+0x2a9/0x640 kernel/workqueue.c:2820
 destroy_workqueue+0xc6/0x9d0 kernel/workqueue.c:4155
 __alloc_workqueue_key+0xef9/0x1190 kernel/workqueue.c:4138
 bond_init+0x269/0x940 drivers/net/bonding/bond_main.c:4734
 register_netdevice+0x337/0x1100 net/core/dev.c:8410
 bond_newlink+0x49/0xa0 drivers/net/bonding/bond_netlink.c:453
 rtnl_newlink+0xef4/0x1d50 net/core/rtnetlink.c:3099
 rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4711
 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454
 rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4729
 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
 netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343
 netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908
 sock_sendmsg_nosec net/socket.c:622 [inline]
 sock_sendmsg+0xd5/0x120 net/socket.c:632
 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2115
 __sys_sendmsg+0x11d/0x290 net/socket.c:2153
 __do_sys_sendmsg net/socket.c:2162 [inline]
 __se_sys_sendmsg net/socket.c:2160 [inline]
 __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2160
 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x457089
Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007f2df20a5c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00007f2df20a66d4 RCX: 0000000000457089
RDX: 0000000000000000 RSI: 0000000020000180 RDI: 0000000000000003
RBP: 0000000000930140 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff
R13: 00000000004d40b8 R14: 00000000004c8ad8 R15: 0000000000000001

Signed-off-by: Mahesh Bandewar <maheshb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/bonding.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'include/net')

diff --git a/include/net/bonding.h b/include/net/bonding.h
index a2d058170ea3..b46d68acf701 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -139,12 +139,6 @@ struct bond_parm_tbl {
 	int mode;
 };
 
-struct netdev_notify_work {
-	struct delayed_work	work;
-	struct net_device	*dev;
-	struct netdev_bonding_info bonding_info;
-};
-
 struct slave {
 	struct net_device *dev; /* first - useful for panic debug */
 	struct bonding *bond; /* our master */
@@ -172,6 +166,7 @@ struct slave {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	struct netpoll *np;
 #endif
+	struct delayed_work notify_work;
 	struct kobject kobj;
 	struct rtnl_link_stats64 slave_stats;
 };
-- 
cgit v1.2.3


From d888f39666774c7debfa34e4e20ba33cf61a6d71 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= <maze@google.com>
Date: Tue, 25 Sep 2018 20:56:26 -0700
Subject: net-ipv4: remove 2 always zero parameters from ipv4_update_pmtu()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

(the parameters in question are mark and flow_flags)

Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/route.h b/include/net/route.h
index bb53cdba38dc..73c605bdd6d8 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -201,7 +201,7 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
 }
 
 void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif,
-		      u32 mark, u8 protocol, int flow_flags);
+		      u8 protocol);
 void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu);
 void ipv4_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
 		   u8 protocol, int flow_flags);
-- 
cgit v1.2.3


From 1042caa79e9351b81ed19dc8d2d7fd6ff51a4422 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= <maze@google.com>
Date: Tue, 25 Sep 2018 20:56:27 -0700
Subject: net-ipv4: remove 2 always zero parameters from ipv4_redirect()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

(the parameters in question are mark and flow_flags)

Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/route.h b/include/net/route.h
index 73c605bdd6d8..9883dc82f723 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -203,8 +203,7 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
 void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif,
 		      u8 protocol);
 void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu);
-void ipv4_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
-		   u8 protocol, int flow_flags);
+void ipv4_redirect(struct sk_buff *skb, struct net *net, int oif, u8 protocol);
 void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk);
 void ip_rt_send_redirect(struct sk_buff *skb);
 
-- 
cgit v1.2.3


From b950aa88638c52a013504f025e0b8f99bf2dc26e Mon Sep 17 00:00:00 2001
From: Ankit Navik <ankit.p.navik@intel.com>
Date: Fri, 17 Aug 2018 07:29:19 +0530
Subject: Bluetooth: Add definitions and track LE resolve list modification

Add the definitions for adding entries to the LE resolve list and
removing entries from the LE resolve list. When the LE resolve list
gets changed via HCI commands make sure that the internal storage of
the resolve list entries gets updated.

Signed-off-by: Ankit Navik <ankit.p.navik@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h      | 14 ++++++++++++++
 include/net/bluetooth/hci_core.h | 15 +++++++++++++++
 2 files changed, 29 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index cdd9f1fe7cfa..c36dc1e20556 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1517,6 +1517,20 @@ struct hci_cp_le_write_def_data_len {
 	__le16	tx_time;
 } __packed;
 
+#define HCI_OP_LE_ADD_TO_RESOLV_LIST	0x2027
+struct hci_cp_le_add_to_resolv_list {
+	__u8	 bdaddr_type;
+	bdaddr_t bdaddr;
+	__u8	 peer_irk[16];
+	__u8	 local_irk[16];
+} __packed;
+
+#define HCI_OP_LE_DEL_FROM_RESOLV_LIST	0x2028
+struct hci_cp_le_del_from_resolv_list {
+	__u8	 bdaddr_type;
+	bdaddr_t bdaddr;
+} __packed;
+
 #define HCI_OP_LE_CLEAR_RESOLV_LIST	0x2029
 
 #define HCI_OP_LE_READ_RESOLV_LIST_SIZE	0x202a
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 0db1b9b428b7..9b0f821b2d3a 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -103,6 +103,14 @@ struct bdaddr_list {
 	u8 bdaddr_type;
 };
 
+struct bdaddr_list_with_irk {
+	struct list_head list;
+	bdaddr_t bdaddr;
+	u8 bdaddr_type;
+	u8 peer_irk[16];
+	u8 local_irk[16];
+};
+
 struct bt_uuid {
 	struct list_head list;
 	u8 uuid[16];
@@ -1058,8 +1066,15 @@ int hci_inquiry(void __user *arg);
 
 struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *list,
 					   bdaddr_t *bdaddr, u8 type);
+struct bdaddr_list_with_irk *hci_bdaddr_list_lookup_with_irk(
+				    struct list_head *list, bdaddr_t *bdaddr,
+				    u8 type);
 int hci_bdaddr_list_add(struct list_head *list, bdaddr_t *bdaddr, u8 type);
+int hci_bdaddr_list_add_with_irk(struct list_head *list, bdaddr_t *bdaddr,
+					u8 type, u8 *peer_irk, u8 *local_irk);
 int hci_bdaddr_list_del(struct list_head *list, bdaddr_t *bdaddr, u8 type);
+int hci_bdaddr_list_del_with_irk(struct list_head *list, bdaddr_t *bdaddr,
+								u8 type);
 void hci_bdaddr_list_clear(struct list_head *list);
 
 struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev,
-- 
cgit v1.2.3


From fe1493101ac1313cbdbef1af65342fb17d944e71 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Tue, 4 Sep 2018 13:39:20 +0300
Subject: Bluetooth: L2CAP: Derive MPS from connection MTU

This ensures the MPS can fit in a single HCI fragment so each
segment don't have to be reassembled at HCI level, in addition to
that also remove the debugfs entry to configure the MPS.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 0697fd413087..17296675a0b1 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -456,7 +456,6 @@ struct l2cap_conn_param_update_rsp {
 #define L2CAP_CONN_PARAM_REJECTED	0x0001
 
 #define L2CAP_LE_MAX_CREDITS		10
-#define L2CAP_LE_DEFAULT_MPS		230
 
 struct l2cap_le_conn_req {
 	__le16     psm;
-- 
cgit v1.2.3


From 96cd8eaa131f0ffd4cfae09e1b4bdfafb9570907 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Tue, 4 Sep 2018 13:39:21 +0300
Subject: Bluetooth: L2CAP: Derive rx credits from MTU and MPS

Give enough rx credits for a full packet instead of using an arbitrary
number which may not be enough depending on the MTU and MPS which can
cause interruptions while waiting for more credits, also remove
debugfs entry for l2cap_le_max_credits.

With these changes the credits are restored after each SDU is received
instead of using fixed threshold, this way it is garanteed that there
will always be enough credits to send a packet without waiting more
credits to arrive.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 17296675a0b1..3555440e14fc 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -455,8 +455,6 @@ struct l2cap_conn_param_update_rsp {
 #define L2CAP_CONN_PARAM_ACCEPTED	0x0000
 #define L2CAP_CONN_PARAM_REJECTED	0x0001
 
-#define L2CAP_LE_MAX_CREDITS		10
-
 struct l2cap_le_conn_req {
 	__le16     psm;
 	__le16     scid;
-- 
cgit v1.2.3


From fb961945457f5177072c968aa38fee910ab893b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20G=C3=B6ttsche?= <cgzones@googlemail.com>
Date: Sun, 23 Sep 2018 20:26:15 +0200
Subject: netfilter: nf_tables: add SECMARK support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the ability to set the security context of packets within the nf_tables framework.
Add a nft_object for holding security contexts in the kernel and manipulating packets on the wire.

Convert the security context strings at rule addition time to security identifiers.
This is the same behavior like in xt_SECMARK and offers better performance than computing it per packet.

Set the maximum security context length to 256.

Signed-off-by: Christian Göttsche <cgzones@googlemail.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables_core.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index 8da837d2aaf9..2046d104f323 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -16,6 +16,10 @@ extern struct nft_expr_type nft_meta_type;
 extern struct nft_expr_type nft_rt_type;
 extern struct nft_expr_type nft_exthdr_type;
 
+#ifdef CONFIG_NETWORK_SECMARK
+extern struct nft_object_type nft_secmark_obj_type;
+#endif
+
 int nf_tables_core_module_init(void);
 void nf_tables_core_module_exit(void);
 
-- 
cgit v1.2.3


From fe3b30ddb90face841b2ede3b73ed2e9cfece6ba Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 26 Sep 2018 11:15:30 +0200
Subject: netlink: remove NLA_NESTED_COMPAT

This isn't used anywhere, so we might as well get rid of it.

Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 318b1ded3833..b680fe365e91 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -172,7 +172,6 @@ enum {
 	NLA_FLAG,
 	NLA_MSECS,
 	NLA_NESTED,
-	NLA_NESTED_COMPAT,
 	NLA_NUL_STRING,
 	NLA_BINARY,
 	NLA_S8,
@@ -203,7 +202,6 @@ enum {
  *    NLA_BINARY           Maximum length of attribute payload
  *    NLA_NESTED           Don't use `len' field -- length verification is
  *                         done by checking len of nested header (or empty)
- *    NLA_NESTED_COMPAT    Minimum length of structure payload
  *    NLA_U8, NLA_U16,
  *    NLA_U32, NLA_U64,
  *    NLA_S8, NLA_S16,
-- 
cgit v1.2.3


From 48fde90a78f8c67e2bec5061f9725fe363519feb Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 26 Sep 2018 11:15:31 +0200
Subject: netlink: make validation_data const

The validation data is only used within the policy that
should usually already be const, and isn't changed in any
code that uses it. Therefore, make the validation_data
pointer const.

While at it, remove the duplicate variable in the bitfield
validation that I'd otherwise have to change to const.

Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index b680fe365e91..0d698215d4d9 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -237,7 +237,7 @@ enum {
 struct nla_policy {
 	u16		type;
 	u16		len;
-	void            *validation_data;
+	const void     *validation_data;
 };
 
 #define NLA_POLICY_EXACT_LEN(_len)	{ .type = NLA_EXACT_LEN, .len = _len }
-- 
cgit v1.2.3


From 9a659a35ba177cec30676e170fb6ed98157bcb0d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 26 Sep 2018 11:15:33 +0200
Subject: netlink: allow NLA_NESTED to specify nested policy to validate

Now that we have a validation_data pointer, and the len field in
the policy is unused for NLA_NESTED, we can allow using them both
to have nested validation. This can be nice in code, although we
still have to use nla_parse_nested() or similar which would also
take a policy; however, it also serves as documentation in the
policy without requiring a look at the code.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 0d698215d4d9..91907852da1c 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -200,8 +200,10 @@ enum {
  *    NLA_NUL_STRING       Maximum length of string (excluding NUL)
  *    NLA_FLAG             Unused
  *    NLA_BINARY           Maximum length of attribute payload
- *    NLA_NESTED           Don't use `len' field -- length verification is
- *                         done by checking len of nested header (or empty)
+ *    NLA_NESTED           Length verification is done by checking len of
+ *                         nested header (or empty); len field is used if
+ *                         validation_data is also used, for the max attr
+ *                         number in the nested policy.
  *    NLA_U8, NLA_U16,
  *    NLA_U32, NLA_U64,
  *    NLA_S8, NLA_S16,
@@ -224,6 +226,10 @@ enum {
  *    NLA_REJECT           This attribute is always rejected and validation data
  *                         may point to a string to report as the error instead
  *                         of the generic one in extended ACK.
+ *    NLA_NESTED           Points to a nested policy to validate, must also set
+ *                         `len' to the max attribute number.
+ *                         Note that nla_parse() will validate, but of course not
+ *                         parse, the nested sub-policies.
  *    All other            Unused
  *
  * Example:
@@ -247,6 +253,9 @@ struct nla_policy {
 #define NLA_POLICY_ETH_ADDR		NLA_POLICY_EXACT_LEN(ETH_ALEN)
 #define NLA_POLICY_ETH_ADDR_COMPAT	NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN)
 
+#define NLA_POLICY_NESTED(maxattr, policy) \
+	{ .type = NLA_NESTED, .validation_data = policy, .len = maxattr }
+
 /**
  * struct nl_info - netlink source information
  * @nlh: Netlink message header of original request
-- 
cgit v1.2.3


From 1501d13596b92d6d1f0ea5e104be838188b6e026 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 26 Sep 2018 11:15:34 +0200
Subject: netlink: add nested array policy validation

Sometimes nested netlink attributes are just used as arrays, with
the nla_type() of each not being used; we have this in nl80211 and
e.g. NFTA_SET_ELEM_LIST_ELEMENTS.

Add the ability to validate this type of message directly in the
policy, by adding the type NLA_NESTED_ARRAY which does exactly
this: require a first level of nesting but ignore the attribute
type, and then inside each require a second level of nested and
validate those attributes against a given policy (if present).

Note that some nested array types actually require that all of
the entries have the same index, this is possible to express in
a nested policy already, apart from the validation that only the
one allowed type is used.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 91907852da1c..3698ca8ff92c 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -172,6 +172,7 @@ enum {
 	NLA_FLAG,
 	NLA_MSECS,
 	NLA_NESTED,
+	NLA_NESTED_ARRAY,
 	NLA_NUL_STRING,
 	NLA_BINARY,
 	NLA_S8,
@@ -200,7 +201,8 @@ enum {
  *    NLA_NUL_STRING       Maximum length of string (excluding NUL)
  *    NLA_FLAG             Unused
  *    NLA_BINARY           Maximum length of attribute payload
- *    NLA_NESTED           Length verification is done by checking len of
+ *    NLA_NESTED,
+ *    NLA_NESTED_ARRAY     Length verification is done by checking len of
  *                         nested header (or empty); len field is used if
  *                         validation_data is also used, for the max attr
  *                         number in the nested policy.
@@ -230,6 +232,12 @@ enum {
  *                         `len' to the max attribute number.
  *                         Note that nla_parse() will validate, but of course not
  *                         parse, the nested sub-policies.
+ *    NLA_NESTED_ARRAY     Points to a nested policy to validate, must also set
+ *                         `len' to the max attribute number. The difference to
+ *                         NLA_NESTED is the structure - NLA_NESTED has the
+ *                         nested attributes directly inside, while an array has
+ *                         the nested attributes at another level down and the
+ *                         attributes directly in the nesting don't matter.
  *    All other            Unused
  *
  * Example:
@@ -255,6 +263,8 @@ struct nla_policy {
 
 #define NLA_POLICY_NESTED(maxattr, policy) \
 	{ .type = NLA_NESTED, .validation_data = policy, .len = maxattr }
+#define NLA_POLICY_NESTED_ARRAY(maxattr, policy) \
+	{ .type = NLA_NESTED_ARRAY, .validation_data = policy, .len = maxattr }
 
 /**
  * struct nl_info - netlink source information
-- 
cgit v1.2.3


From 30d65e0804d58a03d1a8ea4e12c6fc07ed08218b Mon Sep 17 00:00:00 2001
From: Matias Karhumaa <matias.karhumaa@gmail.com>
Date: Fri, 28 Sep 2018 21:54:30 +0300
Subject: Bluetooth: Fix debugfs NULL pointer dereference

Fix crash caused by NULL pointer dereference when debugfs functions
le_max_key_read, le_max_key_size_write, le_min_key_size_read or
le_min_key_size_write and Bluetooth adapter was powered off.

Fix is to move max_key_size and min_key_size from smp_dev to hci_dev.
At the same time they were renamed to le_max_key_size and
le_min_key_size.

BUG: unable to handle kernel NULL pointer dereference at 00000000000002e8
PGD 0 P4D 0
Oops: 0000 [#24] SMP PTI
CPU: 2 PID: 6255 Comm: cat Tainted: G      D    OE     4.18.9-200.fc28.x86_64 #1
Hardware name: LENOVO 4286CTO/4286CTO, BIOS 8DET76WW (1.46 ) 06/21/2018
RIP: 0010:le_max_key_size_read+0x45/0xb0 [bluetooth]
Code: 00 00 00 48 83 ec 10 65 48 8b 04 25 28 00 00 00 48 89 44 24 08 31 c0 48 8b 87 c8 00 00 00 48 8d 7c 24 04 48 8b 80 48 0a 00 00 <48> 8b 80 e8 02 00 00 0f b6 48 52 e8 fb b6 b3 ed be 04 00 00 00 48
RSP: 0018:ffffab23c3ff3df0 EFLAGS: 00010246
RAX: 0000000000000000 RBX: 00007f0b4ca2e000 RCX: ffffab23c3ff3f08
RDX: ffffffffc0ddb033 RSI: 0000000000000004 RDI: ffffab23c3ff3df4
RBP: 0000000000020000 R08: 0000000000000000 R09: 0000000000000000
R10: ffffab23c3ff3ed8 R11: 0000000000000000 R12: ffffab23c3ff3f08
R13: 00007f0b4ca2e000 R14: 0000000000020000 R15: ffffab23c3ff3f08
FS:  00007f0b4ca0f540(0000) GS:ffff91bd5e280000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000000002e8 CR3: 00000000629fa006 CR4: 00000000000606e0
Call Trace:
 full_proxy_read+0x53/0x80
 __vfs_read+0x36/0x180
 vfs_read+0x8a/0x140
 ksys_read+0x4f/0xb0
 do_syscall_64+0x5b/0x160
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Signed-off-by: Matias Karhumaa <matias.karhumaa@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 9b0f821b2d3a..e5ea633ea368 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -267,6 +267,8 @@ struct hci_dev {
 	__u16		le_max_tx_time;
 	__u16		le_max_rx_len;
 	__u16		le_max_rx_time;
+	__u8		le_max_key_size;
+	__u8		le_min_key_size;
 	__u16		discov_interleaved_timeout;
 	__u16		conn_info_min_age;
 	__u16		conn_info_max_age;
-- 
cgit v1.2.3


From 80ece6a03aaf3f3215475826bdd2bb9f326bccfd Mon Sep 17 00:00:00 2001
From: Vakul Garg <vakul.garg@nxp.com>
Date: Wed, 26 Sep 2018 16:22:08 +0530
Subject: tls: Remove redundant vars from tls record structure

Structure 'tls_rec' contains sg_aead_in and sg_aead_out which point
to a aad_space and then chain scatterlists sg_plaintext_data,
sg_encrypted_data respectively. Rather than using chained scatterlists
for plaintext and encrypted data in aead_req, it is efficient to store
aad_space in sg_encrypted_data and sg_plaintext_data itself in the
first index and get rid of sg_aead_in, sg_aead_in and further chaining.

This requires increasing size of sg_encrypted_data & sg_plaintext_data
arrarys by 1 to accommodate entry for aad_space. The code which uses
sg_encrypted_data and sg_plaintext_data has been modified to skip first
index as it points to aad_space.

Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tls.h b/include/net/tls.h
index 1615fb5ea114..262420cdad10 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -101,13 +101,11 @@ struct tls_rec {
 	struct list_head list;
 	int tx_ready;
 	int tx_flags;
-	struct scatterlist sg_plaintext_data[MAX_SKB_FRAGS];
-	struct scatterlist sg_encrypted_data[MAX_SKB_FRAGS];
 
 	/* AAD | sg_plaintext_data | sg_tag */
-	struct scatterlist sg_aead_in[2];
+	struct scatterlist sg_plaintext_data[MAX_SKB_FRAGS + 1];
 	/* AAD | sg_encrypted_data (data contain overhead for hdr&iv&tag) */
-	struct scatterlist sg_aead_out[2];
+	struct scatterlist sg_encrypted_data[MAX_SKB_FRAGS + 1];
 
 	unsigned int sg_plaintext_size;
 	unsigned int sg_encrypted_size;
-- 
cgit v1.2.3


From 43955a45dc0b4f3be7f0c3afc0e080ed59bb5280 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 26 Sep 2018 22:19:42 +0200
Subject: netlink: fix typo in nla_parse_nested() comment

Fix a simple typo: attribuets -> attributes

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 0c154f98e987..39e1d875d507 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -153,7 +153,7 @@
  *   nla_find()				find attribute in stream of attributes
  *   nla_find_nested()			find attribute in nested attributes
  *   nla_parse()			parse and validate stream of attrs
- *   nla_parse_nested()			parse nested attribuets
+ *   nla_parse_nested()			parse nested attributes
  *   nla_for_each_attr()		loop over all attributes
  *   nla_for_each_nested()		loop over the nested attributes
  *=========================================================================
-- 
cgit v1.2.3


From 1ad98e9d1bdf4724c0a8532fabd84bf3c457c2bc Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 1 Oct 2018 15:02:26 -0700
Subject: tcp/dccp: fix lockdep issue when SYN is backlogged

In normal SYN processing, packets are handled without listener
lock and in RCU protected ingress path.

But syzkaller is known to be able to trick us and SYN
packets might be processed in process context, after being
queued into socket backlog.

In commit 06f877d613be ("tcp/dccp: fix other lockdep splats
accessing ireq_opt") I made a very stupid fix, that happened
to work mostly because of the regular path being RCU protected.

Really the thing protecting ireq->ireq_opt is RCU read lock,
and the pseudo request refcnt is not relevant.

This patch extends what I did in commit 449809a66c1d ("tcp/dccp:
block BH for SYN processing") by adding an extra rcu_read_{lock|unlock}
pair in the paths that might be taken when processing SYN from
socket backlog (thus possibly in process context)

Fixes: 06f877d613be ("tcp/dccp: fix other lockdep splats accessing ireq_opt")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index e03b93360f33..a8cd5cf9ff5b 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -132,8 +132,7 @@ static inline int inet_request_bound_dev_if(const struct sock *sk,
 
 static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq)
 {
-	return rcu_dereference_check(ireq->ireq_opt,
-				     refcount_read(&ireq->req.rsk_refcnt) > 0);
+	return rcu_dereference(ireq->ireq_opt);
 }
 
 struct inet_cork {
-- 
cgit v1.2.3


From 3e48be05f3c7eb6f6126939e9d957903c5cfeee5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 27 Sep 2018 11:28:35 +0200
Subject: netlink: add attribute range validation to policy

Without further bloating the policy structs, we can overload
the `validation_data' pointer with a struct of s16 min, max
and use those to validate ranges in NLA_{U,S}{8,16,32,64}
attributes.

It may sound strange to validate NLA_U32 with a s16 max, but
in many cases NLA_U32 is used for enums etc. since there's no
size benefit in using a smaller attribute width anyway, due
to netlink attribute alignment; in cases like that it's still
useful, particularly when the attribute really transports an
enum value.

Doing so lets us remove quite a bit of validation code, if we
can be sure that these attributes aren't used by userspace in
places where they're ignored today.

To achieve all this, split the 'type' field and introduce a
new 'validation_type' field which indicates what further
validation (beyond the validation prescribed by the type of
the attribute) is done. This currently allows for no further
validation (the default), as well as min, max and range checks.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 67 ++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 64 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 3698ca8ff92c..d34ceeba82a8 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -188,9 +188,19 @@ enum {
 
 #define NLA_TYPE_MAX (__NLA_TYPE_MAX - 1)
 
+enum nla_policy_validation {
+	NLA_VALIDATE_NONE,
+	NLA_VALIDATE_RANGE,
+	NLA_VALIDATE_MIN,
+	NLA_VALIDATE_MAX,
+};
+
 /**
  * struct nla_policy - attribute validation policy
  * @type: Type of attribute or NLA_UNSPEC
+ * @validation_type: type of attribute validation done in addition to
+ *	type-specific validation (e.g. range), see
+ *	&enum nla_policy_validation
  * @len: Type specific length of payload
  *
  * Policies are defined as arrays of this struct, the array must be
@@ -238,7 +248,26 @@ enum {
  *                         nested attributes directly inside, while an array has
  *                         the nested attributes at another level down and the
  *                         attributes directly in the nesting don't matter.
- *    All other            Unused
+ *    All other            Unused - but note that it's a union
+ *
+ * Meaning of `min' and `max' fields, use via NLA_POLICY_MIN, NLA_POLICY_MAX
+ * and NLA_POLICY_RANGE:
+ *    NLA_U8,
+ *    NLA_U16,
+ *    NLA_U32,
+ *    NLA_U64,
+ *    NLA_S8,
+ *    NLA_S16,
+ *    NLA_S32,
+ *    NLA_S64              These are used depending on the validation_type
+ *                         field, if that is min/max/range then the minimum,
+ *                         maximum and both are used (respectively) to check
+ *                         the value of the integer attribute.
+ *                         Note that in the interest of code simplicity and
+ *                         struct size both limits are s16, so you cannot
+ *                         enforce a range that doesn't fall within the range
+ *                         of s16 - do that as usual in the code instead.
+ *    All other            Unused - but note that it's a union
  *
  * Example:
  * static const struct nla_policy my_policy[ATTR_MAX+1] = {
@@ -249,9 +278,15 @@ enum {
  * };
  */
 struct nla_policy {
-	u16		type;
+	u8		type;
+	u8		validation_type;
 	u16		len;
-	const void     *validation_data;
+	union {
+		const void *validation_data;
+		struct {
+			s16 min, max;
+		};
+	};
 };
 
 #define NLA_POLICY_EXACT_LEN(_len)	{ .type = NLA_EXACT_LEN, .len = _len }
@@ -266,6 +301,32 @@ struct nla_policy {
 #define NLA_POLICY_NESTED_ARRAY(maxattr, policy) \
 	{ .type = NLA_NESTED_ARRAY, .validation_data = policy, .len = maxattr }
 
+#define __NLA_ENSURE(condition) (sizeof(char[1 - 2*!(condition)]) - 1)
+#define NLA_ENSURE_INT_TYPE(tp)				\
+	(__NLA_ENSURE(tp == NLA_S8 || tp == NLA_U8 ||	\
+		      tp == NLA_S16 || tp == NLA_U16 ||	\
+		      tp == NLA_S32 || tp == NLA_U32 ||	\
+		      tp == NLA_S64 || tp == NLA_U64) + tp)
+
+#define NLA_POLICY_RANGE(tp, _min, _max) {		\
+	.type = NLA_ENSURE_INT_TYPE(tp),		\
+	.validation_type = NLA_VALIDATE_RANGE,		\
+	.min = _min,					\
+	.max = _max					\
+}
+
+#define NLA_POLICY_MIN(tp, _min) {			\
+	.type = NLA_ENSURE_INT_TYPE(tp),		\
+	.validation_type = NLA_VALIDATE_MIN,		\
+	.min = _min,					\
+}
+
+#define NLA_POLICY_MAX(tp, _max) {			\
+	.type = NLA_ENSURE_INT_TYPE(tp),		\
+	.validation_type = NLA_VALIDATE_MAX,		\
+	.max = _max,					\
+}
+
 /**
  * struct nl_info - netlink source information
  * @nlh: Netlink message header of original request
-- 
cgit v1.2.3


From 33188bd6430ef06d206ae4fda2cc92f14f16fd20 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 27 Sep 2018 11:28:36 +0200
Subject: netlink: add validation function to policy

Add the ability to have an arbitrary validation function attached
to a netlink policy that doesn't already use the validation_data
pointer in another way.

This can be useful to validate for example the content of a binary
attribute, like in nl80211 the "(information) elements", which must
be valid streams of "u8 type, u8 length, u8 value[length]".

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index d34ceeba82a8..6a106ef5ca56 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -193,13 +193,14 @@ enum nla_policy_validation {
 	NLA_VALIDATE_RANGE,
 	NLA_VALIDATE_MIN,
 	NLA_VALIDATE_MAX,
+	NLA_VALIDATE_FUNCTION,
 };
 
 /**
  * struct nla_policy - attribute validation policy
  * @type: Type of attribute or NLA_UNSPEC
  * @validation_type: type of attribute validation done in addition to
- *	type-specific validation (e.g. range), see
+ *	type-specific validation (e.g. range, function call), see
  *	&enum nla_policy_validation
  * @len: Type specific length of payload
  *
@@ -269,6 +270,13 @@ enum nla_policy_validation {
  *                         of s16 - do that as usual in the code instead.
  *    All other            Unused - but note that it's a union
  *
+ * Meaning of `validate' field, use via NLA_POLICY_VALIDATE_FN:
+ *    NLA_BINARY           Validation function called for the attribute,
+ *                         not compatible with use of the validation_data
+ *                         as in NLA_BITFIELD32, NLA_REJECT, NLA_NESTED and
+ *                         NLA_NESTED_ARRAY.
+ *    All other            Unused - but note that it's a union
+ *
  * Example:
  * static const struct nla_policy my_policy[ATTR_MAX+1] = {
  * 	[ATTR_FOO] = { .type = NLA_U16 },
@@ -286,6 +294,8 @@ struct nla_policy {
 		struct {
 			s16 min, max;
 		};
+		int (*validate)(const struct nlattr *attr,
+				struct netlink_ext_ack *extack);
 	};
 };
 
@@ -307,6 +317,11 @@ struct nla_policy {
 		      tp == NLA_S16 || tp == NLA_U16 ||	\
 		      tp == NLA_S32 || tp == NLA_U32 ||	\
 		      tp == NLA_S64 || tp == NLA_U64) + tp)
+#define NLA_ENSURE_NO_VALIDATION_PTR(tp)		\
+	(__NLA_ENSURE(tp != NLA_BITFIELD32 &&		\
+		      tp != NLA_REJECT &&		\
+		      tp != NLA_NESTED &&		\
+		      tp != NLA_NESTED_ARRAY) + tp)
 
 #define NLA_POLICY_RANGE(tp, _min, _max) {		\
 	.type = NLA_ENSURE_INT_TYPE(tp),		\
@@ -327,6 +342,13 @@ struct nla_policy {
 	.max = _max,					\
 }
 
+#define NLA_POLICY_VALIDATE_FN(tp, fn, ...) {		\
+	.type = NLA_ENSURE_NO_VALIDATION_PTR(tp),	\
+	.validation_type = NLA_VALIDATE_FUNCTION,	\
+	.validate = fn,					\
+	.len = __VA_ARGS__ + 0,				\
+}
+
 /**
  * struct nl_info - netlink source information
  * @nlh: Netlink message header of original request
-- 
cgit v1.2.3


From fb420d5d91c1274d5966917725e71f27ed092a85 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 28 Sep 2018 10:28:44 -0700
Subject: tcp/fq: move back to CLOCK_MONOTONIC

In the recent TCP/EDT patch series, I switched TCP and sch_fq
clocks from MONOTONIC to TAI, in order to meet the choice done
earlier for sch_etf packet scheduler.

But sure enough, this broke some setups were the TAI clock
jumps forward (by almost 50 year...), as reported
by Leonard Crestez.

If we want to converge later, we'll probably need to add
an skb field to differentiate the clock bases, or a socket option.

In the meantime, an UDP application will need to use CLOCK_MONOTONIC
base for its SCM_TXTIME timestamps if using fq packet scheduler.

Fixes: 72b0094f9182 ("tcp: switch tcp_clock_ns() to CLOCK_TAI base")
Fixes: 142537e41923 ("net_sched: sch_fq: switch to CLOCK_TAI")
Fixes: fd2bca2aa789 ("tcp: switch internal pacing timer to CLOCK_TAI")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Leonard Crestez <leonard.crestez@nxp.com>
Tested-by: Leonard Crestez <leonard.crestez@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index ff15d8e0d525..0d2929223c70 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -732,7 +732,7 @@ void tcp_send_window_probe(struct sock *sk);
 
 static inline u64 tcp_clock_ns(void)
 {
-	return ktime_get_tai_ns();
+	return ktime_get_ns();
 }
 
 static inline u64 tcp_clock_us(void)
-- 
cgit v1.2.3


From 81e54d08d9d845053111f30045a93f3eb1c3ca96 Mon Sep 17 00:00:00 2001
From: Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>
Date: Thu, 20 Sep 2018 17:30:09 -0700
Subject: cfg80211: support FTM responder configuration/statistics

Allow userspace to enable fine timing measurement responder
functionality with configurable lci/civic parameters in AP mode.
This can be done at AP start or changing beacon parameters.

A new EXT_FEATURE flag is introduced for drivers to advertise
the capability.

Also nl80211 API support for retrieving statistics is added.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>
[remove unused cfg80211_ftm_responder_params, clarify docs,
 move validation into policy]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 9f3ed79c39d7..deb313105014 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -775,6 +775,12 @@ struct cfg80211_crypto_settings {
  * @assocresp_ies_len: length of assocresp_ies in octets
  * @probe_resp_len: length of probe response template (@probe_resp)
  * @probe_resp: probe response template (AP mode only)
+ * @ftm_responder: enable FTM responder functionality; -1 for no change
+ *	(which also implies no change in LCI/civic location data)
+ * @lci: LCI subelement content
+ * @civicloc: Civic location subelement content
+ * @lci_len: LCI data length
+ * @civicloc_len: Civic location data length
  */
 struct cfg80211_beacon_data {
 	const u8 *head, *tail;
@@ -782,12 +788,17 @@ struct cfg80211_beacon_data {
 	const u8 *proberesp_ies;
 	const u8 *assocresp_ies;
 	const u8 *probe_resp;
+	const u8 *lci;
+	const u8 *civicloc;
+	s8 ftm_responder;
 
 	size_t head_len, tail_len;
 	size_t beacon_ies_len;
 	size_t proberesp_ies_len;
 	size_t assocresp_ies_len;
 	size_t probe_resp_len;
+	size_t lci_len;
+	size_t civicloc_len;
 };
 
 struct mac_address {
@@ -2796,6 +2807,40 @@ struct cfg80211_external_auth_params {
 	u16 status;
 };
 
+/**
+ * cfg80211_ftm_responder_stats - FTM responder statistics
+ *
+ * @filled: bitflag of flags using the bits of &enum nl80211_ftm_stats to
+ *	indicate the relevant values in this struct for them
+ * @success_num: number of FTM sessions in which all frames were successfully
+ *	answered
+ * @partial_num: number of FTM sessions in which part of frames were
+ *	successfully answered
+ * @failed_num: number of failed FTM sessions
+ * @asap_num: number of ASAP FTM sessions
+ * @non_asap_num: number of  non-ASAP FTM sessions
+ * @total_duration_ms: total sessions durations - gives an indication
+ *	of how much time the responder was busy
+ * @unknown_triggers_num: number of unknown FTM triggers - triggers from
+ *	initiators that didn't finish successfully the negotiation phase with
+ *	the responder
+ * @reschedule_requests_num: number of FTM reschedule requests - initiator asks
+ *	for a new scheduling although it already has scheduled FTM slot
+ * @out_of_window_triggers_num: total FTM triggers out of scheduled window
+ */
+struct cfg80211_ftm_responder_stats {
+	u32 filled;
+	u32 success_num;
+	u32 partial_num;
+	u32 failed_num;
+	u32 asap_num;
+	u32 non_asap_num;
+	u64 total_duration_ms;
+	u32 unknown_triggers_num;
+	u32 reschedule_requests_num;
+	u32 out_of_window_triggers_num;
+};
+
 /**
  * struct cfg80211_ops - backend description for wireless configuration
  *
@@ -3128,6 +3173,9 @@ struct cfg80211_external_auth_params {
  *
  * @tx_control_port: TX a control port frame (EAPoL).  The noencrypt parameter
  *	tells the driver that the frame should not be encrypted.
+ *
+ * @get_ftm_responder_stats: Retrieve FTM responder statistics, if available.
+ *	Statistics should be cumulative, currently no way to reset is provided.
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -3433,6 +3481,10 @@ struct cfg80211_ops {
 				   const u8 *buf, size_t len,
 				   const u8 *dest, const __be16 proto,
 				   const bool noencrypt);
+
+	int	(*get_ftm_responder_stats)(struct wiphy *wiphy,
+				struct net_device *dev,
+				struct cfg80211_ftm_responder_stats *ftm_stats);
 };
 
 /*
-- 
cgit v1.2.3


From b60ad3485106b5845113e7a2745abb7e64b15d6d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 1 Oct 2018 11:52:07 +0200
Subject: cfg80211: move cookie_counter out of wiphy

There's no reason for drivers to be able to access the
cfg80211 internal cookie counter; move it out of the
wiphy into the rdev structure.

While at it, also make it never assign 0 as a cookie
(we consider that invalid in some places), and warn if
we manage to do that for some reason (wrapping is not
likely to happen with a u64.)

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index deb313105014..8f5ee2c2da04 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4012,7 +4012,6 @@ struct wiphy_iftype_ext_capab {
  *	by the driver in the .connect() callback. The bit position maps to the
  *	attribute indices defined in &enum nl80211_bss_select_attr.
  *
- * @cookie_counter: unique generic cookie counter, used to identify objects.
  * @nan_supported_bands: bands supported by the device in NAN mode, a
  *	bitmap of &enum nl80211_band values.  For instance, for
  *	NL80211_BAND_2GHZ, bit 0 would be set
@@ -4151,8 +4150,6 @@ struct wiphy {
 
 	u32 bss_select_support;
 
-	u64 cookie_counter;
-
 	u8 nan_supported_bands;
 
 	u32 txq_limit;
-- 
cgit v1.2.3


From 2ab2ddd301a22ca3c5f0b743593e4ad2953dfa53 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 2 Oct 2018 12:35:05 -0700
Subject: inet: make sure to grab rcu_read_lock before using ireq->ireq_opt

Timer handlers do not imply rcu_read_lock(), so my recent fix
triggered a LOCKDEP warning when SYNACK is retransmit.

Lets add rcu_read_lock()/rcu_read_unlock() pairs around ireq->ireq_opt
usages instead of guessing what is done by callers, since it is
not worth the pain.

Get rid of ireq_opt_deref() helper since it hides the logic
without real benefit, since it is now a standard rcu_dereference().

Fixes: 1ad98e9d1bdf ("tcp/dccp: fix lockdep issue when SYN is backlogged")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index a8cd5cf9ff5b..a80fd0ac4563 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -130,11 +130,6 @@ static inline int inet_request_bound_dev_if(const struct sock *sk,
 	return sk->sk_bound_dev_if;
 }
 
-static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq)
-{
-	return rcu_dereference(ireq->ireq_opt);
-}
-
 struct inet_cork {
 	unsigned int		flags;
 	__be32			addr;
-- 
cgit v1.2.3


From d456336d164886d9339aaa112d6595e1c142f8bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= <maze@google.com>
Date: Sat, 29 Sep 2018 23:44:50 -0700
Subject: net: remove 1 always zero parameter from ip6_redirect_no_header()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

(the parameter in question is mark)

Signed-off-by: Maciej Żenczykowski <maze@google.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 7b9c82de11cc..cef186dbd2ce 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -165,8 +165,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif,
 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu);
 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
 		  kuid_t uid);
-void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
-			    u32 mark);
+void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif);
 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk);
 
 struct netlink_callback;
-- 
cgit v1.2.3


From 8873c064d1de579ea23412a6d3eee972593f142b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 1 Oct 2018 23:24:26 -0700
Subject: tcp: do not release socket ownership in tcp_close()

syzkaller was able to hit the WARN_ON(sock_owned_by_user(sk));
in tcp_close()

While a socket is being closed, it is very possible other
threads find it in rtnetlink dump.

tcp_get_info() will acquire the socket lock for a short amount
of time (slow = lock_sock_fast(sk)/unlock_sock_fast(sk, slow);),
enough to trigger the warning.

Fixes: 67db3e4bfbc9 ("tcp: no longer hold ehash lock while calling tcp_get_info()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 38cae35f6e16..751549ac0a84 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1492,6 +1492,7 @@ static inline void lock_sock(struct sock *sk)
 	lock_sock_nested(sk, 0);
 }
 
+void __release_sock(struct sock *sk);
 void release_sock(struct sock *sk);
 
 /* BH context may only use the following locking interface. */
-- 
cgit v1.2.3


From 4e6d47206c32d1bbb4931f1d851dae3870e0df81 Mon Sep 17 00:00:00 2001
From: Vakul Garg <vakul.garg@nxp.com>
Date: Sun, 30 Sep 2018 08:04:35 +0530
Subject: tls: Add support for inplace records encryption

Presently, for non-zero copy case, separate pages are allocated for
storing plaintext and encrypted text of records. These pages are stored
in sg_plaintext_data and sg_encrypted_data scatterlists inside record
structure. Further, sg_plaintext_data & sg_encrypted_data are passed
to cryptoapis for record encryption. Allocating separate pages for
plaintext and encrypted text is inefficient from both required memory
and performance point of view.

This patch adds support of inplace encryption of records. For non-zero
copy case, we reuse the pages from sg_encrypted_data scatterlist to
copy the application's plaintext data. For the movement of pages from
sg_encrypted_data to sg_plaintext_data scatterlists, we introduce a new
function move_to_plaintext_sg(). This function add pages into
sg_plaintext_data from sg_encrypted_data scatterlists.

tls_do_encryption() is modified to pass the same scatterlist as both
source and destination into aead_request_set_crypt() if inplace crypto
has been enabled. A new ariable 'inplace_crypto' has been introduced in
record structure to signify whether the same scatterlist can be used.
By default, the inplace_crypto is enabled in get_rec(). If zero-copy is
used (i.e. plaintext data is not copied), inplace_crypto is set to '0'.

Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
Reviewed-by: Dave Watson <davejwatson@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/tls.h b/include/net/tls.h
index 262420cdad10..5e853835597e 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -101,6 +101,7 @@ struct tls_rec {
 	struct list_head list;
 	int tx_ready;
 	int tx_flags;
+	int inplace_crypto;
 
 	/* AAD | sg_plaintext_data | sg_tag */
 	struct scatterlist sg_plaintext_data[MAX_SKB_FRAGS + 1];
-- 
cgit v1.2.3


From db7ff19e7b119adb4618fbc6410b441d1c3b55c5 Mon Sep 17 00:00:00 2001
From: Eli Britstein <elibr@mellanox.com>
Date: Wed, 15 Aug 2018 16:02:18 +0300
Subject: devlink: Add extack for eswitch operations

Add extack argument to the eswitch related operations.

Signed-off-by: Eli Britstein <elibr@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/net/devlink.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index b9b89d6604d4..70671f0d4c30 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -451,11 +451,14 @@ struct devlink_ops {
 				       u32 *p_cur, u32 *p_max);
 
 	int (*eswitch_mode_get)(struct devlink *devlink, u16 *p_mode);
-	int (*eswitch_mode_set)(struct devlink *devlink, u16 mode);
+	int (*eswitch_mode_set)(struct devlink *devlink, u16 mode,
+				struct netlink_ext_ack *extack);
 	int (*eswitch_inline_mode_get)(struct devlink *devlink, u8 *p_inline_mode);
-	int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode);
+	int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode,
+				       struct netlink_ext_ack *extack);
 	int (*eswitch_encap_mode_get)(struct devlink *devlink, u8 *p_encap_mode);
-	int (*eswitch_encap_mode_set)(struct devlink *devlink, u8 encap_mode);
+	int (*eswitch_encap_mode_set)(struct devlink *devlink, u8 encap_mode,
+				      struct netlink_ext_ack *extack);
 };
 
 static inline void *devlink_priv(struct devlink *devlink)
-- 
cgit v1.2.3


From 2070a3e44962212d6ef02c5def821b1b9744e496 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 4 Oct 2018 09:42:29 +0100
Subject: rxrpc: Allow the reply time to be obtained on a client call

Allow the timestamp on the sk_buff holding the first DATA packet of a reply
to be queried.  This can then be used as a base for the expiry time
calculation on the callback promise duration indicated by an operation
result.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/net/af_rxrpc.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/net')

diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index f53edb3754bc..c4c912554dee 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -13,6 +13,7 @@
 #define _NET_RXRPC_H
 
 #include <linux/rxrpc.h>
+#include <linux/ktime.h>
 
 struct key;
 struct sock;
@@ -77,5 +78,7 @@ int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *,
 int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *,
 			    enum rxrpc_call_completion *, u32 *);
 u32 rxrpc_kernel_check_life(struct socket *, struct rxrpc_call *);
+bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *,
+				 ktime_t *);
 
 #endif /* _NET_RXRPC_H */
-- 
cgit v1.2.3


From e908bcf4f1a271e7c264dcbffc5881ced8bfacee Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 4 Oct 2018 09:54:29 +0100
Subject: rxrpc: Allow the reply time to be obtained on a client call

Allow the epoch value to be queried on a server connection.  This is in the
rxrpc header of every packet for use in routing and is derived from the
client's state.  It's also not supposed to change unless the client gets
restarted.

AFS can make use of this information to deduce whether a fileserver has
been restarted because the fileserver makes client calls to the filesystem
driver's cache manager to send notifications (ie. callback breaks) about
conflicting changes from other clients.  These convey the fileserver's own
epoch value back to the filesystem.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/net/af_rxrpc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index c4c912554dee..de587948042a 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -78,6 +78,7 @@ int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *,
 int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *,
 			    enum rxrpc_call_completion *, u32 *);
 u32 rxrpc_kernel_check_life(struct socket *, struct rxrpc_call *);
+u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *);
 bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *,
 				 ktime_t *);
 
-- 
cgit v1.2.3


From e3b5106162a3f73c7633ae6051fbf244584ab584 Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Thu, 4 Oct 2018 11:13:44 +0530
Subject: devlink: Add generic parameter ignore_ari

ignore_ari - Device ignores ARI(Alternate Routing ID) capability,
even when platforms has the support and creates same number of
partitions when platform does not support ARI capability.

Cc: Jiri Pirko <jiri@mellanox.com>
Cc: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 70671f0d4c30..ae28ccbd6843 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -362,6 +362,7 @@ enum devlink_param_generic_id {
 	DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
 	DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV,
 	DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
+	DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI,
 
 	/* add new param generic ids above here*/
 	__DEVLINK_PARAM_GENERIC_ID_MAX,
@@ -380,6 +381,9 @@ enum devlink_param_generic_id {
 #define DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME "region_snapshot_enable"
 #define DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE DEVLINK_PARAM_TYPE_BOOL
 
+#define DEVLINK_PARAM_GENERIC_IGNORE_ARI_NAME "ignore_ari"
+#define DEVLINK_PARAM_GENERIC_IGNORE_ARI_TYPE DEVLINK_PARAM_TYPE_BOOL
+
 #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate)	\
 {									\
 	.id = DEVLINK_PARAM_GENERIC_ID_##_id,				\
-- 
cgit v1.2.3


From f61cba4291c06c201b1b855a341b036caefdc2d6 Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Thu, 4 Oct 2018 11:13:45 +0530
Subject: devlink: Add generic parameter msix_vec_per_pf_max

msix_vec_per_pf_max - This param sets the number of MSIX vectors
that the device requests from the host on driver initialization.
This value is set in the device which is applicable per PF.

Cc: Jiri Pirko <jiri@mellanox.com>
Cc: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index ae28ccbd6843..c9b08b49957c 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -363,6 +363,7 @@ enum devlink_param_generic_id {
 	DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV,
 	DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
 	DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI,
+	DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
 
 	/* add new param generic ids above here*/
 	__DEVLINK_PARAM_GENERIC_ID_MAX,
@@ -384,6 +385,9 @@ enum devlink_param_generic_id {
 #define DEVLINK_PARAM_GENERIC_IGNORE_ARI_NAME "ignore_ari"
 #define DEVLINK_PARAM_GENERIC_IGNORE_ARI_TYPE DEVLINK_PARAM_TYPE_BOOL
 
+#define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_NAME "msix_vec_per_pf_max"
+#define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_TYPE DEVLINK_PARAM_TYPE_U32
+
 #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate)	\
 {									\
 	.id = DEVLINK_PARAM_GENERIC_ID_##_id,				\
-- 
cgit v1.2.3


From 16511789b9cc0a946611b1f9575b7a5b2b566301 Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Thu, 4 Oct 2018 11:13:46 +0530
Subject: devlink: Add generic parameter msix_vec_per_pf_min

msix_vec_per_pf_min - This param sets the number of minimal MSIX
vectors required for the device initialization. This value is set
in the device which limits MSIX vectors per PF.

Cc: Jiri Pirko <jiri@mellanox.com>
Cc: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index c9b08b49957c..9a70755ad1c2 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -364,6 +364,7 @@ enum devlink_param_generic_id {
 	DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
 	DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI,
 	DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
+	DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
 
 	/* add new param generic ids above here*/
 	__DEVLINK_PARAM_GENERIC_ID_MAX,
@@ -388,6 +389,9 @@ enum devlink_param_generic_id {
 #define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_NAME "msix_vec_per_pf_max"
 #define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_TYPE DEVLINK_PARAM_TYPE_U32
 
+#define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_NAME "msix_vec_per_pf_min"
+#define DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_TYPE DEVLINK_PARAM_TYPE_U32
+
 #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate)	\
 {									\
 	.id = DEVLINK_PARAM_GENERIC_ID_##_id,				\
-- 
cgit v1.2.3


From d26d4b194e582c6f2070cc5f7f74a72124ad41ef Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Thu, 4 Oct 2018 17:07:51 -0700
Subject: net: sched: remove unused helpers

tcf_block_dev() doesn't seem to be used anywhere in the tree.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include/net')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index bbfe27f86d5f..72ffb3120ced 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -65,11 +65,6 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
 	return block->q;
 }
 
-static inline struct net_device *tcf_block_dev(struct tcf_block *block)
-{
-	return tcf_block_q(block)->dev_queue->dev;
-}
-
 void *tcf_block_cb_priv(struct tcf_block_cb *block_cb);
 struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
 					 tc_setup_cb_t *cb, void *cb_ident);
@@ -122,11 +117,6 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
 	return NULL;
 }
 
-static inline struct net_device *tcf_block_dev(struct tcf_block *block)
-{
-	return NULL;
-}
-
 static inline
 int tc_setup_cb_block_register(struct tcf_block *block, tc_setup_cb_t *cb,
 			       void *cb_priv)
-- 
cgit v1.2.3


From 767a2217533fed696af0d06bee7746d34c4e00aa Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 4 Oct 2018 20:07:51 -0700
Subject: net: common metrics init helper for FIB entries

Consolidate initialization of ipv4 and ipv6 metrics when fib entries
are created into a single helper, ip_fib_metrics_init, that handles
the call to ip_metrics_convert.

If no metrics are defined for the fib entry, then the metrics is set
to dst_default_metrics.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ip.h b/include/net/ip.h
index e44b1a44f67a..8cbe7e8c9e1e 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -420,8 +420,8 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk,
 	return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU);
 }
 
-int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len,
-		       u32 *metrics);
+struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx,
+					int fc_mx_len);
 
 u32 ip_idents_reserve(u32 hash, int segs);
 void __ip_select_ident(struct net *net, struct iphdr *iph, int segs);
-- 
cgit v1.2.3


From cc5f0eb2164f9aa11fe631f8d905192e0233e262 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 4 Oct 2018 20:07:52 -0700
Subject: net: Move free of fib_metrics to helper

Move the refcounting and potential free of dst metrics associated
with a fib entry to a helper and use it in both ipv4 and ipv6.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 8cbe7e8c9e1e..8fdd58ce580d 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -422,6 +422,12 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk,
 
 struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx,
 					int fc_mx_len);
+static inline void ip_fib_metrics_put(struct dst_metrics *fib_metrics)
+{
+	if (fib_metrics != &dst_default_metrics &&
+	    refcount_dec_and_test(&fib_metrics->refcnt))
+		kfree(fib_metrics);
+}
 
 u32 ip_idents_reserve(u32 hash, int segs);
 void __ip_select_ident(struct net *net, struct iphdr *iph, int segs);
-- 
cgit v1.2.3


From e1255ed4b6dafd9966c99cde5105891cc1ac70df Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 4 Oct 2018 20:07:53 -0700
Subject: net: common metrics init helper for dst_entry

ipv4 and ipv6 both use refcounted metrics if FIB entries have metrics set.
Move the common initialization code to a helper and use for both protocols.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 8fdd58ce580d..f9a7125b4bda 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -429,6 +429,18 @@ static inline void ip_fib_metrics_put(struct dst_metrics *fib_metrics)
 		kfree(fib_metrics);
 }
 
+/* ipv4 and ipv6 both use refcounted metrics if it is not the default */
+static inline
+void ip_dst_init_metrics(struct dst_entry *dst, struct dst_metrics *fib_metrics)
+{
+	dst_init_metrics(dst, fib_metrics->metrics, true);
+
+	if (fib_metrics != &dst_default_metrics) {
+		dst->_metrics |= DST_METRICS_REFCOUNTED;
+		refcount_inc(&fib_metrics->refcnt);
+	}
+}
+
 u32 ip_idents_reserve(u32 hash, int segs);
 void __ip_select_ident(struct net *net, struct iphdr *iph, int segs);
 
-- 
cgit v1.2.3


From 1620a33695d81611360d813a47ebde9386714036 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 4 Oct 2018 20:07:54 -0700
Subject: net: Move free of dst_metrics to helper

Move the refcounting and potential free of dst metrics associated
for ipv4 and ipv6 to a common helper.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip.h b/include/net/ip.h
index f9a7125b4bda..72593e171d14 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -441,6 +441,15 @@ void ip_dst_init_metrics(struct dst_entry *dst, struct dst_metrics *fib_metrics)
 	}
 }
 
+static inline
+void ip_dst_metrics_put(struct dst_entry *dst)
+{
+	struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
+
+	if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt))
+		kfree(p);
+}
+
 u32 ip_idents_reserve(u32 hash, int segs);
 void __ip_select_ident(struct net *net, struct iphdr *iph, int segs);
 
-- 
cgit v1.2.3


From 1661d346628115c364e2b7d5b15a64ca3bd0dbd4 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 1 Oct 2018 14:51:36 +0200
Subject: ethtool: don't allow disabling queues with umem installed

We already check the RSS indirection table does not use queues which
would be disabled by channel reconfiguration. Make sure user does not
try to disable queues which have a UMEM and zero-copy AF_XDP socket
installed.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/xdp_sock.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/net')

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 70a115bea4f4..13acb9803a6d 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -86,6 +86,7 @@ struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries);
 struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
 					  struct xdp_umem_fq_reuse *newq);
 void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq);
+struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, u16 queue_id);
 
 static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
 {
@@ -183,6 +184,12 @@ static inline void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq)
 {
 }
 
+static inline struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
+						     u16 queue_id)
+{
+	return NULL;
+}
+
 static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
 {
 	return NULL;
-- 
cgit v1.2.3


From 95278ddaa15cfa23e4a06ee9ed7b6ee0197c500b Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 2 Oct 2018 12:50:19 -0700
Subject: net_sched: convert idrinfo->lock from spinlock to a mutex

In commit ec3ed293e766 ("net_sched: change tcf_del_walker() to take idrinfo->lock")
we move fl_hw_destroy_tmplt() to a workqueue to avoid blocking
with the spinlock held. Unfortunately, this causes a lot of
troubles here:

1. tcf_chain_destroy() could be called right after we queue the work
   but before the work runs. This is a use-after-free.

2. The chain refcnt is already 0, we can't even just hold it again.
   We can check refcnt==1 but it is ugly.

3. The chain with refcnt 0 is still visible in its block, which means
   it could be still found and used!

4. The block has a refcnt too, we can't hold it without introducing a
   proper API either.

We can make it working but the end result is ugly. Instead of wasting
time on reviewing it, let's just convert the troubling spinlock to
a mutex, which allows us to use non-atomic allocations too.

Fixes: ec3ed293e766 ("net_sched: change tcf_del_walker() to take idrinfo->lock")
Reported-by: Ido Schimmel <idosch@idosch.org>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Vlad Buslov <vladbu@mellanox.com>
Cc: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Tested-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 1ddff3360592..05c7df41d737 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -13,7 +13,7 @@
 #include <net/netns/generic.h>
 
 struct tcf_idrinfo {
-	spinlock_t	lock;
+	struct mutex	lock;
 	struct idr	action_idr;
 };
 
@@ -117,7 +117,7 @@ int tc_action_net_init(struct tc_action_net *tn,
 	if (!tn->idrinfo)
 		return -ENOMEM;
 	tn->ops = ops;
-	spin_lock_init(&tn->idrinfo->lock);
+	mutex_init(&tn->idrinfo->lock);
 	idr_init(&tn->idrinfo->action_idr);
 	return err;
 }
-- 
cgit v1.2.3


From f2e9de210d50187d206989e60bc5a99c2b692209 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 5 Oct 2018 11:31:40 -0400
Subject: udp: gro behind static key

Avoid the socket lookup cost in udp_gro_receive if no socket has a
udp tunnel callback configured.

udp_sk(sk)->gro_receive requires a registration with
setup_udp_tunnel_sock, which enables the static key.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/udp.h b/include/net/udp.h
index 8482a990b0bb..9e82cb391dea 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -443,8 +443,10 @@ int udpv4_offload_init(void);
 
 void udp_init(void);
 
+DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key);
 void udp_encap_enable(void);
 #if IS_ENABLED(CONFIG_IPV6)
+DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
 void udpv6_encap_enable(void);
 #endif
 
-- 
cgit v1.2.3


From 3d0d4337d7a105c5e8ba85c6e7b75437b4c6745e Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sun, 7 Oct 2018 20:16:23 -0700
Subject: netlink: Add extack message to nlmsg_parse for invalid header length

Give a user a reason why EINVAL is returned in nlmsg_parse.

Signed-off-by: David Ahern <dsahern@gmail.com>
Acked-by: Christian Brauner <christian@brauner.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 589683091f16..9522a0bf1f3a 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -516,8 +516,10 @@ static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen,
 			      const struct nla_policy *policy,
 			      struct netlink_ext_ack *extack)
 {
-	if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
+	if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) {
+		NL_SET_ERR_MSG(extack, "Invalid header length");
 		return -EINVAL;
+	}
 
 	return nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen),
 			 nlmsg_attrlen(nlh, hdrlen), policy, extack);
-- 
cgit v1.2.3


From a5f6cba291654168e6ab73c3e7ff5b27371c4cb9 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sun, 7 Oct 2018 20:16:25 -0700
Subject: netlink: Add strict version of nlmsg_parse and nla_parse

nla_parse is currently lenient on message parsing, allowing type to be 0
or greater than max expected and only logging a message

    "netlink: %d bytes leftover after parsing attributes in process `%s'."

if the netlink message has unknown data at the end after parsing. What this
could mean is that the header at the front of the attributes is actually
wrong and the parsing is shifted from what is expected.

Add a new strict version that actually fails with EINVAL if there are any
bytes remaining after the parsing loop completes, if the atttrbitue type
is 0 or greater than max expected.

Signed-off-by: David Ahern <dsahern@gmail.com>
Acked-by: Christian Brauner <christian@brauner.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/net')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 9522a0bf1f3a..f1db8e594847 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -373,6 +373,9 @@ int nla_validate(const struct nlattr *head, int len, int maxtype,
 int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head,
 	      int len, const struct nla_policy *policy,
 	      struct netlink_ext_ack *extack);
+int nla_parse_strict(struct nlattr **tb, int maxtype, const struct nlattr *head,
+		     int len, const struct nla_policy *policy,
+		     struct netlink_ext_ack *extack);
 int nla_policy_len(const struct nla_policy *, int);
 struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype);
 size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize);
@@ -525,6 +528,20 @@ static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen,
 			 nlmsg_attrlen(nlh, hdrlen), policy, extack);
 }
 
+static inline int nlmsg_parse_strict(const struct nlmsghdr *nlh, int hdrlen,
+				     struct nlattr *tb[], int maxtype,
+				     const struct nla_policy *policy,
+				     struct netlink_ext_ack *extack)
+{
+	if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) {
+		NL_SET_ERR_MSG(extack, "Invalid header length");
+		return -EINVAL;
+	}
+
+	return nla_parse_strict(tb, maxtype, nlmsg_attrdata(nlh, hdrlen),
+				nlmsg_attrlen(nlh, hdrlen), policy, extack);
+}
+
 /**
  * nlmsg_find_attr - find a specific attribute in a netlink message
  * @nlh: netlink message header
-- 
cgit v1.2.3


From e8ba330ac0c55004e775eab53fa1e748e5d71bdb Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sun, 7 Oct 2018 20:16:35 -0700
Subject: rtnetlink: Update fib dumps for strict data checking

Add helper to check netlink message for route dumps. If the strict flag
is set the dump request is expected to have an rtmsg struct as the header.
All elements of the struct are expected to be 0 with the exception of
rtm_flags (which is used by both ipv4 and ipv6 dumps) and no attributes
can be appended. rtm_flags can only have RTM_F_CLONED and RTM_F_PREFIX
set.

Update inet_dump_fib, inet6_dump_fib, mpls_dump_routes, ipmr_rtm_dumproute,
and ip6mr_rtm_dumproute to call this helper if strict data checking is
enabled.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index f7c109e37298..9846b79c9ee1 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -452,4 +452,6 @@ static inline void fib_proc_exit(struct net *net)
 
 u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr);
 
+int ip_valid_fib_dump_req(const struct nlmsghdr *nlh,
+			  struct netlink_ext_ack *extack);
 #endif  /* _NET_FIB_H */
-- 
cgit v1.2.3


From f355cfcdb251e22b9dfb78c0eef4005a9d902a35 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 10 Oct 2018 16:09:25 +0300
Subject: devlink: Fix param set handling for string type

In case devlink param type is string, it needs to copy the string value
it got from the input to devlink_param_value.

Fixes: e3b7ca18ad7b ("devlink: Add param set command")
Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index b9b89d6604d4..b0e17c025fdc 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -311,7 +311,7 @@ union devlink_param_value {
 	u8 vu8;
 	u16 vu16;
 	u32 vu32;
-	const char *vstr;
+	char vstr[DEVLINK_PARAM_MAX_STRING_VALUE];
 	bool vbool;
 };
 
-- 
cgit v1.2.3


From bde74ad10eb55aaa472c37b107934e6b8563c25e Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Wed, 10 Oct 2018 16:09:27 +0300
Subject: devlink: Add helper function for safely copy string param

Devlink string param buffer is allocated at the size of
DEVLINK_PARAM_MAX_STRING_VALUE. Add helper function which makes sure
this size is not exceeded.
Renamed DEVLINK_PARAM_MAX_STRING_VALUE to
__DEVLINK_PARAM_MAX_STRING_VALUE to emphasize that it should be used by
devlink only. The driver should use the helper function instead to
verify it doesn't exceed the allowed length.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index b0e17c025fdc..99efc156a309 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -298,7 +298,7 @@ struct devlink_resource {
 
 #define DEVLINK_RESOURCE_ID_PARENT_TOP 0
 
-#define DEVLINK_PARAM_MAX_STRING_VALUE 32
+#define __DEVLINK_PARAM_MAX_STRING_VALUE 32
 enum devlink_param_type {
 	DEVLINK_PARAM_TYPE_U8,
 	DEVLINK_PARAM_TYPE_U16,
@@ -311,7 +311,7 @@ union devlink_param_value {
 	u8 vu8;
 	u16 vu16;
 	u32 vu32;
-	char vstr[DEVLINK_PARAM_MAX_STRING_VALUE];
+	char vstr[__DEVLINK_PARAM_MAX_STRING_VALUE];
 	bool vbool;
 };
 
@@ -553,6 +553,8 @@ int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
 int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
 				       union devlink_param_value init_val);
 void devlink_param_value_changed(struct devlink *devlink, u32 param_id);
+void devlink_param_value_str_fill(union devlink_param_value *dst_val,
+				  const char *src);
 struct devlink_region *devlink_region_create(struct devlink *devlink,
 					     const char *region_name,
 					     u32 region_max_snapshots,
@@ -789,6 +791,12 @@ devlink_param_value_changed(struct devlink *devlink, u32 param_id)
 {
 }
 
+static inline void
+devlink_param_value_str_fill(union devlink_param_value *dst_val,
+			     const char *src)
+{
+}
+
 static inline struct devlink_region *
 devlink_region_create(struct devlink *devlink,
 		      const char *region_name,
-- 
cgit v1.2.3


From ed792e28c4bd09e9a319d2ad914aa62982cb4c4a Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 8 Oct 2018 14:06:34 -0700
Subject: net/ipv6: Make ipv6_route_table_template static

ipv6_route_table_template is exported but there are no users outside
of route.c. Make it static.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index ff33f498c137..829650540780 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1089,8 +1089,6 @@ static inline int snmp6_unregister_dev(struct inet6_dev *idev) { return 0; }
 #endif
 
 #ifdef CONFIG_SYSCTL
-extern struct ctl_table ipv6_route_table_template[];
-
 struct ctl_table *ipv6_icmp_sysctl_init(struct net *net);
 struct ctl_table *ipv6_route_sysctl_init(struct net *net);
 int ipv6_sysctl_register(void);
-- 
cgit v1.2.3


From af7d6cce53694a88d6a1bb60c9a239a6a5144459 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Tue, 9 Oct 2018 17:48:14 +0200
Subject: net: ipv4: update fnhe_pmtu when first hop's MTU changes

Since commit 5aad1de5ea2c ("ipv4: use separate genid for next hop
exceptions"), exceptions get deprecated separately from cached
routes. In particular, administrative changes don't clear PMTU anymore.

As Stefano described in commit e9fa1495d738 ("ipv6: Reflect MTU changes
on PMTU of exceptions for MTU-less routes"), the PMTU discovered before
the local MTU change can become stale:
 - if the local MTU is now lower than the PMTU, that PMTU is now
   incorrect
 - if the local MTU was the lowest value in the path, and is increased,
   we might discover a higher PMTU

Similarly to what commit e9fa1495d738 did for IPv6, update PMTU in those
cases.

If the exception was locked, the discovered PMTU was smaller than the
minimal accepted PMTU. In that case, if the new local MTU is smaller
than the current PMTU, let PMTU discovery figure out if locking of the
exception is still needed.

To do this, we need to know the old link MTU in the NETDEV_CHANGEMTU
notifier. By the time the notifier is called, dev->mtu has been
changed. This patch adds the old MTU as additional information in the
notifier structure, and a new call_netdevice_notifiers_u32() function.

Fixes: 5aad1de5ea2c ("ipv4: use separate genid for next hop exceptions")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 69c91d1934c1..c9b7b136939d 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -394,6 +394,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev);
 int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force);
 int fib_sync_down_addr(struct net_device *dev, __be32 local);
 int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
+void fib_sync_mtu(struct net_device *dev, u32 orig_mtu);
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
-- 
cgit v1.2.3


From bc847970f43281cb07c9f7d0897ee08cd1e08cf3 Mon Sep 17 00:00:00 2001
From: Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>
Date: Wed, 3 Oct 2018 20:19:20 -0700
Subject: mac80211: support FTM responder configuration/statistics

New bss param ftm_responder is used to notify the driver to
enable fine timing request (FTM) responder role in AP mode.

Plumb the new cfg80211 API for FTM responder statistics through to
the driver API in mac80211.

Signed-off-by: David Spinadel <david.spinadel@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index c4fadbafbf21..2ccd4d1bef89 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -309,6 +309,8 @@ struct ieee80211_vif_chanctx_switch {
  * @BSS_CHANGED_KEEP_ALIVE: keep alive options (idle period or protected
  *	keep alive) changed.
  * @BSS_CHANGED_MCAST_RATE: Multicast Rate setting changed for this interface
+ * @BSS_CHANGED_FTM_RESPONDER: fime timing reasurement request responder
+ *	functionality changed for this BSS (AP mode).
  *
  */
 enum ieee80211_bss_change {
@@ -338,6 +340,7 @@ enum ieee80211_bss_change {
 	BSS_CHANGED_MU_GROUPS		= 1<<23,
 	BSS_CHANGED_KEEP_ALIVE		= 1<<24,
 	BSS_CHANGED_MCAST_RATE		= 1<<25,
+	BSS_CHANGED_FTM_RESPONDER	= 1<<26,
 
 	/* when adding here, make sure to change ieee80211_reconfig */
 };
@@ -463,6 +466,21 @@ struct ieee80211_mu_group_data {
 	u8 position[WLAN_USER_POSITION_LEN];
 };
 
+/**
+ * ieee80211_ftm_responder_params - FTM responder parameters
+ *
+ * @lci: LCI subelement content
+ * @civicloc: CIVIC location subelement content
+ * @lci_len: LCI data length
+ * @civicloc_len: Civic data length
+ */
+struct ieee80211_ftm_responder_params {
+	const u8 *lci;
+	const u8 *civicloc;
+	size_t lci_len;
+	size_t civicloc_len;
+};
+
 /**
  * struct ieee80211_bss_conf - holds the BSS's changing parameters
  *
@@ -562,6 +580,9 @@ struct ieee80211_mu_group_data {
  * @protected_keep_alive: if set, indicates that the station should send an RSN
  *	protected frame to the AP to reset the idle timer at the AP for the
  *	station.
+ * @ftm_responder: whether to enable or disable fine timing measurement FTM
+ *	responder functionality.
+ * @ftmr_params: configurable lci/civic parameter when enabling FTM responder.
  */
 struct ieee80211_bss_conf {
 	const u8 *bssid;
@@ -612,6 +633,8 @@ struct ieee80211_bss_conf {
 	bool allow_p2p_go_ps;
 	u16 max_idle_period;
 	bool protected_keep_alive;
+	bool ftm_responder;
+	struct ieee80211_ftm_responder_params *ftmr_params;
 };
 
 /**
@@ -3598,6 +3621,8 @@ enum ieee80211_reconfig_type {
  *	aggregating two specific frames in the same A-MSDU. The relation
  *	between the skbs should be symmetric and transitive. Note that while
  *	skb is always a real frame, head may or may not be an A-MSDU.
+ * @get_ftm_responder_stats: Retrieve FTM responder statistics, if available.
+ *	Statistics should be cumulative, currently no way to reset is provided.
  */
 struct ieee80211_ops {
 	void (*tx)(struct ieee80211_hw *hw,
@@ -3883,6 +3908,9 @@ struct ieee80211_ops {
 	bool (*can_aggregate_in_amsdu)(struct ieee80211_hw *hw,
 				       struct sk_buff *head,
 				       struct sk_buff *skb);
+	int (*get_ftm_responder_stats)(struct ieee80211_hw *hw,
+				       struct ieee80211_vif *vif,
+				       struct cfg80211_ftm_responder_stats *ftm_stats);
 };
 
 /**
-- 
cgit v1.2.3


From 0d4e14a32dcab9c4bd559d02874120fbb86b1322 Mon Sep 17 00:00:00 2001
From: Ankita Bajaj <bankita@codeaurora.org>
Date: Thu, 27 Sep 2018 18:01:57 +0300
Subject: nl80211: Add per peer statistics to compute FCS error rate

Add support for drivers to report the total number of MPDUs received
and the number of MPDUs received with an FCS error from a specific
peer. These counters will be incremented only when the TA of the
frame matches the MAC address of the peer irrespective of FCS
error.

It should be noted that the TA field in the frame might be corrupted
when there is an FCS error and TA matching logic would fail in such
cases. Hence, FCS error counter might not be fully accurate, but it can
provide help in detecting bad RX links in significant number of cases.
This FCS error counter without full accuracy can be used, e.g., to
trigger a kick-out of a connected client with a bad link in AP mode to
force such a client to roam to another AP.

Signed-off-by: Ankita Bajaj <bankita@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 0e16e723dcef..1fa41b7a1be3 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1303,6 +1303,10 @@ struct cfg80211_tid_stats {
  * @ack_signal: signal strength (in dBm) of the last ACK frame.
  * @avg_ack_signal: average rssi value of ack packet for the no of msdu's has
  *	been sent.
+ * @rx_mpdu_count: number of MPDUs received from this station
+ * @fcs_err_count: number of packets (MPDUs) received from this station with
+ *	an FCS error. This counter should be incremented only when TA of the
+ *	received packet with an FCS error matches the peer MAC address.
  */
 struct station_info {
 	u64 filled;
@@ -1349,6 +1353,9 @@ struct station_info {
 	struct cfg80211_tid_stats *pertid;
 	s8 ack_signal;
 	s8 avg_ack_signal;
+
+	u32 rx_mpdu_count;
+	u32 fcs_err_count;
 };
 
 #if IS_ENABLED(CONFIG_CFG80211)
-- 
cgit v1.2.3


From f8252e7b5a83deee0e477fc1e31e3f06ceb35d28 Mon Sep 17 00:00:00 2001
From: Anilkumar Kolli <akolli@codeaurora.org>
Date: Thu, 11 Oct 2018 18:15:03 +0530
Subject: mac80211: implement ieee80211_tx_rate_update to update rate

Current mac80211 has provision to update tx status through
ieee80211_tx_status() and ieee80211_tx_status_ext(). But
drivers like ath10k updates the tx status from the skb except
txrate, txrate will be updated from a different path, peer stats.

Using ieee80211_tx_status_ext() in two different paths
(one for the stats, one for the tx rate) would duplicate
the stats instead.

To avoid this stats duplication, ieee80211_tx_rate_update()
is implemented.

Signed-off-by: Anilkumar Kolli <akolli@codeaurora.org>
[minor commit message editing, use initializers in code]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 2ccd4d1bef89..71985e95d2d9 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -4379,6 +4379,21 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif,
 void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta,
 					   u32 thr);
 
+/**
+ * ieee80211_tx_rate_update - transmit rate update callback
+ *
+ * Drivers should call this functions with a non-NULL pub sta
+ * This function can be used in drivers that does not have provision
+ * in updating the tx rate in data path.
+ *
+ * @hw: the hardware the frame was transmitted by
+ * @pubsta: the station to update the tx rate for.
+ * @info: tx status information
+ */
+void ieee80211_tx_rate_update(struct ieee80211_hw *hw,
+			      struct ieee80211_sta *pubsta,
+			      struct ieee80211_tx_info *info);
+
 /**
  * ieee80211_tx_status - transmit status callback
  *
-- 
cgit v1.2.3


From 7c6bb7d2faaf1ed7d78bafd712476e4cf2cf0d7d Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 11 Oct 2018 20:17:21 -0700
Subject: net/ipv6: Add knob to skip DELROUTE message on device down

Another difference between IPv4 and IPv6 is the generation of RTM_DELROUTE
notifications when a device is taken down (admin down) or deleted. IPv4
does not generate a message for routes evicted by the down or delete;
IPv6 does. A NOS at scale really needs to avoid these messages and have
IPv4 and IPv6 behave similarly, relying on userspace to handle link
notifications and evict the routes.

At this point existing user behavior needs to be preserved. Since
notifications are a global action (not per app) the only way to preserve
existing behavior and allow the messages to be skipped is to add a new
sysctl (net/ipv6/route/skip_notify_on_dev_down) which can be set to
disable the notificatioons.

IPv6 route code already supports the option to skip the message (it is
used for multipath routes for example). Besides the new sysctl we need
to pass the skip_notify setting through the generic fib6_clean and
fib6_walk functions to fib6_clean_node and to set skip_notify on calls
to __ip_del_rt for the addrconf_ifdown path.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h    | 3 +++
 include/net/netns/ipv6.h | 1 +
 2 files changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index f06e968f1992..caabfd84a098 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -407,6 +407,9 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
 
 void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *arg),
 		    void *arg);
+void fib6_clean_all_skip_notify(struct net *net,
+				int (*func)(struct fib6_info *, void *arg),
+				void *arg);
 
 int fib6_add(struct fib6_node *root, struct fib6_info *rt,
 	     struct nl_info *info, struct netlink_ext_ack *extack);
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index f0e396ab9bec..ef1ed529f33c 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -45,6 +45,7 @@ struct netns_sysctl_ipv6 {
 	int max_dst_opts_len;
 	int max_hbh_opts_len;
 	int seg6_flowlabel;
+	bool skip_notify_on_dev_down;
 };
 
 struct netns_ipv6 {
-- 
cgit v1.2.3


From 859bd2ef1fc1110a8031b967ee656c53a6260a76 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 11 Oct 2018 20:33:49 -0700
Subject: net: Evict neighbor entries on carrier down

When a link's carrier goes down it could be a sign of the port changing
networks. If the new network has overlapping addresses with the old one,
then the kernel will continue trying to use neighbor entries established
based on the old network until the entries finally age out - meaning a
potentially long delay with communications not working.

This patch evicts neighbor entries on carrier down with the exception of
those marked permanent. Permanent entries are managed by userspace (either
an admin or a routing daemon such as FRR).

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/neighbour.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 0874f7fcd859..f58b384aa6c9 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -323,6 +323,7 @@ void __neigh_set_probe_once(struct neighbour *neigh);
 bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl);
 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
+int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev);
 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb);
 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb);
 int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb);
-- 
cgit v1.2.3


From 5886d932e52acfbe12ea5aac8e7c3ad6f16364d1 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 12 Oct 2018 12:53:00 +0200
Subject: netlink: replace __NLA_ENSURE implementation

We already have BUILD_BUG_ON_ZERO() which I just hadn't found
before, so we should use it here instead of open-coding another
implementation thereof.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index f1db8e594847..4c1e99303b5a 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -311,7 +311,7 @@ struct nla_policy {
 #define NLA_POLICY_NESTED_ARRAY(maxattr, policy) \
 	{ .type = NLA_NESTED_ARRAY, .validation_data = policy, .len = maxattr }
 
-#define __NLA_ENSURE(condition) (sizeof(char[1 - 2*!(condition)]) - 1)
+#define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition))
 #define NLA_ENSURE_INT_TYPE(tp)				\
 	(__NLA_ENSURE(tp == NLA_S8 || tp == NLA_U8 ||	\
 		      tp == NLA_S16 || tp == NLA_U16 ||	\
-- 
cgit v1.2.3


From 571f739083e2544b343b5998608de679519de4e9 Mon Sep 17 00:00:00 2001
From: Mallikarjun Phulari <mallikarjun.phulari@intel.com>
Date: Fri, 5 Oct 2018 14:48:12 +0530
Subject: Bluetooth: Use separate L2CAP LE credit based connection result
 values

Add the result values specific to L2CAP LE credit based connections
and change the old result values wherever they were used.

Signed-off-by: Mallikarjun Phulari <mallikarjun.phulari@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'include/net')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 3555440e14fc..ea4b4ec85b78 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -277,12 +277,17 @@ struct l2cap_conn_rsp {
 #define L2CAP_CR_SEC_BLOCK	0x0003
 #define L2CAP_CR_NO_MEM		0x0004
 #define L2CAP_CR_BAD_AMP	0x0005
-#define L2CAP_CR_AUTHENTICATION	0x0005
-#define L2CAP_CR_AUTHORIZATION	0x0006
-#define L2CAP_CR_BAD_KEY_SIZE	0x0007
-#define L2CAP_CR_ENCRYPTION	0x0008
-#define L2CAP_CR_INVALID_SCID	0x0009
-#define L2CAP_CR_SCID_IN_USE	0x000A
+
+/* credit based connect results */
+#define L2CAP_CR_LE_SUCCESS		0x0000
+#define L2CAP_CR_LE_BAD_PSM		0x0002
+#define L2CAP_CR_LE_NO_MEM		0x0004
+#define L2CAP_CR_LE_AUTHENTICATION	0x0005
+#define L2CAP_CR_LE_AUTHORIZATION	0x0006
+#define L2CAP_CR_LE_BAD_KEY_SIZE	0x0007
+#define L2CAP_CR_LE_ENCRYPTION		0x0008
+#define L2CAP_CR_LE_INVALID_SCID	0x0009
+#define L2CAP_CR_LE_SCID_IN_USE		0X000A
 
 /* connect/create channel status */
 #define L2CAP_CS_NO_INFO	0x0000
-- 
cgit v1.2.3


From dd1a8f8a88eecbc903f9ffff12332bec6d3f3be3 Mon Sep 17 00:00:00 2001
From: Mallikarjun Phulari <mallikarjun.phulari@intel.com>
Date: Fri, 5 Oct 2018 14:48:13 +0530
Subject: Bluetooth: Errata Service Release 8, Erratum 3253
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

L2CAP: New result values
	0x0006 - Connection refused – Invalid Source CID
	0x0007 - Connection refused – Source CID already allocated

As per the ESR08_V1.0.0, 1.11.2 Erratum 3253, Page No. 54,
"Remote CID invalid Issue".
Applies to Core Specification versions: V5.0, V4.2, v4.1, v4.0, and v3.0 + HS
Vol 3, Part A, Section 4.2, 4.3, 4.14, 4.15.

Core Specification Version 5.0, Page No.1753, Table 4.6 and
Page No. 1767, Table 4.14

New result values are added to l2cap connect/create channel response as
0x0006 - Connection refused – Invalid Source CID
0x0007 - Connection refused – Source CID already allocated

Signed-off-by: Mallikarjun Phulari <mallikarjun.phulari@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index ea4b4ec85b78..093aedebdf0c 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -277,6 +277,8 @@ struct l2cap_conn_rsp {
 #define L2CAP_CR_SEC_BLOCK	0x0003
 #define L2CAP_CR_NO_MEM		0x0004
 #define L2CAP_CR_BAD_AMP	0x0005
+#define L2CAP_CR_INVALID_SCID	0x0006
+#define L2CAP_CR_SCID_IN_USE	0x0007
 
 /* credit based connect results */
 #define L2CAP_CR_LE_SUCCESS		0x0000
-- 
cgit v1.2.3


From 1243a51f6c05ecbb2c5c9e02fdcc1e7a06f76f26 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 13 Oct 2018 02:45:57 +0200
Subject: tcp, ulp: remove ulp bits from sockmap

In order to prepare sockmap logic to be used in combination with kTLS
we need to detangle it from ULP, and further split it in later commits
into a generic API.

Joint work with John.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/net/tcp.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0d2929223c70..8f5cef67fd35 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2057,7 +2057,6 @@ struct tcp_ulp_ops {
 int tcp_register_ulp(struct tcp_ulp_ops *type);
 void tcp_unregister_ulp(struct tcp_ulp_ops *type);
 int tcp_set_ulp(struct sock *sk, const char *name);
-int tcp_set_ulp_id(struct sock *sk, const int ulp);
 void tcp_get_available_ulp(char *buf, size_t len);
 void tcp_cleanup_ulp(struct sock *sk);
 
-- 
cgit v1.2.3


From 604326b41a6fb9b4a78b6179335decee0365cd8c Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 13 Oct 2018 02:45:58 +0200
Subject: bpf, sockmap: convert to generic sk_msg interface

Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.

This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.

Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.

The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.

Joint work with John.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/net/tcp.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 8f5cef67fd35..3600ae0f25c3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -858,6 +858,21 @@ static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
 	TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
 }
 
+static inline bool tcp_skb_bpf_ingress(const struct sk_buff *skb)
+{
+	return TCP_SKB_CB(skb)->bpf.flags & BPF_F_INGRESS;
+}
+
+static inline struct sock *tcp_skb_bpf_redirect_fetch(struct sk_buff *skb)
+{
+	return TCP_SKB_CB(skb)->bpf.sk_redir;
+}
+
+static inline void tcp_skb_bpf_redirect_clear(struct sk_buff *skb)
+{
+	TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
+}
+
 #if IS_ENABLED(CONFIG_IPV6)
 /* This is the variant of inet6_iif() that must be used by TCP,
  * as TCP moves IP6CB into a different location in skb->cb[]
@@ -2064,6 +2079,18 @@ void tcp_cleanup_ulp(struct sock *sk);
 	__MODULE_INFO(alias, alias_userspace, name);		\
 	__MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
 
+struct sk_msg;
+struct sk_psock;
+
+int tcp_bpf_init(struct sock *sk);
+void tcp_bpf_reinit(struct sock *sk);
+int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
+			  int flags);
+int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+		    int nonblock, int flags, int *addr_len);
+int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
+		      struct msghdr *msg, int len);
+
 /* Call BPF_SOCK_OPS program that returns an int. If the return value
  * is < 0, then the BPF op failed (for example if the loaded BPF
  * program does not support the chosen operation or there is no BPF
-- 
cgit v1.2.3


From d829e9c4112b52f4f00195900fd4c685f61365ab Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 13 Oct 2018 02:45:59 +0200
Subject: tls: convert to generic sk_msg interface

Convert kTLS over to make use of sk_msg interface for plaintext and
encrypted scattergather data, so it reuses all the sk_msg helpers
and data structure which later on in a second step enables to glue
this to BPF.

This also allows to remove quite a bit of open coded helpers which
are covered by the sk_msg API. Recent changes in kTLs 80ece6a03aaf
("tls: Remove redundant vars from tls record structure") and
4e6d47206c32 ("tls: Add support for inplace records encryption")
changed the data path handling a bit; while we've kept the latter
optimization intact, we had to undo the former change to better
fit the sk_msg model, hence the sg_aead_in and sg_aead_out have
been brought back and are linked into the sk_msg sgs. Now the kTLS
record contains a msg_plaintext and msg_encrypted sk_msg each.

In the original code, the zerocopy_from_iter() has been used out
of TX but also RX path. For the strparser skb-based RX path,
we've left the zerocopy_from_iter() in decrypt_internal() mostly
untouched, meaning it has been moved into tls_setup_from_iter()
with charging logic removed (as not used from RX). Given RX path
is not based on sk_msg objects, we haven't pursued setting up a
dummy sk_msg to call into sk_msg_zerocopy_from_iter(), but it
could be an option to prusue in a later step.

Joint work with John.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/net/sock.h |  4 ----
 include/net/tls.h  | 18 +++++++++---------
 2 files changed, 9 insertions(+), 13 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 751549ac0a84..7470c45d182d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2214,10 +2214,6 @@ static inline struct page_frag *sk_page_frag(struct sock *sk)
 
 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
 
-int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
-		int sg_start, int *sg_curr, unsigned int *sg_size,
-		int first_coalesce);
-
 /*
  *	Default write policy as shown to user space via poll/select/SIGIO
  */
diff --git a/include/net/tls.h b/include/net/tls.h
index 5e853835597e..3d22d8a59be7 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -39,6 +39,8 @@
 #include <linux/crypto.h>
 #include <linux/socket.h>
 #include <linux/tcp.h>
+#include <linux/skmsg.h>
+
 #include <net/tcp.h>
 #include <net/strparser.h>
 #include <crypto/aead.h>
@@ -103,15 +105,13 @@ struct tls_rec {
 	int tx_flags;
 	int inplace_crypto;
 
-	/* AAD | sg_plaintext_data | sg_tag */
-	struct scatterlist sg_plaintext_data[MAX_SKB_FRAGS + 1];
-	/* AAD | sg_encrypted_data (data contain overhead for hdr&iv&tag) */
-	struct scatterlist sg_encrypted_data[MAX_SKB_FRAGS + 1];
+	struct sk_msg msg_plaintext;
+	struct sk_msg msg_encrypted;
 
-	unsigned int sg_plaintext_size;
-	unsigned int sg_encrypted_size;
-	int sg_plaintext_num_elem;
-	int sg_encrypted_num_elem;
+	/* AAD | msg_plaintext.sg.data | sg_tag */
+	struct scatterlist sg_aead_in[2];
+	/* AAD | msg_encrypted.sg.data (data contains overhead for hdr & iv & tag) */
+	struct scatterlist sg_aead_out[2];
 
 	char aad_space[TLS_AAD_SPACE_SIZE];
 	struct aead_request aead_req;
@@ -223,8 +223,8 @@ struct tls_context {
 
 	unsigned long flags;
 	bool in_tcp_sendpages;
+	bool pending_open_record_frags;
 
-	u16 pending_open_record_frags;
 	int (*push_pending_record)(struct sock *sk, int flags);
 
 	void (*sk_write_space)(struct sock *sk);
-- 
cgit v1.2.3


From 924ad65ed01ee0eec5d2a3280c01c394343d6df7 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sat, 13 Oct 2018 02:46:00 +0200
Subject: tls: replace poll implementation with read hook

Instead of re-implementing poll routine use the poll callback to
trigger read from kTLS, we reuse the stream_memory_read callback
which is simpler and achieves the same. This helps to align sockmap
and kTLS so we can more easily embed BPF in kTLS.

Joint work with Daniel.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/net/tls.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tls.h b/include/net/tls.h
index 3d22d8a59be7..bab5627ff5e3 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -142,8 +142,7 @@ struct tls_sw_context_rx {
 
 	struct strparser strp;
 	void (*saved_data_ready)(struct sock *sk);
-	unsigned int (*sk_poll)(struct file *file, struct socket *sock,
-				struct poll_table_struct *wait);
+
 	struct sk_buff *recv_pkt;
 	u8 control;
 	bool decrypted;
@@ -272,8 +271,7 @@ void tls_sw_free_resources_rx(struct sock *sk);
 void tls_sw_release_resources_rx(struct sock *sk);
 int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		   int nonblock, int flags, int *addr_len);
-unsigned int tls_sw_poll(struct file *file, struct socket *sock,
-			 struct poll_table_struct *wait);
+bool tls_sw_stream_read(const struct sock *sk);
 ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
 			   struct pipe_inode_info *pipe,
 			   size_t len, unsigned int flags);
-- 
cgit v1.2.3


From 8a615c6b0352a9ec56151b6c95d68e0a2eef5cf0 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@wand.net.nz>
Date: Mon, 15 Oct 2018 10:27:45 -0700
Subject: bpf: Allow sk_lookup with IPv6 module

This is a more complete fix than d71019b54bff ("net: core: Fix build
with CONFIG_IPV6=m"), so that IPv6 sockets may be looked up if the IPv6
module is loaded (not just if it's compiled in).

Signed-off-by: Joe Stringer <joe@wand.net.nz>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/net/addrconf.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/net')

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 6def0351bcc3..14b789a123e7 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -265,6 +265,11 @@ extern const struct ipv6_stub *ipv6_stub __read_mostly;
 struct ipv6_bpf_stub {
 	int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 			  bool force_bind_address_no_port, bool with_lock);
+	struct sock *(*udp6_lib_lookup)(struct net *net,
+					const struct in6_addr *saddr, __be16 sport,
+					const struct in6_addr *daddr, __be16 dport,
+					int dif, int sdif, struct udp_table *tbl,
+					struct sk_buff *skb);
 };
 extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
 
-- 
cgit v1.2.3


From f547fac624be53ad8b07e9ebca7654a7827ba61b Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 12 Oct 2018 16:22:47 +0200
Subject: ipv6: rate-limit probes for neighbourless routes

When commit 270972554c91 ("[IPV6]: ROUTE: Add Router Reachability
Probing (RFC4191).") introduced router probing, the rt6_probe() function
required that a neighbour entry existed. This neighbour entry is used to
record the timestamp of the last probe via the ->updated field.

Later, commit 2152caea7196 ("ipv6: Do not depend on rt->n in rt6_probe().")
removed the requirement for a neighbour entry. Neighbourless routes skip
the interval check and are not rate-limited.

This patch adds rate-limiting for neighbourless routes, by recording the
timestamp of the last probe in the fib6_info itself.

Fixes: 2152caea7196 ("ipv6: Do not depend on rt->n in rt6_probe().")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 3d4930528db0..2d31e22babd8 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -159,6 +159,10 @@ struct fib6_info {
 	struct rt6_info * __percpu	*rt6i_pcpu;
 	struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
 
+#ifdef CONFIG_IPV6_ROUTER_PREF
+	unsigned long			last_probe;
+#endif
+
 	u32				fib6_metric;
 	u8				fib6_protocol;
 	u8				fib6_type;
-- 
cgit v1.2.3


From d805397c3822d57ca3884d4bea37b2291fc40992 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 15 Oct 2018 19:58:29 +0800
Subject: sctp: use the pmtu from the icmp packet to update transport pathmtu

Other than asoc pmtu sync from all transports, sctp_assoc_sync_pmtu
is also processing transport pmtu_pending by icmp packets. But it's
meaningless to use sctp_dst_mtu(t->dst) as new pmtu for a transport.

The right pmtu value should come from the icmp packet, and it would
be saved into transport->mtu_info in this patch and used later when
the pmtu sync happens in sctp_sendmsg_to_asoc or sctp_packet_config.

Besides, without this patch, as pmtu can only be updated correctly
when receiving a icmp packet and no place is holding sock lock, it
will take long time if the sock is busy with sending packets.

Note that it doesn't process transport->mtu_info in .release_cb(),
as there is no enough information for pmtu update, like for which
asoc or transport. It is not worth traversing all asocs to check
pmtu_pending. So unlike tcp, sctp does this in tx path, for which
mtu_info needs to be atomic_t.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 28a7c8e44636..a11f93790476 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -876,6 +876,8 @@ struct sctp_transport {
 	unsigned long sackdelay;
 	__u32 sackfreq;
 
+	atomic_t mtu_info;
+
 	/* When was the last time that we heard from this transport? We use
 	 * this to pick new active and retran paths.
 	 */
-- 
cgit v1.2.3


From 76a9ebe811fb3d0605cb084f1ae6be5610541865 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Oct 2018 09:37:53 -0700
Subject: net: extend sk_pacing_rate to unsigned long

sk_pacing_rate has beed introduced as a u32 field in 2013,
effectively limiting per flow pacing to 34Gbit.

We believe it is time to allow TCP to pace high speed flows
on 64bit hosts, as we now can reach 100Gbit on one TCP flow.

This patch adds no cost for 32bit kernels.

The tcpi_pacing_rate and tcpi_max_pacing_rate were already
exported as 64bit, so iproute2/ss command require no changes.

Unfortunately the SO_MAX_PACING_RATE socket option will stay
32bit and we will need to add a new option to let applications
control high pacing rates.

State      Recv-Q Send-Q Local Address:Port             Peer Address:Port
ESTAB      0      1787144  10.246.9.76:49992             10.246.9.77:36741
                 timer:(on,003ms,0) ino:91863 sk:2 <->
 skmem:(r0,rb540000,t66440,tb2363904,f605944,w1822984,o0,bl0,d0)
 ts sack bbr wscale:8,8 rto:201 rtt:0.057/0.006 mss:1448
 rcvmss:536 advmss:1448
 cwnd:138 ssthresh:178 bytes_acked:256699822585 segs_out:177279177
 segs_in:3916318 data_segs_out:177279175
 bbr:(bw:31276.8Mbps,mrtt:0,pacing_gain:1.25,cwnd_gain:2)
 send 28045.5Mbps lastrcv:73333
 pacing_rate 38705.0Mbps delivery_rate 22997.6Mbps
 busy:73333ms unacked:135 retrans:0/157 rcv_space:14480
 notsent:2085120 minrtt:0.013

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 751549ac0a84..cfaf261936c8 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -422,8 +422,8 @@ struct sock {
 	struct timer_list	sk_timer;
 	__u32			sk_priority;
 	__u32			sk_mark;
-	u32			sk_pacing_rate; /* bytes per second */
-	u32			sk_max_pacing_rate;
+	unsigned long		sk_pacing_rate; /* bytes per second */
+	unsigned long		sk_max_pacing_rate;
 	struct page_frag	sk_frag;
 	netdev_features_t	sk_route_caps;
 	netdev_features_t	sk_route_nocaps;
-- 
cgit v1.2.3


From 4724676d551c0961659b1da3fb4b5928169fb184 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 15 Oct 2018 18:56:42 -0700
Subject: net: Add struct for fib dump filter

Add struct fib_dump_filter for options on limiting which routes are
returned in a dump request. The current list is table id, protocol,
route type, rtm_flags and nexthop device index. struct net is needed
to lookup the net_device from the index.

Declare the filter for each route dump handler and plumb the new
arguments from dump handlers to ip_valid_fib_dump_req.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h |  1 +
 include/net/ip_fib.h    | 13 ++++++++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index cef186dbd2ce..7ab119936e69 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -174,6 +174,7 @@ struct rt6_rtnl_dump_arg {
 	struct sk_buff *skb;
 	struct netlink_callback *cb;
 	struct net *net;
+	struct fib_dump_filter filter;
 };
 
 int rt6_dump_route(struct fib6_info *f6i, void *p_arg);
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 852e4ebf2209..667013bf4266 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -222,6 +222,16 @@ struct fib_table {
 	unsigned long		__data[0];
 };
 
+struct fib_dump_filter {
+	u32			table_id;
+	/* filter_set is an optimization that an entry is set */
+	bool			filter_set;
+	unsigned char		protocol;
+	unsigned char		rt_type;
+	unsigned int		flags;
+	struct net_device	*dev;
+};
+
 int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
 		     struct fib_result *res, int fib_flags);
 int fib_table_insert(struct net *, struct fib_table *, struct fib_config *,
@@ -453,6 +463,7 @@ static inline void fib_proc_exit(struct net *net)
 
 u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr);
 
-int ip_valid_fib_dump_req(const struct nlmsghdr *nlh,
+int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
+			  struct fib_dump_filter *filter,
 			  struct netlink_ext_ack *extack);
 #endif  /* _NET_FIB_H */
-- 
cgit v1.2.3


From 18a8021a7be3207686851208f91a2f105b2d4703 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 15 Oct 2018 18:56:43 -0700
Subject: net/ipv4: Plumb support for filtering route dumps

Implement kernel side filtering of routes by table id, egress device index,
protocol and route type. If the table id is given in the filter, lookup the
table and call fib_table_dump directly for it.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 667013bf4266..1eabc9edd2b9 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -239,7 +239,7 @@ int fib_table_insert(struct net *, struct fib_table *, struct fib_config *,
 int fib_table_delete(struct net *, struct fib_table *, struct fib_config *,
 		     struct netlink_ext_ack *extack);
 int fib_table_dump(struct fib_table *table, struct sk_buff *skb,
-		   struct netlink_callback *cb);
+		   struct netlink_callback *cb, struct fib_dump_filter *filter);
 int fib_table_flush(struct net *net, struct fib_table *table);
 struct fib_table *fib_trie_unmerge(struct fib_table *main_tb);
 void fib_table_flush_external(struct fib_table *table);
-- 
cgit v1.2.3


From effe6792662495ad9c175bf0d9c53459a51fdbbd Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 15 Oct 2018 18:56:48 -0700
Subject: net: Enable kernel side filtering of route dumps

Update parsing of route dump request to enable kernel side filtering.
Allow filtering results by protocol (e.g., which routing daemon installed
the route), route type (e.g., unicast), table id and nexthop device. These
amount to the low hanging fruit, yet a huge improvement, for dumping
routes.

ip_valid_fib_dump_req is called with RTNL held, so __dev_get_by_index can
be used to look up the device index without taking a reference. From
there filter->dev is only used during dump loops with the lock still held.

Set NLM_F_DUMP_FILTERED in the answer_flags so the user knows the results
have been filtered should no entries be returned.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 1eabc9edd2b9..e8d9456bf36e 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -465,5 +465,5 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr);
 
 int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
 			  struct fib_dump_filter *filter,
-			  struct netlink_ext_ack *extack);
+			  struct netlink_callback *cb);
 #endif  /* _NET_FIB_H */
-- 
cgit v1.2.3


From 02c558b2d5d679fbbcaa5b9689484c7e0f8abb7b Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Tue, 16 Oct 2018 11:08:04 -0700
Subject: bpf: sockmap, support for msg_peek in sk_msg with redirect ingress

This adds support for the MSG_PEEK flag when doing redirect to ingress
and receiving on the sk_msg psock queue. Previously the flag was
being ignored which could confuse applications if they expected the
flag to work as normal.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/tcp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3600ae0f25c3..14fdd7ce9992 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2089,7 +2089,7 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
 int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		    int nonblock, int flags, int *addr_len);
 int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
-		      struct msghdr *msg, int len);
+		      struct msghdr *msg, int len, int flags);
 
 /* Call BPF_SOCK_OPS program that returns an int. If the return value
  * is < 0, then the BPF op failed (for example if the loaded BPF
-- 
cgit v1.2.3


From cca45e054ce55c06046a37bf4d3fd7c17edd57da Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 17 Oct 2018 08:53:10 +0000
Subject: vxlan: Export address checking functions

Drivers that support VxLAN offload need to be able to sanitize the
configuration of the VxLAN device and accept / reject its offload.

For example, mlxsw requires that the local IP of the VxLAN device be set
and that packets be flooded to unicast IP(s) and not to a multicast
group.

Expose the functions that perform such checks.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/vxlan.h | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'include/net')

diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 7ef15179f263..dd3d72ce64b6 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -370,4 +370,36 @@ static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs)
 	return vs->sock->sk->sk_family;
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+
+static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
+{
+	if (ipa->sa.sa_family == AF_INET6)
+		return ipv6_addr_any(&ipa->sin6.sin6_addr);
+	else
+		return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
+}
+
+static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
+{
+	if (ipa->sa.sa_family == AF_INET6)
+		return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr);
+	else
+		return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
+}
+
+#else /* !IS_ENABLED(CONFIG_IPV6) */
+
+static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
+{
+	return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
+}
+
+static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
+{
+	return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
+}
+
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+
 #endif
-- 
cgit v1.2.3


From 28e450333d4d1328710e258d38793c61658d4c95 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 17 Oct 2018 08:53:12 +0000
Subject: inet: Refactor INET_ECN_decapsulate()

Drivers that support tunnel decapsulation (IPinIP or NVE) need to
configure the underlying device to conform to the behavior outlined in
RFC 6040 with respect to the ECN bits.

This behavior is implemented by INET_ECN_decapsulate() which requires an
skb to be passed where the ECN CE bit can be potentially set. Since
these drivers do not need to mark an skb, but only configure the device
to do so, factor out the business logic to __INET_ECN_decapsulate() and
potentially perform the marking in INET_ECN_decapsulate().

This allows drivers to invoke __INET_ECN_decapsulate() and configure the
device.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Suggested-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_ecn.h | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

(limited to 'include/net')

diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index 482a1b705362..c8e2bebd8d93 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -183,8 +183,7 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb)
  *          1 if something is broken and should be logged (!!! above)
  *          2 if packet should be dropped
  */
-static inline int INET_ECN_decapsulate(struct sk_buff *skb,
-				       __u8 outer, __u8 inner)
+static inline int __INET_ECN_decapsulate(__u8 outer, __u8 inner, bool *set_ce)
 {
 	if (INET_ECN_is_not_ect(inner)) {
 		switch (outer & INET_ECN_MASK) {
@@ -198,10 +197,21 @@ static inline int INET_ECN_decapsulate(struct sk_buff *skb,
 		}
 	}
 
-	if (INET_ECN_is_ce(outer))
+	*set_ce = INET_ECN_is_ce(outer);
+	return 0;
+}
+
+static inline int INET_ECN_decapsulate(struct sk_buff *skb,
+				       __u8 outer, __u8 inner)
+{
+	bool set_ce = false;
+	int rc;
+
+	rc = __INET_ECN_decapsulate(outer, inner, &set_ce);
+	if (!rc && set_ce)
 		INET_ECN_set_ce(skb);
 
-	return 0;
+	return rc;
 }
 
 static inline int IP_ECN_decapsulate(const struct iphdr *oiph,
-- 
cgit v1.2.3


From 5ff4ff4fe8c4e7d0de1d837e489056f0c470667b Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 17 Oct 2018 08:53:20 +0000
Subject: net: Add netif_is_vxlan()

Add the ability to determine whether a netdev is a VxLAN netdev by
calling the above mentioned function that checks the netdev's
rtnl_link_ops.

This will allow modules to identify netdev events involving a VxLAN
netdev and act accordingly. For example, drivers capable of VxLAN
offload will need to configure the underlying device when a VxLAN netdev
is being enslaved to an offloaded bridge.

Convert nfp to use the newly introduced helper.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Petr Machata <petrm@mellanox.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/vxlan.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/net')

diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index dd3d72ce64b6..95227fa925e8 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -5,6 +5,7 @@
 #include <linux/if_vlan.h>
 #include <net/udp_tunnel.h>
 #include <net/dst_metadata.h>
+#include <net/rtnetlink.h>
 
 /* VXLAN protocol (RFC 7348) header:
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -402,4 +403,10 @@ static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
 
 #endif /* IS_ENABLED(CONFIG_IPV6) */
 
+static inline bool netif_is_vxlan(const struct net_device *dev)
+{
+	return dev->rtnl_link_ops &&
+	       !strcmp(dev->rtnl_link_ops->kind, "vxlan");
+}
+
 #endif
-- 
cgit v1.2.3


From 9a99735317866e821c75f957fc85c63d049d330c Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Wed, 17 Oct 2018 08:53:22 +0000
Subject: vxlan: Add switchdev notifications

When offloading VXLAN devices, drivers need to know about events in
VXLAN FDB database. Since VXLAN models a bridge, it is natural to
distribute the VXLAN FDB notifications using the pre-existing switchdev
notification mechanism.

To that end, introduce two new notification types:
SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE and SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE.
Introduce a new function, vxlan_fdb_switchdev_call_notifiers() to send
the new notifier types, and a struct switchdev_notifier_vxlan_fdb_info
to communicate the details of the FDB entry under consideration.

Invoke the new function from vxlan_fdb_notify().

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/switchdev.h |  3 +++
 include/net/vxlan.h     | 11 +++++++++++
 2 files changed, 14 insertions(+)

(limited to 'include/net')

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index d574ce63bf22..47199a11c586 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -145,6 +145,9 @@ enum switchdev_notifier_type {
 	SWITCHDEV_FDB_ADD_TO_DEVICE,
 	SWITCHDEV_FDB_DEL_TO_DEVICE,
 	SWITCHDEV_FDB_OFFLOADED,
+
+	SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
+	SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE,
 };
 
 struct switchdev_notifier_info {
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 95227fa925e8..3f00877f5edf 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -6,6 +6,7 @@
 #include <net/udp_tunnel.h>
 #include <net/dst_metadata.h>
 #include <net/rtnetlink.h>
+#include <net/switchdev.h>
 
 /* VXLAN protocol (RFC 7348) header:
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -409,4 +410,14 @@ static inline bool netif_is_vxlan(const struct net_device *dev)
 	       !strcmp(dev->rtnl_link_ops->kind, "vxlan");
 }
 
+struct switchdev_notifier_vxlan_fdb_info {
+	struct switchdev_notifier_info info; /* must be first */
+	union vxlan_addr remote_ip;
+	__be16 remote_port;
+	__be32 remote_vni;
+	u32 remote_ifindex;
+	u8 eth_addr[ETH_ALEN];
+	__be32 vni;
+};
+
 #endif
-- 
cgit v1.2.3


From 1941f1d6453a527ae8df59891da0319646608444 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Wed, 17 Oct 2018 08:53:24 +0000
Subject: vxlan: Add vxlan_fdb_find_uc() for FDB querying

A switchdev-capable driver that is aware of VXLAN may need to query
VXLAN FDB. In the particular case of mlxsw, this functionality is
limited to querying UC FDBs. Those being easier to deal with than the
general case of RDST chain traversal, introduce an interface to query
specifically UC FDBs: vxlan_fdb_find_uc().

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/vxlan.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/net')

diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 3f00877f5edf..1828d686ac4f 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -420,4 +420,16 @@ struct switchdev_notifier_vxlan_fdb_info {
 	__be32 vni;
 };
 
+#if IS_ENABLED(CONFIG_VXLAN)
+int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
+		      struct switchdev_notifier_vxlan_fdb_info *fdb_info);
+#else
+static inline int
+vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
+		  struct switchdev_notifier_vxlan_fdb_info *fdb_info)
+{
+	return -ENOENT;
+}
+#endif
+
 #endif
-- 
cgit v1.2.3


From 0efe11733356273d734cc2c5ab2dc6f5865cbeb6 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Wed, 17 Oct 2018 08:53:26 +0000
Subject: vxlan: Support marking RDSTs as offloaded

Offloaded bridge FDB entries are marked with NTF_OFFLOADED. Implement a
similar mechanism for VXLAN, where a given remote destination can be
marked as offloaded.

To that end, introduce a new event, SWITCHDEV_VXLAN_FDB_OFFLOADED,
through which the marking is communicated to the vxlan driver. To
identify which RDST should be marked as offloaded, an
switchdev_notifier_vxlan_fdb_info is passed to the listeners. The
"offloaded" flag in that object determines whether the offloaded mark
should be set or cleared.

When sending offloaded FDB entries over netlink, mark them with
NTF_OFFLOADED.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/switchdev.h | 1 +
 include/net/vxlan.h     | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/net')

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 47199a11c586..b040f82351ba 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -148,6 +148,7 @@ enum switchdev_notifier_type {
 
 	SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
 	SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE,
+	SWITCHDEV_VXLAN_FDB_OFFLOADED,
 };
 
 struct switchdev_notifier_info {
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 1828d686ac4f..03431c148e16 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -192,6 +192,7 @@ union vxlan_addr {
 struct vxlan_rdst {
 	union vxlan_addr	 remote_ip;
 	__be16			 remote_port;
+	u8			 offloaded:1;
 	__be32			 remote_vni;
 	u32			 remote_ifindex;
 	struct list_head	 list;
@@ -418,6 +419,7 @@ struct switchdev_notifier_vxlan_fdb_info {
 	u32 remote_ifindex;
 	u8 eth_addr[ETH_ALEN];
 	__be32 vni;
+	bool offloaded;
 };
 
 #if IS_ENABLED(CONFIG_VXLAN)
-- 
cgit v1.2.3


From e9ba0fbc7dd23a74e77960c98c988f59a1ff75aa Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Wed, 17 Oct 2018 08:53:29 +0000
Subject: bridge: switchdev: Allow clearing FDB entry offload indication

Currently, an FDB entry only ceases being offloaded when it is deleted.
This changes with VxLAN encapsulation.

Devices capable of performing VxLAN encapsulation usually have only one
FDB table, unlike the software data path which has two - one in the
bridge driver and another in the VxLAN driver.

Therefore, bridge FDB entries pointing to a VxLAN device are only
offloaded if there is a corresponding entry in the VxLAN FDB.

Allow clearing the offload indication in case the corresponding entry
was deleted from the VxLAN FDB.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/switchdev.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index b040f82351ba..881ecb1555bf 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -159,7 +159,8 @@ struct switchdev_notifier_fdb_info {
 	struct switchdev_notifier_info info; /* must be first */
 	const unsigned char *addr;
 	u16 vid;
-	bool added_by_user;
+	u8 added_by_user:1,
+	   offloaded:1;
 };
 
 static inline struct net_device *
-- 
cgit v1.2.3


From 6b4f92af3d59e882d3ba04c44a815266890d188f Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Fri, 12 Oct 2018 23:53:59 +0200
Subject: geneve, vxlan: Don't set exceptions if skb->len < mtu

We shouldn't abuse exceptions: if the destination MTU is already higher
than what we're transmitting, no exception should be created.

Fixes: 52a589d51f10 ("geneve: update skb dst pmtu on tx path")
Fixes: a93bf0ff4490 ("vxlan: update skb dst pmtu on tx path")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/net')

diff --git a/include/net/dst.h b/include/net/dst.h
index 7f735e76ca73..6cf0870414c7 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -527,4 +527,14 @@ static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu)
 		dst->ops->update_pmtu(dst, NULL, skb, mtu);
 }
 
+static inline void skb_tunnel_check_pmtu(struct sk_buff *skb,
+					 struct dst_entry *encap_dst,
+					 int headroom)
+{
+	u32 encap_mtu = dst_mtu(encap_dst);
+
+	if (skb->len > encap_mtu - headroom)
+		skb_dst_update_pmtu(skb, encap_mtu - headroom);
+}
+
 #endif /* _NET_DST_H */
-- 
cgit v1.2.3


From 5660b9d9d6a29c2c3cc12f62ae44bfb56b0a15a9 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 17 Oct 2018 21:11:27 +0800
Subject: sctp: fix the data size calculation in sctp_data_size

sctp data size should be calculated by subtracting data chunk header's
length from chunk_hdr->length, not just data header.

Fixes: 668c9beb9020 ("sctp: implement assign_number for sctp_stream_interleave")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 5ef1bad81ef5..9e3d32746430 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -347,7 +347,7 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk)
 	__u16 size;
 
 	size = ntohs(chunk->chunk_hdr->length);
-	size -= sctp_datahdr_len(&chunk->asoc->stream);
+	size -= sctp_datachk_len(&chunk->asoc->stream);
 
 	return size;
 }
-- 
cgit v1.2.3


From 605c0ac182c34867bda71bfbcc74958aabbe2fe0 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 17 Oct 2018 03:07:50 +0800
Subject: sctp: count both sk and asoc sndbuf with skb truesize and sctp_chunk
 size

Now it's confusing that asoc sndbuf_used is doing memory accounting with
SCTP_DATA_SNDSIZE(chunk) + sizeof(sk_buff) + sizeof(sctp_chunk) while sk
sk_wmem_alloc is doing that with skb->truesize + sizeof(sctp_chunk).

It also causes sctp_prsctp_prune to count with a wrong freed memory when
sndbuf_policy is not set.

To make this right and also keep consistent between asoc sndbuf_used, sk
sk_wmem_alloc and sk_wmem_queued, use skb->truesize + sizeof(sctp_chunk)
for them.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/constants.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 86f034b524d4..8dadc74c22e7 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -148,11 +148,6 @@ SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE,	enum sctp_event_primitive, primitive)
 #define sctp_chunk_is_data(a) (a->chunk_hdr->type == SCTP_CID_DATA || \
 			       a->chunk_hdr->type == SCTP_CID_I_DATA)
 
-/* Calculate the actual data size in a data chunk */
-#define SCTP_DATA_SNDSIZE(c) ((int)((unsigned long)(c->chunk_end) - \
-				    (unsigned long)(c->chunk_hdr) - \
-				    sctp_datachk_len(&c->asoc->stream)))
-
 /* Internal error codes */
 enum sctp_ierror {
 	SCTP_IERROR_NO_ERROR	        = 0,
-- 
cgit v1.2.3


From 5f1be84aad4b520a36246d0c289ad73641277630 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Fri, 12 Oct 2018 03:01:54 +0900
Subject: netfilter: nf_flow_table: remove unnecessary parameter of
 nf_flow_table_cleanup()

parameter net of nf_flow_table_cleanup() is not used.
So that it can be removed.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_flow_table.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 0e355f4a3d76..77e2761d4f2f 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -99,7 +99,7 @@ int nf_flow_table_iterate(struct nf_flowtable *flow_table,
 			  void (*iter)(struct flow_offload *flow, void *data),
 			  void *data);
 
-void nf_flow_table_cleanup(struct net *net, struct net_device *dev);
+void nf_flow_table_cleanup(struct net_device *dev);
 
 int nf_flow_table_init(struct nf_flowtable *flow_table);
 void nf_flow_table_free(struct nf_flowtable *flow_table);
-- 
cgit v1.2.3


From 468c041cff57e87f18e1022cacf9f5c98bf00b58 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Thu, 18 Oct 2018 22:29:59 +0900
Subject: netfilter: nfnetlink_log: remove empty nfnetlink_log.h header file

/include/net/netfilter/nfnetlink_log.h file is empty.
so that it can be removed.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nfnetlink_log.h | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 include/net/netfilter/nfnetlink_log.h

(limited to 'include/net')

diff --git a/include/net/netfilter/nfnetlink_log.h b/include/net/netfilter/nfnetlink_log.h
deleted file mode 100644
index ea32a7d3cf1b..000000000000
--- a/include/net/netfilter/nfnetlink_log.h
+++ /dev/null
@@ -1 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-- 
cgit v1.2.3


From c16ee04c9b305d57719344922c4d48379e206a79 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 21 Oct 2018 02:09:23 +0200
Subject: ulp: remove uid and user_visible members

They are not used anymore and therefore should be removed.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/net/tcp.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 14fdd7ce9992..8a61c3e8c15d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2051,11 +2051,6 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer);
 #define TCP_ULP_MAX		128
 #define TCP_ULP_BUF_MAX		(TCP_ULP_NAME_MAX*TCP_ULP_MAX)
 
-enum {
-	TCP_ULP_TLS,
-	TCP_ULP_BPF,
-};
-
 struct tcp_ulp_ops {
 	struct list_head	list;
 
@@ -2064,9 +2059,7 @@ struct tcp_ulp_ops {
 	/* cleanup ulp */
 	void (*release)(struct sock *sk);
 
-	int		uid;
 	char		name[TCP_ULP_NAME_MAX];
-	bool		user_visible;
 	struct module	*owner;
 };
 int tcp_register_ulp(struct tcp_ulp_ops *type);
-- 
cgit v1.2.3


From 424c22fb62427362f1f660cd83fbdc41aec488b6 Mon Sep 17 00:00:00 2001
From: Vito Caputo <vcaputo@pengaru.com>
Date: Sun, 21 Oct 2018 04:33:03 -0700
Subject: af_unix.h: trivial whitespace cleanup

Replace spurious spaces with a tab and remove superfluous tab from
unix_sock struct.

Signed-off-by: Vito Caputo <vcaputo@pengaru.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/af_unix.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index a5ba41b3b867..e2695c4bf358 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -52,7 +52,7 @@ struct unix_skb_parms {
 struct unix_sock {
 	/* WARNING: sk has to be the first member */
 	struct sock		sk;
-	struct unix_address     *addr;
+	struct unix_address	*addr;
 	struct path		path;
 	struct mutex		iolock, bindlock;
 	struct sock		*peer;
@@ -63,7 +63,7 @@ struct unix_sock {
 #define UNIX_GC_CANDIDATE	0
 #define UNIX_GC_MAYBE_CYCLE	1
 	struct socket_wq	peer_wq;
-	wait_queue_entry_t		peer_wake;
+	wait_queue_entry_t	peer_wake;
 };
 
 static inline struct unix_sock *unix_sk(const struct sock *sk)
-- 
cgit v1.2.3


From 89ab066d4229acd32e323f1569833302544a4186 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Tue, 23 Oct 2018 13:40:39 +0200
Subject: Revert "net: simplify sock_poll_wait"

This reverts commit dd979b4df817e9976f18fb6f9d134d6bc4a3c317.

This broke tcp_poll for SMC fallback: An AF_SMC socket establishes an
internal TCP socket for the initial handshake with the remote peer.
Whenever the SMC connection can not be established this TCP socket is
used as a fallback. All socket operations on the SMC socket are then
forwarded to the TCP socket. In case of poll, the file->private_data
pointer references the SMC socket because the TCP socket has no file
assigned. This causes tcp_poll to wait on the wrong socket.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 2440f8b407eb..f665d74ae509 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2059,14 +2059,20 @@ static inline bool skwq_has_sleeper(struct socket_wq *wq)
 /**
  * sock_poll_wait - place memory barrier behind the poll_wait call.
  * @filp:           file
+ * @sock:           socket to wait on
  * @p:              poll_table
  *
  * See the comments in the wq_has_sleeper function.
+ *
+ * Do not derive sock from filp->private_data here. An SMC socket establishes
+ * an internal TCP socket that is used in the fallback case. All socket
+ * operations on the SMC socket are then forwarded to the TCP socket. In case of
+ * poll, the filp->private_data pointer references the SMC socket because the
+ * TCP socket has no file assigned.
  */
-static inline void sock_poll_wait(struct file *filp, poll_table *p)
+static inline void sock_poll_wait(struct file *filp, struct socket *sock,
+				  poll_table *p)
 {
-	struct socket *sock = filp->private_data;
-
 	if (!poll_does_not_wait(p)) {
 		poll_wait(filp, &sock->wq->wait, p);
 		/* We need to be sure we are in sync with the
-- 
cgit v1.2.3


From 3f80e08f40cdb308589a49077c87632fa4508b21 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 23 Oct 2018 11:54:16 -0700
Subject: tcp: add tcp_reset_xmit_timer() helper

With EDT model, SRTT no longer is inflated by pacing delays.

This means that RTO and some other xmit timers might be setup
incorrectly. This is particularly visible with either :

- Very small enforced pacing rates (SO_MAX_PACING_RATE)
- Reduced rto (from the default 200 ms)

This can lead to TCP flows aborts in the worst case,
or spurious retransmits in other cases.

For example, this session gets far more throughput
than the requested 80kbit :

$ netperf -H 127.0.0.2 -l 100 -- -q 10000
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 127.0.0.2 () port 0 AF_INET
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

540000 262144 262144    104.00      2.66

With the fix :

$ netperf -H 127.0.0.2 -l 100 -- -q 10000
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 127.0.0.2 () port 0 AF_INET
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

540000 262144 262144    104.00      0.12

EDT allows for better control of rtx timers, since TCP has
a better idea of the earliest departure time of each skb
in the rtx queue. We only have to eventually add to the
timer the difference of the EDT time with current time.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 8a61c3e8c15d..a18914d20486 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1245,8 +1245,31 @@ static inline bool tcp_needs_internal_pacing(const struct sock *sk)
 	return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
 }
 
+/* Return in jiffies the delay before one skb is sent.
+ * If @skb is NULL, we look at EDT for next packet being sent on the socket.
+ */
+static inline unsigned long tcp_pacing_delay(const struct sock *sk,
+					     const struct sk_buff *skb)
+{
+	s64 pacing_delay = skb ? skb->tstamp : tcp_sk(sk)->tcp_wstamp_ns;
+
+	pacing_delay -= tcp_sk(sk)->tcp_clock_cache;
+
+	return pacing_delay > 0 ? nsecs_to_jiffies(pacing_delay) : 0;
+}
+
+static inline void tcp_reset_xmit_timer(struct sock *sk,
+					const int what,
+					unsigned long when,
+					const unsigned long max_when,
+					const struct sk_buff *skb)
+{
+	inet_csk_reset_xmit_timer(sk, what, when + tcp_pacing_delay(sk, skb),
+				  max_when);
+}
+
 /* Something is really bad, we could not queue an additional packet,
- * because qdisc is full or receiver sent a 0 window.
+ * because qdisc is full or receiver sent a 0 window, or we are paced.
  * We do not want to add fuel to the fire, or abort too early,
  * so make sure the timer we arm now is at least 200ms in the future,
  * regardless of current icsk_rto value (as it could be ~2ms)
@@ -1268,8 +1291,9 @@ static inline unsigned long tcp_probe0_when(const struct sock *sk,
 static inline void tcp_check_probe_timer(struct sock *sk)
 {
 	if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending)
-		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
-					  tcp_probe0_base(sk), TCP_RTO_MAX);
+		tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
+				     tcp_probe0_base(sk), TCP_RTO_MAX,
+				     NULL);
 }
 
 static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
-- 
cgit v1.2.3


From ae677bbb4441309e1827e60413de92363153dccb Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 24 Oct 2018 12:59:01 -0700
Subject: net: Don't return invalid table id error when dumping all families

When doing a route dump across all address families, do not error out
if the table does not exist. This allows a route dump for AF_UNSPEC
with a table id that may only exist for some of the families.

Do return the table does not exist error if dumping routes for a
specific family and the table does not exist.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/net')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index e8d9456bf36e..c5969762a8f4 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -226,6 +226,7 @@ struct fib_dump_filter {
 	u32			table_id;
 	/* filter_set is an optimization that an entry is set */
 	bool			filter_set;
+	bool			dump_all_families;
 	unsigned char		protocol;
 	unsigned char		rt_type;
 	unsigned int		flags;
-- 
cgit v1.2.3


From c4147beabec19fc7b37eb79251114bad3e9915dd Mon Sep 17 00:00:00 2001
From: Bo YU <tsu.yubo@gmail.com>
Date: Mon, 29 Oct 2018 23:42:09 -0400
Subject: net: add an identifier name for 'struct sock *'

Fix a warning from checkpatch:
function definition argument 'struct sock *' should also have an
identifier name in include/net/af_unix.h.

Signed-off-by: Bo YU <tsu.yubo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/af_unix.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index e2695c4bf358..d53aea859a76 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -13,7 +13,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp);
 void unix_gc(void);
 void wait_for_unix_gc(void);
 struct sock *unix_get_socket(struct file *filp);
-struct sock *unix_peer_get(struct sock *);
+struct sock *unix_peer_get(struct sock *sk);
 
 #define UNIX_HASH_SIZE	256
 #define UNIX_HASH_BITS	8
-- 
cgit v1.2.3


From b1c234441e07da748ccded3aaa37177622d469d3 Mon Sep 17 00:00:00 2001
From: Bo YU <tsu.yubo@gmail.com>
Date: Mon, 29 Oct 2018 23:42:10 -0400
Subject: net: drop a space before tabs

Fix a warning from checkpatch.pl:'please no space before tabs'
in include/net/af_unix.h

Signed-off-by: Bo YU <tsu.yubo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/af_unix.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index d53aea859a76..ddbba838d048 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -40,7 +40,7 @@ struct unix_skb_parms {
 	u32			consumed;
 } __randomize_layout;
 
-#define UNIXCB(skb) 	(*(struct unix_skb_parms *)&((skb)->cb))
+#define UNIXCB(skb)	(*(struct unix_skb_parms *)&((skb)->cb))
 
 #define unix_state_lock(s)	spin_lock(&unix_sk(s)->lock)
 #define unix_state_unlock(s)	spin_unlock(&unix_sk(s)->lock)
-- 
cgit v1.2.3


From 2384d02520ff2a916169b2fd85ea50e923ed56c2 Mon Sep 17 00:00:00 2001
From: Jeff Barnhill <0xeffeff@gmail.com>
Date: Fri, 2 Nov 2018 20:23:57 +0000
Subject: net/ipv6: Add anycast addresses to a global hashtable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

icmp6_send() function is expensive on systems with a large number of
interfaces. Every time it’s called, it has to verify that the source
address does not correspond to an existing anycast address by looping
through every device and every anycast address on the device.  This can
result in significant delays for a CPU when there are a large number of
neighbors and ND timers are frequently timing out and calling
neigh_invalidate().

Add anycast addresses to a global hashtable to allow quick searching for
matching anycast addresses.  This is based on inet6_addr_lst in addrconf.c.

Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/addrconf.h | 2 ++
 include/net/if_inet6.h | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include/net')

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 14b789a123e7..1656c5978498 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -317,6 +317,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 			 const struct in6_addr *addr);
 bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
 			     const struct in6_addr *addr);
+int ipv6_anycast_init(void);
+void ipv6_anycast_cleanup(void);
 
 /* Device notifier */
 int register_inet6addr_notifier(struct notifier_block *nb);
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index d7578cf49c3a..c9c78c15bce0 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -146,10 +146,12 @@ struct ifacaddr6 {
 	struct in6_addr		aca_addr;
 	struct fib6_info	*aca_rt;
 	struct ifacaddr6	*aca_next;
+	struct hlist_node	aca_addr_lst;
 	int			aca_users;
 	refcount_t		aca_refcnt;
 	unsigned long		aca_cstamp;
 	unsigned long		aca_tstamp;
+	struct rcu_head		rcu;
 };
 
 #define	IFA_HOST	IPV6_ADDR_LOOPBACK
-- 
cgit v1.2.3


From a95a7774d51e13f9cf4b7285666829b68852f07a Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 2 Nov 2018 00:11:34 +0100
Subject: netfilter: conntrack: add
 nf_{tcp,udp,sctp,icmp,dccp,icmpv6,generic}_pernet()

Expose these functions to access conntrack protocol tracker netns area,
nfnetlink_cttimeout needs this.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 39 ++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

(limited to 'include/net')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index eed04af9b75e..ae7b86f587f2 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -153,4 +153,43 @@ void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
 			       const char *fmt, ...) { }
 #endif /* CONFIG_SYSCTL */
 
+static inline struct nf_generic_net *nf_generic_pernet(struct net *net)
+{
+       return &net->ct.nf_ct_proto.generic;
+}
+
+static inline struct nf_tcp_net *nf_tcp_pernet(struct net *net)
+{
+       return &net->ct.nf_ct_proto.tcp;
+}
+
+static inline struct nf_udp_net *nf_udp_pernet(struct net *net)
+{
+       return &net->ct.nf_ct_proto.udp;
+}
+
+static inline struct nf_icmp_net *nf_icmp_pernet(struct net *net)
+{
+       return &net->ct.nf_ct_proto.icmp;
+}
+
+static inline struct nf_icmp_net *nf_icmpv6_pernet(struct net *net)
+{
+       return &net->ct.nf_ct_proto.icmpv6;
+}
+
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+static inline struct nf_dccp_net *nf_dccp_pernet(struct net *net)
+{
+       return &net->ct.nf_ct_proto.dccp;
+}
+#endif
+
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+static inline struct nf_sctp_net *nf_sctp_pernet(struct net *net)
+{
+       return &net->ct.nf_ct_proto.sctp;
+}
+#endif
+
 #endif /*_NF_CONNTRACK_PROTOCOL_H*/
-- 
cgit v1.2.3