188 files changed, 12170 insertions, 2181 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 8ccee3d01822..5e9950453955 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -647,13 +647,14 @@ out:
 	return err;
 }
 
-static struct sk_buff **vlan_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *vlan_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
-	struct sk_buff *p, **pp = NULL;
-	struct vlan_hdr *vhdr;
-	unsigned int hlen, off_vlan;
 	const struct packet_offload *ptype;
+	unsigned int hlen, off_vlan;
+	struct sk_buff *pp = NULL;
+	struct vlan_hdr *vhdr;
+	struct sk_buff *p;
 	__be16 type;
 	int flush = 1;
 
@@ -675,7 +676,7 @@ static struct sk_buff **vlan_gro_receive(struct sk_buff **head,
 
 	flush = 0;
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		struct vlan_hdr *vhdr2;
 
 		if (!NAPI_GRO_CB(p)->same_flow)
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index de8034d80623..361116f77cb9 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -24,7 +24,6 @@ config BATMAN_ADV
 	depends on NET
 	select CRC16
 	select LIBCRC32C
-        default n
 	help
           B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is
           a routing protocol for multi-hop ad-hoc mesh networks. The
@@ -33,7 +32,7 @@ config BATMAN_ADV
           tools.
 
 config BATMAN_ADV_BATMAN_V
-	bool "B.A.T.M.A.N. V protocol (experimental)"
+	bool "B.A.T.M.A.N. V protocol"
 	depends on BATMAN_ADV && !(CFG80211=m && BATMAN_ADV=y)
 	default y
 	help
@@ -60,7 +59,7 @@ config BATMAN_ADV_BLA
 config BATMAN_ADV_DAT
 	bool "Distributed ARP Table"
 	depends on BATMAN_ADV && INET
-	default n
+	default y
 	help
 	  This option enables DAT (Distributed ARP Table), a DHT based
 	  mechanism that increases ARP reliability on sparse wireless
@@ -70,7 +69,6 @@ config BATMAN_ADV_DAT
 config BATMAN_ADV_NC
 	bool "Network Coding"
 	depends on BATMAN_ADV
-	default n
 	help
 	  This option enables network coding, a mechanism that aims to
 	  increase the overall network throughput by fusing multiple
@@ -84,7 +82,6 @@ config BATMAN_ADV_NC
 config BATMAN_ADV_MCAST
 	bool "Multicast optimisation"
 	depends on BATMAN_ADV && INET && !(BRIDGE=m && BATMAN_ADV=y)
-	default n
 	help
 	  This option enables the multicast optimisation which aims to
 	  reduce the air overhead while improving the reliability of
@@ -94,7 +91,6 @@ config BATMAN_ADV_DEBUGFS
 	bool "batman-adv debugfs entries"
 	depends on BATMAN_ADV
 	depends on DEBUG_FS
-	default n
 	help
 	  Enable this to export routing related debug tables via debugfs.
 	  The information for each soft-interface and used hard-interface can be
diff --git a/net/batman-adv/bat_iv_ogm.h b/net/batman-adv/bat_iv_ogm.h
index 317cafd302cf..3dc6a7a43eb7 100644
--- a/net/batman-adv/bat_iv_ogm.h
+++ b/net/batman-adv/bat_iv_ogm.h
@@ -16,11 +16,11 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#ifndef _BATMAN_ADV_BATADV_IV_OGM_H_
-#define _BATMAN_ADV_BATADV_IV_OGM_H_
+#ifndef _NET_BATMAN_ADV_BAT_IV_OGM_H_
+#define _NET_BATMAN_ADV_BAT_IV_OGM_H_
 
 #include "main.h"
 
 int batadv_iv_init(void);
 
-#endif /* _BATMAN_ADV_BATADV_IV_OGM_H_ */
+#endif /* _NET_BATMAN_ADV_BAT_IV_OGM_H_ */
diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h
index ed36c5e79fde..e5be14c908c6 100644
--- a/net/batman-adv/bat_v_ogm.h
+++ b/net/batman-adv/bat_v_ogm.h
@@ -16,8 +16,8 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#ifndef _BATMAN_ADV_BATADV_V_OGM_H_
-#define _BATMAN_ADV_BATADV_V_OGM_H_
+#ifndef _NET_BATMAN_ADV_BAT_V_OGM_H_
+#define _NET_BATMAN_ADV_BAT_V_OGM_H_
 
 #include "main.h"
 
@@ -34,4 +34,4 @@ void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface);
 int batadv_v_ogm_packet_recv(struct sk_buff *skb,
 			     struct batadv_hard_iface *if_incoming);
 
-#endif /* _BATMAN_ADV_BATADV_V_OGM_H_ */
+#endif /* _NET_BATMAN_ADV_BAT_V_OGM_H_ */
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index a2de5a44bd41..ff9659af6b91 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1449,7 +1449,7 @@ static void batadv_bla_periodic_work(struct work_struct *work)
 		 * detection frames. Set the locally administered bit to avoid
 		 * collisions with users mac addresses.
 		 */
-		random_ether_addr(bat_priv->bla.loopdetect_addr);
+		eth_random_addr(bat_priv->bla.loopdetect_addr);
 		bat_priv->bla.loopdetect_addr[0] = 0xba;
 		bat_priv->bla.loopdetect_addr[1] = 0xbe;
 		bat_priv->bla.loopdetect_lasttime = jiffies;
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 87479c60670e..3cb82378300b 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -118,7 +118,7 @@ static int batadv_bla_backbone_table_open(struct inode *inode,
 
 #ifdef CONFIG_BATMAN_ADV_DAT
 /**
- * batadv_dat_cache_open() - Prepare file handler for reads from dat_chache
+ * batadv_dat_cache_open() - Prepare file handler for reads from dat_cache
  * @inode: inode which was opened
  * @file: file handle to be initialized
  *
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 716e5b43acfa..1d295da3e342 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1339,7 +1339,11 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
 	return false;
 }
 
-static void _batadv_purge_orig(struct batadv_priv *bat_priv)
+/**
+ * batadv_purge_orig_ref() - Purge all outdated originators
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+void batadv_purge_orig_ref(struct batadv_priv *bat_priv)
 {
 	struct batadv_hashtable *hash = bat_priv->orig_hash;
 	struct hlist_node *node_tmp;
@@ -1385,21 +1389,12 @@ static void batadv_purge_orig(struct work_struct *work)
 
 	delayed_work = to_delayed_work(work);
 	bat_priv = container_of(delayed_work, struct batadv_priv, orig_work);
-	_batadv_purge_orig(bat_priv);
+	batadv_purge_orig_ref(bat_priv);
 	queue_delayed_work(batadv_event_workqueue,
 			   &bat_priv->orig_work,
 			   msecs_to_jiffies(BATADV_ORIG_WORK_PERIOD));
 }
 
-/**
- * batadv_purge_orig_ref() - Purge all outdated originators
- * @bat_priv: the bat priv with all the soft interface information
- */
-void batadv_purge_orig_ref(struct batadv_priv *bat_priv)
-{
-	_batadv_purge_orig(bat_priv);
-}
-
 #ifdef CONFIG_BATMAN_ADV_DEBUGFS
 
 /**
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 360357f83f20..343d304851a5 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -43,12 +43,13 @@ struct seq_file;
 #ifdef CONFIG_BATMAN_ADV_DAT
 
 /**
- * batadv_dat_addr_t - it is the type used for all DHT addresses. If it is
- *  changed, BATADV_DAT_ADDR_MAX is changed as well.
+ * typedef batadv_dat_addr_t - type used for all DHT addresses
+ *
+ * If it is changed, BATADV_DAT_ADDR_MAX is changed as well.
  *
  * *Please be careful: batadv_dat_addr_t must be UNSIGNED*
  */
-#define batadv_dat_addr_t u16
+typedef u16 batadv_dat_addr_t;
 
 #endif /* CONFIG_BATMAN_ADV_DAT */
 
diff --git a/net/core/dev.c b/net/core/dev.c
index a5aa1c7444e6..4f8b92d81d10 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -149,7 +149,6 @@
 
 #include "net-sysfs.h"
 
-/* Instead of increasing this, you should create a hash table. */
 #define MAX_GRO_SKBS 8
 
 /* This should be increased if a protocol with a bigger head is added. */
@@ -2068,11 +2067,13 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
 		struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
 		int i;
 
+		/* walk through the TCs and see if it falls into any of them */
 		for (i = 0; i < TC_MAX_QUEUE; i++, tc++) {
 			if ((txq - tc->offset) < tc->count)
 				return i;
 		}
 
+		/* didn't find it, just return -1 to indicate no match */
 		return -1;
 	}
 
@@ -2081,6 +2082,10 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
 EXPORT_SYMBOL(netdev_txq_to_tc);
 
 #ifdef CONFIG_XPS
+struct static_key xps_needed __read_mostly;
+EXPORT_SYMBOL(xps_needed);
+struct static_key xps_rxqs_needed __read_mostly;
+EXPORT_SYMBOL(xps_rxqs_needed);
 static DEFINE_MUTEX(xps_map_mutex);
 #define xmap_dereference(P)		\
 	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
@@ -2092,7 +2097,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
 	int pos;
 
 	if (dev_maps)
-		map = xmap_dereference(dev_maps->cpu_map[tci]);
+		map = xmap_dereference(dev_maps->attr_map[tci]);
 	if (!map)
 		return false;
 
@@ -2105,7 +2110,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
 			break;
 		}
 
-		RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL);
+		RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
 		kfree_rcu(map, rcu);
 		return false;
 	}
@@ -2135,33 +2140,68 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
 	return active;
 }
 
+static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
+			   struct xps_dev_maps *dev_maps, unsigned int nr_ids,
+			   u16 offset, u16 count, bool is_rxqs_map)
+{
+	bool active = false;
+	int i, j;
+
+	for (j = -1; j = netif_attrmask_next(j, mask, nr_ids),
+	     j < nr_ids;)
+		active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
+					       count);
+	if (!active) {
+		if (is_rxqs_map) {
+			RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
+		} else {
+			RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
+
+			for (i = offset + (count - 1); count--; i--)
+				netdev_queue_numa_node_write(
+					netdev_get_tx_queue(dev, i),
+							NUMA_NO_NODE);
+		}
+		kfree_rcu(dev_maps, rcu);
+	}
+}
+
 static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
 				   u16 count)
 {
+	const unsigned long *possible_mask = NULL;
 	struct xps_dev_maps *dev_maps;
-	int cpu, i;
-	bool active = false;
+	unsigned int nr_ids;
+
+	if (!static_key_false(&xps_needed))
+		return;
 
 	mutex_lock(&xps_map_mutex);
-	dev_maps = xmap_dereference(dev->xps_maps);
 
+	if (static_key_false(&xps_rxqs_needed)) {
+		dev_maps = xmap_dereference(dev->xps_rxqs_map);
+		if (dev_maps) {
+			nr_ids = dev->num_rx_queues;
+			clean_xps_maps(dev, possible_mask, dev_maps, nr_ids,
+				       offset, count, true);
+		}
+	}
+
+	dev_maps = xmap_dereference(dev->xps_cpus_map);
 	if (!dev_maps)
 		goto out_no_maps;
 
-	for_each_possible_cpu(cpu)
-		active |= remove_xps_queue_cpu(dev, dev_maps, cpu,
-					       offset, count);
-
-	if (!active) {
-		RCU_INIT_POINTER(dev->xps_maps, NULL);
-		kfree_rcu(dev_maps, rcu);
-	}
-
-	for (i = offset + (count - 1); count--; i--)
-		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
-					     NUMA_NO_NODE);
+	if (num_possible_cpus() > 1)
+		possible_mask = cpumask_bits(cpu_possible_mask);
+	nr_ids = nr_cpu_ids;
+	clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count,
+		       false);
 
 out_no_maps:
+	if (static_key_enabled(&xps_rxqs_needed))
+		static_key_slow_dec(&xps_rxqs_needed);
+
+	static_key_slow_dec(&xps_needed);
 	mutex_unlock(&xps_map_mutex);
 }
 
@@ -2170,8 +2210,8 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
 	netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
 }
 
-static struct xps_map *expand_xps_map(struct xps_map *map,
-				      int cpu, u16 index)
+static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
+				      u16 index, bool is_rxqs_map)
 {
 	struct xps_map *new_map;
 	int alloc_len = XPS_MIN_MAP_ALLOC;
@@ -2183,7 +2223,7 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
 		return map;
 	}
 
-	/* Need to add queue to this CPU's existing map */
+	/* Need to add tx-queue to this CPU's/rx-queue's existing map */
 	if (map) {
 		if (pos < map->alloc_len)
 			return map;
@@ -2191,9 +2231,14 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
 		alloc_len = map->alloc_len * 2;
 	}
 
-	/* Need to allocate new map to store queue on this CPU's map */
-	new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
-			       cpu_to_node(cpu));
+	/* Need to allocate new map to store tx-queue on this CPU's/rx-queue's
+	 *  map
+	 */
+	if (is_rxqs_map)
+		new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL);
+	else
+		new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
+				       cpu_to_node(attr_index));
 	if (!new_map)
 		return NULL;
 
@@ -2205,32 +2250,52 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
 	return new_map;
 }
 
-int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
-			u16 index)
+int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
+			  u16 index, bool is_rxqs_map)
 {
+	const unsigned long *online_mask = NULL, *possible_mask = NULL;
 	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
-	int i, cpu, tci, numa_node_id = -2;
+	int i, j, tci, numa_node_id = -2;
 	int maps_sz, num_tc = 1, tc = 0;
 	struct xps_map *map, *new_map;
 	bool active = false;
+	unsigned int nr_ids;
 
 	if (dev->num_tc) {
+		/* Do not allow XPS on subordinate device directly */
 		num_tc = dev->num_tc;
+		if (num_tc < 0)
+			return -EINVAL;
+
+		/* If queue belongs to subordinate dev use its map */
+		dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
+
 		tc = netdev_txq_to_tc(dev, index);
 		if (tc < 0)
 			return -EINVAL;
 	}
 
-	maps_sz = XPS_DEV_MAPS_SIZE(num_tc);
-	if (maps_sz < L1_CACHE_BYTES)
-		maps_sz = L1_CACHE_BYTES;
-
 	mutex_lock(&xps_map_mutex);
+	if (is_rxqs_map) {
+		maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
+		dev_maps = xmap_dereference(dev->xps_rxqs_map);
+		nr_ids = dev->num_rx_queues;
+	} else {
+		maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
+		if (num_possible_cpus() > 1) {
+			online_mask = cpumask_bits(cpu_online_mask);
+			possible_mask = cpumask_bits(cpu_possible_mask);
+		}
+		dev_maps = xmap_dereference(dev->xps_cpus_map);
+		nr_ids = nr_cpu_ids;
+	}
 
-	dev_maps = xmap_dereference(dev->xps_maps);
+	if (maps_sz < L1_CACHE_BYTES)
+		maps_sz = L1_CACHE_BYTES;
 
 	/* allocate memory for queue storage */
-	for_each_cpu_and(cpu, cpu_online_mask, mask) {
+	for (j = -1; j = netif_attrmask_next_and(j, online_mask, mask, nr_ids),
+	     j < nr_ids;) {
 		if (!new_dev_maps)
 			new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
 		if (!new_dev_maps) {
@@ -2238,73 +2303,85 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 			return -ENOMEM;
 		}
 
-		tci = cpu * num_tc + tc;
-		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) :
+		tci = j * num_tc + tc;
+		map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) :
 				 NULL;
 
-		map = expand_xps_map(map, cpu, index);
+		map = expand_xps_map(map, j, index, is_rxqs_map);
 		if (!map)
 			goto error;
 
-		RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+		RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 	}
 
 	if (!new_dev_maps)
 		goto out_no_new_maps;
 
-	for_each_possible_cpu(cpu) {
+	static_key_slow_inc(&xps_needed);
+	if (is_rxqs_map)
+		static_key_slow_inc(&xps_rxqs_needed);
+
+	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
+	     j < nr_ids;) {
 		/* copy maps belonging to foreign traffic classes */
-		for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) {
+		for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) {
 			/* fill in the new device map from the old device map */
-			map = xmap_dereference(dev_maps->cpu_map[tci]);
-			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+			map = xmap_dereference(dev_maps->attr_map[tci]);
+			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 		}
 
 		/* We need to explicitly update tci as prevous loop
 		 * could break out early if dev_maps is NULL.
 		 */
-		tci = cpu * num_tc + tc;
+		tci = j * num_tc + tc;
 
-		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
-			/* add queue to CPU maps */
+		if (netif_attr_test_mask(j, mask, nr_ids) &&
+		    netif_attr_test_online(j, online_mask, nr_ids)) {
+			/* add tx-queue to CPU/rx-queue maps */
 			int pos = 0;
 
-			map = xmap_dereference(new_dev_maps->cpu_map[tci]);
+			map = xmap_dereference(new_dev_maps->attr_map[tci]);
 			while ((pos < map->len) && (map->queues[pos] != index))
 				pos++;
 
 			if (pos == map->len)
 				map->queues[map->len++] = index;
 #ifdef CONFIG_NUMA
-			if (numa_node_id == -2)
-				numa_node_id = cpu_to_node(cpu);
-			else if (numa_node_id != cpu_to_node(cpu))
-				numa_node_id = -1;
+			if (!is_rxqs_map) {
+				if (numa_node_id == -2)
+					numa_node_id = cpu_to_node(j);
+				else if (numa_node_id != cpu_to_node(j))
+					numa_node_id = -1;
+			}
 #endif
 		} else if (dev_maps) {
 			/* fill in the new device map from the old device map */
-			map = xmap_dereference(dev_maps->cpu_map[tci]);
-			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+			map = xmap_dereference(dev_maps->attr_map[tci]);
+			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 		}
 
 		/* copy maps belonging to foreign traffic classes */
 		for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
 			/* fill in the new device map from the old device map */
-			map = xmap_dereference(dev_maps->cpu_map[tci]);
-			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+			map = xmap_dereference(dev_maps->attr_map[tci]);
+			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 		}
 	}
 
-	rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+	if (is_rxqs_map)
+		rcu_assign_pointer(dev->xps_rxqs_map, new_dev_maps);
+	else
+		rcu_assign_pointer(dev->xps_cpus_map, new_dev_maps);
 
 	/* Cleanup old maps */
 	if (!dev_maps)
 		goto out_no_old_maps;
 
-	for_each_possible_cpu(cpu) {
-		for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
-			new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
-			map = xmap_dereference(dev_maps->cpu_map[tci]);
+	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
+	     j < nr_ids;) {
+		for (i = num_tc, tci = j * num_tc; i--; tci++) {
+			new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
+			map = xmap_dereference(dev_maps->attr_map[tci]);
 			if (map && map != new_map)
 				kfree_rcu(map, rcu);
 		}
@@ -2317,19 +2394,23 @@ out_no_old_maps:
 	active = true;
 
 out_no_new_maps:
-	/* update Tx queue numa node */
-	netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
-				     (numa_node_id >= 0) ? numa_node_id :
-				     NUMA_NO_NODE);
+	if (!is_rxqs_map) {
+		/* update Tx queue numa node */
+		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
+					     (numa_node_id >= 0) ?
+					     numa_node_id : NUMA_NO_NODE);
+	}
 
 	if (!dev_maps)
 		goto out_no_maps;
 
-	/* removes queue from unused CPUs */
-	for_each_possible_cpu(cpu) {
-		for (i = tc, tci = cpu * num_tc; i--; tci++)
+	/* removes tx-queue from unused CPUs/rx-queues */
+	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
+	     j < nr_ids;) {
+		for (i = tc, tci = j * num_tc; i--; tci++)
 			active |= remove_xps_queue(dev_maps, tci, index);
-		if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu))
+		if (!netif_attr_test_mask(j, mask, nr_ids) ||
+		    !netif_attr_test_online(j, online_mask, nr_ids))
 			active |= remove_xps_queue(dev_maps, tci, index);
 		for (i = num_tc - tc, tci++; --i; tci++)
 			active |= remove_xps_queue(dev_maps, tci, index);
@@ -2337,7 +2418,10 @@ out_no_new_maps:
 
 	/* free map if not active */
 	if (!active) {
-		RCU_INIT_POINTER(dev->xps_maps, NULL);
+		if (is_rxqs_map)
+			RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
+		else
+			RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
 		kfree_rcu(dev_maps, rcu);
 	}
 
@@ -2347,11 +2431,12 @@ out_no_maps:
 	return 0;
 error:
 	/* remove any maps that we added */
-	for_each_possible_cpu(cpu) {
-		for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
-			new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
+	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
+	     j < nr_ids;) {
+		for (i = num_tc, tci = j * num_tc; i--; tci++) {
+			new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
 			map = dev_maps ?
-			      xmap_dereference(dev_maps->cpu_map[tci]) :
+			      xmap_dereference(dev_maps->attr_map[tci]) :
 			      NULL;
 			if (new_map && new_map != map)
 				kfree(new_map);
@@ -2363,14 +2448,34 @@ error:
 	kfree(new_dev_maps);
 	return -ENOMEM;
 }
+
+int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
+			u16 index)
+{
+	return __netif_set_xps_queue(dev, cpumask_bits(mask), index, false);
+}
 EXPORT_SYMBOL(netif_set_xps_queue);
 
 #endif
+static void netdev_unbind_all_sb_channels(struct net_device *dev)
+{
+	struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
+
+	/* Unbind any subordinate channels */
+	while (txq-- != &dev->_tx[0]) {
+		if (txq->sb_dev)
+			netdev_unbind_sb_channel(dev, txq->sb_dev);
+	}
+}
+
 void netdev_reset_tc(struct net_device *dev)
 {
 #ifdef CONFIG_XPS
 	netif_reset_xps_queues_gt(dev, 0);
 #endif
+	netdev_unbind_all_sb_channels(dev);
+
+	/* Reset TC configuration of device */
 	dev->num_tc = 0;
 	memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
 	memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
@@ -2399,11 +2504,77 @@ int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
 #ifdef CONFIG_XPS
 	netif_reset_xps_queues_gt(dev, 0);
 #endif
+	netdev_unbind_all_sb_channels(dev);
+
 	dev->num_tc = num_tc;
 	return 0;
 }
 EXPORT_SYMBOL(netdev_set_num_tc);
 
+void netdev_unbind_sb_channel(struct net_device *dev,
+			      struct net_device *sb_dev)
+{
+	struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
+
+#ifdef CONFIG_XPS
+	netif_reset_xps_queues_gt(sb_dev, 0);
+#endif
+	memset(sb_dev->tc_to_txq, 0, sizeof(sb_dev->tc_to_txq));
+	memset(sb_dev->prio_tc_map, 0, sizeof(sb_dev->prio_tc_map));
+
+	while (txq-- != &dev->_tx[0]) {
+		if (txq->sb_dev == sb_dev)
+			txq->sb_dev = NULL;
+	}
+}
+EXPORT_SYMBOL(netdev_unbind_sb_channel);
+
+int netdev_bind_sb_channel_queue(struct net_device *dev,
+				 struct net_device *sb_dev,
+				 u8 tc, u16 count, u16 offset)
+{
+	/* Make certain the sb_dev and dev are already configured */
+	if (sb_dev->num_tc >= 0 || tc >= dev->num_tc)
+		return -EINVAL;
+
+	/* We cannot hand out queues we don't have */
+	if ((offset + count) > dev->real_num_tx_queues)
+		return -EINVAL;
+
+	/* Record the mapping */
+	sb_dev->tc_to_txq[tc].count = count;
+	sb_dev->tc_to_txq[tc].offset = offset;
+
+	/* Provide a way for Tx queue to find the tc_to_txq map or
+	 * XPS map for itself.
+	 */
+	while (count--)
+		netdev_get_tx_queue(dev, count + offset)->sb_dev = sb_dev;
+
+	return 0;
+}
+EXPORT_SYMBOL(netdev_bind_sb_channel_queue);
+
+int netdev_set_sb_channel(struct net_device *dev, u16 channel)
+{
+	/* Do not use a multiqueue device to represent a subordinate channel */
+	if (netif_is_multiqueue(dev))
+		return -ENODEV;
+
+	/* We allow channels 1 - 32767 to be used for subordinate channels.
+	 * Channel 0 is meant to be "native" mode and used only to represent
+	 * the main root device. We allow writing 0 to reset the device back
+	 * to normal mode after being used as a subordinate channel.
+	 */
+	if (channel > S16_MAX)
+		return -EINVAL;
+
+	dev->num_tc = -channel;
+
+	return 0;
+}
+EXPORT_SYMBOL(netdev_set_sb_channel);
+
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
@@ -2615,24 +2786,26 @@ EXPORT_SYMBOL(netif_device_attach);
  * Returns a Tx hash based on the given packet descriptor a Tx queues' number
  * to be used as a distribution range.
  */
-static u16 skb_tx_hash(const struct net_device *dev, struct sk_buff *skb)
+static u16 skb_tx_hash(const struct net_device *dev,
+		       const struct net_device *sb_dev,
+		       struct sk_buff *skb)
 {
 	u32 hash;
 	u16 qoffset = 0;
 	u16 qcount = dev->real_num_tx_queues;
 
+	if (dev->num_tc) {
+		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+
+		qoffset = sb_dev->tc_to_txq[tc].offset;
+		qcount = sb_dev->tc_to_txq[tc].count;
+	}
+
 	if (skb_rx_queue_recorded(skb)) {
 		hash = skb_get_rx_queue(skb);
 		while (unlikely(hash >= qcount))
 			hash -= qcount;
-		return hash;
-	}
-
-	if (dev->num_tc) {
-		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
-
-		qoffset = dev->tc_to_txq[tc].offset;
-		qcount = dev->tc_to_txq[tc].count;
+		return hash + qoffset;
 	}
 
 	return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
@@ -3376,32 +3549,64 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 }
 #endif /* CONFIG_NET_EGRESS */
 
-static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+#ifdef CONFIG_XPS
+static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
+			       struct xps_dev_maps *dev_maps, unsigned int tci)
+{
+	struct xps_map *map;
+	int queue_index = -1;
+
+	if (dev->num_tc) {
+		tci *= dev->num_tc;
+		tci += netdev_get_prio_tc_map(dev, skb->priority);
+	}
+
+	map = rcu_dereference(dev_maps->attr_map[tci]);
+	if (map) {
+		if (map->len == 1)
+			queue_index = map->queues[0];
+		else
+			queue_index = map->queues[reciprocal_scale(
+						skb_get_hash(skb), map->len)];
+		if (unlikely(queue_index >= dev->real_num_tx_queues))
+			queue_index = -1;
+	}
+	return queue_index;
+}
+#endif
+
+static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev,
+			 struct sk_buff *skb)
 {
 #ifdef CONFIG_XPS
 	struct xps_dev_maps *dev_maps;
-	struct xps_map *map;
+	struct sock *sk = skb->sk;
 	int queue_index = -1;
 
+	if (!static_key_false(&xps_needed))
+		return -1;
+
 	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_maps);
+	if (!static_key_false(&xps_rxqs_needed))
+		goto get_cpus_map;
+
+	dev_maps = rcu_dereference(sb_dev->xps_rxqs_map);
 	if (dev_maps) {
-		unsigned int tci = skb->sender_cpu - 1;
+		int tci = sk_rx_queue_get(sk);
 
-		if (dev->num_tc) {
-			tci *= dev->num_tc;
-			tci += netdev_get_prio_tc_map(dev, skb->priority);
-		}
+		if (tci >= 0 && tci < dev->num_rx_queues)
+			queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
+							  tci);
+	}
 
-		map = rcu_dereference(dev_maps->cpu_map[tci]);
-		if (map) {
-			if (map->len == 1)
-				queue_index = map->queues[0];
-			else
-				queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
-									   map->len)];
-			if (unlikely(queue_index >= dev->real_num_tx_queues))
-				queue_index = -1;
+get_cpus_map:
+	if (queue_index < 0) {
+		dev_maps = rcu_dereference(sb_dev->xps_cpus_map);
+		if (dev_maps) {
+			unsigned int tci = skb->sender_cpu - 1;
+
+			queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
+							  tci);
 		}
 	}
 	rcu_read_unlock();
@@ -3412,17 +3617,36 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 #endif
 }
 
-static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
+u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
+		     struct net_device *sb_dev,
+		     select_queue_fallback_t fallback)
+{
+	return 0;
+}
+EXPORT_SYMBOL(dev_pick_tx_zero);
+
+u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
+		       struct net_device *sb_dev,
+		       select_queue_fallback_t fallback)
+{
+	return (u16)raw_smp_processor_id() % dev->real_num_tx_queues;
+}
+EXPORT_SYMBOL(dev_pick_tx_cpu_id);
+
+static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
+			    struct net_device *sb_dev)
 {
 	struct sock *sk = skb->sk;
 	int queue_index = sk_tx_queue_get(sk);
 
+	sb_dev = sb_dev ? : dev;
+
 	if (queue_index < 0 || skb->ooo_okay ||
 	    queue_index >= dev->real_num_tx_queues) {
-		int new_index = get_xps_queue(dev, skb);
+		int new_index = get_xps_queue(dev, sb_dev, skb);
 
 		if (new_index < 0)
-			new_index = skb_tx_hash(dev, skb);
+			new_index = skb_tx_hash(dev, sb_dev, skb);
 
 		if (queue_index != new_index && sk &&
 		    sk_fullsock(sk) &&
@@ -3437,7 +3661,7 @@ static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
 
 struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 				    struct sk_buff *skb,
-				    void *accel_priv)
+				    struct net_device *sb_dev)
 {
 	int queue_index = 0;
 
@@ -3452,10 +3676,10 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 		const struct net_device_ops *ops = dev->netdev_ops;
 
 		if (ops->ndo_select_queue)
-			queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
+			queue_index = ops->ndo_select_queue(dev, skb, sb_dev,
 							    __netdev_pick_tx);
 		else
-			queue_index = __netdev_pick_tx(dev, skb);
+			queue_index = __netdev_pick_tx(dev, skb, sb_dev);
 
 		queue_index = netdev_cap_txqueue(dev, queue_index);
 	}
@@ -3467,7 +3691,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 /**
  *	__dev_queue_xmit - transmit a buffer
  *	@skb: buffer to transmit
- *	@accel_priv: private data used for L2 forwarding offload
+ *	@sb_dev: suboordinate device used for L2 forwarding offload
  *
  *	Queue a buffer for transmission to a network device. The caller must
  *	have set the device and priority and built the buffer before calling
@@ -3490,7 +3714,7 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
  *      the BH enable code must have IRQs enabled so that it will not deadlock.
  *          --BLG
  */
-static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
+static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
 {
 	struct net_device *dev = skb->dev;
 	struct netdev_queue *txq;
@@ -3529,7 +3753,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 	else
 		skb_dst_force(skb);
 
-	txq = netdev_pick_tx(dev, skb, accel_priv);
+	txq = netdev_pick_tx(dev, skb, sb_dev);
 	q = rcu_dereference_bh(txq->qdisc);
 
 	trace_net_dev_queue(skb);
@@ -3603,9 +3827,9 @@ int dev_queue_xmit(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(dev_queue_xmit);
 
-int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
+int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev)
 {
-	return __dev_queue_xmit(skb, accel_priv);
+	return __dev_queue_xmit(skb, sb_dev);
 }
 EXPORT_SYMBOL(dev_queue_xmit_accel);
 
@@ -4494,7 +4718,8 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
 	return 0;
 }
 
-static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
+static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc,
+				    struct packet_type **ppt_prev)
 {
 	struct packet_type *ptype, *pt_prev;
 	rx_handler_func_t *rx_handler;
@@ -4624,8 +4849,7 @@ skip_classify:
 	if (pt_prev) {
 		if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
 			goto drop;
-		else
-			ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+		*ppt_prev = pt_prev;
 	} else {
 drop:
 		if (!deliver_exact)
@@ -4643,6 +4867,18 @@ out:
 	return ret;
 }
 
+static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
+{
+	struct net_device *orig_dev = skb->dev;
+	struct packet_type *pt_prev = NULL;
+	int ret;
+
+	ret = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+	if (pt_prev)
+		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+	return ret;
+}
+
 /**
  *	netif_receive_skb_core - special purpose version of netif_receive_skb
  *	@skb: buffer to process
@@ -4663,13 +4899,72 @@ int netif_receive_skb_core(struct sk_buff *skb)
 	int ret;
 
 	rcu_read_lock();
-	ret = __netif_receive_skb_core(skb, false);
+	ret = __netif_receive_skb_one_core(skb, false);
 	rcu_read_unlock();
 
 	return ret;
 }
 EXPORT_SYMBOL(netif_receive_skb_core);
 
+static inline void __netif_receive_skb_list_ptype(struct list_head *head,
+						  struct packet_type *pt_prev,
+						  struct net_device *orig_dev)
+{
+	struct sk_buff *skb, *next;
+
+	if (!pt_prev)
+		return;
+	if (list_empty(head))
+		return;
+	if (pt_prev->list_func != NULL)
+		pt_prev->list_func(head, pt_prev, orig_dev);
+	else
+		list_for_each_entry_safe(skb, next, head, list)
+			pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+}
+
+static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc)
+{
+	/* Fast-path assumptions:
+	 * - There is no RX handler.
+	 * - Only one packet_type matches.
+	 * If either of these fails, we will end up doing some per-packet
+	 * processing in-line, then handling the 'last ptype' for the whole
+	 * sublist.  This can't cause out-of-order delivery to any single ptype,
+	 * because the 'last ptype' must be constant across the sublist, and all
+	 * other ptypes are handled per-packet.
+	 */
+	/* Current (common) ptype of sublist */
+	struct packet_type *pt_curr = NULL;
+	/* Current (common) orig_dev of sublist */
+	struct net_device *od_curr = NULL;
+	struct list_head sublist;
+	struct sk_buff *skb, *next;
+
+	INIT_LIST_HEAD(&sublist);
+	list_for_each_entry_safe(skb, next, head, list) {
+		struct net_device *orig_dev = skb->dev;
+		struct packet_type *pt_prev = NULL;
+
+		list_del(&skb->list);
+		__netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
+		if (!pt_prev)
+			continue;
+		if (pt_curr != pt_prev || od_curr != orig_dev) {
+			/* dispatch old sublist */
+			__netif_receive_skb_list_ptype(&sublist, pt_curr, od_curr);
+			/* start new sublist */
+			INIT_LIST_HEAD(&sublist);
+			pt_curr = pt_prev;
+			od_curr = orig_dev;
+		}
+		list_add_tail(&skb->list, &sublist);
+	}
+
+	/* dispatch final sublist */
+	__netif_receive_skb_list_ptype(&sublist, pt_curr, od_curr);
+}
+
 static int __netif_receive_skb(struct sk_buff *skb)
 {
 	int ret;
@@ -4687,14 +4982,44 @@ static int __netif_receive_skb(struct sk_buff *skb)
 		 * context down to all allocation sites.
 		 */
 		noreclaim_flag = memalloc_noreclaim_save();
-		ret = __netif_receive_skb_core(skb, true);
+		ret = __netif_receive_skb_one_core(skb, true);
 		memalloc_noreclaim_restore(noreclaim_flag);
 	} else
-		ret = __netif_receive_skb_core(skb, false);
+		ret = __netif_receive_skb_one_core(skb, false);
 
 	return ret;
 }
 
+static void __netif_receive_skb_list(struct list_head *head)
+{
+	unsigned long noreclaim_flag = 0;
+	struct sk_buff *skb, *next;
+	bool pfmemalloc = false; /* Is current sublist PF_MEMALLOC? */
+
+	list_for_each_entry_safe(skb, next, head, list) {
+		if ((sk_memalloc_socks() && skb_pfmemalloc(skb)) != pfmemalloc) {
+			struct list_head sublist;
+
+			/* Handle the previous sublist */
+			list_cut_before(&sublist, head, &skb->list);
+			if (!list_empty(&sublist))
+				__netif_receive_skb_list_core(&sublist, pfmemalloc);
+			pfmemalloc = !pfmemalloc;
+			/* See comments in __netif_receive_skb */
+			if (pfmemalloc)
+				noreclaim_flag = memalloc_noreclaim_save();
+			else
+				memalloc_noreclaim_restore(noreclaim_flag);
+		}
+	}
+	/* Handle the remaining sublist */
+	if (!list_empty(head))
+		__netif_receive_skb_list_core(head, pfmemalloc);
+	/* Restore pflags */
+	if (pfmemalloc)
+		memalloc_noreclaim_restore(noreclaim_flag);
+}
+
 static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
 {
 	struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
@@ -4717,7 +5042,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
 		break;
 
 	case XDP_QUERY_PROG:
-		xdp->prog_attached = !!old;
 		xdp->prog_id = old ? old->aux->id : 0;
 		break;
 
@@ -4769,6 +5093,55 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
 	return ret;
 }
 
+static void netif_receive_skb_list_internal(struct list_head *head)
+{
+	struct bpf_prog *xdp_prog = NULL;
+	struct sk_buff *skb, *next;
+	struct list_head sublist;
+
+	INIT_LIST_HEAD(&sublist);
+	list_for_each_entry_safe(skb, next, head, list) {
+		net_timestamp_check(netdev_tstamp_prequeue, skb);
+		list_del(&skb->list);
+		if (!skb_defer_rx_timestamp(skb))
+			list_add_tail(&skb->list, &sublist);
+	}
+	list_splice_init(&sublist, head);
+
+	if (static_branch_unlikely(&generic_xdp_needed_key)) {
+		preempt_disable();
+		rcu_read_lock();
+		list_for_each_entry_safe(skb, next, head, list) {
+			xdp_prog = rcu_dereference(skb->dev->xdp_prog);
+			list_del(&skb->list);
+			if (do_xdp_generic(xdp_prog, skb) == XDP_PASS)
+				list_add_tail(&skb->list, &sublist);
+		}
+		rcu_read_unlock();
+		preempt_enable();
+		/* Put passed packets back on main list */
+		list_splice_init(&sublist, head);
+	}
+
+	rcu_read_lock();
+#ifdef CONFIG_RPS
+	if (static_key_false(&rps_needed)) {
+		list_for_each_entry_safe(skb, next, head, list) {
+			struct rps_dev_flow voidflow, *rflow = &voidflow;
+			int cpu = get_rps_cpu(skb->dev, skb, &rflow);
+
+			if (cpu >= 0) {
+				/* Will be handled, remove from list */
+				list_del(&skb->list);
+				enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+			}
+		}
+	}
+#endif
+	__netif_receive_skb_list(head);
+	rcu_read_unlock();
+}
+
 /**
  *	netif_receive_skb - process receive buffer from network
  *	@skb: buffer to process
@@ -4792,6 +5165,28 @@ int netif_receive_skb(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(netif_receive_skb);
 
+/**
+ *	netif_receive_skb_list - process many receive buffers from network
+ *	@head: list of skbs to process.
+ *
+ *	Since return value of netif_receive_skb() is normally ignored, and
+ *	wouldn't be meaningful for a list, this function returns void.
+ *
+ *	This function may only be called from softirq context and interrupts
+ *	should be enabled.
+ */
+void netif_receive_skb_list(struct list_head *head)
+{
+	struct sk_buff *skb;
+
+	if (list_empty(head))
+		return;
+	list_for_each_entry(skb, head, list)
+		trace_netif_receive_skb_list_entry(skb);
+	netif_receive_skb_list_internal(head);
+}
+EXPORT_SYMBOL(netif_receive_skb_list);
+
 DEFINE_PER_CPU(struct work_struct, flush_works);
 
 /* Network device is going away, flush any packets still pending */
@@ -4875,42 +5270,50 @@ out:
 	return netif_receive_skb_internal(skb);
 }
 
-/* napi->gro_list contains packets ordered by age.
- * youngest packets at the head of it.
- * Complete skbs in reverse order to reduce latencies.
- */
-void napi_gro_flush(struct napi_struct *napi, bool flush_old)
+static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
+				   bool flush_old)
 {
-	struct sk_buff *skb, *prev = NULL;
-
-	/* scan list and build reverse chain */
-	for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
-		skb->prev = prev;
-		prev = skb;
-	}
-
-	for (skb = prev; skb; skb = prev) {
-		skb->next = NULL;
+	struct list_head *head = &napi->gro_hash[index].list;
+	struct sk_buff *skb, *p;
 
+	list_for_each_entry_safe_reverse(skb, p, head, list) {
 		if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
 			return;
-
-		prev = skb->prev;
+		list_del(&skb->list);
+		skb->next = NULL;
 		napi_gro_complete(skb);
-		napi->gro_count--;
+		napi->gro_hash[index].count--;
 	}
 
-	napi->gro_list = NULL;
+	if (!napi->gro_hash[index].count)
+		__clear_bit(index, &napi->gro_bitmask);
+}
+
+/* napi->gro_hash[].list contains packets ordered by age.
+ * youngest packets at the head of it.
+ * Complete skbs in reverse order to reduce latencies.
+ */
+void napi_gro_flush(struct napi_struct *napi, bool flush_old)
+{
+	u32 i;
+
+	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
+		if (test_bit(i, &napi->gro_bitmask))
+			__napi_gro_flush_chain(napi, i, flush_old);
+	}
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
-static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
+static struct list_head *gro_list_prepare(struct napi_struct *napi,
+					  struct sk_buff *skb)
 {
-	struct sk_buff *p;
 	unsigned int maclen = skb->dev->hard_header_len;
 	u32 hash = skb_get_hash_raw(skb);
+	struct list_head *head;
+	struct sk_buff *p;
 
-	for (p = napi->gro_list; p; p = p->next) {
+	head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)].list;
+	list_for_each_entry(p, head, list) {
 		unsigned long diffs;
 
 		NAPI_GRO_CB(p)->flush = 0;
@@ -4933,6 +5336,8 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
 				       maclen);
 		NAPI_GRO_CB(p)->same_flow = !diffs;
 	}
+
+	return head;
 }
 
 static void skb_gro_reset_offset(struct sk_buff *skb)
@@ -4975,20 +5380,41 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
 	}
 }
 
+static void gro_flush_oldest(struct list_head *head)
+{
+	struct sk_buff *oldest;
+
+	oldest = list_last_entry(head, struct sk_buff, list);
+
+	/* We are called with head length >= MAX_GRO_SKBS, so this is
+	 * impossible.
+	 */
+	if (WARN_ON_ONCE(!oldest))
+		return;
+
+	/* Do not adjust napi->gro_hash[].count, caller is adding a new
+	 * SKB to the chain.
+	 */
+	list_del(&oldest->list);
+	napi_gro_complete(oldest);
+}
+
 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
-	struct sk_buff **pp = NULL;
+	u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
+	struct list_head *head = &offload_base;
 	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
-	struct list_head *head = &offload_base;
-	int same_flow;
+	struct list_head *gro_head;
+	struct sk_buff *pp = NULL;
 	enum gro_result ret;
+	int same_flow;
 	int grow;
 
 	if (netif_elide_gro(skb->dev))
 		goto normal;
 
-	gro_list_prepare(napi, skb);
+	gro_head = gro_list_prepare(napi, skb);
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, head, list) {
@@ -5022,7 +5448,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 			NAPI_GRO_CB(skb)->csum_valid = 0;
 		}
 
-		pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
+		pp = ptype->callbacks.gro_receive(gro_head, skb);
 		break;
 	}
 	rcu_read_unlock();
@@ -5039,12 +5465,10 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
 
 	if (pp) {
-		struct sk_buff *nskb = *pp;
-
-		*pp = nskb->next;
-		nskb->next = NULL;
-		napi_gro_complete(nskb);
-		napi->gro_count--;
+		list_del(&pp->list);
+		pp->next = NULL;
+		napi_gro_complete(pp);
+		napi->gro_hash[hash].count--;
 	}
 
 	if (same_flow)
@@ -5053,26 +5477,16 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (NAPI_GRO_CB(skb)->flush)
 		goto normal;
 
-	if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
-		struct sk_buff *nskb = napi->gro_list;
-
-		/* locate the end of the list to select the 'oldest' flow */
-		while (nskb->next) {
-			pp = &nskb->next;
-			nskb = *pp;
-		}
-		*pp = NULL;
-		nskb->next = NULL;
-		napi_gro_complete(nskb);
+	if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
+		gro_flush_oldest(gro_head);
 	} else {
-		napi->gro_count++;
+		napi->gro_hash[hash].count++;
 	}
 	NAPI_GRO_CB(skb)->count = 1;
 	NAPI_GRO_CB(skb)->age = jiffies;
 	NAPI_GRO_CB(skb)->last = skb;
 	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
-	skb->next = napi->gro_list;
-	napi->gro_list = skb;
+	list_add(&skb->list, gro_head);
 	ret = GRO_HELD;
 
 pull:
@@ -5080,6 +5494,13 @@ pull:
 	if (grow > 0)
 		gro_pull_from_frag0(skb, grow);
 ok:
+	if (napi->gro_hash[hash].count) {
+		if (!test_bit(hash, &napi->gro_bitmask))
+			__set_bit(hash, &napi->gro_bitmask);
+	} else if (test_bit(hash, &napi->gro_bitmask)) {
+		__clear_bit(hash, &napi->gro_bitmask);
+	}
+
 	return ret;
 
 normal:
@@ -5478,7 +5899,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 				 NAPIF_STATE_IN_BUSY_POLL)))
 		return false;
 
-	if (n->gro_list) {
+	if (n->gro_bitmask) {
 		unsigned long timeout = 0;
 
 		if (work_done)
@@ -5687,7 +6108,7 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
 	/* Note : we use a relaxed variant of napi_schedule_prep() not setting
 	 * NAPI_STATE_MISSED, since we do not react to a device IRQ.
 	 */
-	if (napi->gro_list && !napi_disable_pending(napi) &&
+	if (napi->gro_bitmask && !napi_disable_pending(napi) &&
 	    !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
 		__napi_schedule_irqoff(napi);
 
@@ -5697,11 +6118,16 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 		    int (*poll)(struct napi_struct *, int), int weight)
 {
+	int i;
+
 	INIT_LIST_HEAD(&napi->poll_list);
 	hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
 	napi->timer.function = napi_watchdog;
-	napi->gro_count = 0;
-	napi->gro_list = NULL;
+	napi->gro_bitmask = 0;
+	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
+		INIT_LIST_HEAD(&napi->gro_hash[i].list);
+		napi->gro_hash[i].count = 0;
+	}
 	napi->skb = NULL;
 	napi->poll = poll;
 	if (weight > NAPI_POLL_WEIGHT)
@@ -5734,6 +6160,19 @@ void napi_disable(struct napi_struct *n)
 }
 EXPORT_SYMBOL(napi_disable);
 
+static void flush_gro_hash(struct napi_struct *napi)
+{
+	int i;
+
+	for (i = 0; i < GRO_HASH_BUCKETS; i++) {
+		struct sk_buff *skb, *n;
+
+		list_for_each_entry_safe(skb, n, &napi->gro_hash[i].list, list)
+			kfree_skb(skb);
+		napi->gro_hash[i].count = 0;
+	}
+}
+
 /* Must be called in process context */
 void netif_napi_del(struct napi_struct *napi)
 {
@@ -5743,9 +6182,8 @@ void netif_napi_del(struct napi_struct *napi)
 	list_del_init(&napi->dev_list);
 	napi_free_frags(napi);
 
-	kfree_skb_list(napi->gro_list);
-	napi->gro_list = NULL;
-	napi->gro_count = 0;
+	flush_gro_hash(napi);
+	napi->gro_bitmask = 0;
 }
 EXPORT_SYMBOL(netif_napi_del);
 
@@ -5787,7 +6225,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 		goto out_unlock;
 	}
 
-	if (n->gro_list) {
+	if (n->gro_bitmask) {
 		/* flush too old packets
 		 * If HZ < 1000, flush all packets.
 		 */
@@ -7276,23 +7714,21 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
 }
 EXPORT_SYMBOL(dev_change_proto_down);
 
-void __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
-		     struct netdev_bpf *xdp)
+u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
+		    enum bpf_netdev_command cmd)
 {
-	memset(xdp, 0, sizeof(*xdp));
-	xdp->command = XDP_QUERY_PROG;
+	struct netdev_bpf xdp;
 
-	/* Query must always succeed. */
-	WARN_ON(bpf_op(dev, xdp) < 0);
-}
+	if (!bpf_op)
+		return 0;
 
-static u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op)
-{
-	struct netdev_bpf xdp;
+	memset(&xdp, 0, sizeof(xdp));
+	xdp.command = cmd;
 
-	__dev_xdp_query(dev, bpf_op, &xdp);
+	/* Query must always succeed. */
+	WARN_ON(bpf_op(dev, &xdp) < 0 && cmd == XDP_QUERY_PROG);
 
-	return xdp.prog_attached;
+	return xdp.prog_id;
 }
 
 static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
@@ -7326,12 +7762,19 @@ static void dev_xdp_uninstall(struct net_device *dev)
 	if (!ndo_bpf)
 		return;
 
-	__dev_xdp_query(dev, ndo_bpf, &xdp);
-	if (xdp.prog_attached == XDP_ATTACHED_NONE)
-		return;
+	memset(&xdp, 0, sizeof(xdp));
+	xdp.command = XDP_QUERY_PROG;
+	WARN_ON(ndo_bpf(dev, &xdp));
+	if (xdp.prog_id)
+		WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
+					NULL));
 
-	/* Program removal should always succeed */
-	WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, NULL));
+	/* Remove HW offload */
+	memset(&xdp, 0, sizeof(xdp));
+	xdp.command = XDP_QUERY_PROG_HW;
+	if (!ndo_bpf(dev, &xdp) && xdp.prog_id)
+		WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags,
+					NULL));
 }
 
 /**
@@ -7347,12 +7790,15 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 		      int fd, u32 flags)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
+	enum bpf_netdev_command query;
 	struct bpf_prog *prog = NULL;
 	bpf_op_t bpf_op, bpf_chk;
 	int err;
 
 	ASSERT_RTNL();
 
+	query = flags & XDP_FLAGS_HW_MODE ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG;
+
 	bpf_op = bpf_chk = ops->ndo_bpf;
 	if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
 		return -EOPNOTSUPP;
@@ -7362,10 +7808,11 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 		bpf_chk = generic_xdp_install;
 
 	if (fd >= 0) {
-		if (bpf_chk && __dev_xdp_attached(dev, bpf_chk))
+		if (__dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG) ||
+		    __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG_HW))
 			return -EEXIST;
 		if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
-		    __dev_xdp_attached(dev, bpf_op))
+		    __dev_xdp_query(dev, bpf_op, query))
 			return -EBUSY;
 
 		prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
@@ -8834,6 +9281,9 @@ static struct hlist_head * __net_init netdev_create_hash(void)
 /* Initialize per network namespace state */
 static int __net_init netdev_init(struct net *net)
 {
+	BUILD_BUG_ON(GRO_HASH_BUCKETS >
+		     8 * FIELD_SIZEOF(struct napi_struct, gro_bitmask));
+
 	if (net != &init_net)
 		INIT_LIST_HEAD(&net->dev_base_head);
 
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 22099705cc41..65fc366a78a4 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -326,6 +326,57 @@ devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
 						  pool_type, p_tc_index);
 }
 
+struct devlink_region {
+	struct devlink *devlink;
+	struct list_head list;
+	const char *name;
+	struct list_head snapshot_list;
+	u32 max_snapshots;
+	u32 cur_snapshots;
+	u64 size;
+};
+
+struct devlink_snapshot {
+	struct list_head list;
+	struct devlink_region *region;
+	devlink_snapshot_data_dest_t *data_destructor;
+	u64 data_len;
+	u8 *data;
+	u32 id;
+};
+
+static struct devlink_region *
+devlink_region_get_by_name(struct devlink *devlink, const char *region_name)
+{
+	struct devlink_region *region;
+
+	list_for_each_entry(region, &devlink->region_list, list)
+		if (!strcmp(region->name, region_name))
+			return region;
+
+	return NULL;
+}
+
+static struct devlink_snapshot *
+devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
+{
+	struct devlink_snapshot *snapshot;
+
+	list_for_each_entry(snapshot, &region->snapshot_list, list)
+		if (snapshot->id == id)
+			return snapshot;
+
+	return NULL;
+}
+
+static void devlink_region_snapshot_del(struct devlink_snapshot *snapshot)
+{
+	snapshot->region->cur_snapshots--;
+	list_del(&snapshot->list);
+	(*snapshot->data_destructor)(snapshot->data);
+	kfree(snapshot);
+}
+
 #define DEVLINK_NL_FLAG_NEED_DEVLINK	BIT(0)
 #define DEVLINK_NL_FLAG_NEED_PORT	BIT(1)
 #define DEVLINK_NL_FLAG_NEED_SB		BIT(2)
@@ -2604,6 +2655,919 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
 	return devlink->ops->reload(devlink, info->extack);
 }
 
+static const struct devlink_param devlink_param_generic[] = {
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
+		.name = DEVLINK_PARAM_GENERIC_INT_ERR_RESET_NAME,
+		.type = DEVLINK_PARAM_GENERIC_INT_ERR_RESET_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
+		.name = DEVLINK_PARAM_GENERIC_MAX_MACS_NAME,
+		.type = DEVLINK_PARAM_GENERIC_MAX_MACS_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV,
+		.name = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_NAME,
+		.type = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_TYPE,
+	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT,
+		.name = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME,
+		.type = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE,
+	},
+};
+
+static int devlink_param_generic_verify(const struct devlink_param *param)
+{
+	/* verify it match generic parameter by id and name */
+	if (param->id > DEVLINK_PARAM_GENERIC_ID_MAX)
+		return -EINVAL;
+	if (strcmp(param->name, devlink_param_generic[param->id].name))
+		return -ENOENT;
+
+	WARN_ON(param->type != devlink_param_generic[param->id].type);
+
+	return 0;
+}
+
+static int devlink_param_driver_verify(const struct devlink_param *param)
+{
+	int i;
+
+	if (param->id <= DEVLINK_PARAM_GENERIC_ID_MAX)
+		return -EINVAL;
+	/* verify no such name in generic params */
+	for (i = 0; i <= DEVLINK_PARAM_GENERIC_ID_MAX; i++)
+		if (!strcmp(param->name, devlink_param_generic[i].name))
+			return -EEXIST;
+
+	return 0;
+}
+
+static struct devlink_param_item *
+devlink_param_find_by_name(struct list_head *param_list,
+			   const char *param_name)
+{
+	struct devlink_param_item *param_item;
+
+	list_for_each_entry(param_item, param_list, list)
+		if (!strcmp(param_item->param->name, param_name))
+			return param_item;
+	return NULL;
+}
+
+static struct devlink_param_item *
+devlink_param_find_by_id(struct list_head *param_list, u32 param_id)
+{
+	struct devlink_param_item *param_item;
+
+	list_for_each_entry(param_item, param_list, list)
+		if (param_item->param->id == param_id)
+			return param_item;
+	return NULL;
+}
+
+static bool
+devlink_param_cmode_is_supported(const struct devlink_param *param,
+				 enum devlink_param_cmode cmode)
+{
+	return test_bit(cmode, &param->supported_cmodes);
+}
+
+static int devlink_param_get(struct devlink *devlink,
+			     const struct devlink_param *param,
+			     struct devlink_param_gset_ctx *ctx)
+{
+	if (!param->get)
+		return -EOPNOTSUPP;
+	return param->get(devlink, param->id, ctx);
+}
+
+static int devlink_param_set(struct devlink *devlink,
+			     const struct devlink_param *param,
+			     struct devlink_param_gset_ctx *ctx)
+{
+	if (!param->set)
+		return -EOPNOTSUPP;
+	return param->set(devlink, param->id, ctx);
+}
+
+static int
+devlink_param_type_to_nla_type(enum devlink_param_type param_type)
+{
+	switch (param_type) {
+	case DEVLINK_PARAM_TYPE_U8:
+		return NLA_U8;
+	case DEVLINK_PARAM_TYPE_U16:
+		return NLA_U16;
+	case DEVLINK_PARAM_TYPE_U32:
+		return NLA_U32;
+	case DEVLINK_PARAM_TYPE_STRING:
+		return NLA_STRING;
+	case DEVLINK_PARAM_TYPE_BOOL:
+		return NLA_FLAG;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int
+devlink_nl_param_value_fill_one(struct sk_buff *msg,
+				enum devlink_param_type type,
+				enum devlink_param_cmode cmode,
+				union devlink_param_value val)
+{
+	struct nlattr *param_value_attr;
+
+	param_value_attr = nla_nest_start(msg, DEVLINK_ATTR_PARAM_VALUE);
+	if (!param_value_attr)
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_VALUE_CMODE, cmode))
+		goto value_nest_cancel;
+
+	switch (type) {
+	case DEVLINK_PARAM_TYPE_U8:
+		if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu8))
+			goto value_nest_cancel;
+		break;
+	case DEVLINK_PARAM_TYPE_U16:
+		if (nla_put_u16(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu16))
+			goto value_nest_cancel;
+		break;
+	case DEVLINK_PARAM_TYPE_U32:
+		if (nla_put_u32(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu32))
+			goto value_nest_cancel;
+		break;
+	case DEVLINK_PARAM_TYPE_STRING:
+		if (nla_put_string(msg, DEVLINK_ATTR_PARAM_VALUE_DATA,
+				   val.vstr))
+			goto value_nest_cancel;
+		break;
+	case DEVLINK_PARAM_TYPE_BOOL:
+		if (val.vbool &&
+		    nla_put_flag(msg, DEVLINK_ATTR_PARAM_VALUE_DATA))
+			goto value_nest_cancel;
+		break;
+	}
+
+	nla_nest_end(msg, param_value_attr);
+	return 0;
+
+value_nest_cancel:
+	nla_nest_cancel(msg, param_value_attr);
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
+				 struct devlink_param_item *param_item,
+				 enum devlink_command cmd,
+				 u32 portid, u32 seq, int flags)
+{
+	union devlink_param_value param_value[DEVLINK_PARAM_CMODE_MAX + 1];
+	const struct devlink_param *param = param_item->param;
+	struct devlink_param_gset_ctx ctx;
+	struct nlattr *param_values_list;
+	struct nlattr *param_attr;
+	int nla_type;
+	void *hdr;
+	int err;
+	int i;
+
+	/* Get value from driver part to driverinit configuration mode */
+	for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
+		if (!devlink_param_cmode_is_supported(param, i))
+			continue;
+		if (i == DEVLINK_PARAM_CMODE_DRIVERINIT) {
+			if (!param_item->driverinit_value_valid)
+				return -EOPNOTSUPP;
+			param_value[i] = param_item->driverinit_value;
+		} else {
+			ctx.cmode = i;
+			err = devlink_param_get(devlink, param, &ctx);
+			if (err)
+				return err;
+			param_value[i] = ctx.val;
+		}
+	}
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto genlmsg_cancel;
+	param_attr = nla_nest_start(msg, DEVLINK_ATTR_PARAM);
+	if (!param_attr)
+		goto genlmsg_cancel;
+	if (nla_put_string(msg, DEVLINK_ATTR_PARAM_NAME, param->name))
+		goto param_nest_cancel;
+	if (param->generic && nla_put_flag(msg, DEVLINK_ATTR_PARAM_GENERIC))
+		goto param_nest_cancel;
+
+	nla_type = devlink_param_type_to_nla_type(param->type);
+	if (nla_type < 0)
+		goto param_nest_cancel;
+	if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_TYPE, nla_type))
+		goto param_nest_cancel;
+
+	param_values_list = nla_nest_start(msg, DEVLINK_ATTR_PARAM_VALUES_LIST);
+	if (!param_values_list)
+		goto param_nest_cancel;
+
+	for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
+		if (!devlink_param_cmode_is_supported(param, i))
+			continue;
+		err = devlink_nl_param_value_fill_one(msg, param->type,
+						      i, param_value[i]);
+		if (err)
+			goto values_list_nest_cancel;
+	}
+
+	nla_nest_end(msg, param_values_list);
+	nla_nest_end(msg, param_attr);
+	genlmsg_end(msg, hdr);
+	return 0;
+
+values_list_nest_cancel:
+	nla_nest_end(msg, param_values_list);
+param_nest_cancel:
+	nla_nest_cancel(msg, param_attr);
+genlmsg_cancel:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static void devlink_param_notify(struct devlink *devlink,
+				 struct devlink_param_item *param_item,
+				 enum devlink_command cmd)
+{
+	struct sk_buff *msg;
+	int err;
+
+	WARN_ON(cmd != DEVLINK_CMD_PARAM_NEW && cmd != DEVLINK_CMD_PARAM_DEL);
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+	err = devlink_nl_param_fill(msg, devlink, param_item, cmd, 0, 0, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
+					   struct netlink_callback *cb)
+{
+	struct devlink_param_item *param_item;
+	struct devlink *devlink;
+	int start = cb->args[0];
+	int idx = 0;
+	int err;
+
+	mutex_lock(&devlink_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+			continue;
+		mutex_lock(&devlink->lock);
+		list_for_each_entry(param_item, &devlink->param_list, list) {
+			if (idx < start) {
+				idx++;
+				continue;
+			}
+			err = devlink_nl_param_fill(msg, devlink, param_item,
+						    DEVLINK_CMD_PARAM_GET,
+						    NETLINK_CB(cb->skb).portid,
+						    cb->nlh->nlmsg_seq,
+						    NLM_F_MULTI);
+			if (err) {
+				mutex_unlock(&devlink->lock);
+				goto out;
+			}
+			idx++;
+		}
+		mutex_unlock(&devlink->lock);
+	}
+out:
+	mutex_unlock(&devlink_mutex);
+
+	cb->args[0] = idx;
+	return msg->len;
+}
+
+static int
+devlink_param_type_get_from_info(struct genl_info *info,
+				 enum devlink_param_type *param_type)
+{
+	if (!info->attrs[DEVLINK_ATTR_PARAM_TYPE])
+		return -EINVAL;
+
+	switch (nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_TYPE])) {
+	case NLA_U8:
+		*param_type = DEVLINK_PARAM_TYPE_U8;
+		break;
+	case NLA_U16:
+		*param_type = DEVLINK_PARAM_TYPE_U16;
+		break;
+	case NLA_U32:
+		*param_type = DEVLINK_PARAM_TYPE_U32;
+		break;
+	case NLA_STRING:
+		*param_type = DEVLINK_PARAM_TYPE_STRING;
+		break;
+	case NLA_FLAG:
+		*param_type = DEVLINK_PARAM_TYPE_BOOL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+devlink_param_value_get_from_info(const struct devlink_param *param,
+				  struct genl_info *info,
+				  union devlink_param_value *value)
+{
+	if (param->type != DEVLINK_PARAM_TYPE_BOOL &&
+	    !info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])
+		return -EINVAL;
+
+	switch (param->type) {
+	case DEVLINK_PARAM_TYPE_U8:
+		value->vu8 = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+		break;
+	case DEVLINK_PARAM_TYPE_U16:
+		value->vu16 = nla_get_u16(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+		break;
+	case DEVLINK_PARAM_TYPE_U32:
+		value->vu32 = nla_get_u32(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+		break;
+	case DEVLINK_PARAM_TYPE_STRING:
+		if (nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) >
+		    DEVLINK_PARAM_MAX_STRING_VALUE)
+			return -EINVAL;
+		value->vstr = nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+		break;
+	case DEVLINK_PARAM_TYPE_BOOL:
+		value->vbool = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA] ?
+			       true : false;
+		break;
+	}
+	return 0;
+}
+
+static struct devlink_param_item *
+devlink_param_get_from_info(struct devlink *devlink,
+			    struct genl_info *info)
+{
+	char *param_name;
+
+	if (!info->attrs[DEVLINK_ATTR_PARAM_NAME])
+		return NULL;
+
+	param_name = nla_data(info->attrs[DEVLINK_ATTR_PARAM_NAME]);
+	return devlink_param_find_by_name(&devlink->param_list, param_name);
+}
+
+static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb,
+					 struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_param_item *param_item;
+	struct sk_buff *msg;
+	int err;
+
+	param_item = devlink_param_get_from_info(devlink, info);
+	if (!param_item)
+		return -EINVAL;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_param_fill(msg, devlink, param_item,
+				    DEVLINK_CMD_PARAM_GET,
+				    info->snd_portid, info->snd_seq, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb,
+					 struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	enum devlink_param_type param_type;
+	struct devlink_param_gset_ctx ctx;
+	enum devlink_param_cmode cmode;
+	struct devlink_param_item *param_item;
+	const struct devlink_param *param;
+	union devlink_param_value value;
+	int err = 0;
+
+	param_item = devlink_param_get_from_info(devlink, info);
+	if (!param_item)
+		return -EINVAL;
+	param = param_item->param;
+	err = devlink_param_type_get_from_info(info, &param_type);
+	if (err)
+		return err;
+	if (param_type != param->type)
+		return -EINVAL;
+	err = devlink_param_value_get_from_info(param, info, &value);
+	if (err)
+		return err;
+	if (param->validate) {
+		err = param->validate(devlink, param->id, value, info->extack);
+		if (err)
+			return err;
+	}
+
+	if (!info->attrs[DEVLINK_ATTR_PARAM_VALUE_CMODE])
+		return -EINVAL;
+	cmode = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_CMODE]);
+	if (!devlink_param_cmode_is_supported(param, cmode))
+		return -EOPNOTSUPP;
+
+	if (cmode == DEVLINK_PARAM_CMODE_DRIVERINIT) {
+		param_item->driverinit_value = value;
+		param_item->driverinit_value_valid = true;
+	} else {
+		if (!param->set)
+			return -EOPNOTSUPP;
+		ctx.val = value;
+		ctx.cmode = cmode;
+		err = devlink_param_set(devlink, param, &ctx);
+		if (err)
+			return err;
+	}
+
+	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
+	return 0;
+}
+
+static int devlink_param_register_one(struct devlink *devlink,
+				      const struct devlink_param *param)
+{
+	struct devlink_param_item *param_item;
+
+	if (devlink_param_find_by_name(&devlink->param_list,
+				       param->name))
+		return -EEXIST;
+
+	if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT))
+		WARN_ON(param->get || param->set);
+	else
+		WARN_ON(!param->get || !param->set);
+
+	param_item = kzalloc(sizeof(*param_item), GFP_KERNEL);
+	if (!param_item)
+		return -ENOMEM;
+	param_item->param = param;
+
+	list_add_tail(&param_item->list, &devlink->param_list);
+	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
+	return 0;
+}
+
+static void devlink_param_unregister_one(struct devlink *devlink,
+					 const struct devlink_param *param)
+{
+	struct devlink_param_item *param_item;
+
+	param_item = devlink_param_find_by_name(&devlink->param_list,
+						param->name);
+	WARN_ON(!param_item);
+	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_DEL);
+	list_del(&param_item->list);
+	kfree(param_item);
+}
+
+static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg,
+					     struct devlink *devlink,
+					     struct devlink_snapshot *snapshot)
+{
+	struct nlattr *snap_attr;
+	int err;
+
+	snap_attr = nla_nest_start(msg, DEVLINK_ATTR_REGION_SNAPSHOT);
+	if (!snap_attr)
+		return -EINVAL;
+
+	err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID, snapshot->id);
+	if (err)
+		goto nla_put_failure;
+
+	nla_nest_end(msg, snap_attr);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, snap_attr);
+	return err;
+}
+
+static int devlink_nl_region_snapshots_id_put(struct sk_buff *msg,
+					      struct devlink *devlink,
+					      struct devlink_region *region)
+{
+	struct devlink_snapshot *snapshot;
+	struct nlattr *snapshots_attr;
+	int err;
+
+	snapshots_attr = nla_nest_start(msg, DEVLINK_ATTR_REGION_SNAPSHOTS);
+	if (!snapshots_attr)
+		return -EINVAL;
+
+	list_for_each_entry(snapshot, &region->snapshot_list, list) {
+		err = devlink_nl_region_snapshot_id_put(msg, devlink, snapshot);
+		if (err)
+			goto nla_put_failure;
+	}
+
+	nla_nest_end(msg, snapshots_attr);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, snapshots_attr);
+	return err;
+}
+
+static int devlink_nl_region_fill(struct sk_buff *msg, struct devlink *devlink,
+				  enum devlink_command cmd, u32 portid,
+				  u32 seq, int flags,
+				  struct devlink_region *region)
+{
+	void *hdr;
+	int err;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	err = devlink_nl_put_handle(msg, devlink);
+	if (err)
+		goto nla_put_failure;
+
+	err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME, region->name);
+	if (err)
+		goto nla_put_failure;
+
+	err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
+				region->size,
+				DEVLINK_ATTR_PAD);
+	if (err)
+		goto nla_put_failure;
+
+	err = devlink_nl_region_snapshots_id_put(msg, devlink, region);
+	if (err)
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return err;
+}
+
+static void devlink_nl_region_notify(struct devlink_region *region,
+				     struct devlink_snapshot *snapshot,
+				     enum devlink_command cmd)
+{
+	struct devlink *devlink = region->devlink;
+	struct sk_buff *msg;
+	void *hdr;
+	int err;
+
+	WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL);
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	hdr = genlmsg_put(msg, 0, 0, &devlink_nl_family, 0, cmd);
+	if (!hdr)
+		goto out_free_msg;
+
+	err = devlink_nl_put_handle(msg, devlink);
+	if (err)
+		goto out_cancel_msg;
+
+	err = nla_put_string(msg, DEVLINK_ATTR_REGION_NAME,
+			     region->name);
+	if (err)
+		goto out_cancel_msg;
+
+	if (snapshot) {
+		err = nla_put_u32(msg, DEVLINK_ATTR_REGION_SNAPSHOT_ID,
+				  snapshot->id);
+		if (err)
+			goto out_cancel_msg;
+	} else {
+		err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_SIZE,
+					region->size, DEVLINK_ATTR_PAD);
+		if (err)
+			goto out_cancel_msg;
+	}
+	genlmsg_end(msg, hdr);
+
+	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+
+	return;
+
+out_cancel_msg:
+	genlmsg_cancel(msg, hdr);
+out_free_msg:
+	nlmsg_free(msg);
+}
+
+static int devlink_nl_cmd_region_get_doit(struct sk_buff *skb,
+					  struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_region *region;
+	const char *region_name;
+	struct sk_buff *msg;
+	int err;
+
+	if (!info->attrs[DEVLINK_ATTR_REGION_NAME])
+		return -EINVAL;
+
+	region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
+	region = devlink_region_get_by_name(devlink, region_name);
+	if (!region)
+		return -EINVAL;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_region_fill(msg, devlink, DEVLINK_CMD_REGION_GET,
+				     info->snd_portid, info->snd_seq, 0,
+				     region);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg,
+					    struct netlink_callback *cb)
+{
+	struct devlink_region *region;
+	struct devlink *devlink;
+	int start = cb->args[0];
+	int idx = 0;
+	int err;
+
+	mutex_lock(&devlink_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+			continue;
+
+		mutex_lock(&devlink->lock);
+		list_for_each_entry(region, &devlink->region_list, list) {
+			if (idx < start) {
+				idx++;
+				continue;
+			}
+			err = devlink_nl_region_fill(msg, devlink,
+						     DEVLINK_CMD_REGION_GET,
+						     NETLINK_CB(cb->skb).portid,
+						     cb->nlh->nlmsg_seq,
+						     NLM_F_MULTI, region);
+			if (err) {
+				mutex_unlock(&devlink->lock);
+				goto out;
+			}
+			idx++;
+		}
+		mutex_unlock(&devlink->lock);
+	}
+out:
+	mutex_unlock(&devlink_mutex);
+	cb->args[0] = idx;
+	return msg->len;
+}
+
+static int devlink_nl_cmd_region_del(struct sk_buff *skb,
+				     struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_snapshot *snapshot;
+	struct devlink_region *region;
+	const char *region_name;
+	u32 snapshot_id;
+
+	if (!info->attrs[DEVLINK_ATTR_REGION_NAME] ||
+	    !info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID])
+		return -EINVAL;
+
+	region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
+	snapshot_id = nla_get_u32(info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]);
+
+	region = devlink_region_get_by_name(devlink, region_name);
+	if (!region)
+		return -EINVAL;
+
+	snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
+	if (!snapshot)
+		return -EINVAL;
+
+	devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL);
+	devlink_region_snapshot_del(snapshot);
+	return 0;
+}
+
+static int devlink_nl_cmd_region_read_chunk_fill(struct sk_buff *msg,
+						 struct devlink *devlink,
+						 u8 *chunk, u32 chunk_size,
+						 u64 addr)
+{
+	struct nlattr *chunk_attr;
+	int err;
+
+	chunk_attr = nla_nest_start(msg, DEVLINK_ATTR_REGION_CHUNK);
+	if (!chunk_attr)
+		return -EINVAL;
+
+	err = nla_put(msg, DEVLINK_ATTR_REGION_CHUNK_DATA, chunk_size, chunk);
+	if (err)
+		goto nla_put_failure;
+
+	err = nla_put_u64_64bit(msg, DEVLINK_ATTR_REGION_CHUNK_ADDR, addr,
+				DEVLINK_ATTR_PAD);
+	if (err)
+		goto nla_put_failure;
+
+	nla_nest_end(msg, chunk_attr);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, chunk_attr);
+	return err;
+}
+
+#define DEVLINK_REGION_READ_CHUNK_SIZE 256
+
+static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb,
+						struct devlink *devlink,
+						struct devlink_region *region,
+						struct nlattr **attrs,
+						u64 start_offset,
+						u64 end_offset,
+						bool dump,
+						u64 *new_offset)
+{
+	struct devlink_snapshot *snapshot;
+	u64 curr_offset = start_offset;
+	u32 snapshot_id;
+	int err = 0;
+
+	*new_offset = start_offset;
+
+	snapshot_id = nla_get_u32(attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]);
+	snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
+	if (!snapshot)
+		return -EINVAL;
+
+	if (end_offset > snapshot->data_len || dump)
+		end_offset = snapshot->data_len;
+
+	while (curr_offset < end_offset) {
+		u32 data_size;
+		u8 *data;
+
+		if (end_offset - curr_offset < DEVLINK_REGION_READ_CHUNK_SIZE)
+			data_size = end_offset - curr_offset;
+		else
+			data_size = DEVLINK_REGION_READ_CHUNK_SIZE;
+
+		data = &snapshot->data[curr_offset];
+		err = devlink_nl_cmd_region_read_chunk_fill(skb, devlink,
+							    data, data_size,
+							    curr_offset);
+		if (err)
+			break;
+
+		curr_offset += data_size;
+	}
+	*new_offset = curr_offset;
+
+	return err;
+}
+
+static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
+					     struct netlink_callback *cb)
+{
+	u64 ret_offset, start_offset, end_offset = 0;
+	struct nlattr *attrs[DEVLINK_ATTR_MAX + 1];
+	const struct genl_ops *ops = cb->data;
+	struct devlink_region *region;
+	struct nlattr *chunks_attr;
+	const char *region_name;
+	struct devlink *devlink;
+	bool dump = true;
+	void *hdr;
+	int err;
+
+	start_offset = *((u64 *)&cb->args[0]);
+
+	err = nlmsg_parse(cb->nlh, GENL_HDRLEN + devlink_nl_family.hdrsize,
+			  attrs, DEVLINK_ATTR_MAX, ops->policy, NULL);
+	if (err)
+		goto out;
+
+	devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
+	if (IS_ERR(devlink))
+		goto out;
+
+	mutex_lock(&devlink_mutex);
+	mutex_lock(&devlink->lock);
+
+	if (!attrs[DEVLINK_ATTR_REGION_NAME] ||
+	    !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID])
+		goto out_unlock;
+
+	region_name = nla_data(attrs[DEVLINK_ATTR_REGION_NAME]);
+	region = devlink_region_get_by_name(devlink, region_name);
+	if (!region)
+		goto out_unlock;
+
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+			  &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI,
+			  DEVLINK_CMD_REGION_READ);
+	if (!hdr)
+		goto out_unlock;
+
+	err = devlink_nl_put_handle(skb, devlink);
+	if (err)
+		goto nla_put_failure;
+
+	err = nla_put_string(skb, DEVLINK_ATTR_REGION_NAME, region_name);
+	if (err)
+		goto nla_put_failure;
+
+	chunks_attr = nla_nest_start(skb, DEVLINK_ATTR_REGION_CHUNKS);
+	if (!chunks_attr)
+		goto nla_put_failure;
+
+	if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] &&
+	    attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) {
+		if (!start_offset)
+			start_offset =
+				nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
+
+		end_offset = nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
+		end_offset += nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]);
+		dump = false;
+	}
+
+	err = devlink_nl_region_read_snapshot_fill(skb, devlink,
+						   region, attrs,
+						   start_offset,
+						   end_offset, dump,
+						   &ret_offset);
+
+	if (err && err != -EMSGSIZE)
+		goto nla_put_failure;
+
+	/* Check if there was any progress done to prevent infinite loop */
+	if (ret_offset == start_offset)
+		goto nla_put_failure;
+
+	*((u64 *)&cb->args[0]) = ret_offset;
+
+	nla_nest_end(skb, chunks_attr);
+	genlmsg_end(skb, hdr);
+	mutex_unlock(&devlink->lock);
+	mutex_unlock(&devlink_mutex);
+
+	return skb->len;
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+out_unlock:
+	mutex_unlock(&devlink->lock);
+	mutex_unlock(&devlink_mutex);
+out:
+	return 0;
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -2624,6 +3588,11 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 },
 	[DEVLINK_ATTR_RESOURCE_ID] = { .type = NLA_U64},
 	[DEVLINK_ATTR_RESOURCE_SIZE] = { .type = NLA_U64},
+	[DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING },
+	[DEVLINK_ATTR_PARAM_TYPE] = { .type = NLA_U8 },
+	[DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
+	[DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
+	[DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
@@ -2807,6 +3776,43 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
 				  DEVLINK_NL_FLAG_NO_LOCK,
 	},
+	{
+		.cmd = DEVLINK_CMD_PARAM_GET,
+		.doit = devlink_nl_cmd_param_get_doit,
+		.dumpit = devlink_nl_cmd_param_get_dumpit,
+		.policy = devlink_nl_policy,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+		/* can be retrieved by unprivileged users */
+	},
+	{
+		.cmd = DEVLINK_CMD_PARAM_SET,
+		.doit = devlink_nl_cmd_param_set_doit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
+	{
+		.cmd = DEVLINK_CMD_REGION_GET,
+		.doit = devlink_nl_cmd_region_get_doit,
+		.dumpit = devlink_nl_cmd_region_get_dumpit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
+	{
+		.cmd = DEVLINK_CMD_REGION_DEL,
+		.doit = devlink_nl_cmd_region_del,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
+	{
+		.cmd = DEVLINK_CMD_REGION_READ,
+		.dumpit = devlink_nl_cmd_region_read_dumpit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
@@ -2845,6 +3851,8 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
 	INIT_LIST_HEAD(&devlink->sb_list);
 	INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
 	INIT_LIST_HEAD(&devlink->resource_list);
+	INIT_LIST_HEAD(&devlink->param_list);
+	INIT_LIST_HEAD(&devlink->region_list);
 	mutex_init(&devlink->lock);
 	return devlink;
 }
@@ -3434,6 +4442,320 @@ out:
 }
 EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister);
 
+/**
+ *	devlink_params_register - register configuration parameters
+ *
+ *	@devlink: devlink
+ *	@params: configuration parameters array
+ *	@params_count: number of parameters provided
+ *
+ *	Register the configuration parameters supported by the driver.
+ */
+int devlink_params_register(struct devlink *devlink,
+			    const struct devlink_param *params,
+			    size_t params_count)
+{
+	const struct devlink_param *param = params;
+	int i;
+	int err;
+
+	mutex_lock(&devlink->lock);
+	for (i = 0; i < params_count; i++, param++) {
+		if (!param || !param->name || !param->supported_cmodes) {
+			err = -EINVAL;
+			goto rollback;
+		}
+		if (param->generic) {
+			err = devlink_param_generic_verify(param);
+			if (err)
+				goto rollback;
+		} else {
+			err = devlink_param_driver_verify(param);
+			if (err)
+				goto rollback;
+		}
+		err = devlink_param_register_one(devlink, param);
+		if (err)
+			goto rollback;
+	}
+
+	mutex_unlock(&devlink->lock);
+	return 0;
+
+rollback:
+	if (!i)
+		goto unlock;
+	for (param--; i > 0; i--, param--)
+		devlink_param_unregister_one(devlink, param);
+unlock:
+	mutex_unlock(&devlink->lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_params_register);
+
+/**
+ *	devlink_params_unregister - unregister configuration parameters
+ *	@devlink: devlink
+ *	@params: configuration parameters to unregister
+ *	@params_count: number of parameters provided
+ */
+void devlink_params_unregister(struct devlink *devlink,
+			       const struct devlink_param *params,
+			       size_t params_count)
+{
+	const struct devlink_param *param = params;
+	int i;
+
+	mutex_lock(&devlink->lock);
+	for (i = 0; i < params_count; i++, param++)
+		devlink_param_unregister_one(devlink, param);
+	mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_params_unregister);
+
+/**
+ *	devlink_param_driverinit_value_get - get configuration parameter
+ *					     value for driver initializing
+ *
+ *	@devlink: devlink
+ *	@param_id: parameter ID
+ *	@init_val: value of parameter in driverinit configuration mode
+ *
+ *	This function should be used by the driver to get driverinit
+ *	configuration for initialization after reload command.
+ */
+int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
+				       union devlink_param_value *init_val)
+{
+	struct devlink_param_item *param_item;
+
+	if (!devlink->ops || !devlink->ops->reload)
+		return -EOPNOTSUPP;
+
+	param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
+	if (!param_item)
+		return -EINVAL;
+
+	if (!param_item->driverinit_value_valid ||
+	    !devlink_param_cmode_is_supported(param_item->param,
+					      DEVLINK_PARAM_CMODE_DRIVERINIT))
+		return -EOPNOTSUPP;
+
+	*init_val = param_item->driverinit_value;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get);
+
+/**
+ *	devlink_param_driverinit_value_set - set value of configuration
+ *					     parameter for driverinit
+ *					     configuration mode
+ *
+ *	@devlink: devlink
+ *	@param_id: parameter ID
+ *	@init_val: value of parameter to set for driverinit configuration mode
+ *
+ *	This function should be used by the driver to set driverinit
+ *	configuration mode default value.
+ */
+int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
+				       union devlink_param_value init_val)
+{
+	struct devlink_param_item *param_item;
+
+	param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
+	if (!param_item)
+		return -EINVAL;
+
+	if (!devlink_param_cmode_is_supported(param_item->param,
+					      DEVLINK_PARAM_CMODE_DRIVERINIT))
+		return -EOPNOTSUPP;
+
+	param_item->driverinit_value = init_val;
+	param_item->driverinit_value_valid = true;
+
+	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set);
+
+/**
+ *	devlink_param_value_changed - notify devlink on a parameter's value
+ *				      change. Should be called by the driver
+ *				      right after the change.
+ *
+ *	@devlink: devlink
+ *	@param_id: parameter ID
+ *
+ *	This function should be used by the driver to notify devlink on value
+ *	change, excluding driverinit configuration mode.
+ *	For driverinit configuration mode driver should use the function
+ *	devlink_param_driverinit_value_set() instead.
+ */
+void devlink_param_value_changed(struct devlink *devlink, u32 param_id)
+{
+	struct devlink_param_item *param_item;
+
+	param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
+	WARN_ON(!param_item);
+
+	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
+}
+EXPORT_SYMBOL_GPL(devlink_param_value_changed);
+
+/**
+ *	devlink_region_create - create a new address region
+ *
+ *	@devlink: devlink
+ *	@region_name: region name
+ *	@region_max_snapshots: Maximum supported number of snapshots for region
+ *	@region_size: size of region
+ */
+struct devlink_region *devlink_region_create(struct devlink *devlink,
+					     const char *region_name,
+					     u32 region_max_snapshots,
+					     u64 region_size)
+{
+	struct devlink_region *region;
+	int err = 0;
+
+	mutex_lock(&devlink->lock);
+
+	if (devlink_region_get_by_name(devlink, region_name)) {
+		err = -EEXIST;
+		goto unlock;
+	}
+
+	region = kzalloc(sizeof(*region), GFP_KERNEL);
+	if (!region) {
+		err = -ENOMEM;
+		goto unlock;
+	}
+
+	region->devlink = devlink;
+	region->max_snapshots = region_max_snapshots;
+	region->name = region_name;
+	region->size = region_size;
+	INIT_LIST_HEAD(&region->snapshot_list);
+	list_add_tail(&region->list, &devlink->region_list);
+	devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
+
+	mutex_unlock(&devlink->lock);
+	return region;
+
+unlock:
+	mutex_unlock(&devlink->lock);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(devlink_region_create);
+
+/**
+ *	devlink_region_destroy - destroy address region
+ *
+ *	@region: devlink region to destroy
+ */
+void devlink_region_destroy(struct devlink_region *region)
+{
+	struct devlink *devlink = region->devlink;
+	struct devlink_snapshot *snapshot, *ts;
+
+	mutex_lock(&devlink->lock);
+
+	/* Free all snapshots of region */
+	list_for_each_entry_safe(snapshot, ts, &region->snapshot_list, list)
+		devlink_region_snapshot_del(snapshot);
+
+	list_del(&region->list);
+
+	devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_DEL);
+	mutex_unlock(&devlink->lock);
+	kfree(region);
+}
+EXPORT_SYMBOL_GPL(devlink_region_destroy);
+
+/**
+ *	devlink_region_shapshot_id_get - get snapshot ID
+ *
+ *	This callback should be called when adding a new snapshot,
+ *	Driver should use the same id for multiple snapshots taken
+ *	on multiple regions at the same time/by the same trigger.
+ *
+ *	@devlink: devlink
+ */
+u32 devlink_region_shapshot_id_get(struct devlink *devlink)
+{
+	u32 id;
+
+	mutex_lock(&devlink->lock);
+	id = ++devlink->snapshot_id;
+	mutex_unlock(&devlink->lock);
+
+	return id;
+}
+EXPORT_SYMBOL_GPL(devlink_region_shapshot_id_get);
+
+/**
+ *	devlink_region_snapshot_create - create a new snapshot
+ *	This will add a new snapshot of a region. The snapshot
+ *	will be stored on the region struct and can be accessed
+ *	from devlink. This is useful for future	analyses of snapshots.
+ *	Multiple snapshots can be created on a region.
+ *	The @snapshot_id should be obtained using the getter function.
+ *
+ *	@devlink_region: devlink region of the snapshot
+ *	@data_len: size of snapshot data
+ *	@data: snapshot data
+ *	@snapshot_id: snapshot id to be created
+ *	@data_destructor: pointer to destructor function to free data
+ */
+int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
+				   u8 *data, u32 snapshot_id,
+				   devlink_snapshot_data_dest_t *data_destructor)
+{
+	struct devlink *devlink = region->devlink;
+	struct devlink_snapshot *snapshot;
+	int err;
+
+	mutex_lock(&devlink->lock);
+
+	/* check if region can hold one more snapshot */
+	if (region->cur_snapshots == region->max_snapshots) {
+		err = -ENOMEM;
+		goto unlock;
+	}
+
+	if (devlink_region_snapshot_get_by_id(region, snapshot_id)) {
+		err = -EEXIST;
+		goto unlock;
+	}
+
+	snapshot = kzalloc(sizeof(*snapshot), GFP_KERNEL);
+	if (!snapshot) {
+		err = -ENOMEM;
+		goto unlock;
+	}
+
+	snapshot->id = snapshot_id;
+	snapshot->region = region;
+	snapshot->data = data;
+	snapshot->data_len = data_len;
+	snapshot->data_destructor = data_destructor;
+
+	list_add_tail(&snapshot->list, &region->snapshot_list);
+
+	region->cur_snapshots++;
+
+	devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_NEW);
+	mutex_unlock(&devlink->lock);
+	return 0;
+
+unlock:
+	mutex_unlock(&devlink->lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_region_snapshot_create);
+
 static int __init devlink_module_init(void)
 {
 	return genl_register_family(&devlink_nl_family);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index e677a20180cf..c9993c6c2fd4 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -111,6 +111,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
 	[NETIF_F_RX_UDP_TUNNEL_PORT_BIT] =	 "rx-udp_tunnel-port-offload",
 	[NETIF_F_HW_TLS_RECORD_BIT] =	"tls-hw-record",
 	[NETIF_F_HW_TLS_TX_BIT] =	 "tls-hw-tx-offload",
+	[NETIF_F_HW_TLS_RX_BIT] =	 "tls-hw-rx-offload",
 };
 
 static const char
diff --git a/net/core/filter.c b/net/core/filter.c
index 06da770f543f..104d560946da 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3679,7 +3679,7 @@ BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
 	if (unlikely(size > IP_TUNNEL_OPTS_MAX))
 		return -ENOMEM;
 
-	ip_tunnel_info_opts_set(info, from, size);
+	ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT);
 
 	return 0;
 }
@@ -4751,6 +4751,7 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 	case BPF_FUNC_trace_printk:
 		if (capable(CAP_SYS_ADMIN))
 			return bpf_get_trace_printk_proto();
+		/* else: fall through */
 	default:
 		return NULL;
 	}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 53f96e4f7bf5..08a5184f4b34 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -152,7 +152,9 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
 	    !dissector_uses_key(flow_dissector,
 				FLOW_DISSECTOR_KEY_ENC_CONTROL) &&
 	    !dissector_uses_key(flow_dissector,
-				FLOW_DISSECTOR_KEY_ENC_PORTS))
+				FLOW_DISSECTOR_KEY_ENC_PORTS) &&
+	    !dissector_uses_key(flow_dissector,
+				FLOW_DISSECTOR_KEY_ENC_IP))
 		return;
 
 	info = skb_tunnel_info(skb);
@@ -212,6 +214,16 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
 		tp->src = key->tp_src;
 		tp->dst = key->tp_dst;
 	}
+
+	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IP)) {
+		struct flow_dissector_key_ip *ip;
+
+		ip = skb_flow_dissector_target(flow_dissector,
+					       FLOW_DISSECTOR_KEY_ENC_IP,
+					       target_container);
+		ip->tos = key->tos;
+		ip->ttl = key->ttl;
+	}
 }
 EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);
 
@@ -589,7 +601,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 	struct flow_dissector_key_tags *key_tags;
 	struct flow_dissector_key_vlan *key_vlan;
 	enum flow_dissect_ret fdret;
-	bool skip_vlan = false;
+	enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
 	int num_hdrs = 0;
 	u8 ip_proto = 0;
 	bool ret;
@@ -748,14 +760,14 @@ proto_again:
 	}
 	case htons(ETH_P_8021AD):
 	case htons(ETH_P_8021Q): {
-		const struct vlan_hdr *vlan;
+		const struct vlan_hdr *vlan = NULL;
 		struct vlan_hdr _vlan;
-		bool vlan_tag_present = skb && skb_vlan_tag_present(skb);
+		__be16 saved_vlan_tpid = proto;
 
-		if (vlan_tag_present)
+		if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX &&
+		    skb && skb_vlan_tag_present(skb)) {
 			proto = skb->protocol;
-
-		if (!vlan_tag_present || eth_type_vlan(skb->protocol)) {
+		} else {
 			vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
 						    data, hlen, &_vlan);
 			if (!vlan) {
@@ -765,20 +777,23 @@ proto_again:
 
 			proto = vlan->h_vlan_encapsulated_proto;
 			nhoff += sizeof(*vlan);
-			if (skip_vlan) {
-				fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
-				break;
-			}
 		}
 
-		skip_vlan = true;
-		if (dissector_uses_key(flow_dissector,
-				       FLOW_DISSECTOR_KEY_VLAN)) {
+		if (dissector_vlan == FLOW_DISSECTOR_KEY_MAX) {
+			dissector_vlan = FLOW_DISSECTOR_KEY_VLAN;
+		} else if (dissector_vlan == FLOW_DISSECTOR_KEY_VLAN) {
+			dissector_vlan = FLOW_DISSECTOR_KEY_CVLAN;
+		} else {
+			fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
+			break;
+		}
+
+		if (dissector_uses_key(flow_dissector, dissector_vlan)) {
 			key_vlan = skb_flow_dissector_target(flow_dissector,
-							     FLOW_DISSECTOR_KEY_VLAN,
+							     dissector_vlan,
 							     target_container);
 
-			if (vlan_tag_present) {
+			if (!vlan) {
 				key_vlan->vlan_id = skb_vlan_tag_get_id(skb);
 				key_vlan->vlan_priority =
 					(skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT);
@@ -789,6 +804,7 @@ proto_again:
 					(ntohs(vlan->h_vlan_TCI) &
 					 VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
 			}
+			key_vlan->vlan_tpid = saved_vlan_tpid;
 		}
 
 		fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8e3fda9e725c..cbe85d8d4cc2 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1148,7 +1148,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
 		neigh->nud_state = new;
 		err = 0;
 		notify = old & NUD_VALID;
-		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
+		if (((old & (NUD_INCOMPLETE | NUD_PROBE)) ||
+		     (flags & NEIGH_UPDATE_F_ADMIN)) &&
 		    (new & NUD_FAILED)) {
 			neigh_invalidate(neigh);
 			notify = 1;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index bb7e80f4ced3..ffa1d18f2c2c 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1047,13 +1047,30 @@ static ssize_t traffic_class_show(struct netdev_queue *queue,
 				  char *buf)
 {
 	struct net_device *dev = queue->dev;
-	int index = get_netdev_queue_index(queue);
-	int tc = netdev_txq_to_tc(dev, index);
+	int index;
+	int tc;
 
+	if (!netif_is_multiqueue(dev))
+		return -ENOENT;
+
+	index = get_netdev_queue_index(queue);
+
+	/* If queue belongs to subordinate dev use its TC mapping */
+	dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
+
+	tc = netdev_txq_to_tc(dev, index);
 	if (tc < 0)
 		return -EINVAL;
 
-	return sprintf(buf, "%u\n", tc);
+	/* We can report the traffic class one of two ways:
+	 * Subordinate device traffic classes are reported with the traffic
+	 * class first, and then the subordinate class so for example TC0 on
+	 * subordinate device 2 will be reported as "0-2". If the queue
+	 * belongs to the root device it will be reported with just the
+	 * traffic class, so just "0" for TC 0 for example.
+	 */
+	return dev->num_tc < 0 ? sprintf(buf, "%u%d\n", tc, dev->num_tc) :
+				 sprintf(buf, "%u\n", tc);
 }
 
 #ifdef CONFIG_XPS
@@ -1214,10 +1231,20 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 	cpumask_var_t mask;
 	unsigned long index;
 
+	if (!netif_is_multiqueue(dev))
+		return -ENOENT;
+
 	index = get_netdev_queue_index(queue);
 
 	if (dev->num_tc) {
+		/* Do not allow XPS on subordinate device directly */
 		num_tc = dev->num_tc;
+		if (num_tc < 0)
+			return -EINVAL;
+
+		/* If queue belongs to subordinate dev use its map */
+		dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
+
 		tc = netdev_txq_to_tc(dev, index);
 		if (tc < 0)
 			return -EINVAL;
@@ -1227,13 +1254,13 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
 		return -ENOMEM;
 
 	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_maps);
+	dev_maps = rcu_dereference(dev->xps_cpus_map);
 	if (dev_maps) {
 		for_each_possible_cpu(cpu) {
 			int i, tci = cpu * num_tc + tc;
 			struct xps_map *map;
 
-			map = rcu_dereference(dev_maps->cpu_map[tci]);
+			map = rcu_dereference(dev_maps->attr_map[tci]);
 			if (!map)
 				continue;
 
@@ -1260,6 +1287,9 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
 	cpumask_var_t mask;
 	int err;
 
+	if (!netif_is_multiqueue(dev))
+		return -ENOENT;
+
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
@@ -1283,6 +1313,88 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
 
 static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
 	= __ATTR_RW(xps_cpus);
+
+static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
+{
+	struct net_device *dev = queue->dev;
+	struct xps_dev_maps *dev_maps;
+	unsigned long *mask, index;
+	int j, len, num_tc = 1, tc = 0;
+
+	index = get_netdev_queue_index(queue);
+
+	if (dev->num_tc) {
+		num_tc = dev->num_tc;
+		tc = netdev_txq_to_tc(dev, index);
+		if (tc < 0)
+			return -EINVAL;
+	}
+	mask = kcalloc(BITS_TO_LONGS(dev->num_rx_queues), sizeof(long),
+		       GFP_KERNEL);
+	if (!mask)
+		return -ENOMEM;
+
+	rcu_read_lock();
+	dev_maps = rcu_dereference(dev->xps_rxqs_map);
+	if (!dev_maps)
+		goto out_no_maps;
+
+	for (j = -1; j = netif_attrmask_next(j, NULL, dev->num_rx_queues),
+	     j < dev->num_rx_queues;) {
+		int i, tci = j * num_tc + tc;
+		struct xps_map *map;
+
+		map = rcu_dereference(dev_maps->attr_map[tci]);
+		if (!map)
+			continue;
+
+		for (i = map->len; i--;) {
+			if (map->queues[i] == index) {
+				set_bit(j, mask);
+				break;
+			}
+		}
+	}
+out_no_maps:
+	rcu_read_unlock();
+
+	len = bitmap_print_to_pagebuf(false, buf, mask, dev->num_rx_queues);
+	kfree(mask);
+
+	return len < PAGE_SIZE ? len : -EINVAL;
+}
+
+static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
+			      size_t len)
+{
+	struct net_device *dev = queue->dev;
+	struct net *net = dev_net(dev);
+	unsigned long *mask, index;
+	int err;
+
+	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+		return -EPERM;
+
+	mask = kcalloc(BITS_TO_LONGS(dev->num_rx_queues), sizeof(long),
+		       GFP_KERNEL);
+	if (!mask)
+		return -ENOMEM;
+
+	index = get_netdev_queue_index(queue);
+
+	err = bitmap_parse(buf, len, mask, dev->num_rx_queues);
+	if (err) {
+		kfree(mask);
+		return err;
+	}
+
+	err = __netif_set_xps_queue(dev, mask, index, true);
+	kfree(mask);
+	return err ? : len;
+}
+
+static struct netdev_queue_attribute xps_rxqs_attribute __ro_after_init
+	= __ATTR_RW(xps_rxqs);
 #endif /* CONFIG_XPS */
 
 static struct attribute *netdev_queue_default_attrs[] __ro_after_init = {
@@ -1290,6 +1402,7 @@ static struct attribute *netdev_queue_default_attrs[] __ro_after_init = {
 	&queue_traffic_class.attr,
 #ifdef CONFIG_XPS
 	&xps_cpus_attribute.attr,
+	&xps_rxqs_attribute.attr,
 	&queue_tx_maxrate.attr,
 #endif
 	NULL
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 49368e21d228..308ed04984de 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1265,7 +1265,7 @@ static ssize_t pktgen_if_write(struct file *file,
 		buf[len] = 0;
 		if (strcmp(buf, pkt_dev->dst_min) != 0) {
 			memset(pkt_dev->dst_min, 0, sizeof(pkt_dev->dst_min));
-			strncpy(pkt_dev->dst_min, buf, len);
+			strcpy(pkt_dev->dst_min, buf);
 			pkt_dev->daddr_min = in_aton(pkt_dev->dst_min);
 			pkt_dev->cur_daddr = pkt_dev->daddr_min;
 		}
@@ -1280,14 +1280,12 @@ static ssize_t pktgen_if_write(struct file *file,
 		if (len < 0)
 			return len;
 
-
 		if (copy_from_user(buf, &user_buffer[i], len))
 			return -EFAULT;
-
 		buf[len] = 0;
 		if (strcmp(buf, pkt_dev->dst_max) != 0) {
 			memset(pkt_dev->dst_max, 0, sizeof(pkt_dev->dst_max));
-			strncpy(pkt_dev->dst_max, buf, len);
+			strcpy(pkt_dev->dst_max, buf);
 			pkt_dev->daddr_max = in_aton(pkt_dev->dst_max);
 			pkt_dev->cur_daddr = pkt_dev->daddr_max;
 		}
@@ -1396,7 +1394,7 @@ static ssize_t pktgen_if_write(struct file *file,
 		buf[len] = 0;
 		if (strcmp(buf, pkt_dev->src_min) != 0) {
 			memset(pkt_dev->src_min, 0, sizeof(pkt_dev->src_min));
-			strncpy(pkt_dev->src_min, buf, len);
+			strcpy(pkt_dev->src_min, buf);
 			pkt_dev->saddr_min = in_aton(pkt_dev->src_min);
 			pkt_dev->cur_saddr = pkt_dev->saddr_min;
 		}
@@ -1416,7 +1414,7 @@ static ssize_t pktgen_if_write(struct file *file,
 		buf[len] = 0;
 		if (strcmp(buf, pkt_dev->src_max) != 0) {
 			memset(pkt_dev->src_max, 0, sizeof(pkt_dev->src_max));
-			strncpy(pkt_dev->src_max, buf, len);
+			strcpy(pkt_dev->src_max, buf);
 			pkt_dev->saddr_max = in_aton(pkt_dev->src_max);
 			pkt_dev->cur_saddr = pkt_dev->saddr_max;
 		}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5ef61222fdef..92b6fa5d5f6e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -964,7 +964,8 @@ static size_t rtnl_xdp_size(void)
 {
 	size_t xdp_size = nla_total_size(0) +	/* nest IFLA_XDP */
 			  nla_total_size(1) +	/* XDP_ATTACHED */
-			  nla_total_size(4);	/* XDP_PROG_ID */
+			  nla_total_size(4) +	/* XDP_PROG_ID (or 1st mode) */
+			  nla_total_size(4);	/* XDP_<mode>_PROG_ID */
 
 	return xdp_size;
 }
@@ -1353,27 +1354,51 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
-static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
+static u32 rtnl_xdp_prog_skb(struct net_device *dev)
 {
-	const struct net_device_ops *ops = dev->netdev_ops;
 	const struct bpf_prog *generic_xdp_prog;
-	struct netdev_bpf xdp;
 
 	ASSERT_RTNL();
 
-	*prog_id = 0;
 	generic_xdp_prog = rtnl_dereference(dev->xdp_prog);
-	if (generic_xdp_prog) {
-		*prog_id = generic_xdp_prog->aux->id;
-		return XDP_ATTACHED_SKB;
-	}
-	if (!ops->ndo_bpf)
-		return XDP_ATTACHED_NONE;
+	if (!generic_xdp_prog)
+		return 0;
+	return generic_xdp_prog->aux->id;
+}
+
+static u32 rtnl_xdp_prog_drv(struct net_device *dev)
+{
+	return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf, XDP_QUERY_PROG);
+}
+
+static u32 rtnl_xdp_prog_hw(struct net_device *dev)
+{
+	return __dev_xdp_query(dev, dev->netdev_ops->ndo_bpf,
+			       XDP_QUERY_PROG_HW);
+}
+
+static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device *dev,
+			       u32 *prog_id, u8 *mode, u8 tgt_mode, u32 attr,
+			       u32 (*get_prog_id)(struct net_device *dev))
+{
+	u32 curr_id;
+	int err;
+
+	curr_id = get_prog_id(dev);
+	if (!curr_id)
+		return 0;
 
-	__dev_xdp_query(dev, ops->ndo_bpf, &xdp);
-	*prog_id = xdp.prog_id;
+	*prog_id = curr_id;
+	err = nla_put_u32(skb, attr, curr_id);
+	if (err)
+		return err;
+
+	if (*mode != XDP_ATTACHED_NONE)
+		*mode = XDP_ATTACHED_MULTI;
+	else
+		*mode = tgt_mode;
 
-	return xdp.prog_attached;
+	return 0;
 }
 
 static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
@@ -1381,17 +1406,32 @@ static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
 	struct nlattr *xdp;
 	u32 prog_id;
 	int err;
+	u8 mode;
 
 	xdp = nla_nest_start(skb, IFLA_XDP);
 	if (!xdp)
 		return -EMSGSIZE;
 
-	err = nla_put_u8(skb, IFLA_XDP_ATTACHED,
-			 rtnl_xdp_attached_mode(dev, &prog_id));
+	prog_id = 0;
+	mode = XDP_ATTACHED_NONE;
+	err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_SKB,
+				  IFLA_XDP_SKB_PROG_ID, rtnl_xdp_prog_skb);
+	if (err)
+		goto err_cancel;
+	err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_DRV,
+				  IFLA_XDP_DRV_PROG_ID, rtnl_xdp_prog_drv);
+	if (err)
+		goto err_cancel;
+	err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_HW,
+				  IFLA_XDP_HW_PROG_ID, rtnl_xdp_prog_hw);
+	if (err)
+		goto err_cancel;
+
+	err = nla_put_u8(skb, IFLA_XDP_ATTACHED, mode);
 	if (err)
 		goto err_cancel;
 
-	if (prog_id) {
+	if (prog_id && mode != XDP_ATTACHED_MULTI) {
 		err = nla_put_u32(skb, IFLA_XDP_PROG_ID, prog_id);
 		if (err)
 			goto err_cancel;
@@ -2759,9 +2799,12 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
 			return err;
 	}
 
-	dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
-
-	__dev_notify_flags(dev, old_flags, ~0U);
+	if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) {
+		__dev_notify_flags(dev, old_flags, 0U);
+	} else {
+		dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
+		__dev_notify_flags(dev, old_flags, ~0U);
+	}
 	return 0;
 }
 EXPORT_SYMBOL(rtnl_configure_link);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8e51f8555e11..0c1a00672ba9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3816,14 +3816,14 @@ err:
 }
 EXPORT_SYMBOL_GPL(skb_segment);
 
-int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 {
 	struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb);
 	unsigned int offset = skb_gro_offset(skb);
 	unsigned int headlen = skb_headlen(skb);
 	unsigned int len = skb_gro_len(skb);
-	struct sk_buff *lp, *p = *head;
 	unsigned int delta_truesize;
+	struct sk_buff *lp;
 
 	if (unlikely(p->len + len >= 65536))
 		return -E2BIG;
@@ -4899,7 +4899,6 @@ EXPORT_SYMBOL(skb_try_coalesce);
  */
 void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 {
-	skb->tstamp = 0;
 	skb->pkt_type = PACKET_HOST;
 	skb->skb_iif = 0;
 	skb->ignore_df = 0;
@@ -4912,8 +4911,8 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 		return;
 
 	ipvs_reset(skb);
-	skb_orphan(skb);
 	skb->mark = 0;
+	skb->tstamp = 0;
 }
 EXPORT_SYMBOL_GPL(skb_scrub_packet);
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 9e8f65585b81..03fdea5b0f57 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -91,6 +91,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <asm/unaligned.h>
 #include <linux/capability.h>
 #include <linux/errno.h>
 #include <linux/errqueue.h>
@@ -697,6 +698,7 @@ EXPORT_SYMBOL(sk_mc_loop);
 int sock_setsockopt(struct socket *sock, int level, int optname,
 		    char __user *optval, unsigned int optlen)
 {
+	struct sock_txtime sk_txtime;
 	struct sock *sk = sock->sk;
 	int val;
 	int valbool;
@@ -1070,6 +1072,26 @@ set_rcvbuf:
 		}
 		break;
 
+	case SO_TXTIME:
+		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+			ret = -EPERM;
+		} else if (optlen != sizeof(struct sock_txtime)) {
+			ret = -EINVAL;
+		} else if (copy_from_user(&sk_txtime, optval,
+			   sizeof(struct sock_txtime))) {
+			ret = -EFAULT;
+		} else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
+			ret = -EINVAL;
+		} else {
+			sock_valbool_flag(sk, SOCK_TXTIME, true);
+			sk->sk_clockid = sk_txtime.clockid;
+			sk->sk_txtime_deadline_mode =
+				!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
+			sk->sk_txtime_report_errors =
+				!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
+		}
+		break;
+
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -1115,6 +1137,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		u64 val64;
 		struct linger ling;
 		struct timeval tm;
+		struct sock_txtime txtime;
 	} v;
 
 	int lv = sizeof(int);
@@ -1403,6 +1426,15 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sock_flag(sk, SOCK_ZEROCOPY);
 		break;
 
+	case SO_TXTIME:
+		lv = sizeof(v.txtime);
+		v.txtime.clockid = sk->sk_clockid;
+		v.txtime.flags |= sk->sk_txtime_deadline_mode ?
+				  SOF_TXTIME_DEADLINE_MODE : 0;
+		v.txtime.flags |= sk->sk_txtime_report_errors ?
+				  SOF_TXTIME_REPORT_ERRORS : 0;
+		break;
+
 	default:
 		/* We implement the SO_SNDLOWAT etc to not be settable
 		 * (1003.1g 7).
@@ -2137,6 +2169,13 @@ int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
 		sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
 		sockc->tsflags |= tsflags;
 		break;
+	case SCM_TXTIME:
+		if (!sock_flag(sk, SOCK_TXTIME))
+			return -EINVAL;
+		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64)))
+			return -EINVAL;
+		sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
+		break;
 	/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
 	case SCM_RIGHTS:
 	case SCM_CREDENTIALS:
@@ -2401,9 +2440,10 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
 {
 	struct proto *prot = sk->sk_prot;
 	long allocated = sk_memory_allocated_add(sk, amt);
+	bool charged = true;
 
 	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
-	    !mem_cgroup_charge_skmem(sk->sk_memcg, amt))
+	    !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt)))
 		goto suppress_allocation;
 
 	/* Under limit. */
@@ -2461,7 +2501,8 @@ suppress_allocation:
 			return 1;
 	}
 
-	trace_sock_exceed_buf_limit(sk, prot, allocated);
+	if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
+		trace_sock_exceed_buf_limit(sk, prot, allocated, kind);
 
 	sk_memory_allocated_sub(sk, amt);
 
@@ -2818,6 +2859,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_pacing_rate = ~0U;
 	sk->sk_pacing_shift = 10;
 	sk->sk_incoming_cpu = -1;
+
+	sk_rx_queue_clear(sk);
 	/*
 	 * Before updating sk_refcnt, we must commit prior changes to memory
 	 * (Documentation/RCU/rculist_nulls.txt for details)
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 9d1f22072d5d..57285383ed00 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -3,8 +3,11 @@
  * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
  * Released under terms in GPL version 2.  See COPYING.
  */
+#include <linux/bpf.h>
+#include <linux/filter.h>
 #include <linux/types.h>
 #include <linux/mm.h>
+#include <linux/netdevice.h>
 #include <linux/slab.h>
 #include <linux/idr.h>
 #include <linux/rhashtable.h>
@@ -45,8 +48,8 @@ static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
 	BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id)
 		     != sizeof(u32));
 
-	/* Use cyclic increasing ID as direct hash key, see rht_bucket_index */
-	return key << RHT_HASH_RESERVED_SPACE;
+	/* Use cyclic increasing ID as direct hash key */
+	return key;
 }
 
 static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg,
@@ -370,3 +373,34 @@ void xdp_return_buff(struct xdp_buff *xdp)
 	__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp->handle);
 }
 EXPORT_SYMBOL_GPL(xdp_return_buff);
+
+int xdp_attachment_query(struct xdp_attachment_info *info,
+			 struct netdev_bpf *bpf)
+{
+	bpf->prog_id = info->prog ? info->prog->aux->id : 0;
+	bpf->prog_flags = info->prog ? info->flags : 0;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xdp_attachment_query);
+
+bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
+			     struct netdev_bpf *bpf)
+{
+	if (info->prog && (bpf->flags ^ info->flags) & XDP_FLAGS_MODES) {
+		NL_SET_ERR_MSG(bpf->extack,
+			       "program loaded with different flags");
+		return false;
+	}
+	return true;
+}
+EXPORT_SYMBOL_GPL(xdp_attachment_flags_ok);
+
+void xdp_attachment_setup(struct xdp_attachment_info *info,
+			  struct netdev_bpf *bpf)
+{
+	if (info->prog)
+		bpf_prog_put(info->prog);
+	info->prog = bpf->prog;
+	info->flags = bpf->flags;
+}
+EXPORT_SYMBOL_GPL(xdp_attachment_setup);
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 1b2120645730..34aba55ed573 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -491,6 +491,7 @@ static void dn_nsp_disc_conf(struct sock *sk, struct sk_buff *skb)
 		break;
 	case DN_RUN:
 		sk->sk_shutdown |= SHUTDOWN_MASK;
+		/* fall through */
 	case DN_CC:
 		scp->state = DN_CN;
 	}
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index dc5d9af3dc80..a1917025e155 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -775,6 +775,20 @@ struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
 	if (!ds)
 		return NULL;
 
+	/* We avoid allocating memory outside dsa_switch
+	 * if it is not needed.
+	 */
+	if (n <= sizeof(ds->_bitmap) * 8) {
+		ds->bitmap = &ds->_bitmap;
+	} else {
+		ds->bitmap = devm_kcalloc(dev,
+					  BITS_TO_LONGS(n),
+					  sizeof(unsigned long),
+					  GFP_KERNEL);
+		if (unlikely(!ds->bitmap))
+			return NULL;
+	}
+
 	ds->dev = dev;
 	ds->num_ports = n;
 
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 1e3b6a6d8a40..71536c435132 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -900,7 +900,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
 
 	switch (f->command) {
 	case TC_BLOCK_BIND:
-		return tcf_block_cb_register(f->block, cb, dev, dev);
+		return tcf_block_cb_register(f->block, cb, dev, dev, f->extack);
 	case TC_BLOCK_UNBIND:
 		tcf_block_cb_unregister(f->block, cb, dev);
 		return 0;
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index b93511726069..142b294d3446 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -136,21 +136,20 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
 {
 	const struct switchdev_obj_port_mdb *mdb = info->mdb;
 	struct switchdev_trans *trans = info->trans;
-	DECLARE_BITMAP(group, ds->num_ports);
 	int port;
 
 	/* Build a mask of Multicast group members */
-	bitmap_zero(group, ds->num_ports);
+	bitmap_zero(ds->bitmap, ds->num_ports);
 	if (ds->index == info->sw_index)
-		set_bit(info->port, group);
+		set_bit(info->port, ds->bitmap);
 	for (port = 0; port < ds->num_ports; port++)
 		if (dsa_is_dsa_port(ds, port))
-			set_bit(port, group);
+			set_bit(port, ds->bitmap);
 
 	if (switchdev_trans_ph_prepare(trans))
-		return dsa_switch_mdb_prepare_bitmap(ds, mdb, group);
+		return dsa_switch_mdb_prepare_bitmap(ds, mdb, ds->bitmap);
 
-	dsa_switch_mdb_add_bitmap(ds, mdb, group);
+	dsa_switch_mdb_add_bitmap(ds, mdb, ds->bitmap);
 
 	return 0;
 }
@@ -204,21 +203,20 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds,
 {
 	const struct switchdev_obj_port_vlan *vlan = info->vlan;
 	struct switchdev_trans *trans = info->trans;
-	DECLARE_BITMAP(members, ds->num_ports);
 	int port;
 
 	/* Build a mask of VLAN members */
-	bitmap_zero(members, ds->num_ports);
+	bitmap_zero(ds->bitmap, ds->num_ports);
 	if (ds->index == info->sw_index)
-		set_bit(info->port, members);
+		set_bit(info->port, ds->bitmap);
 	for (port = 0; port < ds->num_ports; port++)
 		if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
-			set_bit(port, members);
+			set_bit(port, ds->bitmap);
 
 	if (switchdev_trans_ph_prepare(trans))
-		return dsa_switch_vlan_prepare_bitmap(ds, vlan, members);
+		return dsa_switch_vlan_prepare_bitmap(ds, vlan, ds->bitmap);
 
-	dsa_switch_vlan_add_bitmap(ds, vlan, members);
+	dsa_switch_vlan_add_bitmap(ds, vlan, ds->bitmap);
 
 	return 0;
 }
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index ee28440f57c5..fd8faa0dfa61 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -427,13 +427,13 @@ ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
 }
 EXPORT_SYMBOL(sysfs_format_mac);
 
-struct sk_buff **eth_gro_receive(struct sk_buff **head,
-				 struct sk_buff *skb)
+struct sk_buff *eth_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
-	struct sk_buff *p, **pp = NULL;
-	struct ethhdr *eh, *eh2;
-	unsigned int hlen, off_eth;
 	const struct packet_offload *ptype;
+	unsigned int hlen, off_eth;
+	struct sk_buff *pp = NULL;
+	struct ethhdr *eh, *eh2;
+	struct sk_buff *p;
 	__be16 type;
 	int flush = 1;
 
@@ -448,7 +448,7 @@ struct sk_buff **eth_gro_receive(struct sk_buff **head,
 
 	flush = 0;
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b403499fdabe..f2a0a3bab6b5 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -229,6 +229,7 @@ int inet_listen(struct socket *sock, int backlog)
 		err = inet_csk_listen_start(sk, backlog);
 		if (err)
 			goto out;
+		tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_LISTEN_CB, 0, NULL);
 	}
 	sk->sk_max_ack_backlog = backlog;
 	err = 0;
@@ -1384,12 +1385,12 @@ out:
 }
 EXPORT_SYMBOL(inet_gso_segment);
 
-struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
 	const struct net_offload *ops;
-	struct sk_buff **pp = NULL;
-	struct sk_buff *p;
+	struct sk_buff *pp = NULL;
 	const struct iphdr *iph;
+	struct sk_buff *p;
 	unsigned int hlen;
 	unsigned int off;
 	unsigned int id;
@@ -1425,7 +1426,7 @@ struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF));
 	id >>= 16;
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		struct iphdr *iph2;
 		u16 flush_id;
 
@@ -1505,8 +1506,8 @@ out:
 }
 EXPORT_SYMBOL(inet_gro_receive);
 
-static struct sk_buff **ipip_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *ipip_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
 	if (NAPI_GRO_CB(skb)->encap_mark) {
 		NAPI_GRO_CB(skb)->flush = 1;
@@ -1882,6 +1883,7 @@ fs_initcall(ipv4_offload_init);
 static struct packet_type ip_packet_type __read_mostly = {
 	.type = cpu_to_be16(ETH_P_IP),
 	.func = ip_rcv,
+	.list_func = ip_list_rcv,
 };
 
 static int __init inet_init(void)
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 7cf755ef9efb..bbeecd13e534 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -28,8 +28,8 @@
 #include <linux/spinlock.h>
 #include <net/udp.h>
 
-static struct sk_buff **esp4_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *esp4_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
 	int offset = skb_gro_offset(skb);
 	struct xfrm_offload *xo;
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index c9ec1603666b..500a59906b87 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -224,14 +224,14 @@ drop:
 	return 0;
 }
 
-static struct sk_buff **fou_gro_receive(struct sock *sk,
-					struct sk_buff **head,
-					struct sk_buff *skb)
+static struct sk_buff *fou_gro_receive(struct sock *sk,
+				       struct list_head *head,
+				       struct sk_buff *skb)
 {
-	const struct net_offload *ops;
-	struct sk_buff **pp = NULL;
 	u8 proto = fou_from_sock(sk)->protocol;
 	const struct net_offload **offloads;
+	const struct net_offload *ops;
+	struct sk_buff *pp = NULL;
 
 	/* We can clear the encap_mark for FOU as we are essentially doing
 	 * one of two possible things.  We are either adding an L4 tunnel
@@ -305,13 +305,13 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
 	return guehdr;
 }
 
-static struct sk_buff **gue_gro_receive(struct sock *sk,
-					struct sk_buff **head,
-					struct sk_buff *skb)
+static struct sk_buff *gue_gro_receive(struct sock *sk,
+				       struct list_head *head,
+				       struct sk_buff *skb)
 {
 	const struct net_offload **offloads;
 	const struct net_offload *ops;
-	struct sk_buff **pp = NULL;
+	struct sk_buff *pp = NULL;
 	struct sk_buff *p;
 	struct guehdr *guehdr;
 	size_t len, optlen, hdrlen, off;
@@ -397,7 +397,7 @@ static struct sk_buff **gue_gro_receive(struct sock *sk,
 
 	skb_gro_pull(skb, hdrlen);
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		const struct guehdr *guehdr2;
 
 		if (!NAPI_GRO_CB(p)->same_flow)
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 6a7d980105f6..6c63524f598a 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -108,10 +108,10 @@ out:
 	return segs;
 }
 
-static struct sk_buff **gre_gro_receive(struct sk_buff **head,
-					struct sk_buff *skb)
+static struct sk_buff *gre_gro_receive(struct list_head *head,
+				       struct sk_buff *skb)
 {
-	struct sk_buff **pp = NULL;
+	struct sk_buff *pp = NULL;
 	struct sk_buff *p;
 	const struct gre_base_hdr *greh;
 	unsigned int hlen, grehlen;
@@ -182,7 +182,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
 					     null_compute_pseudo);
 	}
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		const struct gre_base_hdr *greh2;
 
 		if (!NAPI_GRO_CB(p)->same_flow)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 1617604c9284..695979b7ef6d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -429,14 +429,11 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 
 	icmp_param->data.icmph.checksum = 0;
 
+	ipcm_init(&ipc);
 	inet->tos = ip_hdr(skb)->tos;
 	sk->sk_mark = mark;
 	daddr = ipc.addr = ip_hdr(skb)->saddr;
 	saddr = fib_compute_spec_dst(skb);
-	ipc.opt = NULL;
-	ipc.tx_flags = 0;
-	ipc.ttl = 0;
-	ipc.tos = -1;
 
 	if (icmp_param->replyopts.opt.opt.optlen) {
 		ipc.opt = &icmp_param->replyopts.opt;
@@ -710,11 +707,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	icmp_param.offset = skb_network_offset(skb_in);
 	inet_sk(sk)->tos = tos;
 	sk->sk_mark = mark;
+	ipcm_init(&ipc);
 	ipc.addr = iph->saddr;
 	ipc.opt = &icmp_param.replyopts.opt;
-	ipc.tx_flags = 0;
-	ipc.ttl = 0;
-	ipc.tos = -1;
 
 	rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
 			       type, code, &icmp_param);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 1e4cf3ab560f..d3162baca9f1 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -20,6 +20,7 @@
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
 #include <linux/slab.h>
+#include <linux/rhashtable.h>
 
 #include <net/sock.h>
 #include <net/inet_frag.h>
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2d8efeecf619..c8ca5d8f0f75 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -587,6 +587,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
 		goto err_free_skb;
 
 	key = &tun_info->key;
+	if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
+		goto err_free_rt;
 	md = ip_tunnel_info_opts(tun_info);
 	if (!md)
 		goto err_free_rt;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 7582713dd18f..3196cf58f418 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -307,7 +307,8 @@ drop:
 	return true;
 }
 
-static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+static int ip_rcv_finish_core(struct net *net, struct sock *sk,
+			      struct sk_buff *skb)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	int (*edemux)(struct sk_buff *skb);
@@ -315,13 +316,6 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 	struct rtable *rt;
 	int err;
 
-	/* if ingress device is enslaved to an L3 master device pass the
-	 * skb to its handler for processing
-	 */
-	skb = l3mdev_ip_rcv(skb);
-	if (!skb)
-		return NET_RX_SUCCESS;
-
 	if (net->ipv4.sysctl_ip_early_demux &&
 	    !skb_dst(skb) &&
 	    !skb->sk &&
@@ -393,7 +387,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 			goto drop;
 	}
 
-	return dst_input(skb);
+	return NET_RX_SUCCESS;
 
 drop:
 	kfree_skb(skb);
@@ -405,13 +399,29 @@ drop_error:
 	goto drop;
 }
 
+static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+	int ret;
+
+	/* if ingress device is enslaved to an L3 master device pass the
+	 * skb to its handler for processing
+	 */
+	skb = l3mdev_ip_rcv(skb);
+	if (!skb)
+		return NET_RX_SUCCESS;
+
+	ret = ip_rcv_finish_core(net, sk, skb);
+	if (ret != NET_RX_DROP)
+		ret = dst_input(skb);
+	return ret;
+}
+
 /*
  * 	Main IP Receive routine.
  */
-int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
+static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
 {
 	const struct iphdr *iph;
-	struct net *net;
 	u32 len;
 
 	/* When the interface is in promisc. mode, drop all the crap
@@ -421,7 +431,6 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 		goto drop;
 
 
-	net = dev_net(dev);
 	__IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);
 
 	skb = skb_share_check(skb, GFP_ATOMIC);
@@ -489,9 +498,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	/* Must drop socket now because of tproxy. */
 	skb_orphan(skb);
 
-	return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
-		       net, NULL, skb, dev, NULL,
-		       ip_rcv_finish);
+	return skb;
 
 csum_error:
 	__IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
@@ -500,5 +507,113 @@ inhdr_error:
 drop:
 	kfree_skb(skb);
 out:
-	return NET_RX_DROP;
+	return NULL;
+}
+
+/*
+ * IP receive entry point
+ */
+int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
+	   struct net_device *orig_dev)
+{
+	struct net *net = dev_net(dev);
+
+	skb = ip_rcv_core(skb, net);
+	if (skb == NULL)
+		return NET_RX_DROP;
+	return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
+		       net, NULL, skb, dev, NULL,
+		       ip_rcv_finish);
+}
+
+static void ip_sublist_rcv_finish(struct list_head *head)
+{
+	struct sk_buff *skb, *next;
+
+	list_for_each_entry_safe(skb, next, head, list) {
+		list_del(&skb->list);
+		/* Handle ip{6}_forward case, as sch_direct_xmit have
+		 * another kind of SKB-list usage (see validate_xmit_skb_list)
+		 */
+		skb->next = NULL;
+		dst_input(skb);
+	}
+}
+
+static void ip_list_rcv_finish(struct net *net, struct sock *sk,
+			       struct list_head *head)
+{
+	struct dst_entry *curr_dst = NULL;
+	struct sk_buff *skb, *next;
+	struct list_head sublist;
+
+	INIT_LIST_HEAD(&sublist);
+	list_for_each_entry_safe(skb, next, head, list) {
+		struct dst_entry *dst;
+
+		list_del(&skb->list);
+		/* if ingress device is enslaved to an L3 master device pass the
+		 * skb to its handler for processing
+		 */
+		skb = l3mdev_ip_rcv(skb);
+		if (!skb)
+			continue;
+		if (ip_rcv_finish_core(net, sk, skb) == NET_RX_DROP)
+			continue;
+
+		dst = skb_dst(skb);
+		if (curr_dst != dst) {
+			/* dispatch old sublist */
+			if (!list_empty(&sublist))
+				ip_sublist_rcv_finish(&sublist);
+			/* start new sublist */
+			INIT_LIST_HEAD(&sublist);
+			curr_dst = dst;
+		}
+		list_add_tail(&skb->list, &sublist);
+	}
+	/* dispatch final sublist */
+	ip_sublist_rcv_finish(&sublist);
+}
+
+static void ip_sublist_rcv(struct list_head *head, struct net_device *dev,
+			   struct net *net)
+{
+	NF_HOOK_LIST(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL,
+		     head, dev, NULL, ip_rcv_finish);
+	ip_list_rcv_finish(net, NULL, head);
+}
+
+/* Receive a list of IP packets */
+void ip_list_rcv(struct list_head *head, struct packet_type *pt,
+		 struct net_device *orig_dev)
+{
+	struct net_device *curr_dev = NULL;
+	struct net *curr_net = NULL;
+	struct sk_buff *skb, *next;
+	struct list_head sublist;
+
+	INIT_LIST_HEAD(&sublist);
+	list_for_each_entry_safe(skb, next, head, list) {
+		struct net_device *dev = skb->dev;
+		struct net *net = dev_net(dev);
+
+		list_del(&skb->list);
+		skb = ip_rcv_core(skb, net);
+		if (skb == NULL)
+			continue;
+
+		if (curr_dev != dev || curr_net != net) {
+			/* dispatch old sublist */
+			if (!list_empty(&sublist))
+				ip_sublist_rcv(&sublist, curr_dev, curr_net);
+			/* start new sublist */
+			INIT_LIST_HEAD(&sublist);
+			curr_dev = dev;
+			curr_net = net;
+		}
+		list_add_tail(&skb->list, &sublist);
+	}
+	/* dispatch final sublist */
+	ip_sublist_rcv(&sublist, curr_dev, curr_net);
 }
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index b3308e9d9762..e2b6bd478afb 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -423,7 +423,8 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4)
 }
 
 /* Note: skb->sk can be different from sk, in case of tunnels */
-int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
+int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
+		    __u8 tos)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct net *net = sock_net(sk);
@@ -462,7 +463,7 @@ int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
 					   inet->inet_dport,
 					   inet->inet_sport,
 					   sk->sk_protocol,
-					   RT_CONN_FLAGS(sk),
+					   RT_CONN_FLAGS_TOS(sk, tos),
 					   sk->sk_bound_dev_if);
 		if (IS_ERR(rt))
 			goto no_route;
@@ -478,7 +479,7 @@ packet_routed:
 	skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0));
 	skb_reset_network_header(skb);
 	iph = ip_hdr(skb);
-	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
+	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (tos & 0xff));
 	if (ip_dont_fragment(sk, &rt->dst) && !skb->ignore_df)
 		iph->frag_off = htons(IP_DF);
 	else
@@ -511,7 +512,7 @@ no_route:
 	kfree_skb(skb);
 	return -EHOSTUNREACH;
 }
-EXPORT_SYMBOL(ip_queue_xmit);
+EXPORT_SYMBOL(__ip_queue_xmit);
 
 static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 {
@@ -1145,14 +1146,15 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
 	cork->fragsize = ip_sk_use_pmtu(sk) ?
 			 dst_mtu(&rt->dst) : rt->dst.dev->mtu;
 
-	cork->gso_size = sk->sk_type == SOCK_DGRAM &&
-			 sk->sk_protocol == IPPROTO_UDP ? ipc->gso_size : 0;
+	cork->gso_size = ipc->gso_size;
 	cork->dst = &rt->dst;
 	cork->length = 0;
 	cork->ttl = ipc->ttl;
 	cork->tos = ipc->tos;
 	cork->priority = ipc->priority;
-	cork->tx_flags = ipc->tx_flags;
+	cork->transmit_time = ipc->sockc.transmit_time;
+	cork->tx_flags = 0;
+	sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags);
 
 	return 0;
 }
@@ -1413,6 +1415,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
 
 	skb->priority = (cork->tos != -1) ? cork->priority: sk->sk_priority;
 	skb->mark = sk->sk_mark;
+	skb->tstamp = cork->transmit_time;
 	/*
 	 * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
 	 * on dst refcount
@@ -1545,11 +1548,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
 	if (__ip_options_echo(net, &replyopts.opt.opt, skb, sopt))
 		return;
 
+	ipcm_init(&ipc);
 	ipc.addr = daddr;
-	ipc.opt = NULL;
-	ipc.tx_flags = 0;
-	ipc.ttl = 0;
-	ipc.tos = -1;
 
 	if (replyopts.opt.opt.optlen) {
 		ipc.opt = &replyopts.opt;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9f79b9803a16..5660adcf7a04 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -60,6 +60,7 @@
 #include <linux/netfilter_ipv4.h>
 #include <linux/compat.h>
 #include <linux/export.h>
+#include <linux/rhashtable.h>
 #include <net/ip_tunnels.h>
 #include <net/checksum.h>
 #include <net/netlink.h>
@@ -1051,7 +1052,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
 	struct sk_buff *skb;
 	int ret;
 
-	if (assert == IGMPMSG_WHOLEPKT)
+	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE)
 		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
 	else
 		skb = alloc_skb(128, GFP_ATOMIC);
@@ -1059,7 +1060,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
 	if (!skb)
 		return -ENOBUFS;
 
-	if (assert == IGMPMSG_WHOLEPKT) {
+	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) {
 		/* Ugly, but we have no choice with this interface.
 		 * Duplicate old header, fix ihl, length etc.
 		 * And all this only to mangle msg->im_msgtype and
@@ -1070,9 +1071,12 @@ static int ipmr_cache_report(struct mr_table *mrt,
 		skb_reset_transport_header(skb);
 		msg = (struct igmpmsg *)skb_network_header(skb);
 		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
-		msg->im_msgtype = IGMPMSG_WHOLEPKT;
+		msg->im_msgtype = assert;
 		msg->im_mbz = 0;
-		msg->im_vif = mrt->mroute_reg_vif_num;
+		if (assert == IGMPMSG_WRVIFWHOLE)
+			msg->im_vif = vifi;
+		else
+			msg->im_vif = mrt->mroute_reg_vif_num;
 		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
 		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
 					     sizeof(struct iphdr));
@@ -1371,6 +1375,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
 	struct mr_table *mrt;
 	struct vifctl vif;
 	struct mfcctl mfc;
+	bool do_wrvifwhole;
 	u32 uval;
 
 	/* There's one exception to the lock - MRT_DONE which needs to unlock */
@@ -1501,10 +1506,12 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
 			break;
 		}
 
+		do_wrvifwhole = (val == IGMPMSG_WRVIFWHOLE);
 		val = !!val;
 		if (val != mrt->mroute_do_pim) {
 			mrt->mroute_do_pim = val;
 			mrt->mroute_do_assert = val;
+			mrt->mroute_do_wrvifwhole = do_wrvifwhole;
 		}
 		break;
 	case MRT_TABLE:
@@ -1982,6 +1989,9 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
 			       MFC_ASSERT_THRESH)) {
 			c->_c.mfc_un.res.last_assert = jiffies;
 			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
+			if (mrt->mroute_do_wrvifwhole)
+				ipmr_cache_report(mrt, skb, true_vifi,
+						  IGMPMSG_WRVIFWHOLE);
 		}
 		goto dont_forward;
 	}
@@ -2658,7 +2668,9 @@ static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb)
 			mrt->mroute_reg_vif_num) ||
 	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT,
 		       mrt->mroute_do_assert) ||
-	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim))
+	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim) ||
+	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_WRVIFWHOLE,
+		       mrt->mroute_do_wrvifwhole))
 		return false;
 
 	return true;
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index cafb0506c8c9..1ad9aa62a97b 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -2,6 +2,7 @@
  * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation
  */
 
+#include <linux/rhashtable.h>
 #include <linux/mroute_base.h>
 
 /* Sets everything common except 'dev', since that is done under locking */
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index 4388de0e5380..1e6f28c97d3a 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -35,7 +35,7 @@ static const struct nf_loginfo default_loginfo = {
 };
 
 /* One level of recursion won't kill us */
-static void dump_ipv4_packet(struct nf_log_buf *m,
+static void dump_ipv4_packet(struct net *net, struct nf_log_buf *m,
 			     const struct nf_loginfo *info,
 			     const struct sk_buff *skb, unsigned int iphoff)
 {
@@ -183,7 +183,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m,
 			/* Max length: 3+maxlen */
 			if (!iphoff) { /* Only recurse once. */
 				nf_log_buf_add(m, "[");
-				dump_ipv4_packet(m, info, skb,
+				dump_ipv4_packet(net, m, info, skb,
 					    iphoff + ih->ihl*4+sizeof(_icmph));
 				nf_log_buf_add(m, "] ");
 			}
@@ -251,7 +251,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m,
 
 	/* Max length: 15 "UID=4294967295 " */
 	if ((logflags & NF_LOG_UID) && !iphoff)
-		nf_log_dump_sk_uid_gid(m, skb->sk);
+		nf_log_dump_sk_uid_gid(net, m, skb->sk);
 
 	/* Max length: 16 "MARK=0xFFFFFFFF " */
 	if (!iphoff && skb->mark)
@@ -333,7 +333,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf,
 	if (in != NULL)
 		dump_ipv4_mac_header(m, loginfo, skb);
 
-	dump_ipv4_packet(m, loginfo, skb, 0);
+	dump_ipv4_packet(net, m, loginfo, skb, 0);
 
 	nf_log_buf_close(m);
 }
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 2ed64bca54e3..b54c964ad925 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -739,13 +739,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		/* no remote port */
 	}
 
-	ipc.sockc.tsflags = sk->sk_tsflags;
-	ipc.addr = inet->inet_saddr;
-	ipc.opt = NULL;
-	ipc.oif = sk->sk_bound_dev_if;
-	ipc.tx_flags = 0;
-	ipc.ttl = 0;
-	ipc.tos = -1;
+	ipcm_init_sk(&ipc, inet);
 
 	if (msg->msg_controllen) {
 		err = ip_cmsg_send(sk, msg, &ipc, false);
@@ -769,8 +763,6 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		rcu_read_unlock();
 	}
 
-	sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags);
-
 	saddr = ipc.addr;
 	ipc.addr = faddr = daddr;
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 77350c1256ce..b46e4cf9a55a 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -287,6 +287,8 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPDelivered", LINUX_MIB_TCPDELIVERED),
 	SNMP_MIB_ITEM("TCPDeliveredCE", LINUX_MIB_TCPDELIVEREDCE),
 	SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED),
+	SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP),
+	SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index abb3c9490c55..33df4d76db2d 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -381,6 +381,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
+	skb->tstamp = sockc->transmit_time;
 	skb_dst_set(skb, &rt->dst);
 	*rtp = NULL;
 
@@ -561,13 +562,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		daddr = inet->inet_daddr;
 	}
 
-	ipc.sockc.tsflags = sk->sk_tsflags;
-	ipc.addr = inet->inet_saddr;
-	ipc.opt = NULL;
-	ipc.tx_flags = 0;
-	ipc.ttl = 0;
-	ipc.tos = -1;
-	ipc.oif = sk->sk_bound_dev_if;
+	ipcm_init_sk(&ipc, inet);
 
 	if (msg->msg_controllen) {
 		err = ip_cmsg_send(sk, msg, &ipc, false);
@@ -670,8 +665,6 @@ back_from_confirm:
 				      &rt, msg->msg_flags, &ipc.sockc);
 
 	 else {
-		sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags);
-
 		if (!ipc.addr)
 			ipc.addr = fl4.daddr;
 		lock_sock(sk);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4491faf83f4f..bce53b1728a6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -817,8 +817,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
 				 * This occurs when user tries to read
 				 * from never connected socket.
 				 */
-				if (!sock_flag(sk, SOCK_DONE))
-					ret = -ENOTCONN;
+				ret = -ENOTCONN;
 				break;
 			}
 			if (!timeo) {
@@ -1241,7 +1240,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 		/* 'common' sending to sendq */
 	}
 
-	sockc.tsflags = sk->sk_tsflags;
+	sockcm_init(&sockc, sk);
 	if (msg->msg_controllen) {
 		err = sock_cmsg_send(sk, msg, &sockc);
 		if (unlikely(err)) {
@@ -1275,9 +1274,6 @@ restart:
 			int linear;
 
 new_segment:
-			/* Allocate new segment. If the interface is SG,
-			 * allocate skb fitting to single page.
-			 */
 			if (!sk_stream_memory_free(sk))
 				goto wait_for_sndbuf;
 
@@ -2042,13 +2038,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 				break;
 
 			if (sk->sk_state == TCP_CLOSE) {
-				if (!sock_flag(sk, SOCK_DONE)) {
-					/* This occurs when user tries to read
-					 * from never connected socket.
-					 */
-					copied = -ENOTCONN;
-					break;
-				}
+				/* This occurs when user tries to read
+				 * from never connected socket.
+				 */
+				copied = -ENOTCONN;
 				break;
 			}
 
@@ -2576,6 +2569,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	sk->sk_shutdown = 0;
 	sock_reset_flag(sk, SOCK_DONE);
 	tp->srtt_us = 0;
+	tp->rcv_rtt_last_tsecr = 0;
 	tp->write_seq += tp->max_window + 2;
 	if (tp->write_seq == 0)
 		tp->write_seq = 1;
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 58e2f479ffb4..3b5f45b9e81e 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -205,7 +205,11 @@ static u32 bbr_bw(const struct sock *sk)
  */
 static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
 {
-	rate *= tcp_mss_to_mtu(sk, tcp_sk(sk)->mss_cache);
+	unsigned int mss = tcp_sk(sk)->mss_cache;
+
+	if (!tcp_needs_internal_pacing(sk))
+		mss = tcp_mss_to_mtu(sk, mss);
+	rate *= mss;
 	rate *= gain;
 	rate >>= BBR_SCALE;
 	rate *= USEC_PER_SEC;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8e5522c6833a..91dbb9afb950 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -78,6 +78,7 @@
 #include <linux/errqueue.h>
 #include <trace/events/tcp.h>
 #include <linux/static_key.h>
+#include <net/busy_poll.h>
 
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 
@@ -582,9 +583,12 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (tp->rx_opt.rcv_tsecr &&
-	    (TCP_SKB_CB(skb)->end_seq -
-	     TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) {
+	if (tp->rx_opt.rcv_tsecr == tp->rcv_rtt_last_tsecr)
+		return;
+	tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
+
+	if (TCP_SKB_CB(skb)->end_seq -
+	    TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
 		u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
 		u32 delta_us;
 
@@ -3458,7 +3462,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
 static void tcp_store_ts_recent(struct tcp_sock *tp)
 {
 	tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
-	tp->rx_opt.ts_recent_stamp = get_seconds();
+	tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
 }
 
 static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
@@ -4339,6 +4343,11 @@ static bool tcp_try_coalesce(struct sock *sk,
 	if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
 		return false;
 
+#ifdef CONFIG_TLS_DEVICE
+	if (from->decrypted != to->decrypted)
+		return false;
+#endif
+
 	if (!skb_try_coalesce(to, from, fragstolen, &delta))
 		return false;
 
@@ -4617,8 +4626,10 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
 	skb->data_len = data_len;
 	skb->len = size;
 
-	if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
+	if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
 		goto err_free;
+	}
 
 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
 	if (err)
@@ -4674,18 +4685,21 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 	 *  Out of sequence packets to the out_of_order_queue.
 	 */
 	if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
-		if (tcp_receive_window(tp) == 0)
+		if (tcp_receive_window(tp) == 0) {
+			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
 			goto out_of_window;
+		}
 
 		/* Ok. In sequence. In window. */
 queue_and_out:
 		if (skb_queue_len(&sk->sk_receive_queue) == 0)
 			sk_forced_mem_schedule(sk, skb->truesize);
-		else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
+		else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
+			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
 			goto drop;
+		}
 
 		eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
-		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 		if (skb->len)
 			tcp_event_data_recv(sk, skb);
 		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -4741,8 +4755,10 @@ drop:
 		/* If window is closed, drop tail of packet. But after
 		 * remembering D-SACK for its head made in previous line.
 		 */
-		if (!tcp_receive_window(tp))
+		if (!tcp_receive_window(tp)) {
+			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
 			goto out_of_window;
+		}
 		goto queue_and_out;
 	}
 
@@ -4860,6 +4876,9 @@ restart:
 			break;
 
 		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
+#ifdef CONFIG_TLS_DEVICE
+		nskb->decrypted = skb->decrypted;
+#endif
 		TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
 		if (list)
 			__skb_queue_before(list, skb, nskb);
@@ -4887,6 +4906,10 @@ restart:
 				    skb == tail ||
 				    (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
 					goto end;
+#ifdef CONFIG_TLS_DEVICE
+				if (skb->decrypted != nskb->decrypted)
+					goto end;
+#endif
 			}
 		}
 	}
@@ -5484,6 +5507,11 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 				tcp_ack(sk, skb, 0);
 				__kfree_skb(skb);
 				tcp_data_snd_check(sk);
+				/* When receiving pure ack in fast path, update
+				 * last ts ecr directly instead of calling
+				 * tcp_rcv_rtt_measure_ts()
+				 */
+				tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
 				return;
 			} else { /* Header too small */
 				TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
@@ -5585,6 +5613,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
 	if (skb) {
 		icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
 		security_inet_conn_established(sk, skb);
+		sk_mark_napi_id(sk, skb);
 	}
 
 	tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
@@ -6413,6 +6442,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	tcp_rsk(req)->snt_isn = isn;
 	tcp_rsk(req)->txhash = net_tx_rndhash();
 	tcp_openreq_init_rwin(req, sk, dst);
+	sk_rx_queue_set(req_to_sk(req), skb);
 	if (!want_cookie) {
 		tcp_reqsk_record_syn(sk, req, skb);
 		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3b2711e33e4c..9e041fa5c545 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -155,7 +155,8 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 	   and use initial timestamp retrieved from peer table.
 	 */
 	if (tcptw->tw_ts_recent_stamp &&
-	    (!twp || (reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
+	    (!twp || (reuse && time_after32(ktime_get_seconds(),
+					    tcptw->tw_ts_recent_stamp)))) {
 		/* In case of repair and re-using TIME-WAIT sockets we still
 		 * want to be sure that it is safe as above but honor the
 		 * sequence numbers and time stamps set as part of the repair
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 1dda1341a223..75ef332a7caf 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -144,7 +144,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 		tw->tw_substate	  = TCP_TIME_WAIT;
 		tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 		if (tmp_opt.saw_tstamp) {
-			tcptw->tw_ts_recent_stamp = get_seconds();
+			tcptw->tw_ts_recent_stamp = ktime_get_seconds();
 			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
 		}
 
@@ -189,7 +189,7 @@ kill:
 
 		if (tmp_opt.saw_tstamp) {
 			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
-			tcptw->tw_ts_recent_stamp = get_seconds();
+			tcptw->tw_ts_recent_stamp = ktime_get_seconds();
 		}
 
 		inet_twsk_put(tw);
@@ -449,119 +449,122 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 				      struct sk_buff *skb)
 {
 	struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC);
+	const struct inet_request_sock *ireq = inet_rsk(req);
+	struct tcp_request_sock *treq = tcp_rsk(req);
+	struct inet_connection_sock *newicsk;
+	struct tcp_sock *oldtp, *newtp;
 
-	if (newsk) {
-		const struct inet_request_sock *ireq = inet_rsk(req);
-		struct tcp_request_sock *treq = tcp_rsk(req);
-		struct inet_connection_sock *newicsk = inet_csk(newsk);
-		struct tcp_sock *newtp = tcp_sk(newsk);
-		struct tcp_sock *oldtp = tcp_sk(sk);
-
-		smc_check_reset_syn_req(oldtp, req, newtp);
-
-		/* Now setup tcp_sock */
-		newtp->pred_flags = 0;
-
-		newtp->rcv_wup = newtp->copied_seq =
-		newtp->rcv_nxt = treq->rcv_isn + 1;
-		newtp->segs_in = 1;
-
-		newtp->snd_sml = newtp->snd_una =
-		newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
-
-		INIT_LIST_HEAD(&newtp->tsq_node);
-		INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
-
-		tcp_init_wl(newtp, treq->rcv_isn);
-
-		newtp->srtt_us = 0;
-		newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
-		minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U);
-		newicsk->icsk_rto = TCP_TIMEOUT_INIT;
-		newicsk->icsk_ack.lrcvtime = tcp_jiffies32;
-
-		newtp->packets_out = 0;
-		newtp->retrans_out = 0;
-		newtp->sacked_out = 0;
-		newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
-		newtp->tlp_high_seq = 0;
-		newtp->lsndtime = tcp_jiffies32;
-		newsk->sk_txhash = treq->txhash;
-		newtp->last_oow_ack_time = 0;
-		newtp->total_retrans = req->num_retrans;
-
-		/* So many TCP implementations out there (incorrectly) count the
-		 * initial SYN frame in their delayed-ACK and congestion control
-		 * algorithms that we must have the following bandaid to talk
-		 * efficiently to them.  -DaveM
-		 */
-		newtp->snd_cwnd = TCP_INIT_CWND;
-		newtp->snd_cwnd_cnt = 0;
-
-		/* There's a bubble in the pipe until at least the first ACK. */
-		newtp->app_limited = ~0U;
-
-		tcp_init_xmit_timers(newsk);
-		newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1;
-
-		newtp->rx_opt.saw_tstamp = 0;
-
-		newtp->rx_opt.dsack = 0;
-		newtp->rx_opt.num_sacks = 0;
-
-		newtp->urg_data = 0;
-
-		if (sock_flag(newsk, SOCK_KEEPOPEN))
-			inet_csk_reset_keepalive_timer(newsk,
-						       keepalive_time_when(newtp));
-
-		newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
-		newtp->rx_opt.sack_ok = ireq->sack_ok;
-		newtp->window_clamp = req->rsk_window_clamp;
-		newtp->rcv_ssthresh = req->rsk_rcv_wnd;
-		newtp->rcv_wnd = req->rsk_rcv_wnd;
-		newtp->rx_opt.wscale_ok = ireq->wscale_ok;
-		if (newtp->rx_opt.wscale_ok) {
-			newtp->rx_opt.snd_wscale = ireq->snd_wscale;
-			newtp->rx_opt.rcv_wscale = ireq->rcv_wscale;
-		} else {
-			newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
-			newtp->window_clamp = min(newtp->window_clamp, 65535U);
-		}
-		newtp->snd_wnd = (ntohs(tcp_hdr(skb)->window) <<
-				  newtp->rx_opt.snd_wscale);
-		newtp->max_window = newtp->snd_wnd;
-
-		if (newtp->rx_opt.tstamp_ok) {
-			newtp->rx_opt.ts_recent = req->ts_recent;
-			newtp->rx_opt.ts_recent_stamp = get_seconds();
-			newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
-		} else {
-			newtp->rx_opt.ts_recent_stamp = 0;
-			newtp->tcp_header_len = sizeof(struct tcphdr);
-		}
-		newtp->tsoffset = treq->ts_off;
+	if (!newsk)
+		return NULL;
+
+	newicsk = inet_csk(newsk);
+	newtp = tcp_sk(newsk);
+	oldtp = tcp_sk(sk);
+
+	smc_check_reset_syn_req(oldtp, req, newtp);
+
+	/* Now setup tcp_sock */
+	newtp->pred_flags = 0;
+
+	newtp->rcv_wup = newtp->copied_seq =
+	newtp->rcv_nxt = treq->rcv_isn + 1;
+	newtp->segs_in = 1;
+
+	newtp->snd_sml = newtp->snd_una =
+	newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
+
+	INIT_LIST_HEAD(&newtp->tsq_node);
+	INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
+
+	tcp_init_wl(newtp, treq->rcv_isn);
+
+	newtp->srtt_us = 0;
+	newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
+	minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U);
+	newicsk->icsk_rto = TCP_TIMEOUT_INIT;
+	newicsk->icsk_ack.lrcvtime = tcp_jiffies32;
+
+	newtp->packets_out = 0;
+	newtp->retrans_out = 0;
+	newtp->sacked_out = 0;
+	newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
+	newtp->tlp_high_seq = 0;
+	newtp->lsndtime = tcp_jiffies32;
+	newsk->sk_txhash = treq->txhash;
+	newtp->last_oow_ack_time = 0;
+	newtp->total_retrans = req->num_retrans;
+
+	/* So many TCP implementations out there (incorrectly) count the
+	 * initial SYN frame in their delayed-ACK and congestion control
+	 * algorithms that we must have the following bandaid to talk
+	 * efficiently to them.  -DaveM
+	 */
+	newtp->snd_cwnd = TCP_INIT_CWND;
+	newtp->snd_cwnd_cnt = 0;
+
+	/* There's a bubble in the pipe until at least the first ACK. */
+	newtp->app_limited = ~0U;
+
+	tcp_init_xmit_timers(newsk);
+	newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1;
+
+	newtp->rx_opt.saw_tstamp = 0;
+
+	newtp->rx_opt.dsack = 0;
+	newtp->rx_opt.num_sacks = 0;
+
+	newtp->urg_data = 0;
+
+	if (sock_flag(newsk, SOCK_KEEPOPEN))
+		inet_csk_reset_keepalive_timer(newsk,
+					       keepalive_time_when(newtp));
+
+	newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
+	newtp->rx_opt.sack_ok = ireq->sack_ok;
+	newtp->window_clamp = req->rsk_window_clamp;
+	newtp->rcv_ssthresh = req->rsk_rcv_wnd;
+	newtp->rcv_wnd = req->rsk_rcv_wnd;
+	newtp->rx_opt.wscale_ok = ireq->wscale_ok;
+	if (newtp->rx_opt.wscale_ok) {
+		newtp->rx_opt.snd_wscale = ireq->snd_wscale;
+		newtp->rx_opt.rcv_wscale = ireq->rcv_wscale;
+	} else {
+		newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
+		newtp->window_clamp = min(newtp->window_clamp, 65535U);
+	}
+	newtp->snd_wnd = ntohs(tcp_hdr(skb)->window) << newtp->rx_opt.snd_wscale;
+	newtp->max_window = newtp->snd_wnd;
+
+	if (newtp->rx_opt.tstamp_ok) {
+		newtp->rx_opt.ts_recent = req->ts_recent;
+		newtp->rx_opt.ts_recent_stamp = ktime_get_seconds();
+		newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
+	} else {
+		newtp->rx_opt.ts_recent_stamp = 0;
+		newtp->tcp_header_len = sizeof(struct tcphdr);
+	}
+	newtp->tsoffset = treq->ts_off;
 #ifdef CONFIG_TCP_MD5SIG
-		newtp->md5sig_info = NULL;	/*XXX*/
-		if (newtp->af_specific->md5_lookup(sk, newsk))
-			newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
+	newtp->md5sig_info = NULL;	/*XXX*/
+	if (newtp->af_specific->md5_lookup(sk, newsk))
+		newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
 #endif
-		if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
-			newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
-		newtp->rx_opt.mss_clamp = req->mss;
-		tcp_ecn_openreq_child(newtp, req);
-		newtp->fastopen_req = NULL;
-		newtp->fastopen_rsk = NULL;
-		newtp->syn_data_acked = 0;
-		newtp->rack.mstamp = 0;
-		newtp->rack.advanced = 0;
-		newtp->rack.reo_wnd_steps = 1;
-		newtp->rack.last_delivered = 0;
-		newtp->rack.reo_wnd_persist = 0;
-		newtp->rack.dsack_seen = 0;
-
-		__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
-	}
+	if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
+		newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
+	newtp->rx_opt.mss_clamp = req->mss;
+	tcp_ecn_openreq_child(newtp, req);
+	newtp->fastopen_req = NULL;
+	newtp->fastopen_rsk = NULL;
+	newtp->syn_data_acked = 0;
+	newtp->rack.mstamp = 0;
+	newtp->rack.advanced = 0;
+	newtp->rack.reo_wnd_steps = 1;
+	newtp->rack.last_delivered = 0;
+	newtp->rack.reo_wnd_persist = 0;
+	newtp->rack.dsack_seen = 0;
+
+	__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
+
 	return newsk;
 }
 EXPORT_SYMBOL(tcp_create_openreq_child);
@@ -600,7 +603,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 			 * it can be estimated (approximately)
 			 * from another data.
 			 */
-			tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout);
+			tmp_opt.ts_recent_stamp = ktime_get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout);
 			paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
 		}
 	}
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 8cc7c3487330..870b0a335061 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -180,9 +180,9 @@ out:
 	return segs;
 }
 
-struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
-	struct sk_buff **pp = NULL;
+	struct sk_buff *pp = NULL;
 	struct sk_buff *p;
 	struct tcphdr *th;
 	struct tcphdr *th2;
@@ -220,7 +220,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	len = skb_gro_len(skb);
 	flags = tcp_flag_word(th);
 
-	for (; (p = *head); head = &p->next) {
+	list_for_each_entry(p, head, list) {
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
 
@@ -233,7 +233,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 
 		goto found;
 	}
-
+	p = NULL;
 	goto out_check_final;
 
 found:
@@ -262,8 +262,11 @@ found:
 
 	flush |= (len - 1) >= mss;
 	flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
+#ifdef CONFIG_TLS_DEVICE
+	flush |= p->decrypted ^ skb->decrypted;
+#endif
 
-	if (flush || skb_gro_receive(head, skb)) {
+	if (flush || skb_gro_receive(p, skb)) {
 		mss = 1;
 		goto out_check_final;
 	}
@@ -277,7 +280,7 @@ out_check_final:
 					TCP_FLAG_FIN));
 
 	if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
-		pp = head;
+		pp = p;
 
 out:
 	NAPI_GRO_CB(skb)->flush |= (flush != 0);
@@ -302,7 +305,7 @@ int tcp_gro_complete(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(tcp_gro_complete);
 
-static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+static struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
 	/* Don't bother verifying checksum if we're going to flush anyway. */
 	if (!NAPI_GRO_CB(skb)->flush &&
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 00e5a300ddb9..6cbab56e7407 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -973,17 +973,6 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
-/* BBR congestion control needs pacing.
- * Same remark for SO_MAX_PACING_RATE.
- * sch_fq packet scheduler is efficiently handling pacing,
- * but is not always installed/used.
- * Return true if TCP stack should pace packets itself.
- */
-static bool tcp_needs_internal_pacing(const struct sock *sk)
-{
-	return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
-}
-
 static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
 {
 	u64 len_ns;
@@ -995,9 +984,6 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
 	if (!rate || rate == ~0U)
 		return;
 
-	/* Should account for header sizes as sch_fq does,
-	 * but lets make things simple.
-	 */
 	len_ns = (u64)skb->len * NSEC_PER_SEC;
 	do_div(len_ns, rate);
 	hrtimer_start(&tcp_sk(sk)->pacing_timer,
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index c61240e43923..4dff40dad4dc 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -146,6 +146,10 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
 				    rs->prior_mstamp); /* ack phase */
 	rs->interval_us = max(snd_us, ack_us);
 
+	/* Record both segment send and ack receive intervals */
+	rs->snd_interval_us = snd_us;
+	rs->rcv_interval_us = ack_us;
+
 	/* Normally we expect interval_us >= min-rtt.
 	 * Note that rate may still be over-estimated when a spuriously
 	 * retransmistted skb was first (s)acked because "interval_us"
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 24e116ddae79..060e841dde40 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -926,11 +926,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */
 		return -EOPNOTSUPP;
 
-	ipc.opt = NULL;
-	ipc.tx_flags = 0;
-	ipc.ttl = 0;
-	ipc.tos = -1;
-
 	getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
 
 	fl4 = &inet->cork.fl.u.ip4;
@@ -977,9 +972,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		connected = 1;
 	}
 
-	ipc.sockc.tsflags = sk->sk_tsflags;
-	ipc.addr = inet->inet_saddr;
-	ipc.oif = sk->sk_bound_dev_if;
+	ipcm_init_sk(&ipc, inet);
 	ipc.gso_size = up->gso_size;
 
 	if (msg->msg_controllen) {
@@ -1027,8 +1020,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	saddr = ipc.addr;
 	ipc.addr = faddr = daddr;
 
-	sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags);
-
 	if (ipc.opt && ipc.opt->opt.srr) {
 		if (!daddr) {
 			err = -EINVAL;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 69c54540d5b4..0c0522b79b43 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -343,10 +343,11 @@ out:
 	return segs;
 }
 
-struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
-				 struct udphdr *uh, udp_lookup_t lookup)
+struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
+				struct udphdr *uh, udp_lookup_t lookup)
 {
-	struct sk_buff *p, **pp = NULL;
+	struct sk_buff *pp = NULL;
+	struct sk_buff *p;
 	struct udphdr *uh2;
 	unsigned int off = skb_gro_offset(skb);
 	int flush = 1;
@@ -371,7 +372,7 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
 unflush:
 	flush = 0;
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
 
@@ -399,8 +400,8 @@ out:
 }
 EXPORT_SYMBOL(udp_gro_receive);
 
-static struct sk_buff **udp4_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *udp4_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
 	struct udphdr *uh = udp_gro_udphdr(skb);
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 91580c62bb86..1659a6b3cf42 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -385,8 +385,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 
 	if (ndev->cnf.stable_secret.initialized)
 		ndev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
-	else
-		ndev->cnf.addr_gen_mode = ipv6_devconf_dflt.addr_gen_mode;
 
 	ndev->cnf.mtu6 = dev->mtu;
 	ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
@@ -5210,7 +5208,9 @@ static inline size_t inet6_ifla6_size(void)
 	     + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
 	     + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
 	     + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
-	     + nla_total_size(sizeof(struct in6_addr)); /* IFLA_INET6_TOKEN */
+	     + nla_total_size(sizeof(struct in6_addr)) /* IFLA_INET6_TOKEN */
+	     + nla_total_size(1) /* IFLA_INET6_ADDR_GEN_MODE */
+	     + 0;
 }
 
 static inline size_t inet6_if_nlmsg_size(void)
@@ -5892,32 +5892,31 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
 					 loff_t *ppos)
 {
 	int ret = 0;
-	int new_val;
+	u32 new_val;
 	struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1;
 	struct net *net = (struct net *)ctl->extra2;
+	struct ctl_table tmp = {
+		.data = &new_val,
+		.maxlen = sizeof(new_val),
+		.mode = ctl->mode,
+	};
 
 	if (!rtnl_trylock())
 		return restart_syscall();
 
-	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+	new_val = *((u32 *)ctl->data);
 
-	if (write) {
-		new_val = *((int *)ctl->data);
+	ret = proc_douintvec(&tmp, write, buffer, lenp, ppos);
+	if (ret != 0)
+		goto out;
 
+	if (write) {
 		if (check_addr_gen_mode(new_val) < 0) {
 			ret = -EINVAL;
 			goto out;
 		}
 
-		/* request for default */
-		if (&net->ipv6.devconf_dflt->addr_gen_mode == ctl->data) {
-			ipv6_devconf_dflt.addr_gen_mode = new_val;
-
-		/* request for individual net device */
-		} else {
-			if (!idev)
-				goto out;
-
+		if (idev) {
 			if (check_stable_privacy(idev, net, new_val) < 0) {
 				ret = -EINVAL;
 				goto out;
@@ -5927,7 +5926,21 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
 				idev->cnf.addr_gen_mode = new_val;
 				addrconf_dev_config(idev->dev);
 			}
+		} else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) {
+			struct net_device *dev;
+
+			net->ipv6.devconf_dflt->addr_gen_mode = new_val;
+			for_each_netdev(net, dev) {
+				idev = __in6_dev_get(dev);
+				if (idev &&
+				    idev->cnf.addr_gen_mode != new_val) {
+					idev->cnf.addr_gen_mode = new_val;
+					addrconf_dev_config(idev->dev);
+				}
+			}
 		}
+
+		*((u32 *)ctl->data) = new_val;
 	}
 
 out:
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 9ed0eae91758..c9535354149f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -764,6 +764,7 @@ EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
 static struct packet_type ipv6_packet_type __read_mostly = {
 	.type = cpu_to_be16(ETH_P_IPV6),
 	.func = ipv6_rcv,
+	.list_func = ipv6_list_rcv,
 };
 
 static int __init ipv6_packet_init(void)
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 2ee08b6a86a4..201306b9b5ea 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -736,7 +736,7 @@ EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl);
 
 int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
 			  struct msghdr *msg, struct flowi6 *fl6,
-			  struct ipcm6_cookie *ipc6, struct sockcm_cookie *sockc)
+			  struct ipcm6_cookie *ipc6)
 {
 	struct in6_pktinfo *src_info;
 	struct cmsghdr *cmsg;
@@ -755,7 +755,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
 		}
 
 		if (cmsg->cmsg_level == SOL_SOCKET) {
-			err = __sock_cmsg_send(sk, msg, cmsg, sockc);
+			err = __sock_cmsg_send(sk, msg, cmsg, &ipc6->sockc);
 			if (err)
 				return err;
 			continue;
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 27f59b61f70f..ddfa533a84e5 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -49,8 +49,8 @@ static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen)
 	return 0;
 }
 
-static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *esp6_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
 	int offset = skb_gro_offset(skb);
 	struct xfrm_offload *xo;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index be491bf6ab6e..24611c8b0562 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -430,7 +430,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 	struct icmp6hdr tmp_hdr;
 	struct flowi6 fl6;
 	struct icmpv6_msg msg;
-	struct sockcm_cookie sockc_unused = {0};
 	struct ipcm6_cookie ipc6;
 	int iif = 0;
 	int addr_type = 0;
@@ -545,7 +544,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 	else if (!fl6.flowi6_oif)
 		fl6.flowi6_oif = np->ucast_oif;
 
-	ipc6.tclass = np->tclass;
+	ipcm6_init_sk(&ipc6, np);
 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
 
 	dst = icmpv6_route_lookup(net, skb, sk, &fl6);
@@ -553,8 +552,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 		goto out;
 
 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
-	ipc6.dontfrag = np->dontfrag;
-	ipc6.opt = NULL;
 
 	msg.skb = skb;
 	msg.offset = skb_network_offset(skb);
@@ -575,7 +572,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
 			    len + sizeof(struct icmp6hdr),
 			    sizeof(struct icmp6hdr),
 			    &ipc6, &fl6, (struct rt6_info *)dst,
-			    MSG_DONTWAIT, &sockc_unused)) {
+			    MSG_DONTWAIT)) {
 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
 		ip6_flush_pending_frames(sk);
 	} else {
@@ -679,7 +676,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	struct dst_entry *dst;
 	struct ipcm6_cookie ipc6;
 	u32 mark = IP6_REPLY_MARK(net, skb->mark);
-	struct sockcm_cookie sockc_unused = {0};
 
 	saddr = &ipv6_hdr(skb)->daddr;
 
@@ -726,16 +722,14 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	msg.offset = 0;
 	msg.type = ICMPV6_ECHO_REPLY;
 
+	ipcm6_init_sk(&ipc6, np);
 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
 	ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
-	ipc6.dontfrag = np->dontfrag;
-	ipc6.opt = NULL;
 
 	if (ip6_append_data(sk, icmpv6_getfrag, &msg,
 			    skb->len + sizeof(struct icmp6hdr),
 			    sizeof(struct icmp6hdr), &ipc6, &fl6,
-			    (struct rt6_info *)dst, MSG_DONTWAIT,
-			    &sockc_unused)) {
+			    (struct rt6_info *)dst, MSG_DONTWAIT)) {
 		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
 		ip6_flush_pending_frames(sk);
 	} else {
diff --git a/net/ipv6/ila/Makefile b/net/ipv6/ila/Makefile
index 4b32e5921e5c..b7739aba6e68 100644
--- a/net/ipv6/ila/Makefile
+++ b/net/ipv6/ila/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_IPV6_ILA) += ila.o
 
-ila-objs := ila_common.o ila_lwt.o ila_xlat.o
+ila-objs := ila_main.o ila_common.o ila_lwt.o ila_xlat.o
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
index 3c7a11b62334..1f747bcbec29 100644
--- a/net/ipv6/ila/ila.h
+++ b/net/ipv6/ila/ila.h
@@ -19,6 +19,7 @@
 #include <linux/skbuff.h>
 #include <linux/types.h>
 #include <net/checksum.h>
+#include <net/genetlink.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <uapi/linux/ila.h>
@@ -104,9 +105,31 @@ void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p,
 
 void ila_init_saved_csum(struct ila_params *p);
 
+struct ila_net {
+	struct {
+		struct rhashtable rhash_table;
+		spinlock_t *locks; /* Bucket locks for entry manipulation */
+		unsigned int locks_mask;
+		bool hooks_registered;
+	} xlat;
+};
+
 int ila_lwt_init(void);
 void ila_lwt_fini(void);
-int ila_xlat_init(void);
-void ila_xlat_fini(void);
+
+int ila_xlat_init_net(struct net *net);
+void ila_xlat_exit_net(struct net *net);
+
+int ila_xlat_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info);
+int ila_xlat_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info);
+int ila_xlat_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info);
+int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info);
+int ila_xlat_nl_dump_start(struct netlink_callback *cb);
+int ila_xlat_nl_dump_done(struct netlink_callback *cb);
+int ila_xlat_nl_dump(struct sk_buff *skb, struct netlink_callback *cb);
+
+extern unsigned int ila_net_id;
+
+extern struct genl_family ila_nl_family;
 
 #endif /* __ILA_H */
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index 8c88ecf29b93..579310466eac 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -154,33 +154,3 @@ void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p,
 	iaddr->loc = p->locator;
 }
 
-static int __init ila_init(void)
-{
-	int ret;
-
-	ret = ila_lwt_init();
-
-	if (ret)
-		goto fail_lwt;
-
-	ret = ila_xlat_init();
-	if (ret)
-		goto fail_xlat;
-
-	return 0;
-fail_xlat:
-	ila_lwt_fini();
-fail_lwt:
-	return ret;
-}
-
-static void __exit ila_fini(void)
-{
-	ila_xlat_fini();
-	ila_lwt_fini();
-}
-
-module_init(ila_init);
-module_exit(ila_fini);
-MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>");
-MODULE_LICENSE("GPL");
diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c
new file mode 100644
index 000000000000..18fac76b9520
--- /dev/null
+++ b/net/ipv6/ila/ila_main.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <net/genetlink.h>
+#include <net/ila.h>
+#include <net/netns/generic.h>
+#include <uapi/linux/genetlink.h>
+#include "ila.h"
+
+static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
+	[ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
+	[ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, },
+	[ILA_ATTR_IFINDEX] = { .type = NLA_U32, },
+	[ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
+	[ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, },
+};
+
+static const struct genl_ops ila_nl_ops[] = {
+	{
+		.cmd = ILA_CMD_ADD,
+		.doit = ila_xlat_nl_cmd_add_mapping,
+		.policy = ila_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = ILA_CMD_DEL,
+		.doit = ila_xlat_nl_cmd_del_mapping,
+		.policy = ila_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = ILA_CMD_FLUSH,
+		.doit = ila_xlat_nl_cmd_flush,
+		.policy = ila_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = ILA_CMD_GET,
+		.doit = ila_xlat_nl_cmd_get_mapping,
+		.start = ila_xlat_nl_dump_start,
+		.dumpit = ila_xlat_nl_dump,
+		.done = ila_xlat_nl_dump_done,
+		.policy = ila_nl_policy,
+	},
+};
+
+unsigned int ila_net_id;
+
+struct genl_family ila_nl_family __ro_after_init = {
+	.hdrsize	= 0,
+	.name		= ILA_GENL_NAME,
+	.version	= ILA_GENL_VERSION,
+	.maxattr	= ILA_ATTR_MAX,
+	.netnsok	= true,
+	.parallel_ops	= true,
+	.module		= THIS_MODULE,
+	.ops		= ila_nl_ops,
+	.n_ops		= ARRAY_SIZE(ila_nl_ops),
+};
+
+static __net_init int ila_init_net(struct net *net)
+{
+	int err;
+
+	err = ila_xlat_init_net(net);
+	if (err)
+		goto ila_xlat_init_fail;
+
+	return 0;
+
+ila_xlat_init_fail:
+	return err;
+}
+
+static __net_exit void ila_exit_net(struct net *net)
+{
+	ila_xlat_exit_net(net);
+}
+
+static struct pernet_operations ila_net_ops = {
+	.init = ila_init_net,
+	.exit = ila_exit_net,
+	.id   = &ila_net_id,
+	.size = sizeof(struct ila_net),
+};
+
+static int __init ila_init(void)
+{
+	int ret;
+
+	ret = register_pernet_device(&ila_net_ops);
+	if (ret)
+		goto register_device_fail;
+
+	ret = genl_register_family(&ila_nl_family);
+	if (ret)
+		goto register_family_fail;
+
+	ret = ila_lwt_init();
+	if (ret)
+		goto fail_lwt;
+
+	return 0;
+
+fail_lwt:
+	genl_unregister_family(&ila_nl_family);
+register_family_fail:
+	unregister_pernet_device(&ila_net_ops);
+register_device_fail:
+	return ret;
+}
+
+static void __exit ila_fini(void)
+{
+	ila_lwt_fini();
+	genl_unregister_family(&ila_nl_family);
+	unregister_pernet_device(&ila_net_ops);
+}
+
+module_init(ila_init);
+module_exit(ila_fini);
+MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>");
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 10ae13560b40..51a15ce50a64 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -22,36 +22,14 @@ struct ila_map {
 	struct rcu_head rcu;
 };
 
-static unsigned int ila_net_id;
-
-struct ila_net {
-	struct rhashtable rhash_table;
-	spinlock_t *locks; /* Bucket locks for entry manipulation */
-	unsigned int locks_mask;
-	bool hooks_registered;
-};
-
+#define MAX_LOCKS 1024
 #define	LOCKS_PER_CPU 10
 
 static int alloc_ila_locks(struct ila_net *ilan)
 {
-	unsigned int i, size;
-	unsigned int nr_pcpus = num_possible_cpus();
-
-	nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
-	size = roundup_pow_of_two(nr_pcpus * LOCKS_PER_CPU);
-
-	if (sizeof(spinlock_t) != 0) {
-		ilan->locks = kvmalloc_array(size, sizeof(spinlock_t),
-					     GFP_KERNEL);
-		if (!ilan->locks)
-			return -ENOMEM;
-		for (i = 0; i < size; i++)
-			spin_lock_init(&ilan->locks[i]);
-	}
-	ilan->locks_mask = size - 1;
-
-	return 0;
+	return alloc_bucket_spinlocks(&ilan->xlat.locks, &ilan->xlat.locks_mask,
+				      MAX_LOCKS, LOCKS_PER_CPU,
+				      GFP_KERNEL);
 }
 
 static u32 hashrnd __read_mostly;
@@ -71,7 +49,7 @@ static inline u32 ila_locator_hash(struct ila_locator loc)
 static inline spinlock_t *ila_get_lock(struct ila_net *ilan,
 				       struct ila_locator loc)
 {
-	return &ilan->locks[ila_locator_hash(loc) & ilan->locks_mask];
+	return &ilan->xlat.locks[ila_locator_hash(loc) & ilan->xlat.locks_mask];
 }
 
 static inline int ila_cmp_wildcards(struct ila_map *ila,
@@ -115,16 +93,6 @@ static const struct rhashtable_params rht_params = {
 	.obj_cmpfn = ila_cmpfn,
 };
 
-static struct genl_family ila_nl_family;
-
-static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
-	[ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
-	[ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, },
-	[ILA_ATTR_IFINDEX] = { .type = NLA_U32, },
-	[ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
-	[ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, },
-};
-
 static int parse_nl_config(struct genl_info *info,
 			   struct ila_xlat_params *xp)
 {
@@ -162,7 +130,7 @@ static inline struct ila_map *ila_lookup_wildcards(struct ila_addr *iaddr,
 {
 	struct ila_map *ila;
 
-	ila = rhashtable_lookup_fast(&ilan->rhash_table, &iaddr->loc,
+	ila = rhashtable_lookup_fast(&ilan->xlat.rhash_table, &iaddr->loc,
 				     rht_params);
 	while (ila) {
 		if (!ila_cmp_wildcards(ila, iaddr, ifindex))
@@ -179,7 +147,7 @@ static inline struct ila_map *ila_lookup_by_params(struct ila_xlat_params *xp,
 {
 	struct ila_map *ila;
 
-	ila = rhashtable_lookup_fast(&ilan->rhash_table,
+	ila = rhashtable_lookup_fast(&ilan->xlat.rhash_table,
 				     &xp->ip.locator_match,
 				     rht_params);
 	while (ila) {
@@ -196,9 +164,9 @@ static inline void ila_release(struct ila_map *ila)
 	kfree_rcu(ila, rcu);
 }
 
-static void ila_free_cb(void *ptr, void *arg)
+static void ila_free_node(struct ila_map *ila)
 {
-	struct ila_map *ila = (struct ila_map *)ptr, *next;
+	struct ila_map *next;
 
 	/* Assume rcu_readlock held */
 	while (ila) {
@@ -208,6 +176,11 @@ static void ila_free_cb(void *ptr, void *arg)
 	}
 }
 
+static void ila_free_cb(void *ptr, void *arg)
+{
+	ila_free_node((struct ila_map *)ptr);
+}
+
 static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila);
 
 static unsigned int
@@ -235,7 +208,7 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp)
 	spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match);
 	int err = 0, order;
 
-	if (!ilan->hooks_registered) {
+	if (!ilan->xlat.hooks_registered) {
 		/* We defer registering net hooks in the namespace until the
 		 * first mapping is added.
 		 */
@@ -244,7 +217,7 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp)
 		if (err)
 			return err;
 
-		ilan->hooks_registered = true;
+		ilan->xlat.hooks_registered = true;
 	}
 
 	ila = kzalloc(sizeof(*ila), GFP_KERNEL);
@@ -259,12 +232,12 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp)
 
 	spin_lock(lock);
 
-	head = rhashtable_lookup_fast(&ilan->rhash_table,
+	head = rhashtable_lookup_fast(&ilan->xlat.rhash_table,
 				      &xp->ip.locator_match,
 				      rht_params);
 	if (!head) {
 		/* New entry for the rhash_table */
-		err = rhashtable_lookup_insert_fast(&ilan->rhash_table,
+		err = rhashtable_lookup_insert_fast(&ilan->xlat.rhash_table,
 						    &ila->node, rht_params);
 	} else {
 		struct ila_map *tila = head, *prev = NULL;
@@ -290,7 +263,7 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp)
 		} else {
 			/* Make this ila new head */
 			RCU_INIT_POINTER(ila->next, head);
-			err = rhashtable_replace_fast(&ilan->rhash_table,
+			err = rhashtable_replace_fast(&ilan->xlat.rhash_table,
 						      &head->node,
 						      &ila->node, rht_params);
 			if (err)
@@ -316,7 +289,7 @@ static int ila_del_mapping(struct net *net, struct ila_xlat_params *xp)
 
 	spin_lock(lock);
 
-	head = rhashtable_lookup_fast(&ilan->rhash_table,
+	head = rhashtable_lookup_fast(&ilan->xlat.rhash_table,
 				      &xp->ip.locator_match, rht_params);
 	ila = head;
 
@@ -346,15 +319,15 @@ static int ila_del_mapping(struct net *net, struct ila_xlat_params *xp)
 				 * table
 				 */
 				err = rhashtable_replace_fast(
-					&ilan->rhash_table, &ila->node,
+					&ilan->xlat.rhash_table, &ila->node,
 					&head->node, rht_params);
 				if (err)
 					goto out;
 			} else {
 				/* Entry no longer used */
-				err = rhashtable_remove_fast(&ilan->rhash_table,
-							     &ila->node,
-							     rht_params);
+				err = rhashtable_remove_fast(
+						&ilan->xlat.rhash_table,
+						&ila->node, rht_params);
 			}
 		}
 
@@ -369,7 +342,7 @@ out:
 	return err;
 }
 
-static int ila_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info)
+int ila_xlat_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info)
 {
 	struct net *net = genl_info_net(info);
 	struct ila_xlat_params p;
@@ -382,7 +355,7 @@ static int ila_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info)
 	return ila_add_mapping(net, &p);
 }
 
-static int ila_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info)
+int ila_xlat_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info)
 {
 	struct net *net = genl_info_net(info);
 	struct ila_xlat_params xp;
@@ -397,6 +370,59 @@ static int ila_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info)
 	return 0;
 }
 
+static inline spinlock_t *lock_from_ila_map(struct ila_net *ilan,
+					    struct ila_map *ila)
+{
+	return ila_get_lock(ilan, ila->xp.ip.locator_match);
+}
+
+int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net *net = genl_info_net(info);
+	struct ila_net *ilan = net_generic(net, ila_net_id);
+	struct rhashtable_iter iter;
+	struct ila_map *ila;
+	spinlock_t *lock;
+	int ret;
+
+	ret = rhashtable_walk_init(&ilan->xlat.rhash_table, &iter, GFP_KERNEL);
+	if (ret)
+		goto done;
+
+	rhashtable_walk_start(&iter);
+
+	for (;;) {
+		ila = rhashtable_walk_next(&iter);
+
+		if (IS_ERR(ila)) {
+			if (PTR_ERR(ila) == -EAGAIN)
+				continue;
+			ret = PTR_ERR(ila);
+			goto done;
+		} else if (!ila) {
+			break;
+		}
+
+		lock = lock_from_ila_map(ilan, ila);
+
+		spin_lock(lock);
+
+		ret = rhashtable_remove_fast(&ilan->xlat.rhash_table,
+					     &ila->node, rht_params);
+		if (!ret)
+			ila_free_node(ila);
+
+		spin_unlock(lock);
+
+		if (ret)
+			break;
+	}
+
+done:
+	rhashtable_walk_stop(&iter);
+	return ret;
+}
+
 static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg)
 {
 	if (nla_put_u64_64bit(msg, ILA_ATTR_LOCATOR,
@@ -434,7 +460,7 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static int ila_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info)
+int ila_xlat_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info)
 {
 	struct net *net = genl_info_net(info);
 	struct ila_net *ilan = net_generic(net, ila_net_id);
@@ -475,27 +501,34 @@ out_free:
 
 struct ila_dump_iter {
 	struct rhashtable_iter rhiter;
+	int skip;
 };
 
-static int ila_nl_dump_start(struct netlink_callback *cb)
+int ila_xlat_nl_dump_start(struct netlink_callback *cb)
 {
 	struct net *net = sock_net(cb->skb->sk);
 	struct ila_net *ilan = net_generic(net, ila_net_id);
-	struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0];
+	struct ila_dump_iter *iter;
+	int ret;
 
-	if (!iter) {
-		iter = kmalloc(sizeof(*iter), GFP_KERNEL);
-		if (!iter)
-			return -ENOMEM;
+	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+	if (!iter)
+		return -ENOMEM;
 
-		cb->args[0] = (long)iter;
+	ret = rhashtable_walk_init(&ilan->xlat.rhash_table, &iter->rhiter,
+				   GFP_KERNEL);
+	if (ret) {
+		kfree(iter);
+		return ret;
 	}
 
-	return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter,
-				    GFP_KERNEL);
+	iter->skip = 0;
+	cb->args[0] = (long)iter;
+
+	return ret;
 }
 
-static int ila_nl_dump_done(struct netlink_callback *cb)
+int ila_xlat_nl_dump_done(struct netlink_callback *cb)
 {
 	struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0];
 
@@ -506,24 +539,49 @@ static int ila_nl_dump_done(struct netlink_callback *cb)
 	return 0;
 }
 
-static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
+int ila_xlat_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0];
 	struct rhashtable_iter *rhiter = &iter->rhiter;
+	int skip = iter->skip;
 	struct ila_map *ila;
 	int ret;
 
 	rhashtable_walk_start(rhiter);
 
-	for (;;) {
-		ila = rhashtable_walk_next(rhiter);
+	/* Get first entry */
+	ila = rhashtable_walk_peek(rhiter);
+
+	if (ila && !IS_ERR(ila) && skip) {
+		/* Skip over visited entries */
 
+		while (ila && skip) {
+			/* Skip over any ila entries in this list that we
+			 * have already dumped.
+			 */
+			ila = rcu_access_pointer(ila->next);
+			skip--;
+		}
+	}
+
+	skip = 0;
+
+	for (;;) {
 		if (IS_ERR(ila)) {
-			if (PTR_ERR(ila) == -EAGAIN)
-				continue;
 			ret = PTR_ERR(ila);
-			goto done;
+			if (ret == -EAGAIN) {
+				/* Table has changed and iter has reset. Return
+				 * -EAGAIN to the application even if we have
+				 * written data to the skb. The application
+				 * needs to deal with this.
+				 */
+
+				goto out_ret;
+			} else {
+				break;
+			}
 		} else if (!ila) {
+			ret = 0;
 			break;
 		}
 
@@ -532,90 +590,54 @@ static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
 					     cb->nlh->nlmsg_seq, NLM_F_MULTI,
 					     skb, ILA_CMD_GET);
 			if (ret)
-				goto done;
+				goto out;
 
+			skip++;
 			ila = rcu_access_pointer(ila->next);
 		}
+
+		skip = 0;
+		ila = rhashtable_walk_next(rhiter);
 	}
 
-	ret = skb->len;
+out:
+	iter->skip = skip;
+	ret = (skb->len ? : ret);
 
-done:
+out_ret:
 	rhashtable_walk_stop(rhiter);
 	return ret;
 }
 
-static const struct genl_ops ila_nl_ops[] = {
-	{
-		.cmd = ILA_CMD_ADD,
-		.doit = ila_nl_cmd_add_mapping,
-		.policy = ila_nl_policy,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = ILA_CMD_DEL,
-		.doit = ila_nl_cmd_del_mapping,
-		.policy = ila_nl_policy,
-		.flags = GENL_ADMIN_PERM,
-	},
-	{
-		.cmd = ILA_CMD_GET,
-		.doit = ila_nl_cmd_get_mapping,
-		.start = ila_nl_dump_start,
-		.dumpit = ila_nl_dump,
-		.done = ila_nl_dump_done,
-		.policy = ila_nl_policy,
-	},
-};
-
-static struct genl_family ila_nl_family __ro_after_init = {
-	.hdrsize	= 0,
-	.name		= ILA_GENL_NAME,
-	.version	= ILA_GENL_VERSION,
-	.maxattr	= ILA_ATTR_MAX,
-	.netnsok	= true,
-	.parallel_ops	= true,
-	.module		= THIS_MODULE,
-	.ops		= ila_nl_ops,
-	.n_ops		= ARRAY_SIZE(ila_nl_ops),
-};
-
 #define ILA_HASH_TABLE_SIZE 1024
 
-static __net_init int ila_init_net(struct net *net)
+int ila_xlat_init_net(struct net *net)
 {
-	int err;
 	struct ila_net *ilan = net_generic(net, ila_net_id);
+	int err;
 
 	err = alloc_ila_locks(ilan);
 	if (err)
 		return err;
 
-	rhashtable_init(&ilan->rhash_table, &rht_params);
+	rhashtable_init(&ilan->xlat.rhash_table, &rht_params);
 
 	return 0;
 }
 
-static __net_exit void ila_exit_net(struct net *net)
+void ila_xlat_exit_net(struct net *net)
 {
 	struct ila_net *ilan = net_generic(net, ila_net_id);
 
-	rhashtable_free_and_destroy(&ilan->rhash_table, ila_free_cb, NULL);
+	rhashtable_free_and_destroy(&ilan->xlat.rhash_table, ila_free_cb, NULL);
 
-	kvfree(ilan->locks);
+	free_bucket_spinlocks(ilan->xlat.locks);
 
-	if (ilan->hooks_registered)
+	if (ilan->xlat.hooks_registered)
 		nf_unregister_net_hooks(net, ila_nf_hook_ops,
 					ARRAY_SIZE(ila_nf_hook_ops));
 }
 
-static struct pernet_operations ila_net_ops = {
-	.init = ila_init_net,
-	.exit = ila_exit_net,
-	.id   = &ila_net_id,
-	.size = sizeof(struct ila_net),
-};
-
 static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
 {
 	struct ila_map *ila;
@@ -642,28 +664,3 @@ static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
 	return 0;
 }
 
-int __init ila_xlat_init(void)
-{
-	int ret;
-
-	ret = register_pernet_device(&ila_net_ops);
-	if (ret)
-		goto exit;
-
-	ret = genl_register_family(&ila_nl_family);
-	if (ret < 0)
-		goto unregister;
-
-	return 0;
-
-unregister:
-	unregister_pernet_device(&ila_net_ops);
-exit:
-	return ret;
-}
-
-void ila_xlat_fini(void)
-{
-	genl_unregister_family(&ila_nl_family);
-	unregister_pernet_device(&ila_net_ops);
-}
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 3eee7637bdfe..cb54a8a3c273 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -373,7 +373,6 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
 	if (olen > 0) {
 		struct msghdr msg;
 		struct flowi6 flowi6;
-		struct sockcm_cookie sockc_junk;
 		struct ipcm6_cookie ipc6;
 
 		err = -ENOMEM;
@@ -392,7 +391,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
 		memset(&flowi6, 0, sizeof(flowi6));
 
 		ipc6.opt = fl->opt;
-		err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6, &sockc_junk);
+		err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6);
 		if (err)
 			goto done;
 		err = -EINVAL;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index cd2cfb04e5d8..fc7dd3a04360 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -989,6 +989,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
 		fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
 
 		dsfield = key->tos;
+		if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
+			goto tx_err;
 		md = ip_tunnel_info_opts(tun_info);
 		if (!md)
 			goto tx_err;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index f08d34491ece..6242682be876 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -47,17 +47,11 @@
 #include <net/inet_ecn.h>
 #include <net/dst_metadata.h>
 
-int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
+				struct sk_buff *skb)
 {
 	void (*edemux)(struct sk_buff *skb);
 
-	/* if ingress device is enslaved to an L3 master device pass the
-	 * skb to its handler for processing
-	 */
-	skb = l3mdev_ip6_rcv(skb);
-	if (!skb)
-		return NET_RX_SUCCESS;
-
 	if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
 		const struct inet6_protocol *ipprot;
 
@@ -67,20 +61,73 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 	}
 	if (!skb_valid_dst(skb))
 		ip6_route_input(skb);
+}
+
+int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+	/* if ingress device is enslaved to an L3 master device pass the
+	 * skb to its handler for processing
+	 */
+	skb = l3mdev_ip6_rcv(skb);
+	if (!skb)
+		return NET_RX_SUCCESS;
+	ip6_rcv_finish_core(net, sk, skb);
 
 	return dst_input(skb);
 }
 
-int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
+static void ip6_sublist_rcv_finish(struct list_head *head)
+{
+	struct sk_buff *skb, *next;
+
+	list_for_each_entry_safe(skb, next, head, list)
+		dst_input(skb);
+}
+
+static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
+				struct list_head *head)
+{
+	struct dst_entry *curr_dst = NULL;
+	struct sk_buff *skb, *next;
+	struct list_head sublist;
+
+	INIT_LIST_HEAD(&sublist);
+	list_for_each_entry_safe(skb, next, head, list) {
+		struct dst_entry *dst;
+
+		list_del(&skb->list);
+		/* if ingress device is enslaved to an L3 master device pass the
+		 * skb to its handler for processing
+		 */
+		skb = l3mdev_ip6_rcv(skb);
+		if (!skb)
+			continue;
+		ip6_rcv_finish_core(net, sk, skb);
+		dst = skb_dst(skb);
+		if (curr_dst != dst) {
+			/* dispatch old sublist */
+			if (!list_empty(&sublist))
+				ip6_sublist_rcv_finish(&sublist);
+			/* start new sublist */
+			INIT_LIST_HEAD(&sublist);
+			curr_dst = dst;
+		}
+		list_add_tail(&skb->list, &sublist);
+	}
+	/* dispatch final sublist */
+	ip6_sublist_rcv_finish(&sublist);
+}
+
+static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
+				    struct net *net)
 {
 	const struct ipv6hdr *hdr;
 	u32 pkt_len;
 	struct inet6_dev *idev;
-	struct net *net = dev_net(skb->dev);
 
 	if (skb->pkt_type == PACKET_OTHERHOST) {
 		kfree_skb(skb);
-		return NET_RX_DROP;
+		return NULL;
 	}
 
 	rcu_read_lock();
@@ -196,7 +243,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 		if (ipv6_parse_hopopts(skb) < 0) {
 			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 			rcu_read_unlock();
-			return NET_RX_DROP;
+			return NULL;
 		}
 	}
 
@@ -205,15 +252,67 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	/* Must drop socket now because of tproxy. */
 	skb_orphan(skb);
 
-	return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
-		       net, NULL, skb, dev, NULL,
-		       ip6_rcv_finish);
+	return skb;
 err:
 	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
 drop:
 	rcu_read_unlock();
 	kfree_skb(skb);
-	return NET_RX_DROP;
+	return NULL;
+}
+
+int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
+{
+	struct net *net = dev_net(skb->dev);
+
+	skb = ip6_rcv_core(skb, dev, net);
+	if (skb == NULL)
+		return NET_RX_DROP;
+	return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
+		       net, NULL, skb, dev, NULL,
+		       ip6_rcv_finish);
+}
+
+static void ip6_sublist_rcv(struct list_head *head, struct net_device *dev,
+			    struct net *net)
+{
+	NF_HOOK_LIST(NFPROTO_IPV6, NF_INET_PRE_ROUTING, net, NULL,
+		     head, dev, NULL, ip6_rcv_finish);
+	ip6_list_rcv_finish(net, NULL, head);
+}
+
+/* Receive a list of IPv6 packets */
+void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
+		   struct net_device *orig_dev)
+{
+	struct net_device *curr_dev = NULL;
+	struct net *curr_net = NULL;
+	struct sk_buff *skb, *next;
+	struct list_head sublist;
+
+	INIT_LIST_HEAD(&sublist);
+	list_for_each_entry_safe(skb, next, head, list) {
+		struct net_device *dev = skb->dev;
+		struct net *net = dev_net(dev);
+
+		list_del(&skb->list);
+		skb = ip6_rcv_core(skb, dev, net);
+		if (skb == NULL)
+			continue;
+
+		if (curr_dev != dev || curr_net != net) {
+			/* dispatch old sublist */
+			if (!list_empty(&sublist))
+				ip6_sublist_rcv(&sublist, curr_dev, curr_net);
+			/* start new sublist */
+			INIT_LIST_HEAD(&sublist);
+			curr_dev = dev;
+			curr_net = net;
+		}
+		list_add_tail(&skb->list, &sublist);
+	}
+	/* dispatch final sublist */
+	ip6_sublist_rcv(&sublist, curr_dev, curr_net);
 }
 
 /*
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 5b3f2f89ef41..37ff4805b20c 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -163,11 +163,11 @@ static int ipv6_exthdrs_len(struct ipv6hdr *iph,
 	return len;
 }
 
-static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *ipv6_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
 	const struct net_offload *ops;
-	struct sk_buff **pp = NULL;
+	struct sk_buff *pp = NULL;
 	struct sk_buff *p;
 	struct ipv6hdr *iph;
 	unsigned int nlen;
@@ -214,7 +214,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
 	flush--;
 	nlen = skb_network_header_len(skb);
 
-	for (p = *head; p; p = p->next) {
+	list_for_each_entry(p, head, list) {
 		const struct ipv6hdr *iph2;
 		__be32 first_word; /* <Version:4><Traffic_Class:8><Flow_Label:20> */
 
@@ -263,8 +263,8 @@ out:
 	return pp;
 }
 
-static struct sk_buff **sit_ip6ip6_gro_receive(struct sk_buff **head,
-					       struct sk_buff *skb)
+static struct sk_buff *sit_ip6ip6_gro_receive(struct list_head *head,
+					      struct sk_buff *skb)
 {
 	/* Common GRO receive for SIT and IP6IP6 */
 
@@ -278,8 +278,8 @@ static struct sk_buff **sit_ip6ip6_gro_receive(struct sk_buff **head,
 	return ipv6_gro_receive(head, skb);
 }
 
-static struct sk_buff **ip4ip6_gro_receive(struct sk_buff **head,
-					   struct sk_buff *skb)
+static struct sk_buff *ip4ip6_gro_receive(struct list_head *head,
+					  struct sk_buff *skb)
 {
 	/* Common GRO receive for SIT and IP6IP6 */
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a14fb4fcdf18..8047fd41ba88 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1219,13 +1219,16 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
 	if (mtu < IPV6_MIN_MTU)
 		return -EINVAL;
 	cork->base.fragsize = mtu;
-	cork->base.gso_size = sk->sk_type == SOCK_DGRAM &&
-			      sk->sk_protocol == IPPROTO_UDP ? ipc6->gso_size : 0;
+	cork->base.gso_size = ipc6->gso_size;
+	cork->base.tx_flags = 0;
+	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
 
 	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
 		cork->base.flags |= IPCORK_ALLFRAG;
 	cork->base.length = 0;
 
+	cork->base.transmit_time = ipc6->sockc.transmit_time;
+
 	return 0;
 }
 
@@ -1238,8 +1241,7 @@ static int __ip6_append_data(struct sock *sk,
 			     int getfrag(void *from, char *to, int offset,
 					 int len, int odd, struct sk_buff *skb),
 			     void *from, int length, int transhdrlen,
-			     unsigned int flags, struct ipcm6_cookie *ipc6,
-			     const struct sockcm_cookie *sockc)
+			     unsigned int flags, struct ipcm6_cookie *ipc6)
 {
 	struct sk_buff *skb, *skb_prev = NULL;
 	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
@@ -1249,7 +1251,6 @@ static int __ip6_append_data(struct sock *sk,
 	int copy;
 	int err;
 	int offset = 0;
-	__u8 tx_flags = 0;
 	u32 tskey = 0;
 	struct rt6_info *rt = (struct rt6_info *)cork->dst;
 	struct ipv6_txoptions *opt = v6_cork->opt;
@@ -1268,6 +1269,10 @@ static int __ip6_append_data(struct sock *sk,
 	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
 	orig_mtu = mtu;
 
+	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
+	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+		tskey = sk->sk_tskey++;
+
 	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
 
 	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
@@ -1317,13 +1322,6 @@ emsgsize:
 	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
 		csummode = CHECKSUM_PARTIAL;
 
-	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
-		sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
-		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
-		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
-			tskey = sk->sk_tskey++;
-	}
-
 	/*
 	 * Let's try using as much space as possible.
 	 * Use MTU if total length of the message fits into the MTU.
@@ -1442,8 +1440,8 @@ alloc_new_skb:
 				    dst_exthdrlen);
 
 			/* Only the initial fragment is time stamped */
-			skb_shinfo(skb)->tx_flags = tx_flags;
-			tx_flags = 0;
+			skb_shinfo(skb)->tx_flags = cork->tx_flags;
+			cork->tx_flags = 0;
 			skb_shinfo(skb)->tskey = tskey;
 			tskey = 0;
 
@@ -1560,8 +1558,7 @@ int ip6_append_data(struct sock *sk,
 				int odd, struct sk_buff *skb),
 		    void *from, int length, int transhdrlen,
 		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
-		    struct rt6_info *rt, unsigned int flags,
-		    const struct sockcm_cookie *sockc)
+		    struct rt6_info *rt, unsigned int flags)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -1589,7 +1586,7 @@ int ip6_append_data(struct sock *sk,
 
 	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
 				 &np->cork, sk_page_frag(sk), getfrag,
-				 from, length, transhdrlen, flags, ipc6, sockc);
+				 from, length, transhdrlen, flags, ipc6);
 }
 EXPORT_SYMBOL_GPL(ip6_append_data);
 
@@ -1673,6 +1670,8 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
 
+	skb->tstamp = cork->base.transmit_time;
+
 	skb_dst_set(skb, dst_clone(&rt->dst));
 	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
 	if (proto == IPPROTO_ICMPV6) {
@@ -1747,8 +1746,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
 			     void *from, int length, int transhdrlen,
 			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
 			     struct rt6_info *rt, unsigned int flags,
-			     struct inet_cork_full *cork,
-			     const struct sockcm_cookie *sockc)
+			     struct inet_cork_full *cork)
 {
 	struct inet6_cork v6_cork;
 	struct sk_buff_head queue;
@@ -1776,7 +1774,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
 	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
 				&current->task_frag, getfrag, from,
 				length + exthdrlen, transhdrlen + exthdrlen,
-				flags, ipc6, sockc);
+				flags, ipc6);
 	if (err) {
 		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
 		return ERR_PTR(err);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 0d0f0053bb11..d0b7e0249c13 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -32,6 +32,7 @@
 #include <linux/seq_file.h>
 #include <linux/init.h>
 #include <linux/compat.h>
+#include <linux/rhashtable.h>
 #include <net/protocol.h>
 #include <linux/skbuff.h>
 #include <net/raw.h>
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 568ca4187cd1..c0cac9cc3a28 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -500,7 +500,6 @@ sticky_done:
 		struct ipv6_txoptions *opt = NULL;
 		struct msghdr msg;
 		struct flowi6 fl6;
-		struct sockcm_cookie sockc_junk;
 		struct ipcm6_cookie ipc6;
 
 		memset(&fl6, 0, sizeof(fl6));
@@ -533,7 +532,7 @@ sticky_done:
 		msg.msg_control = (void *)(opt+1);
 		ipc6.opt = opt;
 
-		retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, &ipc6, &sockc_junk);
+		retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, &ipc6);
 		if (retv)
 			goto done;
 update:
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index b397a8fe88b9..c6bf580d0f33 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -36,7 +36,7 @@ static const struct nf_loginfo default_loginfo = {
 };
 
 /* One level of recursion won't kill us */
-static void dump_ipv6_packet(struct nf_log_buf *m,
+static void dump_ipv6_packet(struct net *net, struct nf_log_buf *m,
 			     const struct nf_loginfo *info,
 			     const struct sk_buff *skb, unsigned int ip6hoff,
 			     int recurse)
@@ -258,7 +258,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
 			/* Max length: 3+maxlen */
 			if (recurse) {
 				nf_log_buf_add(m, "[");
-				dump_ipv6_packet(m, info, skb,
+				dump_ipv6_packet(net, m, info, skb,
 						 ptr + sizeof(_icmp6h), 0);
 				nf_log_buf_add(m, "] ");
 			}
@@ -278,7 +278,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m,
 
 	/* Max length: 15 "UID=4294967295 " */
 	if ((logflags & NF_LOG_UID) && recurse)
-		nf_log_dump_sk_uid_gid(m, skb->sk);
+		nf_log_dump_sk_uid_gid(net, m, skb->sk);
 
 	/* Max length: 16 "MARK=0xFFFFFFFF " */
 	if (recurse && skb->mark)
@@ -365,7 +365,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
 	if (in != NULL)
 		dump_ipv6_mac_header(m, loginfo, skb);
 
-	dump_ipv6_packet(m, loginfo, skb, skb_network_offset(skb), 1);
+	dump_ipv6_packet(net, m, loginfo, skb, skb_network_offset(skb), 1);
 
 	nf_log_buf_close(m);
 }
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 96f56bf49a30..4c04bccc7417 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -62,7 +62,6 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	struct dst_entry *dst;
 	struct rt6_info *rt;
 	struct pingfakehdr pfh;
-	struct sockcm_cookie junk = {0};
 	struct ipcm6_cookie ipc6;
 
 	pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
@@ -119,7 +118,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	fl6.fl6_icmp_code = user_icmph.icmp6_code;
 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
-	ipc6.tclass = np->tclass;
+	ipcm6_init_sk(&ipc6, np);
 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
 
 	dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, false);
@@ -142,13 +141,11 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	pfh.family = AF_INET6;
 
 	ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
-	ipc6.dontfrag = np->dontfrag;
-	ipc6.opt = NULL;
 
 	lock_sock(sk);
 	err = ip6_append_data(sk, ping_getfrag, &pfh, len,
 			      0, &ipc6, &fl6, rt,
-			      MSG_DONTWAIT, &junk);
+			      MSG_DONTWAIT);
 
 	if (err) {
 		ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index afc307c89d1a..413d98bf24f4 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -620,7 +620,7 @@ out:
 
 static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
 			struct flowi6 *fl6, struct dst_entry **dstp,
-			unsigned int flags)
+			unsigned int flags, const struct sockcm_cookie *sockc)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct net *net = sock_net(sk);
@@ -650,6 +650,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
 	skb->protocol = htons(ETH_P_IPV6);
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
+	skb->tstamp = sockc->transmit_time;
 	skb_dst_set(skb, &rt->dst);
 	*dstp = NULL;
 
@@ -766,7 +767,6 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	struct dst_entry *dst = NULL;
 	struct raw6_frag_vec rfv;
 	struct flowi6 fl6;
-	struct sockcm_cookie sockc;
 	struct ipcm6_cookie ipc6;
 	int addr_len = msg->msg_namelen;
 	u16 proto;
@@ -790,10 +790,8 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	fl6.flowi6_mark = sk->sk_mark;
 	fl6.flowi6_uid = sk->sk_uid;
 
-	ipc6.hlimit = -1;
-	ipc6.tclass = -1;
-	ipc6.dontfrag = -1;
-	ipc6.opt = NULL;
+	ipcm6_init(&ipc6);
+	ipc6.sockc.tsflags = sk->sk_tsflags;
 
 	if (sin6) {
 		if (addr_len < SIN6_LEN_RFC2133)
@@ -847,14 +845,13 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	if (fl6.flowi6_oif == 0)
 		fl6.flowi6_oif = sk->sk_bound_dev_if;
 
-	sockc.tsflags = sk->sk_tsflags;
 	if (msg->msg_controllen) {
 		opt = &opt_space;
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(struct ipv6_txoptions);
 		ipc6.opt = opt;
 
-		err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6, &sockc);
+		err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
@@ -921,13 +918,14 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
 back_from_confirm:
 	if (inet->hdrincl)
-		err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst, msg->msg_flags);
+		err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst,
+					msg->msg_flags, &ipc6.sockc);
 	else {
 		ipc6.opt = opt;
 		lock_sock(sk);
 		err = ip6_append_data(sk, raw6_getfrag, &rfv,
 			len, 0, &ipc6, &fl6, (struct rt6_info *)dst,
-			msg->msg_flags, &sockc);
+			msg->msg_flags);
 
 		if (err)
 			ip6_flush_pending_frames(sk);
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 0fdf2a55e746..8d0ba757a46c 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -17,6 +17,7 @@
 #include <linux/net.h>
 #include <linux/in6.h>
 #include <linux/slab.h>
+#include <linux/rhashtable.h>
 
 #include <net/ipv6.h>
 #include <net/protocol.h>
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index 558fe8cc6d43..8546f94f30d4 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -22,6 +22,7 @@
 #include <linux/icmpv6.h>
 #include <linux/mroute6.h>
 #include <linux/slab.h>
+#include <linux/rhashtable.h>
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index 278e49cd67d4..e72947c99454 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -15,8 +15,8 @@
 #include <net/ip6_checksum.h>
 #include "ip6_offload.h"
 
-static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *tcp6_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
 	/* Don't bother verifying checksum if we're going to flush anyway. */
 	if (!NAPI_GRO_CB(skb)->flush &&
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e6645cae403e..f6b96956a8ed 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1141,13 +1141,10 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	int err;
 	int is_udplite = IS_UDPLITE(sk);
 	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
-	struct sockcm_cookie sockc;
 
-	ipc6.hlimit = -1;
-	ipc6.tclass = -1;
-	ipc6.dontfrag = -1;
+	ipcm6_init(&ipc6);
 	ipc6.gso_size = up->gso_size;
-	sockc.tsflags = sk->sk_tsflags;
+	ipc6.sockc.tsflags = sk->sk_tsflags;
 
 	/* destination address check */
 	if (sin6) {
@@ -1282,7 +1279,7 @@ do_udp_sendmsg:
 		err = udp_cmsg_send(sk, msg, &ipc6.gso_size);
 		if (err > 0)
 			err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6,
-						    &ipc6, &sockc);
+						    &ipc6);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
@@ -1376,7 +1373,7 @@ back_from_confirm:
 		skb = ip6_make_skb(sk, getfrag, msg, ulen,
 				   sizeof(struct udphdr), &ipc6,
 				   &fl6, (struct rt6_info *)dst,
-				   msg->msg_flags, &cork, &sockc);
+				   msg->msg_flags, &cork);
 		err = PTR_ERR(skb);
 		if (!IS_ERR_OR_NULL(skb))
 			err = udp_v6_send_skb(skb, &fl6, &cork.base);
@@ -1402,7 +1399,7 @@ do_append_data:
 	up->len += ulen;
 	err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
 			      &ipc6, &fl6, (struct rt6_info *)dst,
-			      corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, &sockc);
+			      corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
 	if (err)
 		udp_v6_flush_pending_frames(sk);
 	else if (!corkreq)
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 03a2ff3fe1e6..95dee9ca8d22 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -114,8 +114,8 @@ out:
 	return segs;
 }
 
-static struct sk_buff **udp6_gro_receive(struct sk_buff **head,
-					 struct sk_buff *skb)
+static struct sk_buff *udp6_gro_receive(struct list_head *head,
+					struct sk_buff *skb)
 {
 	struct udphdr *uh = udp_gro_udphdr(skb);
 
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 40261cb68e83..1ea285bad84b 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -322,8 +322,7 @@ int l2tp_session_register(struct l2tp_session *session,
 
 	if (tunnel->version == L2TP_HDR_VER_3) {
 		pn = l2tp_pernet(tunnel->l2tp_net);
-		g_head = l2tp_session_id_hash_2(l2tp_pernet(tunnel->l2tp_net),
-						session->session_id);
+		g_head = l2tp_session_id_hash_2(pn, session->session_id);
 
 		spin_lock_bh(&pn->l2tp_session_hlist_lock);
 
@@ -783,7 +782,7 @@ EXPORT_SYMBOL(l2tp_recv_common);
 
 /* Drop skbs from the session's reorder_q
  */
-int l2tp_session_queue_purge(struct l2tp_session *session)
+static int l2tp_session_queue_purge(struct l2tp_session *session)
 {
 	struct sk_buff *skb = NULL;
 	BUG_ON(!session);
@@ -794,7 +793,6 @@ int l2tp_session_queue_purge(struct l2tp_session *session)
 	}
 	return 0;
 }
-EXPORT_SYMBOL_GPL(l2tp_session_queue_purge);
 
 /* Internal UDP receive frame. Do the real work of receiving an L2TP data frame
  * here. The skb is not on a list when we get here.
@@ -1009,8 +1007,8 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
 	return bufp - optr;
 }
 
-static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
-			  struct flowi *fl, size_t data_len)
+static void l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
+			   struct flowi *fl, size_t data_len)
 {
 	struct l2tp_tunnel *tunnel = session->tunnel;
 	unsigned int len = skb->len;
@@ -1052,8 +1050,6 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
 		atomic_long_inc(&tunnel->stats.tx_errors);
 		atomic_long_inc(&session->stats.tx_errors);
 	}
-
-	return 0;
 }
 
 /* If caller requires the skb to have a ppp header, the header must be
@@ -1193,7 +1189,7 @@ end:
 
 /* When the tunnel is closed, all the attached sessions need to go too.
  */
-void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
+static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
 {
 	int hash;
 	struct hlist_node *walk;
@@ -1242,7 +1238,6 @@ again:
 	}
 	write_unlock_bh(&tunnel->hlist_lock);
 }
-EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall);
 
 /* Tunnel socket destroy hook for UDP encapsulation */
 static void l2tp_udp_encap_destroy(struct sock *sk)
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index c199020f8a8a..a5c09d3a5698 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -180,9 +180,6 @@ struct l2tp_tunnel {
 	struct net		*l2tp_net;	/* the net we belong to */
 
 	refcount_t		ref_count;
-#ifdef CONFIG_DEBUG_FS
-	void (*show)(struct seq_file *m, void *arg);
-#endif
 	int (*recv_payload_hook)(struct sk_buff *skb);
 	void (*old_sk_destruct)(struct sock *);
 	struct sock		*sock;		/* Parent socket */
@@ -190,8 +187,6 @@ struct l2tp_tunnel {
 						 * was created by userspace */
 
 	struct work_struct	del_work;
-
-	uint8_t			priv[0];	/* private data */
 };
 
 struct l2tp_nl_cmd_ops {
@@ -201,11 +196,6 @@ struct l2tp_nl_cmd_ops {
 	int (*session_delete)(struct l2tp_session *session);
 };
 
-static inline void *l2tp_tunnel_priv(struct l2tp_tunnel *tunnel)
-{
-	return &tunnel->priv[0];
-}
-
 static inline void *l2tp_session_priv(struct l2tp_session *session)
 {
 	return &session->priv[0];
@@ -229,7 +219,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id,
 int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
 			 struct l2tp_tunnel_cfg *cfg);
 
-void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
 void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
 struct l2tp_session *l2tp_session_create(int priv_size,
 					 struct l2tp_tunnel *tunnel,
@@ -244,7 +233,6 @@ void l2tp_session_free(struct l2tp_session *session);
 void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 		      unsigned char *ptr, unsigned char *optr, u16 hdrflags,
 		      int length, int (*payload_hook)(struct sk_buff *skb));
-int l2tp_session_queue_purge(struct l2tp_session *session);
 int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb);
 void l2tp_session_set_header_len(struct l2tp_session *session, int version);
 
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index e87686f7d63c..b5d7dde003ef 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -177,9 +177,6 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
 		   atomic_long_read(&tunnel->stats.rx_packets),
 		   atomic_long_read(&tunnel->stats.rx_bytes),
 		   atomic_long_read(&tunnel->stats.rx_errors));
-
-	if (tunnel->show != NULL)
-		tunnel->show(m, tunnel);
 }
 
 static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 957369192ca1..672e5b753738 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -500,7 +500,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	struct ip6_flowlabel *flowlabel = NULL;
 	struct dst_entry *dst = NULL;
 	struct flowi6 fl6;
-	struct sockcm_cookie sockc_unused = {0};
 	struct ipcm6_cookie ipc6;
 	int addr_len = msg->msg_namelen;
 	int transhdrlen = 4; /* zero session-id */
@@ -525,9 +524,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	fl6.flowi6_mark = sk->sk_mark;
 	fl6.flowi6_uid = sk->sk_uid;
 
-	ipc6.hlimit = -1;
-	ipc6.tclass = -1;
-	ipc6.dontfrag = -1;
+	ipcm6_init(&ipc6);
 
 	if (lsa) {
 		if (addr_len < SIN6_LEN_RFC2133)
@@ -575,8 +572,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		opt->tot_len = sizeof(struct ipv6_txoptions);
 		ipc6.opt = opt;
 
-		err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6,
-					    &sockc_unused);
+		err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
@@ -641,7 +637,7 @@ back_from_confirm:
 	err = ip6_append_data(sk, ip_generic_getfrag, msg,
 			      ulen, transhdrlen, &ipc6,
 			      &fl6, (struct rt6_info *)dst,
-			      msg->msg_flags, &sockc_unused);
+			      msg->msg_flags);
 	if (err)
 		ip6_flush_pending_frames(sk);
 	else if (!(msg->msg_flags & MSG_MORE))
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index e398797878a9..9ac02c93df98 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -424,12 +424,6 @@ static void pppol2tp_put_sk(struct rcu_head *head)
 	sock_put(ps->__sk);
 }
 
-/* Called by l2tp_core when a session socket is being closed.
- */
-static void pppol2tp_session_close(struct l2tp_session *session)
-{
-}
-
 /* Really kill the session socket. (Called from sock_put() if
  * refcnt == 0.)
  */
@@ -573,7 +567,6 @@ static void pppol2tp_session_init(struct l2tp_session *session)
 	struct dst_entry *dst;
 
 	session->recv_skb = pppol2tp_recv;
-	session->session_close = pppol2tp_session_close;
 #if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
 	session->show = pppol2tp_show;
 #endif
@@ -595,40 +588,113 @@ static void pppol2tp_session_init(struct l2tp_session *session)
 	}
 }
 
+struct l2tp_connect_info {
+	u8 version;
+	int fd;
+	u32 tunnel_id;
+	u32 peer_tunnel_id;
+	u32 session_id;
+	u32 peer_session_id;
+};
+
+static int pppol2tp_sockaddr_get_info(const void *sa, int sa_len,
+				      struct l2tp_connect_info *info)
+{
+	switch (sa_len) {
+	case sizeof(struct sockaddr_pppol2tp):
+	{
+		const struct sockaddr_pppol2tp *sa_v2in4 = sa;
+
+		if (sa_v2in4->sa_protocol != PX_PROTO_OL2TP)
+			return -EINVAL;
+
+		info->version = 2;
+		info->fd = sa_v2in4->pppol2tp.fd;
+		info->tunnel_id = sa_v2in4->pppol2tp.s_tunnel;
+		info->peer_tunnel_id = sa_v2in4->pppol2tp.d_tunnel;
+		info->session_id = sa_v2in4->pppol2tp.s_session;
+		info->peer_session_id = sa_v2in4->pppol2tp.d_session;
+
+		break;
+	}
+	case sizeof(struct sockaddr_pppol2tpv3):
+	{
+		const struct sockaddr_pppol2tpv3 *sa_v3in4 = sa;
+
+		if (sa_v3in4->sa_protocol != PX_PROTO_OL2TP)
+			return -EINVAL;
+
+		info->version = 3;
+		info->fd = sa_v3in4->pppol2tp.fd;
+		info->tunnel_id = sa_v3in4->pppol2tp.s_tunnel;
+		info->peer_tunnel_id = sa_v3in4->pppol2tp.d_tunnel;
+		info->session_id = sa_v3in4->pppol2tp.s_session;
+		info->peer_session_id = sa_v3in4->pppol2tp.d_session;
+
+		break;
+	}
+	case sizeof(struct sockaddr_pppol2tpin6):
+	{
+		const struct sockaddr_pppol2tpin6 *sa_v2in6 = sa;
+
+		if (sa_v2in6->sa_protocol != PX_PROTO_OL2TP)
+			return -EINVAL;
+
+		info->version = 2;
+		info->fd = sa_v2in6->pppol2tp.fd;
+		info->tunnel_id = sa_v2in6->pppol2tp.s_tunnel;
+		info->peer_tunnel_id = sa_v2in6->pppol2tp.d_tunnel;
+		info->session_id = sa_v2in6->pppol2tp.s_session;
+		info->peer_session_id = sa_v2in6->pppol2tp.d_session;
+
+		break;
+	}
+	case sizeof(struct sockaddr_pppol2tpv3in6):
+	{
+		const struct sockaddr_pppol2tpv3in6 *sa_v3in6 = sa;
+
+		if (sa_v3in6->sa_protocol != PX_PROTO_OL2TP)
+			return -EINVAL;
+
+		info->version = 3;
+		info->fd = sa_v3in6->pppol2tp.fd;
+		info->tunnel_id = sa_v3in6->pppol2tp.s_tunnel;
+		info->peer_tunnel_id = sa_v3in6->pppol2tp.d_tunnel;
+		info->session_id = sa_v3in6->pppol2tp.s_session;
+		info->peer_session_id = sa_v3in6->pppol2tp.d_session;
+
+		break;
+	}
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /* connect() handler. Attach a PPPoX socket to a tunnel UDP socket
  */
 static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 			    int sockaddr_len, int flags)
 {
 	struct sock *sk = sock->sk;
-	struct sockaddr_pppol2tp *sp = (struct sockaddr_pppol2tp *) uservaddr;
 	struct pppox_sock *po = pppox_sk(sk);
 	struct l2tp_session *session = NULL;
+	struct l2tp_connect_info info;
 	struct l2tp_tunnel *tunnel;
 	struct pppol2tp_session *ps;
 	struct l2tp_session_cfg cfg = { 0, };
-	int error = 0;
-	u32 tunnel_id, peer_tunnel_id;
-	u32 session_id, peer_session_id;
 	bool drop_refcnt = false;
 	bool drop_tunnel = false;
 	bool new_session = false;
 	bool new_tunnel = false;
-	int ver = 2;
-	int fd;
-
-	lock_sock(sk);
-
-	error = -EINVAL;
+	int error;
 
-	if (sockaddr_len != sizeof(struct sockaddr_pppol2tp) &&
-	    sockaddr_len != sizeof(struct sockaddr_pppol2tpv3) &&
-	    sockaddr_len != sizeof(struct sockaddr_pppol2tpin6) &&
-	    sockaddr_len != sizeof(struct sockaddr_pppol2tpv3in6))
-		goto end;
+	error = pppol2tp_sockaddr_get_info(uservaddr, sockaddr_len, &info);
+	if (error < 0)
+		return error;
 
-	if (sp->sa_protocol != PX_PROTO_OL2TP)
-		goto end;
+	lock_sock(sk);
 
 	/* Check for already bound sockets */
 	error = -EBUSY;
@@ -640,56 +706,12 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 	if (sk->sk_user_data)
 		goto end; /* socket is already attached */
 
-	/* Get params from socket address. Handle L2TPv2 and L2TPv3.
-	 * This is nasty because there are different sockaddr_pppol2tp
-	 * structs for L2TPv2, L2TPv3, over IPv4 and IPv6. We use
-	 * the sockaddr size to determine which structure the caller
-	 * is using.
-	 */
-	peer_tunnel_id = 0;
-	if (sockaddr_len == sizeof(struct sockaddr_pppol2tp)) {
-		fd = sp->pppol2tp.fd;
-		tunnel_id = sp->pppol2tp.s_tunnel;
-		peer_tunnel_id = sp->pppol2tp.d_tunnel;
-		session_id = sp->pppol2tp.s_session;
-		peer_session_id = sp->pppol2tp.d_session;
-	} else if (sockaddr_len == sizeof(struct sockaddr_pppol2tpv3)) {
-		struct sockaddr_pppol2tpv3 *sp3 =
-			(struct sockaddr_pppol2tpv3 *) sp;
-		ver = 3;
-		fd = sp3->pppol2tp.fd;
-		tunnel_id = sp3->pppol2tp.s_tunnel;
-		peer_tunnel_id = sp3->pppol2tp.d_tunnel;
-		session_id = sp3->pppol2tp.s_session;
-		peer_session_id = sp3->pppol2tp.d_session;
-	} else if (sockaddr_len == sizeof(struct sockaddr_pppol2tpin6)) {
-		struct sockaddr_pppol2tpin6 *sp6 =
-			(struct sockaddr_pppol2tpin6 *) sp;
-		fd = sp6->pppol2tp.fd;
-		tunnel_id = sp6->pppol2tp.s_tunnel;
-		peer_tunnel_id = sp6->pppol2tp.d_tunnel;
-		session_id = sp6->pppol2tp.s_session;
-		peer_session_id = sp6->pppol2tp.d_session;
-	} else if (sockaddr_len == sizeof(struct sockaddr_pppol2tpv3in6)) {
-		struct sockaddr_pppol2tpv3in6 *sp6 =
-			(struct sockaddr_pppol2tpv3in6 *) sp;
-		ver = 3;
-		fd = sp6->pppol2tp.fd;
-		tunnel_id = sp6->pppol2tp.s_tunnel;
-		peer_tunnel_id = sp6->pppol2tp.d_tunnel;
-		session_id = sp6->pppol2tp.s_session;
-		peer_session_id = sp6->pppol2tp.d_session;
-	} else {
-		error = -EINVAL;
-		goto end; /* bad socket address */
-	}
-
 	/* Don't bind if tunnel_id is 0 */
 	error = -EINVAL;
-	if (tunnel_id == 0)
+	if (!info.tunnel_id)
 		goto end;
 
-	tunnel = l2tp_tunnel_get(sock_net(sk), tunnel_id);
+	tunnel = l2tp_tunnel_get(sock_net(sk), info.tunnel_id);
 	if (tunnel)
 		drop_tunnel = true;
 
@@ -697,7 +719,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 	 * peer_session_id is 0. Otherwise look up tunnel using supplied
 	 * tunnel id.
 	 */
-	if ((session_id == 0) && (peer_session_id == 0)) {
+	if (!info.session_id && !info.peer_session_id) {
 		if (tunnel == NULL) {
 			struct l2tp_tunnel_cfg tcfg = {
 				.encap = L2TP_ENCAPTYPE_UDP,
@@ -707,12 +729,16 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 			/* Prevent l2tp_tunnel_register() from trying to set up
 			 * a kernel socket.
 			 */
-			if (fd < 0) {
+			if (info.fd < 0) {
 				error = -EBADF;
 				goto end;
 			}
 
-			error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, &tcfg, &tunnel);
+			error = l2tp_tunnel_create(sock_net(sk), info.fd,
+						   info.version,
+						   info.tunnel_id,
+						   info.peer_tunnel_id, &tcfg,
+						   &tunnel);
 			if (error < 0)
 				goto end;
 
@@ -741,9 +767,9 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 		tunnel->recv_payload_hook = pppol2tp_recv_payload_hook;
 
 	if (tunnel->peer_tunnel_id == 0)
-		tunnel->peer_tunnel_id = peer_tunnel_id;
+		tunnel->peer_tunnel_id = info.peer_tunnel_id;
 
-	session = l2tp_session_get(sock_net(sk), tunnel, session_id);
+	session = l2tp_session_get(sock_net(sk), tunnel, info.session_id);
 	if (session) {
 		drop_refcnt = true;
 
@@ -772,8 +798,8 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 		cfg.pw_type = L2TP_PWTYPE_PPP;
 
 		session = l2tp_session_create(sizeof(struct pppol2tp_session),
-					      tunnel, session_id,
-					      peer_session_id, &cfg);
+					      tunnel, info.session_id,
+					      info.peer_session_id, &cfg);
 		if (IS_ERR(session)) {
 			error = PTR_ERR(session);
 			goto end;
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index e3589ade62e0..bb707789ef2b 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -12,6 +12,7 @@ mac80211-y := \
 	scan.o offchannel.o \
 	ht.o agg-tx.o agg-rx.o \
 	vht.o \
+	he.o \
 	ibss.o \
 	iface.o \
 	rate.o \
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index e83c19d4c292..6a4f154c99f6 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -245,6 +245,7 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
 	};
 	int i, ret = -EOPNOTSUPP;
 	u16 status = WLAN_STATUS_REQUEST_DECLINED;
+	u16 max_buf_size;
 
 	if (tid >= IEEE80211_FIRST_TSPEC_TSID) {
 		ht_dbg(sta->sdata,
@@ -268,13 +269,18 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
 		goto end;
 	}
 
+	if (sta->sta.he_cap.has_he)
+		max_buf_size = IEEE80211_MAX_AMPDU_BUF;
+	else
+		max_buf_size = IEEE80211_MAX_AMPDU_BUF_HT;
+
 	/* sanity check for incoming parameters:
 	 * check if configuration can support the BA policy
 	 * and if buffer size does not exceeds max value */
 	/* XXX: check own ht delayed BA capability?? */
 	if (((ba_policy != 1) &&
 	     (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) ||
-	    (buf_size > IEEE80211_MAX_AMPDU_BUF)) {
+	    (buf_size > max_buf_size)) {
 		status = WLAN_STATUS_INVALID_QOS_PARAM;
 		ht_dbg_ratelimited(sta->sdata,
 				   "AddBA Req with bad params from %pM on tid %u. policy %d, buffer size %d\n",
@@ -283,7 +289,7 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
 	}
 	/* determine default buffer size */
 	if (buf_size == 0)
-		buf_size = IEEE80211_MAX_AMPDU_BUF;
+		buf_size = max_buf_size;
 
 	/* make sure the size doesn't exceed the maximum supported by the hw */
 	if (buf_size > sta->sta.max_rx_aggregation_subframes)
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index ac4295296514..69e831bc317b 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -463,6 +463,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
 		.timeout = 0,
 	};
 	int ret;
+	u16 buf_size;
 
 	tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
 
@@ -511,11 +512,22 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
 	sta->ampdu_mlme.addba_req_num[tid]++;
 	spin_unlock_bh(&sta->lock);
 
+	if (sta->sta.he_cap.has_he) {
+		buf_size = local->hw.max_tx_aggregation_subframes;
+	} else {
+		/*
+		 * We really should use what the driver told us it will
+		 * transmit as the maximum, but certain APs (e.g. the
+		 * LinkSys WRT120N with FW v1.0.07 build 002 Jun 18 2012)
+		 * will crash when we use a lower number.
+		 */
+		buf_size = IEEE80211_MAX_AMPDU_BUF_HT;
+	}
+
 	/* send AddBA request */
 	ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
 				     tid_tx->dialog_token, params.ssn,
-				     IEEE80211_MAX_AMPDU_BUF,
-				     tid_tx->timeout);
+				     buf_size, tid_tx->timeout);
 }
 
 /*
@@ -905,8 +917,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
 {
 	struct tid_ampdu_tx *tid_tx;
 	struct ieee80211_txq *txq;
-	u16 capab, tid;
-	u8 buf_size;
+	u16 capab, tid, buf_size;
 	bool amsdu;
 
 	capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index bdf6fa78d0d2..02f3672e7b5e 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1412,6 +1412,11 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 		ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
 						    params->vht_capa, sta);
 
+	if (params->he_capa)
+		ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband,
+						  (void *)params->he_capa,
+						  params->he_capa_len, sta);
+
 	if (params->opmode_notif_used) {
 		/* returned value is only needed for rc update, but the
 		 * rc isn't initialized here yet, so ignore it
@@ -3486,7 +3491,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
 	}
 
 	local_bh_disable();
-	ieee80211_xmit(sdata, sta, skb);
+	ieee80211_xmit(sdata, sta, skb, 0);
 	local_bh_enable();
 
 	ret = 0;
diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c
index 690c142a7a44..5ac743816b59 100644
--- a/net/mac80211/ethtool.c
+++ b/net/mac80211/ethtool.c
@@ -116,16 +116,16 @@ static void ieee80211_get_stats(struct net_device *dev,
 		data[i++] = sta->sta_state;
 
 
-		if (sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE))
+		if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE))
 			data[i] = 100000ULL *
 				cfg80211_calculate_bitrate(&sinfo.txrate);
 		i++;
-		if (sinfo.filled & BIT(NL80211_STA_INFO_RX_BITRATE))
+		if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_RX_BITRATE))
 			data[i] = 100000ULL *
 				cfg80211_calculate_bitrate(&sinfo.rxrate);
 		i++;
 
-		if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))
+		if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG))
 			data[i] = (u8)sinfo.signal_avg;
 		i++;
 	} else {
diff --git a/net/mac80211/he.c b/net/mac80211/he.c
new file mode 100644
index 000000000000..769078ed5a12
--- /dev/null
+++ b/net/mac80211/he.c
@@ -0,0 +1,55 @@
+/*
+ * HE handling
+ *
+ * Copyright(c) 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "ieee80211_i.h"
+
+void
+ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
+				  struct ieee80211_supported_band *sband,
+				  const u8 *he_cap_ie, u8 he_cap_len,
+				  struct sta_info *sta)
+{
+	struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap;
+	struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie;
+	u8 he_ppe_size;
+	u8 mcs_nss_size;
+	u8 he_total_size;
+
+	memset(he_cap, 0, sizeof(*he_cap));
+
+	if (!he_cap_ie || !ieee80211_get_he_sta_cap(sband))
+		return;
+
+	/* Make sure size is OK */
+	mcs_nss_size = ieee80211_he_mcs_nss_size(he_cap_ie_elem);
+	he_ppe_size =
+		ieee80211_he_ppe_size(he_cap_ie[sizeof(he_cap->he_cap_elem) +
+						mcs_nss_size],
+				      he_cap_ie_elem->phy_cap_info);
+	he_total_size = sizeof(he_cap->he_cap_elem) + mcs_nss_size +
+			he_ppe_size;
+	if (he_cap_len < he_total_size)
+		return;
+
+	memcpy(&he_cap->he_cap_elem, he_cap_ie, sizeof(he_cap->he_cap_elem));
+
+	/* HE Tx/Rx HE MCS NSS Support Field */
+	memcpy(&he_cap->he_mcs_nss_supp,
+	       &he_cap_ie[sizeof(he_cap->he_cap_elem)], mcs_nss_size);
+
+	/* Check if there are (optional) PPE Thresholds */
+	if (he_cap->he_cap_elem.phy_cap_info[6] &
+	    IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT)
+		memcpy(he_cap->ppe_thres,
+		       &he_cap_ie[sizeof(he_cap->he_cap_elem) + mcs_nss_size],
+		       he_ppe_size);
+
+	he_cap->has_he = true;
+}
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 26a7ba3b698f..f849ea814993 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -352,7 +352,7 @@ void ieee80211_ba_session_work(struct work_struct *work)
 		    test_and_clear_bit(tid,
 				       sta->ampdu_mlme.tid_rx_manage_offl))
 			___ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid,
-							 IEEE80211_MAX_AMPDU_BUF,
+							 IEEE80211_MAX_AMPDU_BUF_HT,
 							 false, true);
 
 		if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index d1978aa1c15d..172aeae21ae9 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -165,6 +165,7 @@ typedef unsigned __bitwise ieee80211_tx_result;
 #define TX_DROP		((__force ieee80211_tx_result) 1u)
 #define TX_QUEUED	((__force ieee80211_tx_result) 2u)
 
+#define IEEE80211_TX_NO_SEQNO		BIT(0)
 #define IEEE80211_TX_UNICAST		BIT(1)
 #define IEEE80211_TX_PS_BUFFERED	BIT(2)
 
@@ -364,6 +365,7 @@ enum ieee80211_sta_flags {
 	IEEE80211_STA_DISABLE_160MHZ	= BIT(13),
 	IEEE80211_STA_DISABLE_WMM	= BIT(14),
 	IEEE80211_STA_ENABLE_RRM	= BIT(15),
+	IEEE80211_STA_DISABLE_HE	= BIT(16),
 };
 
 struct ieee80211_mgd_auth_data {
@@ -1453,6 +1455,10 @@ struct ieee802_11_elems {
 	const struct ieee80211_vht_cap *vht_cap_elem;
 	const struct ieee80211_vht_operation *vht_operation;
 	const struct ieee80211_meshconf_ie *mesh_config;
+	const u8 *he_cap;
+	const struct ieee80211_he_operation *he_operation;
+	const struct ieee80211_mu_edca_param_set *mu_edca_param_set;
+	const u8 *uora_element;
 	const u8 *mesh_id;
 	const u8 *peering;
 	const __le16 *awake_window;
@@ -1482,6 +1488,7 @@ struct ieee802_11_elems {
 	u8 ext_supp_rates_len;
 	u8 wmm_info_len;
 	u8 wmm_param_len;
+	u8 he_cap_len;
 	u8 mesh_id_len;
 	u8 peering_len;
 	u8 preq_len;
@@ -1824,6 +1831,13 @@ void ieee80211_get_vht_mask_from_cap(__le16 vht_cap,
 enum nl80211_chan_width
 ieee80211_sta_rx_bw_to_chan_width(struct sta_info *sta);
 
+/* HE */
+void
+ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
+				  struct ieee80211_supported_band *sband,
+				  const u8 *he_cap_ie, u8 he_cap_len,
+				  struct sta_info *sta);
+
 /* Spectrum management */
 void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
 				       struct ieee80211_mgmt *mgmt,
@@ -1880,19 +1894,20 @@ void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
 void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
 			       bool bss_notify, bool enable_qos);
 void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
-		    struct sta_info *sta, struct sk_buff *skb);
+		    struct sta_info *sta, struct sk_buff *skb,
+		    u32 txdata_flags);
 
 void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
 				 struct sk_buff *skb, int tid,
-				 enum nl80211_band band);
+				 enum nl80211_band band, u32 txdata_flags);
 
 static inline void
 ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
 			  struct sk_buff *skb, int tid,
-			  enum nl80211_band band)
+			  enum nl80211_band band, u32 txdata_flags)
 {
 	rcu_read_lock();
-	__ieee80211_tx_skb_tid_band(sdata, skb, tid, band);
+	__ieee80211_tx_skb_tid_band(sdata, skb, tid, band, txdata_flags);
 	rcu_read_unlock();
 }
 
@@ -1910,7 +1925,7 @@ static inline void ieee80211_tx_skb_tid(struct ieee80211_sub_if_data *sdata,
 	}
 
 	__ieee80211_tx_skb_tid_band(sdata, skb, tid,
-				    chanctx_conf->def.chan->band);
+				    chanctx_conf->def.chan->band, 0);
 	rcu_read_unlock();
 }
 
@@ -2031,26 +2046,27 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
 				    const u8 *bssid, u16 stype, u16 reason,
 				    bool send_frame, u8 *frame_buf);
+
+enum {
+	IEEE80211_PROBE_FLAG_DIRECTED		= BIT(0),
+	IEEE80211_PROBE_FLAG_MIN_CONTENT	= BIT(1),
+	IEEE80211_PROBE_FLAG_RANDOM_SN		= BIT(2),
+};
+
 int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
 			     size_t buffer_len,
 			     struct ieee80211_scan_ies *ie_desc,
 			     const u8 *ie, size_t ie_len,
 			     u8 bands_used, u32 *rate_masks,
-			     struct cfg80211_chan_def *chandef);
+			     struct cfg80211_chan_def *chandef,
+			     u32 flags);
 struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
 					  const u8 *src, const u8 *dst,
 					  u32 ratemask,
 					  struct ieee80211_channel *chan,
 					  const u8 *ssid, size_t ssid_len,
 					  const u8 *ie, size_t ie_len,
-					  bool directed);
-void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata,
-			      const u8 *src, const u8 *dst,
-			      const u8 *ssid, size_t ssid_len,
-			      const u8 *ie, size_t ie_len,
-			      u32 ratemask, bool directed, u32 tx_flags,
-			      struct ieee80211_channel *channel, bool scan);
-
+					  u32 flags);
 u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
 			    struct ieee802_11_elems *elems,
 			    enum nl80211_band band, u32 *basic_rates);
@@ -2073,6 +2089,9 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
 			       u32 cap);
 u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
 				const struct cfg80211_chan_def *chandef);
+u8 *ieee80211_ie_build_he_cap(u8 *pos,
+			      const struct ieee80211_sta_he_cap *he_cap,
+			      u8 *end);
 int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef,
 			     const struct ieee80211_supported_band *sband,
 			     const u8 *srates, int srates_len, u32 *rates);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 555e389b7dfa..5e6cf2cee965 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1130,7 +1130,7 @@ static void ieee80211_uninit(struct net_device *dev)
 
 static u16 ieee80211_netdev_select_queue(struct net_device *dev,
 					 struct sk_buff *skb,
-					 void *accel_priv,
+					 struct net_device *sb_dev,
 					 select_queue_fallback_t fallback)
 {
 	return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb);
@@ -1176,7 +1176,7 @@ static const struct net_device_ops ieee80211_dataif_ops = {
 
 static u16 ieee80211_monitor_select_queue(struct net_device *dev,
 					  struct sk_buff *skb,
-					  void *accel_priv,
+					  struct net_device *sb_dev,
 					  select_queue_fallback_t fallback)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index fb73451ed85e..4fb2709cb527 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -3,6 +3,7 @@
  * Copyright 2005-2006, Devicescape Software, Inc.
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
+ * Copyright (C) 2017     Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -557,10 +558,19 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
 	wiphy_ext_feature_set(wiphy,
 			      NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211);
 
-	if (!ops->hw_scan)
+	if (!ops->hw_scan) {
 		wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN |
 				   NL80211_FEATURE_AP_SCAN;
-
+		/*
+		 * if the driver behaves correctly using the probe request
+		 * (template) from mac80211, then both of these should be
+		 * supported even with hw scan - but let drivers opt in.
+		 */
+		wiphy_ext_feature_set(wiphy,
+				      NL80211_EXT_FEATURE_SCAN_RANDOM_SN);
+		wiphy_ext_feature_set(wiphy,
+				      NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT);
+	}
 
 	if (!ops->set_key)
 		wiphy->flags |= WIPHY_FLAG_IBSS_RSN;
@@ -588,8 +598,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
 	local->hw.queues = 1;
 	local->hw.max_rates = 1;
 	local->hw.max_report_rates = 0;
-	local->hw.max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
-	local->hw.max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
+	local->hw.max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF_HT;
+	local->hw.max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF_HT;
 	local->hw.offchannel_tx_hw_queue = IEEE80211_INVAL_HW_QUEUE;
 	local->hw.conf.long_frame_max_tx_count = wiphy->retry_long;
 	local->hw.conf.short_frame_max_tx_count = wiphy->retry_short;
@@ -816,7 +826,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	int result, i;
 	enum nl80211_band band;
 	int channels, max_bitrates;
-	bool supp_ht, supp_vht;
+	bool supp_ht, supp_vht, supp_he;
 	netdev_features_t feature_whitelist;
 	struct cfg80211_chan_def dflt_chandef = {};
 
@@ -896,6 +906,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	max_bitrates = 0;
 	supp_ht = false;
 	supp_vht = false;
+	supp_he = false;
 	for (band = 0; band < NUM_NL80211_BANDS; band++) {
 		struct ieee80211_supported_band *sband;
 
@@ -922,6 +933,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		supp_ht = supp_ht || sband->ht_cap.ht_supported;
 		supp_vht = supp_vht || sband->vht_cap.vht_supported;
 
+		if (!supp_he)
+			supp_he = !!ieee80211_get_he_sta_cap(sband);
+
 		if (!sband->ht_cap.ht_supported)
 			continue;
 
@@ -1011,6 +1025,18 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		local->scan_ies_len +=
 			2 + sizeof(struct ieee80211_vht_cap);
 
+	/* HE cap element is variable in size - set len to allow max size */
+	/*
+	 * TODO: 1 is added at the end of the calculation to accommodate for
+	 *	the temporary placing of the HE capabilities IE under EXT.
+	 *	Remove it once it is placed in the final place.
+	 */
+	if (supp_he)
+		local->scan_ies_len +=
+			2 + sizeof(struct ieee80211_he_cap_elem) +
+			sizeof(struct ieee80211_he_mcs_nss_supp) +
+			IEEE80211_HE_PPE_THRES_MAX_LEN + 1;
+
 	if (!local->ops->hw_scan) {
 		/* For hw_scan, driver needs to set these up. */
 		local->hw.wiphy->max_scan_ssids = 4;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a59187c016e0..7fb9957359a3 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -149,6 +149,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
 			     struct ieee80211_channel *channel,
 			     const struct ieee80211_ht_operation *ht_oper,
 			     const struct ieee80211_vht_operation *vht_oper,
+			     const struct ieee80211_he_operation *he_oper,
 			     struct cfg80211_chan_def *chandef, bool tracking)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -207,7 +208,27 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
 	}
 
 	vht_chandef = *chandef;
-	if (!ieee80211_chandef_vht_oper(vht_oper, &vht_chandef)) {
+	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE) && he_oper &&
+	    (le32_to_cpu(he_oper->he_oper_params) &
+	     IEEE80211_HE_OPERATION_VHT_OPER_INFO)) {
+		struct ieee80211_vht_operation he_oper_vht_cap;
+
+		/*
+		 * Set only first 3 bytes (other 2 aren't used in
+		 * ieee80211_chandef_vht_oper() anyway)
+		 */
+		memcpy(&he_oper_vht_cap, he_oper->optional, 3);
+		he_oper_vht_cap.basic_mcs_set = cpu_to_le16(0);
+
+		if (!ieee80211_chandef_vht_oper(&he_oper_vht_cap,
+						&vht_chandef)) {
+			if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
+				sdata_info(sdata,
+					   "HE AP VHT information is invalid, disable HE\n");
+			ret = IEEE80211_STA_DISABLE_HE;
+			goto out;
+		}
+	} else if (!ieee80211_chandef_vht_oper(vht_oper, &vht_chandef)) {
 		if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
 			sdata_info(sdata,
 				   "AP VHT information is invalid, disable VHT\n");
@@ -300,12 +321,14 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
 			       const struct ieee80211_ht_cap *ht_cap,
 			       const struct ieee80211_ht_operation *ht_oper,
 			       const struct ieee80211_vht_operation *vht_oper,
+			       const struct ieee80211_he_operation *he_oper,
 			       const u8 *bssid, u32 *changed)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-	struct ieee80211_supported_band *sband;
-	struct ieee80211_channel *chan;
+	struct ieee80211_channel *chan = sdata->vif.bss_conf.chandef.chan;
+	struct ieee80211_supported_band *sband =
+		local->hw.wiphy->bands[chan->band];
 	struct cfg80211_chan_def chandef;
 	u16 ht_opmode;
 	u32 flags;
@@ -320,6 +343,11 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
 	if (ifmgd->flags & IEEE80211_STA_DISABLE_VHT)
 		vht_oper = NULL;
 
+	/* don't check HE if we associated as non-HE station */
+	if (ifmgd->flags & IEEE80211_STA_DISABLE_HE ||
+	    !ieee80211_get_he_sta_cap(sband))
+		he_oper = NULL;
+
 	if (WARN_ON_ONCE(!sta))
 		return -EINVAL;
 
@@ -333,12 +361,9 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
 		sdata->vif.bss_conf.ht_operation_mode = ht_opmode;
 	}
 
-	chan = sdata->vif.bss_conf.chandef.chan;
-	sband = local->hw.wiphy->bands[chan->band];
-
-	/* calculate new channel (type) based on HT/VHT operation IEs */
+	/* calculate new channel (type) based on HT/VHT/HE operation IEs */
 	flags = ieee80211_determine_chantype(sdata, sband, chan,
-					     ht_oper, vht_oper,
+					     ht_oper, vht_oper, he_oper,
 					     &chandef, true);
 
 	/*
@@ -582,6 +607,34 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
 	ieee80211_ie_build_vht_cap(pos, &vht_cap, cap);
 }
 
+/* This function determines HE capability flags for the association
+ * and builds the IE.
+ */
+static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata,
+				struct sk_buff *skb,
+				struct ieee80211_supported_band *sband)
+{
+	u8 *pos;
+	const struct ieee80211_sta_he_cap *he_cap = NULL;
+	u8 he_cap_size;
+
+	he_cap = ieee80211_get_he_sta_cap(sband);
+	if (!he_cap)
+		return;
+
+	/*
+	 * TODO: the 1 added is because this temporarily is under the EXTENSION
+	 * IE. Get rid of it when it moves.
+	 */
+	he_cap_size =
+		2 + 1 + sizeof(he_cap->he_cap_elem) +
+		ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem) +
+		ieee80211_he_ppe_size(he_cap->ppe_thres[0],
+				      he_cap->he_cap_elem.phy_cap_info);
+	pos = skb_put(skb, he_cap_size);
+	ieee80211_ie_build_he_cap(pos, he_cap, pos + he_cap_size);
+}
+
 static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_local *local = sdata->local;
@@ -643,6 +696,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 			2 + 2 * sband->n_channels + /* supported channels */
 			2 + sizeof(struct ieee80211_ht_cap) + /* HT */
 			2 + sizeof(struct ieee80211_vht_cap) + /* VHT */
+			2 + 1 + sizeof(struct ieee80211_he_cap_elem) + /* HE */
+				sizeof(struct ieee80211_he_mcs_nss_supp) +
+				IEEE80211_HE_PPE_THRES_MAX_LEN +
 			assoc_data->ie_len + /* extra IEs */
 			(assoc_data->fils_kek_len ? 16 /* AES-SIV */ : 0) +
 			9, /* WMM */
@@ -827,11 +883,41 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 		offset = noffset;
 	}
 
+	/* if present, add any custom IEs that go before HE */
+	if (assoc_data->ie_len) {
+		static const u8 before_he[] = {
+			/*
+			 * no need to list the ones split off before VHT
+			 * or generated here
+			 */
+			WLAN_EID_OPMODE_NOTIF,
+			WLAN_EID_EXTENSION, WLAN_EID_EXT_FUTURE_CHAN_GUIDANCE,
+			/* 11ai elements */
+			WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_SESSION,
+			WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_PUBLIC_KEY,
+			WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_KEY_CONFIRM,
+			WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_HLP_CONTAINER,
+			WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_IP_ADDR_ASSIGN,
+			/* TODO: add 11ah/11aj/11ak elements */
+		};
+
+		/* RIC already taken above, so no need to handle here anymore */
+		noffset = ieee80211_ie_split(assoc_data->ie, assoc_data->ie_len,
+					     before_he, ARRAY_SIZE(before_he),
+					     offset);
+		pos = skb_put(skb, noffset - offset);
+		memcpy(pos, assoc_data->ie + offset, noffset - offset);
+		offset = noffset;
+	}
+
 	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
 		ieee80211_add_vht_ie(sdata, skb, sband,
 				     &assoc_data->ap_vht_cap);
 
-	/* if present, add any custom non-vendor IEs that go after HT */
+	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
+		ieee80211_add_he_ie(sdata, skb, sband);
+
+	/* if present, add any custom non-vendor IEs that go after HE */
 	if (assoc_data->ie_len) {
 		noffset = ieee80211_ie_split_vendor(assoc_data->ie,
 						    assoc_data->ie_len,
@@ -898,6 +984,11 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
 	struct ieee80211_hdr_3addr *nullfunc;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 
+	/* Don't send NDPs when STA is connected HE */
+	if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+	    !(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
+		return;
+
 	skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif,
 		!ieee80211_hw_check(&local->hw, DOESNT_SUPPORT_QOS_NDP));
 	if (!skb)
@@ -929,6 +1020,10 @@ static void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
 	if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION))
 		return;
 
+	/* Don't send NDPs when connected HE */
+	if (!(sdata->u.mgd.flags & IEEE80211_STA_DISABLE_HE))
+		return;
+
 	skb = dev_alloc_skb(local->hw.extra_tx_headroom + 30);
 	if (!skb)
 		return;
@@ -1700,9 +1795,11 @@ static void ieee80211_sta_handle_tspec_ac_params_wk(struct work_struct *work)
 }
 
 /* MLME */
-static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
-				     struct ieee80211_sub_if_data *sdata,
-				     const u8 *wmm_param, size_t wmm_param_len)
+static bool
+ieee80211_sta_wmm_params(struct ieee80211_local *local,
+			 struct ieee80211_sub_if_data *sdata,
+			 const u8 *wmm_param, size_t wmm_param_len,
+			 const struct ieee80211_mu_edca_param_set *mu_edca)
 {
 	struct ieee80211_tx_queue_params params[IEEE80211_NUM_ACS];
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -1749,6 +1846,9 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
 				sdata->wmm_acm |= BIT(1) | BIT(2); /* BK/- */
 			if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BK)
 				uapsd = true;
+			params[ac].mu_edca = !!mu_edca;
+			if (mu_edca)
+				params[ac].mu_edca_param_rec = mu_edca->ac_bk;
 			break;
 		case 2: /* AC_VI */
 			ac = IEEE80211_AC_VI;
@@ -1756,6 +1856,9 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
 				sdata->wmm_acm |= BIT(4) | BIT(5); /* CL/VI */
 			if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VI)
 				uapsd = true;
+			params[ac].mu_edca = !!mu_edca;
+			if (mu_edca)
+				params[ac].mu_edca_param_rec = mu_edca->ac_vi;
 			break;
 		case 3: /* AC_VO */
 			ac = IEEE80211_AC_VO;
@@ -1763,6 +1866,9 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
 				sdata->wmm_acm |= BIT(6) | BIT(7); /* VO/NC */
 			if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VO)
 				uapsd = true;
+			params[ac].mu_edca = !!mu_edca;
+			if (mu_edca)
+				params[ac].mu_edca_param_rec = mu_edca->ac_vo;
 			break;
 		case 0: /* AC_BE */
 		default:
@@ -1771,6 +1877,9 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
 				sdata->wmm_acm |= BIT(0) | BIT(3); /* BE/EE */
 			if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BE)
 				uapsd = true;
+			params[ac].mu_edca = !!mu_edca;
+			if (mu_edca)
+				params[ac].mu_edca_param_rec = mu_edca->ac_be;
 			break;
 		}
 
@@ -2219,6 +2328,20 @@ void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
 		ieee80211_sta_reset_conn_monitor(sdata);
 }
 
+static void ieee80211_mlme_send_probe_req(struct ieee80211_sub_if_data *sdata,
+					  const u8 *src, const u8 *dst,
+					  const u8 *ssid, size_t ssid_len,
+					  struct ieee80211_channel *channel)
+{
+	struct sk_buff *skb;
+
+	skb = ieee80211_build_probe_req(sdata, src, dst, (u32)-1, channel,
+					ssid, ssid_len, NULL, 0,
+					IEEE80211_PROBE_FLAG_DIRECTED);
+	if (skb)
+		ieee80211_tx_skb(sdata, skb);
+}
+
 static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -2265,10 +2388,9 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
 		else
 			ssid_len = ssid[1];
 
-		ieee80211_send_probe_req(sdata, sdata->vif.addr, dst,
-					 ssid + 2, ssid_len, NULL,
-					 0, (u32) -1, true, 0,
-					 ifmgd->associated->channel, false);
+		ieee80211_mlme_send_probe_req(sdata, sdata->vif.addr, dst,
+					      ssid + 2, ssid_len,
+					      ifmgd->associated->channel);
 		rcu_read_unlock();
 	}
 
@@ -2370,7 +2492,7 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw,
 	skb = ieee80211_build_probe_req(sdata, sdata->vif.addr, cbss->bssid,
 					(u32) -1, cbss->channel,
 					ssid + 2, ssid_len,
-					NULL, 0, true);
+					NULL, 0, IEEE80211_PROBE_FLAG_DIRECTED);
 	rcu_read_unlock();
 
 	return skb;
@@ -3008,6 +3130,25 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 		goto out;
 	}
 
+	/*
+	 * If AP doesn't support HT, or it doesn't have HE mandatory IEs, mark
+	 * HE as disabled. If on the 5GHz band, make sure it supports VHT.
+	 */
+	if (ifmgd->flags & IEEE80211_STA_DISABLE_HT ||
+	    (sband->band == NL80211_BAND_5GHZ &&
+	     ifmgd->flags & IEEE80211_STA_DISABLE_VHT) ||
+	    (!elems.he_cap && !elems.he_operation))
+		ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
+
+	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE) &&
+	    (!elems.he_cap || !elems.he_operation)) {
+		mutex_unlock(&sdata->local->sta_mtx);
+		sdata_info(sdata,
+			   "HE AP is missing HE capability/operation\n");
+		ret = false;
+		goto out;
+	}
+
 	/* Set up internal HT/VHT capabilities */
 	if (elems.ht_cap_elem && !(ifmgd->flags & IEEE80211_STA_DISABLE_HT))
 		ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband,
@@ -3017,6 +3158,48 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 		ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
 						    elems.vht_cap_elem, sta);
 
+	if (elems.he_operation && !(ifmgd->flags & IEEE80211_STA_DISABLE_HE) &&
+	    elems.he_cap) {
+		ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband,
+						  elems.he_cap,
+						  elems.he_cap_len,
+						  sta);
+
+		bss_conf->he_support = sta->sta.he_cap.has_he;
+	} else {
+		bss_conf->he_support = false;
+	}
+
+	if (bss_conf->he_support) {
+		u32 he_oper_params =
+			le32_to_cpu(elems.he_operation->he_oper_params);
+
+		bss_conf->bss_color = he_oper_params &
+				      IEEE80211_HE_OPERATION_BSS_COLOR_MASK;
+		bss_conf->htc_trig_based_pkt_ext =
+			(he_oper_params &
+			 IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK) <<
+			IEEE80211_HE_OPERATION_DFLT_PE_DURATION_OFFSET;
+		bss_conf->frame_time_rts_th =
+			(he_oper_params &
+			 IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK) <<
+			IEEE80211_HE_OPERATION_RTS_THRESHOLD_OFFSET;
+
+		bss_conf->multi_sta_back_32bit =
+			sta->sta.he_cap.he_cap_elem.mac_cap_info[2] &
+			IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP;
+
+		bss_conf->ack_enabled =
+			sta->sta.he_cap.he_cap_elem.mac_cap_info[2] &
+			IEEE80211_HE_MAC_CAP2_ACK_EN;
+
+		bss_conf->uora_exists = !!elems.uora_element;
+		if (elems.uora_element)
+			bss_conf->uora_ocw_range = elems.uora_element[0];
+
+		/* TODO: OPEN: what happens if BSS color disable is set? */
+	}
+
 	/*
 	 * Some APs, e.g. Netgear WNDR3700, report invalid HT operation data
 	 * in their association response, so ignore that data for our own
@@ -3076,7 +3259,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 	if (ifmgd->flags & IEEE80211_STA_DISABLE_WMM) {
 		ieee80211_set_wmm_default(sdata, false, false);
 	} else if (!ieee80211_sta_wmm_params(local, sdata, elems.wmm_param,
-					     elems.wmm_param_len)) {
+					     elems.wmm_param_len,
+					     elems.mu_edca_param_set)) {
 		/* still enable QoS since we might have HT/VHT */
 		ieee80211_set_wmm_default(sdata, false, true);
 		/* set the disable-WMM flag in this case to disable
@@ -3590,7 +3774,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 
 	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_WMM) &&
 	    ieee80211_sta_wmm_params(local, sdata, elems.wmm_param,
-				     elems.wmm_param_len))
+				     elems.wmm_param_len,
+				     elems.mu_edca_param_set))
 		changed |= BSS_CHANGED_QOS;
 
 	/*
@@ -3629,7 +3814,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 
 	if (ieee80211_config_bw(sdata, sta,
 				elems.ht_cap_elem, elems.ht_operation,
-				elems.vht_operation, bssid, &changed)) {
+				elems.vht_operation, elems.he_operation,
+				bssid, &changed)) {
 		mutex_unlock(&local->sta_mtx);
 		sdata_info(sdata,
 			   "failed to follow AP %pM bandwidth change, disconnect\n",
@@ -4266,6 +4452,68 @@ static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata,
 	return chains;
 }
 
+static bool
+ieee80211_verify_sta_he_mcs_support(struct ieee80211_supported_band *sband,
+				    const struct ieee80211_he_operation *he_op)
+{
+	const struct ieee80211_sta_he_cap *sta_he_cap =
+		ieee80211_get_he_sta_cap(sband);
+	u16 ap_min_req_set;
+	int i;
+
+	if (!sta_he_cap || !he_op)
+		return false;
+
+	ap_min_req_set = le16_to_cpu(he_op->he_mcs_nss_set);
+
+	/* Need to go over for 80MHz, 160MHz and for 80+80 */
+	for (i = 0; i < 3; i++) {
+		const struct ieee80211_he_mcs_nss_supp *sta_mcs_nss_supp =
+			&sta_he_cap->he_mcs_nss_supp;
+		u16 sta_mcs_map_rx =
+			le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i]);
+		u16 sta_mcs_map_tx =
+			le16_to_cpu(((__le16 *)sta_mcs_nss_supp)[2 * i + 1]);
+		u8 nss;
+		bool verified = true;
+
+		/*
+		 * For each band there is a maximum of 8 spatial streams
+		 * possible. Each of the sta_mcs_map_* is a 16-bit struct built
+		 * of 2 bits per NSS (1-8), with the values defined in enum
+		 * ieee80211_he_mcs_support. Need to make sure STA TX and RX
+		 * capabilities aren't less than the AP's minimum requirements
+		 * for this HE BSS per SS.
+		 * It is enough to find one such band that meets the reqs.
+		 */
+		for (nss = 8; nss > 0; nss--) {
+			u8 sta_rx_val = (sta_mcs_map_rx >> (2 * (nss - 1))) & 3;
+			u8 sta_tx_val = (sta_mcs_map_tx >> (2 * (nss - 1))) & 3;
+			u8 ap_val = (ap_min_req_set >> (2 * (nss - 1))) & 3;
+
+			if (ap_val == IEEE80211_HE_MCS_NOT_SUPPORTED)
+				continue;
+
+			/*
+			 * Make sure the HE AP doesn't require MCSs that aren't
+			 * supported by the client
+			 */
+			if (sta_rx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
+			    sta_tx_val == IEEE80211_HE_MCS_NOT_SUPPORTED ||
+			    (ap_val > sta_rx_val) || (ap_val > sta_tx_val)) {
+				verified = false;
+				break;
+			}
+		}
+
+		if (verified)
+			return true;
+	}
+
+	/* If here, STA doesn't meet AP's HE min requirements */
+	return false;
+}
+
 static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 				  struct cfg80211_bss *cbss)
 {
@@ -4274,6 +4522,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 	const struct ieee80211_ht_cap *ht_cap = NULL;
 	const struct ieee80211_ht_operation *ht_oper = NULL;
 	const struct ieee80211_vht_operation *vht_oper = NULL;
+	const struct ieee80211_he_operation *he_oper = NULL;
 	struct ieee80211_supported_band *sband;
 	struct cfg80211_chan_def chandef;
 	int ret;
@@ -4329,6 +4578,24 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 		}
 	}
 
+	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE) &&
+	    ieee80211_get_he_sta_cap(sband)) {
+		const struct cfg80211_bss_ies *ies;
+		const u8 *he_oper_ie;
+
+		ies = rcu_dereference(cbss->ies);
+		he_oper_ie = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_OPERATION,
+						  ies->data, ies->len);
+		if (he_oper_ie &&
+		    he_oper_ie[1] == ieee80211_he_oper_size(&he_oper_ie[3]))
+			he_oper = (void *)(he_oper_ie + 3);
+		else
+			he_oper = NULL;
+
+		if (!ieee80211_verify_sta_he_mcs_support(sband, he_oper))
+			ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
+	}
+
 	/* Allow VHT if at least one channel on the sband supports 80 MHz */
 	have_80mhz = false;
 	for (i = 0; i < sband->n_channels; i++) {
@@ -4345,7 +4612,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 
 	ifmgd->flags |= ieee80211_determine_chantype(sdata, sband,
 						     cbss->channel,
-						     ht_oper, vht_oper,
+						     ht_oper, vht_oper, he_oper,
 						     &chandef, false);
 
 	sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss),
@@ -4751,8 +5018,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 		    req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP104) {
 			ifmgd->flags |= IEEE80211_STA_DISABLE_HT;
 			ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
+			ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
 			netdev_info(sdata->dev,
-				    "disabling HT/VHT due to WEP/TKIP use\n");
+				    "disabling HE/HT/VHT due to WEP/TKIP use\n");
 		}
 	}
 
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index f1d40b6645ff..8ef4153cd299 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -262,7 +262,7 @@ static void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc,
 	if (roc->mgmt_tx_cookie) {
 		if (!WARN_ON(!roc->frame)) {
 			ieee80211_tx_skb_tid_band(roc->sdata, roc->frame, 7,
-						  roc->chan->band);
+						  roc->chan->band, 0);
 			roc->frame = NULL;
 		}
 	} else {
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 0a38cc1cbebc..a16ba568e2a3 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -175,6 +175,20 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local,
 		len += 12;
 	}
 
+	if (status->encoding == RX_ENC_HE &&
+	    status->flag & RX_FLAG_RADIOTAP_HE) {
+		len = ALIGN(len, 2);
+		len += 12;
+		BUILD_BUG_ON(sizeof(struct ieee80211_radiotap_he) != 12);
+	}
+
+	if (status->encoding == RX_ENC_HE &&
+	    status->flag & RX_FLAG_RADIOTAP_HE_MU) {
+		len = ALIGN(len, 2);
+		len += 12;
+		BUILD_BUG_ON(sizeof(struct ieee80211_radiotap_he_mu) != 12);
+	}
+
 	if (status->chains) {
 		/* antenna and antenna signal fields */
 		len += 2 * hweight8(status->chains);
@@ -263,6 +277,19 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
 	int mpdulen, chain;
 	unsigned long chains = status->chains;
 	struct ieee80211_vendor_radiotap rtap = {};
+	struct ieee80211_radiotap_he he = {};
+	struct ieee80211_radiotap_he_mu he_mu = {};
+
+	if (status->flag & RX_FLAG_RADIOTAP_HE) {
+		he = *(struct ieee80211_radiotap_he *)skb->data;
+		skb_pull(skb, sizeof(he));
+		WARN_ON_ONCE(status->encoding != RX_ENC_HE);
+	}
+
+	if (status->flag & RX_FLAG_RADIOTAP_HE_MU) {
+		he_mu = *(struct ieee80211_radiotap_he_mu *)skb->data;
+		skb_pull(skb, sizeof(he_mu));
+	}
 
 	if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) {
 		rtap = *(struct ieee80211_vendor_radiotap *)skb->data;
@@ -520,6 +547,89 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
 		*pos++ = flags;
 	}
 
+	if (status->encoding == RX_ENC_HE &&
+	    status->flag & RX_FLAG_RADIOTAP_HE) {
+#define HE_PREP(f, val)	cpu_to_le16(FIELD_PREP(IEEE80211_RADIOTAP_HE_##f, val))
+
+		if (status->enc_flags & RX_ENC_FLAG_STBC_MASK) {
+			he.data6 |= HE_PREP(DATA6_NSTS,
+					    FIELD_GET(RX_ENC_FLAG_STBC_MASK,
+						      status->enc_flags));
+			he.data3 |= HE_PREP(DATA3_STBC, 1);
+		} else {
+			he.data6 |= HE_PREP(DATA6_NSTS, status->nss);
+		}
+
+#define CHECK_GI(s) \
+	BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA5_GI_##s != \
+		     (int)NL80211_RATE_INFO_HE_GI_##s)
+
+		CHECK_GI(0_8);
+		CHECK_GI(1_6);
+		CHECK_GI(3_2);
+
+		he.data3 |= HE_PREP(DATA3_DATA_MCS, status->rate_idx);
+		he.data3 |= HE_PREP(DATA3_DATA_DCM, status->he_dcm);
+		he.data3 |= HE_PREP(DATA3_CODING,
+				    !!(status->enc_flags & RX_ENC_FLAG_LDPC));
+
+		he.data5 |= HE_PREP(DATA5_GI, status->he_gi);
+
+		switch (status->bw) {
+		case RATE_INFO_BW_20:
+			he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+					    IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_20MHZ);
+			break;
+		case RATE_INFO_BW_40:
+			he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+					    IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_40MHZ);
+			break;
+		case RATE_INFO_BW_80:
+			he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+					    IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_80MHZ);
+			break;
+		case RATE_INFO_BW_160:
+			he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+					    IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_160MHZ);
+			break;
+		case RATE_INFO_BW_HE_RU:
+#define CHECK_RU_ALLOC(s) \
+	BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_##s##T != \
+		     NL80211_RATE_INFO_HE_RU_ALLOC_##s + 4)
+
+			CHECK_RU_ALLOC(26);
+			CHECK_RU_ALLOC(52);
+			CHECK_RU_ALLOC(106);
+			CHECK_RU_ALLOC(242);
+			CHECK_RU_ALLOC(484);
+			CHECK_RU_ALLOC(996);
+			CHECK_RU_ALLOC(2x996);
+
+			he.data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC,
+					    status->he_ru + 4);
+			break;
+		default:
+			WARN_ONCE(1, "Invalid SU BW %d\n", status->bw);
+		}
+
+		/* ensure 2 byte alignment */
+		while ((pos - (u8 *)rthdr) & 1)
+			pos++;
+		rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE);
+		memcpy(pos, &he, sizeof(he));
+		pos += sizeof(he);
+	}
+
+	if (status->encoding == RX_ENC_HE &&
+	    status->flag & RX_FLAG_RADIOTAP_HE_MU) {
+		/* ensure 2 byte alignment */
+		while ((pos - (u8 *)rthdr) & 1)
+			pos++;
+		rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE_MU);
+		memcpy(pos, &he_mu, sizeof(he_mu));
+		pos += sizeof(he_mu);
+	}
+
 	for_each_set_bit(chain, &chains, IEEE80211_MAX_CHAINS) {
 		*pos++ = status->chain_signal[chain];
 		*pos++ = chain;
@@ -613,6 +723,12 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 		rcu_dereference(local->monitor_sdata);
 	bool only_monitor = false;
 
+	if (status->flag & RX_FLAG_RADIOTAP_HE)
+		rtap_space += sizeof(struct ieee80211_radiotap_he);
+
+	if (status->flag & RX_FLAG_RADIOTAP_HE_MU)
+		rtap_space += sizeof(struct ieee80211_radiotap_he_mu);
+
 	if (unlikely(status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA)) {
 		struct ieee80211_vendor_radiotap *rtap = (void *)origskb->data;
 
@@ -3241,7 +3357,7 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx)
 		}
 
 		__ieee80211_tx_skb_tid_band(rx->sdata, nskb, 7,
-					    status->band);
+					    status->band, 0);
 	}
 	dev_kfree_skb(rx->skb);
 	return RX_QUEUED;
@@ -3386,8 +3502,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx,
 		status = IEEE80211_SKB_RXCB((rx->skb));
 
 		sband = rx->local->hw.wiphy->bands[status->band];
-		if (!(status->encoding == RX_ENC_HT) &&
-		    !(status->encoding == RX_ENC_VHT))
+		if (status->encoding == RX_ENC_LEGACY)
 			rate = &sband->bitrates[status->rate_idx];
 
 		ieee80211_rx_cooked_monitor(rx, rate);
@@ -4386,6 +4501,14 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
 				      status->rate_idx, status->nss))
 				goto drop;
 			break;
+		case RX_ENC_HE:
+			if (WARN_ONCE(status->rate_idx > 11 ||
+				      !status->nss ||
+				      status->nss > 8,
+				      "Rate marked as an HE rate but data is invalid: MCS: %d, NSS: %d\n",
+				      status->rate_idx, status->nss))
+				goto drop;
+			break;
 		default:
 			WARN_ON_ONCE(1);
 			/* fall through */
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 2e917a6d239d..5d2a11777718 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -20,6 +20,7 @@
 #include <net/sch_generic.h>
 #include <linux/slab.h>
 #include <linux/export.h>
+#include <linux/random.h>
 #include <net/mac80211.h>
 
 #include "ieee80211_i.h"
@@ -293,6 +294,7 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
 	struct cfg80211_chan_def chandef;
 	u8 bands_used = 0;
 	int i, ielen, n_chans;
+	u32 flags = 0;
 
 	req = rcu_dereference_protected(local->scan_req,
 					lockdep_is_held(&local->mtx));
@@ -331,12 +333,16 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
 	local->hw_scan_req->req.n_channels = n_chans;
 	ieee80211_prepare_scan_chandef(&chandef, req->scan_width);
 
+	if (req->flags & NL80211_SCAN_FLAG_MIN_PREQ_CONTENT)
+		flags |= IEEE80211_PROBE_FLAG_MIN_CONTENT;
+
 	ielen = ieee80211_build_preq_ies(local,
 					 (u8 *)local->hw_scan_req->req.ie,
 					 local->hw_scan_ies_bufsize,
 					 &local->hw_scan_req->ies,
 					 req->ie, req->ie_len,
-					 bands_used, req->rates, &chandef);
+					 bands_used, req->rates, &chandef,
+					 flags);
 	local->hw_scan_req->req.ie_len = ielen;
 	local->hw_scan_req->req.no_cck = req->no_cck;
 	ether_addr_copy(local->hw_scan_req->req.mac_addr, req->mac_addr);
@@ -528,6 +534,35 @@ void ieee80211_run_deferred_scan(struct ieee80211_local *local)
 				     round_jiffies_relative(0));
 }
 
+static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata,
+					  const u8 *src, const u8 *dst,
+					  const u8 *ssid, size_t ssid_len,
+					  const u8 *ie, size_t ie_len,
+					  u32 ratemask, u32 flags, u32 tx_flags,
+					  struct ieee80211_channel *channel)
+{
+	struct sk_buff *skb;
+	u32 txdata_flags = 0;
+
+	skb = ieee80211_build_probe_req(sdata, src, dst, ratemask, channel,
+					ssid, ssid_len,
+					ie, ie_len, flags);
+
+	if (skb) {
+		if (flags & IEEE80211_PROBE_FLAG_RANDOM_SN) {
+			struct ieee80211_hdr *hdr = (void *)skb->data;
+			u16 sn = get_random_u32();
+
+			txdata_flags |= IEEE80211_TX_NO_SEQNO;
+			hdr->seq_ctrl =
+				cpu_to_le16(IEEE80211_SN_TO_SEQ(sn));
+		}
+		IEEE80211_SKB_CB(skb)->flags |= tx_flags;
+		ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band,
+					  txdata_flags);
+	}
+}
+
 static void ieee80211_scan_state_send_probe(struct ieee80211_local *local,
 					    unsigned long *next_delay)
 {
@@ -535,7 +570,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local,
 	struct ieee80211_sub_if_data *sdata;
 	struct cfg80211_scan_request *scan_req;
 	enum nl80211_band band = local->hw.conf.chandef.chan->band;
-	u32 tx_flags;
+	u32 flags = 0, tx_flags;
 
 	scan_req = rcu_dereference_protected(local->scan_req,
 					     lockdep_is_held(&local->mtx));
@@ -543,17 +578,21 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local,
 	tx_flags = IEEE80211_TX_INTFL_OFFCHAN_TX_OK;
 	if (scan_req->no_cck)
 		tx_flags |= IEEE80211_TX_CTL_NO_CCK_RATE;
+	if (scan_req->flags & NL80211_SCAN_FLAG_MIN_PREQ_CONTENT)
+		flags |= IEEE80211_PROBE_FLAG_MIN_CONTENT;
+	if (scan_req->flags & NL80211_SCAN_FLAG_RANDOM_SN)
+		flags |= IEEE80211_PROBE_FLAG_RANDOM_SN;
 
 	sdata = rcu_dereference_protected(local->scan_sdata,
 					  lockdep_is_held(&local->mtx));
 
 	for (i = 0; i < scan_req->n_ssids; i++)
-		ieee80211_send_probe_req(
+		ieee80211_send_scan_probe_req(
 			sdata, local->scan_addr, scan_req->bssid,
 			scan_req->ssids[i].ssid, scan_req->ssids[i].ssid_len,
 			scan_req->ie, scan_req->ie_len,
-			scan_req->rates[band], false,
-			tx_flags, local->hw.conf.chandef.chan, true);
+			scan_req->rates[band], flags,
+			tx_flags, local->hw.conf.chandef.chan);
 
 	/*
 	 * After sending probe requests, wait for probe responses
@@ -1141,6 +1180,7 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
 	u32 rate_masks[NUM_NL80211_BANDS] = {};
 	u8 bands_used = 0;
 	u8 *ie;
+	u32 flags = 0;
 
 	iebufsz = local->scan_ies_len + req->ie_len;
 
@@ -1157,6 +1197,9 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
 		}
 	}
 
+	if (req->flags & NL80211_SCAN_FLAG_MIN_PREQ_CONTENT)
+		flags |= IEEE80211_PROBE_FLAG_MIN_CONTENT;
+
 	ie = kcalloc(iebufsz, num_bands, GFP_KERNEL);
 	if (!ie) {
 		ret = -ENOMEM;
@@ -1167,7 +1210,8 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
 
 	ieee80211_build_preq_ies(local, ie, num_bands * iebufsz,
 				 &sched_scan_ies, req->ie,
-				 req->ie_len, bands_used, rate_masks, &chandef);
+				 req->ie_len, bands_used, rate_masks, &chandef,
+				 flags);
 
 	ret = drv_sched_scan_start(local, sdata, req, &sched_scan_ies);
 	if (ret == 0) {
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 6428f1ac37b6..f34202242d24 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1323,6 +1323,11 @@ static void ieee80211_send_null_response(struct sta_info *sta, int tid,
 	struct ieee80211_tx_info *info;
 	struct ieee80211_chanctx_conf *chanctx_conf;
 
+	/* Don't send NDPs when STA is connected HE */
+	if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+	    !(sdata->u.mgd.flags & IEEE80211_STA_DISABLE_HE))
+		return;
+
 	if (qos) {
 		fc = cpu_to_le16(IEEE80211_FTYPE_DATA |
 				 IEEE80211_STYPE_QOS_NULLFUNC |
@@ -1391,7 +1396,7 @@ static void ieee80211_send_null_response(struct sta_info *sta, int tid,
 	}
 
 	info->band = chanctx_conf->def.chan->band;
-	ieee80211_xmit(sdata, sta, skb);
+	ieee80211_xmit(sdata, sta, skb, 0);
 	rcu_read_unlock();
 }
 
@@ -1968,7 +1973,7 @@ sta_get_last_rx_stats(struct sta_info *sta)
 	return stats;
 }
 
-static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate,
+static void sta_stats_decode_rate(struct ieee80211_local *local, u32 rate,
 				  struct rate_info *rinfo)
 {
 	rinfo->bw = STA_STATS_GET(BW, rate);
@@ -2005,6 +2010,14 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate,
 		rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift);
 		break;
 		}
+	case STA_STATS_RATE_TYPE_HE:
+		rinfo->flags = RATE_INFO_FLAGS_HE_MCS;
+		rinfo->mcs = STA_STATS_GET(HE_MCS, rate);
+		rinfo->nss = STA_STATS_GET(HE_NSS, rate);
+		rinfo->he_gi = STA_STATS_GET(HE_GI, rate);
+		rinfo->he_ru_alloc = STA_STATS_GET(HE_RU, rate);
+		rinfo->he_dcm = STA_STATS_GET(HE_DCM, rate);
+		break;
 	}
 }
 
@@ -2101,38 +2114,38 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
 
 	drv_sta_statistics(local, sdata, &sta->sta, sinfo);
 
-	sinfo->filled |= BIT(NL80211_STA_INFO_INACTIVE_TIME) |
-			 BIT(NL80211_STA_INFO_STA_FLAGS) |
-			 BIT(NL80211_STA_INFO_BSS_PARAM) |
-			 BIT(NL80211_STA_INFO_CONNECTED_TIME) |
-			 BIT(NL80211_STA_INFO_RX_DROP_MISC);
+	sinfo->filled |= BIT_ULL(NL80211_STA_INFO_INACTIVE_TIME) |
+			 BIT_ULL(NL80211_STA_INFO_STA_FLAGS) |
+			 BIT_ULL(NL80211_STA_INFO_BSS_PARAM) |
+			 BIT_ULL(NL80211_STA_INFO_CONNECTED_TIME) |
+			 BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC);
 
 	if (sdata->vif.type == NL80211_IFTYPE_STATION) {
 		sinfo->beacon_loss_count = sdata->u.mgd.beacon_loss_count;
-		sinfo->filled |= BIT(NL80211_STA_INFO_BEACON_LOSS);
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_LOSS);
 	}
 
 	sinfo->connected_time = ktime_get_seconds() - sta->last_connected;
 	sinfo->inactive_time =
 		jiffies_to_msecs(jiffies - ieee80211_sta_last_active(sta));
 
-	if (!(sinfo->filled & (BIT(NL80211_STA_INFO_TX_BYTES64) |
-			       BIT(NL80211_STA_INFO_TX_BYTES)))) {
+	if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_TX_BYTES64) |
+			       BIT_ULL(NL80211_STA_INFO_TX_BYTES)))) {
 		sinfo->tx_bytes = 0;
 		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
 			sinfo->tx_bytes += sta->tx_stats.bytes[ac];
-		sinfo->filled |= BIT(NL80211_STA_INFO_TX_BYTES64);
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BYTES64);
 	}
 
-	if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_PACKETS))) {
+	if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_PACKETS))) {
 		sinfo->tx_packets = 0;
 		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
 			sinfo->tx_packets += sta->tx_stats.packets[ac];
-		sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS);
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_PACKETS);
 	}
 
-	if (!(sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES64) |
-			       BIT(NL80211_STA_INFO_RX_BYTES)))) {
+	if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES64) |
+			       BIT_ULL(NL80211_STA_INFO_RX_BYTES)))) {
 		sinfo->rx_bytes += sta_get_stats_bytes(&sta->rx_stats);
 
 		if (sta->pcpu_rx_stats) {
@@ -2144,10 +2157,10 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
 			}
 		}
 
-		sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES64);
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BYTES64);
 	}
 
-	if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_PACKETS))) {
+	if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_PACKETS))) {
 		sinfo->rx_packets = sta->rx_stats.packets;
 		if (sta->pcpu_rx_stats) {
 			for_each_possible_cpu(cpu) {
@@ -2157,17 +2170,17 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
 				sinfo->rx_packets += cpurxs->packets;
 			}
 		}
-		sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS);
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_PACKETS);
 	}
 
-	if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_RETRIES))) {
+	if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_RETRIES))) {
 		sinfo->tx_retries = sta->status_stats.retry_count;
-		sinfo->filled |= BIT(NL80211_STA_INFO_TX_RETRIES);
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_RETRIES);
 	}
 
-	if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_FAILED))) {
+	if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED))) {
 		sinfo->tx_failed = sta->status_stats.retry_failed;
-		sinfo->filled |= BIT(NL80211_STA_INFO_TX_FAILED);
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED);
 	}
 
 	sinfo->rx_dropped_misc = sta->rx_stats.dropped;
@@ -2182,23 +2195,23 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
 
 	if (sdata->vif.type == NL80211_IFTYPE_STATION &&
 	    !(sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)) {
-		sinfo->filled |= BIT(NL80211_STA_INFO_BEACON_RX) |
-				 BIT(NL80211_STA_INFO_BEACON_SIGNAL_AVG);
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_RX) |
+				 BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG);
 		sinfo->rx_beacon_signal_avg = ieee80211_ave_rssi(&sdata->vif);
 	}
 
 	if (ieee80211_hw_check(&sta->local->hw, SIGNAL_DBM) ||
 	    ieee80211_hw_check(&sta->local->hw, SIGNAL_UNSPEC)) {
-		if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL))) {
+		if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_SIGNAL))) {
 			sinfo->signal = (s8)last_rxstats->last_signal;
-			sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
+			sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL);
 		}
 
 		if (!sta->pcpu_rx_stats &&
-		    !(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))) {
+		    !(sinfo->filled & BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG))) {
 			sinfo->signal_avg =
 				-ewma_signal_read(&sta->rx_stats_avg.signal);
-			sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL_AVG);
+			sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG);
 		}
 	}
 
@@ -2207,11 +2220,11 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
 	 * pcpu statistics
 	 */
 	if (last_rxstats->chains &&
-	    !(sinfo->filled & (BIT(NL80211_STA_INFO_CHAIN_SIGNAL) |
-			       BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) {
-		sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL);
+	    !(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL) |
+			       BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) {
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL);
 		if (!sta->pcpu_rx_stats)
-			sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG);
+			sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG);
 
 		sinfo->chains = last_rxstats->chains;
 
@@ -2223,15 +2236,15 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
 		}
 	}
 
-	if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_BITRATE))) {
+	if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE))) {
 		sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate,
 				     &sinfo->txrate);
-		sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE);
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE);
 	}
 
-	if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_BITRATE))) {
+	if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_BITRATE))) {
 		if (sta_set_rate_info_rx(sta, &sinfo->rxrate) == 0)
-			sinfo->filled |= BIT(NL80211_STA_INFO_RX_BITRATE);
+			sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BITRATE);
 	}
 
 	if (tidstats && !cfg80211_sinfo_alloc_tid_stats(sinfo, GFP_KERNEL)) {
@@ -2244,18 +2257,18 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
 
 	if (ieee80211_vif_is_mesh(&sdata->vif)) {
 #ifdef CONFIG_MAC80211_MESH
-		sinfo->filled |= BIT(NL80211_STA_INFO_LLID) |
-				 BIT(NL80211_STA_INFO_PLID) |
-				 BIT(NL80211_STA_INFO_PLINK_STATE) |
-				 BIT(NL80211_STA_INFO_LOCAL_PM) |
-				 BIT(NL80211_STA_INFO_PEER_PM) |
-				 BIT(NL80211_STA_INFO_NONPEER_PM);
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_LLID) |
+				 BIT_ULL(NL80211_STA_INFO_PLID) |
+				 BIT_ULL(NL80211_STA_INFO_PLINK_STATE) |
+				 BIT_ULL(NL80211_STA_INFO_LOCAL_PM) |
+				 BIT_ULL(NL80211_STA_INFO_PEER_PM) |
+				 BIT_ULL(NL80211_STA_INFO_NONPEER_PM);
 
 		sinfo->llid = sta->mesh->llid;
 		sinfo->plid = sta->mesh->plid;
 		sinfo->plink_state = sta->mesh->plink_state;
 		if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) {
-			sinfo->filled |= BIT(NL80211_STA_INFO_T_OFFSET);
+			sinfo->filled |= BIT_ULL(NL80211_STA_INFO_T_OFFSET);
 			sinfo->t_offset = sta->mesh->t_offset;
 		}
 		sinfo->local_pm = sta->mesh->local_pm;
@@ -2300,7 +2313,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
 	thr = sta_get_expected_throughput(sta);
 
 	if (thr != 0) {
-		sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT);
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_EXPECTED_THROUGHPUT);
 		sinfo->expected_throughput = thr;
 	}
 
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 81b35f623792..9a04327d71d1 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -170,7 +170,7 @@ struct tid_ampdu_tx {
 	u8 dialog_token;
 	u8 stop_initiator;
 	bool tx_stop;
-	u8 buf_size;
+	u16 buf_size;
 
 	u16 failed_bar_ssn;
 	bool bar_pending;
@@ -405,7 +405,7 @@ struct ieee80211_sta_rx_stats {
 	int last_signal;
 	u8 chains;
 	s8 chain_signal_last[IEEE80211_MAX_CHAINS];
-	u16 last_rate;
+	u32 last_rate;
 	struct u64_stats_sync syncp;
 	u64 bytes;
 	u64 msdu[IEEE80211_NUM_TIDS + 1];
@@ -764,6 +764,7 @@ enum sta_stats_type {
 	STA_STATS_RATE_TYPE_LEGACY,
 	STA_STATS_RATE_TYPE_HT,
 	STA_STATS_RATE_TYPE_VHT,
+	STA_STATS_RATE_TYPE_HE,
 };
 
 #define STA_STATS_FIELD_HT_MCS		GENMASK( 7,  0)
@@ -771,9 +772,14 @@ enum sta_stats_type {
 #define STA_STATS_FIELD_LEGACY_BAND	GENMASK( 7,  4)
 #define STA_STATS_FIELD_VHT_MCS		GENMASK( 3,  0)
 #define STA_STATS_FIELD_VHT_NSS		GENMASK( 7,  4)
+#define STA_STATS_FIELD_HE_MCS		GENMASK( 3,  0)
+#define STA_STATS_FIELD_HE_NSS		GENMASK( 7,  4)
 #define STA_STATS_FIELD_BW		GENMASK(11,  8)
 #define STA_STATS_FIELD_SGI		GENMASK(12, 12)
 #define STA_STATS_FIELD_TYPE		GENMASK(15, 13)
+#define STA_STATS_FIELD_HE_RU		GENMASK(18, 16)
+#define STA_STATS_FIELD_HE_GI		GENMASK(20, 19)
+#define STA_STATS_FIELD_HE_DCM		GENMASK(21, 21)
 
 #define STA_STATS_FIELD(_n, _v)		FIELD_PREP(STA_STATS_FIELD_ ## _n, _v)
 #define STA_STATS_GET(_n, _v)		FIELD_GET(STA_STATS_FIELD_ ## _n, _v)
@@ -782,7 +788,7 @@ enum sta_stats_type {
 
 static inline u32 sta_stats_encode_rate(struct ieee80211_rx_status *s)
 {
-	u16 r;
+	u32 r;
 
 	r = STA_STATS_FIELD(BW, s->bw);
 
@@ -804,6 +810,14 @@ static inline u32 sta_stats_encode_rate(struct ieee80211_rx_status *s)
 		r |= STA_STATS_FIELD(LEGACY_BAND, s->band);
 		r |= STA_STATS_FIELD(LEGACY_IDX, s->rate_idx);
 		break;
+	case RX_ENC_HE:
+		r |= STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_HE);
+		r |= STA_STATS_FIELD(HE_NSS, s->nss);
+		r |= STA_STATS_FIELD(HE_MCS, s->rate_idx);
+		r |= STA_STATS_FIELD(HE_GI, s->he_gi);
+		r |= STA_STATS_FIELD(HE_RU, s->he_ru);
+		r |= STA_STATS_FIELD(HE_DCM, s->he_dcm);
+		break;
 	default:
 		WARN_ON(1);
 		return STA_STATS_RATE_INVALID;
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 80a7edf8d314..0ab69a1964f8 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -92,7 +92,7 @@
 				STA_ENTRY						\
 				__field(u16, tid)					\
 				__field(u16, ssn)					\
-				__field(u8, buf_size)					\
+				__field(u16, buf_size)					\
 				__field(bool, amsdu)					\
 				__field(u16, timeout)					\
 				__field(u16, action)
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index fa1f1e63a264..6a79d564de35 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -825,6 +825,8 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
 	 */
 	if (!ieee80211_is_data_qos(hdr->frame_control) ||
 	    is_multicast_ether_addr(hdr->addr1)) {
+		if (tx->flags & IEEE80211_TX_NO_SEQNO)
+			return TX_CONTINUE;
 		/* driver should assign sequence number */
 		info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
 		/* for pure STA mode without beacons, we can do it */
@@ -1854,7 +1856,7 @@ EXPORT_SYMBOL(ieee80211_tx_prepare_skb);
  */
 static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
 			 struct sta_info *sta, struct sk_buff *skb,
-			 bool txpending)
+			 bool txpending, u32 txdata_flags)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_tx_data tx;
@@ -1872,6 +1874,8 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
 	led_len = skb->len;
 	res_prepare = ieee80211_tx_prepare(sdata, &tx, sta, skb);
 
+	tx.flags |= txdata_flags;
+
 	if (unlikely(res_prepare == TX_DROP)) {
 		ieee80211_free_txskb(&local->hw, skb);
 		return true;
@@ -1933,7 +1937,8 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata,
 }
 
 void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
-		    struct sta_info *sta, struct sk_buff *skb)
+		    struct sta_info *sta, struct sk_buff *skb,
+		    u32 txdata_flags)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
@@ -1968,7 +1973,7 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
 	}
 
 	ieee80211_set_qos_hdr(sdata, skb);
-	ieee80211_tx(sdata, sta, skb, false);
+	ieee80211_tx(sdata, sta, skb, false, txdata_flags);
 }
 
 static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local,
@@ -2289,7 +2294,7 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
 	if (!ieee80211_parse_tx_radiotap(local, skb))
 		goto fail_rcu;
 
-	ieee80211_xmit(sdata, NULL, skb);
+	ieee80211_xmit(sdata, NULL, skb, 0);
 	rcu_read_unlock();
 
 	return NETDEV_TX_OK;
@@ -3648,7 +3653,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 
 		ieee80211_tx_stats(dev, skb->len);
 
-		ieee80211_xmit(sdata, sta, skb);
+		ieee80211_xmit(sdata, sta, skb, 0);
 	}
 	goto out;
  out_free:
@@ -3867,7 +3872,7 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local,
 			return true;
 		}
 		info->band = chanctx_conf->def.chan->band;
-		result = ieee80211_tx(sdata, NULL, skb, true);
+		result = ieee80211_tx(sdata, NULL, skb, true, 0);
 	} else {
 		struct sk_buff_head skbs;
 
@@ -4783,7 +4788,7 @@ EXPORT_SYMBOL(ieee80211_unreserve_tid);
 
 void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
 				 struct sk_buff *skb, int tid,
-				 enum nl80211_band band)
+				 enum nl80211_band band, u32 txdata_flags)
 {
 	int ac = ieee80211_ac_from_tid(tid);
 
@@ -4800,7 +4805,7 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
 	 */
 	local_bh_disable();
 	IEEE80211_SKB_CB(skb)->band = band;
-	ieee80211_xmit(sdata, NULL, skb);
+	ieee80211_xmit(sdata, NULL, skb, txdata_flags);
 	local_bh_enable();
 }
 
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 5e2e511c4a6f..3e68132a41fa 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1095,6 +1095,21 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
 			if (elen >= sizeof(*elems->max_idle_period_ie))
 				elems->max_idle_period_ie = (void *)pos;
 			break;
+		case WLAN_EID_EXTENSION:
+			if (pos[0] == WLAN_EID_EXT_HE_MU_EDCA &&
+			    elen >= (sizeof(*elems->mu_edca_param_set) + 1)) {
+				elems->mu_edca_param_set = (void *)&pos[1];
+			} else if (pos[0] == WLAN_EID_EXT_HE_CAPABILITY) {
+				elems->he_cap = (void *)&pos[1];
+				elems->he_cap_len = elen - 1;
+			} else if (pos[0] == WLAN_EID_EXT_HE_OPERATION &&
+				   elen >= sizeof(*elems->he_operation) &&
+				   elen >= ieee80211_he_oper_size(&pos[1])) {
+				elems->he_operation = (void *)&pos[1];
+			} else if (pos[0] == WLAN_EID_EXT_UORA && elen >= 1) {
+				elems->uora_element = (void *)&pos[1];
+			}
+			break;
 		default:
 			break;
 		}
@@ -1353,9 +1368,10 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
 					 enum nl80211_band band,
 					 u32 rate_mask,
 					 struct cfg80211_chan_def *chandef,
-					 size_t *offset)
+					 size_t *offset, u32 flags)
 {
 	struct ieee80211_supported_band *sband;
+	const struct ieee80211_sta_he_cap *he_cap;
 	u8 *pos = buffer, *end = buffer + buffer_len;
 	size_t noffset;
 	int supp_rates_len, i;
@@ -1433,6 +1449,9 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
 				chandef->chan->center_freq);
 	}
 
+	if (flags & IEEE80211_PROBE_FLAG_MIN_CONTENT)
+		goto done;
+
 	/* insert custom IEs that go before HT */
 	if (ie && ie_len) {
 		static const u8 before_ht[] = {
@@ -1460,11 +1479,6 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
 						sband->ht_cap.cap);
 	}
 
-	/*
-	 * If adding more here, adjust code in main.c
-	 * that calculates local->scan_ies_len.
-	 */
-
 	/* insert custom IEs that go before VHT */
 	if (ie && ie_len) {
 		static const u8 before_vht[] = {
@@ -1507,9 +1521,43 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
 						 sband->vht_cap.cap);
 	}
 
+	/* insert custom IEs that go before HE */
+	if (ie && ie_len) {
+		static const u8 before_he[] = {
+			/*
+			 * no need to list the ones split off before VHT
+			 * or generated here
+			 */
+			WLAN_EID_EXTENSION, WLAN_EID_EXT_FILS_REQ_PARAMS,
+			WLAN_EID_AP_CSN,
+			/* TODO: add 11ah/11aj/11ak elements */
+		};
+		noffset = ieee80211_ie_split(ie, ie_len,
+					     before_he, ARRAY_SIZE(before_he),
+					     *offset);
+		if (end - pos < noffset - *offset)
+			goto out_err;
+		memcpy(pos, ie + *offset, noffset - *offset);
+		pos += noffset - *offset;
+		*offset = noffset;
+	}
+
+	he_cap = ieee80211_get_he_sta_cap(sband);
+	if (he_cap) {
+		pos = ieee80211_ie_build_he_cap(pos, he_cap, end);
+		if (!pos)
+			goto out_err;
+	}
+
+	/*
+	 * If adding more here, adjust code in main.c
+	 * that calculates local->scan_ies_len.
+	 */
+
 	return pos - buffer;
  out_err:
 	WARN_ONCE(1, "not enough space for preq IEs\n");
+ done:
 	return pos - buffer;
 }
 
@@ -1518,7 +1566,8 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
 			     struct ieee80211_scan_ies *ie_desc,
 			     const u8 *ie, size_t ie_len,
 			     u8 bands_used, u32 *rate_masks,
-			     struct cfg80211_chan_def *chandef)
+			     struct cfg80211_chan_def *chandef,
+			     u32 flags)
 {
 	size_t pos = 0, old_pos = 0, custom_ie_offset = 0;
 	int i;
@@ -1533,7 +1582,8 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
 							     ie, ie_len, i,
 							     rate_masks[i],
 							     chandef,
-							     &custom_ie_offset);
+							     &custom_ie_offset,
+							     flags);
 			ie_desc->ies[i] = buffer + old_pos;
 			ie_desc->len[i] = pos - old_pos;
 			old_pos = pos;
@@ -1561,7 +1611,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
 					  struct ieee80211_channel *chan,
 					  const u8 *ssid, size_t ssid_len,
 					  const u8 *ie, size_t ie_len,
-					  bool directed)
+					  u32 flags)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct cfg80211_chan_def chandef;
@@ -1577,7 +1627,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
 	 * badly-behaved APs don't respond when this parameter is included.
 	 */
 	chandef.width = sdata->vif.bss_conf.chandef.width;
-	if (directed)
+	if (flags & IEEE80211_PROBE_FLAG_DIRECTED)
 		chandef.chan = NULL;
 	else
 		chandef.chan = chan;
@@ -1591,7 +1641,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
 	ies_len = ieee80211_build_preq_ies(local, skb_tail_pointer(skb),
 					   skb_tailroom(skb), &dummy_ie_desc,
 					   ie, ie_len, BIT(chan->band),
-					   rate_masks, &chandef);
+					   rate_masks, &chandef, flags);
 	skb_put(skb, ies_len);
 
 	if (dst) {
@@ -1605,27 +1655,6 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
 	return skb;
 }
 
-void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata,
-			      const u8 *src, const u8 *dst,
-			      const u8 *ssid, size_t ssid_len,
-			      const u8 *ie, size_t ie_len,
-			      u32 ratemask, bool directed, u32 tx_flags,
-			      struct ieee80211_channel *channel, bool scan)
-{
-	struct sk_buff *skb;
-
-	skb = ieee80211_build_probe_req(sdata, src, dst, ratemask, channel,
-					ssid, ssid_len,
-					ie, ie_len, directed);
-	if (skb) {
-		IEEE80211_SKB_CB(skb)->flags |= tx_flags;
-		if (scan)
-			ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band);
-		else
-			ieee80211_tx_skb(sdata, skb);
-	}
-}
-
 u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
 			    struct ieee802_11_elems *elems,
 			    enum nl80211_band band, u32 *basic_rates)
@@ -2412,6 +2441,72 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
 	return pos;
 }
 
+u8 *ieee80211_ie_build_he_cap(u8 *pos,
+			      const struct ieee80211_sta_he_cap *he_cap,
+			      u8 *end)
+{
+	u8 n;
+	u8 ie_len;
+	u8 *orig_pos = pos;
+
+	/* Make sure we have place for the IE */
+	/*
+	 * TODO: the 1 added is because this temporarily is under the EXTENSION
+	 * IE. Get rid of it when it moves.
+	 */
+	if (!he_cap)
+		return orig_pos;
+
+	n = ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem);
+	ie_len = 2 + 1 +
+		 sizeof(he_cap->he_cap_elem) + n +
+		 ieee80211_he_ppe_size(he_cap->ppe_thres[0],
+				       he_cap->he_cap_elem.phy_cap_info);
+
+	if ((end - pos) < ie_len)
+		return orig_pos;
+
+	*pos++ = WLAN_EID_EXTENSION;
+	pos++; /* We'll set the size later below */
+	*pos++ = WLAN_EID_EXT_HE_CAPABILITY;
+
+	/* Fixed data */
+	memcpy(pos, &he_cap->he_cap_elem, sizeof(he_cap->he_cap_elem));
+	pos += sizeof(he_cap->he_cap_elem);
+
+	memcpy(pos, &he_cap->he_mcs_nss_supp, n);
+	pos += n;
+
+	/* Check if PPE Threshold should be present */
+	if ((he_cap->he_cap_elem.phy_cap_info[6] &
+	     IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT) == 0)
+		goto end;
+
+	/*
+	 * Calculate how many PPET16/PPET8 pairs are to come. Algorithm:
+	 * (NSS_M1 + 1) x (num of 1 bits in RU_INDEX_BITMASK)
+	 */
+	n = hweight8(he_cap->ppe_thres[0] &
+		     IEEE80211_PPE_THRES_RU_INDEX_BITMASK_MASK);
+	n *= (1 + ((he_cap->ppe_thres[0] & IEEE80211_PPE_THRES_NSS_MASK) >>
+		   IEEE80211_PPE_THRES_NSS_POS));
+
+	/*
+	 * Each pair is 6 bits, and we need to add the 7 "header" bits to the
+	 * total size.
+	 */
+	n = (n * IEEE80211_PPE_THRES_INFO_PPET_SIZE * 2) + 7;
+	n = DIV_ROUND_UP(n, 8);
+
+	/* Copy PPE Thresholds */
+	memcpy(pos, &he_cap->ppe_thres, n);
+	pos += n;
+
+end:
+	orig_pos[1] = (pos - orig_pos) - 2;
+	return pos;
+}
+
 u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
 			       const struct cfg80211_chan_def *chandef,
 			       u16 prot_mode, bool rifs_mode)
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 168af54db975..dc240cb47ddf 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -603,6 +603,21 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct)
 }
 EXPORT_SYMBOL(nf_conntrack_destroy);
 
+bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+			 const struct sk_buff *skb)
+{
+	struct nf_ct_hook *ct_hook;
+	bool ret = false;
+
+	rcu_read_lock();
+	ct_hook = rcu_dereference(nf_ct_hook);
+	if (ct_hook)
+		ret = ct_hook->get_tuple_skb(dst_tuple, skb);
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL(nf_ct_get_tuple_skb);
+
 /* Built-in default zone used e.g. by modules. */
 const struct nf_conntrack_zone nf_ct_zone_dflt = {
 	.id	= NF_CT_DEFAULT_ZONE_ID,
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index a1086bdec242..5423b197d98a 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -32,7 +32,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
 	__be32 mask = 0;
 
 	/* we're only interested in locally generated packets */
-	if (skb->sk == NULL)
+	if (skb->sk == NULL || !net_eq(nf_ct_net(ct), sock_net(skb->sk)))
 		goto out;
 	if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
 		goto out;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3d5280425027..805500197c22 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1683,6 +1683,41 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
 	return 0;
 }
 
+static bool nf_conntrack_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
+				       const struct sk_buff *skb)
+{
+	const struct nf_conntrack_tuple *src_tuple;
+	const struct nf_conntrack_tuple_hash *hash;
+	struct nf_conntrack_tuple srctuple;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct) {
+		src_tuple = nf_ct_tuple(ct, CTINFO2DIR(ctinfo));
+		memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple));
+		return true;
+	}
+
+	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+			       NFPROTO_IPV4, dev_net(skb->dev),
+			       &srctuple))
+		return false;
+
+	hash = nf_conntrack_find_get(dev_net(skb->dev),
+				     &nf_ct_zone_dflt,
+				     &srctuple);
+	if (!hash)
+		return false;
+
+	ct = nf_ct_tuplehash_to_ctrack(hash);
+	src_tuple = nf_ct_tuple(ct, !hash->tuple.dst.dir);
+	memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple));
+	nf_ct_put(ct);
+
+	return true;
+}
+
 /* Bring out ya dead! */
 static struct nf_conn *
 get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
@@ -2204,6 +2239,7 @@ err_cachep:
 static struct nf_ct_hook nf_conntrack_hook = {
 	.update		= nf_conntrack_update,
 	.destroy	= destroy_conntrack,
+	.get_tuple_skb  = nf_conntrack_get_tuple_skb,
 };
 
 void nf_conntrack_init_end(void)
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
index dc61399e30be..a8c5c846aec1 100644
--- a/net/netfilter/nf_log_common.c
+++ b/net/netfilter/nf_log_common.c
@@ -132,9 +132,10 @@ int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
 }
 EXPORT_SYMBOL_GPL(nf_log_dump_tcp_header);
 
-void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk)
+void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m,
+			    struct sock *sk)
 {
-	if (!sk || !sk_fullsock(sk))
+	if (!sk || !sk_fullsock(sk) || !net_eq(net, sock_net(sk)))
 		return;
 
 	read_lock_bh(&sk->sk_callback_lock);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 46f9df99d276..86df2a1666fd 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -108,6 +108,7 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
 	struct flowi fl;
 	unsigned int hh_len;
 	struct dst_entry *dst;
+	struct sock *sk = skb->sk;
 	int err;
 
 	err = xfrm_decode_session(skb, &fl, family);
@@ -119,7 +120,10 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
 		dst = ((struct xfrm_dst *)dst)->route;
 	dst_hold(dst);
 
-	dst = xfrm_lookup(net, dst, &fl, skb->sk, 0);
+	if (sk && !net_eq(net, sock_net(sk)))
+		sk = NULL;
+
+	dst = xfrm_lookup(net, dst, &fl, sk, 0);
 	if (IS_ERR(dst))
 		return PTR_ERR(dst);
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 896d4a36081d..3f211e1025c1 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -14,6 +14,7 @@
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/vmalloc.h>
+#include <linux/rhashtable.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nf_tables.h>
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 1105a23bda5e..2b94dcc43456 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -107,7 +107,8 @@ static void nft_meta_get_eval(const struct nft_expr *expr,
 		break;
 	case NFT_META_SKUID:
 		sk = skb_to_full_sk(skb);
-		if (!sk || !sk_fullsock(sk))
+		if (!sk || !sk_fullsock(sk) ||
+		    !net_eq(nft_net(pkt), sock_net(sk)))
 			goto err;
 
 		read_lock_bh(&sk->sk_callback_lock);
@@ -123,7 +124,8 @@ static void nft_meta_get_eval(const struct nft_expr *expr,
 		break;
 	case NFT_META_SKGID:
 		sk = skb_to_full_sk(skb);
-		if (!sk || !sk_fullsock(sk))
+		if (!sk || !sk_fullsock(sk) ||
+		    !net_eq(nft_net(pkt), sock_net(sk)))
 			goto err;
 
 		read_lock_bh(&sk->sk_callback_lock);
@@ -214,7 +216,8 @@ static void nft_meta_get_eval(const struct nft_expr *expr,
 #ifdef CONFIG_CGROUP_NET_CLASSID
 	case NFT_META_CGROUP:
 		sk = skb_to_full_sk(skb);
-		if (!sk || !sk_fullsock(sk))
+		if (!sk || !sk_fullsock(sk) ||
+		    !net_eq(nft_net(pkt), sock_net(sk)))
 			goto err;
 		*dest = sock_cgroup_classid(&sk->sk_cgrp_data);
 		break;
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 74e1b3bd6954..998c2b546f6d 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -23,6 +23,9 @@ static void nft_socket_eval(const struct nft_expr *expr,
 	struct sock *sk = skb->sk;
 	u32 *dest = &regs->data[priv->dreg];
 
+	if (sk && !net_eq(nft_net(pkt), sock_net(sk)))
+		sk = NULL;
+
 	if (!sk)
 		switch(nft_pf(pkt)) {
 		case NFPROTO_IPV4:
@@ -39,7 +42,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
 			return;
 		}
 
-	if(!sk) {
+	if (!sk) {
 		nft_reg_store8(dest, 0);
 		return;
 	}
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index 7df2dece57d3..5d92e1781980 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -72,8 +72,9 @@ static bool
 cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_cgroup_info_v0 *info = par->matchinfo;
+	struct sock *sk = skb->sk;
 
-	if (skb->sk == NULL || !sk_fullsock(skb->sk))
+	if (!sk || !sk_fullsock(sk) || !net_eq(xt_net(par), sock_net(sk)))
 		return false;
 
 	return (info->id == sock_cgroup_classid(&skb->sk->sk_cgrp_data)) ^
@@ -85,8 +86,9 @@ static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
 	const struct xt_cgroup_info_v1 *info = par->matchinfo;
 	struct sock_cgroup_data *skcd = &skb->sk->sk_cgrp_data;
 	struct cgroup *ancestor = info->priv;
+	struct sock *sk = skb->sk;
 
-	if (!skb->sk || !sk_fullsock(skb->sk))
+	if (!sk || !sk_fullsock(sk) || !net_eq(xt_net(par), sock_net(sk)))
 		return false;
 
 	if (ancestor)
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index 3d705c688a27..46686fb73784 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -67,7 +67,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	struct sock *sk = skb_to_full_sk(skb);
 	struct net *net = xt_net(par);
 
-	if (sk == NULL || sk->sk_socket == NULL)
+	if (!sk || !sk->sk_socket || !net_eq(net, sock_net(sk)))
 		return (info->match ^ info->invert) == 0;
 	else if (info->match & info->invert & XT_OWNER_SOCKET)
 		/*
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 07085c22b19c..f44de4bc2100 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -265,7 +265,8 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	}
 
 	/* use TTL as seen before forwarding */
-	if (xt_out(par) != NULL && skb->sk == NULL)
+	if (xt_out(par) != NULL &&
+	    (!skb->sk || !net_eq(net, sock_net(skb->sk))))
 		ttl++;
 
 	spin_lock_bh(&recent_lock);
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 5c0779c4fa3c..0472f3472842 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -56,8 +56,12 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
 	struct sk_buff *pskb = (struct sk_buff *)skb;
 	struct sock *sk = skb->sk;
 
+	if (!net_eq(xt_net(par), sock_net(sk)))
+		sk = NULL;
+
 	if (!sk)
 		sk = nf_sk_lookup_slow_v4(xt_net(par), skb, xt_in(par));
+
 	if (sk) {
 		bool wildcard;
 		bool transparent = true;
@@ -113,8 +117,12 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
 	struct sk_buff *pskb = (struct sk_buff *)skb;
 	struct sock *sk = skb->sk;
 
+	if (!net_eq(xt_net(par), sock_net(sk)))
+		sk = NULL;
+
 	if (!sk)
 		sk = nf_sk_lookup_slow_v6(xt_net(par), skb, xt_in(par));
+
 	if (sk) {
 		bool wildcard;
 		bool transparent = true;
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 30a5df27116e..85ae53d8fd09 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1057,6 +1057,28 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
 			     clone_flow_key);
 }
 
+/* When 'last' is true, clone() should always consume the 'skb'.
+ * Otherwise, clone() should keep 'skb' intact regardless what
+ * actions are executed within clone().
+ */
+static int clone(struct datapath *dp, struct sk_buff *skb,
+		 struct sw_flow_key *key, const struct nlattr *attr,
+		 bool last)
+{
+	struct nlattr *actions;
+	struct nlattr *clone_arg;
+	int rem = nla_len(attr);
+	bool dont_clone_flow_key;
+
+	/* The first action is always 'OVS_CLONE_ATTR_ARG'. */
+	clone_arg = nla_data(attr);
+	dont_clone_flow_key = nla_get_u32(clone_arg);
+	actions = nla_next(clone_arg, &rem);
+
+	return clone_execute(dp, skb, key, 0, actions, rem, last,
+			     !dont_clone_flow_key);
+}
+
 static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
 			 const struct nlattr *attr)
 {
@@ -1336,6 +1358,17 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 				consume_skb(skb);
 				return 0;
 			}
+			break;
+
+		case OVS_ACTION_ATTR_CLONE: {
+			bool last = nla_is_last(a, rem);
+
+			err = clone(dp, skb, key, a, last);
+			if (last)
+				return err;
+
+			break;
+		}
 		}
 
 		if (unlikely(err)) {
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 492ab0c36f7c..a70097ecf33c 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2460,6 +2460,40 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
 	return 0;
 }
 
+static int validate_and_copy_clone(struct net *net,
+				   const struct nlattr *attr,
+				   const struct sw_flow_key *key,
+				   struct sw_flow_actions **sfa,
+				   __be16 eth_type, __be16 vlan_tci,
+				   bool log, bool last)
+{
+	int start, err;
+	u32 exec;
+
+	if (nla_len(attr) && nla_len(attr) < NLA_HDRLEN)
+		return -EINVAL;
+
+	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CLONE, log);
+	if (start < 0)
+		return start;
+
+	exec = last || !actions_may_change_flow(attr);
+
+	err = ovs_nla_add_action(sfa, OVS_CLONE_ATTR_EXEC, &exec,
+				 sizeof(exec), log);
+	if (err)
+		return err;
+
+	err = __ovs_nla_copy_actions(net, attr, key, sfa,
+				     eth_type, vlan_tci, log);
+	if (err)
+		return err;
+
+	add_nested_action_end(*sfa, start);
+
+	return 0;
+}
+
 void ovs_match_init(struct sw_flow_match *match,
 		    struct sw_flow_key *key,
 		    bool reset_key,
@@ -2516,7 +2550,9 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 	struct ovs_tunnel_info *ovs_tun;
 	struct nlattr *a;
 	int err = 0, start, opts_type;
+	__be16 dst_opt_type;
 
+	dst_opt_type = 0;
 	ovs_match_init(&match, &key, true, NULL);
 	opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
 	if (opts_type < 0)
@@ -2528,10 +2564,13 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 			err = validate_geneve_opts(&key);
 			if (err < 0)
 				return err;
+			dst_opt_type = TUNNEL_GENEVE_OPT;
 			break;
 		case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
+			dst_opt_type = TUNNEL_VXLAN_OPT;
 			break;
 		case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
+			dst_opt_type = TUNNEL_ERSPAN_OPT;
 			break;
 		}
 	}
@@ -2574,7 +2613,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 	 */
 	ip_tunnel_info_opts_set(tun_info,
 				TUN_METADATA_OPTS(&key, key.tun_opts_len),
-				key.tun_opts_len);
+				key.tun_opts_len, dst_opt_type);
 	add_nested_action_end(*sfa, start);
 
 	return err;
@@ -2844,6 +2883,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			[OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
 			[OVS_ACTION_ATTR_POP_NSH] = 0,
 			[OVS_ACTION_ATTR_METER] = sizeof(u32),
+			[OVS_ACTION_ATTR_CLONE] = (u32)-1,
 		};
 		const struct ovs_action_push_vlan *vlan;
 		int type = nla_type(a);
@@ -3033,6 +3073,18 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			/* Non-existent meters are simply ignored.  */
 			break;
 
+		case OVS_ACTION_ATTR_CLONE: {
+			bool last = nla_is_last(a, rem);
+
+			err = validate_and_copy_clone(net, a, key, sfa,
+						      eth_type, vlan_tci,
+						      log, last);
+			if (err)
+				return err;
+			skip_copy = true;
+			break;
+		}
+
 		default:
 			OVS_NLERR(log, "Unknown Action type %d", type);
 			return -EINVAL;
@@ -3111,6 +3163,26 @@ out:
 	return err;
 }
 
+static int clone_action_to_attr(const struct nlattr *attr,
+				struct sk_buff *skb)
+{
+	struct nlattr *start;
+	int err = 0, rem = nla_len(attr);
+
+	start = nla_nest_start(skb, OVS_ACTION_ATTR_CLONE);
+	if (!start)
+		return -EMSGSIZE;
+
+	err = ovs_nla_put_actions(nla_data(attr), rem, skb);
+
+	if (err)
+		nla_nest_cancel(skb, start);
+	else
+		nla_nest_end(skb, start);
+
+	return err;
+}
+
 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
 {
 	const struct nlattr *ovs_key = nla_data(a);
@@ -3199,6 +3271,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
 				return err;
 			break;
 
+		case OVS_ACTION_ATTR_CLONE:
+			err = clone_action_to_attr(a, skb);
+			if (err)
+				return err;
+			break;
+
 		default:
 			if (nla_put(skb, type, nla_len(a), nla_data(a)))
 				return -EMSGSIZE;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9b27d0cd766d..e3e00d3a972e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -275,9 +275,10 @@ static bool packet_use_direct_xmit(const struct packet_sock *po)
 	return po->xmit == packet_direct_xmit;
 }
 
-static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
+static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb,
+				  struct net_device *sb_dev)
 {
-	return (u16) raw_smp_processor_id() % dev->real_num_tx_queues;
+	return dev_pick_tx_cpu_id(dev, skb, sb_dev, NULL);
 }
 
 static u16 packet_pick_tx_queue(struct sk_buff *skb)
@@ -291,7 +292,7 @@ static u16 packet_pick_tx_queue(struct sk_buff *skb)
 						    __packet_pick_tx_queue);
 		queue_index = netdev_cap_txqueue(dev, queue_index);
 	} else {
-		queue_index = __packet_pick_tx_queue(dev, skb);
+		queue_index = __packet_pick_tx_queue(dev, skb, NULL);
 	}
 
 	return queue_index;
@@ -1951,7 +1952,7 @@ retry:
 		goto out_unlock;
 	}
 
-	sockc.tsflags = sk->sk_tsflags;
+	sockcm_init(&sockc, sk);
 	if (msg->msg_controllen) {
 		err = sock_cmsg_send(sk, msg, &sockc);
 		if (unlikely(err))
@@ -1962,6 +1963,7 @@ retry:
 	skb->dev = dev;
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
+	skb->tstamp = sockc.transmit_time;
 
 	sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags);
 
@@ -2457,6 +2459,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	skb->dev = dev;
 	skb->priority = po->sk.sk_priority;
 	skb->mark = po->sk.sk_mark;
+	skb->tstamp = sockc->transmit_time;
 	sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags);
 	skb_shinfo(skb)->destructor_arg = ph.raw;
 
@@ -2633,7 +2636,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	if (unlikely(!(dev->flags & IFF_UP)))
 		goto out_put;
 
-	sockc.tsflags = po->sk.sk_tsflags;
+	sockcm_init(&sockc, &po->sk);
 	if (msg->msg_controllen) {
 		err = sock_cmsg_send(&po->sk, msg, &sockc);
 		if (unlikely(err))
@@ -2829,7 +2832,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	if (unlikely(!(dev->flags & IFF_UP)))
 		goto out_unlock;
 
-	sockc.tsflags = sk->sk_tsflags;
+	sockcm_init(&sockc, sk);
 	sockc.mark = sk->sk_mark;
 	if (msg->msg_controllen) {
 		err = sock_cmsg_send(sk, msg, &sockc);
@@ -2905,6 +2908,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	skb->dev = dev;
 	skb->priority = sk->sk_priority;
 	skb->mark = sockc.mark;
+	skb->tstamp = sockc.transmit_time;
 
 	if (has_vnet_hdr) {
 		err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index b4e421aa9727..1eaf2550a9f8 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -376,8 +376,6 @@ static void release_refill(struct rds_connection *conn)
  * This tries to allocate and post unused work requests after making sure that
  * they have all the allocations they need to queue received fragments into
  * sockets.
- *
- * -1 is returned if posting fails due to temporary resource exhaustion.
  */
 void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
 {
@@ -1025,7 +1023,6 @@ int rds_ib_recv_path(struct rds_conn_path *cp)
 {
 	struct rds_connection *conn = cp->cp_conn;
 	struct rds_ib_connection *ic = conn->c_transport_data;
-	int ret = 0;
 
 	rdsdebug("conn %p\n", conn);
 	if (rds_conn_up(conn)) {
@@ -1034,7 +1031,7 @@ int rds_ib_recv_path(struct rds_conn_path *cp)
 		rds_ib_stats_inc(s_ib_rx_refill_from_thread);
 	}
 
-	return ret;
+	return 0;
 }
 
 int rds_ib_recv_init(void)
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index a01169fb5325..7af246764a35 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -183,6 +183,17 @@ config NET_SCH_CBS
 	  To compile this code as a module, choose M here: the
 	  module will be called sch_cbs.
 
+config NET_SCH_ETF
+	tristate "Earliest TxTime First (ETF)"
+	help
+	  Say Y here if you want to use the Earliest TxTime First (ETF) packet
+	  scheduling algorithm.
+
+	  See the top of <file:net/sched/sch_etf.c> for more details.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_etf.
+
 config NET_SCH_GRED
 	tristate "Generic Random Early Detection (GRED)"
 	---help---
@@ -284,6 +295,17 @@ config NET_SCH_FQ_CODEL
 
 	  If unsure, say N.
 
+config NET_SCH_CAKE
+	tristate "Common Applications Kept Enhanced (CAKE)"
+	help
+	  Say Y here if you want to use the Common Applications Kept Enhanced
+          (CAKE) queue management algorithm.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called sch_cake.
+
+	  If unsure, say N.
+
 config NET_SCH_FQ
 	tristate "Fair Queue"
 	help
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 8811d3804878..673ee7d26ff2 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -50,10 +50,12 @@ obj-$(CONFIG_NET_SCH_CHOKE)	+= sch_choke.o
 obj-$(CONFIG_NET_SCH_QFQ)	+= sch_qfq.o
 obj-$(CONFIG_NET_SCH_CODEL)	+= sch_codel.o
 obj-$(CONFIG_NET_SCH_FQ_CODEL)	+= sch_fq_codel.o
+obj-$(CONFIG_NET_SCH_CAKE)	+= sch_cake.o
 obj-$(CONFIG_NET_SCH_FQ)	+= sch_fq.o
 obj-$(CONFIG_NET_SCH_HHF)	+= sch_hhf.o
 obj-$(CONFIG_NET_SCH_PIE)	+= sch_pie.o
 obj-$(CONFIG_NET_SCH_CBS)	+= sch_cbs.o
+obj-$(CONFIG_NET_SCH_ETF)	+= sch_etf.o
 
 obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 3f4cf930f809..148a89ab789b 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -55,6 +55,24 @@ static void tcf_action_goto_chain_exec(const struct tc_action *a,
 	res->goto_tp = rcu_dereference_bh(chain->filter_chain);
 }
 
+static void tcf_free_cookie_rcu(struct rcu_head *p)
+{
+	struct tc_cookie *cookie = container_of(p, struct tc_cookie, rcu);
+
+	kfree(cookie->data);
+	kfree(cookie);
+}
+
+static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie,
+				  struct tc_cookie *new_cookie)
+{
+	struct tc_cookie *old;
+
+	old = xchg((__force struct tc_cookie **)old_cookie, new_cookie);
+	if (old)
+		call_rcu(&old->rcu, tcf_free_cookie_rcu);
+}
+
 /* XXX: For standalone actions, we don't need a RCU grace period either, because
  * actions are always connected to filters and filters are already destroyed in
  * RCU callbacks, so after a RCU grace period actions are already disconnected
@@ -65,44 +83,64 @@ static void free_tcf(struct tc_action *p)
 	free_percpu(p->cpu_bstats);
 	free_percpu(p->cpu_qstats);
 
-	if (p->act_cookie) {
-		kfree(p->act_cookie->data);
-		kfree(p->act_cookie);
-	}
+	tcf_set_action_cookie(&p->act_cookie, NULL);
 	if (p->goto_chain)
 		tcf_action_goto_chain_fini(p);
 
 	kfree(p);
 }
 
-static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p)
+static void tcf_action_cleanup(struct tc_action *p)
 {
-	spin_lock(&idrinfo->lock);
-	idr_remove(&idrinfo->action_idr, p->tcfa_index);
-	spin_unlock(&idrinfo->lock);
+	if (p->ops->cleanup)
+		p->ops->cleanup(p);
+
 	gen_kill_estimator(&p->tcfa_rate_est);
 	free_tcf(p);
 }
 
+static int __tcf_action_put(struct tc_action *p, bool bind)
+{
+	struct tcf_idrinfo *idrinfo = p->idrinfo;
+
+	if (refcount_dec_and_lock(&p->tcfa_refcnt, &idrinfo->lock)) {
+		if (bind)
+			atomic_dec(&p->tcfa_bindcnt);
+		idr_remove(&idrinfo->action_idr, p->tcfa_index);
+		spin_unlock(&idrinfo->lock);
+
+		tcf_action_cleanup(p);
+		return 1;
+	}
+
+	if (bind)
+		atomic_dec(&p->tcfa_bindcnt);
+
+	return 0;
+}
+
 int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
 {
 	int ret = 0;
 
-	ASSERT_RTNL();
-
+	/* Release with strict==1 and bind==0 is only called through act API
+	 * interface (classifiers always bind). Only case when action with
+	 * positive reference count and zero bind count can exist is when it was
+	 * also created with act API (unbinding last classifier will destroy the
+	 * action if it was created by classifier). So only case when bind count
+	 * can be changed after initial check is when unbound action is
+	 * destroyed by act API while classifier binds to action with same id
+	 * concurrently. This result either creation of new action(same behavior
+	 * as before), or reusing existing action if concurrent process
+	 * increments reference count before action is deleted. Both scenarios
+	 * are acceptable.
+	 */
 	if (p) {
-		if (bind)
-			p->tcfa_bindcnt--;
-		else if (strict && p->tcfa_bindcnt > 0)
+		if (!bind && strict && atomic_read(&p->tcfa_bindcnt) > 0)
 			return -EPERM;
 
-		p->tcfa_refcnt--;
-		if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) {
-			if (p->ops->cleanup)
-				p->ops->cleanup(p);
-			tcf_idr_remove(p->idrinfo, p);
+		if (__tcf_action_put(p, bind))
 			ret = ACT_P_DELETED;
-		}
 	}
 
 	return ret;
@@ -111,10 +149,15 @@ EXPORT_SYMBOL(__tcf_idr_release);
 
 static size_t tcf_action_shared_attrs_size(const struct tc_action *act)
 {
+	struct tc_cookie *act_cookie;
 	u32 cookie_len = 0;
 
-	if (act->act_cookie)
-		cookie_len = nla_total_size(act->act_cookie->len);
+	rcu_read_lock();
+	act_cookie = rcu_dereference(act->act_cookie);
+
+	if (act_cookie)
+		cookie_len = nla_total_size(act_cookie->len);
+	rcu_read_unlock();
 
 	return  nla_total_size(0) /* action number nested */
 		+ nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */
@@ -257,46 +300,77 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(tcf_generic_walker);
 
-static struct tc_action *tcf_idr_lookup(u32 index, struct tcf_idrinfo *idrinfo)
+static bool __tcf_idr_check(struct tc_action_net *tn, u32 index,
+			    struct tc_action **a, int bind)
 {
-	struct tc_action *p = NULL;
+	struct tcf_idrinfo *idrinfo = tn->idrinfo;
+	struct tc_action *p;
 
 	spin_lock(&idrinfo->lock);
 	p = idr_find(&idrinfo->action_idr, index);
+	if (IS_ERR(p)) {
+		p = NULL;
+	} else if (p) {
+		refcount_inc(&p->tcfa_refcnt);
+		if (bind)
+			atomic_inc(&p->tcfa_bindcnt);
+	}
 	spin_unlock(&idrinfo->lock);
 
-	return p;
+	if (p) {
+		*a = p;
+		return true;
+	}
+	return false;
 }
 
 int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
 {
-	struct tcf_idrinfo *idrinfo = tn->idrinfo;
-	struct tc_action *p = tcf_idr_lookup(index, idrinfo);
-
-	if (p) {
-		*a = p;
-		return 1;
-	}
-	return 0;
+	return __tcf_idr_check(tn, index, a, 0);
 }
 EXPORT_SYMBOL(tcf_idr_search);
 
 bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
 		   int bind)
 {
+	return __tcf_idr_check(tn, index, a, bind);
+}
+EXPORT_SYMBOL(tcf_idr_check);
+
+int tcf_idr_delete_index(struct tc_action_net *tn, u32 index)
+{
 	struct tcf_idrinfo *idrinfo = tn->idrinfo;
-	struct tc_action *p = tcf_idr_lookup(index, idrinfo);
+	struct tc_action *p;
+	int ret = 0;
 
-	if (index && p) {
-		if (bind)
-			p->tcfa_bindcnt++;
-		p->tcfa_refcnt++;
-		*a = p;
-		return true;
+	spin_lock(&idrinfo->lock);
+	p = idr_find(&idrinfo->action_idr, index);
+	if (!p) {
+		spin_unlock(&idrinfo->lock);
+		return -ENOENT;
 	}
-	return false;
+
+	if (!atomic_read(&p->tcfa_bindcnt)) {
+		if (refcount_dec_and_test(&p->tcfa_refcnt)) {
+			struct module *owner = p->ops->owner;
+
+			WARN_ON(p != idr_remove(&idrinfo->action_idr,
+						p->tcfa_index));
+			spin_unlock(&idrinfo->lock);
+
+			tcf_action_cleanup(p);
+			module_put(owner);
+			return 0;
+		}
+		ret = 0;
+	} else {
+		ret = -EPERM;
+	}
+
+	spin_unlock(&idrinfo->lock);
+	return ret;
 }
-EXPORT_SYMBOL(tcf_idr_check);
+EXPORT_SYMBOL(tcf_idr_delete_index);
 
 int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 		   struct tc_action **a, const struct tc_action_ops *ops,
@@ -304,14 +378,13 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 {
 	struct tc_action *p = kzalloc(ops->size, GFP_KERNEL);
 	struct tcf_idrinfo *idrinfo = tn->idrinfo;
-	struct idr *idr = &idrinfo->action_idr;
 	int err = -ENOMEM;
 
 	if (unlikely(!p))
 		return -ENOMEM;
-	p->tcfa_refcnt = 1;
+	refcount_set(&p->tcfa_refcnt, 1);
 	if (bind)
-		p->tcfa_bindcnt = 1;
+		atomic_set(&p->tcfa_bindcnt, 1);
 
 	if (cpustats) {
 		p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
@@ -322,20 +395,6 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 			goto err2;
 	}
 	spin_lock_init(&p->tcfa_lock);
-	idr_preload(GFP_KERNEL);
-	spin_lock(&idrinfo->lock);
-	/* user doesn't specify an index */
-	if (!index) {
-		index = 1;
-		err = idr_alloc_u32(idr, NULL, &index, UINT_MAX, GFP_ATOMIC);
-	} else {
-		err = idr_alloc_u32(idr, NULL, &index, index, GFP_ATOMIC);
-	}
-	spin_unlock(&idrinfo->lock);
-	idr_preload_end();
-	if (err)
-		goto err3;
-
 	p->tcfa_index = index;
 	p->tcfa_tm.install = jiffies;
 	p->tcfa_tm.lastuse = jiffies;
@@ -345,7 +404,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 					&p->tcfa_rate_est,
 					&p->tcfa_lock, NULL, est);
 		if (err)
-			goto err4;
+			goto err3;
 	}
 
 	p->idrinfo = idrinfo;
@@ -353,8 +412,6 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 	INIT_LIST_HEAD(&p->list);
 	*a = p;
 	return 0;
-err4:
-	idr_remove(idr, index);
 err3:
 	free_percpu(p->cpu_qstats);
 err2:
@@ -370,11 +427,78 @@ void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
 	struct tcf_idrinfo *idrinfo = tn->idrinfo;
 
 	spin_lock(&idrinfo->lock);
-	idr_replace(&idrinfo->action_idr, a, a->tcfa_index);
+	/* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
+	WARN_ON(!IS_ERR(idr_replace(&idrinfo->action_idr, a, a->tcfa_index)));
 	spin_unlock(&idrinfo->lock);
 }
 EXPORT_SYMBOL(tcf_idr_insert);
 
+/* Cleanup idr index that was allocated but not initialized. */
+
+void tcf_idr_cleanup(struct tc_action_net *tn, u32 index)
+{
+	struct tcf_idrinfo *idrinfo = tn->idrinfo;
+
+	spin_lock(&idrinfo->lock);
+	/* Remove ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
+	WARN_ON(!IS_ERR(idr_remove(&idrinfo->action_idr, index)));
+	spin_unlock(&idrinfo->lock);
+}
+EXPORT_SYMBOL(tcf_idr_cleanup);
+
+/* Check if action with specified index exists. If actions is found, increments
+ * its reference and bind counters, and return 1. Otherwise insert temporary
+ * error pointer (to prevent concurrent users from inserting actions with same
+ * index) and return 0.
+ */
+
+int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
+			struct tc_action **a, int bind)
+{
+	struct tcf_idrinfo *idrinfo = tn->idrinfo;
+	struct tc_action *p;
+	int ret;
+
+again:
+	spin_lock(&idrinfo->lock);
+	if (*index) {
+		p = idr_find(&idrinfo->action_idr, *index);
+		if (IS_ERR(p)) {
+			/* This means that another process allocated
+			 * index but did not assign the pointer yet.
+			 */
+			spin_unlock(&idrinfo->lock);
+			goto again;
+		}
+
+		if (p) {
+			refcount_inc(&p->tcfa_refcnt);
+			if (bind)
+				atomic_inc(&p->tcfa_bindcnt);
+			*a = p;
+			ret = 1;
+		} else {
+			*a = NULL;
+			ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index,
+					    *index, GFP_ATOMIC);
+			if (!ret)
+				idr_replace(&idrinfo->action_idr,
+					    ERR_PTR(-EBUSY), *index);
+		}
+	} else {
+		*index = 1;
+		*a = NULL;
+		ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index,
+				    UINT_MAX, GFP_ATOMIC);
+		if (!ret)
+			idr_replace(&idrinfo->action_idr, ERR_PTR(-EBUSY),
+				    *index);
+	}
+	spin_unlock(&idrinfo->lock);
+	return ret;
+}
+EXPORT_SYMBOL(tcf_idr_check_alloc);
+
 void tcf_idrinfo_destroy(const struct tc_action_ops *ops,
 			 struct tcf_idrinfo *idrinfo)
 {
@@ -538,13 +662,15 @@ repeat:
 }
 EXPORT_SYMBOL(tcf_action_exec);
 
-int tcf_action_destroy(struct list_head *actions, int bind)
+int tcf_action_destroy(struct tc_action *actions[], int bind)
 {
 	const struct tc_action_ops *ops;
-	struct tc_action *a, *tmp;
-	int ret = 0;
+	struct tc_action *a;
+	int ret = 0, i;
 
-	list_for_each_entry_safe(a, tmp, actions, list) {
+	for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
+		a = actions[i];
+		actions[i] = NULL;
 		ops = a->ops;
 		ret = __tcf_idr_release(a, bind, true);
 		if (ret == ACT_P_DELETED)
@@ -555,6 +681,24 @@ int tcf_action_destroy(struct list_head *actions, int bind)
 	return ret;
 }
 
+static int tcf_action_put(struct tc_action *p)
+{
+	return __tcf_action_put(p, false);
+}
+
+static void tcf_action_put_many(struct tc_action *actions[])
+{
+	int i;
+
+	for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
+		struct tc_action *a = actions[i];
+		const struct tc_action_ops *ops = a->ops;
+
+		if (tcf_action_put(a))
+			module_put(ops->owner);
+	}
+}
+
 int
 tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
@@ -567,16 +711,22 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 	int err = -EINVAL;
 	unsigned char *b = skb_tail_pointer(skb);
 	struct nlattr *nest;
+	struct tc_cookie *cookie;
 
 	if (nla_put_string(skb, TCA_KIND, a->ops->kind))
 		goto nla_put_failure;
 	if (tcf_action_copy_stats(skb, a, 0))
 		goto nla_put_failure;
-	if (a->act_cookie) {
-		if (nla_put(skb, TCA_ACT_COOKIE, a->act_cookie->len,
-			    a->act_cookie->data))
+
+	rcu_read_lock();
+	cookie = rcu_dereference(a->act_cookie);
+	if (cookie) {
+		if (nla_put(skb, TCA_ACT_COOKIE, cookie->len, cookie->data)) {
+			rcu_read_unlock();
 			goto nla_put_failure;
+		}
 	}
+	rcu_read_unlock();
 
 	nest = nla_nest_start(skb, TCA_OPTIONS);
 	if (nest == NULL)
@@ -593,14 +743,15 @@ nla_put_failure:
 }
 EXPORT_SYMBOL(tcf_action_dump_1);
 
-int tcf_action_dump(struct sk_buff *skb, struct list_head *actions,
+int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[],
 		    int bind, int ref)
 {
 	struct tc_action *a;
-	int err = -EINVAL;
+	int err = -EINVAL, i;
 	struct nlattr *nest;
 
-	list_for_each_entry(a, actions, list) {
+	for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
+		a = actions[i];
 		nest = nla_nest_start(skb, a->order);
 		if (nest == NULL)
 			goto nla_put_failure;
@@ -638,6 +789,7 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 				    struct nlattr *nla, struct nlattr *est,
 				    char *name, int ovr, int bind,
+				    bool rtnl_held,
 				    struct netlink_ext_ack *extack)
 {
 	struct tc_action *a;
@@ -688,9 +840,11 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 	a_o = tc_lookup_action_n(act_name);
 	if (a_o == NULL) {
 #ifdef CONFIG_MODULES
-		rtnl_unlock();
+		if (rtnl_held)
+			rtnl_unlock();
 		request_module("act_%s", act_name);
-		rtnl_lock();
+		if (rtnl_held)
+			rtnl_lock();
 
 		a_o = tc_lookup_action_n(act_name);
 
@@ -713,19 +867,15 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 	/* backward compatibility for policer */
 	if (name == NULL)
 		err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
-				extack);
+				rtnl_held, extack);
 	else
-		err = a_o->init(net, nla, est, &a, ovr, bind, extack);
+		err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held,
+				extack);
 	if (err < 0)
 		goto err_mod;
 
-	if (name == NULL && tb[TCA_ACT_COOKIE]) {
-		if (a->act_cookie) {
-			kfree(a->act_cookie->data);
-			kfree(a->act_cookie);
-		}
-		a->act_cookie = cookie;
-	}
+	if (!name && tb[TCA_ACT_COOKIE])
+		tcf_set_action_cookie(&a->act_cookie, cookie);
 
 	/* module count goes up only when brand new policy is created
 	 * if it exists and is only bound to in a_o->init() then
@@ -737,10 +887,9 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 	if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) {
 		err = tcf_action_goto_chain_init(a, tp);
 		if (err) {
-			LIST_HEAD(actions);
+			struct tc_action *actions[] = { a, NULL };
 
-			list_add_tail(&a->list, &actions);
-			tcf_action_destroy(&actions, bind);
+			tcf_action_destroy(actions, bind);
 			NL_SET_ERR_MSG(extack, "Failed to init TC action chain");
 			return ERR_PTR(err);
 		}
@@ -758,21 +907,12 @@ err_out:
 	return ERR_PTR(err);
 }
 
-static void cleanup_a(struct list_head *actions, int ovr)
-{
-	struct tc_action *a;
-
-	if (!ovr)
-		return;
-
-	list_for_each_entry(a, actions, list)
-		a->tcfa_refcnt--;
-}
+/* Returns numbers of initialized actions or negative error. */
 
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		    struct nlattr *est, char *name, int ovr, int bind,
-		    struct list_head *actions, size_t *attr_size,
-		    struct netlink_ext_ack *extack)
+		    struct tc_action *actions[], size_t *attr_size,
+		    bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *act;
@@ -786,25 +926,19 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 
 	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
 		act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
-					extack);
+					rtnl_held, extack);
 		if (IS_ERR(act)) {
 			err = PTR_ERR(act);
 			goto err;
 		}
 		act->order = i;
 		sz += tcf_action_fill_size(act);
-		if (ovr)
-			act->tcfa_refcnt++;
-		list_add_tail(&act->list, actions);
+		/* Start from index 0 */
+		actions[i - 1] = act;
 	}
 
 	*attr_size = tcf_action_full_attrs_size(sz);
-
-	/* Remove the temp refcnt which was necessary to protect against
-	 * destroying an existing action which was being replaced
-	 */
-	cleanup_a(actions, ovr);
-	return 0;
+	return i - 1;
 
 err:
 	tcf_action_destroy(actions, bind);
@@ -855,7 +989,7 @@ errout:
 	return -1;
 }
 
-static int tca_get_fill(struct sk_buff *skb, struct list_head *actions,
+static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[],
 			u32 portid, u32 seq, u16 flags, int event, int bind,
 			int ref)
 {
@@ -891,7 +1025,7 @@ out_nlmsg_trim:
 
 static int
 tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
-	       struct list_head *actions, int event,
+	       struct tc_action *actions[], int event,
 	       struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb;
@@ -900,7 +1034,7 @@ tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
 	if (!skb)
 		return -ENOBUFS;
 	if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event,
-			 0, 0) <= 0) {
+			 0, 1) <= 0) {
 		NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
 		kfree_skb(skb);
 		return -EINVAL;
@@ -1027,9 +1161,41 @@ err_out:
 	return err;
 }
 
+static int tcf_action_delete(struct net *net, struct tc_action *actions[],
+			     int *acts_deleted, struct netlink_ext_ack *extack)
+{
+	u32 act_index;
+	int ret, i;
+
+	for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
+		struct tc_action *a = actions[i];
+		const struct tc_action_ops *ops = a->ops;
+
+		/* Actions can be deleted concurrently so we must save their
+		 * type and id to search again after reference is released.
+		 */
+		act_index = a->tcfa_index;
+
+		if (tcf_action_put(a)) {
+			/* last reference, action was deleted concurrently */
+			module_put(ops->owner);
+		} else  {
+			/* now do the delete */
+			ret = ops->delete(net, act_index);
+			if (ret < 0) {
+				*acts_deleted = i + 1;
+				return ret;
+			}
+		}
+	}
+	*acts_deleted = i;
+	return 0;
+}
+
 static int
-tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
-	       u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
+tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
+	       int *acts_deleted, u32 portid, size_t attr_size,
+	       struct netlink_ext_ack *extack)
 {
 	int ret;
 	struct sk_buff *skb;
@@ -1040,14 +1206,14 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
 		return -ENOBUFS;
 
 	if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION,
-			 0, 1) <= 0) {
+			 0, 2) <= 0) {
 		NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes");
 		kfree_skb(skb);
 		return -EINVAL;
 	}
 
 	/* now do the delete */
-	ret = tcf_action_destroy(actions, 0);
+	ret = tcf_action_delete(net, actions, acts_deleted, extack);
 	if (ret < 0) {
 		NL_SET_ERR_MSG(extack, "Failed to delete TC action");
 		kfree_skb(skb);
@@ -1069,7 +1235,8 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *act;
 	size_t attr_size = 0;
-	LIST_HEAD(actions);
+	struct tc_action *actions[TCA_ACT_MAX_PRIO + 1] = {};
+	int acts_deleted = 0;
 
 	ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
 	if (ret < 0)
@@ -1091,27 +1258,27 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 		}
 		act->order = i;
 		attr_size += tcf_action_fill_size(act);
-		list_add_tail(&act->list, &actions);
+		actions[i - 1] = act;
 	}
 
 	attr_size = tcf_action_full_attrs_size(attr_size);
 
 	if (event == RTM_GETACTION)
-		ret = tcf_get_notify(net, portid, n, &actions, event, extack);
+		ret = tcf_get_notify(net, portid, n, actions, event, extack);
 	else { /* delete */
-		ret = tcf_del_notify(net, n, &actions, portid, attr_size, extack);
+		ret = tcf_del_notify(net, n, actions, &acts_deleted, portid,
+				     attr_size, extack);
 		if (ret)
 			goto err;
 		return ret;
 	}
 err:
-	if (event != RTM_GETACTION)
-		tcf_action_destroy(&actions, 0);
+	tcf_action_put_many(&actions[acts_deleted]);
 	return ret;
 }
 
 static int
-tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
+tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
 	       u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb;
@@ -1142,14 +1309,17 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
 {
 	size_t attr_size = 0;
 	int ret = 0;
-	LIST_HEAD(actions);
+	struct tc_action *actions[TCA_ACT_MAX_PRIO] = {};
 
-	ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions,
-			      &attr_size, extack);
-	if (ret)
+	ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, actions,
+			      &attr_size, true, extack);
+	if (ret < 0)
 		return ret;
+	ret = tcf_add_notify(net, n, actions, portid, attr_size, extack);
+	if (ovr)
+		tcf_action_put_many(actions);
 
-	return tcf_add_notify(net, n, &actions, portid, attr_size, extack);
+	return ret;
 }
 
 static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 18089c02e557..06f743d8ed41 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -141,8 +141,8 @@ static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act,
 	struct tcf_bpf *prog = to_bpf(act);
 	struct tc_act_bpf opt = {
 		.index   = prog->tcf_index,
-		.refcnt  = prog->tcf_refcnt - ref,
-		.bindcnt = prog->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&prog->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&prog->tcf_bindcnt) - bind,
 		.action  = prog->tcf_action,
 	};
 	struct tcf_t tm;
@@ -276,7 +276,8 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
 
 static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **act,
-			int replace, int bind, struct netlink_ext_ack *extack)
+			int replace, int bind, bool rtnl_held,
+			struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, bpf_net_id);
 	struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
@@ -298,21 +299,27 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
 
-	if (!tcf_idr_check(tn, parm->index, act, bind)) {
+	ret = tcf_idr_check_alloc(tn, &parm->index, act, bind);
+	if (!ret) {
 		ret = tcf_idr_create(tn, parm->index, est, act,
 				     &act_bpf_ops, bind, true);
-		if (ret < 0)
+		if (ret < 0) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 
 		res = ACT_P_CREATED;
-	} else {
+	} else if (ret > 0) {
 		/* Don't override defaults. */
 		if (bind)
 			return 0;
 
-		tcf_idr_release(*act, bind);
-		if (!replace)
+		if (!replace) {
+			tcf_idr_release(*act, bind);
 			return -EEXIST;
+		}
+	} else {
+		return ret;
 	}
 
 	is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
@@ -355,8 +362,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 
 	return res;
 out:
-	if (res == ACT_P_CREATED)
-		tcf_idr_release(*act, bind);
+	tcf_idr_release(*act, bind);
 
 	return ret;
 }
@@ -387,6 +393,13 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_bpf_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, bpf_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_bpf_ops __read_mostly = {
 	.kind		=	"bpf",
 	.type		=	TCA_ACT_BPF,
@@ -397,6 +410,7 @@ static struct tc_action_ops act_bpf_ops __read_mostly = {
 	.init		=	tcf_bpf_init,
 	.walk		=	tcf_bpf_walker,
 	.lookup		=	tcf_bpf_search,
+	.delete		=	tcf_bpf_delete,
 	.size		=	sizeof(struct tcf_bpf),
 };
 
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index e4b880fa51fe..1e31f0e448e2 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -96,7 +96,7 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
 
 static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 			     struct nlattr *est, struct tc_action **a,
-			     int ovr, int bind,
+			     int ovr, int bind, bool rtnl_held,
 			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, connmark_net_id);
@@ -118,11 +118,14 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_CONNMARK_PARMS]);
 
-	if (!tcf_idr_check(tn, parm->index, a, bind)) {
+	ret = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (!ret) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_connmark_ops, bind, false);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 
 		ci = to_connmark(*a);
 		ci->tcf_action = parm->action;
@@ -131,16 +134,18 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 
 		tcf_idr_insert(tn, *a);
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (ret > 0) {
 		ci = to_connmark(*a);
 		if (bind)
 			return 0;
-		tcf_idr_release(*a, bind);
-		if (!ovr)
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
 			return -EEXIST;
+		}
 		/* replacing action and zone */
 		ci->tcf_action = parm->action;
 		ci->zone = parm->zone;
+		ret = 0;
 	}
 
 	return ret;
@@ -154,8 +159,8 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
 
 	struct tc_connmark opt = {
 		.index   = ci->tcf_index,
-		.refcnt  = ci->tcf_refcnt - ref,
-		.bindcnt = ci->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&ci->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&ci->tcf_bindcnt) - bind,
 		.action  = ci->tcf_action,
 		.zone   = ci->zone,
 	};
@@ -193,6 +198,13 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_connmark_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, connmark_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_connmark_ops = {
 	.kind		=	"connmark",
 	.type		=	TCA_ACT_CONNMARK,
@@ -202,6 +214,7 @@ static struct tc_action_ops act_connmark_ops = {
 	.init		=	tcf_connmark_init,
 	.walk		=	tcf_connmark_walker,
 	.lookup		=	tcf_connmark_search,
+	.delete		=	tcf_connmark_delete,
 	.size		=	sizeof(struct tcf_connmark_info),
 };
 
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 6e7124e57918..4e8c383f379e 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -46,7 +46,8 @@ static struct tc_action_ops act_csum_ops;
 
 static int tcf_csum_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a, int ovr,
-			 int bind, struct netlink_ext_ack *extack)
+			 int bind, bool rtnl_held,
+			 struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, csum_net_id);
 	struct tcf_csum_params *params_old, *params_new;
@@ -66,18 +67,24 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 	parm = nla_data(tb[TCA_CSUM_PARMS]);
 
-	if (!tcf_idr_check(tn, parm->index, a, bind)) {
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (!err) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_csum_ops, bind, true);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (err > 0) {
 		if (bind)/* dont override defaults */
 			return 0;
-		tcf_idr_release(*a, bind);
-		if (!ovr)
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
 			return -EEXIST;
+		}
+	} else {
+		return err;
 	}
 
 	p = to_tcf_csum(*a);
@@ -85,8 +92,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
 
 	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
 	if (unlikely(!params_new)) {
-		if (ret == ACT_P_CREATED)
-			tcf_idr_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		return -ENOMEM;
 	}
 	params_old = rtnl_dereference(p->params);
@@ -597,8 +603,8 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
 	struct tcf_csum_params *params;
 	struct tc_csum opt = {
 		.index   = p->tcf_index,
-		.refcnt  = p->tcf_refcnt - ref,
-		.bindcnt = p->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&p->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&p->tcf_bindcnt) - bind,
 		.action  = p->tcf_action,
 	};
 	struct tcf_t t;
@@ -653,6 +659,13 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act)
 	return nla_total_size(sizeof(struct tc_csum));
 }
 
+static int tcf_csum_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, csum_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_csum_ops = {
 	.kind		= "csum",
 	.type		= TCA_ACT_CSUM,
@@ -664,6 +677,7 @@ static struct tc_action_ops act_csum_ops = {
 	.walk		= tcf_csum_walker,
 	.lookup		= tcf_csum_search,
 	.get_fill_size  = tcf_csum_get_fill_size,
+	.delete		= tcf_csum_delete,
 	.size		= sizeof(struct tcf_csum),
 };
 
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 4dc4f153cad8..661b72b9147d 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -56,7 +56,8 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
 
 static int tcf_gact_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a,
-			 int ovr, int bind, struct netlink_ext_ack *extack)
+			 int ovr, int bind, bool rtnl_held,
+			 struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, gact_net_id);
 	struct nlattr *tb[TCA_GACT_MAX + 1];
@@ -90,18 +91,24 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
 	}
 #endif
 
-	if (!tcf_idr_check(tn, parm->index, a, bind)) {
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (!err) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_gact_ops, bind, true);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (err > 0) {
 		if (bind)/* dont override defaults */
 			return 0;
-		tcf_idr_release(*a, bind);
-		if (!ovr)
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
 			return -EEXIST;
+		}
+	} else {
+		return err;
 	}
 
 	gact = to_gact(*a);
@@ -169,8 +176,8 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a,
 	struct tcf_gact *gact = to_gact(a);
 	struct tc_gact opt = {
 		.index   = gact->tcf_index,
-		.refcnt  = gact->tcf_refcnt - ref,
-		.bindcnt = gact->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&gact->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&gact->tcf_bindcnt) - bind,
 		.action  = gact->tcf_action,
 	};
 	struct tcf_t t;
@@ -230,6 +237,13 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act)
 	return sz;
 }
 
+static int tcf_gact_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, gact_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_gact_ops = {
 	.kind		=	"gact",
 	.type		=	TCA_ACT_GACT,
@@ -241,6 +255,7 @@ static struct tc_action_ops act_gact_ops = {
 	.walk		=	tcf_gact_walker,
 	.lookup		=	tcf_gact_search,
 	.get_fill_size	=	tcf_gact_get_fill_size,
+	.delete		=	tcf_gact_delete,
 	.size		=	sizeof(struct tcf_gact),
 };
 
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 20d7d36b2fc9..3d6e265758c0 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -448,7 +448,8 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
 
 static int tcf_ife_init(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **a,
-			int ovr, int bind, struct netlink_ext_ack *extack)
+			int ovr, int bind, bool rtnl_held,
+			struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, ife_net_id);
 	struct nlattr *tb[TCA_IFE_MAX + 1];
@@ -483,7 +484,12 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 	if (!p)
 		return -ENOMEM;
 
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0) {
+		kfree(p);
+		return err;
+	}
+	exists = err;
 	if (exists && bind) {
 		kfree(p);
 		return 0;
@@ -493,16 +499,15 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 		ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops,
 				     bind, true);
 		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			kfree(p);
 			return ret;
 		}
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
-		if (!ovr) {
-			kfree(p);
-			return -EEXIST;
-		}
+		kfree(p);
+		return -EEXIST;
 	}
 
 	ife = to_ife(*a);
@@ -547,6 +552,8 @@ metadata_parse_err:
 
 			if (exists)
 				spin_unlock_bh(&ife->tcf_lock);
+			tcf_idr_release(*a, bind);
+
 			kfree(p);
 			return err;
 		}
@@ -596,8 +603,8 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
 	struct tcf_ife_params *p = rtnl_dereference(ife->params);
 	struct tc_ife opt = {
 		.index = ife->tcf_index,
-		.refcnt = ife->tcf_refcnt - ref,
-		.bindcnt = ife->tcf_bindcnt - bind,
+		.refcnt = refcount_read(&ife->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&ife->tcf_bindcnt) - bind,
 		.action = ife->tcf_action,
 		.flags = p->flags,
 	};
@@ -843,6 +850,13 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_ife_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, ife_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_ife_ops = {
 	.kind = "ife",
 	.type = TCA_ACT_IFE,
@@ -853,6 +867,7 @@ static struct tc_action_ops act_ife_ops = {
 	.init = tcf_ife_init,
 	.walk = tcf_ife_walker,
 	.lookup = tcf_ife_search,
+	.delete = tcf_ife_delete,
 	.size =	sizeof(struct tcf_ife_info),
 };
 
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 14c312d7908f..0dc787a57798 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -119,13 +119,18 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
 	if (tb[TCA_IPT_INDEX] != NULL)
 		index = nla_get_u32(tb[TCA_IPT_INDEX]);
 
-	exists = tcf_idr_check(tn, index, a, bind);
+	err = tcf_idr_check_alloc(tn, &index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
 	if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) {
 		if (exists)
 			tcf_idr_release(*a, bind);
+		else
+			tcf_idr_cleanup(tn, index);
 		return -EINVAL;
 	}
 
@@ -133,22 +138,27 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
 	if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) {
 		if (exists)
 			tcf_idr_release(*a, bind);
+		else
+			tcf_idr_cleanup(tn, index);
 		return -EINVAL;
 	}
 
 	if (!exists) {
 		ret = tcf_idr_create(tn, index, est, a, ops, bind,
 				     false);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, index);
 			return ret;
+		}
 		ret = ACT_P_CREATED;
 	} else {
 		if (bind)/* dont override defaults */
 			return 0;
-		tcf_idr_release(*a, bind);
 
-		if (!ovr)
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
 			return -EEXIST;
+		}
 	}
 	hook = nla_get_u32(tb[TCA_IPT_HOOK]);
 
@@ -196,7 +206,8 @@ err1:
 
 static int tcf_ipt_init(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **a, int ovr,
-			int bind, struct netlink_ext_ack *extack)
+			int bind, bool rtnl_held,
+			struct netlink_ext_ack *extack)
 {
 	return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
 			      bind);
@@ -204,7 +215,8 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla,
 
 static int tcf_xt_init(struct net *net, struct nlattr *nla,
 		       struct nlattr *est, struct tc_action **a, int ovr,
-		       int bind, struct netlink_ext_ack *extack)
+		       int bind, bool unlocked,
+		       struct netlink_ext_ack *extack)
 {
 	return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
 			      bind);
@@ -280,8 +292,8 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind,
 	if (unlikely(!t))
 		goto nla_put_failure;
 
-	c.bindcnt = ipt->tcf_bindcnt - bind;
-	c.refcnt = ipt->tcf_refcnt - ref;
+	c.bindcnt = atomic_read(&ipt->tcf_bindcnt) - bind;
+	c.refcnt = refcount_read(&ipt->tcf_refcnt) - ref;
 	strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name);
 
 	if (nla_put(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t) ||
@@ -322,6 +334,13 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_ipt_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, ipt_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_ipt_ops = {
 	.kind		=	"ipt",
 	.type		=	TCA_ACT_IPT,
@@ -332,6 +351,7 @@ static struct tc_action_ops act_ipt_ops = {
 	.init		=	tcf_ipt_init,
 	.walk		=	tcf_ipt_walker,
 	.lookup		=	tcf_ipt_search,
+	.delete		=	tcf_ipt_delete,
 	.size		=	sizeof(struct tcf_ipt),
 };
 
@@ -372,6 +392,13 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_xt_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, xt_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_xt_ops = {
 	.kind		=	"xt",
 	.type		=	TCA_ACT_XT,
@@ -382,6 +409,7 @@ static struct tc_action_ops act_xt_ops = {
 	.init		=	tcf_xt_init,
 	.walk		=	tcf_xt_walker,
 	.lookup		=	tcf_xt_search,
+	.delete		=	tcf_xt_delete,
 	.size		=	sizeof(struct tcf_ipt),
 };
 
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index fd34015331ab..6afd89a36c69 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -68,8 +68,9 @@ static unsigned int mirred_net_id;
 static struct tc_action_ops act_mirred_ops;
 
 static int tcf_mirred_init(struct net *net, struct nlattr *nla,
-			   struct nlattr *est, struct tc_action **a, int ovr,
-			   int bind, struct netlink_ext_ack *extack)
+			   struct nlattr *est, struct tc_action **a,
+			   int ovr, int bind, bool rtnl_held,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, mirred_net_id);
 	struct nlattr *tb[TCA_MIRRED_MAX + 1];
@@ -78,7 +79,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	struct tcf_mirred *m;
 	struct net_device *dev;
 	bool exists = false;
-	int ret;
+	int ret, err;
 
 	if (!nla) {
 		NL_SET_ERR_MSG_MOD(extack, "Mirred requires attributes to be passed");
@@ -93,7 +94,10 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	}
 	parm = nla_data(tb[TCA_MIRRED_PARMS]);
 
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
@@ -106,6 +110,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 	default:
 		if (exists)
 			tcf_idr_release(*a, bind);
+		else
+			tcf_idr_cleanup(tn, parm->index);
 		NL_SET_ERR_MSG_MOD(extack, "Unknown mirred option");
 		return -EINVAL;
 	}
@@ -114,6 +120,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 		if (dev == NULL) {
 			if (exists)
 				tcf_idr_release(*a, bind);
+			else
+				tcf_idr_cleanup(tn, parm->index);
 			return -ENODEV;
 		}
 		mac_header_xmit = dev_is_mac_header_xmit(dev);
@@ -123,18 +131,20 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 
 	if (!exists) {
 		if (!dev) {
+			tcf_idr_cleanup(tn, parm->index);
 			NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
 			return -EINVAL;
 		}
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_mirred_ops, bind, true);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
-		if (!ovr)
-			return -EEXIST;
+		return -EEXIST;
 	}
 	m = to_mirred(*a);
 
@@ -250,8 +260,8 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
 	struct tc_mirred opt = {
 		.index   = m->tcf_index,
 		.action  = m->tcf_action,
-		.refcnt  = m->tcf_refcnt - ref,
-		.bindcnt = m->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&m->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&m->tcf_bindcnt) - bind,
 		.eaction = m->tcfm_eaction,
 		.ifindex = dev ? dev->ifindex : 0,
 	};
@@ -321,6 +331,13 @@ static struct net_device *tcf_mirred_get_dev(const struct tc_action *a)
 	return rtnl_dereference(m->tcfm_dev);
 }
 
+static int tcf_mirred_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, mirred_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_mirred_ops = {
 	.kind		=	"mirred",
 	.type		=	TCA_ACT_MIRRED,
@@ -334,6 +351,7 @@ static struct tc_action_ops act_mirred_ops = {
 	.lookup		=	tcf_mirred_search,
 	.size		=	sizeof(struct tcf_mirred),
 	.get_dev	=	tcf_mirred_get_dev,
+	.delete		=	tcf_mirred_delete,
 };
 
 static __net_init int mirred_init_net(struct net *net)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 4b5848b6c252..4dd9188a72fd 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -38,7 +38,7 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
 
 static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
 			struct tc_action **a, int ovr, int bind,
-			struct netlink_ext_ack *extack)
+			bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, nat_net_id);
 	struct nlattr *tb[TCA_NAT_MAX + 1];
@@ -57,18 +57,24 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
 		return -EINVAL;
 	parm = nla_data(tb[TCA_NAT_PARMS]);
 
-	if (!tcf_idr_check(tn, parm->index, a, bind)) {
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (!err) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_nat_ops, bind, false);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (err > 0) {
 		if (bind)
 			return 0;
-		tcf_idr_release(*a, bind);
-		if (!ovr)
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
 			return -EEXIST;
+		}
+	} else {
+		return err;
 	}
 	p = to_tcf_nat(*a);
 
@@ -257,8 +263,8 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
 
 		.index    = p->tcf_index,
 		.action   = p->tcf_action,
-		.refcnt   = p->tcf_refcnt - ref,
-		.bindcnt  = p->tcf_bindcnt - bind,
+		.refcnt   = refcount_read(&p->tcf_refcnt) - ref,
+		.bindcnt  = atomic_read(&p->tcf_bindcnt) - bind,
 	};
 	struct tcf_t t;
 
@@ -294,6 +300,13 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_nat_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, nat_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_nat_ops = {
 	.kind		=	"nat",
 	.type		=	TCA_ACT_NAT,
@@ -303,6 +316,7 @@ static struct tc_action_ops act_nat_ops = {
 	.init		=	tcf_nat_init,
 	.walk		=	tcf_nat_walker,
 	.lookup		=	tcf_nat_search,
+	.delete		=	tcf_nat_delete,
 	.size		=	sizeof(struct tcf_nat),
 };
 
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 8a925c72db5f..cc8ffcd1ddb5 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -132,20 +132,23 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb,
 
 static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 			  struct nlattr *est, struct tc_action **a,
-			  int ovr, int bind, struct netlink_ext_ack *extack)
+			  int ovr, int bind, bool rtnl_held,
+			  struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, pedit_net_id);
 	struct nlattr *tb[TCA_PEDIT_MAX + 1];
-	struct nlattr *pattr;
-	struct tc_pedit *parm;
-	int ret = 0, err;
-	struct tcf_pedit *p;
 	struct tc_pedit_key *keys = NULL;
 	struct tcf_pedit_key_ex *keys_ex;
+	struct tc_pedit *parm;
+	struct nlattr *pattr;
+	struct tcf_pedit *p;
+	int ret = 0, err;
 	int ksize;
 
-	if (nla == NULL)
+	if (!nla) {
+		NL_SET_ERR_MSG_MOD(extack, "Pedit requires attributes to be passed");
 		return -EINVAL;
+	}
 
 	err = nla_parse_nested(tb, TCA_PEDIT_MAX, nla, pedit_policy, NULL);
 	if (err < 0)
@@ -154,47 +157,62 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 	pattr = tb[TCA_PEDIT_PARMS];
 	if (!pattr)
 		pattr = tb[TCA_PEDIT_PARMS_EX];
-	if (!pattr)
+	if (!pattr) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing required TCA_PEDIT_PARMS or TCA_PEDIT_PARMS_EX pedit attribute");
 		return -EINVAL;
+	}
 
 	parm = nla_data(pattr);
 	ksize = parm->nkeys * sizeof(struct tc_pedit_key);
-	if (nla_len(pattr) < sizeof(*parm) + ksize)
+	if (nla_len(pattr) < sizeof(*parm) + ksize) {
+		NL_SET_ERR_MSG_ATTR(extack, pattr, "Length of TCA_PEDIT_PARMS or TCA_PEDIT_PARMS_EX pedit attribute is invalid");
 		return -EINVAL;
+	}
 
 	keys_ex = tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys);
 	if (IS_ERR(keys_ex))
 		return PTR_ERR(keys_ex);
 
-	if (!tcf_idr_check(tn, parm->index, a, bind)) {
-		if (!parm->nkeys)
-			return -EINVAL;
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (!err) {
+		if (!parm->nkeys) {
+			tcf_idr_cleanup(tn, parm->index);
+			NL_SET_ERR_MSG_MOD(extack, "Pedit requires keys to be passed");
+			ret = -EINVAL;
+			goto out_free;
+		}
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_pedit_ops, bind, false);
-		if (ret)
-			return ret;
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
+			goto out_free;
+		}
 		p = to_pedit(*a);
 		keys = kmalloc(ksize, GFP_KERNEL);
-		if (keys == NULL) {
+		if (!keys) {
 			tcf_idr_release(*a, bind);
-			kfree(keys_ex);
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto out_free;
 		}
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (err > 0) {
 		if (bind)
-			return 0;
-		tcf_idr_release(*a, bind);
-		if (!ovr)
-			return -EEXIST;
+			goto out_free;
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
+			ret = -EEXIST;
+			goto out_free;
+		}
 		p = to_pedit(*a);
 		if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) {
 			keys = kmalloc(ksize, GFP_KERNEL);
 			if (!keys) {
-				kfree(keys_ex);
-				return -ENOMEM;
+				ret = -ENOMEM;
+				goto out_free;
 			}
 		}
+	} else {
+		return err;
 	}
 
 	spin_lock_bh(&p->tcf_lock);
@@ -214,12 +232,17 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 	if (ret == ACT_P_CREATED)
 		tcf_idr_insert(tn, *a);
 	return ret;
+out_free:
+	kfree(keys_ex);
+	return ret;
+
 }
 
 static void tcf_pedit_cleanup(struct tc_action *a)
 {
 	struct tcf_pedit *p = to_pedit(a);
 	struct tc_pedit_key *keys = p->tcfp_keys;
+
 	kfree(keys);
 	kfree(p->tcfp_keys_ex);
 }
@@ -284,11 +307,12 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
 	if (p->tcfp_nkeys > 0) {
 		struct tc_pedit_key *tkey = p->tcfp_keys;
 		struct tcf_pedit_key_ex *tkey_ex = p->tcfp_keys_ex;
-		enum pedit_header_type htype = TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
+		enum pedit_header_type htype =
+			TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
 		enum pedit_cmd cmd = TCA_PEDIT_KEY_EX_CMD_SET;
 
 		for (i = p->tcfp_nkeys; i > 0; i--, tkey++) {
-			u32 *ptr, _data;
+			u32 *ptr, hdata;
 			int offset = tkey->off;
 			int hoffset;
 			u32 val;
@@ -303,39 +327,39 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
 
 			rc = pedit_skb_hdr_offset(skb, htype, &hoffset);
 			if (rc) {
-				pr_info("tc filter pedit bad header type specified (0x%x)\n",
+				pr_info("tc action pedit bad header type specified (0x%x)\n",
 					htype);
 				goto bad;
 			}
 
 			if (tkey->offmask) {
-				char *d, _d;
+				u8 *d, _d;
 
 				if (!offset_valid(skb, hoffset + tkey->at)) {
-					pr_info("tc filter pedit 'at' offset %d out of bounds\n",
+					pr_info("tc action pedit 'at' offset %d out of bounds\n",
 						hoffset + tkey->at);
 					goto bad;
 				}
-				d = skb_header_pointer(skb, hoffset + tkey->at, 1,
-						       &_d);
+				d = skb_header_pointer(skb, hoffset + tkey->at,
+						       sizeof(_d), &_d);
 				if (!d)
 					goto bad;
 				offset += (*d & tkey->offmask) >> tkey->shift;
 			}
 
 			if (offset % 4) {
-				pr_info("tc filter pedit"
-					" offset must be on 32 bit boundaries\n");
+				pr_info("tc action pedit offset must be on 32 bit boundaries\n");
 				goto bad;
 			}
 
 			if (!offset_valid(skb, hoffset + offset)) {
-				pr_info("tc filter pedit offset %d out of bounds\n",
+				pr_info("tc action pedit offset %d out of bounds\n",
 					hoffset + offset);
 				goto bad;
 			}
 
-			ptr = skb_header_pointer(skb, hoffset + offset, 4, &_data);
+			ptr = skb_header_pointer(skb, hoffset + offset,
+						 sizeof(hdata), &hdata);
 			if (!ptr)
 				goto bad;
 			/* just do it, baby */
@@ -347,19 +371,20 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
 				val = (*ptr + tkey->val) & ~tkey->mask;
 				break;
 			default:
-				pr_info("tc filter pedit bad command (%d)\n",
+				pr_info("tc action pedit bad command (%d)\n",
 					cmd);
 				goto bad;
 			}
 
 			*ptr = ((*ptr & tkey->mask) ^ val);
-			if (ptr == &_data)
+			if (ptr == &hdata)
 				skb_store_bits(skb, hoffset + offset, ptr, 4);
 		}
 
 		goto done;
-	} else
+	} else {
 		WARN(1, "pedit BUG: index %d\n", p->tcf_index);
+	}
 
 bad:
 	p->tcf_qstats.overlimits++;
@@ -391,8 +416,8 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
 	opt->nkeys = p->tcfp_nkeys;
 	opt->flags = p->tcfp_flags;
 	opt->action = p->tcf_action;
-	opt->refcnt = p->tcf_refcnt - ref;
-	opt->bindcnt = p->tcf_bindcnt - bind;
+	opt->refcnt = refcount_read(&p->tcf_refcnt) - ref;
+	opt->bindcnt = atomic_read(&p->tcf_bindcnt) - bind;
 
 	if (p->tcfp_keys_ex) {
 		tcf_pedit_key_ex_dump(skb, p->tcfp_keys_ex, p->tcfp_nkeys);
@@ -435,6 +460,13 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_pedit_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, pedit_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_pedit_ops = {
 	.kind		=	"pedit",
 	.type		=	TCA_ACT_PEDIT,
@@ -445,6 +477,7 @@ static struct tc_action_ops act_pedit_ops = {
 	.init		=	tcf_pedit_init,
 	.walk		=	tcf_pedit_walker,
 	.lookup		=	tcf_pedit_search,
+	.delete		=	tcf_pedit_delete,
 	.size		=	sizeof(struct tcf_pedit),
 };
 
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 4e72bc2a0dfb..1f3192ea8df7 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -75,7 +75,7 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
 
 static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 			       struct nlattr *est, struct tc_action **a,
-			       int ovr, int bind,
+			       int ovr, int bind, bool rtnl_held,
 			       struct netlink_ext_ack *extack)
 {
 	int ret = 0, err;
@@ -101,20 +101,24 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 
 	parm = nla_data(tb[TCA_POLICE_TBF]);
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, NULL, a,
 				     &act_police_ops, bind, false);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
-		if (!ovr)
-			return -EEXIST;
+		return -EEXIST;
 	}
 
 	police = to_police(*a);
@@ -195,8 +199,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla,
 failure:
 	qdisc_put_rtab(P_tab);
 	qdisc_put_rtab(R_tab);
-	if (ret == ACT_P_CREATED)
-		tcf_idr_release(*a, bind);
+	tcf_idr_release(*a, bind);
 	return err;
 }
 
@@ -274,8 +277,8 @@ static int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a,
 		.action = police->tcf_action,
 		.mtu = police->tcfp_mtu,
 		.burst = PSCHED_NS2TICKS(police->tcfp_burst),
-		.refcnt = police->tcf_refcnt - ref,
-		.bindcnt = police->tcf_bindcnt - bind,
+		.refcnt = refcount_read(&police->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&police->tcf_bindcnt) - bind,
 	};
 	struct tcf_t t;
 
@@ -314,6 +317,13 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_police_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, police_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 MODULE_AUTHOR("Alexey Kuznetsov");
 MODULE_DESCRIPTION("Policing actions");
 MODULE_LICENSE("GPL");
@@ -327,6 +337,7 @@ static struct tc_action_ops act_police_ops = {
 	.init		=	tcf_act_police_init,
 	.walk		=	tcf_act_police_walker,
 	.lookup		=	tcf_police_search,
+	.delete		=	tcf_police_delete,
 	.size		=	sizeof(struct tcf_police),
 };
 
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 5db358497c9e..3079e7be5bde 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -37,7 +37,8 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
 
 static int tcf_sample_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a, int ovr,
-			   int bind, struct netlink_ext_ack *extack)
+			   int bind, bool rtnl_held,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, sample_net_id);
 	struct nlattr *tb[TCA_SAMPLE_MAX + 1];
@@ -45,7 +46,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 	struct tc_sample *parm;
 	struct tcf_sample *s;
 	bool exists = false;
-	int ret;
+	int ret, err;
 
 	if (!nla)
 		return -EINVAL;
@@ -58,20 +59,24 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 
 	parm = nla_data(tb[TCA_SAMPLE_PARMS]);
 
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_sample_ops, bind, false);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
-		if (!ovr)
-			return -EEXIST;
+		return -EEXIST;
 	}
 	s = to_sample(*a);
 
@@ -80,8 +85,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 	s->psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]);
 	psample_group = psample_group_get(net, s->psample_group_num);
 	if (!psample_group) {
-		if (ret == ACT_P_CREATED)
-			tcf_idr_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		return -ENOMEM;
 	}
 	RCU_INIT_POINTER(s->psample_group, psample_group);
@@ -173,8 +177,8 @@ static int tcf_sample_dump(struct sk_buff *skb, struct tc_action *a,
 	struct tc_sample opt = {
 		.index      = s->tcf_index,
 		.action     = s->tcf_action,
-		.refcnt     = s->tcf_refcnt - ref,
-		.bindcnt    = s->tcf_bindcnt - bind,
+		.refcnt     = refcount_read(&s->tcf_refcnt) - ref,
+		.bindcnt    = atomic_read(&s->tcf_bindcnt) - bind,
 	};
 	struct tcf_t t;
 
@@ -219,6 +223,13 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_sample_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, sample_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_sample_ops = {
 	.kind	  = "sample",
 	.type	  = TCA_ACT_SAMPLE,
@@ -229,6 +240,7 @@ static struct tc_action_ops act_sample_ops = {
 	.cleanup  = tcf_sample_cleanup,
 	.walk	  = tcf_sample_walker,
 	.lookup	  = tcf_sample_search,
+	.delete	  = tcf_sample_delete,
 	.size	  = sizeof(struct tcf_sample),
 };
 
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 98c4afe7c15b..aa51152e0066 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -79,7 +79,8 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
 
 static int tcf_simp_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a,
-			 int ovr, int bind, struct netlink_ext_ack *extack)
+			 int ovr, int bind, bool rtnl_held,
+			 struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, simp_net_id);
 	struct nlattr *tb[TCA_DEF_MAX + 1];
@@ -99,21 +100,28 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 
 	parm = nla_data(tb[TCA_DEF_PARMS]);
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
 	if (tb[TCA_DEF_DATA] == NULL) {
 		if (exists)
 			tcf_idr_release(*a, bind);
+		else
+			tcf_idr_cleanup(tn, parm->index);
 		return -EINVAL;
 	}
 
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_simp_ops, bind, false);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 
 		d = to_defact(*a);
 		ret = alloc_defdata(d, tb[TCA_DEF_DATA]);
@@ -126,9 +134,10 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 	} else {
 		d = to_defact(*a);
 
-		tcf_idr_release(*a, bind);
-		if (!ovr)
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
 			return -EEXIST;
+		}
 
 		reset_policy(d, tb[TCA_DEF_DATA], parm);
 	}
@@ -145,8 +154,8 @@ static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
 	struct tcf_defact *d = to_defact(a);
 	struct tc_defact opt = {
 		.index   = d->tcf_index,
-		.refcnt  = d->tcf_refcnt - ref,
-		.bindcnt = d->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&d->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
 		.action  = d->tcf_action,
 	};
 	struct tcf_t t;
@@ -183,6 +192,13 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_simp_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, simp_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_simp_ops = {
 	.kind		=	"simple",
 	.type		=	TCA_ACT_SIMP,
@@ -193,6 +209,7 @@ static struct tc_action_ops act_simp_ops = {
 	.init		=	tcf_simp_init,
 	.walk		=	tcf_simp_walker,
 	.lookup		=	tcf_simp_search,
+	.delete		=	tcf_simp_delete,
 	.size		=	sizeof(struct tcf_defact),
 };
 
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 6138d1d71900..da56e6938c9e 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -23,6 +23,9 @@
 #include <linux/rtnetlink.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/dsfield.h>
 
 #include <linux/tc_act/tc_skbedit.h>
 #include <net/tc_act/tc_skbedit.h>
@@ -34,25 +37,54 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
 		       struct tcf_result *res)
 {
 	struct tcf_skbedit *d = to_skbedit(a);
+	struct tcf_skbedit_params *params;
+	int action;
 
-	spin_lock(&d->tcf_lock);
 	tcf_lastuse_update(&d->tcf_tm);
-	bstats_update(&d->tcf_bstats, skb);
-
-	if (d->flags & SKBEDIT_F_PRIORITY)
-		skb->priority = d->priority;
-	if (d->flags & SKBEDIT_F_QUEUE_MAPPING &&
-	    skb->dev->real_num_tx_queues > d->queue_mapping)
-		skb_set_queue_mapping(skb, d->queue_mapping);
-	if (d->flags & SKBEDIT_F_MARK) {
-		skb->mark &= ~d->mask;
-		skb->mark |= d->mark & d->mask;
+	bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
+
+	rcu_read_lock();
+	params = rcu_dereference(d->params);
+	action = READ_ONCE(d->tcf_action);
+
+	if (params->flags & SKBEDIT_F_PRIORITY)
+		skb->priority = params->priority;
+	if (params->flags & SKBEDIT_F_INHERITDSFIELD) {
+		int wlen = skb_network_offset(skb);
+
+		switch (tc_skb_protocol(skb)) {
+		case htons(ETH_P_IP):
+			wlen += sizeof(struct iphdr);
+			if (!pskb_may_pull(skb, wlen))
+				goto err;
+			skb->priority = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+			break;
+
+		case htons(ETH_P_IPV6):
+			wlen += sizeof(struct ipv6hdr);
+			if (!pskb_may_pull(skb, wlen))
+				goto err;
+			skb->priority = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+			break;
+		}
 	}
-	if (d->flags & SKBEDIT_F_PTYPE)
-		skb->pkt_type = d->ptype;
-
-	spin_unlock(&d->tcf_lock);
-	return d->tcf_action;
+	if (params->flags & SKBEDIT_F_QUEUE_MAPPING &&
+	    skb->dev->real_num_tx_queues > params->queue_mapping)
+		skb_set_queue_mapping(skb, params->queue_mapping);
+	if (params->flags & SKBEDIT_F_MARK) {
+		skb->mark &= ~params->mask;
+		skb->mark |= params->mark & params->mask;
+	}
+	if (params->flags & SKBEDIT_F_PTYPE)
+		skb->pkt_type = params->ptype;
+
+unlock:
+	rcu_read_unlock();
+	return action;
+err:
+	qstats_drop_inc(this_cpu_ptr(d->common.cpu_qstats));
+	action = TC_ACT_SHOT;
+	goto unlock;
 }
 
 static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
@@ -62,13 +94,16 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
 	[TCA_SKBEDIT_MARK]		= { .len = sizeof(u32) },
 	[TCA_SKBEDIT_PTYPE]		= { .len = sizeof(u16) },
 	[TCA_SKBEDIT_MASK]		= { .len = sizeof(u32) },
+	[TCA_SKBEDIT_FLAGS]		= { .len = sizeof(u64) },
 };
 
 static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 			    struct nlattr *est, struct tc_action **a,
-			    int ovr, int bind, struct netlink_ext_ack *extack)
+			    int ovr, int bind, bool rtnl_held,
+			    struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
+	struct tcf_skbedit_params *params_old, *params_new;
 	struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
 	struct tc_skbedit *parm;
 	struct tcf_skbedit *d;
@@ -114,52 +149,76 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 		mask = nla_data(tb[TCA_SKBEDIT_MASK]);
 	}
 
+	if (tb[TCA_SKBEDIT_FLAGS] != NULL) {
+		u64 *pure_flags = nla_data(tb[TCA_SKBEDIT_FLAGS]);
+
+		if (*pure_flags & SKBEDIT_F_INHERITDSFIELD)
+			flags |= SKBEDIT_F_INHERITDSFIELD;
+	}
+
 	parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
 
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
 	if (!flags) {
 		if (exists)
 			tcf_idr_release(*a, bind);
+		else
+			tcf_idr_cleanup(tn, parm->index);
 		return -EINVAL;
 	}
 
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
-				     &act_skbedit_ops, bind, false);
-		if (ret)
+				     &act_skbedit_ops, bind, true);
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 
 		d = to_skbedit(*a);
 		ret = ACT_P_CREATED;
 	} else {
 		d = to_skbedit(*a);
-		tcf_idr_release(*a, bind);
-		if (!ovr)
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
 			return -EEXIST;
+		}
 	}
 
-	spin_lock_bh(&d->tcf_lock);
+	ASSERT_RTNL();
+
+	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
+	if (unlikely(!params_new)) {
+		if (ret == ACT_P_CREATED)
+			tcf_idr_release(*a, bind);
+		return -ENOMEM;
+	}
 
-	d->flags = flags;
+	params_new->flags = flags;
 	if (flags & SKBEDIT_F_PRIORITY)
-		d->priority = *priority;
+		params_new->priority = *priority;
 	if (flags & SKBEDIT_F_QUEUE_MAPPING)
-		d->queue_mapping = *queue_mapping;
+		params_new->queue_mapping = *queue_mapping;
 	if (flags & SKBEDIT_F_MARK)
-		d->mark = *mark;
+		params_new->mark = *mark;
 	if (flags & SKBEDIT_F_PTYPE)
-		d->ptype = *ptype;
+		params_new->ptype = *ptype;
 	/* default behaviour is to use all the bits */
-	d->mask = 0xffffffff;
+	params_new->mask = 0xffffffff;
 	if (flags & SKBEDIT_F_MASK)
-		d->mask = *mask;
+		params_new->mask = *mask;
 
 	d->tcf_action = parm->action;
-
-	spin_unlock_bh(&d->tcf_lock);
+	params_old = rtnl_dereference(d->params);
+	rcu_assign_pointer(d->params, params_new);
+	if (params_old)
+		kfree_rcu(params_old, rcu);
 
 	if (ret == ACT_P_CREATED)
 		tcf_idr_insert(tn, *a);
@@ -171,30 +230,39 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_skbedit *d = to_skbedit(a);
+	struct tcf_skbedit_params *params;
 	struct tc_skbedit opt = {
 		.index   = d->tcf_index,
-		.refcnt  = d->tcf_refcnt - ref,
-		.bindcnt = d->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&d->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
 		.action  = d->tcf_action,
 	};
+	u64 pure_flags = 0;
 	struct tcf_t t;
 
+	params = rtnl_dereference(d->params);
+
 	if (nla_put(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
-	if ((d->flags & SKBEDIT_F_PRIORITY) &&
-	    nla_put_u32(skb, TCA_SKBEDIT_PRIORITY, d->priority))
+	if ((params->flags & SKBEDIT_F_PRIORITY) &&
+	    nla_put_u32(skb, TCA_SKBEDIT_PRIORITY, params->priority))
+		goto nla_put_failure;
+	if ((params->flags & SKBEDIT_F_QUEUE_MAPPING) &&
+	    nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING, params->queue_mapping))
 		goto nla_put_failure;
-	if ((d->flags & SKBEDIT_F_QUEUE_MAPPING) &&
-	    nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING, d->queue_mapping))
+	if ((params->flags & SKBEDIT_F_MARK) &&
+	    nla_put_u32(skb, TCA_SKBEDIT_MARK, params->mark))
 		goto nla_put_failure;
-	if ((d->flags & SKBEDIT_F_MARK) &&
-	    nla_put_u32(skb, TCA_SKBEDIT_MARK, d->mark))
+	if ((params->flags & SKBEDIT_F_PTYPE) &&
+	    nla_put_u16(skb, TCA_SKBEDIT_PTYPE, params->ptype))
 		goto nla_put_failure;
-	if ((d->flags & SKBEDIT_F_PTYPE) &&
-	    nla_put_u16(skb, TCA_SKBEDIT_PTYPE, d->ptype))
+	if ((params->flags & SKBEDIT_F_MASK) &&
+	    nla_put_u32(skb, TCA_SKBEDIT_MASK, params->mask))
 		goto nla_put_failure;
-	if ((d->flags & SKBEDIT_F_MASK) &&
-	    nla_put_u32(skb, TCA_SKBEDIT_MASK, d->mask))
+	if (params->flags & SKBEDIT_F_INHERITDSFIELD)
+		pure_flags |= SKBEDIT_F_INHERITDSFIELD;
+	if (pure_flags != 0 &&
+	    nla_put(skb, TCA_SKBEDIT_FLAGS, sizeof(pure_flags), &pure_flags))
 		goto nla_put_failure;
 
 	tcf_tm_dump(&t, &d->tcf_tm);
@@ -207,6 +275,16 @@ nla_put_failure:
 	return -1;
 }
 
+static void tcf_skbedit_cleanup(struct tc_action *a)
+{
+	struct tcf_skbedit *d = to_skbedit(a);
+	struct tcf_skbedit_params *params;
+
+	params = rcu_dereference_protected(d->params, 1);
+	if (params)
+		kfree_rcu(params, rcu);
+}
+
 static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb,
 			      struct netlink_callback *cb, int type,
 			      const struct tc_action_ops *ops,
@@ -225,6 +303,13 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_skbedit_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_skbedit_ops = {
 	.kind		=	"skbedit",
 	.type		=	TCA_ACT_SKBEDIT,
@@ -232,8 +317,10 @@ static struct tc_action_ops act_skbedit_ops = {
 	.act		=	tcf_skbedit,
 	.dump		=	tcf_skbedit_dump,
 	.init		=	tcf_skbedit_init,
+	.cleanup	=	tcf_skbedit_cleanup,
 	.walk		=	tcf_skbedit_walker,
 	.lookup		=	tcf_skbedit_search,
+	.delete		=	tcf_skbedit_delete,
 	.size		=	sizeof(struct tcf_skbedit),
 };
 
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index ad050d7d4b46..cdc6bacfb190 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -84,7 +84,8 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
 
 static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a,
-			   int ovr, int bind, struct netlink_ext_ack *extack)
+			   int ovr, int bind, bool rtnl_held,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
 	struct nlattr *tb[TCA_SKBMOD_MAX + 1];
@@ -127,27 +128,33 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 	if (parm->flags & SKBMOD_F_SWAPMAC)
 		lflags = SKBMOD_F_SWAPMAC;
 
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
 	if (!lflags) {
 		if (exists)
 			tcf_idr_release(*a, bind);
+		else
+			tcf_idr_cleanup(tn, parm->index);
 		return -EINVAL;
 	}
 
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_skbmod_ops, bind, true);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
-		if (!ovr)
-			return -EEXIST;
+		return -EEXIST;
 	}
 
 	d = to_skbmod(*a);
@@ -155,8 +162,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 	ASSERT_RTNL();
 	p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
 	if (unlikely(!p)) {
-		if (ret == ACT_P_CREATED)
-			tcf_idr_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		return -ENOMEM;
 	}
 
@@ -205,8 +211,8 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
 	struct tcf_skbmod_params  *p = rtnl_dereference(d->skbmod_p);
 	struct tc_skbmod opt = {
 		.index   = d->tcf_index,
-		.refcnt  = d->tcf_refcnt - ref,
-		.bindcnt = d->tcf_bindcnt - bind,
+		.refcnt  = refcount_read(&d->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
 		.action  = d->tcf_action,
 	};
 	struct tcf_t t;
@@ -252,6 +258,13 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_skbmod_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_skbmod_ops = {
 	.kind		=	"skbmod",
 	.type		=	TCA_ACT_SKBMOD,
@@ -262,6 +275,7 @@ static struct tc_action_ops act_skbmod_ops = {
 	.cleanup	=	tcf_skbmod_cleanup,
 	.walk		=	tcf_skbmod_walker,
 	.lookup		=	tcf_skbmod_search,
+	.delete		=	tcf_skbmod_delete,
 	.size		=	sizeof(struct tcf_skbmod),
 };
 
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 9bc6c2ae98a5..f811850fd1d0 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
+#include <net/geneve.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/dst.h>
@@ -57,6 +58,135 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
 	return action;
 }
 
+static const struct nla_policy
+enc_opts_policy[TCA_TUNNEL_KEY_ENC_OPTS_MAX + 1] = {
+	[TCA_TUNNEL_KEY_ENC_OPTS_GENEVE]	= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy
+geneve_opt_policy[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX + 1] = {
+	[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS]	   = { .type = NLA_U16 },
+	[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE]	   = { .type = NLA_U8 },
+	[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]	   = { .type = NLA_BINARY,
+						       .len = 128 },
+};
+
+static int
+tunnel_key_copy_geneve_opt(const struct nlattr *nla, void *dst, int dst_len,
+			   struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX + 1];
+	int err, data_len, opt_len;
+	u8 *data;
+
+	err = nla_parse_nested(tb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX,
+			       nla, geneve_opt_policy, extack);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS] ||
+	    !tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE] ||
+	    !tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]) {
+		NL_SET_ERR_MSG(extack, "Missing tunnel key geneve option class, type or data");
+		return -EINVAL;
+	}
+
+	data = nla_data(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]);
+	data_len = nla_len(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]);
+	if (data_len < 4) {
+		NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is less than 4 bytes long");
+		return -ERANGE;
+	}
+	if (data_len % 4) {
+		NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is not a multiple of 4 bytes long");
+		return -ERANGE;
+	}
+
+	opt_len = sizeof(struct geneve_opt) + data_len;
+	if (dst) {
+		struct geneve_opt *opt = dst;
+
+		WARN_ON(dst_len < opt_len);
+
+		opt->opt_class =
+			nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS]);
+		opt->type = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE]);
+		opt->length = data_len / 4; /* length is in units of 4 bytes */
+		opt->r1 = 0;
+		opt->r2 = 0;
+		opt->r3 = 0;
+
+		memcpy(opt + 1, data, data_len);
+	}
+
+	return opt_len;
+}
+
+static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst,
+				int dst_len, struct netlink_ext_ack *extack)
+{
+	int err, rem, opt_len, len = nla_len(nla), opts_len = 0;
+	const struct nlattr *attr, *head = nla_data(nla);
+
+	err = nla_validate(head, len, TCA_TUNNEL_KEY_ENC_OPTS_MAX,
+			   enc_opts_policy, extack);
+	if (err)
+		return err;
+
+	nla_for_each_attr(attr, head, len, rem) {
+		switch (nla_type(attr)) {
+		case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE:
+			opt_len = tunnel_key_copy_geneve_opt(attr, dst,
+							     dst_len, extack);
+			if (opt_len < 0)
+				return opt_len;
+			opts_len += opt_len;
+			if (dst) {
+				dst_len -= opt_len;
+				dst += opt_len;
+			}
+			break;
+		}
+	}
+
+	if (!opts_len) {
+		NL_SET_ERR_MSG(extack, "Empty list of tunnel options");
+		return -EINVAL;
+	}
+
+	if (rem > 0) {
+		NL_SET_ERR_MSG(extack, "Trailing data after parsing tunnel key options attributes");
+		return -EINVAL;
+	}
+
+	return opts_len;
+}
+
+static int tunnel_key_get_opts_len(struct nlattr *nla,
+				   struct netlink_ext_ack *extack)
+{
+	return tunnel_key_copy_opts(nla, NULL, 0, extack);
+}
+
+static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info,
+			       int opts_len, struct netlink_ext_ack *extack)
+{
+	info->options_len = opts_len;
+	switch (nla_type(nla_data(nla))) {
+	case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE:
+#if IS_ENABLED(CONFIG_INET)
+		info->key.tun_flags |= TUNNEL_GENEVE_OPT;
+		return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info),
+					    opts_len, extack);
+#else
+		return -EAFNOSUPPORT;
+#endif
+	default:
+		NL_SET_ERR_MSG(extack, "Cannot set tunnel options for unknown tunnel type");
+		return -EINVAL;
+	}
+}
+
 static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
 	[TCA_TUNNEL_KEY_PARMS]	    = { .len = sizeof(struct tc_tunnel_key) },
 	[TCA_TUNNEL_KEY_ENC_IPV4_SRC] = { .type = NLA_U32 },
@@ -66,11 +196,15 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
 	[TCA_TUNNEL_KEY_ENC_KEY_ID]   = { .type = NLA_U32 },
 	[TCA_TUNNEL_KEY_ENC_DST_PORT] = {.type = NLA_U16},
 	[TCA_TUNNEL_KEY_NO_CSUM]      = { .type = NLA_U8 },
+	[TCA_TUNNEL_KEY_ENC_OPTS]     = { .type = NLA_NESTED },
+	[TCA_TUNNEL_KEY_ENC_TOS]      = { .type = NLA_U8 },
+	[TCA_TUNNEL_KEY_ENC_TTL]      = { .type = NLA_U8 },
 };
 
 static int tunnel_key_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a,
-			   int ovr, int bind, struct netlink_ext_ack *extack)
+			   int ovr, int bind, bool rtnl_held,
+			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
 	struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
@@ -81,24 +215,35 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 	struct tcf_tunnel_key *t;
 	bool exists = false;
 	__be16 dst_port = 0;
+	int opts_len = 0;
 	__be64 key_id;
 	__be16 flags;
+	u8 tos, ttl;
 	int ret = 0;
 	int err;
 
-	if (!nla)
+	if (!nla) {
+		NL_SET_ERR_MSG(extack, "Tunnel requires attributes to be passed");
 		return -EINVAL;
+	}
 
 	err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy,
-			       NULL);
-	if (err < 0)
+			       extack);
+	if (err < 0) {
+		NL_SET_ERR_MSG(extack, "Failed to parse nested tunnel key attributes");
 		return err;
+	}
 
-	if (!tb[TCA_TUNNEL_KEY_PARMS])
+	if (!tb[TCA_TUNNEL_KEY_PARMS]) {
+		NL_SET_ERR_MSG(extack, "Missing tunnel key parameters");
 		return -EINVAL;
+	}
 
 	parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]);
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
@@ -107,6 +252,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 		break;
 	case TCA_TUNNEL_KEY_ACT_SET:
 		if (!tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) {
+			NL_SET_ERR_MSG(extack, "Missing tunnel key id");
 			ret = -EINVAL;
 			goto err_out;
 		}
@@ -121,6 +267,22 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 		if (tb[TCA_TUNNEL_KEY_ENC_DST_PORT])
 			dst_port = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_DST_PORT]);
 
+		if (tb[TCA_TUNNEL_KEY_ENC_OPTS]) {
+			opts_len = tunnel_key_get_opts_len(tb[TCA_TUNNEL_KEY_ENC_OPTS],
+							   extack);
+			if (opts_len < 0) {
+				ret = opts_len;
+				goto err_out;
+			}
+		}
+
+		tos = 0;
+		if (tb[TCA_TUNNEL_KEY_ENC_TOS])
+			tos = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_TOS]);
+		ttl = 0;
+		if (tb[TCA_TUNNEL_KEY_ENC_TTL])
+			ttl = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_TTL]);
+
 		if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] &&
 		    tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) {
 			__be32 saddr;
@@ -129,9 +291,9 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 			saddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC]);
 			daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]);
 
-			metadata = __ip_tun_set_dst(saddr, daddr, 0, 0,
+			metadata = __ip_tun_set_dst(saddr, daddr, tos, ttl,
 						    dst_port, flags,
-						    key_id, 0);
+						    key_id, opts_len);
 		} else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] &&
 			   tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) {
 			struct in6_addr saddr;
@@ -140,19 +302,33 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 			saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]);
 			daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]);
 
-			metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, dst_port,
+			metadata = __ipv6_tun_set_dst(&saddr, &daddr, tos, ttl, dst_port,
 						      0, flags,
 						      key_id, 0);
+		} else {
+			NL_SET_ERR_MSG(extack, "Missing either ipv4 or ipv6 src and dst");
+			ret = -EINVAL;
+			goto err_out;
 		}
 
 		if (!metadata) {
-			ret = -EINVAL;
+			NL_SET_ERR_MSG(extack, "Cannot allocate tunnel metadata dst");
+			ret = -ENOMEM;
 			goto err_out;
 		}
 
+		if (opts_len) {
+			ret = tunnel_key_opts_set(tb[TCA_TUNNEL_KEY_ENC_OPTS],
+						  &metadata->u.tun_info,
+						  opts_len, extack);
+			if (ret < 0)
+				goto err_out;
+		}
+
 		metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX;
 		break;
 	default:
+		NL_SET_ERR_MSG(extack, "Unknown tunnel key action");
 		ret = -EINVAL;
 		goto err_out;
 	}
@@ -160,14 +336,16 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_tunnel_key_ops, bind, true);
-		if (ret)
-			return ret;
+		if (ret) {
+			NL_SET_ERR_MSG(extack, "Cannot create TC IDR");
+			goto err_out;
+		}
 
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
-		if (!ovr)
-			return -EEXIST;
+		NL_SET_ERR_MSG(extack, "TC IDR already exists");
+		return -EEXIST;
 	}
 
 	t = to_tunnel_key(*a);
@@ -175,8 +353,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 	ASSERT_RTNL();
 	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
 	if (unlikely(!params_new)) {
-		if (ret == ACT_P_CREATED)
-			tcf_idr_release(*a, bind);
+		tcf_idr_release(*a, bind);
+		NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters");
 		return -ENOMEM;
 	}
 
@@ -199,6 +377,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 err_out:
 	if (exists)
 		tcf_idr_release(*a, bind);
+	else
+		tcf_idr_cleanup(tn, parm->index);
 	return ret;
 }
 
@@ -216,6 +396,61 @@ static void tunnel_key_release(struct tc_action *a)
 	}
 }
 
+static int tunnel_key_geneve_opts_dump(struct sk_buff *skb,
+				       const struct ip_tunnel_info *info)
+{
+	int len = info->options_len;
+	u8 *src = (u8 *)(info + 1);
+	struct nlattr *start;
+
+	start = nla_nest_start(skb, TCA_TUNNEL_KEY_ENC_OPTS_GENEVE);
+	if (!start)
+		return -EMSGSIZE;
+
+	while (len > 0) {
+		struct geneve_opt *opt = (struct geneve_opt *)src;
+
+		if (nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS,
+				 opt->opt_class) ||
+		    nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE,
+			       opt->type) ||
+		    nla_put(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA,
+			    opt->length * 4, opt + 1))
+			return -EMSGSIZE;
+
+		len -= sizeof(struct geneve_opt) + opt->length * 4;
+		src += sizeof(struct geneve_opt) + opt->length * 4;
+	}
+
+	nla_nest_end(skb, start);
+	return 0;
+}
+
+static int tunnel_key_opts_dump(struct sk_buff *skb,
+				const struct ip_tunnel_info *info)
+{
+	struct nlattr *start;
+	int err;
+
+	if (!info->options_len)
+		return 0;
+
+	start = nla_nest_start(skb, TCA_TUNNEL_KEY_ENC_OPTS);
+	if (!start)
+		return -EMSGSIZE;
+
+	if (info->key.tun_flags & TUNNEL_GENEVE_OPT) {
+		err = tunnel_key_geneve_opts_dump(skb, info);
+		if (err)
+			return err;
+	} else {
+		return -EINVAL;
+	}
+
+	nla_nest_end(skb, start);
+	return 0;
+}
+
 static int tunnel_key_dump_addresses(struct sk_buff *skb,
 				     const struct ip_tunnel_info *info)
 {
@@ -252,8 +487,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
 	struct tcf_tunnel_key_params *params;
 	struct tc_tunnel_key opt = {
 		.index    = t->tcf_index,
-		.refcnt   = t->tcf_refcnt - ref,
-		.bindcnt  = t->tcf_bindcnt - bind,
+		.refcnt   = refcount_read(&t->tcf_refcnt) - ref,
+		.bindcnt  = atomic_read(&t->tcf_bindcnt) - bind,
 		.action   = t->tcf_action,
 	};
 	struct tcf_t tm;
@@ -266,8 +501,9 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
 		goto nla_put_failure;
 
 	if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) {
-		struct ip_tunnel_key *key =
-			&params->tcft_enc_metadata->u.tun_info.key;
+		struct ip_tunnel_info *info =
+			&params->tcft_enc_metadata->u.tun_info;
+		struct ip_tunnel_key *key = &info->key;
 		__be32 key_id = tunnel_id_to_key32(key->tun_id);
 
 		if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) ||
@@ -275,7 +511,14 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
 					      &params->tcft_enc_metadata->u.tun_info) ||
 		    nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT, key->tp_dst) ||
 		    nla_put_u8(skb, TCA_TUNNEL_KEY_NO_CSUM,
-			       !(key->tun_flags & TUNNEL_CSUM)))
+			       !(key->tun_flags & TUNNEL_CSUM)) ||
+		    tunnel_key_opts_dump(skb, info))
+			goto nla_put_failure;
+
+		if (key->tos && nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_TOS, key->tos))
+			goto nla_put_failure;
+
+		if (key->ttl && nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_TTL, key->ttl))
 			goto nla_put_failure;
 	}
 
@@ -309,6 +552,13 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tunnel_key_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_tunnel_key_ops = {
 	.kind		=	"tunnel_key",
 	.type		=	TCA_ACT_TUNNEL_KEY,
@@ -319,6 +569,7 @@ static struct tc_action_ops act_tunnel_key_ops = {
 	.cleanup	=	tunnel_key_release,
 	.walk		=	tunnel_key_walker,
 	.lookup		=	tunnel_key_search,
+	.delete		=	tunnel_key_delete,
 	.size		=	sizeof(struct tcf_tunnel_key),
 };
 
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 1fb39e1f9d07..ad37f308175a 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -109,7 +109,8 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
 
 static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a,
-			 int ovr, int bind, struct netlink_ext_ack *extack)
+			 int ovr, int bind, bool rtnl_held,
+			 struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, vlan_net_id);
 	struct nlattr *tb[TCA_VLAN_MAX + 1];
@@ -133,7 +134,10 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	if (!tb[TCA_VLAN_PARMS])
 		return -EINVAL;
 	parm = nla_data(tb[TCA_VLAN_PARMS]);
-	exists = tcf_idr_check(tn, parm->index, a, bind);
+	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
+	if (err < 0)
+		return err;
+	exists = err;
 	if (exists && bind)
 		return 0;
 
@@ -145,12 +149,16 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 		if (!tb[TCA_VLAN_PUSH_VLAN_ID]) {
 			if (exists)
 				tcf_idr_release(*a, bind);
+			else
+				tcf_idr_cleanup(tn, parm->index);
 			return -EINVAL;
 		}
 		push_vid = nla_get_u16(tb[TCA_VLAN_PUSH_VLAN_ID]);
 		if (push_vid >= VLAN_VID_MASK) {
 			if (exists)
 				tcf_idr_release(*a, bind);
+			else
+				tcf_idr_cleanup(tn, parm->index);
 			return -ERANGE;
 		}
 
@@ -163,6 +171,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 			default:
 				if (exists)
 					tcf_idr_release(*a, bind);
+				else
+					tcf_idr_cleanup(tn, parm->index);
 				return -EPROTONOSUPPORT;
 			}
 		} else {
@@ -175,6 +185,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	default:
 		if (exists)
 			tcf_idr_release(*a, bind);
+		else
+			tcf_idr_cleanup(tn, parm->index);
 		return -EINVAL;
 	}
 	action = parm->v_action;
@@ -182,14 +194,15 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_vlan_ops, bind, true);
-		if (ret)
+		if (ret) {
+			tcf_idr_cleanup(tn, parm->index);
 			return ret;
+		}
 
 		ret = ACT_P_CREATED;
-	} else {
+	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
-		if (!ovr)
-			return -EEXIST;
+		return -EEXIST;
 	}
 
 	v = to_vlan(*a);
@@ -197,8 +210,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	ASSERT_RTNL();
 	p = kzalloc(sizeof(*p), GFP_KERNEL);
 	if (!p) {
-		if (ret == ACT_P_CREATED)
-			tcf_idr_release(*a, bind);
+		tcf_idr_release(*a, bind);
 		return -ENOMEM;
 	}
 
@@ -239,8 +251,8 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
 	struct tcf_vlan_params *p = rtnl_dereference(v->vlan_p);
 	struct tc_vlan opt = {
 		.index    = v->tcf_index,
-		.refcnt   = v->tcf_refcnt - ref,
-		.bindcnt  = v->tcf_bindcnt - bind,
+		.refcnt   = refcount_read(&v->tcf_refcnt) - ref,
+		.bindcnt  = atomic_read(&v->tcf_bindcnt) - bind,
 		.action   = v->tcf_action,
 		.v_action = p->tcfv_action,
 	};
@@ -286,6 +298,13 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index,
 	return tcf_idr_search(tn, a, index);
 }
 
+static int tcf_vlan_delete(struct net *net, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, vlan_net_id);
+
+	return tcf_idr_delete_index(tn, index);
+}
+
 static struct tc_action_ops act_vlan_ops = {
 	.kind		=	"vlan",
 	.type		=	TCA_ACT_VLAN,
@@ -296,6 +315,7 @@ static struct tc_action_ops act_vlan_ops = {
 	.cleanup	=	tcf_vlan_cleanup,
 	.walk		=	tcf_vlan_walker,
 	.lookup		=	tcf_vlan_search,
+	.delete		=	tcf_vlan_delete,
 	.size		=	sizeof(struct tcf_vlan),
 };
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index f74513a7c7a8..623fe2cfe529 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -277,18 +277,21 @@ static bool tcf_block_offload_in_use(struct tcf_block *block)
 static int tcf_block_offload_cmd(struct tcf_block *block,
 				 struct net_device *dev,
 				 struct tcf_block_ext_info *ei,
-				 enum tc_block_command command)
+				 enum tc_block_command command,
+				 struct netlink_ext_ack *extack)
 {
 	struct tc_block_offload bo = {};
 
 	bo.command = command;
 	bo.binder_type = ei->binder_type;
 	bo.block = block;
+	bo.extack = extack;
 	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
 }
 
 static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
-				  struct tcf_block_ext_info *ei)
+				  struct tcf_block_ext_info *ei,
+				  struct netlink_ext_ack *extack)
 {
 	struct net_device *dev = q->dev_queue->dev;
 	int err;
@@ -299,10 +302,12 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
 	/* If tc offload feature is disabled and the block we try to bind
 	 * to already has some offloaded filters, forbid to bind.
 	 */
-	if (!tc_can_offload(dev) && tcf_block_offload_in_use(block))
+	if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) {
+		NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
 		return -EOPNOTSUPP;
+	}
 
-	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND);
+	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND, extack);
 	if (err == -EOPNOTSUPP)
 		goto no_offload_dev_inc;
 	return err;
@@ -322,7 +327,7 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
 
 	if (!dev->netdev_ops->ndo_setup_tc)
 		goto no_offload_dev_dec;
-	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND);
+	err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND, NULL);
 	if (err == -EOPNOTSUPP)
 		goto no_offload_dev_dec;
 	return;
@@ -612,7 +617,7 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
 	if (err)
 		goto err_chain_head_change_cb_add;
 
-	err = tcf_block_offload_bind(block, q, ei);
+	err = tcf_block_offload_bind(block, q, ei, extack);
 	if (err)
 		goto err_block_offload_bind;
 
@@ -746,18 +751,53 @@ unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
 }
 EXPORT_SYMBOL(tcf_block_cb_decref);
 
+static int
+tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
+			    void *cb_priv, bool add, bool offload_in_use,
+			    struct netlink_ext_ack *extack)
+{
+	struct tcf_chain *chain;
+	struct tcf_proto *tp;
+	int err;
+
+	list_for_each_entry(chain, &block->chain_list, list) {
+		for (tp = rtnl_dereference(chain->filter_chain); tp;
+		     tp = rtnl_dereference(tp->next)) {
+			if (tp->ops->reoffload) {
+				err = tp->ops->reoffload(tp, add, cb, cb_priv,
+							 extack);
+				if (err && add)
+					goto err_playback_remove;
+			} else if (add && offload_in_use) {
+				err = -EOPNOTSUPP;
+				NL_SET_ERR_MSG(extack, "Filter HW offload failed - classifier without re-offloading support");
+				goto err_playback_remove;
+			}
+		}
+	}
+
+	return 0;
+
+err_playback_remove:
+	tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
+				    extack);
+	return err;
+}
+
 struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
 					     tc_setup_cb_t *cb, void *cb_ident,
-					     void *cb_priv)
+					     void *cb_priv,
+					     struct netlink_ext_ack *extack)
 {
 	struct tcf_block_cb *block_cb;
+	int err;
 
-	/* At this point, playback of previous block cb calls is not supported,
-	 * so forbid to register to block which already has some offloaded
-	 * filters present.
-	 */
-	if (tcf_block_offload_in_use(block))
-		return ERR_PTR(-EOPNOTSUPP);
+	/* Replay any already present rules */
+	err = tcf_block_playback_offloads(block, cb, cb_priv, true,
+					  tcf_block_offload_in_use(block),
+					  extack);
+	if (err)
+		return ERR_PTR(err);
 
 	block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
 	if (!block_cb)
@@ -772,17 +812,22 @@ EXPORT_SYMBOL(__tcf_block_cb_register);
 
 int tcf_block_cb_register(struct tcf_block *block,
 			  tc_setup_cb_t *cb, void *cb_ident,
-			  void *cb_priv)
+			  void *cb_priv, struct netlink_ext_ack *extack)
 {
 	struct tcf_block_cb *block_cb;
 
-	block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv);
+	block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv,
+					   extack);
 	return IS_ERR(block_cb) ? PTR_ERR(block_cb) : 0;
 }
 EXPORT_SYMBOL(tcf_block_cb_register);
 
-void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb)
+void __tcf_block_cb_unregister(struct tcf_block *block,
+			       struct tcf_block_cb *block_cb)
 {
+	tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv,
+				    false, tcf_block_offload_in_use(block),
+				    NULL);
 	list_del(&block_cb->list);
 	kfree(block_cb);
 }
@@ -796,7 +841,7 @@ void tcf_block_cb_unregister(struct tcf_block *block,
 	block_cb = tcf_block_cb_lookup(block, cb, cb_ident);
 	if (!block_cb)
 		return;
-	__tcf_block_cb_unregister(block_cb);
+	__tcf_block_cb_unregister(block, block_cb);
 }
 EXPORT_SYMBOL(tcf_block_cb_unregister);
 
@@ -1463,7 +1508,9 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
 		arg.w.stop = 0;
 		arg.w.skip = cb->args[1] - 1;
 		arg.w.count = 0;
+		arg.w.cookie = cb->args[2];
 		tp->ops->walk(tp, &arg.w);
+		cb->args[2] = arg.w.cookie;
 		cb->args[1] = arg.w.count + 1;
 		if (arg.w.stop)
 			return false;
@@ -1564,11 +1611,7 @@ out:
 void tcf_exts_destroy(struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	LIST_HEAD(actions);
-
-	ASSERT_RTNL();
-	tcf_exts_to_list(exts, &actions);
-	tcf_action_destroy(&actions, TCA_ACT_UNBIND);
+	tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
 	kfree(exts->actions);
 	exts->nr_actions = 0;
 #endif
@@ -1587,7 +1630,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 		if (exts->police && tb[exts->police]) {
 			act = tcf_action_init_1(net, tp, tb[exts->police],
 						rate_tlv, "police", ovr,
-						TCA_ACT_BIND, extack);
+						TCA_ACT_BIND, true, extack);
 			if (IS_ERR(act))
 				return PTR_ERR(act);
 
@@ -1595,17 +1638,15 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 			exts->actions[0] = act;
 			exts->nr_actions = 1;
 		} else if (exts->action && tb[exts->action]) {
-			LIST_HEAD(actions);
-			int err, i = 0;
+			int err;
 
 			err = tcf_action_init(net, tp, tb[exts->action],
 					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
-					      &actions, &attr_size, extack);
-			if (err)
+					      exts->actions, &attr_size, true,
+					      extack);
+			if (err < 0)
 				return err;
-			list_for_each_entry(act, &actions, list)
-				exts->actions[i++] = act;
-			exts->nr_actions = i;
+			exts->nr_actions = err;
 		}
 		exts->net = net;
 	}
@@ -1654,14 +1695,11 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
 		 * tc data even if iproute2  was newer - jhs
 		 */
 		if (exts->type != TCA_OLD_COMPAT) {
-			LIST_HEAD(actions);
-
 			nest = nla_nest_start(skb, exts->action);
 			if (nest == NULL)
 				goto nla_put_failure;
 
-			tcf_exts_to_list(exts, &actions);
-			if (tcf_action_dump(skb, &actions, 0, 0) < 0)
+			if (tcf_action_dump(skb, exts->actions, 0, 0) < 0)
 				goto nla_put_failure;
 			nla_nest_end(skb, nest);
 		} else if (exts->police) {
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 1aa7f6511065..66e0ac9811f9 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -43,6 +43,7 @@ struct cls_bpf_prog {
 	struct tcf_result res;
 	bool exts_integrated;
 	u32 gen_flags;
+	unsigned int in_hw_count;
 	struct tcf_exts exts;
 	u32 handle;
 	u16 bpf_num_ops;
@@ -174,6 +175,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 			cls_bpf_offload_cmd(tp, oldprog, prog, extack);
 			return err;
 		} else if (err > 0) {
+			prog->in_hw_count = err;
 			tcf_block_offload_inc(block, &prog->gen_flags);
 		}
 	}
@@ -652,6 +654,42 @@ skip:
 	}
 }
 
+static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+			     void *cb_priv, struct netlink_ext_ack *extack)
+{
+	struct cls_bpf_head *head = rtnl_dereference(tp->root);
+	struct tcf_block *block = tp->chain->block;
+	struct tc_cls_bpf_offload cls_bpf = {};
+	struct cls_bpf_prog *prog;
+	int err;
+
+	list_for_each_entry(prog, &head->plist, link) {
+		if (tc_skip_hw(prog->gen_flags))
+			continue;
+
+		tc_cls_common_offload_init(&cls_bpf.common, tp, prog->gen_flags,
+					   extack);
+		cls_bpf.command = TC_CLSBPF_OFFLOAD;
+		cls_bpf.exts = &prog->exts;
+		cls_bpf.prog = add ? prog->filter : NULL;
+		cls_bpf.oldprog = add ? NULL : prog->filter;
+		cls_bpf.name = prog->bpf_name;
+		cls_bpf.exts_integrated = prog->exts_integrated;
+
+		err = cb(TC_SETUP_CLSBPF, &cls_bpf, cb_priv);
+		if (err) {
+			if (add && tc_skip_sw(prog->gen_flags))
+				return err;
+			continue;
+		}
+
+		tc_cls_offload_cnt_update(block, &prog->in_hw_count,
+					  &prog->gen_flags, add);
+	}
+
+	return 0;
+}
+
 static struct tcf_proto_ops cls_bpf_ops __read_mostly = {
 	.kind		=	"bpf",
 	.owner		=	THIS_MODULE,
@@ -662,6 +700,7 @@ static struct tcf_proto_ops cls_bpf_ops __read_mostly = {
 	.change		=	cls_bpf_change,
 	.delete		=	cls_bpf_delete,
 	.walk		=	cls_bpf_walk,
+	.reoffload	=	cls_bpf_reoffload,
 	.dump		=	cls_bpf_dump,
 	.bind_class	=	cls_bpf_bind_class,
 };
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 9e8b26a80fb3..38d74803e2df 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -35,6 +35,7 @@ struct fl_flow_key {
 	struct flow_dissector_key_basic basic;
 	struct flow_dissector_key_eth_addrs eth;
 	struct flow_dissector_key_vlan vlan;
+	struct flow_dissector_key_vlan cvlan;
 	union {
 		struct flow_dissector_key_ipv4_addrs ipv4;
 		struct flow_dissector_key_ipv6_addrs ipv6;
@@ -51,6 +52,7 @@ struct fl_flow_key {
 	struct flow_dissector_key_mpls mpls;
 	struct flow_dissector_key_tcp tcp;
 	struct flow_dissector_key_ip ip;
+	struct flow_dissector_key_ip enc_ip;
 } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
 
 struct fl_flow_mask_range {
@@ -87,6 +89,7 @@ struct cls_fl_filter {
 	struct list_head list;
 	u32 handle;
 	u32 flags;
+	unsigned int in_hw_count;
 	struct rcu_work rwork;
 	struct net_device *hw_dev;
 };
@@ -289,6 +292,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 		fl_hw_destroy_filter(tp, f, NULL);
 		return err;
 	} else if (err > 0) {
+		f->in_hw_count = err;
 		tcf_block_offload_inc(block, &f->flags);
 	}
 
@@ -447,6 +451,13 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
 	[TCA_FLOWER_KEY_IP_TOS_MASK]	= { .type = NLA_U8 },
 	[TCA_FLOWER_KEY_IP_TTL]		= { .type = NLA_U8 },
 	[TCA_FLOWER_KEY_IP_TTL_MASK]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_CVLAN_ID]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_CVLAN_PRIO]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_CVLAN_ETH_TYPE]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_ENC_IP_TOS]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_ENC_IP_TOS_MASK] = { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_ENC_IP_TTL]	 = { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 },
 };
 
 static void fl_set_key_val(struct nlattr **tb,
@@ -498,22 +509,26 @@ static int fl_set_key_mpls(struct nlattr **tb,
 }
 
 static void fl_set_key_vlan(struct nlattr **tb,
+			    __be16 ethertype,
+			    int vlan_id_key, int vlan_prio_key,
 			    struct flow_dissector_key_vlan *key_val,
 			    struct flow_dissector_key_vlan *key_mask)
 {
 #define VLAN_PRIORITY_MASK	0x7
 
-	if (tb[TCA_FLOWER_KEY_VLAN_ID]) {
+	if (tb[vlan_id_key]) {
 		key_val->vlan_id =
-			nla_get_u16(tb[TCA_FLOWER_KEY_VLAN_ID]) & VLAN_VID_MASK;
+			nla_get_u16(tb[vlan_id_key]) & VLAN_VID_MASK;
 		key_mask->vlan_id = VLAN_VID_MASK;
 	}
-	if (tb[TCA_FLOWER_KEY_VLAN_PRIO]) {
+	if (tb[vlan_prio_key]) {
 		key_val->vlan_priority =
-			nla_get_u8(tb[TCA_FLOWER_KEY_VLAN_PRIO]) &
+			nla_get_u8(tb[vlan_prio_key]) &
 			VLAN_PRIORITY_MASK;
 		key_mask->vlan_priority = VLAN_PRIORITY_MASK;
 	}
+	key_val->vlan_tpid = ethertype;
+	key_mask->vlan_tpid = cpu_to_be16(~0);
 }
 
 static void fl_set_key_flag(u32 flower_key, u32 flower_mask,
@@ -551,17 +566,17 @@ static int fl_set_key_flags(struct nlattr **tb,
 	return 0;
 }
 
-static void fl_set_key_ip(struct nlattr **tb,
+static void fl_set_key_ip(struct nlattr **tb, bool encap,
 			  struct flow_dissector_key_ip *key,
 			  struct flow_dissector_key_ip *mask)
 {
-		fl_set_key_val(tb, &key->tos, TCA_FLOWER_KEY_IP_TOS,
-			       &mask->tos, TCA_FLOWER_KEY_IP_TOS_MASK,
-			       sizeof(key->tos));
+	int tos_key = encap ? TCA_FLOWER_KEY_ENC_IP_TOS : TCA_FLOWER_KEY_IP_TOS;
+	int ttl_key = encap ? TCA_FLOWER_KEY_ENC_IP_TTL : TCA_FLOWER_KEY_IP_TTL;
+	int tos_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TOS_MASK : TCA_FLOWER_KEY_IP_TOS_MASK;
+	int ttl_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TTL_MASK : TCA_FLOWER_KEY_IP_TTL_MASK;
 
-		fl_set_key_val(tb, &key->ttl, TCA_FLOWER_KEY_IP_TTL,
-			       &mask->ttl, TCA_FLOWER_KEY_IP_TTL_MASK,
-			       sizeof(key->ttl));
+	fl_set_key_val(tb, &key->tos, tos_key, &mask->tos, tos_mask, sizeof(key->tos));
+	fl_set_key_val(tb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl));
 }
 
 static int fl_set_key(struct net *net, struct nlattr **tb,
@@ -590,12 +605,28 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 	if (tb[TCA_FLOWER_KEY_ETH_TYPE]) {
 		ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_ETH_TYPE]);
 
-		if (ethertype == htons(ETH_P_8021Q)) {
-			fl_set_key_vlan(tb, &key->vlan, &mask->vlan);
-			fl_set_key_val(tb, &key->basic.n_proto,
-				       TCA_FLOWER_KEY_VLAN_ETH_TYPE,
-				       &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
-				       sizeof(key->basic.n_proto));
+		if (eth_type_vlan(ethertype)) {
+			fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID,
+					TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan,
+					&mask->vlan);
+
+			if (tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) {
+				ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]);
+				if (eth_type_vlan(ethertype)) {
+					fl_set_key_vlan(tb, ethertype,
+							TCA_FLOWER_KEY_CVLAN_ID,
+							TCA_FLOWER_KEY_CVLAN_PRIO,
+							&key->cvlan, &mask->cvlan);
+					fl_set_key_val(tb, &key->basic.n_proto,
+						       TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
+						       &mask->basic.n_proto,
+						       TCA_FLOWER_UNSPEC,
+						       sizeof(key->basic.n_proto));
+				} else {
+					key->basic.n_proto = ethertype;
+					mask->basic.n_proto = cpu_to_be16(~0);
+				}
+			}
 		} else {
 			key->basic.n_proto = ethertype;
 			mask->basic.n_proto = cpu_to_be16(~0);
@@ -607,7 +638,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 		fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
 			       &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
 			       sizeof(key->basic.ip_proto));
-		fl_set_key_ip(tb, &key->ip, &mask->ip);
+		fl_set_key_ip(tb, false, &key->ip, &mask->ip);
 	}
 
 	if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) {
@@ -742,6 +773,8 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 		       &mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
 		       sizeof(key->enc_tp.dst));
 
+	fl_set_key_ip(tb, true, &key->enc_ip, &mask->enc_ip);
+
 	if (tb[TCA_FLOWER_KEY_FLAGS])
 		ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags);
 
@@ -821,6 +854,8 @@ static void fl_init_dissector(struct fl_flow_mask *mask)
 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
 			     FLOW_DISSECTOR_KEY_VLAN, vlan);
 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+			     FLOW_DISSECTOR_KEY_CVLAN, cvlan);
+	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
 			     FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
 			     FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, enc_ipv4);
@@ -832,6 +867,8 @@ static void fl_init_dissector(struct fl_flow_mask *mask)
 			   enc_control);
 	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
 			     FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp);
+	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+			     FLOW_DISSECTOR_KEY_ENC_IP, enc_ip);
 
 	skb_flow_dissector_init(&mask->dissector, keys, cnt);
 }
@@ -1071,20 +1108,59 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 {
 	struct cls_fl_head *head = rtnl_dereference(tp->root);
 	struct cls_fl_filter *f;
+
+	arg->count = arg->skip;
+
+	while ((f = idr_get_next_ul(&head->handle_idr,
+				    &arg->cookie)) != NULL) {
+		if (arg->fn(tp, f, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+		arg->cookie = f->handle + 1;
+		arg->count++;
+	}
+}
+
+static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+			void *cb_priv, struct netlink_ext_ack *extack)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct tc_cls_flower_offload cls_flower = {};
+	struct tcf_block *block = tp->chain->block;
 	struct fl_flow_mask *mask;
+	struct cls_fl_filter *f;
+	int err;
 
-	list_for_each_entry_rcu(mask, &head->masks, list) {
-		list_for_each_entry_rcu(f, &mask->filters, list) {
-			if (arg->count < arg->skip)
-				goto skip;
-			if (arg->fn(tp, f, arg) < 0) {
-				arg->stop = 1;
-				break;
+	list_for_each_entry(mask, &head->masks, list) {
+		list_for_each_entry(f, &mask->filters, list) {
+			if (tc_skip_hw(f->flags))
+				continue;
+
+			tc_cls_common_offload_init(&cls_flower.common, tp,
+						   f->flags, extack);
+			cls_flower.command = add ?
+				TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY;
+			cls_flower.cookie = (unsigned long)f;
+			cls_flower.dissector = &mask->dissector;
+			cls_flower.mask = &f->mkey;
+			cls_flower.key = &f->key;
+			cls_flower.exts = &f->exts;
+			cls_flower.classid = f->res.classid;
+
+			err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv);
+			if (err) {
+				if (add && tc_skip_sw(f->flags))
+					return err;
+				continue;
 			}
-skip:
-			arg->count++;
+
+			tc_cls_offload_cnt_update(block, &f->in_hw_count,
+						  &f->flags, add);
 		}
 	}
+
+	return 0;
 }
 
 static int fl_dump_key_val(struct sk_buff *skb,
@@ -1141,20 +1217,24 @@ static int fl_dump_key_mpls(struct sk_buff *skb,
 	return 0;
 }
 
-static int fl_dump_key_ip(struct sk_buff *skb,
+static int fl_dump_key_ip(struct sk_buff *skb, bool encap,
 			  struct flow_dissector_key_ip *key,
 			  struct flow_dissector_key_ip *mask)
 {
-	if (fl_dump_key_val(skb, &key->tos, TCA_FLOWER_KEY_IP_TOS, &mask->tos,
-			    TCA_FLOWER_KEY_IP_TOS_MASK, sizeof(key->tos)) ||
-	    fl_dump_key_val(skb, &key->ttl, TCA_FLOWER_KEY_IP_TTL, &mask->ttl,
-			    TCA_FLOWER_KEY_IP_TTL_MASK, sizeof(key->ttl)))
+	int tos_key = encap ? TCA_FLOWER_KEY_ENC_IP_TOS : TCA_FLOWER_KEY_IP_TOS;
+	int ttl_key = encap ? TCA_FLOWER_KEY_ENC_IP_TTL : TCA_FLOWER_KEY_IP_TTL;
+	int tos_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TOS_MASK : TCA_FLOWER_KEY_IP_TOS_MASK;
+	int ttl_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TTL_MASK : TCA_FLOWER_KEY_IP_TTL_MASK;
+
+	if (fl_dump_key_val(skb, &key->tos, tos_key, &mask->tos, tos_mask, sizeof(key->tos)) ||
+	    fl_dump_key_val(skb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl)))
 		return -1;
 
 	return 0;
 }
 
 static int fl_dump_key_vlan(struct sk_buff *skb,
+			    int vlan_id_key, int vlan_prio_key,
 			    struct flow_dissector_key_vlan *vlan_key,
 			    struct flow_dissector_key_vlan *vlan_mask)
 {
@@ -1163,13 +1243,13 @@ static int fl_dump_key_vlan(struct sk_buff *skb,
 	if (!memchr_inv(vlan_mask, 0, sizeof(*vlan_mask)))
 		return 0;
 	if (vlan_mask->vlan_id) {
-		err = nla_put_u16(skb, TCA_FLOWER_KEY_VLAN_ID,
+		err = nla_put_u16(skb, vlan_id_key,
 				  vlan_key->vlan_id);
 		if (err)
 			return err;
 	}
 	if (vlan_mask->vlan_priority) {
-		err = nla_put_u8(skb, TCA_FLOWER_KEY_VLAN_PRIO,
+		err = nla_put_u8(skb, vlan_prio_key,
 				 vlan_key->vlan_priority);
 		if (err)
 			return err;
@@ -1264,15 +1344,36 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
 	if (fl_dump_key_mpls(skb, &key->mpls, &mask->mpls))
 		goto nla_put_failure;
 
-	if (fl_dump_key_vlan(skb, &key->vlan, &mask->vlan))
+	if (fl_dump_key_vlan(skb, TCA_FLOWER_KEY_VLAN_ID,
+			     TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan, &mask->vlan))
+		goto nla_put_failure;
+
+	if (fl_dump_key_vlan(skb, TCA_FLOWER_KEY_CVLAN_ID,
+			     TCA_FLOWER_KEY_CVLAN_PRIO,
+			     &key->cvlan, &mask->cvlan) ||
+	    (mask->cvlan.vlan_tpid &&
+	     nla_put_u16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+			 key->cvlan.vlan_tpid)))
 		goto nla_put_failure;
 
+	if (mask->basic.n_proto) {
+		if (mask->cvlan.vlan_tpid) {
+			if (nla_put_be16(skb, TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
+					 key->basic.n_proto))
+				goto nla_put_failure;
+		} else if (mask->vlan.vlan_tpid) {
+			if (nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+					 key->basic.n_proto))
+				goto nla_put_failure;
+		}
+	}
+
 	if ((key->basic.n_proto == htons(ETH_P_IP) ||
 	     key->basic.n_proto == htons(ETH_P_IPV6)) &&
 	    (fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
 			    &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
 			    sizeof(key->basic.ip_proto)) ||
-	    fl_dump_key_ip(skb, &key->ip, &mask->ip)))
+	    fl_dump_key_ip(skb, false, &key->ip, &mask->ip)))
 		goto nla_put_failure;
 
 	if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
@@ -1397,7 +1498,8 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
 			    TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
 			    &mask->enc_tp.dst,
 			    TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
-			    sizeof(key->enc_tp.dst)))
+			    sizeof(key->enc_tp.dst)) ||
+	    fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip))
 		goto nla_put_failure;
 
 	if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))
@@ -1438,6 +1540,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
 	.change		= fl_change,
 	.delete		= fl_delete,
 	.walk		= fl_walk,
+	.reoffload	= fl_reoffload,
 	.dump		= fl_dump,
 	.bind_class	= fl_bind_class,
 	.owner		= THIS_MODULE,
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 47b207ef7762..af16f36ed578 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -21,6 +21,7 @@ struct cls_mall_head {
 	struct tcf_result res;
 	u32 handle;
 	u32 flags;
+	unsigned int in_hw_count;
 	struct rcu_work rwork;
 };
 
@@ -95,6 +96,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
 		mall_destroy_hw_filter(tp, head, cookie, NULL);
 		return err;
 	} else if (err > 0) {
+		head->in_hw_count = err;
 		tcf_block_offload_inc(block, &head->flags);
 	}
 
@@ -235,6 +237,35 @@ skip:
 	arg->count++;
 }
 
+static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+			  void *cb_priv, struct netlink_ext_ack *extack)
+{
+	struct cls_mall_head *head = rtnl_dereference(tp->root);
+	struct tc_cls_matchall_offload cls_mall = {};
+	struct tcf_block *block = tp->chain->block;
+	int err;
+
+	if (tc_skip_hw(head->flags))
+		return 0;
+
+	tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack);
+	cls_mall.command = add ?
+		TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY;
+	cls_mall.exts = &head->exts;
+	cls_mall.cookie = (unsigned long)head;
+
+	err = cb(TC_SETUP_CLSMATCHALL, &cls_mall, cb_priv);
+	if (err) {
+		if (add && tc_skip_sw(head->flags))
+			return err;
+		return 0;
+	}
+
+	tc_cls_offload_cnt_update(block, &head->in_hw_count, &head->flags, add);
+
+	return 0;
+}
+
 static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		     struct sk_buff *skb, struct tcmsg *t)
 {
@@ -289,6 +320,7 @@ static struct tcf_proto_ops cls_mall_ops __read_mostly = {
 	.change		= mall_change,
 	.delete		= mall_delete,
 	.walk		= mall_walk,
+	.reoffload	= mall_reoffload,
 	.dump		= mall_dump,
 	.bind_class	= mall_bind_class,
 	.owner		= THIS_MODULE,
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index fb861f90fde6..d5d2a6dc3921 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -62,6 +62,7 @@ struct tc_u_knode {
 	struct tc_u32_pcnt __percpu *pf;
 #endif
 	u32			flags;
+	unsigned int		in_hw_count;
 #ifdef CONFIG_CLS_U32_MARK
 	u32			val;
 	u32			mask;
@@ -571,6 +572,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
 		u32_remove_hw_knode(tp, n, NULL);
 		return err;
 	} else if (err > 0) {
+		n->in_hw_count = err;
 		tcf_block_offload_inc(block, &n->flags);
 	}
 
@@ -1199,6 +1201,114 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 	}
 }
 
+static int u32_reoffload_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
+			       bool add, tc_setup_cb_t *cb, void *cb_priv,
+			       struct netlink_ext_ack *extack)
+{
+	struct tc_cls_u32_offload cls_u32 = {};
+	int err;
+
+	tc_cls_common_offload_init(&cls_u32.common, tp, ht->flags, extack);
+	cls_u32.command = add ? TC_CLSU32_NEW_HNODE : TC_CLSU32_DELETE_HNODE;
+	cls_u32.hnode.divisor = ht->divisor;
+	cls_u32.hnode.handle = ht->handle;
+	cls_u32.hnode.prio = ht->prio;
+
+	err = cb(TC_SETUP_CLSU32, &cls_u32, cb_priv);
+	if (err && add && tc_skip_sw(ht->flags))
+		return err;
+
+	return 0;
+}
+
+static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n,
+			       bool add, tc_setup_cb_t *cb, void *cb_priv,
+			       struct netlink_ext_ack *extack)
+{
+	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
+	struct tcf_block *block = tp->chain->block;
+	struct tc_cls_u32_offload cls_u32 = {};
+	int err;
+
+	tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack);
+	cls_u32.command = add ?
+		TC_CLSU32_REPLACE_KNODE : TC_CLSU32_DELETE_KNODE;
+	cls_u32.knode.handle = n->handle;
+
+	if (add) {
+		cls_u32.knode.fshift = n->fshift;
+#ifdef CONFIG_CLS_U32_MARK
+		cls_u32.knode.val = n->val;
+		cls_u32.knode.mask = n->mask;
+#else
+		cls_u32.knode.val = 0;
+		cls_u32.knode.mask = 0;
+#endif
+		cls_u32.knode.sel = &n->sel;
+		cls_u32.knode.exts = &n->exts;
+		if (n->ht_down)
+			cls_u32.knode.link_handle = ht->handle;
+	}
+
+	err = cb(TC_SETUP_CLSU32, &cls_u32, cb_priv);
+	if (err) {
+		if (add && tc_skip_sw(n->flags))
+			return err;
+		return 0;
+	}
+
+	tc_cls_offload_cnt_update(block, &n->in_hw_count, &n->flags, add);
+
+	return 0;
+}
+
+static int u32_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+			 void *cb_priv, struct netlink_ext_ack *extack)
+{
+	struct tc_u_common *tp_c = tp->data;
+	struct tc_u_hnode *ht;
+	struct tc_u_knode *n;
+	unsigned int h;
+	int err;
+
+	for (ht = rtnl_dereference(tp_c->hlist);
+	     ht;
+	     ht = rtnl_dereference(ht->next)) {
+		if (ht->prio != tp->prio)
+			continue;
+
+		/* When adding filters to a new dev, try to offload the
+		 * hashtable first. When removing, do the filters before the
+		 * hashtable.
+		 */
+		if (add && !tc_skip_hw(ht->flags)) {
+			err = u32_reoffload_hnode(tp, ht, add, cb, cb_priv,
+						  extack);
+			if (err)
+				return err;
+		}
+
+		for (h = 0; h <= ht->divisor; h++) {
+			for (n = rtnl_dereference(ht->ht[h]);
+			     n;
+			     n = rtnl_dereference(n->next)) {
+				if (tc_skip_hw(n->flags))
+					continue;
+
+				err = u32_reoffload_knode(tp, n, add, cb,
+							  cb_priv, extack);
+				if (err)
+					return err;
+			}
+		}
+
+		if (!add && !tc_skip_hw(ht->flags))
+			u32_reoffload_hnode(tp, ht, add, cb, cb_priv, extack);
+	}
+
+	return 0;
+}
+
 static void u32_bind_class(void *fh, u32 classid, unsigned long cl)
 {
 	struct tc_u_knode *n = fh;
@@ -1336,6 +1446,7 @@ static struct tcf_proto_ops cls_u32_ops __read_mostly = {
 	.change		=	u32_change,
 	.delete		=	u32_delete,
 	.walk		=	u32_walk,
+	.reoffload	=	u32_reoffload,
 	.dump		=	u32_dump,
 	.bind_class	=	u32_bind_class,
 	.owner		=	THIS_MODULE,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 54eca685420f..98541c6399db 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -596,12 +596,19 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
-void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
+void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
+				 clockid_t clockid)
 {
-	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+	hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
 	wd->timer.function = qdisc_watchdog;
 	wd->qdisc = qdisc;
 }
+EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
+
+void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
+{
+	qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
+}
 EXPORT_SYMBOL(qdisc_watchdog_init);
 
 void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
new file mode 100644
index 000000000000..539c9490c308
--- /dev/null
+++ b/net/sched/sch_cake.c
@@ -0,0 +1,3019 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* COMMON Applications Kept Enhanced (CAKE) discipline
+ *
+ * Copyright (C) 2014-2018 Jonathan Morton <chromatix99@gmail.com>
+ * Copyright (C) 2015-2018 Toke Høiland-Jørgensen <toke@toke.dk>
+ * Copyright (C) 2014-2018 Dave Täht <dave.taht@gmail.com>
+ * Copyright (C) 2015-2018 Sebastian Moeller <moeller0@gmx.de>
+ * (C) 2015-2018 Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk>
+ * Copyright (C) 2017-2018 Ryan Mounce <ryan@mounce.com.au>
+ *
+ * The CAKE Principles:
+ *		   (or, how to have your cake and eat it too)
+ *
+ * This is a combination of several shaping, AQM and FQ techniques into one
+ * easy-to-use package:
+ *
+ * - An overall bandwidth shaper, to move the bottleneck away from dumb CPE
+ *   equipment and bloated MACs.  This operates in deficit mode (as in sch_fq),
+ *   eliminating the need for any sort of burst parameter (eg. token bucket
+ *   depth).  Burst support is limited to that necessary to overcome scheduling
+ *   latency.
+ *
+ * - A Diffserv-aware priority queue, giving more priority to certain classes,
+ *   up to a specified fraction of bandwidth.  Above that bandwidth threshold,
+ *   the priority is reduced to avoid starving other tins.
+ *
+ * - Each priority tin has a separate Flow Queue system, to isolate traffic
+ *   flows from each other.  This prevents a burst on one flow from increasing
+ *   the delay to another.  Flows are distributed to queues using a
+ *   set-associative hash function.
+ *
+ * - Each queue is actively managed by Cobalt, which is a combination of the
+ *   Codel and Blue AQM algorithms.  This serves flows fairly, and signals
+ *   congestion early via ECN (if available) and/or packet drops, to keep
+ *   latency low.  The codel parameters are auto-tuned based on the bandwidth
+ *   setting, as is necessary at low bandwidths.
+ *
+ * The configuration parameters are kept deliberately simple for ease of use.
+ * Everything has sane defaults.  Complete generality of configuration is *not*
+ * a goal.
+ *
+ * The priority queue operates according to a weighted DRR scheme, combined with
+ * a bandwidth tracker which reuses the shaper logic to detect which side of the
+ * bandwidth sharing threshold the tin is operating.  This determines whether a
+ * priority-based weight (high) or a bandwidth-based weight (low) is used for
+ * that tin in the current pass.
+ *
+ * This qdisc was inspired by Eric Dumazet's fq_codel code, which he kindly
+ * granted us permission to leverage.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/reciprocal_div.h>
+#include <net/netlink.h>
+#include <linux/version.h>
+#include <linux/if_vlan.h>
+#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
+#include <net/tcp.h>
+#include <net/flow_dissector.h>
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack_core.h>
+#endif
+
+#define CAKE_SET_WAYS (8)
+#define CAKE_MAX_TINS (8)
+#define CAKE_QUEUES (1024)
+#define CAKE_FLOW_MASK 63
+#define CAKE_FLOW_NAT_FLAG 64
+#define CAKE_SPLIT_GSO_THRESHOLD (125000000) /* 1Gbps */
+
+/* struct cobalt_params - contains codel and blue parameters
+ * @interval:	codel initial drop rate
+ * @target:     maximum persistent sojourn time & blue update rate
+ * @mtu_time:   serialisation delay of maximum-size packet
+ * @p_inc:      increment of blue drop probability (0.32 fxp)
+ * @p_dec:      decrement of blue drop probability (0.32 fxp)
+ */
+struct cobalt_params {
+	u64	interval;
+	u64	target;
+	u64	mtu_time;
+	u32	p_inc;
+	u32	p_dec;
+};
+
+/* struct cobalt_vars - contains codel and blue variables
+ * @count:		codel dropping frequency
+ * @rec_inv_sqrt:	reciprocal value of sqrt(count) >> 1
+ * @drop_next:		time to drop next packet, or when we dropped last
+ * @blue_timer:		Blue time to next drop
+ * @p_drop:		BLUE drop probability (0.32 fxp)
+ * @dropping:		set if in dropping state
+ * @ecn_marked:		set if marked
+ */
+struct cobalt_vars {
+	u32	count;
+	u32	rec_inv_sqrt;
+	ktime_t	drop_next;
+	ktime_t	blue_timer;
+	u32     p_drop;
+	bool	dropping;
+	bool    ecn_marked;
+};
+
+enum {
+	CAKE_SET_NONE = 0,
+	CAKE_SET_SPARSE,
+	CAKE_SET_SPARSE_WAIT, /* counted in SPARSE, actually in BULK */
+	CAKE_SET_BULK,
+	CAKE_SET_DECAYING
+};
+
+struct cake_flow {
+	/* this stuff is all needed per-flow at dequeue time */
+	struct sk_buff	  *head;
+	struct sk_buff	  *tail;
+	struct list_head  flowchain;
+	s32		  deficit;
+	u32		  dropped;
+	struct cobalt_vars cvars;
+	u16		  srchost; /* index into cake_host table */
+	u16		  dsthost;
+	u8		  set;
+}; /* please try to keep this structure <= 64 bytes */
+
+struct cake_host {
+	u32 srchost_tag;
+	u32 dsthost_tag;
+	u16 srchost_refcnt;
+	u16 dsthost_refcnt;
+};
+
+struct cake_heap_entry {
+	u16 t:3, b:10;
+};
+
+struct cake_tin_data {
+	struct cake_flow flows[CAKE_QUEUES];
+	u32	backlogs[CAKE_QUEUES];
+	u32	tags[CAKE_QUEUES]; /* for set association */
+	u16	overflow_idx[CAKE_QUEUES];
+	struct cake_host hosts[CAKE_QUEUES]; /* for triple isolation */
+	u16	flow_quantum;
+
+	struct cobalt_params cparams;
+	u32	drop_overlimit;
+	u16	bulk_flow_count;
+	u16	sparse_flow_count;
+	u16	decaying_flow_count;
+	u16	unresponsive_flow_count;
+
+	u32	max_skblen;
+
+	struct list_head new_flows;
+	struct list_head old_flows;
+	struct list_head decaying_flows;
+
+	/* time_next = time_this + ((len * rate_ns) >> rate_shft) */
+	ktime_t	time_next_packet;
+	u64	tin_rate_ns;
+	u64	tin_rate_bps;
+	u16	tin_rate_shft;
+
+	u16	tin_quantum_prio;
+	u16	tin_quantum_band;
+	s32	tin_deficit;
+	u32	tin_backlog;
+	u32	tin_dropped;
+	u32	tin_ecn_mark;
+
+	u32	packets;
+	u64	bytes;
+
+	u32	ack_drops;
+
+	/* moving averages */
+	u64 avge_delay;
+	u64 peak_delay;
+	u64 base_delay;
+
+	/* hash function stats */
+	u32	way_directs;
+	u32	way_hits;
+	u32	way_misses;
+	u32	way_collisions;
+}; /* number of tins is small, so size of this struct doesn't matter much */
+
+struct cake_sched_data {
+	struct tcf_proto __rcu *filter_list; /* optional external classifier */
+	struct tcf_block *block;
+	struct cake_tin_data *tins;
+
+	struct cake_heap_entry overflow_heap[CAKE_QUEUES * CAKE_MAX_TINS];
+	u16		overflow_timeout;
+
+	u16		tin_cnt;
+	u8		tin_mode;
+	u8		flow_mode;
+	u8		ack_filter;
+	u8		atm_mode;
+
+	/* time_next = time_this + ((len * rate_ns) >> rate_shft) */
+	u16		rate_shft;
+	ktime_t		time_next_packet;
+	ktime_t		failsafe_next_packet;
+	u64		rate_ns;
+	u64		rate_bps;
+	u16		rate_flags;
+	s16		rate_overhead;
+	u16		rate_mpu;
+	u64		interval;
+	u64		target;
+
+	/* resource tracking */
+	u32		buffer_used;
+	u32		buffer_max_used;
+	u32		buffer_limit;
+	u32		buffer_config_limit;
+
+	/* indices for dequeue */
+	u16		cur_tin;
+	u16		cur_flow;
+
+	struct qdisc_watchdog watchdog;
+	const u8	*tin_index;
+	const u8	*tin_order;
+
+	/* bandwidth capacity estimate */
+	ktime_t		last_packet_time;
+	ktime_t		avg_window_begin;
+	u64		avg_packet_interval;
+	u64		avg_window_bytes;
+	u64		avg_peak_bandwidth;
+	ktime_t		last_reconfig_time;
+
+	/* packet length stats */
+	u32		avg_netoff;
+	u16		max_netlen;
+	u16		max_adjlen;
+	u16		min_netlen;
+	u16		min_adjlen;
+};
+
+enum {
+	CAKE_FLAG_OVERHEAD	   = BIT(0),
+	CAKE_FLAG_AUTORATE_INGRESS = BIT(1),
+	CAKE_FLAG_INGRESS	   = BIT(2),
+	CAKE_FLAG_WASH		   = BIT(3),
+	CAKE_FLAG_SPLIT_GSO	   = BIT(4)
+};
+
+/* COBALT operates the Codel and BLUE algorithms in parallel, in order to
+ * obtain the best features of each.  Codel is excellent on flows which
+ * respond to congestion signals in a TCP-like way.  BLUE is more effective on
+ * unresponsive flows.
+ */
+
+struct cobalt_skb_cb {
+	ktime_t enqueue_time;
+	u32     adjusted_len;
+};
+
+static u64 us_to_ns(u64 us)
+{
+	return us * NSEC_PER_USEC;
+}
+
+static struct cobalt_skb_cb *get_cobalt_cb(const struct sk_buff *skb)
+{
+	qdisc_cb_private_validate(skb, sizeof(struct cobalt_skb_cb));
+	return (struct cobalt_skb_cb *)qdisc_skb_cb(skb)->data;
+}
+
+static ktime_t cobalt_get_enqueue_time(const struct sk_buff *skb)
+{
+	return get_cobalt_cb(skb)->enqueue_time;
+}
+
+static void cobalt_set_enqueue_time(struct sk_buff *skb,
+				    ktime_t now)
+{
+	get_cobalt_cb(skb)->enqueue_time = now;
+}
+
+static u16 quantum_div[CAKE_QUEUES + 1] = {0};
+
+/* Diffserv lookup tables */
+
+static const u8 precedence[] = {
+	0, 0, 0, 0, 0, 0, 0, 0,
+	1, 1, 1, 1, 1, 1, 1, 1,
+	2, 2, 2, 2, 2, 2, 2, 2,
+	3, 3, 3, 3, 3, 3, 3, 3,
+	4, 4, 4, 4, 4, 4, 4, 4,
+	5, 5, 5, 5, 5, 5, 5, 5,
+	6, 6, 6, 6, 6, 6, 6, 6,
+	7, 7, 7, 7, 7, 7, 7, 7,
+};
+
+static const u8 diffserv8[] = {
+	2, 5, 1, 2, 4, 2, 2, 2,
+	0, 2, 1, 2, 1, 2, 1, 2,
+	5, 2, 4, 2, 4, 2, 4, 2,
+	3, 2, 3, 2, 3, 2, 3, 2,
+	6, 2, 3, 2, 3, 2, 3, 2,
+	6, 2, 2, 2, 6, 2, 6, 2,
+	7, 2, 2, 2, 2, 2, 2, 2,
+	7, 2, 2, 2, 2, 2, 2, 2,
+};
+
+static const u8 diffserv4[] = {
+	0, 2, 0, 0, 2, 0, 0, 0,
+	1, 0, 0, 0, 0, 0, 0, 0,
+	2, 0, 2, 0, 2, 0, 2, 0,
+	2, 0, 2, 0, 2, 0, 2, 0,
+	3, 0, 2, 0, 2, 0, 2, 0,
+	3, 0, 0, 0, 3, 0, 3, 0,
+	3, 0, 0, 0, 0, 0, 0, 0,
+	3, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static const u8 diffserv3[] = {
+	0, 0, 0, 0, 2, 0, 0, 0,
+	1, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 2, 0, 2, 0,
+	2, 0, 0, 0, 0, 0, 0, 0,
+	2, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static const u8 besteffort[] = {
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+/* tin priority order for stats dumping */
+
+static const u8 normal_order[] = {0, 1, 2, 3, 4, 5, 6, 7};
+static const u8 bulk_order[] = {1, 0, 2, 3};
+
+#define REC_INV_SQRT_CACHE (16)
+static u32 cobalt_rec_inv_sqrt_cache[REC_INV_SQRT_CACHE] = {0};
+
+/* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots
+ * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
+ *
+ * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
+ */
+
+static void cobalt_newton_step(struct cobalt_vars *vars)
+{
+	u32 invsqrt, invsqrt2;
+	u64 val;
+
+	invsqrt = vars->rec_inv_sqrt;
+	invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
+	val = (3LL << 32) - ((u64)vars->count * invsqrt2);
+
+	val >>= 2; /* avoid overflow in following multiply */
+	val = (val * invsqrt) >> (32 - 2 + 1);
+
+	vars->rec_inv_sqrt = val;
+}
+
+static void cobalt_invsqrt(struct cobalt_vars *vars)
+{
+	if (vars->count < REC_INV_SQRT_CACHE)
+		vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
+	else
+		cobalt_newton_step(vars);
+}
+
+/* There is a big difference in timing between the accurate values placed in
+ * the cache and the approximations given by a single Newton step for small
+ * count values, particularly when stepping from count 1 to 2 or vice versa.
+ * Above 16, a single Newton step gives sufficient accuracy in either
+ * direction, given the precision stored.
+ *
+ * The magnitude of the error when stepping up to count 2 is such as to give
+ * the value that *should* have been produced at count 4.
+ */
+
+static void cobalt_cache_init(void)
+{
+	struct cobalt_vars v;
+
+	memset(&v, 0, sizeof(v));
+	v.rec_inv_sqrt = ~0U;
+	cobalt_rec_inv_sqrt_cache[0] = v.rec_inv_sqrt;
+
+	for (v.count = 1; v.count < REC_INV_SQRT_CACHE; v.count++) {
+		cobalt_newton_step(&v);
+		cobalt_newton_step(&v);
+		cobalt_newton_step(&v);
+		cobalt_newton_step(&v);
+
+		cobalt_rec_inv_sqrt_cache[v.count] = v.rec_inv_sqrt;
+	}
+}
+
+static void cobalt_vars_init(struct cobalt_vars *vars)
+{
+	memset(vars, 0, sizeof(*vars));
+
+	if (!cobalt_rec_inv_sqrt_cache[0]) {
+		cobalt_cache_init();
+		cobalt_rec_inv_sqrt_cache[0] = ~0;
+	}
+}
+
+/* CoDel control_law is t + interval/sqrt(count)
+ * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid
+ * both sqrt() and divide operation.
+ */
+static ktime_t cobalt_control(ktime_t t,
+			      u64 interval,
+			      u32 rec_inv_sqrt)
+{
+	return ktime_add_ns(t, reciprocal_scale(interval,
+						rec_inv_sqrt));
+}
+
+/* Call this when a packet had to be dropped due to queue overflow.  Returns
+ * true if the BLUE state was quiescent before but active after this call.
+ */
+static bool cobalt_queue_full(struct cobalt_vars *vars,
+			      struct cobalt_params *p,
+			      ktime_t now)
+{
+	bool up = false;
+
+	if (ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
+		up = !vars->p_drop;
+		vars->p_drop += p->p_inc;
+		if (vars->p_drop < p->p_inc)
+			vars->p_drop = ~0;
+		vars->blue_timer = now;
+	}
+	vars->dropping = true;
+	vars->drop_next = now;
+	if (!vars->count)
+		vars->count = 1;
+
+	return up;
+}
+
+/* Call this when the queue was serviced but turned out to be empty.  Returns
+ * true if the BLUE state was active before but quiescent after this call.
+ */
+static bool cobalt_queue_empty(struct cobalt_vars *vars,
+			       struct cobalt_params *p,
+			       ktime_t now)
+{
+	bool down = false;
+
+	if (vars->p_drop &&
+	    ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
+		if (vars->p_drop < p->p_dec)
+			vars->p_drop = 0;
+		else
+			vars->p_drop -= p->p_dec;
+		vars->blue_timer = now;
+		down = !vars->p_drop;
+	}
+	vars->dropping = false;
+
+	if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
+		vars->count--;
+		cobalt_invsqrt(vars);
+		vars->drop_next = cobalt_control(vars->drop_next,
+						 p->interval,
+						 vars->rec_inv_sqrt);
+	}
+
+	return down;
+}
+
+/* Call this with a freshly dequeued packet for possible congestion marking.
+ * Returns true as an instruction to drop the packet, false for delivery.
+ */
+static bool cobalt_should_drop(struct cobalt_vars *vars,
+			       struct cobalt_params *p,
+			       ktime_t now,
+			       struct sk_buff *skb,
+			       u32 bulk_flows)
+{
+	bool next_due, over_target, drop = false;
+	ktime_t schedule;
+	u64 sojourn;
+
+/* The 'schedule' variable records, in its sign, whether 'now' is before or
+ * after 'drop_next'.  This allows 'drop_next' to be updated before the next
+ * scheduling decision is actually branched, without destroying that
+ * information.  Similarly, the first 'schedule' value calculated is preserved
+ * in the boolean 'next_due'.
+ *
+ * As for 'drop_next', we take advantage of the fact that 'interval' is both
+ * the delay between first exceeding 'target' and the first signalling event,
+ * *and* the scaling factor for the signalling frequency.  It's therefore very
+ * natural to use a single mechanism for both purposes, and eliminates a
+ * significant amount of reference Codel's spaghetti code.  To help with this,
+ * both the '0' and '1' entries in the invsqrt cache are 0xFFFFFFFF, as close
+ * as possible to 1.0 in fixed-point.
+ */
+
+	sojourn = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
+	schedule = ktime_sub(now, vars->drop_next);
+	over_target = sojourn > p->target &&
+		      sojourn > p->mtu_time * bulk_flows * 2 &&
+		      sojourn > p->mtu_time * 4;
+	next_due = vars->count && ktime_to_ns(schedule) >= 0;
+
+	vars->ecn_marked = false;
+
+	if (over_target) {
+		if (!vars->dropping) {
+			vars->dropping = true;
+			vars->drop_next = cobalt_control(now,
+							 p->interval,
+							 vars->rec_inv_sqrt);
+		}
+		if (!vars->count)
+			vars->count = 1;
+	} else if (vars->dropping) {
+		vars->dropping = false;
+	}
+
+	if (next_due && vars->dropping) {
+		/* Use ECN mark if possible, otherwise drop */
+		drop = !(vars->ecn_marked = INET_ECN_set_ce(skb));
+
+		vars->count++;
+		if (!vars->count)
+			vars->count--;
+		cobalt_invsqrt(vars);
+		vars->drop_next = cobalt_control(vars->drop_next,
+						 p->interval,
+						 vars->rec_inv_sqrt);
+		schedule = ktime_sub(now, vars->drop_next);
+	} else {
+		while (next_due) {
+			vars->count--;
+			cobalt_invsqrt(vars);
+			vars->drop_next = cobalt_control(vars->drop_next,
+							 p->interval,
+							 vars->rec_inv_sqrt);
+			schedule = ktime_sub(now, vars->drop_next);
+			next_due = vars->count && ktime_to_ns(schedule) >= 0;
+		}
+	}
+
+	/* Simple BLUE implementation.  Lack of ECN is deliberate. */
+	if (vars->p_drop)
+		drop |= (prandom_u32() < vars->p_drop);
+
+	/* Overload the drop_next field as an activity timeout */
+	if (!vars->count)
+		vars->drop_next = ktime_add_ns(now, p->interval);
+	else if (ktime_to_ns(schedule) > 0 && !drop)
+		vars->drop_next = now;
+
+	return drop;
+}
+
+static void cake_update_flowkeys(struct flow_keys *keys,
+				 const struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+	struct nf_conntrack_tuple tuple = {};
+	bool rev = !skb->_nfct;
+
+	if (tc_skb_protocol(skb) != htons(ETH_P_IP))
+		return;
+
+	if (!nf_ct_get_tuple_skb(&tuple, skb))
+		return;
+
+	keys->addrs.v4addrs.src = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
+	keys->addrs.v4addrs.dst = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
+
+	if (keys->ports.ports) {
+		keys->ports.src = rev ? tuple.dst.u.all : tuple.src.u.all;
+		keys->ports.dst = rev ? tuple.src.u.all : tuple.dst.u.all;
+	}
+#endif
+}
+
+/* Cake has several subtle multiple bit settings. In these cases you
+ *  would be matching triple isolate mode as well.
+ */
+
+static bool cake_dsrc(int flow_mode)
+{
+	return (flow_mode & CAKE_FLOW_DUAL_SRC) == CAKE_FLOW_DUAL_SRC;
+}
+
+static bool cake_ddst(int flow_mode)
+{
+	return (flow_mode & CAKE_FLOW_DUAL_DST) == CAKE_FLOW_DUAL_DST;
+}
+
+static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
+		     int flow_mode)
+{
+	u32 flow_hash = 0, srchost_hash, dsthost_hash;
+	u16 reduced_hash, srchost_idx, dsthost_idx;
+	struct flow_keys keys, host_keys;
+
+	if (unlikely(flow_mode == CAKE_FLOW_NONE))
+		return 0;
+
+	skb_flow_dissect_flow_keys(skb, &keys,
+				   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+
+	if (flow_mode & CAKE_FLOW_NAT_FLAG)
+		cake_update_flowkeys(&keys, skb);
+
+	/* flow_hash_from_keys() sorts the addresses by value, so we have
+	 * to preserve their order in a separate data structure to treat
+	 * src and dst host addresses as independently selectable.
+	 */
+	host_keys = keys;
+	host_keys.ports.ports     = 0;
+	host_keys.basic.ip_proto  = 0;
+	host_keys.keyid.keyid     = 0;
+	host_keys.tags.flow_label = 0;
+
+	switch (host_keys.control.addr_type) {
+	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+		host_keys.addrs.v4addrs.src = 0;
+		dsthost_hash = flow_hash_from_keys(&host_keys);
+		host_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+		host_keys.addrs.v4addrs.dst = 0;
+		srchost_hash = flow_hash_from_keys(&host_keys);
+		break;
+
+	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
+		memset(&host_keys.addrs.v6addrs.src, 0,
+		       sizeof(host_keys.addrs.v6addrs.src));
+		dsthost_hash = flow_hash_from_keys(&host_keys);
+		host_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
+		memset(&host_keys.addrs.v6addrs.dst, 0,
+		       sizeof(host_keys.addrs.v6addrs.dst));
+		srchost_hash = flow_hash_from_keys(&host_keys);
+		break;
+
+	default:
+		dsthost_hash = 0;
+		srchost_hash = 0;
+	}
+
+	/* This *must* be after the above switch, since as a
+	 * side-effect it sorts the src and dst addresses.
+	 */
+	if (flow_mode & CAKE_FLOW_FLOWS)
+		flow_hash = flow_hash_from_keys(&keys);
+
+	if (!(flow_mode & CAKE_FLOW_FLOWS)) {
+		if (flow_mode & CAKE_FLOW_SRC_IP)
+			flow_hash ^= srchost_hash;
+
+		if (flow_mode & CAKE_FLOW_DST_IP)
+			flow_hash ^= dsthost_hash;
+	}
+
+	reduced_hash = flow_hash % CAKE_QUEUES;
+
+	/* set-associative hashing */
+	/* fast path if no hash collision (direct lookup succeeds) */
+	if (likely(q->tags[reduced_hash] == flow_hash &&
+		   q->flows[reduced_hash].set)) {
+		q->way_directs++;
+	} else {
+		u32 inner_hash = reduced_hash % CAKE_SET_WAYS;
+		u32 outer_hash = reduced_hash - inner_hash;
+		bool allocate_src = false;
+		bool allocate_dst = false;
+		u32 i, k;
+
+		/* check if any active queue in the set is reserved for
+		 * this flow.
+		 */
+		for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
+		     i++, k = (k + 1) % CAKE_SET_WAYS) {
+			if (q->tags[outer_hash + k] == flow_hash) {
+				if (i)
+					q->way_hits++;
+
+				if (!q->flows[outer_hash + k].set) {
+					/* need to increment host refcnts */
+					allocate_src = cake_dsrc(flow_mode);
+					allocate_dst = cake_ddst(flow_mode);
+				}
+
+				goto found;
+			}
+		}
+
+		/* no queue is reserved for this flow, look for an
+		 * empty one.
+		 */
+		for (i = 0; i < CAKE_SET_WAYS;
+			 i++, k = (k + 1) % CAKE_SET_WAYS) {
+			if (!q->flows[outer_hash + k].set) {
+				q->way_misses++;
+				allocate_src = cake_dsrc(flow_mode);
+				allocate_dst = cake_ddst(flow_mode);
+				goto found;
+			}
+		}
+
+		/* With no empty queues, default to the original
+		 * queue, accept the collision, update the host tags.
+		 */
+		q->way_collisions++;
+		q->hosts[q->flows[reduced_hash].srchost].srchost_refcnt--;
+		q->hosts[q->flows[reduced_hash].dsthost].dsthost_refcnt--;
+		allocate_src = cake_dsrc(flow_mode);
+		allocate_dst = cake_ddst(flow_mode);
+found:
+		/* reserve queue for future packets in same flow */
+		reduced_hash = outer_hash + k;
+		q->tags[reduced_hash] = flow_hash;
+
+		if (allocate_src) {
+			srchost_idx = srchost_hash % CAKE_QUEUES;
+			inner_hash = srchost_idx % CAKE_SET_WAYS;
+			outer_hash = srchost_idx - inner_hash;
+			for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
+				i++, k = (k + 1) % CAKE_SET_WAYS) {
+				if (q->hosts[outer_hash + k].srchost_tag ==
+				    srchost_hash)
+					goto found_src;
+			}
+			for (i = 0; i < CAKE_SET_WAYS;
+				i++, k = (k + 1) % CAKE_SET_WAYS) {
+				if (!q->hosts[outer_hash + k].srchost_refcnt)
+					break;
+			}
+			q->hosts[outer_hash + k].srchost_tag = srchost_hash;
+found_src:
+			srchost_idx = outer_hash + k;
+			q->hosts[srchost_idx].srchost_refcnt++;
+			q->flows[reduced_hash].srchost = srchost_idx;
+		}
+
+		if (allocate_dst) {
+			dsthost_idx = dsthost_hash % CAKE_QUEUES;
+			inner_hash = dsthost_idx % CAKE_SET_WAYS;
+			outer_hash = dsthost_idx - inner_hash;
+			for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
+			     i++, k = (k + 1) % CAKE_SET_WAYS) {
+				if (q->hosts[outer_hash + k].dsthost_tag ==
+				    dsthost_hash)
+					goto found_dst;
+			}
+			for (i = 0; i < CAKE_SET_WAYS;
+			     i++, k = (k + 1) % CAKE_SET_WAYS) {
+				if (!q->hosts[outer_hash + k].dsthost_refcnt)
+					break;
+			}
+			q->hosts[outer_hash + k].dsthost_tag = dsthost_hash;
+found_dst:
+			dsthost_idx = outer_hash + k;
+			q->hosts[dsthost_idx].dsthost_refcnt++;
+			q->flows[reduced_hash].dsthost = dsthost_idx;
+		}
+	}
+
+	return reduced_hash;
+}
+
+/* helper functions : might be changed when/if skb use a standard list_head */
+/* remove one skb from head of slot queue */
+
+static struct sk_buff *dequeue_head(struct cake_flow *flow)
+{
+	struct sk_buff *skb = flow->head;
+
+	if (skb) {
+		flow->head = skb->next;
+		skb->next = NULL;
+	}
+
+	return skb;
+}
+
+/* add skb to flow queue (tail add) */
+
+static void flow_queue_add(struct cake_flow *flow, struct sk_buff *skb)
+{
+	if (!flow->head)
+		flow->head = skb;
+	else
+		flow->tail->next = skb;
+	flow->tail = skb;
+	skb->next = NULL;
+}
+
+static struct iphdr *cake_get_iphdr(const struct sk_buff *skb,
+				    struct ipv6hdr *buf)
+{
+	unsigned int offset = skb_network_offset(skb);
+	struct iphdr *iph;
+
+	iph = skb_header_pointer(skb, offset, sizeof(struct iphdr), buf);
+
+	if (!iph)
+		return NULL;
+
+	if (iph->version == 4 && iph->protocol == IPPROTO_IPV6)
+		return skb_header_pointer(skb, offset + iph->ihl * 4,
+					  sizeof(struct ipv6hdr), buf);
+
+	else if (iph->version == 4)
+		return iph;
+
+	else if (iph->version == 6)
+		return skb_header_pointer(skb, offset, sizeof(struct ipv6hdr),
+					  buf);
+
+	return NULL;
+}
+
+static struct tcphdr *cake_get_tcphdr(const struct sk_buff *skb,
+				      void *buf, unsigned int bufsize)
+{
+	unsigned int offset = skb_network_offset(skb);
+	const struct ipv6hdr *ipv6h;
+	const struct tcphdr *tcph;
+	const struct iphdr *iph;
+	struct ipv6hdr _ipv6h;
+	struct tcphdr _tcph;
+
+	ipv6h = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h);
+
+	if (!ipv6h)
+		return NULL;
+
+	if (ipv6h->version == 4) {
+		iph = (struct iphdr *)ipv6h;
+		offset += iph->ihl * 4;
+
+		/* special-case 6in4 tunnelling, as that is a common way to get
+		 * v6 connectivity in the home
+		 */
+		if (iph->protocol == IPPROTO_IPV6) {
+			ipv6h = skb_header_pointer(skb, offset,
+						   sizeof(_ipv6h), &_ipv6h);
+
+			if (!ipv6h || ipv6h->nexthdr != IPPROTO_TCP)
+				return NULL;
+
+			offset += sizeof(struct ipv6hdr);
+
+		} else if (iph->protocol != IPPROTO_TCP) {
+			return NULL;
+		}
+
+	} else if (ipv6h->version == 6) {
+		if (ipv6h->nexthdr != IPPROTO_TCP)
+			return NULL;
+
+		offset += sizeof(struct ipv6hdr);
+	} else {
+		return NULL;
+	}
+
+	tcph = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
+	if (!tcph)
+		return NULL;
+
+	return skb_header_pointer(skb, offset,
+				  min(__tcp_hdrlen(tcph), bufsize), buf);
+}
+
+static const void *cake_get_tcpopt(const struct tcphdr *tcph,
+				   int code, int *oplen)
+{
+	/* inspired by tcp_parse_options in tcp_input.c */
+	int length = __tcp_hdrlen(tcph) - sizeof(struct tcphdr);
+	const u8 *ptr = (const u8 *)(tcph + 1);
+
+	while (length > 0) {
+		int opcode = *ptr++;
+		int opsize;
+
+		if (opcode == TCPOPT_EOL)
+			break;
+		if (opcode == TCPOPT_NOP) {
+			length--;
+			continue;
+		}
+		opsize = *ptr++;
+		if (opsize < 2 || opsize > length)
+			break;
+
+		if (opcode == code) {
+			*oplen = opsize;
+			return ptr;
+		}
+
+		ptr += opsize - 2;
+		length -= opsize;
+	}
+
+	return NULL;
+}
+
+/* Compare two SACK sequences. A sequence is considered greater if it SACKs more
+ * bytes than the other. In the case where both sequences ACKs bytes that the
+ * other doesn't, A is considered greater. DSACKs in A also makes A be
+ * considered greater.
+ *
+ * @return -1, 0 or 1 as normal compare functions
+ */
+static int cake_tcph_sack_compare(const struct tcphdr *tcph_a,
+				  const struct tcphdr *tcph_b)
+{
+	const struct tcp_sack_block_wire *sack_a, *sack_b;
+	u32 ack_seq_a = ntohl(tcph_a->ack_seq);
+	u32 bytes_a = 0, bytes_b = 0;
+	int oplen_a, oplen_b;
+	bool first = true;
+
+	sack_a = cake_get_tcpopt(tcph_a, TCPOPT_SACK, &oplen_a);
+	sack_b = cake_get_tcpopt(tcph_b, TCPOPT_SACK, &oplen_b);
+
+	/* pointers point to option contents */
+	oplen_a -= TCPOLEN_SACK_BASE;
+	oplen_b -= TCPOLEN_SACK_BASE;
+
+	if (sack_a && oplen_a >= sizeof(*sack_a) &&
+	    (!sack_b || oplen_b < sizeof(*sack_b)))
+		return -1;
+	else if (sack_b && oplen_b >= sizeof(*sack_b) &&
+		 (!sack_a || oplen_a < sizeof(*sack_a)))
+		return 1;
+	else if ((!sack_a || oplen_a < sizeof(*sack_a)) &&
+		 (!sack_b || oplen_b < sizeof(*sack_b)))
+		return 0;
+
+	while (oplen_a >= sizeof(*sack_a)) {
+		const struct tcp_sack_block_wire *sack_tmp = sack_b;
+		u32 start_a = get_unaligned_be32(&sack_a->start_seq);
+		u32 end_a = get_unaligned_be32(&sack_a->end_seq);
+		int oplen_tmp = oplen_b;
+		bool found = false;
+
+		/* DSACK; always considered greater to prevent dropping */
+		if (before(start_a, ack_seq_a))
+			return -1;
+
+		bytes_a += end_a - start_a;
+
+		while (oplen_tmp >= sizeof(*sack_tmp)) {
+			u32 start_b = get_unaligned_be32(&sack_tmp->start_seq);
+			u32 end_b = get_unaligned_be32(&sack_tmp->end_seq);
+
+			/* first time through we count the total size */
+			if (first)
+				bytes_b += end_b - start_b;
+
+			if (!after(start_b, start_a) && !before(end_b, end_a)) {
+				found = true;
+				if (!first)
+					break;
+			}
+			oplen_tmp -= sizeof(*sack_tmp);
+			sack_tmp++;
+		}
+
+		if (!found)
+			return -1;
+
+		oplen_a -= sizeof(*sack_a);
+		sack_a++;
+		first = false;
+	}
+
+	/* If we made it this far, all ranges SACKed by A are covered by B, so
+	 * either the SACKs are equal, or B SACKs more bytes.
+	 */
+	return bytes_b > bytes_a ? 1 : 0;
+}
+
+static void cake_tcph_get_tstamp(const struct tcphdr *tcph,
+				 u32 *tsval, u32 *tsecr)
+{
+	const u8 *ptr;
+	int opsize;
+
+	ptr = cake_get_tcpopt(tcph, TCPOPT_TIMESTAMP, &opsize);
+
+	if (ptr && opsize == TCPOLEN_TIMESTAMP) {
+		*tsval = get_unaligned_be32(ptr);
+		*tsecr = get_unaligned_be32(ptr + 4);
+	}
+}
+
+static bool cake_tcph_may_drop(const struct tcphdr *tcph,
+			       u32 tstamp_new, u32 tsecr_new)
+{
+	/* inspired by tcp_parse_options in tcp_input.c */
+	int length = __tcp_hdrlen(tcph) - sizeof(struct tcphdr);
+	const u8 *ptr = (const u8 *)(tcph + 1);
+	u32 tstamp, tsecr;
+
+	/* 3 reserved flags must be unset to avoid future breakage
+	 * ACK must be set
+	 * ECE/CWR are handled separately
+	 * All other flags URG/PSH/RST/SYN/FIN must be unset
+	 * 0x0FFF0000 = all TCP flags (confirm ACK=1, others zero)
+	 * 0x00C00000 = CWR/ECE (handled separately)
+	 * 0x0F3F0000 = 0x0FFF0000 & ~0x00C00000
+	 */
+	if (((tcp_flag_word(tcph) &
+	      cpu_to_be32(0x0F3F0000)) != TCP_FLAG_ACK))
+		return false;
+
+	while (length > 0) {
+		int opcode = *ptr++;
+		int opsize;
+
+		if (opcode == TCPOPT_EOL)
+			break;
+		if (opcode == TCPOPT_NOP) {
+			length--;
+			continue;
+		}
+		opsize = *ptr++;
+		if (opsize < 2 || opsize > length)
+			break;
+
+		switch (opcode) {
+		case TCPOPT_MD5SIG: /* doesn't influence state */
+			break;
+
+		case TCPOPT_SACK: /* stricter checking performed later */
+			if (opsize % 8 != 2)
+				return false;
+			break;
+
+		case TCPOPT_TIMESTAMP:
+			/* only drop timestamps lower than new */
+			if (opsize != TCPOLEN_TIMESTAMP)
+				return false;
+			tstamp = get_unaligned_be32(ptr);
+			tsecr = get_unaligned_be32(ptr + 4);
+			if (after(tstamp, tstamp_new) ||
+			    after(tsecr, tsecr_new))
+				return false;
+			break;
+
+		case TCPOPT_MSS:  /* these should only be set on SYN */
+		case TCPOPT_WINDOW:
+		case TCPOPT_SACK_PERM:
+		case TCPOPT_FASTOPEN:
+		case TCPOPT_EXP:
+		default: /* don't drop if any unknown options are present */
+			return false;
+		}
+
+		ptr += opsize - 2;
+		length -= opsize;
+	}
+
+	return true;
+}
+
+static struct sk_buff *cake_ack_filter(struct cake_sched_data *q,
+				       struct cake_flow *flow)
+{
+	bool aggressive = q->ack_filter == CAKE_ACK_AGGRESSIVE;
+	struct sk_buff *elig_ack = NULL, *elig_ack_prev = NULL;
+	struct sk_buff *skb_check, *skb_prev = NULL;
+	const struct ipv6hdr *ipv6h, *ipv6h_check;
+	unsigned char _tcph[64], _tcph_check[64];
+	const struct tcphdr *tcph, *tcph_check;
+	const struct iphdr *iph, *iph_check;
+	struct ipv6hdr _iph, _iph_check;
+	const struct sk_buff *skb;
+	int seglen, num_found = 0;
+	u32 tstamp = 0, tsecr = 0;
+	__be32 elig_flags = 0;
+	int sack_comp;
+
+	/* no other possible ACKs to filter */
+	if (flow->head == flow->tail)
+		return NULL;
+
+	skb = flow->tail;
+	tcph = cake_get_tcphdr(skb, _tcph, sizeof(_tcph));
+	iph = cake_get_iphdr(skb, &_iph);
+	if (!tcph)
+		return NULL;
+
+	cake_tcph_get_tstamp(tcph, &tstamp, &tsecr);
+
+	/* the 'triggering' packet need only have the ACK flag set.
+	 * also check that SYN is not set, as there won't be any previous ACKs.
+	 */
+	if ((tcp_flag_word(tcph) &
+	     (TCP_FLAG_ACK | TCP_FLAG_SYN)) != TCP_FLAG_ACK)
+		return NULL;
+
+	/* the 'triggering' ACK is at the tail of the queue, we have already
+	 * returned if it is the only packet in the flow. loop through the rest
+	 * of the queue looking for pure ACKs with the same 5-tuple as the
+	 * triggering one.
+	 */
+	for (skb_check = flow->head;
+	     skb_check && skb_check != skb;
+	     skb_prev = skb_check, skb_check = skb_check->next) {
+		iph_check = cake_get_iphdr(skb_check, &_iph_check);
+		tcph_check = cake_get_tcphdr(skb_check, &_tcph_check,
+					     sizeof(_tcph_check));
+
+		/* only TCP packets with matching 5-tuple are eligible, and only
+		 * drop safe headers
+		 */
+		if (!tcph_check || iph->version != iph_check->version ||
+		    tcph_check->source != tcph->source ||
+		    tcph_check->dest != tcph->dest)
+			continue;
+
+		if (iph_check->version == 4) {
+			if (iph_check->saddr != iph->saddr ||
+			    iph_check->daddr != iph->daddr)
+				continue;
+
+			seglen = ntohs(iph_check->tot_len) -
+				       (4 * iph_check->ihl);
+		} else if (iph_check->version == 6) {
+			ipv6h = (struct ipv6hdr *)iph;
+			ipv6h_check = (struct ipv6hdr *)iph_check;
+
+			if (ipv6_addr_cmp(&ipv6h_check->saddr, &ipv6h->saddr) ||
+			    ipv6_addr_cmp(&ipv6h_check->daddr, &ipv6h->daddr))
+				continue;
+
+			seglen = ntohs(ipv6h_check->payload_len);
+		} else {
+			WARN_ON(1);  /* shouldn't happen */
+			continue;
+		}
+
+		/* If the ECE/CWR flags changed from the previous eligible
+		 * packet in the same flow, we should no longer be dropping that
+		 * previous packet as this would lose information.
+		 */
+		if (elig_ack && (tcp_flag_word(tcph_check) &
+				 (TCP_FLAG_ECE | TCP_FLAG_CWR)) != elig_flags) {
+			elig_ack = NULL;
+			elig_ack_prev = NULL;
+			num_found--;
+		}
+
+		/* Check TCP options and flags, don't drop ACKs with segment
+		 * data, and don't drop ACKs with a higher cumulative ACK
+		 * counter than the triggering packet. Check ACK seqno here to
+		 * avoid parsing SACK options of packets we are going to exclude
+		 * anyway.
+		 */
+		if (!cake_tcph_may_drop(tcph_check, tstamp, tsecr) ||
+		    (seglen - __tcp_hdrlen(tcph_check)) != 0 ||
+		    after(ntohl(tcph_check->ack_seq), ntohl(tcph->ack_seq)))
+			continue;
+
+		/* Check SACK options. The triggering packet must SACK more data
+		 * than the ACK under consideration, or SACK the same range but
+		 * have a larger cumulative ACK counter. The latter is a
+		 * pathological case, but is contained in the following check
+		 * anyway, just to be safe.
+		 */
+		sack_comp = cake_tcph_sack_compare(tcph_check, tcph);
+
+		if (sack_comp < 0 ||
+		    (ntohl(tcph_check->ack_seq) == ntohl(tcph->ack_seq) &&
+		     sack_comp == 0))
+			continue;
+
+		/* At this point we have found an eligible pure ACK to drop; if
+		 * we are in aggressive mode, we are done. Otherwise, keep
+		 * searching unless this is the second eligible ACK we
+		 * found.
+		 *
+		 * Since we want to drop ACK closest to the head of the queue,
+		 * save the first eligible ACK we find, even if we need to loop
+		 * again.
+		 */
+		if (!elig_ack) {
+			elig_ack = skb_check;
+			elig_ack_prev = skb_prev;
+			elig_flags = (tcp_flag_word(tcph_check)
+				      & (TCP_FLAG_ECE | TCP_FLAG_CWR));
+		}
+
+		if (num_found++ > 0)
+			goto found;
+	}
+
+	/* We made it through the queue without finding two eligible ACKs . If
+	 * we found a single eligible ACK we can drop it in aggressive mode if
+	 * we can guarantee that this does not interfere with ECN flag
+	 * information. We ensure this by dropping it only if the enqueued
+	 * packet is consecutive with the eligible ACK, and their flags match.
+	 */
+	if (elig_ack && aggressive && elig_ack->next == skb &&
+	    (elig_flags == (tcp_flag_word(tcph) &
+			    (TCP_FLAG_ECE | TCP_FLAG_CWR))))
+		goto found;
+
+	return NULL;
+
+found:
+	if (elig_ack_prev)
+		elig_ack_prev->next = elig_ack->next;
+	else
+		flow->head = elig_ack->next;
+
+	elig_ack->next = NULL;
+
+	return elig_ack;
+}
+
+static u64 cake_ewma(u64 avg, u64 sample, u32 shift)
+{
+	avg -= avg >> shift;
+	avg += sample >> shift;
+	return avg;
+}
+
+static u32 cake_calc_overhead(struct cake_sched_data *q, u32 len, u32 off)
+{
+	if (q->rate_flags & CAKE_FLAG_OVERHEAD)
+		len -= off;
+
+	if (q->max_netlen < len)
+		q->max_netlen = len;
+	if (q->min_netlen > len)
+		q->min_netlen = len;
+
+	len += q->rate_overhead;
+
+	if (len < q->rate_mpu)
+		len = q->rate_mpu;
+
+	if (q->atm_mode == CAKE_ATM_ATM) {
+		len += 47;
+		len /= 48;
+		len *= 53;
+	} else if (q->atm_mode == CAKE_ATM_PTM) {
+		/* Add one byte per 64 bytes or part thereof.
+		 * This is conservative and easier to calculate than the
+		 * precise value.
+		 */
+		len += (len + 63) / 64;
+	}
+
+	if (q->max_adjlen < len)
+		q->max_adjlen = len;
+	if (q->min_adjlen > len)
+		q->min_adjlen = len;
+
+	return len;
+}
+
+static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb)
+{
+	const struct skb_shared_info *shinfo = skb_shinfo(skb);
+	unsigned int hdr_len, last_len = 0;
+	u32 off = skb_network_offset(skb);
+	u32 len = qdisc_pkt_len(skb);
+	u16 segs = 1;
+
+	q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8);
+
+	if (!shinfo->gso_size)
+		return cake_calc_overhead(q, len, off);
+
+	/* borrowed from qdisc_pkt_len_init() */
+	hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
+
+	/* + transport layer */
+	if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 |
+						SKB_GSO_TCPV6))) {
+		const struct tcphdr *th;
+		struct tcphdr _tcphdr;
+
+		th = skb_header_pointer(skb, skb_transport_offset(skb),
+					sizeof(_tcphdr), &_tcphdr);
+		if (likely(th))
+			hdr_len += __tcp_hdrlen(th);
+	} else {
+		struct udphdr _udphdr;
+
+		if (skb_header_pointer(skb, skb_transport_offset(skb),
+				       sizeof(_udphdr), &_udphdr))
+			hdr_len += sizeof(struct udphdr);
+	}
+
+	if (unlikely(shinfo->gso_type & SKB_GSO_DODGY))
+		segs = DIV_ROUND_UP(skb->len - hdr_len,
+				    shinfo->gso_size);
+	else
+		segs = shinfo->gso_segs;
+
+	len = shinfo->gso_size + hdr_len;
+	last_len = skb->len - shinfo->gso_size * (segs - 1);
+
+	return (cake_calc_overhead(q, len, off) * (segs - 1) +
+		cake_calc_overhead(q, last_len, off));
+}
+
+static void cake_heap_swap(struct cake_sched_data *q, u16 i, u16 j)
+{
+	struct cake_heap_entry ii = q->overflow_heap[i];
+	struct cake_heap_entry jj = q->overflow_heap[j];
+
+	q->overflow_heap[i] = jj;
+	q->overflow_heap[j] = ii;
+
+	q->tins[ii.t].overflow_idx[ii.b] = j;
+	q->tins[jj.t].overflow_idx[jj.b] = i;
+}
+
+static u32 cake_heap_get_backlog(const struct cake_sched_data *q, u16 i)
+{
+	struct cake_heap_entry ii = q->overflow_heap[i];
+
+	return q->tins[ii.t].backlogs[ii.b];
+}
+
+static void cake_heapify(struct cake_sched_data *q, u16 i)
+{
+	static const u32 a = CAKE_MAX_TINS * CAKE_QUEUES;
+	u32 mb = cake_heap_get_backlog(q, i);
+	u32 m = i;
+
+	while (m < a) {
+		u32 l = m + m + 1;
+		u32 r = l + 1;
+
+		if (l < a) {
+			u32 lb = cake_heap_get_backlog(q, l);
+
+			if (lb > mb) {
+				m  = l;
+				mb = lb;
+			}
+		}
+
+		if (r < a) {
+			u32 rb = cake_heap_get_backlog(q, r);
+
+			if (rb > mb) {
+				m  = r;
+				mb = rb;
+			}
+		}
+
+		if (m != i) {
+			cake_heap_swap(q, i, m);
+			i = m;
+		} else {
+			break;
+		}
+	}
+}
+
+static void cake_heapify_up(struct cake_sched_data *q, u16 i)
+{
+	while (i > 0 && i < CAKE_MAX_TINS * CAKE_QUEUES) {
+		u16 p = (i - 1) >> 1;
+		u32 ib = cake_heap_get_backlog(q, i);
+		u32 pb = cake_heap_get_backlog(q, p);
+
+		if (ib > pb) {
+			cake_heap_swap(q, i, p);
+			i = p;
+		} else {
+			break;
+		}
+	}
+}
+
+static int cake_advance_shaper(struct cake_sched_data *q,
+			       struct cake_tin_data *b,
+			       struct sk_buff *skb,
+			       ktime_t now, bool drop)
+{
+	u32 len = get_cobalt_cb(skb)->adjusted_len;
+
+	/* charge packet bandwidth to this tin
+	 * and to the global shaper.
+	 */
+	if (q->rate_ns) {
+		u64 tin_dur = (len * b->tin_rate_ns) >> b->tin_rate_shft;
+		u64 global_dur = (len * q->rate_ns) >> q->rate_shft;
+		u64 failsafe_dur = global_dur + (global_dur >> 1);
+
+		if (ktime_before(b->time_next_packet, now))
+			b->time_next_packet = ktime_add_ns(b->time_next_packet,
+							   tin_dur);
+
+		else if (ktime_before(b->time_next_packet,
+				      ktime_add_ns(now, tin_dur)))
+			b->time_next_packet = ktime_add_ns(now, tin_dur);
+
+		q->time_next_packet = ktime_add_ns(q->time_next_packet,
+						   global_dur);
+		if (!drop)
+			q->failsafe_next_packet = \
+				ktime_add_ns(q->failsafe_next_packet,
+					     failsafe_dur);
+	}
+	return len;
+}
+
+static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	ktime_t now = ktime_get();
+	u32 idx = 0, tin = 0, len;
+	struct cake_heap_entry qq;
+	struct cake_tin_data *b;
+	struct cake_flow *flow;
+	struct sk_buff *skb;
+
+	if (!q->overflow_timeout) {
+		int i;
+		/* Build fresh max-heap */
+		for (i = CAKE_MAX_TINS * CAKE_QUEUES / 2; i >= 0; i--)
+			cake_heapify(q, i);
+	}
+	q->overflow_timeout = 65535;
+
+	/* select longest queue for pruning */
+	qq  = q->overflow_heap[0];
+	tin = qq.t;
+	idx = qq.b;
+
+	b = &q->tins[tin];
+	flow = &b->flows[idx];
+	skb = dequeue_head(flow);
+	if (unlikely(!skb)) {
+		/* heap has gone wrong, rebuild it next time */
+		q->overflow_timeout = 0;
+		return idx + (tin << 16);
+	}
+
+	if (cobalt_queue_full(&flow->cvars, &b->cparams, now))
+		b->unresponsive_flow_count++;
+
+	len = qdisc_pkt_len(skb);
+	q->buffer_used      -= skb->truesize;
+	b->backlogs[idx]    -= len;
+	b->tin_backlog      -= len;
+	sch->qstats.backlog -= len;
+	qdisc_tree_reduce_backlog(sch, 1, len);
+
+	flow->dropped++;
+	b->tin_dropped++;
+	sch->qstats.drops++;
+
+	if (q->rate_flags & CAKE_FLAG_INGRESS)
+		cake_advance_shaper(q, b, skb, now, true);
+
+	__qdisc_drop(skb, to_free);
+	sch->q.qlen--;
+
+	cake_heapify(q, 0);
+
+	return idx + (tin << 16);
+}
+
+static void cake_wash_diffserv(struct sk_buff *skb)
+{
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
+		break;
+	case htons(ETH_P_IPV6):
+		ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
+		break;
+	default:
+		break;
+	}
+}
+
+static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
+{
+	u8 dscp;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+		if (wash && dscp)
+			ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
+		return dscp;
+
+	case htons(ETH_P_IPV6):
+		dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+		if (wash && dscp)
+			ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
+		return dscp;
+
+	case htons(ETH_P_ARP):
+		return 0x38;  /* CS7 - Net Control */
+
+	default:
+		/* If there is no Diffserv field, treat as best-effort */
+		return 0;
+	}
+}
+
+static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
+					     struct sk_buff *skb)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	u32 tin;
+
+	if (TC_H_MAJ(skb->priority) == sch->handle &&
+	    TC_H_MIN(skb->priority) > 0 &&
+	    TC_H_MIN(skb->priority) <= q->tin_cnt) {
+		tin = q->tin_order[TC_H_MIN(skb->priority) - 1];
+
+		if (q->rate_flags & CAKE_FLAG_WASH)
+			cake_wash_diffserv(skb);
+	} else if (q->tin_mode != CAKE_DIFFSERV_BESTEFFORT) {
+		/* extract the Diffserv Precedence field, if it exists */
+		/* and clear DSCP bits if washing */
+		tin = q->tin_index[cake_handle_diffserv(skb,
+				q->rate_flags & CAKE_FLAG_WASH)];
+		if (unlikely(tin >= q->tin_cnt))
+			tin = 0;
+	} else {
+		tin = 0;
+		if (q->rate_flags & CAKE_FLAG_WASH)
+			cake_wash_diffserv(skb);
+	}
+
+	return &q->tins[tin];
+}
+
+static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t,
+			 struct sk_buff *skb, int flow_mode, int *qerr)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct tcf_proto *filter;
+	struct tcf_result res;
+	u32 flow = 0;
+	int result;
+
+	filter = rcu_dereference_bh(q->filter_list);
+	if (!filter)
+		goto hash;
+
+	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+	result = tcf_classify(skb, filter, &res, false);
+
+	if (result >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+		switch (result) {
+		case TC_ACT_STOLEN:
+		case TC_ACT_QUEUED:
+		case TC_ACT_TRAP:
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+			/* fall through */
+		case TC_ACT_SHOT:
+			return 0;
+		}
+#endif
+		if (TC_H_MIN(res.classid) <= CAKE_QUEUES)
+			flow = TC_H_MIN(res.classid);
+	}
+hash:
+	*t = cake_select_tin(sch, skb);
+	return flow ?: cake_hash(*t, skb, flow_mode) + 1;
+}
+
+static void cake_reconfigure(struct Qdisc *sch);
+
+static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+			struct sk_buff **to_free)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	int len = qdisc_pkt_len(skb);
+	int uninitialized_var(ret);
+	struct sk_buff *ack = NULL;
+	ktime_t now = ktime_get();
+	struct cake_tin_data *b;
+	struct cake_flow *flow;
+	u32 idx;
+
+	/* choose flow to insert into */
+	idx = cake_classify(sch, &b, skb, q->flow_mode, &ret);
+	if (idx == 0) {
+		if (ret & __NET_XMIT_BYPASS)
+			qdisc_qstats_drop(sch);
+		__qdisc_drop(skb, to_free);
+		return ret;
+	}
+	idx--;
+	flow = &b->flows[idx];
+
+	/* ensure shaper state isn't stale */
+	if (!b->tin_backlog) {
+		if (ktime_before(b->time_next_packet, now))
+			b->time_next_packet = now;
+
+		if (!sch->q.qlen) {
+			if (ktime_before(q->time_next_packet, now)) {
+				q->failsafe_next_packet = now;
+				q->time_next_packet = now;
+			} else if (ktime_after(q->time_next_packet, now) &&
+				   ktime_after(q->failsafe_next_packet, now)) {
+				u64 next = \
+					min(ktime_to_ns(q->time_next_packet),
+					    ktime_to_ns(
+						   q->failsafe_next_packet));
+				sch->qstats.overlimits++;
+				qdisc_watchdog_schedule_ns(&q->watchdog, next);
+			}
+		}
+	}
+
+	if (unlikely(len > b->max_skblen))
+		b->max_skblen = len;
+
+	if (skb_is_gso(skb) && q->rate_flags & CAKE_FLAG_SPLIT_GSO) {
+		struct sk_buff *segs, *nskb;
+		netdev_features_t features = netif_skb_features(skb);
+		unsigned int slen = 0;
+
+		segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+		if (IS_ERR_OR_NULL(segs))
+			return qdisc_drop(skb, sch, to_free);
+
+		while (segs) {
+			nskb = segs->next;
+			segs->next = NULL;
+			qdisc_skb_cb(segs)->pkt_len = segs->len;
+			cobalt_set_enqueue_time(segs, now);
+			get_cobalt_cb(segs)->adjusted_len = cake_overhead(q,
+									  segs);
+			flow_queue_add(flow, segs);
+
+			sch->q.qlen++;
+			slen += segs->len;
+			q->buffer_used += segs->truesize;
+			b->packets++;
+			segs = nskb;
+		}
+
+		/* stats */
+		b->bytes	    += slen;
+		b->backlogs[idx]    += slen;
+		b->tin_backlog      += slen;
+		sch->qstats.backlog += slen;
+		q->avg_window_bytes += slen;
+
+		qdisc_tree_reduce_backlog(sch, 1, len);
+		consume_skb(skb);
+	} else {
+		/* not splitting */
+		cobalt_set_enqueue_time(skb, now);
+		get_cobalt_cb(skb)->adjusted_len = cake_overhead(q, skb);
+		flow_queue_add(flow, skb);
+
+		if (q->ack_filter)
+			ack = cake_ack_filter(q, flow);
+
+		if (ack) {
+			b->ack_drops++;
+			sch->qstats.drops++;
+			b->bytes += qdisc_pkt_len(ack);
+			len -= qdisc_pkt_len(ack);
+			q->buffer_used += skb->truesize - ack->truesize;
+			if (q->rate_flags & CAKE_FLAG_INGRESS)
+				cake_advance_shaper(q, b, ack, now, true);
+
+			qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(ack));
+			consume_skb(ack);
+		} else {
+			sch->q.qlen++;
+			q->buffer_used      += skb->truesize;
+		}
+
+		/* stats */
+		b->packets++;
+		b->bytes	    += len;
+		b->backlogs[idx]    += len;
+		b->tin_backlog      += len;
+		sch->qstats.backlog += len;
+		q->avg_window_bytes += len;
+	}
+
+	if (q->overflow_timeout)
+		cake_heapify_up(q, b->overflow_idx[idx]);
+
+	/* incoming bandwidth capacity estimate */
+	if (q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS) {
+		u64 packet_interval = \
+			ktime_to_ns(ktime_sub(now, q->last_packet_time));
+
+		if (packet_interval > NSEC_PER_SEC)
+			packet_interval = NSEC_PER_SEC;
+
+		/* filter out short-term bursts, eg. wifi aggregation */
+		q->avg_packet_interval = \
+			cake_ewma(q->avg_packet_interval,
+				  packet_interval,
+				  (packet_interval > q->avg_packet_interval ?
+					  2 : 8));
+
+		q->last_packet_time = now;
+
+		if (packet_interval > q->avg_packet_interval) {
+			u64 window_interval = \
+				ktime_to_ns(ktime_sub(now,
+						      q->avg_window_begin));
+			u64 b = q->avg_window_bytes * (u64)NSEC_PER_SEC;
+
+			do_div(b, window_interval);
+			q->avg_peak_bandwidth =
+				cake_ewma(q->avg_peak_bandwidth, b,
+					  b > q->avg_peak_bandwidth ? 2 : 8);
+			q->avg_window_bytes = 0;
+			q->avg_window_begin = now;
+
+			if (ktime_after(now,
+					ktime_add_ms(q->last_reconfig_time,
+						     250))) {
+				q->rate_bps = (q->avg_peak_bandwidth * 15) >> 4;
+				cake_reconfigure(sch);
+			}
+		}
+	} else {
+		q->avg_window_bytes = 0;
+		q->last_packet_time = now;
+	}
+
+	/* flowchain */
+	if (!flow->set || flow->set == CAKE_SET_DECAYING) {
+		struct cake_host *srchost = &b->hosts[flow->srchost];
+		struct cake_host *dsthost = &b->hosts[flow->dsthost];
+		u16 host_load = 1;
+
+		if (!flow->set) {
+			list_add_tail(&flow->flowchain, &b->new_flows);
+		} else {
+			b->decaying_flow_count--;
+			list_move_tail(&flow->flowchain, &b->new_flows);
+		}
+		flow->set = CAKE_SET_SPARSE;
+		b->sparse_flow_count++;
+
+		if (cake_dsrc(q->flow_mode))
+			host_load = max(host_load, srchost->srchost_refcnt);
+
+		if (cake_ddst(q->flow_mode))
+			host_load = max(host_load, dsthost->dsthost_refcnt);
+
+		flow->deficit = (b->flow_quantum *
+				 quantum_div[host_load]) >> 16;
+	} else if (flow->set == CAKE_SET_SPARSE_WAIT) {
+		/* this flow was empty, accounted as a sparse flow, but actually
+		 * in the bulk rotation.
+		 */
+		flow->set = CAKE_SET_BULK;
+		b->sparse_flow_count--;
+		b->bulk_flow_count++;
+	}
+
+	if (q->buffer_used > q->buffer_max_used)
+		q->buffer_max_used = q->buffer_used;
+
+	if (q->buffer_used > q->buffer_limit) {
+		u32 dropped = 0;
+
+		while (q->buffer_used > q->buffer_limit) {
+			dropped++;
+			cake_drop(sch, to_free);
+		}
+		b->drop_overlimit += dropped;
+	}
+	return NET_XMIT_SUCCESS;
+}
+
+static struct sk_buff *cake_dequeue_one(struct Qdisc *sch)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct cake_tin_data *b = &q->tins[q->cur_tin];
+	struct cake_flow *flow = &b->flows[q->cur_flow];
+	struct sk_buff *skb = NULL;
+	u32 len;
+
+	if (flow->head) {
+		skb = dequeue_head(flow);
+		len = qdisc_pkt_len(skb);
+		b->backlogs[q->cur_flow] -= len;
+		b->tin_backlog		 -= len;
+		sch->qstats.backlog      -= len;
+		q->buffer_used		 -= skb->truesize;
+		sch->q.qlen--;
+
+		if (q->overflow_timeout)
+			cake_heapify(q, b->overflow_idx[q->cur_flow]);
+	}
+	return skb;
+}
+
+/* Discard leftover packets from a tin no longer in use. */
+static void cake_clear_tin(struct Qdisc *sch, u16 tin)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+
+	q->cur_tin = tin;
+	for (q->cur_flow = 0; q->cur_flow < CAKE_QUEUES; q->cur_flow++)
+		while (!!(skb = cake_dequeue_one(sch)))
+			kfree_skb(skb);
+}
+
+static struct sk_buff *cake_dequeue(struct Qdisc *sch)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct cake_tin_data *b = &q->tins[q->cur_tin];
+	struct cake_host *srchost, *dsthost;
+	ktime_t now = ktime_get();
+	struct cake_flow *flow;
+	struct list_head *head;
+	bool first_flow = true;
+	struct sk_buff *skb;
+	u16 host_load;
+	u64 delay;
+	u32 len;
+
+begin:
+	if (!sch->q.qlen)
+		return NULL;
+
+	/* global hard shaper */
+	if (ktime_after(q->time_next_packet, now) &&
+	    ktime_after(q->failsafe_next_packet, now)) {
+		u64 next = min(ktime_to_ns(q->time_next_packet),
+			       ktime_to_ns(q->failsafe_next_packet));
+
+		sch->qstats.overlimits++;
+		qdisc_watchdog_schedule_ns(&q->watchdog, next);
+		return NULL;
+	}
+
+	/* Choose a class to work on. */
+	if (!q->rate_ns) {
+		/* In unlimited mode, can't rely on shaper timings, just balance
+		 * with DRR
+		 */
+		bool wrapped = false, empty = true;
+
+		while (b->tin_deficit < 0 ||
+		       !(b->sparse_flow_count + b->bulk_flow_count)) {
+			if (b->tin_deficit <= 0)
+				b->tin_deficit += b->tin_quantum_band;
+			if (b->sparse_flow_count + b->bulk_flow_count)
+				empty = false;
+
+			q->cur_tin++;
+			b++;
+			if (q->cur_tin >= q->tin_cnt) {
+				q->cur_tin = 0;
+				b = q->tins;
+
+				if (wrapped) {
+					/* It's possible for q->qlen to be
+					 * nonzero when we actually have no
+					 * packets anywhere.
+					 */
+					if (empty)
+						return NULL;
+				} else {
+					wrapped = true;
+				}
+			}
+		}
+	} else {
+		/* In shaped mode, choose:
+		 * - Highest-priority tin with queue and meeting schedule, or
+		 * - The earliest-scheduled tin with queue.
+		 */
+		ktime_t best_time = KTIME_MAX;
+		int tin, best_tin = 0;
+
+		for (tin = 0; tin < q->tin_cnt; tin++) {
+			b = q->tins + tin;
+			if ((b->sparse_flow_count + b->bulk_flow_count) > 0) {
+				ktime_t time_to_pkt = \
+					ktime_sub(b->time_next_packet, now);
+
+				if (ktime_to_ns(time_to_pkt) <= 0 ||
+				    ktime_compare(time_to_pkt,
+						  best_time) <= 0) {
+					best_time = time_to_pkt;
+					best_tin = tin;
+				}
+			}
+		}
+
+		q->cur_tin = best_tin;
+		b = q->tins + best_tin;
+
+		/* No point in going further if no packets to deliver. */
+		if (unlikely(!(b->sparse_flow_count + b->bulk_flow_count)))
+			return NULL;
+	}
+
+retry:
+	/* service this class */
+	head = &b->decaying_flows;
+	if (!first_flow || list_empty(head)) {
+		head = &b->new_flows;
+		if (list_empty(head)) {
+			head = &b->old_flows;
+			if (unlikely(list_empty(head))) {
+				head = &b->decaying_flows;
+				if (unlikely(list_empty(head)))
+					goto begin;
+			}
+		}
+	}
+	flow = list_first_entry(head, struct cake_flow, flowchain);
+	q->cur_flow = flow - b->flows;
+	first_flow = false;
+
+	/* triple isolation (modified DRR++) */
+	srchost = &b->hosts[flow->srchost];
+	dsthost = &b->hosts[flow->dsthost];
+	host_load = 1;
+
+	if (cake_dsrc(q->flow_mode))
+		host_load = max(host_load, srchost->srchost_refcnt);
+
+	if (cake_ddst(q->flow_mode))
+		host_load = max(host_load, dsthost->dsthost_refcnt);
+
+	WARN_ON(host_load > CAKE_QUEUES);
+
+	/* flow isolation (DRR++) */
+	if (flow->deficit <= 0) {
+		/* The shifted prandom_u32() is a way to apply dithering to
+		 * avoid accumulating roundoff errors
+		 */
+		flow->deficit += (b->flow_quantum * quantum_div[host_load] +
+				  (prandom_u32() >> 16)) >> 16;
+		list_move_tail(&flow->flowchain, &b->old_flows);
+
+		/* Keep all flows with deficits out of the sparse and decaying
+		 * rotations.  No non-empty flow can go into the decaying
+		 * rotation, so they can't get deficits
+		 */
+		if (flow->set == CAKE_SET_SPARSE) {
+			if (flow->head) {
+				b->sparse_flow_count--;
+				b->bulk_flow_count++;
+				flow->set = CAKE_SET_BULK;
+			} else {
+				/* we've moved it to the bulk rotation for
+				 * correct deficit accounting but we still want
+				 * to count it as a sparse flow, not a bulk one.
+				 */
+				flow->set = CAKE_SET_SPARSE_WAIT;
+			}
+		}
+		goto retry;
+	}
+
+	/* Retrieve a packet via the AQM */
+	while (1) {
+		skb = cake_dequeue_one(sch);
+		if (!skb) {
+			/* this queue was actually empty */
+			if (cobalt_queue_empty(&flow->cvars, &b->cparams, now))
+				b->unresponsive_flow_count--;
+
+			if (flow->cvars.p_drop || flow->cvars.count ||
+			    ktime_before(now, flow->cvars.drop_next)) {
+				/* keep in the flowchain until the state has
+				 * decayed to rest
+				 */
+				list_move_tail(&flow->flowchain,
+					       &b->decaying_flows);
+				if (flow->set == CAKE_SET_BULK) {
+					b->bulk_flow_count--;
+					b->decaying_flow_count++;
+				} else if (flow->set == CAKE_SET_SPARSE ||
+					   flow->set == CAKE_SET_SPARSE_WAIT) {
+					b->sparse_flow_count--;
+					b->decaying_flow_count++;
+				}
+				flow->set = CAKE_SET_DECAYING;
+			} else {
+				/* remove empty queue from the flowchain */
+				list_del_init(&flow->flowchain);
+				if (flow->set == CAKE_SET_SPARSE ||
+				    flow->set == CAKE_SET_SPARSE_WAIT)
+					b->sparse_flow_count--;
+				else if (flow->set == CAKE_SET_BULK)
+					b->bulk_flow_count--;
+				else
+					b->decaying_flow_count--;
+
+				flow->set = CAKE_SET_NONE;
+				srchost->srchost_refcnt--;
+				dsthost->dsthost_refcnt--;
+			}
+			goto begin;
+		}
+
+		/* Last packet in queue may be marked, shouldn't be dropped */
+		if (!cobalt_should_drop(&flow->cvars, &b->cparams, now, skb,
+					(b->bulk_flow_count *
+					 !!(q->rate_flags &
+					    CAKE_FLAG_INGRESS))) ||
+		    !flow->head)
+			break;
+
+		/* drop this packet, get another one */
+		if (q->rate_flags & CAKE_FLAG_INGRESS) {
+			len = cake_advance_shaper(q, b, skb,
+						  now, true);
+			flow->deficit -= len;
+			b->tin_deficit -= len;
+		}
+		flow->dropped++;
+		b->tin_dropped++;
+		qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
+		qdisc_qstats_drop(sch);
+		kfree_skb(skb);
+		if (q->rate_flags & CAKE_FLAG_INGRESS)
+			goto retry;
+	}
+
+	b->tin_ecn_mark += !!flow->cvars.ecn_marked;
+	qdisc_bstats_update(sch, skb);
+
+	/* collect delay stats */
+	delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
+	b->avge_delay = cake_ewma(b->avge_delay, delay, 8);
+	b->peak_delay = cake_ewma(b->peak_delay, delay,
+				  delay > b->peak_delay ? 2 : 8);
+	b->base_delay = cake_ewma(b->base_delay, delay,
+				  delay < b->base_delay ? 2 : 8);
+
+	len = cake_advance_shaper(q, b, skb, now, false);
+	flow->deficit -= len;
+	b->tin_deficit -= len;
+
+	if (ktime_after(q->time_next_packet, now) && sch->q.qlen) {
+		u64 next = min(ktime_to_ns(q->time_next_packet),
+			       ktime_to_ns(q->failsafe_next_packet));
+
+		qdisc_watchdog_schedule_ns(&q->watchdog, next);
+	} else if (!sch->q.qlen) {
+		int i;
+
+		for (i = 0; i < q->tin_cnt; i++) {
+			if (q->tins[i].decaying_flow_count) {
+				ktime_t next = \
+					ktime_add_ns(now,
+						     q->tins[i].cparams.target);
+
+				qdisc_watchdog_schedule_ns(&q->watchdog,
+							   ktime_to_ns(next));
+				break;
+			}
+		}
+	}
+
+	if (q->overflow_timeout)
+		q->overflow_timeout--;
+
+	return skb;
+}
+
+static void cake_reset(struct Qdisc *sch)
+{
+	u32 c;
+
+	for (c = 0; c < CAKE_MAX_TINS; c++)
+		cake_clear_tin(sch, c);
+}
+
+static const struct nla_policy cake_policy[TCA_CAKE_MAX + 1] = {
+	[TCA_CAKE_BASE_RATE64]   = { .type = NLA_U64 },
+	[TCA_CAKE_DIFFSERV_MODE] = { .type = NLA_U32 },
+	[TCA_CAKE_ATM]		 = { .type = NLA_U32 },
+	[TCA_CAKE_FLOW_MODE]     = { .type = NLA_U32 },
+	[TCA_CAKE_OVERHEAD]      = { .type = NLA_S32 },
+	[TCA_CAKE_RTT]		 = { .type = NLA_U32 },
+	[TCA_CAKE_TARGET]	 = { .type = NLA_U32 },
+	[TCA_CAKE_AUTORATE]      = { .type = NLA_U32 },
+	[TCA_CAKE_MEMORY]	 = { .type = NLA_U32 },
+	[TCA_CAKE_NAT]		 = { .type = NLA_U32 },
+	[TCA_CAKE_RAW]		 = { .type = NLA_U32 },
+	[TCA_CAKE_WASH]		 = { .type = NLA_U32 },
+	[TCA_CAKE_MPU]		 = { .type = NLA_U32 },
+	[TCA_CAKE_INGRESS]	 = { .type = NLA_U32 },
+	[TCA_CAKE_ACK_FILTER]	 = { .type = NLA_U32 },
+};
+
+static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
+			  u64 target_ns, u64 rtt_est_ns)
+{
+	/* convert byte-rate into time-per-byte
+	 * so it will always unwedge in reasonable time.
+	 */
+	static const u64 MIN_RATE = 64;
+	u32 byte_target = mtu;
+	u64 byte_target_ns;
+	u8  rate_shft = 0;
+	u64 rate_ns = 0;
+
+	b->flow_quantum = 1514;
+	if (rate) {
+		b->flow_quantum = max(min(rate >> 12, 1514ULL), 300ULL);
+		rate_shft = 34;
+		rate_ns = ((u64)NSEC_PER_SEC) << rate_shft;
+		rate_ns = div64_u64(rate_ns, max(MIN_RATE, rate));
+		while (!!(rate_ns >> 34)) {
+			rate_ns >>= 1;
+			rate_shft--;
+		}
+	} /* else unlimited, ie. zero delay */
+
+	b->tin_rate_bps  = rate;
+	b->tin_rate_ns   = rate_ns;
+	b->tin_rate_shft = rate_shft;
+
+	byte_target_ns = (byte_target * rate_ns) >> rate_shft;
+
+	b->cparams.target = max((byte_target_ns * 3) / 2, target_ns);
+	b->cparams.interval = max(rtt_est_ns +
+				     b->cparams.target - target_ns,
+				     b->cparams.target * 2);
+	b->cparams.mtu_time = byte_target_ns;
+	b->cparams.p_inc = 1 << 24; /* 1/256 */
+	b->cparams.p_dec = 1 << 20; /* 1/4096 */
+}
+
+static int cake_config_besteffort(struct Qdisc *sch)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct cake_tin_data *b = &q->tins[0];
+	u32 mtu = psched_mtu(qdisc_dev(sch));
+	u64 rate = q->rate_bps;
+
+	q->tin_cnt = 1;
+
+	q->tin_index = besteffort;
+	q->tin_order = normal_order;
+
+	cake_set_rate(b, rate, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+	b->tin_quantum_band = 65535;
+	b->tin_quantum_prio = 65535;
+
+	return 0;
+}
+
+static int cake_config_precedence(struct Qdisc *sch)
+{
+	/* convert high-level (user visible) parameters into internal format */
+	struct cake_sched_data *q = qdisc_priv(sch);
+	u32 mtu = psched_mtu(qdisc_dev(sch));
+	u64 rate = q->rate_bps;
+	u32 quantum1 = 256;
+	u32 quantum2 = 256;
+	u32 i;
+
+	q->tin_cnt = 8;
+	q->tin_index = precedence;
+	q->tin_order = normal_order;
+
+	for (i = 0; i < q->tin_cnt; i++) {
+		struct cake_tin_data *b = &q->tins[i];
+
+		cake_set_rate(b, rate, mtu, us_to_ns(q->target),
+			      us_to_ns(q->interval));
+
+		b->tin_quantum_prio = max_t(u16, 1U, quantum1);
+		b->tin_quantum_band = max_t(u16, 1U, quantum2);
+
+		/* calculate next class's parameters */
+		rate  *= 7;
+		rate >>= 3;
+
+		quantum1  *= 3;
+		quantum1 >>= 1;
+
+		quantum2  *= 7;
+		quantum2 >>= 3;
+	}
+
+	return 0;
+}
+
+/*	List of known Diffserv codepoints:
+ *
+ *	Least Effort (CS1)
+ *	Best Effort (CS0)
+ *	Max Reliability & LLT "Lo" (TOS1)
+ *	Max Throughput (TOS2)
+ *	Min Delay (TOS4)
+ *	LLT "La" (TOS5)
+ *	Assured Forwarding 1 (AF1x) - x3
+ *	Assured Forwarding 2 (AF2x) - x3
+ *	Assured Forwarding 3 (AF3x) - x3
+ *	Assured Forwarding 4 (AF4x) - x3
+ *	Precedence Class 2 (CS2)
+ *	Precedence Class 3 (CS3)
+ *	Precedence Class 4 (CS4)
+ *	Precedence Class 5 (CS5)
+ *	Precedence Class 6 (CS6)
+ *	Precedence Class 7 (CS7)
+ *	Voice Admit (VA)
+ *	Expedited Forwarding (EF)
+
+ *	Total 25 codepoints.
+ */
+
+/*	List of traffic classes in RFC 4594:
+ *		(roughly descending order of contended priority)
+ *		(roughly ascending order of uncontended throughput)
+ *
+ *	Network Control (CS6,CS7)      - routing traffic
+ *	Telephony (EF,VA)         - aka. VoIP streams
+ *	Signalling (CS5)               - VoIP setup
+ *	Multimedia Conferencing (AF4x) - aka. video calls
+ *	Realtime Interactive (CS4)     - eg. games
+ *	Multimedia Streaming (AF3x)    - eg. YouTube, NetFlix, Twitch
+ *	Broadcast Video (CS3)
+ *	Low Latency Data (AF2x,TOS4)      - eg. database
+ *	Ops, Admin, Management (CS2,TOS1) - eg. ssh
+ *	Standard Service (CS0 & unrecognised codepoints)
+ *	High Throughput Data (AF1x,TOS2)  - eg. web traffic
+ *	Low Priority Data (CS1)           - eg. BitTorrent
+
+ *	Total 12 traffic classes.
+ */
+
+static int cake_config_diffserv8(struct Qdisc *sch)
+{
+/*	Pruned list of traffic classes for typical applications:
+ *
+ *		Network Control          (CS6, CS7)
+ *		Minimum Latency          (EF, VA, CS5, CS4)
+ *		Interactive Shell        (CS2, TOS1)
+ *		Low Latency Transactions (AF2x, TOS4)
+ *		Video Streaming          (AF4x, AF3x, CS3)
+ *		Bog Standard             (CS0 etc.)
+ *		High Throughput          (AF1x, TOS2)
+ *		Background Traffic       (CS1)
+ *
+ *		Total 8 traffic classes.
+ */
+
+	struct cake_sched_data *q = qdisc_priv(sch);
+	u32 mtu = psched_mtu(qdisc_dev(sch));
+	u64 rate = q->rate_bps;
+	u32 quantum1 = 256;
+	u32 quantum2 = 256;
+	u32 i;
+
+	q->tin_cnt = 8;
+
+	/* codepoint to class mapping */
+	q->tin_index = diffserv8;
+	q->tin_order = normal_order;
+
+	/* class characteristics */
+	for (i = 0; i < q->tin_cnt; i++) {
+		struct cake_tin_data *b = &q->tins[i];
+
+		cake_set_rate(b, rate, mtu, us_to_ns(q->target),
+			      us_to_ns(q->interval));
+
+		b->tin_quantum_prio = max_t(u16, 1U, quantum1);
+		b->tin_quantum_band = max_t(u16, 1U, quantum2);
+
+		/* calculate next class's parameters */
+		rate  *= 7;
+		rate >>= 3;
+
+		quantum1  *= 3;
+		quantum1 >>= 1;
+
+		quantum2  *= 7;
+		quantum2 >>= 3;
+	}
+
+	return 0;
+}
+
+static int cake_config_diffserv4(struct Qdisc *sch)
+{
+/*  Further pruned list of traffic classes for four-class system:
+ *
+ *	    Latency Sensitive  (CS7, CS6, EF, VA, CS5, CS4)
+ *	    Streaming Media    (AF4x, AF3x, CS3, AF2x, TOS4, CS2, TOS1)
+ *	    Best Effort        (CS0, AF1x, TOS2, and those not specified)
+ *	    Background Traffic (CS1)
+ *
+ *		Total 4 traffic classes.
+ */
+
+	struct cake_sched_data *q = qdisc_priv(sch);
+	u32 mtu = psched_mtu(qdisc_dev(sch));
+	u64 rate = q->rate_bps;
+	u32 quantum = 1024;
+
+	q->tin_cnt = 4;
+
+	/* codepoint to class mapping */
+	q->tin_index = diffserv4;
+	q->tin_order = bulk_order;
+
+	/* class characteristics */
+	cake_set_rate(&q->tins[0], rate, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+	cake_set_rate(&q->tins[1], rate >> 4, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+	cake_set_rate(&q->tins[2], rate >> 1, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+	cake_set_rate(&q->tins[3], rate >> 2, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+
+	/* priority weights */
+	q->tins[0].tin_quantum_prio = quantum;
+	q->tins[1].tin_quantum_prio = quantum >> 4;
+	q->tins[2].tin_quantum_prio = quantum << 2;
+	q->tins[3].tin_quantum_prio = quantum << 4;
+
+	/* bandwidth-sharing weights */
+	q->tins[0].tin_quantum_band = quantum;
+	q->tins[1].tin_quantum_band = quantum >> 4;
+	q->tins[2].tin_quantum_band = quantum >> 1;
+	q->tins[3].tin_quantum_band = quantum >> 2;
+
+	return 0;
+}
+
+static int cake_config_diffserv3(struct Qdisc *sch)
+{
+/*  Simplified Diffserv structure with 3 tins.
+ *		Low Priority		(CS1)
+ *		Best Effort
+ *		Latency Sensitive	(TOS4, VA, EF, CS6, CS7)
+ */
+	struct cake_sched_data *q = qdisc_priv(sch);
+	u32 mtu = psched_mtu(qdisc_dev(sch));
+	u64 rate = q->rate_bps;
+	u32 quantum = 1024;
+
+	q->tin_cnt = 3;
+
+	/* codepoint to class mapping */
+	q->tin_index = diffserv3;
+	q->tin_order = bulk_order;
+
+	/* class characteristics */
+	cake_set_rate(&q->tins[0], rate, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+	cake_set_rate(&q->tins[1], rate >> 4, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+	cake_set_rate(&q->tins[2], rate >> 2, mtu,
+		      us_to_ns(q->target), us_to_ns(q->interval));
+
+	/* priority weights */
+	q->tins[0].tin_quantum_prio = quantum;
+	q->tins[1].tin_quantum_prio = quantum >> 4;
+	q->tins[2].tin_quantum_prio = quantum << 4;
+
+	/* bandwidth-sharing weights */
+	q->tins[0].tin_quantum_band = quantum;
+	q->tins[1].tin_quantum_band = quantum >> 4;
+	q->tins[2].tin_quantum_band = quantum >> 2;
+
+	return 0;
+}
+
+static void cake_reconfigure(struct Qdisc *sch)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	int c, ft;
+
+	switch (q->tin_mode) {
+	case CAKE_DIFFSERV_BESTEFFORT:
+		ft = cake_config_besteffort(sch);
+		break;
+
+	case CAKE_DIFFSERV_PRECEDENCE:
+		ft = cake_config_precedence(sch);
+		break;
+
+	case CAKE_DIFFSERV_DIFFSERV8:
+		ft = cake_config_diffserv8(sch);
+		break;
+
+	case CAKE_DIFFSERV_DIFFSERV4:
+		ft = cake_config_diffserv4(sch);
+		break;
+
+	case CAKE_DIFFSERV_DIFFSERV3:
+	default:
+		ft = cake_config_diffserv3(sch);
+		break;
+	}
+
+	for (c = q->tin_cnt; c < CAKE_MAX_TINS; c++) {
+		cake_clear_tin(sch, c);
+		q->tins[c].cparams.mtu_time = q->tins[ft].cparams.mtu_time;
+	}
+
+	q->rate_ns   = q->tins[ft].tin_rate_ns;
+	q->rate_shft = q->tins[ft].tin_rate_shft;
+
+	if (q->buffer_config_limit) {
+		q->buffer_limit = q->buffer_config_limit;
+	} else if (q->rate_bps) {
+		u64 t = q->rate_bps * q->interval;
+
+		do_div(t, USEC_PER_SEC / 4);
+		q->buffer_limit = max_t(u32, t, 4U << 20);
+	} else {
+		q->buffer_limit = ~0;
+	}
+
+	sch->flags &= ~TCQ_F_CAN_BYPASS;
+
+	q->buffer_limit = min(q->buffer_limit,
+			      max(sch->limit * psched_mtu(qdisc_dev(sch)),
+				  q->buffer_config_limit));
+}
+
+static int cake_change(struct Qdisc *sch, struct nlattr *opt,
+		       struct netlink_ext_ack *extack)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct nlattr *tb[TCA_CAKE_MAX + 1];
+	int err;
+
+	if (!opt)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_CAKE_MAX, opt, cake_policy, extack);
+	if (err < 0)
+		return err;
+
+	if (tb[TCA_CAKE_NAT]) {
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+		q->flow_mode &= ~CAKE_FLOW_NAT_FLAG;
+		q->flow_mode |= CAKE_FLOW_NAT_FLAG *
+			!!nla_get_u32(tb[TCA_CAKE_NAT]);
+#else
+		NL_SET_ERR_MSG_ATTR(extack, tb[TCA_CAKE_NAT],
+				    "No conntrack support in kernel");
+		return -EOPNOTSUPP;
+#endif
+	}
+
+	if (tb[TCA_CAKE_BASE_RATE64])
+		q->rate_bps = nla_get_u64(tb[TCA_CAKE_BASE_RATE64]);
+
+	if (tb[TCA_CAKE_DIFFSERV_MODE])
+		q->tin_mode = nla_get_u32(tb[TCA_CAKE_DIFFSERV_MODE]);
+
+	if (tb[TCA_CAKE_WASH]) {
+		if (!!nla_get_u32(tb[TCA_CAKE_WASH]))
+			q->rate_flags |= CAKE_FLAG_WASH;
+		else
+			q->rate_flags &= ~CAKE_FLAG_WASH;
+	}
+
+	if (tb[TCA_CAKE_FLOW_MODE])
+		q->flow_mode = ((q->flow_mode & CAKE_FLOW_NAT_FLAG) |
+				(nla_get_u32(tb[TCA_CAKE_FLOW_MODE]) &
+					CAKE_FLOW_MASK));
+
+	if (tb[TCA_CAKE_ATM])
+		q->atm_mode = nla_get_u32(tb[TCA_CAKE_ATM]);
+
+	if (tb[TCA_CAKE_OVERHEAD]) {
+		q->rate_overhead = nla_get_s32(tb[TCA_CAKE_OVERHEAD]);
+		q->rate_flags |= CAKE_FLAG_OVERHEAD;
+
+		q->max_netlen = 0;
+		q->max_adjlen = 0;
+		q->min_netlen = ~0;
+		q->min_adjlen = ~0;
+	}
+
+	if (tb[TCA_CAKE_RAW]) {
+		q->rate_flags &= ~CAKE_FLAG_OVERHEAD;
+
+		q->max_netlen = 0;
+		q->max_adjlen = 0;
+		q->min_netlen = ~0;
+		q->min_adjlen = ~0;
+	}
+
+	if (tb[TCA_CAKE_MPU])
+		q->rate_mpu = nla_get_u32(tb[TCA_CAKE_MPU]);
+
+	if (tb[TCA_CAKE_RTT]) {
+		q->interval = nla_get_u32(tb[TCA_CAKE_RTT]);
+
+		if (!q->interval)
+			q->interval = 1;
+	}
+
+	if (tb[TCA_CAKE_TARGET]) {
+		q->target = nla_get_u32(tb[TCA_CAKE_TARGET]);
+
+		if (!q->target)
+			q->target = 1;
+	}
+
+	if (tb[TCA_CAKE_AUTORATE]) {
+		if (!!nla_get_u32(tb[TCA_CAKE_AUTORATE]))
+			q->rate_flags |= CAKE_FLAG_AUTORATE_INGRESS;
+		else
+			q->rate_flags &= ~CAKE_FLAG_AUTORATE_INGRESS;
+	}
+
+	if (tb[TCA_CAKE_INGRESS]) {
+		if (!!nla_get_u32(tb[TCA_CAKE_INGRESS]))
+			q->rate_flags |= CAKE_FLAG_INGRESS;
+		else
+			q->rate_flags &= ~CAKE_FLAG_INGRESS;
+	}
+
+	if (tb[TCA_CAKE_ACK_FILTER])
+		q->ack_filter = nla_get_u32(tb[TCA_CAKE_ACK_FILTER]);
+
+	if (tb[TCA_CAKE_MEMORY])
+		q->buffer_config_limit = nla_get_u32(tb[TCA_CAKE_MEMORY]);
+
+	if (q->rate_bps && q->rate_bps <= CAKE_SPLIT_GSO_THRESHOLD)
+		q->rate_flags |= CAKE_FLAG_SPLIT_GSO;
+	else
+		q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
+
+	if (q->tins) {
+		sch_tree_lock(sch);
+		cake_reconfigure(sch);
+		sch_tree_unlock(sch);
+	}
+
+	return 0;
+}
+
+static void cake_destroy(struct Qdisc *sch)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+
+	qdisc_watchdog_cancel(&q->watchdog);
+	tcf_block_put(q->block);
+	kvfree(q->tins);
+}
+
+static int cake_init(struct Qdisc *sch, struct nlattr *opt,
+		     struct netlink_ext_ack *extack)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	int i, j, err;
+
+	sch->limit = 10240;
+	q->tin_mode = CAKE_DIFFSERV_DIFFSERV3;
+	q->flow_mode  = CAKE_FLOW_TRIPLE;
+
+	q->rate_bps = 0; /* unlimited by default */
+
+	q->interval = 100000; /* 100ms default */
+	q->target   =   5000; /* 5ms: codel RFC argues
+			       * for 5 to 10% of interval
+			       */
+
+	q->cur_tin = 0;
+	q->cur_flow  = 0;
+
+	qdisc_watchdog_init(&q->watchdog, sch);
+
+	if (opt) {
+		int err = cake_change(sch, opt, extack);
+
+		if (err)
+			return err;
+	}
+
+	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
+	if (err)
+		return err;
+
+	quantum_div[0] = ~0;
+	for (i = 1; i <= CAKE_QUEUES; i++)
+		quantum_div[i] = 65535 / i;
+
+	q->tins = kvzalloc(CAKE_MAX_TINS * sizeof(struct cake_tin_data),
+			   GFP_KERNEL);
+	if (!q->tins)
+		goto nomem;
+
+	for (i = 0; i < CAKE_MAX_TINS; i++) {
+		struct cake_tin_data *b = q->tins + i;
+
+		INIT_LIST_HEAD(&b->new_flows);
+		INIT_LIST_HEAD(&b->old_flows);
+		INIT_LIST_HEAD(&b->decaying_flows);
+		b->sparse_flow_count = 0;
+		b->bulk_flow_count = 0;
+		b->decaying_flow_count = 0;
+
+		for (j = 0; j < CAKE_QUEUES; j++) {
+			struct cake_flow *flow = b->flows + j;
+			u32 k = j * CAKE_MAX_TINS + i;
+
+			INIT_LIST_HEAD(&flow->flowchain);
+			cobalt_vars_init(&flow->cvars);
+
+			q->overflow_heap[k].t = i;
+			q->overflow_heap[k].b = j;
+			b->overflow_idx[j] = k;
+		}
+	}
+
+	cake_reconfigure(sch);
+	q->avg_peak_bandwidth = q->rate_bps;
+	q->min_netlen = ~0;
+	q->min_adjlen = ~0;
+	return 0;
+
+nomem:
+	cake_destroy(sch);
+	return -ENOMEM;
+}
+
+static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct nlattr *opts;
+
+	opts = nla_nest_start(skb, TCA_OPTIONS);
+	if (!opts)
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64, q->rate_bps,
+			      TCA_CAKE_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE,
+			q->flow_mode & CAKE_FLOW_MASK))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_RTT, q->interval))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_TARGET, q->target))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_MEMORY, q->buffer_config_limit))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_AUTORATE,
+			!!(q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS)))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_INGRESS,
+			!!(q->rate_flags & CAKE_FLAG_INGRESS)))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, q->ack_filter))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_NAT,
+			!!(q->flow_mode & CAKE_FLOW_NAT_FLAG)))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_DIFFSERV_MODE, q->tin_mode))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_WASH,
+			!!(q->rate_flags & CAKE_FLAG_WASH)))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_OVERHEAD, q->rate_overhead))
+		goto nla_put_failure;
+
+	if (!(q->rate_flags & CAKE_FLAG_OVERHEAD))
+		if (nla_put_u32(skb, TCA_CAKE_RAW, 0))
+			goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_ATM, q->atm_mode))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_MPU, q->rate_mpu))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CAKE_SPLIT_GSO,
+			!!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
+		goto nla_put_failure;
+
+	return nla_nest_end(skb, opts);
+
+nla_put_failure:
+	return -1;
+}
+
+static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+	struct nlattr *stats = nla_nest_start(d->skb, TCA_STATS_APP);
+	struct cake_sched_data *q = qdisc_priv(sch);
+	struct nlattr *tstats, *ts;
+	int i;
+
+	if (!stats)
+		return -1;
+
+#define PUT_STAT_U32(attr, data) do {				       \
+		if (nla_put_u32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \
+			goto nla_put_failure;			       \
+	} while (0)
+#define PUT_STAT_U64(attr, data) do {				       \
+		if (nla_put_u64_64bit(d->skb, TCA_CAKE_STATS_ ## attr, \
+					data, TCA_CAKE_STATS_PAD)) \
+			goto nla_put_failure;			       \
+	} while (0)
+
+	PUT_STAT_U64(CAPACITY_ESTIMATE64, q->avg_peak_bandwidth);
+	PUT_STAT_U32(MEMORY_LIMIT, q->buffer_limit);
+	PUT_STAT_U32(MEMORY_USED, q->buffer_max_used);
+	PUT_STAT_U32(AVG_NETOFF, ((q->avg_netoff + 0x8000) >> 16));
+	PUT_STAT_U32(MAX_NETLEN, q->max_netlen);
+	PUT_STAT_U32(MAX_ADJLEN, q->max_adjlen);
+	PUT_STAT_U32(MIN_NETLEN, q->min_netlen);
+	PUT_STAT_U32(MIN_ADJLEN, q->min_adjlen);
+
+#undef PUT_STAT_U32
+#undef PUT_STAT_U64
+
+	tstats = nla_nest_start(d->skb, TCA_CAKE_STATS_TIN_STATS);
+	if (!tstats)
+		goto nla_put_failure;
+
+#define PUT_TSTAT_U32(attr, data) do {					\
+		if (nla_put_u32(d->skb, TCA_CAKE_TIN_STATS_ ## attr, data)) \
+			goto nla_put_failure;				\
+	} while (0)
+#define PUT_TSTAT_U64(attr, data) do {					\
+		if (nla_put_u64_64bit(d->skb, TCA_CAKE_TIN_STATS_ ## attr, \
+					data, TCA_CAKE_TIN_STATS_PAD))	\
+			goto nla_put_failure;				\
+	} while (0)
+
+	for (i = 0; i < q->tin_cnt; i++) {
+		struct cake_tin_data *b = &q->tins[q->tin_order[i]];
+
+		ts = nla_nest_start(d->skb, i + 1);
+		if (!ts)
+			goto nla_put_failure;
+
+		PUT_TSTAT_U64(THRESHOLD_RATE64, b->tin_rate_bps);
+		PUT_TSTAT_U64(SENT_BYTES64, b->bytes);
+		PUT_TSTAT_U32(BACKLOG_BYTES, b->tin_backlog);
+
+		PUT_TSTAT_U32(TARGET_US,
+			      ktime_to_us(ns_to_ktime(b->cparams.target)));
+		PUT_TSTAT_U32(INTERVAL_US,
+			      ktime_to_us(ns_to_ktime(b->cparams.interval)));
+
+		PUT_TSTAT_U32(SENT_PACKETS, b->packets);
+		PUT_TSTAT_U32(DROPPED_PACKETS, b->tin_dropped);
+		PUT_TSTAT_U32(ECN_MARKED_PACKETS, b->tin_ecn_mark);
+		PUT_TSTAT_U32(ACKS_DROPPED_PACKETS, b->ack_drops);
+
+		PUT_TSTAT_U32(PEAK_DELAY_US,
+			      ktime_to_us(ns_to_ktime(b->peak_delay)));
+		PUT_TSTAT_U32(AVG_DELAY_US,
+			      ktime_to_us(ns_to_ktime(b->avge_delay)));
+		PUT_TSTAT_U32(BASE_DELAY_US,
+			      ktime_to_us(ns_to_ktime(b->base_delay)));
+
+		PUT_TSTAT_U32(WAY_INDIRECT_HITS, b->way_hits);
+		PUT_TSTAT_U32(WAY_MISSES, b->way_misses);
+		PUT_TSTAT_U32(WAY_COLLISIONS, b->way_collisions);
+
+		PUT_TSTAT_U32(SPARSE_FLOWS, b->sparse_flow_count +
+					    b->decaying_flow_count);
+		PUT_TSTAT_U32(BULK_FLOWS, b->bulk_flow_count);
+		PUT_TSTAT_U32(UNRESPONSIVE_FLOWS, b->unresponsive_flow_count);
+		PUT_TSTAT_U32(MAX_SKBLEN, b->max_skblen);
+
+		PUT_TSTAT_U32(FLOW_QUANTUM, b->flow_quantum);
+		nla_nest_end(d->skb, ts);
+	}
+
+#undef PUT_TSTAT_U32
+#undef PUT_TSTAT_U64
+
+	nla_nest_end(d->skb, tstats);
+	return nla_nest_end(d->skb, stats);
+
+nla_put_failure:
+	nla_nest_cancel(d->skb, stats);
+	return -1;
+}
+
+static struct Qdisc *cake_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	return NULL;
+}
+
+static unsigned long cake_find(struct Qdisc *sch, u32 classid)
+{
+	return 0;
+}
+
+static unsigned long cake_bind(struct Qdisc *sch, unsigned long parent,
+			       u32 classid)
+{
+	return 0;
+}
+
+static void cake_unbind(struct Qdisc *q, unsigned long cl)
+{
+}
+
+static struct tcf_block *cake_tcf_block(struct Qdisc *sch, unsigned long cl,
+					struct netlink_ext_ack *extack)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+
+	if (cl)
+		return NULL;
+	return q->block;
+}
+
+static int cake_dump_class(struct Qdisc *sch, unsigned long cl,
+			   struct sk_buff *skb, struct tcmsg *tcm)
+{
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	return 0;
+}
+
+static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+				 struct gnet_dump *d)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	const struct cake_flow *flow = NULL;
+	struct gnet_stats_queue qs = { 0 };
+	struct nlattr *stats;
+	u32 idx = cl - 1;
+
+	if (idx < CAKE_QUEUES * q->tin_cnt) {
+		const struct cake_tin_data *b = \
+			&q->tins[q->tin_order[idx / CAKE_QUEUES]];
+		const struct sk_buff *skb;
+
+		flow = &b->flows[idx % CAKE_QUEUES];
+
+		if (flow->head) {
+			sch_tree_lock(sch);
+			skb = flow->head;
+			while (skb) {
+				qs.qlen++;
+				skb = skb->next;
+			}
+			sch_tree_unlock(sch);
+		}
+		qs.backlog = b->backlogs[idx % CAKE_QUEUES];
+		qs.drops = flow->dropped;
+	}
+	if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
+		return -1;
+	if (flow) {
+		ktime_t now = ktime_get();
+
+		stats = nla_nest_start(d->skb, TCA_STATS_APP);
+		if (!stats)
+			return -1;
+
+#define PUT_STAT_U32(attr, data) do {				       \
+		if (nla_put_u32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \
+			goto nla_put_failure;			       \
+	} while (0)
+#define PUT_STAT_S32(attr, data) do {				       \
+		if (nla_put_s32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \
+			goto nla_put_failure;			       \
+	} while (0)
+
+		PUT_STAT_S32(DEFICIT, flow->deficit);
+		PUT_STAT_U32(DROPPING, flow->cvars.dropping);
+		PUT_STAT_U32(COBALT_COUNT, flow->cvars.count);
+		PUT_STAT_U32(P_DROP, flow->cvars.p_drop);
+		if (flow->cvars.p_drop) {
+			PUT_STAT_S32(BLUE_TIMER_US,
+				     ktime_to_us(
+					     ktime_sub(now,
+						     flow->cvars.blue_timer)));
+		}
+		if (flow->cvars.dropping) {
+			PUT_STAT_S32(DROP_NEXT_US,
+				     ktime_to_us(
+					     ktime_sub(now,
+						       flow->cvars.drop_next)));
+		}
+
+		if (nla_nest_end(d->skb, stats) < 0)
+			return -1;
+	}
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(d->skb, stats);
+	return -1;
+}
+
+static void cake_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct cake_sched_data *q = qdisc_priv(sch);
+	unsigned int i, j;
+
+	if (arg->stop)
+		return;
+
+	for (i = 0; i < q->tin_cnt; i++) {
+		struct cake_tin_data *b = &q->tins[q->tin_order[i]];
+
+		for (j = 0; j < CAKE_QUEUES; j++) {
+			if (list_empty(&b->flows[j].flowchain) ||
+			    arg->count < arg->skip) {
+				arg->count++;
+				continue;
+			}
+			if (arg->fn(sch, i * CAKE_QUEUES + j + 1, arg) < 0) {
+				arg->stop = 1;
+				break;
+			}
+			arg->count++;
+		}
+	}
+}
+
+static const struct Qdisc_class_ops cake_class_ops = {
+	.leaf		=	cake_leaf,
+	.find		=	cake_find,
+	.tcf_block	=	cake_tcf_block,
+	.bind_tcf	=	cake_bind,
+	.unbind_tcf	=	cake_unbind,
+	.dump		=	cake_dump_class,
+	.dump_stats	=	cake_dump_class_stats,
+	.walk		=	cake_walk,
+};
+
+static struct Qdisc_ops cake_qdisc_ops __read_mostly = {
+	.cl_ops		=	&cake_class_ops,
+	.id		=	"cake",
+	.priv_size	=	sizeof(struct cake_sched_data),
+	.enqueue	=	cake_enqueue,
+	.dequeue	=	cake_dequeue,
+	.peek		=	qdisc_peek_dequeued,
+	.init		=	cake_init,
+	.reset		=	cake_reset,
+	.destroy	=	cake_destroy,
+	.change		=	cake_change,
+	.dump		=	cake_dump,
+	.dump_stats	=	cake_dump_stats,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init cake_module_init(void)
+{
+	return register_qdisc(&cake_qdisc_ops);
+}
+
+static void __exit cake_module_exit(void)
+{
+	unregister_qdisc(&cake_qdisc_ops);
+}
+
+module_init(cake_module_init)
+module_exit(cake_module_exit)
+MODULE_AUTHOR("Jonathan Morton");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("The CAKE shaper.");
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
new file mode 100644
index 000000000000..1538d6fa8165
--- /dev/null
+++ b/net/sched/sch_etf.c
@@ -0,0 +1,484 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* net/sched/sch_etf.c  Earliest TxTime First queueing discipline.
+ *
+ * Authors:	Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
+ *		Vinicius Costa Gomes <vinicius.gomes@intel.com>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/errqueue.h>
+#include <linux/rbtree.h>
+#include <linux/skbuff.h>
+#include <linux/posix-timers.h>
+#include <net/netlink.h>
+#include <net/sch_generic.h>
+#include <net/pkt_sched.h>
+#include <net/sock.h>
+
+#define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON)
+#define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON)
+
+struct etf_sched_data {
+	bool offload;
+	bool deadline_mode;
+	int clockid;
+	int queue;
+	s32 delta; /* in ns */
+	ktime_t last; /* The txtime of the last skb sent to the netdevice. */
+	struct rb_root head;
+	struct qdisc_watchdog watchdog;
+	ktime_t (*get_time)(void);
+};
+
+static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
+	[TCA_ETF_PARMS]	= { .len = sizeof(struct tc_etf_qopt) },
+};
+
+static inline int validate_input_params(struct tc_etf_qopt *qopt,
+					struct netlink_ext_ack *extack)
+{
+	/* Check if params comply to the following rules:
+	 *	* Clockid and delta must be valid.
+	 *
+	 *	* Dynamic clockids are not supported.
+	 *
+	 *	* Delta must be a positive integer.
+	 *
+	 * Also note that for the HW offload case, we must
+	 * expect that system clocks have been synchronized to PHC.
+	 */
+	if (qopt->clockid < 0) {
+		NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
+		return -ENOTSUPP;
+	}
+
+	if (qopt->clockid != CLOCK_TAI) {
+		NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
+		return -EINVAL;
+	}
+
+	if (qopt->delta < 0) {
+		NL_SET_ERR_MSG(extack, "Delta must be positive");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	ktime_t txtime = nskb->tstamp;
+	struct sock *sk = nskb->sk;
+	ktime_t now;
+
+	if (!sk)
+		return false;
+
+	if (!sock_flag(sk, SOCK_TXTIME))
+		return false;
+
+	/* We don't perform crosstimestamping.
+	 * Drop if packet's clockid differs from qdisc's.
+	 */
+	if (sk->sk_clockid != q->clockid)
+		return false;
+
+	if (sk->sk_txtime_deadline_mode != q->deadline_mode)
+		return false;
+
+	now = q->get_time();
+	if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
+		return false;
+
+	return true;
+}
+
+static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	struct rb_node *p;
+
+	p = rb_first(&q->head);
+	if (!p)
+		return NULL;
+
+	return rb_to_skb(p);
+}
+
+static void reset_watchdog(struct Qdisc *sch)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb = etf_peek_timesortedlist(sch);
+	ktime_t next;
+
+	if (!skb)
+		return;
+
+	next = ktime_sub_ns(skb->tstamp, q->delta);
+	qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
+}
+
+static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
+{
+	struct sock_exterr_skb *serr;
+	struct sk_buff *clone;
+	ktime_t txtime = skb->tstamp;
+
+	if (!skb->sk || !(skb->sk->sk_txtime_report_errors))
+		return;
+
+	clone = skb_clone(skb, GFP_ATOMIC);
+	if (!clone)
+		return;
+
+	serr = SKB_EXT_ERR(clone);
+	serr->ee.ee_errno = err;
+	serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME;
+	serr->ee.ee_type = 0;
+	serr->ee.ee_code = code;
+	serr->ee.ee_pad = 0;
+	serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */
+	serr->ee.ee_info = txtime; /* low part of tstamp */
+
+	if (sock_queue_err_skb(skb->sk, clone))
+		kfree_skb(clone);
+}
+
+static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
+				      struct sk_buff **to_free)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	struct rb_node **p = &q->head.rb_node, *parent = NULL;
+	ktime_t txtime = nskb->tstamp;
+
+	if (!is_packet_valid(sch, nskb)) {
+		report_sock_error(nskb, EINVAL,
+				  SO_EE_CODE_TXTIME_INVALID_PARAM);
+		return qdisc_drop(nskb, sch, to_free);
+	}
+
+	while (*p) {
+		struct sk_buff *skb;
+
+		parent = *p;
+		skb = rb_to_skb(parent);
+		if (ktime_after(txtime, skb->tstamp))
+			p = &parent->rb_right;
+		else
+			p = &parent->rb_left;
+	}
+	rb_link_node(&nskb->rbnode, parent, p);
+	rb_insert_color(&nskb->rbnode, &q->head);
+
+	qdisc_qstats_backlog_inc(sch, nskb);
+	sch->q.qlen++;
+
+	/* Now we may need to re-arm the qdisc watchdog for the next packet. */
+	reset_watchdog(sch);
+
+	return NET_XMIT_SUCCESS;
+}
+
+static void timesortedlist_erase(struct Qdisc *sch, struct sk_buff *skb,
+				 bool drop)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+
+	rb_erase(&skb->rbnode, &q->head);
+
+	/* The rbnode field in the skb re-uses these fields, now that
+	 * we are done with the rbnode, reset them.
+	 */
+	skb->next = NULL;
+	skb->prev = NULL;
+	skb->dev = qdisc_dev(sch);
+
+	qdisc_qstats_backlog_dec(sch, skb);
+
+	if (drop) {
+		struct sk_buff *to_free = NULL;
+
+		report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);
+
+		qdisc_drop(skb, sch, &to_free);
+		kfree_skb_list(to_free);
+		qdisc_qstats_overlimit(sch);
+	} else {
+		qdisc_bstats_update(sch, skb);
+
+		q->last = skb->tstamp;
+	}
+
+	sch->q.qlen--;
+}
+
+static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+	ktime_t now, next;
+
+	skb = etf_peek_timesortedlist(sch);
+	if (!skb)
+		return NULL;
+
+	now = q->get_time();
+
+	/* Drop if packet has expired while in queue. */
+	if (ktime_before(skb->tstamp, now)) {
+		timesortedlist_erase(sch, skb, true);
+		skb = NULL;
+		goto out;
+	}
+
+	/* When in deadline mode, dequeue as soon as possible and change the
+	 * txtime from deadline to (now + delta).
+	 */
+	if (q->deadline_mode) {
+		timesortedlist_erase(sch, skb, false);
+		skb->tstamp = now;
+		goto out;
+	}
+
+	next = ktime_sub_ns(skb->tstamp, q->delta);
+
+	/* Dequeue only if now is within the [txtime - delta, txtime] range. */
+	if (ktime_after(now, next))
+		timesortedlist_erase(sch, skb, false);
+	else
+		skb = NULL;
+
+out:
+	/* Now we may need to re-arm the qdisc watchdog for the next packet. */
+	reset_watchdog(sch);
+
+	return skb;
+}
+
+static void etf_disable_offload(struct net_device *dev,
+				struct etf_sched_data *q)
+{
+	struct tc_etf_qopt_offload etf = { };
+	const struct net_device_ops *ops;
+	int err;
+
+	if (!q->offload)
+		return;
+
+	ops = dev->netdev_ops;
+	if (!ops->ndo_setup_tc)
+		return;
+
+	etf.queue = q->queue;
+	etf.enable = 0;
+
+	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
+	if (err < 0)
+		pr_warn("Couldn't disable ETF offload for queue %d\n",
+			etf.queue);
+}
+
+static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q,
+			      struct netlink_ext_ack *extack)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	struct tc_etf_qopt_offload etf = { };
+	int err;
+
+	if (q->offload)
+		return 0;
+
+	if (!ops->ndo_setup_tc) {
+		NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload");
+		return -EOPNOTSUPP;
+	}
+
+	etf.queue = q->queue;
+	etf.enable = 1;
+
+	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
+	if (err < 0) {
+		NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload");
+		return err;
+	}
+
+	return 0;
+}
+
+static int etf_init(struct Qdisc *sch, struct nlattr *opt,
+		    struct netlink_ext_ack *extack)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	struct nlattr *tb[TCA_ETF_MAX + 1];
+	struct tc_etf_qopt *qopt;
+	int err;
+
+	if (!opt) {
+		NL_SET_ERR_MSG(extack,
+			       "Missing ETF qdisc options which are mandatory");
+		return -EINVAL;
+	}
+
+	err = nla_parse_nested(tb, TCA_ETF_MAX, opt, etf_policy, extack);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_ETF_PARMS]) {
+		NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters");
+		return -EINVAL;
+	}
+
+	qopt = nla_data(tb[TCA_ETF_PARMS]);
+
+	pr_debug("delta %d clockid %d offload %s deadline %s\n",
+		 qopt->delta, qopt->clockid,
+		 OFFLOAD_IS_ON(qopt) ? "on" : "off",
+		 DEADLINE_MODE_IS_ON(qopt) ? "on" : "off");
+
+	err = validate_input_params(qopt, extack);
+	if (err < 0)
+		return err;
+
+	q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
+
+	if (OFFLOAD_IS_ON(qopt)) {
+		err = etf_enable_offload(dev, q, extack);
+		if (err < 0)
+			return err;
+	}
+
+	/* Everything went OK, save the parameters used. */
+	q->delta = qopt->delta;
+	q->clockid = qopt->clockid;
+	q->offload = OFFLOAD_IS_ON(qopt);
+	q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);
+
+	switch (q->clockid) {
+	case CLOCK_REALTIME:
+		q->get_time = ktime_get_real;
+		break;
+	case CLOCK_MONOTONIC:
+		q->get_time = ktime_get;
+		break;
+	case CLOCK_BOOTTIME:
+		q->get_time = ktime_get_boottime;
+		break;
+	case CLOCK_TAI:
+		q->get_time = ktime_get_clocktai;
+		break;
+	default:
+		NL_SET_ERR_MSG(extack, "Clockid is not supported");
+		return -ENOTSUPP;
+	}
+
+	qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid);
+
+	return 0;
+}
+
+static void timesortedlist_clear(struct Qdisc *sch)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	struct rb_node *p = rb_first(&q->head);
+
+	while (p) {
+		struct sk_buff *skb = rb_to_skb(p);
+
+		p = rb_next(p);
+
+		rb_erase(&skb->rbnode, &q->head);
+		rtnl_kfree_skbs(skb, skb);
+		sch->q.qlen--;
+	}
+}
+
+static void etf_reset(struct Qdisc *sch)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+
+	/* Only cancel watchdog if it's been initialized. */
+	if (q->watchdog.qdisc == sch)
+		qdisc_watchdog_cancel(&q->watchdog);
+
+	/* No matter which mode we are on, it's safe to clear both lists. */
+	timesortedlist_clear(sch);
+	__qdisc_reset_queue(&sch->q);
+
+	sch->qstats.backlog = 0;
+	sch->q.qlen = 0;
+
+	q->last = 0;
+}
+
+static void etf_destroy(struct Qdisc *sch)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+
+	/* Only cancel watchdog if it's been initialized. */
+	if (q->watchdog.qdisc == sch)
+		qdisc_watchdog_cancel(&q->watchdog);
+
+	etf_disable_offload(dev, q);
+}
+
+static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct etf_sched_data *q = qdisc_priv(sch);
+	struct tc_etf_qopt opt = { };
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (!nest)
+		goto nla_put_failure;
+
+	opt.delta = q->delta;
+	opt.clockid = q->clockid;
+	if (q->offload)
+		opt.flags |= TC_ETF_OFFLOAD_ON;
+
+	if (q->deadline_mode)
+		opt.flags |= TC_ETF_DEADLINE_MODE_ON;
+
+	if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
+		goto nla_put_failure;
+
+	return nla_nest_end(skb, nest);
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -1;
+}
+
+static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
+	.id		=	"etf",
+	.priv_size	=	sizeof(struct etf_sched_data),
+	.enqueue	=	etf_enqueue_timesortedlist,
+	.dequeue	=	etf_dequeue_timesortedlist,
+	.peek		=	etf_peek_timesortedlist,
+	.init		=	etf_init,
+	.reset		=	etf_reset,
+	.destroy	=	etf_destroy,
+	.dump		=	etf_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init etf_module_init(void)
+{
+	return register_qdisc(&etf_qdisc_ops);
+}
+
+static void __exit etf_module_exit(void)
+{
+	unregister_qdisc(&etf_qdisc_ops);
+}
+module_init(etf_module_init)
+module_exit(etf_module_exit)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 2a4ab7caf553..43c4bfe625a9 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -126,7 +126,6 @@ struct htb_class {
 
 	union {
 		struct htb_class_leaf {
-			struct list_head drop_list;
 			int		deficit[TC_HTB_MAXDEPTH];
 			struct Qdisc	*q;
 		} leaf;
@@ -171,7 +170,6 @@ struct htb_sched {
 	struct qdisc_watchdog	watchdog;
 
 	s64			now;	/* cached dequeue time */
-	struct list_head	drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
 
 	/* time of nearest event per level (row) */
 	s64			near_ev_cache[TC_HTB_MAXDEPTH];
@@ -562,8 +560,6 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
 	if (!cl->prio_activity) {
 		cl->prio_activity = 1 << cl->prio;
 		htb_activate_prios(q, cl);
-		list_add_tail(&cl->un.leaf.drop_list,
-			      q->drops + cl->prio);
 	}
 }
 
@@ -579,7 +575,6 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 
 	htb_deactivate_prios(q, cl);
 	cl->prio_activity = 0;
-	list_del_init(&cl->un.leaf.drop_list);
 }
 
 static void htb_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
@@ -981,7 +976,6 @@ static void htb_reset(struct Qdisc *sch)
 			else {
 				if (cl->un.leaf.q)
 					qdisc_reset(cl->un.leaf.q);
-				INIT_LIST_HEAD(&cl->un.leaf.drop_list);
 			}
 			cl->prio_activity = 0;
 			cl->cmode = HTB_CAN_SEND;
@@ -993,8 +987,6 @@ static void htb_reset(struct Qdisc *sch)
 	sch->qstats.backlog = 0;
 	memset(q->hlevel, 0, sizeof(q->hlevel));
 	memset(q->row_mask, 0, sizeof(q->row_mask));
-	for (i = 0; i < TC_HTB_NUMPRIO; i++)
-		INIT_LIST_HEAD(q->drops + i);
 }
 
 static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
@@ -1024,7 +1016,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
 	struct nlattr *tb[TCA_HTB_MAX + 1];
 	struct tc_htb_glob *gopt;
 	int err;
-	int i;
 
 	qdisc_watchdog_init(&q->watchdog, sch);
 	INIT_WORK(&q->work, htb_work_func);
@@ -1050,8 +1041,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
 	err = qdisc_class_hash_init(&q->clhash);
 	if (err < 0)
 		return err;
-	for (i = 0; i < TC_HTB_NUMPRIO; i++)
-		INIT_LIST_HEAD(q->drops + i);
 
 	qdisc_skb_head_init(&q->direct_queue);
 
@@ -1224,7 +1213,6 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
 
 	parent->level = 0;
 	memset(&parent->un.inner, 0, sizeof(parent->un.inner));
-	INIT_LIST_HEAD(&parent->un.leaf.drop_list);
 	parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
 	parent->tokens = parent->buffer;
 	parent->ctokens = parent->cbuffer;
@@ -1418,7 +1406,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		}
 
 		cl->children = 0;
-		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
 		RB_CLEAR_NODE(&cl->pq_node);
 
 		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 7d6801fc5340..ad18a2052416 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -68,6 +68,11 @@
 		 Fabio Ludovici <fabio.ludovici at yahoo.it>
 */
 
+struct disttable {
+	u32  size;
+	s16 table[0];
+};
+
 struct netem_sched_data {
 	/* internal t(ime)fifo qdisc uses t_root and sch->limit */
 	struct rb_root t_root;
@@ -99,10 +104,7 @@ struct netem_sched_data {
 		u32 rho;
 	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
 
-	struct disttable {
-		u32  size;
-		s16 table[0];
-	} *delay_dist;
+	struct disttable *delay_dist;
 
 	enum  {
 		CLG_RANDOM,
@@ -142,6 +144,7 @@ struct netem_sched_data {
 		s32 bytes_left;
 	} slot;
 
+	struct disttable *slot_dist;
 };
 
 /* Time stamp put into socket buffer control block
@@ -180,7 +183,7 @@ static u32 get_crandom(struct crndstate *state)
 	u64 value, rho;
 	unsigned long answer;
 
-	if (state->rho == 0)	/* no correlation */
+	if (!state || state->rho == 0)	/* no correlation */
 		return prandom_u32();
 
 	value = prandom_u32();
@@ -601,10 +604,19 @@ finish_segs:
 
 static void get_slot_next(struct netem_sched_data *q, u64 now)
 {
-	q->slot.slot_next = now + q->slot_config.min_delay +
-		(prandom_u32() *
-			(q->slot_config.max_delay -
-				q->slot_config.min_delay) >> 32);
+	s64 next_delay;
+
+	if (!q->slot_dist)
+		next_delay = q->slot_config.min_delay +
+				(prandom_u32() *
+				 (q->slot_config.max_delay -
+				  q->slot_config.min_delay) >> 32);
+	else
+		next_delay = tabledist(q->slot_config.dist_delay,
+				       (s32)(q->slot_config.dist_jitter),
+				       NULL, q->slot_dist);
+
+	q->slot.slot_next = now + next_delay;
 	q->slot.packets_left = q->slot_config.max_packets;
 	q->slot.bytes_left = q->slot_config.max_bytes;
 }
@@ -721,9 +733,9 @@ static void dist_free(struct disttable *d)
  * signed 16 bit values.
  */
 
-static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
+static int get_dist_table(struct Qdisc *sch, struct disttable **tbl,
+			  const struct nlattr *attr)
 {
-	struct netem_sched_data *q = qdisc_priv(sch);
 	size_t n = nla_len(attr)/sizeof(__s16);
 	const __s16 *data = nla_data(attr);
 	spinlock_t *root_lock;
@@ -744,7 +756,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
 	root_lock = qdisc_root_sleeping_lock(sch);
 
 	spin_lock_bh(root_lock);
-	swap(q->delay_dist, d);
+	swap(*tbl, d);
 	spin_unlock_bh(root_lock);
 
 	dist_free(d);
@@ -762,7 +774,8 @@ static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
 		q->slot_config.max_bytes = INT_MAX;
 	q->slot.packets_left = q->slot_config.max_packets;
 	q->slot.bytes_left = q->slot_config.max_bytes;
-	if (q->slot_config.min_delay | q->slot_config.max_delay)
+	if (q->slot_config.min_delay | q->slot_config.max_delay |
+	    q->slot_config.dist_jitter)
 		q->slot.slot_next = ktime_get_ns();
 	else
 		q->slot.slot_next = 0;
@@ -926,16 +939,17 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
 	}
 
 	if (tb[TCA_NETEM_DELAY_DIST]) {
-		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
-		if (ret) {
-			/* recover clg and loss_model, in case of
-			 * q->clg and q->loss_model were modified
-			 * in get_loss_clg()
-			 */
-			q->clg = old_clg;
-			q->loss_model = old_loss_model;
-			return ret;
-		}
+		ret = get_dist_table(sch, &q->delay_dist,
+				     tb[TCA_NETEM_DELAY_DIST]);
+		if (ret)
+			goto get_table_failure;
+	}
+
+	if (tb[TCA_NETEM_SLOT_DIST]) {
+		ret = get_dist_table(sch, &q->slot_dist,
+				     tb[TCA_NETEM_SLOT_DIST]);
+		if (ret)
+			goto get_table_failure;
 	}
 
 	sch->limit = qopt->limit;
@@ -983,6 +997,15 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
 		get_slot(q, tb[TCA_NETEM_SLOT]);
 
 	return ret;
+
+get_table_failure:
+	/* recover clg and loss_model, in case of
+	 * q->clg and q->loss_model were modified
+	 * in get_loss_clg()
+	 */
+	q->clg = old_clg;
+	q->loss_model = old_loss_model;
+	return ret;
 }
 
 static int netem_init(struct Qdisc *sch, struct nlattr *opt,
@@ -1011,6 +1034,7 @@ static void netem_destroy(struct Qdisc *sch)
 	if (q->qdisc)
 		qdisc_destroy(q->qdisc);
 	dist_free(q->delay_dist);
+	dist_free(q->slot_dist);
 }
 
 static int dump_loss_model(const struct netem_sched_data *q,
@@ -1127,7 +1151,8 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (dump_loss_model(q, skb) != 0)
 		goto nla_put_failure;
 
-	if (q->slot_config.min_delay | q->slot_config.max_delay) {
+	if (q->slot_config.min_delay | q->slot_config.max_delay |
+	    q->slot_config.dist_jitter) {
 		slot = q->slot_config;
 		if (slot.max_packets == INT_MAX)
 			slot.max_packets = 0;
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 5d5a16204d50..297d9cf960b9 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -115,6 +115,9 @@ static struct sctp_association *sctp_association_init(
 	/* Initialize path max retrans value. */
 	asoc->pathmaxrxt = sp->pathmaxrxt;
 
+	asoc->flowlabel = sp->flowlabel;
+	asoc->dscp = sp->dscp;
+
 	/* Initialize default path MTU. */
 	asoc->pathmtu = sp->pathmtu;
 
@@ -647,6 +650,18 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
 	peer->sackdelay = asoc->sackdelay;
 	peer->sackfreq = asoc->sackfreq;
 
+	if (addr->sa.sa_family == AF_INET6) {
+		__be32 info = addr->v6.sin6_flowinfo;
+
+		if (info) {
+			peer->flowlabel = ntohl(info & IPV6_FLOWLABEL_MASK);
+			peer->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
+		} else {
+			peer->flowlabel = asoc->flowlabel;
+		}
+	}
+	peer->dscp = asoc->dscp;
+
 	/* Enable/disable heartbeat, SACK delay, and path MTU discovery
 	 * based on association setting.
 	 */
diff --git a/net/sctp/input.c b/net/sctp/input.c
index ba8a6e6c36fa..9bbc5f92c941 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -56,6 +56,7 @@
 #include <net/sctp/sm.h>
 #include <net/sctp/checksum.h>
 #include <net/net_namespace.h>
+#include <linux/rhashtable.h>
 
 /* Forward declarations for internal helpers. */
 static int sctp_rcv_ootb(struct sk_buff *);
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 0cd2e764f47f..fc6c5e4bffa5 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -209,12 +209,17 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
 	struct sock *sk = skb->sk;
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct flowi6 *fl6 = &transport->fl.u.ip6;
+	__u8 tclass = np->tclass;
 	int res;
 
 	pr_debug("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb,
 		 skb->len, &fl6->saddr, &fl6->daddr);
 
-	IP6_ECN_flow_xmit(sk, fl6->flowlabel);
+	if (transport->dscp & SCTP_DSCP_SET_MASK)
+		tclass = transport->dscp & SCTP_DSCP_VAL_MASK;
+
+	if (INET_ECN_is_capable(tclass))
+		IP6_ECN_flow_xmit(sk, fl6->flowlabel);
 
 	if (!(transport->param_flags & SPP_PMTUD_ENABLE))
 		skb->ignore_df = 1;
@@ -223,7 +228,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
 
 	rcu_read_lock();
 	res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt),
-		       np->tclass);
+		       tclass);
 	rcu_read_unlock();
 	return res;
 }
@@ -254,6 +259,17 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 		fl6->flowi6_oif = daddr->v6.sin6_scope_id;
 	else if (asoc)
 		fl6->flowi6_oif = asoc->base.sk->sk_bound_dev_if;
+	if (t->flowlabel & SCTP_FLOWLABEL_SET_MASK)
+		fl6->flowlabel = htonl(t->flowlabel & SCTP_FLOWLABEL_VAL_MASK);
+
+	if (np->sndflow && (fl6->flowlabel & IPV6_FLOWLABEL_MASK)) {
+		struct ip6_flowlabel *flowlabel;
+
+		flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
+		if (!flowlabel)
+			goto out;
+		fl6_sock_release(flowlabel);
+	}
 
 	pr_debug("%s: dst=%pI6 ", __func__, &fl6->daddr);
 
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 67f73d3a1356..e948db29ab53 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -426,13 +426,16 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 	struct dst_entry *dst = NULL;
 	union sctp_addr *daddr = &t->ipaddr;
 	union sctp_addr dst_saddr;
+	__u8 tos = inet_sk(sk)->tos;
 
+	if (t->dscp & SCTP_DSCP_SET_MASK)
+		tos = t->dscp & SCTP_DSCP_VAL_MASK;
 	memset(fl4, 0x0, sizeof(struct flowi4));
 	fl4->daddr  = daddr->v4.sin_addr.s_addr;
 	fl4->fl4_dport = daddr->v4.sin_port;
 	fl4->flowi4_proto = IPPROTO_SCTP;
 	if (asoc) {
-		fl4->flowi4_tos = RT_CONN_FLAGS(asoc->base.sk);
+		fl4->flowi4_tos = RT_CONN_FLAGS_TOS(asoc->base.sk, tos);
 		fl4->flowi4_oif = asoc->base.sk->sk_bound_dev_if;
 		fl4->fl4_sport = htons(asoc->base.bind_addr.port);
 	}
@@ -495,7 +498,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 		fl4->fl4_sport = laddr->a.v4.sin_port;
 		flowi4_update_output(fl4,
 				     asoc->base.sk->sk_bound_dev_if,
-				     RT_CONN_FLAGS(asoc->base.sk),
+				     RT_CONN_FLAGS_TOS(asoc->base.sk, tos),
 				     daddr->v4.sin_addr.s_addr,
 				     laddr->a.v4.sin_addr.s_addr);
 
@@ -971,16 +974,21 @@ static inline int sctp_v4_xmit(struct sk_buff *skb,
 			       struct sctp_transport *transport)
 {
 	struct inet_sock *inet = inet_sk(skb->sk);
+	__u8 dscp = inet->tos;
 
 	pr_debug("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n", __func__, skb,
-		 skb->len, &transport->fl.u.ip4.saddr, &transport->fl.u.ip4.daddr);
+		 skb->len, &transport->fl.u.ip4.saddr,
+		 &transport->fl.u.ip4.daddr);
+
+	if (transport->dscp & SCTP_DSCP_SET_MASK)
+		dscp = transport->dscp & SCTP_DSCP_VAL_MASK;
 
 	inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ?
 			 IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
 
 	SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS);
 
-	return ip_queue_xmit(&inet->sk, skb, &transport->fl);
+	return __ip_queue_xmit(&inet->sk, skb, &transport->fl, dscp);
 }
 
 static struct sctp_af sctp_af_inet;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ce620e878538..502c0d7cb105 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -66,6 +66,7 @@
 #include <linux/slab.h>
 #include <linux/file.h>
 #include <linux/compat.h>
+#include <linux/rhashtable.h>
 
 #include <net/ip.h>
 #include <net/icmp.h>
@@ -1696,6 +1697,7 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
 	struct sctp_association *asoc;
 	enum sctp_scope scope;
 	struct cmsghdr *cmsg;
+	__be32 flowinfo = 0;
 	struct sctp_af *af;
 	int err;
 
@@ -1780,6 +1782,9 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
 	if (!cmsgs->addrs_msg)
 		return 0;
 
+	if (daddr->sa.sa_family == AF_INET6)
+		flowinfo = daddr->v6.sin6_flowinfo;
+
 	/* sendv addr list parse */
 	for_each_cmsghdr(cmsg, cmsgs->addrs_msg) {
 		struct sctp_transport *transport;
@@ -1812,6 +1817,7 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags,
 			}
 
 			dlen = sizeof(struct in6_addr);
+			daddr->v6.sin6_flowinfo = flowinfo;
 			daddr->v6.sin6_family = AF_INET6;
 			daddr->v6.sin6_port = htons(asoc->peer.port);
 			memcpy(&daddr->v6.sin6_addr, CMSG_DATA(cmsg), dlen);
@@ -2392,6 +2398,8 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
  *     uint32_t                spp_pathmtu;
  *     uint32_t                spp_sackdelay;
  *     uint32_t                spp_flags;
+ *     uint32_t                spp_ipv6_flowlabel;
+ *     uint8_t                 spp_dscp;
  * };
  *
  *   spp_assoc_id    - (one-to-many style socket) This is filled in the
@@ -2471,6 +2479,45 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
  *                     also that this field is mutually exclusive to
  *                     SPP_SACKDELAY_ENABLE, setting both will have undefined
  *                     results.
+ *
+ *                     SPP_IPV6_FLOWLABEL:  Setting this flag enables the
+ *                     setting of the IPV6 flow label value.  The value is
+ *                     contained in the spp_ipv6_flowlabel field.
+ *                     Upon retrieval, this flag will be set to indicate that
+ *                     the spp_ipv6_flowlabel field has a valid value returned.
+ *                     If a specific destination address is set (in the
+ *                     spp_address field), then the value returned is that of
+ *                     the address.  If just an association is specified (and
+ *                     no address), then the association's default flow label
+ *                     is returned.  If neither an association nor a destination
+ *                     is specified, then the socket's default flow label is
+ *                     returned.  For non-IPv6 sockets, this flag will be left
+ *                     cleared.
+ *
+ *                     SPP_DSCP:  Setting this flag enables the setting of the
+ *                     Differentiated Services Code Point (DSCP) value
+ *                     associated with either the association or a specific
+ *                     address.  The value is obtained in the spp_dscp field.
+ *                     Upon retrieval, this flag will be set to indicate that
+ *                     the spp_dscp field has a valid value returned.  If a
+ *                     specific destination address is set when called (in the
+ *                     spp_address field), then that specific destination
+ *                     address's DSCP value is returned.  If just an association
+ *                     is specified, then the association's default DSCP is
+ *                     returned.  If neither an association nor a destination is
+ *                     specified, then the socket's default DSCP is returned.
+ *
+ *   spp_ipv6_flowlabel
+ *                   - This field is used in conjunction with the
+ *                     SPP_IPV6_FLOWLABEL flag and contains the IPv6 flow label.
+ *                     The 20 least significant bits are used for the flow
+ *                     label.  This setting has precedence over any IPv6-layer
+ *                     setting.
+ *
+ *   spp_dscp        - This field is used in conjunction with the SPP_DSCP flag
+ *                     and contains the DSCP.  The 6 most significant bits are
+ *                     used for the DSCP.  This setting has precedence over any
+ *                     IPv4- or IPv6- layer setting.
  */
 static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 				       struct sctp_transport   *trans,
@@ -2610,6 +2657,51 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 		}
 	}
 
+	if (params->spp_flags & SPP_IPV6_FLOWLABEL) {
+		if (trans && trans->ipaddr.sa.sa_family == AF_INET6) {
+			trans->flowlabel = params->spp_ipv6_flowlabel &
+					   SCTP_FLOWLABEL_VAL_MASK;
+			trans->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
+		} else if (asoc) {
+			list_for_each_entry(trans,
+					    &asoc->peer.transport_addr_list,
+					    transports) {
+				if (trans->ipaddr.sa.sa_family != AF_INET6)
+					continue;
+				trans->flowlabel = params->spp_ipv6_flowlabel &
+						   SCTP_FLOWLABEL_VAL_MASK;
+				trans->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
+			}
+			asoc->flowlabel = params->spp_ipv6_flowlabel &
+					  SCTP_FLOWLABEL_VAL_MASK;
+			asoc->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
+		} else if (sctp_opt2sk(sp)->sk_family == AF_INET6) {
+			sp->flowlabel = params->spp_ipv6_flowlabel &
+					SCTP_FLOWLABEL_VAL_MASK;
+			sp->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
+		}
+	}
+
+	if (params->spp_flags & SPP_DSCP) {
+		if (trans) {
+			trans->dscp = params->spp_dscp & SCTP_DSCP_VAL_MASK;
+			trans->dscp |= SCTP_DSCP_SET_MASK;
+		} else if (asoc) {
+			list_for_each_entry(trans,
+					    &asoc->peer.transport_addr_list,
+					    transports) {
+				trans->dscp = params->spp_dscp &
+					      SCTP_DSCP_VAL_MASK;
+				trans->dscp |= SCTP_DSCP_SET_MASK;
+			}
+			asoc->dscp = params->spp_dscp & SCTP_DSCP_VAL_MASK;
+			asoc->dscp |= SCTP_DSCP_SET_MASK;
+		} else {
+			sp->dscp = params->spp_dscp & SCTP_DSCP_VAL_MASK;
+			sp->dscp |= SCTP_DSCP_SET_MASK;
+		}
+	}
+
 	return 0;
 }
 
@@ -2624,11 +2716,18 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk,
 	int error;
 	int hb_change, pmtud_change, sackdelay_change;
 
-	if (optlen != sizeof(struct sctp_paddrparams))
+	if (optlen == sizeof(params)) {
+		if (copy_from_user(&params, optval, optlen))
+			return -EFAULT;
+	} else if (optlen == ALIGN(offsetof(struct sctp_paddrparams,
+					    spp_ipv6_flowlabel), 4)) {
+		if (copy_from_user(&params, optval, optlen))
+			return -EFAULT;
+		if (params.spp_flags & (SPP_DSCP | SPP_IPV6_FLOWLABEL))
+			return -EINVAL;
+	} else {
 		return -EINVAL;
-
-	if (copy_from_user(&params, optval, optlen))
-		return -EFAULT;
+	}
 
 	/* Validate flags and value parameters. */
 	hb_change        = params.spp_flags & SPP_HB;
@@ -4169,6 +4268,28 @@ out:
 	return retval;
 }
 
+static int sctp_setsockopt_reuse_port(struct sock *sk, char __user *optval,
+				      unsigned int optlen)
+{
+	int val;
+
+	if (!sctp_style(sk, TCP))
+		return -EOPNOTSUPP;
+
+	if (sctp_sk(sk)->ep->base.bind_addr.port)
+		return -EFAULT;
+
+	if (optlen < sizeof(int))
+		return -EINVAL;
+
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+
+	sctp_sk(sk)->reuse = !!val;
+
+	return 0;
+}
+
 /* API 6.2 setsockopt(), getsockopt()
  *
  * Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4363,6 +4484,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
 		retval = sctp_setsockopt_interleaving_supported(sk, optval,
 								optlen);
 		break;
+	case SCTP_REUSE_PORT:
+		retval = sctp_setsockopt_reuse_port(sk, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
@@ -5427,6 +5551,45 @@ out:
  *                     also that this field is mutually exclusive to
  *                     SPP_SACKDELAY_ENABLE, setting both will have undefined
  *                     results.
+ *
+ *                     SPP_IPV6_FLOWLABEL:  Setting this flag enables the
+ *                     setting of the IPV6 flow label value.  The value is
+ *                     contained in the spp_ipv6_flowlabel field.
+ *                     Upon retrieval, this flag will be set to indicate that
+ *                     the spp_ipv6_flowlabel field has a valid value returned.
+ *                     If a specific destination address is set (in the
+ *                     spp_address field), then the value returned is that of
+ *                     the address.  If just an association is specified (and
+ *                     no address), then the association's default flow label
+ *                     is returned.  If neither an association nor a destination
+ *                     is specified, then the socket's default flow label is
+ *                     returned.  For non-IPv6 sockets, this flag will be left
+ *                     cleared.
+ *
+ *                     SPP_DSCP:  Setting this flag enables the setting of the
+ *                     Differentiated Services Code Point (DSCP) value
+ *                     associated with either the association or a specific
+ *                     address.  The value is obtained in the spp_dscp field.
+ *                     Upon retrieval, this flag will be set to indicate that
+ *                     the spp_dscp field has a valid value returned.  If a
+ *                     specific destination address is set when called (in the
+ *                     spp_address field), then that specific destination
+ *                     address's DSCP value is returned.  If just an association
+ *                     is specified, then the association's default DSCP is
+ *                     returned.  If neither an association nor a destination is
+ *                     specified, then the socket's default DSCP is returned.
+ *
+ *   spp_ipv6_flowlabel
+ *                   - This field is used in conjunction with the
+ *                     SPP_IPV6_FLOWLABEL flag and contains the IPv6 flow label.
+ *                     The 20 least significant bits are used for the flow
+ *                     label.  This setting has precedence over any IPv6-layer
+ *                     setting.
+ *
+ *   spp_dscp        - This field is used in conjunction with the SPP_DSCP flag
+ *                     and contains the DSCP.  The 6 most significant bits are
+ *                     used for the DSCP.  This setting has precedence over any
+ *                     IPv4- or IPv6- layer setting.
  */
 static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
 					    char __user *optval, int __user *optlen)
@@ -5436,9 +5599,15 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
 	struct sctp_association *asoc = NULL;
 	struct sctp_sock        *sp = sctp_sk(sk);
 
-	if (len < sizeof(struct sctp_paddrparams))
+	if (len >= sizeof(params))
+		len = sizeof(params);
+	else if (len >= ALIGN(offsetof(struct sctp_paddrparams,
+				       spp_ipv6_flowlabel), 4))
+		len = ALIGN(offsetof(struct sctp_paddrparams,
+				     spp_ipv6_flowlabel), 4);
+	else
 		return -EINVAL;
-	len = sizeof(struct sctp_paddrparams);
+
 	if (copy_from_user(&params, optval, len))
 		return -EFAULT;
 
@@ -5473,6 +5642,15 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
 
 		/*draft-11 doesn't say what to return in spp_flags*/
 		params.spp_flags      = trans->param_flags;
+		if (trans->flowlabel & SCTP_FLOWLABEL_SET_MASK) {
+			params.spp_ipv6_flowlabel = trans->flowlabel &
+						    SCTP_FLOWLABEL_VAL_MASK;
+			params.spp_flags |= SPP_IPV6_FLOWLABEL;
+		}
+		if (trans->dscp & SCTP_DSCP_SET_MASK) {
+			params.spp_dscp	= trans->dscp & SCTP_DSCP_VAL_MASK;
+			params.spp_flags |= SPP_DSCP;
+		}
 	} else if (asoc) {
 		/* Fetch association values. */
 		params.spp_hbinterval = jiffies_to_msecs(asoc->hbinterval);
@@ -5482,6 +5660,15 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
 
 		/*draft-11 doesn't say what to return in spp_flags*/
 		params.spp_flags      = asoc->param_flags;
+		if (asoc->flowlabel & SCTP_FLOWLABEL_SET_MASK) {
+			params.spp_ipv6_flowlabel = asoc->flowlabel &
+						    SCTP_FLOWLABEL_VAL_MASK;
+			params.spp_flags |= SPP_IPV6_FLOWLABEL;
+		}
+		if (asoc->dscp & SCTP_DSCP_SET_MASK) {
+			params.spp_dscp	= asoc->dscp & SCTP_DSCP_VAL_MASK;
+			params.spp_flags |= SPP_DSCP;
+		}
 	} else {
 		/* Fetch socket values. */
 		params.spp_hbinterval = sp->hbinterval;
@@ -5491,6 +5678,15 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
 
 		/*draft-11 doesn't say what to return in spp_flags*/
 		params.spp_flags      = sp->param_flags;
+		if (sp->flowlabel & SCTP_FLOWLABEL_SET_MASK) {
+			params.spp_ipv6_flowlabel = sp->flowlabel &
+						    SCTP_FLOWLABEL_VAL_MASK;
+			params.spp_flags |= SPP_IPV6_FLOWLABEL;
+		}
+		if (sp->dscp & SCTP_DSCP_SET_MASK) {
+			params.spp_dscp	= sp->dscp & SCTP_DSCP_VAL_MASK;
+			params.spp_flags |= SPP_DSCP;
+		}
 	}
 
 	if (copy_to_user(optval, &params, len))
@@ -7196,6 +7392,26 @@ out:
 	return retval;
 }
 
+static int sctp_getsockopt_reuse_port(struct sock *sk, int len,
+				      char __user *optval,
+				      int __user *optlen)
+{
+	int val;
+
+	if (len < sizeof(int))
+		return -EINVAL;
+
+	len = sizeof(int);
+	val = sctp_sk(sk)->reuse;
+	if (put_user(len, optlen))
+		return -EFAULT;
+
+	if (copy_to_user(optval, &val, len))
+		return -EFAULT;
+
+	return 0;
+}
+
 static int sctp_getsockopt(struct sock *sk, int level, int optname,
 			   char __user *optval, int __user *optlen)
 {
@@ -7391,6 +7607,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
 		retval = sctp_getsockopt_interleaving_supported(sk, len, optval,
 								optlen);
 		break;
+	case SCTP_REUSE_PORT:
+		retval = sctp_getsockopt_reuse_port(sk, len, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
@@ -7428,6 +7647,7 @@ static struct sctp_bind_bucket *sctp_bucket_create(
 
 static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
 {
+	bool reuse = (sk->sk_reuse || sctp_sk(sk)->reuse);
 	struct sctp_bind_hashbucket *head; /* hash list */
 	struct sctp_bind_bucket *pp;
 	unsigned short snum;
@@ -7500,13 +7720,11 @@ pp_found:
 		 * used by other socket (pp->owner not empty); that other
 		 * socket is going to be sk2.
 		 */
-		int reuse = sk->sk_reuse;
 		struct sock *sk2;
 
 		pr_debug("%s: found a possible match\n", __func__);
 
-		if (pp->fastreuse && sk->sk_reuse &&
-			sk->sk_state != SCTP_SS_LISTENING)
+		if (pp->fastreuse && reuse && sk->sk_state != SCTP_SS_LISTENING)
 			goto success;
 
 		/* Run through the list of sockets bound to the port
@@ -7524,7 +7742,7 @@ pp_found:
 			ep2 = sctp_sk(sk2)->ep;
 
 			if (sk == sk2 ||
-			    (reuse && sk2->sk_reuse &&
+			    (reuse && (sk2->sk_reuse || sctp_sk(sk2)->reuse) &&
 			     sk2->sk_state != SCTP_SS_LISTENING))
 				continue;
 
@@ -7548,12 +7766,12 @@ pp_not_found:
 	 * SO_REUSEADDR on this socket -sk-).
 	 */
 	if (hlist_empty(&pp->owner)) {
-		if (sk->sk_reuse && sk->sk_state != SCTP_SS_LISTENING)
+		if (reuse && sk->sk_state != SCTP_SS_LISTENING)
 			pp->fastreuse = 1;
 		else
 			pp->fastreuse = 0;
 	} else if (pp->fastreuse &&
-		(!sk->sk_reuse || sk->sk_state == SCTP_SS_LISTENING))
+		   (!reuse || sk->sk_state == SCTP_SS_LISTENING))
 		pp->fastreuse = 0;
 
 	/* We are set, so fill up all the data in the hash table
@@ -7684,7 +7902,7 @@ int sctp_inet_listen(struct socket *sock, int backlog)
 		err = 0;
 		sctp_unhash_endpoint(ep);
 		sk->sk_state = SCTP_SS_CLOSED;
-		if (sk->sk_reuse)
+		if (sk->sk_reuse || sctp_sk(sk)->reuse)
 			sctp_sk(sk)->bind_hash->fastreuse = 1;
 		goto out;
 	}
@@ -8551,6 +8769,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
 	newsk->sk_no_check_tx = sk->sk_no_check_tx;
 	newsk->sk_no_check_rx = sk->sk_no_check_rx;
 	newsk->sk_reuse = sk->sk_reuse;
+	sctp_sk(newsk)->reuse = sp->reuse;
 
 	newsk->sk_shutdown = sk->sk_shutdown;
 	newsk->sk_destruct = sctp_destruct_sock;
diff --git a/net/smc/Makefile b/net/smc/Makefile
index 188104654b54..4df96b4b8130 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -1,4 +1,4 @@
 obj-$(CONFIG_SMC)	+= smc.o
 obj-$(CONFIG_SMC_DIAG)	+= smc_diag.o
 smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
-smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o
+smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 05e4ffe5aabd..143b2220c0c8 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -23,6 +23,7 @@
 #include <linux/workqueue.h>
 #include <linux/in.h>
 #include <linux/sched/signal.h>
+#include <linux/if_vlan.h>
 
 #include <net/sock.h>
 #include <net/tcp.h>
@@ -35,6 +36,7 @@
 #include "smc_cdc.h"
 #include "smc_core.h"
 #include "smc_ib.h"
+#include "smc_ism.h"
 #include "smc_pnet.h"
 #include "smc_tx.h"
 #include "smc_rx.h"
@@ -381,8 +383,8 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
 	return 0;
 }
 
-static void smc_conn_save_peer_info(struct smc_sock *smc,
-				    struct smc_clc_msg_accept_confirm *clc)
+static void smcr_conn_save_peer_info(struct smc_sock *smc,
+				     struct smc_clc_msg_accept_confirm *clc)
 {
 	int bufsize = smc_uncompress_bufsize(clc->rmbe_size);
 
@@ -393,6 +395,28 @@ static void smc_conn_save_peer_info(struct smc_sock *smc,
 	smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
 }
 
+static void smcd_conn_save_peer_info(struct smc_sock *smc,
+				     struct smc_clc_msg_accept_confirm *clc)
+{
+	int bufsize = smc_uncompress_bufsize(clc->dmbe_size);
+
+	smc->conn.peer_rmbe_idx = clc->dmbe_idx;
+	smc->conn.peer_token = clc->token;
+	/* msg header takes up space in the buffer */
+	smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
+	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
+	smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
+}
+
+static void smc_conn_save_peer_info(struct smc_sock *smc,
+				    struct smc_clc_msg_accept_confirm *clc)
+{
+	if (smc->conn.lgr->is_smcd)
+		smcd_conn_save_peer_info(smc, clc);
+	else
+		smcr_conn_save_peer_info(smc, clc);
+}
+
 static void smc_link_save_peer_info(struct smc_link *link,
 				    struct smc_clc_msg_accept_confirm *clc)
 {
@@ -463,15 +487,51 @@ static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
 	return reason_code;
 }
 
+/* check if there is an ISM device available for this connection. */
+/* called for connect and listen */
+static int smc_check_ism(struct smc_sock *smc, struct smcd_dev **ismdev)
+{
+	/* Find ISM device with same PNETID as connecting interface  */
+	smc_pnet_find_ism_resource(smc->clcsock->sk, ismdev);
+	if (!(*ismdev))
+		return SMC_CLC_DECL_CNFERR; /* configuration error */
+	return 0;
+}
+
+/* Check for VLAN ID and register it on ISM device just for CLC handshake */
+static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
+				      struct smcd_dev *ismdev,
+				      unsigned short vlan_id)
+{
+	if (vlan_id && smc_ism_get_vlan(ismdev, vlan_id))
+		return SMC_CLC_DECL_CNFERR;
+	return 0;
+}
+
+/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
+ * used, the VLAN ID will be registered again during the connection setup.
+ */
+static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
+					struct smcd_dev *ismdev,
+					unsigned short vlan_id)
+{
+	if (!is_smcd)
+		return 0;
+	if (vlan_id && smc_ism_put_vlan(ismdev, vlan_id))
+		return SMC_CLC_DECL_CNFERR;
+	return 0;
+}
+
 /* CLC handshake during connect */
-static int smc_connect_clc(struct smc_sock *smc,
+static int smc_connect_clc(struct smc_sock *smc, int smc_type,
 			   struct smc_clc_msg_accept_confirm *aclc,
-			   struct smc_ib_device *ibdev, u8 ibport)
+			   struct smc_ib_device *ibdev, u8 ibport,
+			   struct smcd_dev *ismdev)
 {
 	int rc = 0;
 
 	/* do inband token exchange */
-	rc = smc_clc_send_proposal(smc, ibdev, ibport);
+	rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, ismdev);
 	if (rc)
 		return rc;
 	/* receive SMC Accept CLC message */
@@ -488,8 +548,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
 	int reason_code = 0;
 
 	mutex_lock(&smc_create_lgr_pending);
-	local_contact = smc_conn_create(smc, ibdev, ibport, &aclc->lcl,
-					aclc->hdr.flag);
+	local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev,
+					ibport, &aclc->lcl, NULL, 0);
 	if (local_contact < 0) {
 		if (local_contact == -ENOMEM)
 			reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
@@ -504,7 +564,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
 	smc_conn_save_peer_info(smc, aclc);
 
 	/* create send buffer and rmb */
-	if (smc_buf_create(smc))
+	if (smc_buf_create(smc, false))
 		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
 
 	if (local_contact == SMC_FIRST_CONTACT)
@@ -551,11 +611,50 @@ static int smc_connect_rdma(struct smc_sock *smc,
 	return 0;
 }
 
+/* setup for ISM connection of client */
+static int smc_connect_ism(struct smc_sock *smc,
+			   struct smc_clc_msg_accept_confirm *aclc,
+			   struct smcd_dev *ismdev)
+{
+	int local_contact = SMC_FIRST_CONTACT;
+	int rc = 0;
+
+	mutex_lock(&smc_create_lgr_pending);
+	local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0,
+					NULL, ismdev, aclc->gid);
+	if (local_contact < 0)
+		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0);
+
+	/* Create send and receive buffers */
+	if (smc_buf_create(smc, true))
+		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
+
+	smc_conn_save_peer_info(smc, aclc);
+	smc_close_init(smc);
+	smc_rx_init(smc);
+	smc_tx_init(smc);
+
+	rc = smc_clc_send_confirm(smc);
+	if (rc)
+		return smc_connect_abort(smc, rc, local_contact);
+	mutex_unlock(&smc_create_lgr_pending);
+
+	smc_copy_sock_settings_to_clc(smc);
+	if (smc->sk.sk_state == SMC_INIT)
+		smc->sk.sk_state = SMC_ACTIVE;
+
+	return 0;
+}
+
 /* perform steps before actually connecting */
 static int __smc_connect(struct smc_sock *smc)
 {
+	bool ism_supported = false, rdma_supported = false;
 	struct smc_clc_msg_accept_confirm aclc;
 	struct smc_ib_device *ibdev;
+	struct smcd_dev *ismdev;
+	unsigned short vlan;
+	int smc_type;
 	int rc = 0;
 	u8 ibport;
 
@@ -572,20 +671,52 @@ static int __smc_connect(struct smc_sock *smc)
 	if (using_ipsec(smc))
 		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);
 
-	/* check if a RDMA device is available; if not, fall back */
-	if (smc_check_rdma(smc, &ibdev, &ibport))
+	/* check for VLAN ID */
+	if (smc_vlan_by_tcpsk(smc->clcsock, &vlan))
+		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);
+
+	/* check if there is an ism device available */
+	if (!smc_check_ism(smc, &ismdev) &&
+	    !smc_connect_ism_vlan_setup(smc, ismdev, vlan)) {
+		/* ISM is supported for this connection */
+		ism_supported = true;
+		smc_type = SMC_TYPE_D;
+	}
+
+	/* check if there is a rdma device available */
+	if (!smc_check_rdma(smc, &ibdev, &ibport)) {
+		/* RDMA is supported for this connection */
+		rdma_supported = true;
+		if (ism_supported)
+			smc_type = SMC_TYPE_B; /* both */
+		else
+			smc_type = SMC_TYPE_R; /* only RDMA */
+	}
+
+	/* if neither ISM nor RDMA are supported, fallback */
+	if (!rdma_supported && !ism_supported)
 		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);
 
 	/* perform CLC handshake */
-	rc = smc_connect_clc(smc, &aclc, ibdev, ibport);
-	if (rc)
+	rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, ismdev);
+	if (rc) {
+		smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
 		return smc_connect_decline_fallback(smc, rc);
+	}
 
-	/* connect using rdma */
-	rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
-	if (rc)
+	/* depending on previous steps, connect using rdma or ism */
+	if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
+		rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
+	else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
+		rc = smc_connect_ism(smc, &aclc, ismdev);
+	else
+		rc = SMC_CLC_DECL_CNFERR;
+	if (rc) {
+		smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
 		return smc_connect_decline_fallback(smc, rc);
+	}
 
+	smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
 	return 0;
 }
 
@@ -953,7 +1084,8 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
 				int *local_contact)
 {
 	/* allocate connection / link group */
-	*local_contact = smc_conn_create(new_smc, ibdev, ibport, &pclc->lcl, 0);
+	*local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport,
+					 &pclc->lcl, NULL, 0);
 	if (*local_contact < 0) {
 		if (*local_contact == -ENOMEM)
 			return SMC_CLC_DECL_MEM;/* insufficient memory*/
@@ -961,12 +1093,50 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
 	}
 
 	/* create send buffer and rmb */
-	if (smc_buf_create(new_smc))
+	if (smc_buf_create(new_smc, false))
 		return SMC_CLC_DECL_MEM;
 
 	return 0;
 }
 
+/* listen worker: initialize connection and buffers for SMC-D */
+static int smc_listen_ism_init(struct smc_sock *new_smc,
+			       struct smc_clc_msg_proposal *pclc,
+			       struct smcd_dev *ismdev,
+			       int *local_contact)
+{
+	struct smc_clc_msg_smcd *pclc_smcd;
+
+	pclc_smcd = smc_get_clc_msg_smcd(pclc);
+	*local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, NULL,
+					 ismdev, pclc_smcd->gid);
+	if (*local_contact < 0) {
+		if (*local_contact == -ENOMEM)
+			return SMC_CLC_DECL_MEM;/* insufficient memory*/
+		return SMC_CLC_DECL_INTERR; /* other error */
+	}
+
+	/* Check if peer can be reached via ISM device */
+	if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
+			    new_smc->conn.lgr->vlan_id,
+			    new_smc->conn.lgr->smcd)) {
+		if (*local_contact == SMC_FIRST_CONTACT)
+			smc_lgr_forget(new_smc->conn.lgr);
+		smc_conn_free(&new_smc->conn);
+		return SMC_CLC_DECL_CNFERR;
+	}
+
+	/* Create send and receive buffers */
+	if (smc_buf_create(new_smc, true)) {
+		if (*local_contact == SMC_FIRST_CONTACT)
+			smc_lgr_forget(new_smc->conn.lgr);
+		smc_conn_free(&new_smc->conn);
+		return SMC_CLC_DECL_MEM;
+	}
+
+	return 0;
+}
+
 /* listen worker: register buffers */
 static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
 {
@@ -1025,6 +1195,8 @@ static void smc_listen_work(struct work_struct *work)
 	struct smc_clc_msg_accept_confirm cclc;
 	struct smc_clc_msg_proposal *pclc;
 	struct smc_ib_device *ibdev;
+	bool ism_supported = false;
+	struct smcd_dev *ismdev;
 	u8 buf[SMC_CLC_MAX_LEN];
 	int local_contact = 0;
 	int reason_code = 0;
@@ -1065,12 +1237,21 @@ static void smc_listen_work(struct work_struct *work)
 	smc_rx_init(new_smc);
 	smc_tx_init(new_smc);
 
+	/* check if ISM is available */
+	if ((pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) &&
+	    !smc_check_ism(new_smc, &ismdev) &&
+	    !smc_listen_ism_init(new_smc, pclc, ismdev, &local_contact)) {
+		ism_supported = true;
+	}
+
 	/* check if RDMA is available */
-	if (smc_check_rdma(new_smc, &ibdev, &ibport) ||
-	    smc_listen_rdma_check(new_smc, pclc) ||
-	    smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
-				 &local_contact) ||
-	    smc_listen_rdma_reg(new_smc, local_contact)) {
+	if (!ism_supported &&
+	    ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) ||
+	     smc_check_rdma(new_smc, &ibdev, &ibport) ||
+	     smc_listen_rdma_check(new_smc, pclc) ||
+	     smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
+				  &local_contact) ||
+	     smc_listen_rdma_reg(new_smc, local_contact))) {
 		/* SMC not supported, decline */
 		mutex_unlock(&smc_create_lgr_pending);
 		smc_listen_decline(new_smc, SMC_CLC_DECL_CNFERR, local_contact);
@@ -1095,7 +1276,8 @@ static void smc_listen_work(struct work_struct *work)
 	}
 
 	/* finish worker */
-	smc_listen_rdma_finish(new_smc, &cclc, local_contact);
+	if (!ism_supported)
+		smc_listen_rdma_finish(new_smc, &cclc, local_contact);
 	smc_conn_save_peer_info(new_smc, &cclc);
 	mutex_unlock(&smc_create_lgr_pending);
 	smc_listen_out_connected(new_smc);
diff --git a/net/smc/smc.h b/net/smc/smc.h
index d7ca26570482..be20acd7b5ab 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -21,8 +21,6 @@
 #define SMCPROTO_SMC		0	/* SMC protocol, IPv4 */
 #define SMCPROTO_SMC6		1	/* SMC protocol, IPv6 */
 
-#define SMC_MAX_PORTS		2	/* Max # of ports */
-
 extern struct proto smc_proto;
 extern struct proto smc_proto6;
 
@@ -185,6 +183,11 @@ struct smc_connection {
 	spinlock_t		acurs_lock;	/* protect cursors */
 #endif
 	struct work_struct	close_work;	/* peer sent some closing */
+	struct tasklet_struct	rx_tsklet;	/* Receiver tasklet for SMC-D */
+	u8			rx_off;		/* receive offset:
+						 * 0 for SMC-R, 32 for SMC-D
+						 */
+	u64			peer_token;	/* SMC-D token of peer */
 };
 
 struct smc_connect_info {
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index a7e8d63fc8ae..621d8cca570b 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -117,7 +117,7 @@ int smc_cdc_msg_send(struct smc_connection *conn,
 	return rc;
 }
 
-int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
+static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)
 {
 	struct smc_cdc_tx_pend *pend;
 	struct smc_wr_buf *wr_buf;
@@ -130,6 +130,21 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
 	return smc_cdc_msg_send(conn, wr_buf, pend);
 }
 
+int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
+{
+	int rc;
+
+	if (conn->lgr->is_smcd) {
+		spin_lock_bh(&conn->send_lock);
+		rc = smcd_cdc_msg_send(conn);
+		spin_unlock_bh(&conn->send_lock);
+	} else {
+		rc = smcr_cdc_get_slot_and_msg_send(conn);
+	}
+
+	return rc;
+}
+
 static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend,
 			      unsigned long data)
 {
@@ -157,6 +172,45 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
 				(unsigned long)conn);
 }
 
+/* Send a SMC-D CDC header.
+ * This increments the free space available in our send buffer.
+ * Also update the confirmed receive buffer with what was sent to the peer.
+ */
+int smcd_cdc_msg_send(struct smc_connection *conn)
+{
+	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+	struct smcd_cdc_msg cdc;
+	int rc, diff;
+
+	memset(&cdc, 0, sizeof(cdc));
+	cdc.common.type = SMC_CDC_MSG_TYPE;
+	cdc.prod_wrap = conn->local_tx_ctrl.prod.wrap;
+	cdc.prod_count = conn->local_tx_ctrl.prod.count;
+
+	cdc.cons_wrap = conn->local_tx_ctrl.cons.wrap;
+	cdc.cons_count = conn->local_tx_ctrl.cons.count;
+	cdc.prod_flags = conn->local_tx_ctrl.prod_flags;
+	cdc.conn_state_flags = conn->local_tx_ctrl.conn_state_flags;
+	rc = smcd_tx_ism_write(conn, &cdc, sizeof(cdc), 0, 1);
+	if (rc)
+		return rc;
+	smc_curs_write(&conn->rx_curs_confirmed,
+		       smc_curs_read(&conn->local_tx_ctrl.cons, conn), conn);
+	/* Calculate transmitted data and increment free send buffer space */
+	diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin,
+			     &conn->tx_curs_sent);
+	/* increased by confirmed number of bytes */
+	smp_mb__before_atomic();
+	atomic_add(diff, &conn->sndbuf_space);
+	/* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */
+	smp_mb__after_atomic();
+	smc_curs_write(&conn->tx_curs_fin,
+		       smc_curs_read(&conn->tx_curs_sent, conn), conn);
+
+	smc_tx_sndbuf_nonfull(smc);
+	return rc;
+}
+
 /********************************* receive ***********************************/
 
 static inline bool smc_cdc_before(u16 seq1, u16 seq2)
@@ -178,7 +232,7 @@ static void smc_cdc_handle_urg_data_arrival(struct smc_sock *smc,
 	if (!sock_flag(&smc->sk, SOCK_URGINLINE))
 		/* we'll skip the urgent byte, so don't account for it */
 		(*diff_prod)--;
-	base = (char *)conn->rmb_desc->cpu_addr;
+	base = (char *)conn->rmb_desc->cpu_addr + conn->rx_off;
 	if (conn->urg_curs.count)
 		conn->urg_rx_byte = *(base + conn->urg_curs.count - 1);
 	else
@@ -276,6 +330,34 @@ static void smc_cdc_msg_recv(struct smc_sock *smc, struct smc_cdc_msg *cdc)
 	sock_put(&smc->sk); /* no free sk in softirq-context */
 }
 
+/* Schedule a tasklet for this connection. Triggered from the ISM device IRQ
+ * handler to indicate update in the DMBE.
+ *
+ * Context:
+ * - tasklet context
+ */
+static void smcd_cdc_rx_tsklet(unsigned long data)
+{
+	struct smc_connection *conn = (struct smc_connection *)data;
+	struct smcd_cdc_msg cdc;
+	struct smc_sock *smc;
+
+	if (!conn)
+		return;
+
+	memcpy(&cdc, conn->rmb_desc->cpu_addr, sizeof(cdc));
+	smc = container_of(conn, struct smc_sock, conn);
+	smc_cdc_msg_recv(smc, (struct smc_cdc_msg *)&cdc);
+}
+
+/* Initialize receive tasklet. Called from ISM device IRQ handler to start
+ * receiver side.
+ */
+void smcd_cdc_rx_init(struct smc_connection *conn)
+{
+	tasklet_init(&conn->rx_tsklet, smcd_cdc_rx_tsklet, (unsigned long)conn);
+}
+
 /***************************** init, exit, misc ******************************/
 
 static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf)
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index f60082fee5b8..8fbce4fee3e4 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -50,6 +50,20 @@ struct smc_cdc_msg {
 	u8				reserved[18];
 } __packed;					/* format defined in RFC7609 */
 
+/* CDC message for SMC-D */
+struct smcd_cdc_msg {
+	struct smc_wr_rx_hdr common;	/* Type = 0xFE */
+	u8 res1[7];
+	u16 prod_wrap;
+	u32 prod_count;
+	u8 res2[2];
+	u16 cons_wrap;
+	u32 cons_count;
+	struct smc_cdc_producer_flags	prod_flags;
+	struct smc_cdc_conn_state_flags conn_state_flags;
+	u8 res3[8];
+} __packed;
+
 static inline bool smc_cdc_rxed_any_close(struct smc_connection *conn)
 {
 	return conn->local_rx_ctrl.conn_state_flags.peer_conn_abort ||
@@ -204,9 +218,9 @@ static inline void smc_cdc_cursor_to_host(union smc_host_cursor *local,
 	smc_curs_write(local, smc_curs_read(&temp, conn), conn);
 }
 
-static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
-				       struct smc_cdc_msg *peer,
-				       struct smc_connection *conn)
+static inline void smcr_cdc_msg_to_host(struct smc_host_cdc_msg *local,
+					struct smc_cdc_msg *peer,
+					struct smc_connection *conn)
 {
 	local->common.type = peer->common.type;
 	local->len = peer->len;
@@ -218,6 +232,27 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
 	local->conn_state_flags = peer->conn_state_flags;
 }
 
+static inline void smcd_cdc_msg_to_host(struct smc_host_cdc_msg *local,
+					struct smcd_cdc_msg *peer)
+{
+	local->prod.wrap = peer->prod_wrap;
+	local->prod.count = peer->prod_count;
+	local->cons.wrap = peer->cons_wrap;
+	local->cons.count = peer->cons_count;
+	local->prod_flags = peer->prod_flags;
+	local->conn_state_flags = peer->conn_state_flags;
+}
+
+static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
+				       struct smc_cdc_msg *peer,
+				       struct smc_connection *conn)
+{
+	if (conn->lgr->is_smcd)
+		smcd_cdc_msg_to_host(local, (struct smcd_cdc_msg *)peer);
+	else
+		smcr_cdc_msg_to_host(local, peer, conn);
+}
+
 struct smc_cdc_tx_pend;
 
 int smc_cdc_get_free_slot(struct smc_connection *conn,
@@ -227,6 +262,8 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
 int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
 		     struct smc_cdc_tx_pend *pend);
 int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
+int smcd_cdc_msg_send(struct smc_connection *conn);
 int smc_cdc_init(void) __init;
+void smcd_cdc_rx_init(struct smc_connection *conn);
 
 #endif /* SMC_CDC_H */
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index ae5d168653ce..ad39efdb4f1c 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -23,9 +23,15 @@
 #include "smc_core.h"
 #include "smc_clc.h"
 #include "smc_ib.h"
+#include "smc_ism.h"
+
+#define SMCR_CLC_ACCEPT_CONFIRM_LEN 68
+#define SMCD_CLC_ACCEPT_CONFIRM_LEN 48
 
 /* eye catcher "SMCR" EBCDIC for CLC messages */
 static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
+/* eye catcher "SMCD" EBCDIC for CLC messages */
+static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'};
 
 /* check if received message has a correct header length and contains valid
  * heading and trailing eyecatchers
@@ -38,10 +44,14 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
 	struct smc_clc_msg_decline *dclc;
 	struct smc_clc_msg_trail *trl;
 
-	if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
+	if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) &&
+	    memcmp(clcm->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)))
 		return false;
 	switch (clcm->type) {
 	case SMC_CLC_PROPOSAL:
+		if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D &&
+		    clcm->path != SMC_TYPE_B)
+			return false;
 		pclc = (struct smc_clc_msg_proposal *)clcm;
 		pclc_prfx = smc_clc_proposal_get_prefix(pclc);
 		if (ntohs(pclc->hdr.length) !=
@@ -56,10 +66,16 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
 		break;
 	case SMC_CLC_ACCEPT:
 	case SMC_CLC_CONFIRM:
+		if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D)
+			return false;
 		clc = (struct smc_clc_msg_accept_confirm *)clcm;
-		if (ntohs(clc->hdr.length) != sizeof(*clc))
+		if ((clcm->path == SMC_TYPE_R &&
+		     ntohs(clc->hdr.length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) ||
+		    (clcm->path == SMC_TYPE_D &&
+		     ntohs(clc->hdr.length) != SMCD_CLC_ACCEPT_CONFIRM_LEN))
 			return false;
-		trl = &clc->trl;
+		trl = (struct smc_clc_msg_trail *)
+			((u8 *)clc + ntohs(clc->hdr.length) - sizeof(*trl));
 		break;
 	case SMC_CLC_DECLINE:
 		dclc = (struct smc_clc_msg_decline *)clcm;
@@ -70,7 +86,8 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
 	default:
 		return false;
 	}
-	if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
+	if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) &&
+	    memcmp(trl->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)))
 		return false;
 	return true;
 }
@@ -296,6 +313,9 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
 	datlen = ntohs(clcm->length);
 	if ((len < sizeof(struct smc_clc_msg_hdr)) ||
 	    (datlen > buflen) ||
+	    (clcm->version != SMC_CLC_V1) ||
+	    (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D &&
+	     clcm->path != SMC_TYPE_B) ||
 	    ((clcm->type != SMC_CLC_DECLINE) &&
 	     (clcm->type != expected_type))) {
 		smc->sk.sk_err = EPROTO;
@@ -357,17 +377,18 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
 }
 
 /* send CLC PROPOSAL message across internal TCP socket */
-int smc_clc_send_proposal(struct smc_sock *smc,
-			  struct smc_ib_device *smcibdev,
-			  u8 ibport)
+int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
+			  struct smc_ib_device *ibdev, u8 ibport,
+			  struct smcd_dev *ismdev)
 {
 	struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX];
 	struct smc_clc_msg_proposal_prefix pclc_prfx;
+	struct smc_clc_msg_smcd pclc_smcd;
 	struct smc_clc_msg_proposal pclc;
 	struct smc_clc_msg_trail trl;
 	int len, i, plen, rc;
 	int reason_code = 0;
-	struct kvec vec[4];
+	struct kvec vec[5];
 	struct msghdr msg;
 
 	/* retrieve ip prefixes for CLC proposal msg */
@@ -382,18 +403,34 @@ int smc_clc_send_proposal(struct smc_sock *smc,
 	memset(&pclc, 0, sizeof(pclc));
 	memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
 	pclc.hdr.type = SMC_CLC_PROPOSAL;
-	pclc.hdr.length = htons(plen);
 	pclc.hdr.version = SMC_CLC_V1;		/* SMC version */
-	memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
-	memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE);
-	memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
-	pclc.iparea_offset = htons(0);
+	pclc.hdr.path = smc_type;
+	if (smc_type == SMC_TYPE_R || smc_type == SMC_TYPE_B) {
+		/* add SMC-R specifics */
+		memcpy(pclc.lcl.id_for_peer, local_systemid,
+		       sizeof(local_systemid));
+		memcpy(&pclc.lcl.gid, &ibdev->gid[ibport - 1], SMC_GID_SIZE);
+		memcpy(&pclc.lcl.mac, &ibdev->mac[ibport - 1], ETH_ALEN);
+		pclc.iparea_offset = htons(0);
+	}
+	if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) {
+		/* add SMC-D specifics */
+		memset(&pclc_smcd, 0, sizeof(pclc_smcd));
+		plen += sizeof(pclc_smcd);
+		pclc.iparea_offset = htons(SMC_CLC_PROPOSAL_MAX_OFFSET);
+		pclc_smcd.gid = ismdev->local_gid;
+	}
+	pclc.hdr.length = htons(plen);
 
 	memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
 	memset(&msg, 0, sizeof(msg));
 	i = 0;
 	vec[i].iov_base = &pclc;
 	vec[i++].iov_len = sizeof(pclc);
+	if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) {
+		vec[i].iov_base = &pclc_smcd;
+		vec[i++].iov_len = sizeof(pclc_smcd);
+	}
 	vec[i].iov_base = &pclc_prfx;
 	vec[i++].iov_len = sizeof(pclc_prfx);
 	if (pclc_prfx.ipv6_prefixes_cnt > 0) {
@@ -429,35 +466,56 @@ int smc_clc_send_confirm(struct smc_sock *smc)
 	struct kvec vec;
 	int len;
 
-	link = &conn->lgr->lnk[SMC_SINGLE_LINK];
 	/* send SMC Confirm CLC msg */
 	memset(&cclc, 0, sizeof(cclc));
-	memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
 	cclc.hdr.type = SMC_CLC_CONFIRM;
-	cclc.hdr.length = htons(sizeof(cclc));
 	cclc.hdr.version = SMC_CLC_V1;		/* SMC version */
-	memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
-	memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
-	       SMC_GID_SIZE);
-	memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
-	hton24(cclc.qpn, link->roce_qp->qp_num);
-	cclc.rmb_rkey =
-		htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
-	cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
-	cclc.rmbe_alert_token = htonl(conn->alert_token_local);
-	cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
-	cclc.rmbe_size = conn->rmbe_size_short;
-	cclc.rmb_dma_addr = cpu_to_be64(
-		(u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
-	hton24(cclc.psn, link->psn_initial);
-
-	memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
+	if (smc->conn.lgr->is_smcd) {
+		/* SMC-D specific settings */
+		memcpy(cclc.hdr.eyecatcher, SMCD_EYECATCHER,
+		       sizeof(SMCD_EYECATCHER));
+		cclc.hdr.path = SMC_TYPE_D;
+		cclc.hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
+		cclc.gid = conn->lgr->smcd->local_gid;
+		cclc.token = conn->rmb_desc->token;
+		cclc.dmbe_size = conn->rmbe_size_short;
+		cclc.dmbe_idx = 0;
+		memcpy(&cclc.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
+		memcpy(cclc.smcd_trl.eyecatcher, SMCD_EYECATCHER,
+		       sizeof(SMCD_EYECATCHER));
+	} else {
+		/* SMC-R specific settings */
+		link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+		memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER,
+		       sizeof(SMC_EYECATCHER));
+		cclc.hdr.path = SMC_TYPE_R;
+		cclc.hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
+		memcpy(cclc.lcl.id_for_peer, local_systemid,
+		       sizeof(local_systemid));
+		memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
+		       SMC_GID_SIZE);
+		memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1],
+		       ETH_ALEN);
+		hton24(cclc.qpn, link->roce_qp->qp_num);
+		cclc.rmb_rkey =
+			htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+		cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
+		cclc.rmbe_alert_token = htonl(conn->alert_token_local);
+		cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
+		cclc.rmbe_size = conn->rmbe_size_short;
+		cclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
+				(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
+		hton24(cclc.psn, link->psn_initial);
+		memcpy(cclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
+		       sizeof(SMC_EYECATCHER));
+	}
 
 	memset(&msg, 0, sizeof(msg));
 	vec.iov_base = &cclc;
-	vec.iov_len = sizeof(cclc);
-	len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(cclc));
-	if (len < sizeof(cclc)) {
+	vec.iov_len = ntohs(cclc.hdr.length);
+	len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
+			     ntohs(cclc.hdr.length));
+	if (len < ntohs(cclc.hdr.length)) {
 		if (len >= 0) {
 			reason_code = -ENETUNREACH;
 			smc->sk.sk_err = -reason_code;
@@ -480,35 +538,58 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
 	int rc = 0;
 	int len;
 
-	link = &conn->lgr->lnk[SMC_SINGLE_LINK];
 	memset(&aclc, 0, sizeof(aclc));
-	memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
 	aclc.hdr.type = SMC_CLC_ACCEPT;
-	aclc.hdr.length = htons(sizeof(aclc));
 	aclc.hdr.version = SMC_CLC_V1;		/* SMC version */
 	if (srv_first_contact)
 		aclc.hdr.flag = 1;
-	memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
-	memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
-	       SMC_GID_SIZE);
-	memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
-	hton24(aclc.qpn, link->roce_qp->qp_num);
-	aclc.rmb_rkey =
-		htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
-	aclc.rmbe_idx = 1;			/* as long as 1 RMB = 1 RMBE */
-	aclc.rmbe_alert_token = htonl(conn->alert_token_local);
-	aclc.qp_mtu = link->path_mtu;
-	aclc.rmbe_size = conn->rmbe_size_short,
-	aclc.rmb_dma_addr = cpu_to_be64(
-		(u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
-	hton24(aclc.psn, link->psn_initial);
-	memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
+
+	if (new_smc->conn.lgr->is_smcd) {
+		/* SMC-D specific settings */
+		aclc.hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
+		memcpy(aclc.hdr.eyecatcher, SMCD_EYECATCHER,
+		       sizeof(SMCD_EYECATCHER));
+		aclc.hdr.path = SMC_TYPE_D;
+		aclc.gid = conn->lgr->smcd->local_gid;
+		aclc.token = conn->rmb_desc->token;
+		aclc.dmbe_size = conn->rmbe_size_short;
+		aclc.dmbe_idx = 0;
+		memcpy(&aclc.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
+		memcpy(aclc.smcd_trl.eyecatcher, SMCD_EYECATCHER,
+		       sizeof(SMCD_EYECATCHER));
+	} else {
+		/* SMC-R specific settings */
+		aclc.hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
+		memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER,
+		       sizeof(SMC_EYECATCHER));
+		aclc.hdr.path = SMC_TYPE_R;
+		link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+		memcpy(aclc.lcl.id_for_peer, local_systemid,
+		       sizeof(local_systemid));
+		memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
+		       SMC_GID_SIZE);
+		memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1],
+		       ETH_ALEN);
+		hton24(aclc.qpn, link->roce_qp->qp_num);
+		aclc.rmb_rkey =
+			htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+		aclc.rmbe_idx = 1;		/* as long as 1 RMB = 1 RMBE */
+		aclc.rmbe_alert_token = htonl(conn->alert_token_local);
+		aclc.qp_mtu = link->path_mtu;
+		aclc.rmbe_size = conn->rmbe_size_short,
+		aclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
+				(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
+		hton24(aclc.psn, link->psn_initial);
+		memcpy(aclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
+		       sizeof(SMC_EYECATCHER));
+	}
 
 	memset(&msg, 0, sizeof(msg));
 	vec.iov_base = &aclc;
-	vec.iov_len = sizeof(aclc);
-	len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, sizeof(aclc));
-	if (len < sizeof(aclc)) {
+	vec.iov_len = ntohs(aclc.hdr.length);
+	len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1,
+			     ntohs(aclc.hdr.length));
+	if (len < ntohs(aclc.hdr.length)) {
 		if (len >= 0)
 			new_smc->sk.sk_err = EPROTO;
 		else
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 41ff9ea96139..100e988ad1a8 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -23,6 +23,9 @@
 #define SMC_CLC_DECLINE		0x04
 
 #define SMC_CLC_V1		0x1		/* SMC version                */
+#define SMC_TYPE_R		0		/* SMC-R only		      */
+#define SMC_TYPE_D		1		/* SMC-D only		      */
+#define SMC_TYPE_B		3		/* SMC-R and SMC-D	      */
 #define CLC_WAIT_TIME		(6 * HZ)	/* max. wait time on clcsock  */
 #define SMC_CLC_DECL_MEM	0x01010000  /* insufficient memory resources  */
 #define SMC_CLC_DECL_TIMEOUT	0x02000000  /* timeout                        */
@@ -42,9 +45,11 @@ struct smc_clc_msg_hdr {	/* header1 of clc messages */
 #if defined(__BIG_ENDIAN_BITFIELD)
 	u8 version : 4,
 	   flag    : 1,
-	   rsvd    : 3;
+	   rsvd	   : 1,
+	   path	   : 2;
 #elif defined(__LITTLE_ENDIAN_BITFIELD)
-	u8 rsvd    : 3,
+	u8 path    : 2,
+	   rsvd    : 1,
 	   flag    : 1,
 	   version : 4;
 #endif
@@ -77,6 +82,11 @@ struct smc_clc_msg_proposal_prefix {	/* prefix part of clc proposal message*/
 	u8 ipv6_prefixes_cnt;	/* number of IPv6 prefixes in prefix array */
 } __aligned(4);
 
+struct smc_clc_msg_smcd {	/* SMC-D GID information */
+	u64 gid;		/* ISM GID of requestor */
+	u8 res[32];
+};
+
 struct smc_clc_msg_proposal {	/* clc proposal message sent by Linux */
 	struct smc_clc_msg_hdr hdr;
 	struct smc_clc_msg_local lcl;
@@ -94,23 +104,45 @@ struct smc_clc_msg_proposal {	/* clc proposal message sent by Linux */
 
 struct smc_clc_msg_accept_confirm {	/* clc accept / confirm message */
 	struct smc_clc_msg_hdr hdr;
-	struct smc_clc_msg_local lcl;
-	u8 qpn[3];		/* QP number */
-	__be32 rmb_rkey;	/* RMB rkey */
-	u8 rmbe_idx;		/* Index of RMBE in RMB */
-	__be32 rmbe_alert_token;/* unique connection id */
+	union {
+		struct { /* SMC-R */
+			struct smc_clc_msg_local lcl;
+			u8 qpn[3];		/* QP number */
+			__be32 rmb_rkey;	/* RMB rkey */
+			u8 rmbe_idx;		/* Index of RMBE in RMB */
+			__be32 rmbe_alert_token;/* unique connection id */
 #if defined(__BIG_ENDIAN_BITFIELD)
-	u8 rmbe_size : 4,	/* RMBE buf size (compressed notation) */
-	   qp_mtu   : 4;	/* QP mtu */
+			u8 rmbe_size : 4,	/* buf size (compressed) */
+			   qp_mtu   : 4;	/* QP mtu */
 #elif defined(__LITTLE_ENDIAN_BITFIELD)
-	u8 qp_mtu   : 4,
-	   rmbe_size : 4;
+			u8 qp_mtu   : 4,
+			   rmbe_size : 4;
 #endif
-	u8 reserved;
-	__be64 rmb_dma_addr;	/* RMB virtual address */
-	u8 reserved2;
-	u8 psn[3];		/* initial packet sequence number */
-	struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */
+			u8 reserved;
+			__be64 rmb_dma_addr;	/* RMB virtual address */
+			u8 reserved2;
+			u8 psn[3];		/* packet sequence number */
+			struct smc_clc_msg_trail smcr_trl;
+						/* eye catcher "SMCR" EBCDIC */
+		} __packed;
+		struct { /* SMC-D */
+			u64 gid;		/* Sender GID */
+			u64 token;		/* DMB token */
+			u8 dmbe_idx;		/* DMBE index */
+#if defined(__BIG_ENDIAN_BITFIELD)
+			u8 dmbe_size : 4,	/* buf size (compressed) */
+			   reserved3 : 4;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+			u8 reserved3 : 4,
+			   dmbe_size : 4;
+#endif
+			u16 reserved4;
+			u32 linkid;		/* Link identifier */
+			u32 reserved5[3];
+			struct smc_clc_msg_trail smcd_trl;
+						/* eye catcher "SMCD" EBCDIC */
+		} __packed;
+	};
 } __packed;			/* format defined in RFC7609 */
 
 struct smc_clc_msg_decline {	/* clc decline message */
@@ -129,13 +161,26 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
 	       ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
 }
 
+/* get SMC-D info from proposal message */
+static inline struct smc_clc_msg_smcd *
+smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop)
+{
+	if (ntohs(prop->iparea_offset) != sizeof(struct smc_clc_msg_smcd))
+		return NULL;
+
+	return (struct smc_clc_msg_smcd *)(prop + 1);
+}
+
+struct smcd_dev;
+
 int smc_clc_prfx_match(struct socket *clcsock,
 		       struct smc_clc_msg_proposal_prefix *prop);
 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
 		     u8 expected_type);
 int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
-int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev,
-			  u8 ibport);
+int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
+			  struct smc_ib_device *smcibdev, u8 ibport,
+			  struct smcd_dev *ismdev);
 int smc_clc_send_confirm(struct smc_sock *smc);
 int smc_clc_send_accept(struct smc_sock *smc, int srv_first_contact);
 
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index add82b0266f3..66741e61a3b0 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -25,6 +25,7 @@
 #include "smc_llc.h"
 #include "smc_cdc.h"
 #include "smc_close.h"
+#include "smc_ism.h"
 
 #define SMC_LGR_NUM_INCR		256
 #define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
@@ -46,8 +47,8 @@ static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
 	 * otherwise there is a risk of out-of-sync link groups.
 	 */
 	mod_delayed_work(system_wq, &lgr->free_work,
-			 lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT :
-						 SMC_LGR_FREE_DELAY_SERV);
+			 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
+			 SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV);
 }
 
 /* Register connection's alert token in our lookup structure.
@@ -153,16 +154,18 @@ static void smc_lgr_free_work(struct work_struct *work)
 free:
 	spin_unlock_bh(&smc_lgr_list.lock);
 	if (!delayed_work_pending(&lgr->free_work)) {
-		if (lgr->lnk[SMC_SINGLE_LINK].state != SMC_LNK_INACTIVE)
+		if (!lgr->is_smcd &&
+		    lgr->lnk[SMC_SINGLE_LINK].state != SMC_LNK_INACTIVE)
 			smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
 		smc_lgr_free(lgr);
 	}
 }
 
 /* create a new SMC link group */
-static int smc_lgr_create(struct smc_sock *smc,
+static int smc_lgr_create(struct smc_sock *smc, bool is_smcd,
 			  struct smc_ib_device *smcibdev, u8 ibport,
-			  char *peer_systemid, unsigned short vlan_id)
+			  char *peer_systemid, unsigned short vlan_id,
+			  struct smcd_dev *smcismdev, u64 peer_gid)
 {
 	struct smc_link_group *lgr;
 	struct smc_link *lnk;
@@ -170,17 +173,23 @@ static int smc_lgr_create(struct smc_sock *smc,
 	int rc = 0;
 	int i;
 
+	if (is_smcd && vlan_id) {
+		rc = smc_ism_get_vlan(smcismdev, vlan_id);
+		if (rc)
+			goto out;
+	}
+
 	lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
 	if (!lgr) {
 		rc = -ENOMEM;
 		goto out;
 	}
-	lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
+	lgr->is_smcd = is_smcd;
 	lgr->sync_err = 0;
-	memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
 	lgr->vlan_id = vlan_id;
 	rwlock_init(&lgr->sndbufs_lock);
 	rwlock_init(&lgr->rmbs_lock);
+	rwlock_init(&lgr->conns_lock);
 	for (i = 0; i < SMC_RMBE_SIZES; i++) {
 		INIT_LIST_HEAD(&lgr->sndbufs[i]);
 		INIT_LIST_HEAD(&lgr->rmbs[i]);
@@ -189,36 +198,44 @@ static int smc_lgr_create(struct smc_sock *smc,
 	memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
 	INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
 	lgr->conns_all = RB_ROOT;
-
-	lnk = &lgr->lnk[SMC_SINGLE_LINK];
-	/* initialize link */
-	lnk->state = SMC_LNK_ACTIVATING;
-	lnk->link_id = SMC_SINGLE_LINK;
-	lnk->smcibdev = smcibdev;
-	lnk->ibport = ibport;
-	lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
-	if (!smcibdev->initialized)
-		smc_ib_setup_per_ibdev(smcibdev);
-	get_random_bytes(rndvec, sizeof(rndvec));
-	lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16);
-	rc = smc_llc_link_init(lnk);
-	if (rc)
-		goto free_lgr;
-	rc = smc_wr_alloc_link_mem(lnk);
-	if (rc)
-		goto clear_llc_lnk;
-	rc = smc_ib_create_protection_domain(lnk);
-	if (rc)
-		goto free_link_mem;
-	rc = smc_ib_create_queue_pair(lnk);
-	if (rc)
-		goto dealloc_pd;
-	rc = smc_wr_create_link(lnk);
-	if (rc)
-		goto destroy_qp;
-
+	if (is_smcd) {
+		/* SMC-D specific settings */
+		lgr->peer_gid = peer_gid;
+		lgr->smcd = smcismdev;
+	} else {
+		/* SMC-R specific settings */
+		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
+		memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
+
+		lnk = &lgr->lnk[SMC_SINGLE_LINK];
+		/* initialize link */
+		lnk->state = SMC_LNK_ACTIVATING;
+		lnk->link_id = SMC_SINGLE_LINK;
+		lnk->smcibdev = smcibdev;
+		lnk->ibport = ibport;
+		lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
+		if (!smcibdev->initialized)
+			smc_ib_setup_per_ibdev(smcibdev);
+		get_random_bytes(rndvec, sizeof(rndvec));
+		lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
+			(rndvec[2] << 16);
+		rc = smc_llc_link_init(lnk);
+		if (rc)
+			goto free_lgr;
+		rc = smc_wr_alloc_link_mem(lnk);
+		if (rc)
+			goto clear_llc_lnk;
+		rc = smc_ib_create_protection_domain(lnk);
+		if (rc)
+			goto free_link_mem;
+		rc = smc_ib_create_queue_pair(lnk);
+		if (rc)
+			goto dealloc_pd;
+		rc = smc_wr_create_link(lnk);
+		if (rc)
+			goto destroy_qp;
+	}
 	smc->conn.lgr = lgr;
-	rwlock_init(&lgr->conns_lock);
 	spin_lock_bh(&smc_lgr_list.lock);
 	list_add(&lgr->list, &smc_lgr_list.list);
 	spin_unlock_bh(&smc_lgr_list.lock);
@@ -264,7 +281,12 @@ void smc_conn_free(struct smc_connection *conn)
 {
 	if (!conn->lgr)
 		return;
-	smc_cdc_tx_dismiss_slots(conn);
+	if (conn->lgr->is_smcd) {
+		smc_ism_unset_conn(conn);
+		tasklet_kill(&conn->rx_tsklet);
+	} else {
+		smc_cdc_tx_dismiss_slots(conn);
+	}
 	smc_lgr_unregister_conn(conn);
 	smc_buf_unuse(conn);
 }
@@ -280,8 +302,8 @@ static void smc_link_clear(struct smc_link *lnk)
 	smc_wr_free_link_mem(lnk);
 }
 
-static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
-			 struct smc_buf_desc *buf_desc)
+static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
+			  struct smc_buf_desc *buf_desc)
 {
 	struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
 
@@ -301,6 +323,28 @@ static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
 	kfree(buf_desc);
 }
 
+static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
+			  struct smc_buf_desc *buf_desc)
+{
+	if (is_dmb) {
+		/* restore original buf len */
+		buf_desc->len += sizeof(struct smcd_cdc_msg);
+		smc_ism_unregister_dmb(lgr->smcd, buf_desc);
+	} else {
+		kfree(buf_desc->cpu_addr);
+	}
+	kfree(buf_desc);
+}
+
+static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
+			 struct smc_buf_desc *buf_desc)
+{
+	if (lgr->is_smcd)
+		smcd_buf_free(lgr, is_rmb, buf_desc);
+	else
+		smcr_buf_free(lgr, is_rmb, buf_desc);
+}
+
 static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
 {
 	struct smc_buf_desc *buf_desc, *bf_desc;
@@ -332,7 +376,10 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
 void smc_lgr_free(struct smc_link_group *lgr)
 {
 	smc_lgr_free_bufs(lgr);
-	smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
+	if (lgr->is_smcd)
+		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
+	else
+		smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
 	kfree(lgr);
 }
 
@@ -357,7 +404,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr)
 	lgr->terminating = 1;
 	if (!list_empty(&lgr->list)) /* forget lgr */
 		list_del_init(&lgr->list);
-	smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
+	if (!lgr->is_smcd)
+		smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
 
 	write_lock_bh(&lgr->conns_lock);
 	node = rb_first(&lgr->conns_all);
@@ -374,7 +422,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr)
 		node = rb_first(&lgr->conns_all);
 	}
 	write_unlock_bh(&lgr->conns_lock);
-	wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
+	if (!lgr->is_smcd)
+		wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
 	smc_lgr_schedule_free_work(lgr);
 }
 
@@ -392,17 +441,44 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
 
 	spin_lock_bh(&smc_lgr_list.lock);
 	list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
-		if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
+		if (!lgr->is_smcd &&
+		    lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
 		    lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
 			__smc_lgr_terminate(lgr);
 	}
 	spin_unlock_bh(&smc_lgr_list.lock);
 }
 
+/* Called when SMC-D device is terminated or peer is lost */
+void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid)
+{
+	struct smc_link_group *lgr, *l;
+	LIST_HEAD(lgr_free_list);
+
+	/* run common cleanup function and build free list */
+	spin_lock_bh(&smc_lgr_list.lock);
+	list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
+		if (lgr->is_smcd && lgr->smcd == dev &&
+		    (!peer_gid || lgr->peer_gid == peer_gid) &&
+		    !list_empty(&lgr->list)) {
+			__smc_lgr_terminate(lgr);
+			list_move(&lgr->list, &lgr_free_list);
+		}
+	}
+	spin_unlock_bh(&smc_lgr_list.lock);
+
+	/* cancel the regular free workers and actually free lgrs */
+	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
+		list_del_init(&lgr->list);
+		cancel_delayed_work_sync(&lgr->free_work);
+		smc_lgr_free(lgr);
+	}
+}
+
 /* Determine vlan of internal TCP socket.
  * @vlan_id: address to store the determined vlan id into
  */
-static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
+int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
 {
 	struct dst_entry *dst = sk_dst_get(clcsock->sk);
 	struct net_device *ndev;
@@ -477,10 +553,30 @@ static int smc_link_determine_gid(struct smc_link_group *lgr)
 	return -ENODEV;
 }
 
+static bool smcr_lgr_match(struct smc_link_group *lgr,
+			   struct smc_clc_msg_local *lcl,
+			   enum smc_lgr_role role)
+{
+	return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
+		       SMC_SYSTEMID_LEN) &&
+		!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
+			SMC_GID_SIZE) &&
+		!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
+			sizeof(lcl->mac)) &&
+		lgr->role == role;
+}
+
+static bool smcd_lgr_match(struct smc_link_group *lgr,
+			   struct smcd_dev *smcismdev, u64 peer_gid)
+{
+	return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
+}
+
 /* create a new SMC connection (and a new link group if necessary) */
-int smc_conn_create(struct smc_sock *smc,
+int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
 		    struct smc_ib_device *smcibdev, u8 ibport,
-		    struct smc_clc_msg_local *lcl, int srv_first_contact)
+		    struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
+		    u64 peer_gid)
 {
 	struct smc_connection *conn = &smc->conn;
 	int local_contact = SMC_FIRST_CONTACT;
@@ -502,17 +598,12 @@ int smc_conn_create(struct smc_sock *smc,
 	spin_lock_bh(&smc_lgr_list.lock);
 	list_for_each_entry(lgr, &smc_lgr_list.list, list) {
 		write_lock_bh(&lgr->conns_lock);
-		if (!memcmp(lgr->peer_systemid, lcl->id_for_peer,
-			    SMC_SYSTEMID_LEN) &&
-		    !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
-			    SMC_GID_SIZE) &&
-		    !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
-			    sizeof(lcl->mac)) &&
+		if ((is_smcd ? smcd_lgr_match(lgr, smcd, peer_gid) :
+		     smcr_lgr_match(lgr, lcl, role)) &&
 		    !lgr->sync_err &&
-		    (lgr->role == role) &&
-		    (lgr->vlan_id == vlan_id) &&
-		    ((role == SMC_CLNT) ||
-		     (lgr->conns_num < SMC_RMBS_PER_LGR_MAX))) {
+		    lgr->vlan_id == vlan_id &&
+		    (role == SMC_CLNT ||
+		     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
 			/* link group found */
 			local_contact = SMC_REUSE_CONTACT;
 			conn->lgr = lgr;
@@ -535,16 +626,21 @@ int smc_conn_create(struct smc_sock *smc,
 
 create:
 	if (local_contact == SMC_FIRST_CONTACT) {
-		rc = smc_lgr_create(smc, smcibdev, ibport,
-				    lcl->id_for_peer, vlan_id);
+		rc = smc_lgr_create(smc, is_smcd, smcibdev, ibport,
+				    lcl->id_for_peer, vlan_id, smcd, peer_gid);
 		if (rc)
 			goto out;
 		smc_lgr_register_conn(conn); /* add smc conn to lgr */
-		rc = smc_link_determine_gid(conn->lgr);
+		if (!is_smcd)
+			rc = smc_link_determine_gid(conn->lgr);
 	}
 	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
 	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
 	conn->urg_state = SMC_URG_READ;
+	if (is_smcd) {
+		conn->rx_off = sizeof(struct smcd_cdc_msg);
+		smcd_cdc_rx_init(conn); /* init tasklet for this conn */
+	}
 #ifndef KERNEL_HAS_ATOMIC64
 	spin_lock_init(&conn->acurs_lock);
 #endif
@@ -609,8 +705,8 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size)
 	return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
 }
 
-static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
-					       bool is_rmb, int bufsize)
+static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
+						bool is_rmb, int bufsize)
 {
 	struct smc_buf_desc *buf_desc;
 	struct smc_link *lnk;
@@ -668,7 +764,44 @@ static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
 	return buf_desc;
 }
 
-static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
+#define SMCD_DMBE_SIZES		7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
+
+static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
+						bool is_dmb, int bufsize)
+{
+	struct smc_buf_desc *buf_desc;
+	int rc;
+
+	if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
+		return ERR_PTR(-EAGAIN);
+
+	/* try to alloc a new DMB */
+	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
+	if (!buf_desc)
+		return ERR_PTR(-ENOMEM);
+	if (is_dmb) {
+		rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
+		if (rc) {
+			kfree(buf_desc);
+			return ERR_PTR(-EAGAIN);
+		}
+		buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
+		/* CDC header stored in buf. So, pretend it was smaller */
+		buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
+	} else {
+		buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
+					     __GFP_NOWARN | __GFP_NORETRY |
+					     __GFP_NOMEMALLOC);
+		if (!buf_desc->cpu_addr) {
+			kfree(buf_desc);
+			return ERR_PTR(-EAGAIN);
+		}
+		buf_desc->len = bufsize;
+	}
+	return buf_desc;
+}
+
+static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 {
 	struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
 	struct smc_connection *conn = &smc->conn;
@@ -706,7 +839,11 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
 			break; /* found reusable slot */
 		}
 
-		buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize);
+		if (is_smcd)
+			buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
+		else
+			buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);
+
 		if (PTR_ERR(buf_desc) == -ENOMEM)
 			break;
 		if (IS_ERR(buf_desc))
@@ -727,7 +864,10 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
 		conn->rmbe_size_short = bufsize_short;
 		smc->sk.sk_rcvbuf = bufsize * 2;
 		atomic_set(&conn->bytes_to_rcv, 0);
-		conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize);
+		conn->rmbe_update_limit =
+			smc_rmb_wnd_update_limit(buf_desc->len);
+		if (is_smcd)
+			smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
 	} else {
 		conn->sndbuf_desc = buf_desc;
 		smc->sk.sk_sndbuf = bufsize * 2;
@@ -740,6 +880,8 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
 {
 	struct smc_link_group *lgr = conn->lgr;
 
+	if (!conn->lgr || conn->lgr->is_smcd)
+		return;
 	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
 			       conn->sndbuf_desc, DMA_TO_DEVICE);
 }
@@ -748,6 +890,8 @@ void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
 {
 	struct smc_link_group *lgr = conn->lgr;
 
+	if (!conn->lgr || conn->lgr->is_smcd)
+		return;
 	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
 				  conn->sndbuf_desc, DMA_TO_DEVICE);
 }
@@ -756,6 +900,8 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
 {
 	struct smc_link_group *lgr = conn->lgr;
 
+	if (!conn->lgr || conn->lgr->is_smcd)
+		return;
 	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
 			       conn->rmb_desc, DMA_FROM_DEVICE);
 }
@@ -764,6 +910,8 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
 {
 	struct smc_link_group *lgr = conn->lgr;
 
+	if (!conn->lgr || conn->lgr->is_smcd)
+		return;
 	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
 				  conn->rmb_desc, DMA_FROM_DEVICE);
 }
@@ -774,16 +922,16 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
  * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
  * extra RMB for every connection in a link group
  */
-int smc_buf_create(struct smc_sock *smc)
+int smc_buf_create(struct smc_sock *smc, bool is_smcd)
 {
 	int rc;
 
 	/* create send buffer */
-	rc = __smc_buf_create(smc, false);
+	rc = __smc_buf_create(smc, is_smcd, false);
 	if (rc)
 		return rc;
 	/* create rmb */
-	rc = __smc_buf_create(smc, true);
+	rc = __smc_buf_create(smc, is_smcd, true);
 	if (rc)
 		smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
 	return rc;
@@ -865,7 +1013,8 @@ void smc_core_exit(void)
 	spin_unlock_bh(&smc_lgr_list.lock);
 	list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
 		list_del_init(&lgr->list);
-		smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
+		if (!lgr->is_smcd)
+			smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
 		cancel_delayed_work_sync(&lgr->free_work);
 		smc_lgr_free(lgr); /* free link group */
 	}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 93cb3523bf50..8b47e0168fc3 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -124,15 +124,28 @@ struct smc_buf_desc {
 	void			*cpu_addr;	/* virtual address of buffer */
 	struct page		*pages;
 	int			len;		/* length of buffer */
-	struct sg_table		sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */
-	struct ib_mr		*mr_rx[SMC_LINKS_PER_LGR_MAX];
-						/* for rmb only: memory region
-						 * incl. rkey provided to peer
-						 */
-	u32			order;		/* allocation order */
 	u32			used;		/* currently used / unused */
 	u8			reused	: 1;	/* new created / reused */
 	u8			regerr	: 1;	/* err during registration */
+	union {
+		struct { /* SMC-R */
+			struct sg_table		sgt[SMC_LINKS_PER_LGR_MAX];
+						/* virtual buffer */
+			struct ib_mr		*mr_rx[SMC_LINKS_PER_LGR_MAX];
+						/* for rmb only: memory region
+						 * incl. rkey provided to peer
+						 */
+			u32			order;	/* allocation order */
+		};
+		struct { /* SMC-D */
+			unsigned short		sba_idx;
+						/* SBA index number */
+			u64			token;
+						/* DMB token number */
+			dma_addr_t		dma_addr;
+						/* DMA address */
+		};
+	};
 };
 
 struct smc_rtoken {				/* address/key of remote RMB */
@@ -148,12 +161,10 @@ struct smc_rtoken {				/* address/key of remote RMB */
  * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)
  */
 
+struct smcd_dev;
+
 struct smc_link_group {
 	struct list_head	list;
-	enum smc_lgr_role	role;		/* client or server */
-	struct smc_link		lnk[SMC_LINKS_PER_LGR_MAX];	/* smc link */
-	char			peer_systemid[SMC_SYSTEMID_LEN];
-						/* unique system_id of peer */
 	struct rb_root		conns_all;	/* connection tree */
 	rwlock_t		conns_lock;	/* protects conns_all */
 	unsigned int		conns_num;	/* current # of connections */
@@ -163,17 +174,35 @@ struct smc_link_group {
 	rwlock_t		sndbufs_lock;	/* protects tx buffers */
 	struct list_head	rmbs[SMC_RMBE_SIZES];	/* rx buffers */
 	rwlock_t		rmbs_lock;	/* protects rx buffers */
-	struct smc_rtoken	rtokens[SMC_RMBS_PER_LGR_MAX]
-				       [SMC_LINKS_PER_LGR_MAX];
-						/* remote addr/key pairs */
-	unsigned long		rtokens_used_mask[BITS_TO_LONGS(
-							SMC_RMBS_PER_LGR_MAX)];
-						/* used rtoken elements */
 
 	u8			id[SMC_LGR_ID_SIZE];	/* unique lgr id */
 	struct delayed_work	free_work;	/* delayed freeing of an lgr */
 	u8			sync_err : 1;	/* lgr no longer fits to peer */
 	u8			terminating : 1;/* lgr is terminating */
+
+	bool			is_smcd;	/* SMC-R or SMC-D */
+	union {
+		struct { /* SMC-R */
+			enum smc_lgr_role	role;
+						/* client or server */
+			struct smc_link		lnk[SMC_LINKS_PER_LGR_MAX];
+						/* smc link */
+			char			peer_systemid[SMC_SYSTEMID_LEN];
+						/* unique system_id of peer */
+			struct smc_rtoken	rtokens[SMC_RMBS_PER_LGR_MAX]
+						[SMC_LINKS_PER_LGR_MAX];
+						/* remote addr/key pairs */
+			unsigned long		rtokens_used_mask[BITS_TO_LONGS
+							(SMC_RMBS_PER_LGR_MAX)];
+						/* used rtoken elements */
+		};
+		struct { /* SMC-D */
+			u64			peer_gid;
+						/* Peer GID (remote) */
+			struct smcd_dev		*smcd;
+						/* ISM device for VLAN reg. */
+		};
+	};
 };
 
 /* Find the connection associated with the given alert token in the link group.
@@ -217,7 +246,8 @@ void smc_lgr_free(struct smc_link_group *lgr);
 void smc_lgr_forget(struct smc_link_group *lgr);
 void smc_lgr_terminate(struct smc_link_group *lgr);
 void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
-int smc_buf_create(struct smc_sock *smc);
+void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid);
+int smc_buf_create(struct smc_sock *smc, bool is_smcd);
 int smc_uncompress_bufsize(u8 compressed);
 int smc_rmb_rtoken_handling(struct smc_connection *conn,
 			    struct smc_clc_msg_accept_confirm *clc);
@@ -227,9 +257,13 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
 void smc_rmb_sync_sg_for_device(struct smc_connection *conn);
+int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id);
+
 void smc_conn_free(struct smc_connection *conn);
-int smc_conn_create(struct smc_sock *smc,
+int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
 		    struct smc_ib_device *smcibdev, u8 ibport,
-		    struct smc_clc_msg_local *lcl, int srv_first_contact);
+		    struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
+		    u64 peer_gid);
+void smcd_conn_free(struct smc_connection *conn);
 void smc_core_exit(void);
 #endif
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 839354402215..6d83eef1b743 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -136,7 +136,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
 			goto errout;
 	}
 
-	if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr &&
+	if (smc->conn.lgr && !smc->conn.lgr->is_smcd &&
+	    (req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) &&
 	    !list_empty(&smc->conn.lgr->list)) {
 		struct smc_diag_lgrinfo linfo = {
 			.role = smc->conn.lgr->role,
@@ -155,6 +156,21 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
 		if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0)
 			goto errout;
 	}
+	if (smc->conn.lgr && smc->conn.lgr->is_smcd &&
+	    (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) &&
+	    !list_empty(&smc->conn.lgr->list)) {
+		struct smc_connection *conn = &smc->conn;
+		struct smcd_diag_dmbinfo dinfo = {
+			.linkid = *((u32 *)conn->lgr->id),
+			.peer_gid = conn->lgr->peer_gid,
+			.my_gid = conn->lgr->smcd->local_gid,
+			.token = conn->rmb_desc->token,
+			.peer_token = conn->peer_token
+		};
+
+		if (nla_put(skb, SMC_DIAG_DMBINFO, sizeof(dinfo), &dinfo) < 0)
+			goto errout;
+	}
 
 	nlmsg_end(skb, nlh);
 	return 0;
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 0eed7ab9f28b..36de2fd76170 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -143,6 +143,62 @@ out:
 	return rc;
 }
 
+static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
+{
+	struct ib_gid_attr gattr;
+	int rc;
+
+	rc = ib_query_gid(smcibdev->ibdev, ibport, 0,
+			  &smcibdev->gid[ibport - 1], &gattr);
+	if (rc || !gattr.ndev)
+		return -ENODEV;
+
+	memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN);
+	dev_put(gattr.ndev);
+	return 0;
+}
+
+/* Create an identifier unique for this instance of SMC-R.
+ * The MAC-address of the first active registered IB device
+ * plus a random 2-byte number is used to create this identifier.
+ * This name is delivered to the peer during connection initialization.
+ */
+static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev,
+						u8 ibport)
+{
+	memcpy(&local_systemid[2], &smcibdev->mac[ibport - 1],
+	       sizeof(smcibdev->mac[ibport - 1]));
+	get_random_bytes(&local_systemid[0], 2);
+}
+
+bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
+{
+	return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
+}
+
+static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
+{
+	int rc;
+
+	memset(&smcibdev->pattr[ibport - 1], 0,
+	       sizeof(smcibdev->pattr[ibport - 1]));
+	rc = ib_query_port(smcibdev->ibdev, ibport,
+			   &smcibdev->pattr[ibport - 1]);
+	if (rc)
+		goto out;
+	/* the SMC protocol requires specification of the RoCE MAC address */
+	rc = smc_ib_fill_gid_and_mac(smcibdev, ibport);
+	if (rc)
+		goto out;
+	if (!strncmp(local_systemid, SMC_LOCAL_SYSTEMID_RESET,
+		     sizeof(local_systemid)) &&
+	    smc_ib_port_active(smcibdev, ibport))
+		/* create unique system identifier */
+		smc_ib_define_local_systemid(smcibdev, ibport);
+out:
+	return rc;
+}
+
 /* process context wrapper for might_sleep smc_ib_remember_port_attr */
 static void smc_ib_port_event_work(struct work_struct *work)
 {
@@ -370,62 +426,6 @@ void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
 	buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0;
 }
 
-static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
-{
-	struct ib_gid_attr gattr;
-	int rc;
-
-	rc = ib_query_gid(smcibdev->ibdev, ibport, 0,
-			  &smcibdev->gid[ibport - 1], &gattr);
-	if (rc || !gattr.ndev)
-		return -ENODEV;
-
-	memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN);
-	dev_put(gattr.ndev);
-	return 0;
-}
-
-/* Create an identifier unique for this instance of SMC-R.
- * The MAC-address of the first active registered IB device
- * plus a random 2-byte number is used to create this identifier.
- * This name is delivered to the peer during connection initialization.
- */
-static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev,
-						u8 ibport)
-{
-	memcpy(&local_systemid[2], &smcibdev->mac[ibport - 1],
-	       sizeof(smcibdev->mac[ibport - 1]));
-	get_random_bytes(&local_systemid[0], 2);
-}
-
-bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
-{
-	return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
-}
-
-int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
-{
-	int rc;
-
-	memset(&smcibdev->pattr[ibport - 1], 0,
-	       sizeof(smcibdev->pattr[ibport - 1]));
-	rc = ib_query_port(smcibdev->ibdev, ibport,
-			   &smcibdev->pattr[ibport - 1]);
-	if (rc)
-		goto out;
-	/* the SMC protocol requires specification of the RoCE MAC address */
-	rc = smc_ib_fill_gid_and_mac(smcibdev, ibport);
-	if (rc)
-		goto out;
-	if (!strncmp(local_systemid, SMC_LOCAL_SYSTEMID_RESET,
-		     sizeof(local_systemid)) &&
-	    smc_ib_port_active(smcibdev, ibport))
-		/* create unique system identifier */
-		smc_ib_define_local_systemid(smcibdev, ibport);
-out:
-	return rc;
-}
-
 long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
 {
 	struct ib_cq_init_attr cqattr =	{
@@ -454,9 +454,6 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
 		smcibdev->roce_cq_recv = NULL;
 		goto err;
 	}
-	INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev,
-			      smc_ib_global_event_handler);
-	ib_register_event_handler(&smcibdev->event_handler);
 	smc_wr_add_dev(smcibdev);
 	smcibdev->initialized = 1;
 	return rc;
@@ -472,7 +469,6 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev)
 		return;
 	smcibdev->initialized = 0;
 	smc_wr_remove_dev(smcibdev);
-	ib_unregister_event_handler(&smcibdev->event_handler);
 	ib_destroy_cq(smcibdev->roce_cq_recv);
 	ib_destroy_cq(smcibdev->roce_cq_send);
 }
@@ -483,6 +479,8 @@ static struct ib_client smc_ib_client;
 static void smc_ib_add_dev(struct ib_device *ibdev)
 {
 	struct smc_ib_device *smcibdev;
+	u8 port_cnt;
+	int i;
 
 	if (ibdev->node_type != RDMA_NODE_IB_CA)
 		return;
@@ -498,6 +496,21 @@ static void smc_ib_add_dev(struct ib_device *ibdev)
 	list_add_tail(&smcibdev->list, &smc_ib_devices.list);
 	spin_unlock(&smc_ib_devices.lock);
 	ib_set_client_data(ibdev, &smc_ib_client, smcibdev);
+	INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev,
+			      smc_ib_global_event_handler);
+	ib_register_event_handler(&smcibdev->event_handler);
+
+	/* trigger reading of the port attributes */
+	port_cnt = smcibdev->ibdev->phys_port_cnt;
+	for (i = 0;
+	     i < min_t(size_t, port_cnt, SMC_MAX_PORTS);
+	     i++) {
+		set_bit(i, &smcibdev->port_event_mask);
+		/* determine pnetids of the port */
+		smc_pnetid_by_dev_port(ibdev->dev.parent, i,
+				       smcibdev->pnetid[i]);
+	}
+	schedule_work(&smcibdev->port_event_work);
 }
 
 /* callback function for ib_register_client() */
@@ -512,6 +525,7 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
 	spin_unlock(&smc_ib_devices.lock);
 	smc_pnet_remove_by_ibdev(smcibdev);
 	smc_ib_cleanup_per_ibdev(smcibdev);
+	ib_unregister_event_handler(&smcibdev->event_handler);
 	kfree(smcibdev);
 }
 
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index e90630dadf8e..7c1223c91229 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -15,6 +15,7 @@
 #include <linux/interrupt.h>
 #include <linux/if_ether.h>
 #include <rdma/ib_verbs.h>
+#include <net/smc.h>
 
 #define SMC_MAX_PORTS			2	/* Max # of ports */
 #define SMC_GID_SIZE			sizeof(union ib_gid)
@@ -40,6 +41,8 @@ struct smc_ib_device {				/* ib-device infos for smc */
 	char			mac[SMC_MAX_PORTS][ETH_ALEN];
 						/* mac address per port*/
 	union ib_gid		gid[SMC_MAX_PORTS]; /* gid per port */
+	u8			pnetid[SMC_MAX_PORTS][SMC_MAX_PNETID_LEN];
+						/* pnetid per port */
 	u8			initialized : 1; /* ib dev CQ, evthdl done */
 	struct work_struct	port_event_work;
 	unsigned long		port_event_mask;
@@ -51,7 +54,6 @@ struct smc_link;
 int smc_ib_register_client(void) __init;
 void smc_ib_unregister_client(void);
 bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport);
-int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport);
 int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
 		      struct smc_buf_desc *buf_slot,
 		      enum dma_data_direction data_direction);
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
new file mode 100644
index 000000000000..cfade7fdcc6d
--- /dev/null
+++ b/net/smc/smc_ism.c
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Shared Memory Communications Direct over ISM devices (SMC-D)
+ *
+ * Functions for ISM device.
+ *
+ * Copyright IBM Corp. 2018
+ */
+
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <asm/page.h>
+
+#include "smc.h"
+#include "smc_core.h"
+#include "smc_ism.h"
+#include "smc_pnet.h"
+
+struct smcd_dev_list smcd_dev_list = {
+	.list = LIST_HEAD_INIT(smcd_dev_list.list),
+	.lock = __SPIN_LOCK_UNLOCKED(smcd_dev_list.lock)
+};
+
+/* Test if an ISM communication is possible. */
+int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd)
+{
+	return smcd->ops->query_remote_gid(smcd, peer_gid, vlan_id ? 1 : 0,
+					   vlan_id);
+}
+
+int smc_ism_write(struct smcd_dev *smcd, const struct smc_ism_position *pos,
+		  void *data, size_t len)
+{
+	int rc;
+
+	rc = smcd->ops->move_data(smcd, pos->token, pos->index, pos->signal,
+				  pos->offset, data, len);
+
+	return rc < 0 ? rc : 0;
+}
+
+/* Set a connection using this DMBE. */
+void smc_ism_set_conn(struct smc_connection *conn)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&conn->lgr->smcd->lock, flags);
+	conn->lgr->smcd->conn[conn->rmb_desc->sba_idx] = conn;
+	spin_unlock_irqrestore(&conn->lgr->smcd->lock, flags);
+}
+
+/* Unset a connection using this DMBE. */
+void smc_ism_unset_conn(struct smc_connection *conn)
+{
+	unsigned long flags;
+
+	if (!conn->rmb_desc)
+		return;
+
+	spin_lock_irqsave(&conn->lgr->smcd->lock, flags);
+	conn->lgr->smcd->conn[conn->rmb_desc->sba_idx] = NULL;
+	spin_unlock_irqrestore(&conn->lgr->smcd->lock, flags);
+}
+
+/* Register a VLAN identifier with the ISM device. Use a reference count
+ * and add a VLAN identifier only when the first DMB using this VLAN is
+ * registered.
+ */
+int smc_ism_get_vlan(struct smcd_dev *smcd, unsigned short vlanid)
+{
+	struct smc_ism_vlanid *new_vlan, *vlan;
+	unsigned long flags;
+	int rc = 0;
+
+	if (!vlanid)			/* No valid vlan id */
+		return -EINVAL;
+
+	/* create new vlan entry, in case we need it */
+	new_vlan = kzalloc(sizeof(*new_vlan), GFP_KERNEL);
+	if (!new_vlan)
+		return -ENOMEM;
+	new_vlan->vlanid = vlanid;
+	refcount_set(&new_vlan->refcnt, 1);
+
+	/* if there is an existing entry, increase count and return */
+	spin_lock_irqsave(&smcd->lock, flags);
+	list_for_each_entry(vlan, &smcd->vlan, list) {
+		if (vlan->vlanid == vlanid) {
+			refcount_inc(&vlan->refcnt);
+			kfree(new_vlan);
+			goto out;
+		}
+	}
+
+	/* no existing entry found.
+	 * add new entry to device; might fail, e.g., if HW limit reached
+	 */
+	if (smcd->ops->add_vlan_id(smcd, vlanid)) {
+		kfree(new_vlan);
+		rc = -EIO;
+		goto out;
+	}
+	list_add_tail(&new_vlan->list, &smcd->vlan);
+out:
+	spin_unlock_irqrestore(&smcd->lock, flags);
+	return rc;
+}
+
+/* Unregister a VLAN identifier with the ISM device. Use a reference count
+ * and remove a VLAN identifier only when the last DMB using this VLAN is
+ * unregistered.
+ */
+int smc_ism_put_vlan(struct smcd_dev *smcd, unsigned short vlanid)
+{
+	struct smc_ism_vlanid *vlan;
+	unsigned long flags;
+	bool found = false;
+	int rc = 0;
+
+	if (!vlanid)			/* No valid vlan id */
+		return -EINVAL;
+
+	spin_lock_irqsave(&smcd->lock, flags);
+	list_for_each_entry(vlan, &smcd->vlan, list) {
+		if (vlan->vlanid == vlanid) {
+			if (!refcount_dec_and_test(&vlan->refcnt))
+				goto out;
+			found = true;
+			break;
+		}
+	}
+	if (!found) {
+		rc = -ENOENT;
+		goto out;		/* VLAN id not in table */
+	}
+
+	/* Found and the last reference just gone */
+	if (smcd->ops->del_vlan_id(smcd, vlanid))
+		rc = -EIO;
+	list_del(&vlan->list);
+	kfree(vlan);
+out:
+	spin_unlock_irqrestore(&smcd->lock, flags);
+	return rc;
+}
+
+int smc_ism_unregister_dmb(struct smcd_dev *smcd, struct smc_buf_desc *dmb_desc)
+{
+	struct smcd_dmb dmb;
+
+	memset(&dmb, 0, sizeof(dmb));
+	dmb.dmb_tok = dmb_desc->token;
+	dmb.sba_idx = dmb_desc->sba_idx;
+	dmb.cpu_addr = dmb_desc->cpu_addr;
+	dmb.dma_addr = dmb_desc->dma_addr;
+	dmb.dmb_len = dmb_desc->len;
+	return smcd->ops->unregister_dmb(smcd, &dmb);
+}
+
+int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
+			 struct smc_buf_desc *dmb_desc)
+{
+	struct smcd_dmb dmb;
+	int rc;
+
+	memset(&dmb, 0, sizeof(dmb));
+	dmb.dmb_len = dmb_len;
+	dmb.sba_idx = dmb_desc->sba_idx;
+	dmb.vlan_id = lgr->vlan_id;
+	dmb.rgid = lgr->peer_gid;
+	rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb);
+	if (!rc) {
+		dmb_desc->sba_idx = dmb.sba_idx;
+		dmb_desc->token = dmb.dmb_tok;
+		dmb_desc->cpu_addr = dmb.cpu_addr;
+		dmb_desc->dma_addr = dmb.dma_addr;
+		dmb_desc->len = dmb.dmb_len;
+	}
+	return rc;
+}
+
+struct smc_ism_event_work {
+	struct work_struct work;
+	struct smcd_dev *smcd;
+	struct smcd_event event;
+};
+
+/* worker for SMC-D events */
+static void smc_ism_event_work(struct work_struct *work)
+{
+	struct smc_ism_event_work *wrk =
+		container_of(work, struct smc_ism_event_work, work);
+
+	switch (wrk->event.type) {
+	case ISM_EVENT_GID:	/* GID event, token is peer GID */
+		smc_smcd_terminate(wrk->smcd, wrk->event.tok);
+		break;
+	case ISM_EVENT_DMB:
+		break;
+	}
+	kfree(wrk);
+}
+
+static void smcd_release(struct device *dev)
+{
+	struct smcd_dev *smcd = container_of(dev, struct smcd_dev, dev);
+
+	kfree(smcd->conn);
+	kfree(smcd);
+}
+
+struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
+				const struct smcd_ops *ops, int max_dmbs)
+{
+	struct smcd_dev *smcd;
+
+	smcd = kzalloc(sizeof(*smcd), GFP_KERNEL);
+	if (!smcd)
+		return NULL;
+	smcd->conn = kcalloc(max_dmbs, sizeof(struct smc_connection *),
+			     GFP_KERNEL);
+	if (!smcd->conn) {
+		kfree(smcd);
+		return NULL;
+	}
+
+	smcd->dev.parent = parent;
+	smcd->dev.release = smcd_release;
+	device_initialize(&smcd->dev);
+	dev_set_name(&smcd->dev, name);
+	smcd->ops = ops;
+	smc_pnetid_by_dev_port(parent, 0, smcd->pnetid);
+
+	spin_lock_init(&smcd->lock);
+	INIT_LIST_HEAD(&smcd->vlan);
+	smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)",
+						 WQ_MEM_RECLAIM, name);
+	return smcd;
+}
+EXPORT_SYMBOL_GPL(smcd_alloc_dev);
+
+int smcd_register_dev(struct smcd_dev *smcd)
+{
+	spin_lock(&smcd_dev_list.lock);
+	list_add_tail(&smcd->list, &smcd_dev_list.list);
+	spin_unlock(&smcd_dev_list.lock);
+
+	return device_add(&smcd->dev);
+}
+EXPORT_SYMBOL_GPL(smcd_register_dev);
+
+void smcd_unregister_dev(struct smcd_dev *smcd)
+{
+	spin_lock(&smcd_dev_list.lock);
+	list_del(&smcd->list);
+	spin_unlock(&smcd_dev_list.lock);
+	flush_workqueue(smcd->event_wq);
+	destroy_workqueue(smcd->event_wq);
+	smc_smcd_terminate(smcd, 0);
+
+	device_del(&smcd->dev);
+}
+EXPORT_SYMBOL_GPL(smcd_unregister_dev);
+
+void smcd_free_dev(struct smcd_dev *smcd)
+{
+	put_device(&smcd->dev);
+}
+EXPORT_SYMBOL_GPL(smcd_free_dev);
+
+/* SMCD Device event handler. Called from ISM device interrupt handler.
+ * Parameters are smcd device pointer,
+ * - event->type (0 --> DMB, 1 --> GID),
+ * - event->code (event code),
+ * - event->tok (either DMB token when event type 0, or GID when event type 1)
+ * - event->time (time of day)
+ * - event->info (debug info).
+ *
+ * Context:
+ * - Function called in IRQ context from ISM device driver event handler.
+ */
+void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event)
+{
+	struct smc_ism_event_work *wrk;
+
+	/* copy event to event work queue, and let it be handled there */
+	wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC);
+	if (!wrk)
+		return;
+	INIT_WORK(&wrk->work, smc_ism_event_work);
+	wrk->smcd = smcd;
+	wrk->event = *event;
+	queue_work(smcd->event_wq, &wrk->work);
+}
+EXPORT_SYMBOL_GPL(smcd_handle_event);
+
+/* SMCD Device interrupt handler. Called from ISM device interrupt handler.
+ * Parameters are smcd device pointer and DMB number. Find the connection and
+ * schedule the tasklet for this connection.
+ *
+ * Context:
+ * - Function called in IRQ context from ISM device driver IRQ handler.
+ */
+void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno)
+{
+	struct smc_connection *conn = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&smcd->lock, flags);
+	conn = smcd->conn[dmbno];
+	if (conn)
+		tasklet_schedule(&conn->rx_tsklet);
+	spin_unlock_irqrestore(&smcd->lock, flags);
+}
+EXPORT_SYMBOL_GPL(smcd_handle_irq);
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
new file mode 100644
index 000000000000..aee45b860b79
--- /dev/null
+++ b/net/smc/smc_ism.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Shared Memory Communications Direct over ISM devices (SMC-D)
+ *
+ * SMC-D ISM device structure definitions.
+ *
+ * Copyright IBM Corp. 2018
+ */
+
+#ifndef SMCD_ISM_H
+#define SMCD_ISM_H
+
+#include <linux/uio.h>
+
+#include "smc.h"
+
+struct smcd_dev_list {	/* List of SMCD devices */
+	struct list_head list;
+	spinlock_t lock;	/* Protects list of devices */
+};
+
+extern struct smcd_dev_list	smcd_dev_list; /* list of smcd devices */
+
+struct smc_ism_vlanid {			/* VLAN id set on ISM device */
+	struct list_head list;
+	unsigned short vlanid;		/* Vlan id */
+	refcount_t refcnt;		/* Reference count */
+};
+
+struct smc_ism_position {	/* ISM device position to write to */
+	u64 token;		/* Token of DMB */
+	u32 offset;		/* Offset into DMBE */
+	u8 index;		/* Index of DMBE */
+	u8 signal;		/* Generate interrupt on owner side */
+};
+
+struct smcd_dev;
+
+int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *dev);
+void smc_ism_set_conn(struct smc_connection *conn);
+void smc_ism_unset_conn(struct smc_connection *conn);
+int smc_ism_get_vlan(struct smcd_dev *dev, unsigned short vlan_id);
+int smc_ism_put_vlan(struct smcd_dev *dev, unsigned short vlan_id);
+int smc_ism_register_dmb(struct smc_link_group *lgr, int buf_size,
+			 struct smc_buf_desc *dmb_desc);
+int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc);
+int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos,
+		  void *data, size_t len);
+#endif
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index d7b88b2d1b22..1b6c066d3495 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -22,13 +22,12 @@
 
 #include "smc_pnet.h"
 #include "smc_ib.h"
-
-#define SMC_MAX_PNET_ID_LEN	16	/* Max. length of PNET id */
+#include "smc_ism.h"
 
 static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
 	[SMC_PNETID_NAME] = {
 		.type = NLA_NUL_STRING,
-		.len = SMC_MAX_PNET_ID_LEN - 1
+		.len = SMC_MAX_PNETID_LEN - 1
 	},
 	[SMC_PNETID_ETHNAME] = {
 		.type = NLA_NUL_STRING,
@@ -65,7 +64,7 @@ static struct smc_pnettable {
  */
 struct smc_pnetentry {
 	struct list_head list;
-	char pnet_name[SMC_MAX_PNET_ID_LEN + 1];
+	char pnet_name[SMC_MAX_PNETID_LEN + 1];
 	struct net_device *ndev;
 	struct smc_ib_device *smcibdev;
 	u8 ib_port;
@@ -209,7 +208,7 @@ static bool smc_pnetid_valid(const char *pnet_name, char *pnetid)
 		return false;
 	while (--end >= bf && isspace(*end))
 		;
-	if (end - bf >= SMC_MAX_PNET_ID_LEN)
+	if (end - bf >= SMC_MAX_PNETID_LEN)
 		return false;
 	while (bf <= end) {
 		if (!isalnum(*bf))
@@ -358,9 +357,6 @@ static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
 		kfree(pnetelem);
 		return rc;
 	}
-	rc = smc_ib_remember_port_attr(pnetelem->smcibdev, pnetelem->ib_port);
-	if (rc)
-		smc_pnet_remove_by_pnetid(pnetelem->pnet_name);
 	return rc;
 }
 
@@ -485,10 +481,10 @@ static int smc_pnet_netdev_event(struct notifier_block *this,
 	case NETDEV_REBOOT:
 	case NETDEV_UNREGISTER:
 		smc_pnet_remove_by_ndev(event_dev);
+		return NOTIFY_OK;
 	default:
-		break;
+		return NOTIFY_DONE;
 	}
-	return NOTIFY_DONE;
 }
 
 static struct notifier_block smc_netdev_notifier = {
@@ -515,26 +511,91 @@ void smc_pnet_exit(void)
 	genl_unregister_family(&smc_pnet_nl_family);
 }
 
-/* PNET table analysis for a given sock:
- * determine ib_device and port belonging to used internal TCP socket
- * ethernet interface.
+/* Determine one base device for stacked net devices.
+ * If the lower device level contains more than one devices
+ * (for instance with bonding slaves), just the first device
+ * is used to reach a base device.
  */
-void smc_pnet_find_roce_resource(struct sock *sk,
-				 struct smc_ib_device **smcibdev, u8 *ibport)
+static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
 {
-	struct dst_entry *dst = sk_dst_get(sk);
-	struct smc_pnetentry *pnetelem;
+	int i, nest_lvl;
 
-	*smcibdev = NULL;
-	*ibport = 0;
+	rtnl_lock();
+	nest_lvl = dev_get_nest_level(ndev);
+	for (i = 0; i < nest_lvl; i++) {
+		struct list_head *lower = &ndev->adj_list.lower;
+
+		if (list_empty(lower))
+			break;
+		lower = lower->next;
+		ndev = netdev_lower_get_next(ndev, &lower);
+	}
+	rtnl_unlock();
+	return ndev;
+}
+
+/* Determine the corresponding IB device port based on the hardware PNETID.
+ * Searching stops at the first matching active IB device port.
+ */
+static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
+					 struct smc_ib_device **smcibdev,
+					 u8 *ibport)
+{
+	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
+	struct smc_ib_device *ibdev;
+	int i;
+
+	ndev = pnet_find_base_ndev(ndev);
+	if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
+				   ndev_pnetid))
+		return; /* pnetid could not be determined */
+
+	spin_lock(&smc_ib_devices.lock);
+	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
+		for (i = 1; i <= SMC_MAX_PORTS; i++) {
+			if (!memcmp(ibdev->pnetid[i - 1], ndev_pnetid,
+				    SMC_MAX_PNETID_LEN) &&
+			    smc_ib_port_active(ibdev, i)) {
+				*smcibdev = ibdev;
+				*ibport = i;
+				break;
+			}
+		}
+	}
+	spin_unlock(&smc_ib_devices.lock);
+}
+
+static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
+					struct smcd_dev **smcismdev)
+{
+	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
+	struct smcd_dev *ismdev;
+
+	ndev = pnet_find_base_ndev(ndev);
+	if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
+				   ndev_pnetid))
+		return; /* pnetid could not be determined */
+
+	spin_lock(&smcd_dev_list.lock);
+	list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
+		if (!memcmp(ismdev->pnetid, ndev_pnetid, SMC_MAX_PNETID_LEN)) {
+			*smcismdev = ismdev;
+			break;
+		}
+	}
+	spin_unlock(&smcd_dev_list.lock);
+}
+
+/* Lookup of coupled ib_device via SMC pnet table */
+static void smc_pnet_find_roce_by_table(struct net_device *netdev,
+					struct smc_ib_device **smcibdev,
+					u8 *ibport)
+{
+	struct smc_pnetentry *pnetelem;
 
-	if (!dst)
-		return;
-	if (!dst->dev)
-		goto out_rel;
 	read_lock(&smc_pnettable.lock);
 	list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
-		if (dst->dev == pnetelem->ndev) {
+		if (netdev == pnetelem->ndev) {
 			if (smc_ib_port_active(pnetelem->smcibdev,
 					       pnetelem->ib_port)) {
 				*smcibdev = pnetelem->smcibdev;
@@ -544,6 +605,54 @@ void smc_pnet_find_roce_resource(struct sock *sk,
 		}
 	}
 	read_unlock(&smc_pnettable.lock);
+}
+
+/* PNET table analysis for a given sock:
+ * determine ib_device and port belonging to used internal TCP socket
+ * ethernet interface.
+ */
+void smc_pnet_find_roce_resource(struct sock *sk,
+				 struct smc_ib_device **smcibdev, u8 *ibport)
+{
+	struct dst_entry *dst = sk_dst_get(sk);
+
+	*smcibdev = NULL;
+	*ibport = 0;
+
+	if (!dst)
+		goto out;
+	if (!dst->dev)
+		goto out_rel;
+
+	/* if possible, lookup via hardware-defined pnetid */
+	smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport);
+	if (*smcibdev)
+		goto out_rel;
+
+	/* lookup via SMC PNET table */
+	smc_pnet_find_roce_by_table(dst->dev, smcibdev, ibport);
+
+out_rel:
+	dst_release(dst);
+out:
+	return;
+}
+
+void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev)
+{
+	struct dst_entry *dst = sk_dst_get(sk);
+
+	*smcismdev = NULL;
+	if (!dst)
+		goto out;
+	if (!dst->dev)
+		goto out_rel;
+
+	/* if possible, lookup via hardware-defined pnetid */
+	smc_pnet_find_ism_by_pnetid(dst->dev, smcismdev);
+
 out_rel:
 	dst_release(dst);
+out:
+	return;
 }
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index 5a29519db976..1e94fd4df7bc 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -12,12 +12,28 @@
 #ifndef _SMC_PNET_H
 #define _SMC_PNET_H
 
+#if IS_ENABLED(CONFIG_HAVE_PNETID)
+#include <asm/pnet.h>
+#endif
+
 struct smc_ib_device;
+struct smcd_dev;
+
+static inline int smc_pnetid_by_dev_port(struct device *dev,
+					 unsigned short port, u8 *pnetid)
+{
+#if IS_ENABLED(CONFIG_HAVE_PNETID)
+	return pnet_id_by_dev_port(dev, port, pnetid);
+#else
+	return -ENOENT;
+#endif
+}
 
 int smc_pnet_init(void) __init;
 void smc_pnet_exit(void);
 int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev);
 void smc_pnet_find_roce_resource(struct sock *sk,
 				 struct smc_ib_device **smcibdev, u8 *ibport);
+void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev);
 
 #endif
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 3d77b383cccd..b329803c8339 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -305,7 +305,7 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
 
 	/* we currently use 1 RMBE per RMB, so RMBE == RMB base addr */
-	rcvbuf_base = conn->rmb_desc->cpu_addr;
+	rcvbuf_base = conn->rx_off + conn->rmb_desc->cpu_addr;
 
 	do { /* while (read_remaining) */
 		if (read_done >= target || (pipe && read_done))
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index f82886b7d1d8..142bcb134dd6 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -24,6 +24,7 @@
 #include "smc.h"
 #include "smc_wr.h"
 #include "smc_cdc.h"
+#include "smc_ism.h"
 #include "smc_tx.h"
 
 #define SMC_TX_WORK_DELAY	HZ
@@ -250,6 +251,24 @@ out_err:
 
 /***************************** sndbuf consumer *******************************/
 
+/* sndbuf consumer: actual data transfer of one target chunk with ISM write */
+int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len,
+		      u32 offset, int signal)
+{
+	struct smc_ism_position pos;
+	int rc;
+
+	memset(&pos, 0, sizeof(pos));
+	pos.token = conn->peer_token;
+	pos.index = conn->peer_rmbe_idx;
+	pos.offset = conn->tx_off + offset;
+	pos.signal = signal;
+	rc = smc_ism_write(conn->lgr->smcd, &pos, data, len);
+	if (rc)
+		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
+	return rc;
+}
+
 /* sndbuf consumer: actual data transfer of one target chunk with RDMA write */
 static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
 			     int num_sges, struct ib_sge sges[])
@@ -297,21 +316,104 @@ static inline void smc_tx_advance_cursors(struct smc_connection *conn,
 	smc_curs_add(conn->sndbuf_desc->len, sent, len);
 }
 
+/* SMC-R helper for smc_tx_rdma_writes() */
+static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
+			       size_t src_off, size_t src_len,
+			       size_t dst_off, size_t dst_len)
+{
+	dma_addr_t dma_addr =
+		sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
+	struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+	int src_len_sum = src_len, dst_len_sum = dst_len;
+	struct ib_sge sges[SMC_IB_MAX_SEND_SGE];
+	int sent_count = src_off;
+	int srcchunk, dstchunk;
+	int num_sges;
+	int rc;
+
+	for (dstchunk = 0; dstchunk < 2; dstchunk++) {
+		num_sges = 0;
+		for (srcchunk = 0; srcchunk < 2; srcchunk++) {
+			sges[srcchunk].addr = dma_addr + src_off;
+			sges[srcchunk].length = src_len;
+			sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
+			num_sges++;
+
+			src_off += src_len;
+			if (src_off >= conn->sndbuf_desc->len)
+				src_off -= conn->sndbuf_desc->len;
+						/* modulo in send ring */
+			if (src_len_sum == dst_len)
+				break; /* either on 1st or 2nd iteration */
+			/* prepare next (== 2nd) iteration */
+			src_len = dst_len - src_len; /* remainder */
+			src_len_sum += src_len;
+		}
+		rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges);
+		if (rc)
+			return rc;
+		if (dst_len_sum == len)
+			break; /* either on 1st or 2nd iteration */
+		/* prepare next (== 2nd) iteration */
+		dst_off = 0; /* modulo offset in RMBE ring buffer */
+		dst_len = len - dst_len; /* remainder */
+		dst_len_sum += dst_len;
+		src_len = min_t(int, dst_len, conn->sndbuf_desc->len -
+				sent_count);
+		src_len_sum = src_len;
+	}
+	return 0;
+}
+
+/* SMC-D helper for smc_tx_rdma_writes() */
+static int smcd_tx_rdma_writes(struct smc_connection *conn, size_t len,
+			       size_t src_off, size_t src_len,
+			       size_t dst_off, size_t dst_len)
+{
+	int src_len_sum = src_len, dst_len_sum = dst_len;
+	int srcchunk, dstchunk;
+	int rc;
+
+	for (dstchunk = 0; dstchunk < 2; dstchunk++) {
+		for (srcchunk = 0; srcchunk < 2; srcchunk++) {
+			void *data = conn->sndbuf_desc->cpu_addr + src_off;
+
+			rc = smcd_tx_ism_write(conn, data, src_len, dst_off +
+					       sizeof(struct smcd_cdc_msg), 0);
+			if (rc)
+				return rc;
+			dst_off += src_len;
+			src_off += src_len;
+			if (src_off >= conn->sndbuf_desc->len)
+				src_off -= conn->sndbuf_desc->len;
+						/* modulo in send ring */
+			if (src_len_sum == dst_len)
+				break; /* either on 1st or 2nd iteration */
+			/* prepare next (== 2nd) iteration */
+			src_len = dst_len - src_len; /* remainder */
+			src_len_sum += src_len;
+		}
+		if (dst_len_sum == len)
+			break; /* either on 1st or 2nd iteration */
+		/* prepare next (== 2nd) iteration */
+		dst_off = 0; /* modulo offset in RMBE ring buffer */
+		dst_len = len - dst_len; /* remainder */
+		dst_len_sum += dst_len;
+		src_len = min_t(int, dst_len, conn->sndbuf_desc->len - src_off);
+		src_len_sum = src_len;
+	}
+	return 0;
+}
+
 /* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit;
  * usable snd_wnd as max transmit
  */
 static int smc_tx_rdma_writes(struct smc_connection *conn)
 {
-	size_t src_off, src_len, dst_off, dst_len; /* current chunk values */
-	size_t len, dst_len_sum, src_len_sum, dstchunk, srcchunk;
+	size_t len, src_len, dst_off, dst_len; /* current chunk values */
 	union smc_host_cursor sent, prep, prod, cons;
-	struct ib_sge sges[SMC_IB_MAX_SEND_SGE];
-	struct smc_link_group *lgr = conn->lgr;
 	struct smc_cdc_producer_flags *pflags;
 	int to_send, rmbespace;
-	struct smc_link *link;
-	dma_addr_t dma_addr;
-	int num_sges;
 	int rc;
 
 	/* source: sndbuf */
@@ -341,7 +443,6 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
 	len = min(to_send, rmbespace);
 
 	/* initialize variables for first iteration of subsequent nested loop */
-	link = &lgr->lnk[SMC_SINGLE_LINK];
 	dst_off = prod.count;
 	if (prod.wrap == cons.wrap) {
 		/* the filled destination area is unwrapped,
@@ -358,8 +459,6 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
 		 */
 		dst_len = len;
 	}
-	dst_len_sum = dst_len;
-	src_off = sent.count;
 	/* dst_len determines the maximum src_len */
 	if (sent.count + dst_len <= conn->sndbuf_desc->len) {
 		/* unwrapped src case: single chunk of entire dst_len */
@@ -368,38 +467,15 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
 		/* wrapped src case: 2 chunks of sum dst_len; start with 1st: */
 		src_len = conn->sndbuf_desc->len - sent.count;
 	}
-	src_len_sum = src_len;
-	dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
-	for (dstchunk = 0; dstchunk < 2; dstchunk++) {
-		num_sges = 0;
-		for (srcchunk = 0; srcchunk < 2; srcchunk++) {
-			sges[srcchunk].addr = dma_addr + src_off;
-			sges[srcchunk].length = src_len;
-			sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
-			num_sges++;
-			src_off += src_len;
-			if (src_off >= conn->sndbuf_desc->len)
-				src_off -= conn->sndbuf_desc->len;
-						/* modulo in send ring */
-			if (src_len_sum == dst_len)
-				break; /* either on 1st or 2nd iteration */
-			/* prepare next (== 2nd) iteration */
-			src_len = dst_len - src_len; /* remainder */
-			src_len_sum += src_len;
-		}
-		rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges);
-		if (rc)
-			return rc;
-		if (dst_len_sum == len)
-			break; /* either on 1st or 2nd iteration */
-		/* prepare next (== 2nd) iteration */
-		dst_off = 0; /* modulo offset in RMBE ring buffer */
-		dst_len = len - dst_len; /* remainder */
-		dst_len_sum += dst_len;
-		src_len = min_t(int,
-				dst_len, conn->sndbuf_desc->len - sent.count);
-		src_len_sum = src_len;
-	}
+
+	if (conn->lgr->is_smcd)
+		rc = smcd_tx_rdma_writes(conn, len, sent.count, src_len,
+					 dst_off, dst_len);
+	else
+		rc = smcr_tx_rdma_writes(conn, len, sent.count, src_len,
+					 dst_off, dst_len);
+	if (rc)
+		return rc;
 
 	if (conn->urg_tx_pend && len == to_send)
 		pflags->urg_data_present = 1;
@@ -420,7 +496,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
 /* Wakeup sndbuf consumers from any context (IRQ or process)
  * since there is more data to transmit; usable snd_wnd as max transmit
  */
-int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
+static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
 {
 	struct smc_cdc_producer_flags *pflags;
 	struct smc_cdc_tx_pend *pend;
@@ -467,6 +543,37 @@ out_unlock:
 	return rc;
 }
 
+static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn)
+{
+	struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
+	int rc = 0;
+
+	spin_lock_bh(&conn->send_lock);
+	if (!pflags->urg_data_present)
+		rc = smc_tx_rdma_writes(conn);
+	if (!rc)
+		rc = smcd_cdc_msg_send(conn);
+
+	if (!rc && pflags->urg_data_present) {
+		pflags->urg_data_pending = 0;
+		pflags->urg_data_present = 0;
+	}
+	spin_unlock_bh(&conn->send_lock);
+	return rc;
+}
+
+int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
+{
+	int rc;
+
+	if (conn->lgr->is_smcd)
+		rc = smcd_tx_sndbuf_nonempty(conn);
+	else
+		rc = smcr_tx_sndbuf_nonempty(conn);
+
+	return rc;
+}
+
 /* Wakeup sndbuf consumers from process context
  * since there is more data to transmit
  */
diff --git a/net/smc/smc_tx.h b/net/smc/smc_tx.h
index 9d2238909fa0..b22bdc5694c4 100644
--- a/net/smc/smc_tx.h
+++ b/net/smc/smc_tx.h
@@ -33,5 +33,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len);
 int smc_tx_sndbuf_nonempty(struct smc_connection *conn);
 void smc_tx_sndbuf_nonfull(struct smc_sock *smc);
 void smc_tx_consumer_update(struct smc_connection *conn, bool force);
+int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len,
+		      u32 offset, int signal);
 
 #endif /* SMC_TX_H */
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index 625acb27efcc..3a512936eea9 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -140,11 +140,13 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
 			/* We are going to append to the frags_list of head.
 			 * Need to unshare the frag_list.
 			 */
-			err = skb_unclone(head, GFP_ATOMIC);
-			if (err) {
-				STRP_STATS_INCR(strp->stats.mem_fail);
-				desc->error = err;
-				return 0;
+			if (skb_has_frag_list(head)) {
+				err = skb_unclone(head, GFP_ATOMIC);
+				if (err) {
+					STRP_STATS_INCR(strp->stats.mem_fail);
+					desc->error = err;
+					return 0;
+				}
 			}
 
 			if (unlikely(skb_shinfo(head)->frag_list)) {
@@ -201,14 +203,16 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
 			memset(stm, 0, sizeof(*stm));
 			stm->strp.offset = orig_offset + eaten;
 		} else {
-			/* Unclone since we may be appending to an skb that we
+			/* Unclone if we are appending to an skb that we
 			 * already share a frag_list with.
 			 */
-			err = skb_unclone(skb, GFP_ATOMIC);
-			if (err) {
-				STRP_STATS_INCR(strp->stats.mem_fail);
-				desc->error = err;
-				break;
+			if (skb_has_frag_list(skb)) {
+				err = skb_unclone(skb, GFP_ATOMIC);
+				if (err) {
+					STRP_STATS_INCR(strp->stats.mem_fail);
+					desc->error = err;
+					break;
+				}
 			}
 
 			stm = _strp_msg(head);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 2dfb492a7c94..fd6d8f18955c 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -610,6 +610,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
 	case NETDEV_CHANGE:
 		if (netif_carrier_ok(dev))
 			break;
+		/* else: fall through */
 	case NETDEV_UP:
 		test_and_set_bit_lock(0, &b->up);
 		break;
diff --git a/net/tipc/group.c b/net/tipc/group.c
index d7a7befeddd4..8f43e7d6046b 100644
--- a/net/tipc/group.c
+++ b/net/tipc/group.c
@@ -159,11 +159,6 @@ u32 tipc_group_exclude(struct tipc_group *grp)
 	return 0;
 }
 
-int tipc_group_size(struct tipc_group *grp)
-{
-	return grp->member_cnt;
-}
-
 struct tipc_group *tipc_group_create(struct net *net, u32 portid,
 				     struct tipc_group_req *mreq,
 				     bool *group_is_open)
@@ -918,3 +913,35 @@ void tipc_group_member_evt(struct tipc_group *grp,
 	}
 	*sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
 }
+
+int tipc_group_fill_sock_diag(struct tipc_group *grp, struct sk_buff *skb)
+{
+	struct nlattr *group = nla_nest_start(skb, TIPC_NLA_SOCK_GROUP);
+
+	if (nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_ID,
+			grp->type) ||
+	    nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_INSTANCE,
+			grp->instance) ||
+	    nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_BC_SEND_NEXT,
+			grp->bc_snd_nxt))
+		goto group_msg_cancel;
+
+	if (grp->scope == TIPC_NODE_SCOPE)
+		if (nla_put_flag(skb, TIPC_NLA_SOCK_GROUP_NODE_SCOPE))
+			goto group_msg_cancel;
+
+	if (grp->scope == TIPC_CLUSTER_SCOPE)
+		if (nla_put_flag(skb, TIPC_NLA_SOCK_GROUP_CLUSTER_SCOPE))
+			goto group_msg_cancel;
+
+	if (*grp->open)
+		if (nla_put_flag(skb, TIPC_NLA_SOCK_GROUP_OPEN))
+			goto group_msg_cancel;
+
+	nla_nest_end(skb, group);
+	return 0;
+
+group_msg_cancel:
+	nla_nest_cancel(skb, group);
+	return -1;
+}
diff --git a/net/tipc/group.h b/net/tipc/group.h
index 5996af6e9f1d..76b4e5a7b39d 100644
--- a/net/tipc/group.h
+++ b/net/tipc/group.h
@@ -72,4 +72,5 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
 			       u32 port, struct sk_buff_head *xmitq);
 u16 tipc_group_bc_snd_nxt(struct tipc_group *grp);
 void tipc_group_update_member(struct tipc_member *m, int len);
+int tipc_group_fill_sock_diag(struct tipc_group *grp, struct sk_buff *skb);
 #endif
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 695acb783969..6987ffc8e7a1 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -106,7 +106,8 @@ struct tipc_stats {
  * @backlogq: queue for messages waiting to be sent
  * @snt_nxt: next sequence number to use for outbound messages
  * @last_retransmitted: sequence number of most recently retransmitted message
- * @stale_count: # of identical retransmit requests made by peer
+ * @stale_cnt: counter for number of identical retransmit attempts
+ * @stale_limit: time when repeated identical retransmits must force link reset
  * @ackers: # of peers that needs to ack each packet before it can be released
  * @acked: # last packet acked by a certain peer. Used for broadcast.
  * @rcv_nxt: next sequence number to expect for inbound messages
@@ -127,14 +128,17 @@ struct tipc_link {
 	struct net *net;
 
 	/* Management and link supervision data */
-	u32 peer_session;
-	u32 session;
+	u16 peer_session;
+	u16 session;
+	u16 snd_nxt_state;
+	u16 rcv_nxt_state;
 	u32 peer_bearer_id;
 	u32 bearer_id;
 	u32 tolerance;
 	u32 abort_limit;
 	u32 state;
 	u16 peer_caps;
+	bool in_session;
 	bool active;
 	u32 silent_intv_cnt;
 	char if_name[TIPC_MAX_IF_NAME];
@@ -161,7 +165,8 @@ struct tipc_link {
 	u16 snd_nxt;
 	u16 last_retransm;
 	u16 window;
-	u32 stale_count;
+	u16 stale_cnt;
+	unsigned long stale_limit;
 
 	/* Reception */
 	u16 rcv_nxt;
@@ -212,11 +217,6 @@ enum {
  */
 #define TIPC_NACK_INTV (TIPC_MIN_LINK_WIN * 2)
 
-/* Wildcard value for link session numbers. When it is known that
- * peer endpoint is down, any session number must be accepted.
- */
-#define ANY_SESSION 0x10000
-
 /* Link FSM states:
  */
 enum {
@@ -297,11 +297,6 @@ static bool link_is_bc_rcvlink(struct tipc_link *l)
 	return ((l->bc_rcvlink == l) && !link_is_bc_sndlink(l));
 }
 
-int tipc_link_is_active(struct tipc_link *l)
-{
-	return l->active;
-}
-
 void tipc_link_set_active(struct tipc_link *l, bool active)
 {
 	l->active = active;
@@ -337,6 +332,11 @@ char tipc_link_plane(struct tipc_link *l)
 	return l->net_plane;
 }
 
+void tipc_link_update_caps(struct tipc_link *l, u16 capabilities)
+{
+	l->peer_caps = capabilities;
+}
+
 void tipc_link_add_bc_peer(struct tipc_link *snd_l,
 			   struct tipc_link *uc_l,
 			   struct sk_buff_head *xmitq)
@@ -469,7 +469,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
 	l->addr = peer;
 	l->peer_caps = peer_caps;
 	l->net = net;
-	l->peer_session = ANY_SESSION;
+	l->in_session = false;
 	l->bearer_id = bearer_id;
 	l->tolerance = tolerance;
 	l->net_plane = net_plane;
@@ -838,7 +838,7 @@ void link_prepare_wakeup(struct tipc_link *l)
 
 void tipc_link_reset(struct tipc_link *l)
 {
-	l->peer_session = ANY_SESSION;
+	l->in_session = false;
 	l->session++;
 	l->mtu = l->advertised_mtu;
 	__skb_queue_purge(&l->transmq);
@@ -857,10 +857,12 @@ void tipc_link_reset(struct tipc_link *l)
 	l->rcv_unacked = 0;
 	l->snd_nxt = 1;
 	l->rcv_nxt = 1;
+	l->snd_nxt_state = 1;
+	l->rcv_nxt_state = 1;
 	l->acked = 0;
 	l->silent_intv_cnt = 0;
 	l->rst_cnt = 0;
-	l->stale_count = 0;
+	l->stale_cnt = 0;
 	l->bc_peer_is_up = false;
 	memset(&l->mon_state, 0, sizeof(l->mon_state));
 	tipc_link_reset_stats(l);
@@ -997,39 +999,41 @@ static void link_retransmit_failure(struct tipc_link *l, struct sk_buff *skb)
 		msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr));
 }
 
-int tipc_link_retrans(struct tipc_link *l, struct tipc_link *nacker,
+/* tipc_link_retrans() - retransmit one or more packets
+ * @l: the link to transmit on
+ * @r: the receiving link ordering the retransmit. Same as l if unicast
+ * @from: retransmit from (inclusive) this sequence number
+ * @to: retransmit to (inclusive) this sequence number
+ * xmitq: queue for accumulating the retransmitted packets
+ */
+int tipc_link_retrans(struct tipc_link *l, struct tipc_link *r,
 		      u16 from, u16 to, struct sk_buff_head *xmitq)
 {
 	struct sk_buff *_skb, *skb = skb_peek(&l->transmq);
-	struct tipc_msg *hdr;
-	u16 ack = l->rcv_nxt - 1;
 	u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
+	u16 ack = l->rcv_nxt - 1;
+	struct tipc_msg *hdr;
 
 	if (!skb)
 		return 0;
 
 	/* Detect repeated retransmit failures on same packet */
-	if (nacker->last_retransm != buf_seqno(skb)) {
-		nacker->last_retransm = buf_seqno(skb);
-		nacker->stale_count = 1;
-	} else if (++nacker->stale_count > 100) {
+	if (r->last_retransm != buf_seqno(skb)) {
+		r->last_retransm = buf_seqno(skb);
+		r->stale_limit = jiffies + msecs_to_jiffies(l->tolerance);
+	} else if (++r->stale_cnt > 99 && time_after(jiffies, r->stale_limit)) {
 		link_retransmit_failure(l, skb);
-		nacker->stale_count = 0;
 		if (link_is_bc_sndlink(l))
 			return TIPC_LINK_DOWN_EVT;
 		return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
 	}
 
-	/* Move forward to where retransmission should start */
 	skb_queue_walk(&l->transmq, skb) {
-		if (!less(buf_seqno(skb), from))
-			break;
-	}
-
-	skb_queue_walk_from(&l->transmq, skb) {
-		if (more(buf_seqno(skb), to))
-			break;
 		hdr = buf_msg(skb);
+		if (less(msg_seqno(hdr), from))
+			continue;
+		if (more(msg_seqno(hdr), to))
+			break;
 		_skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC);
 		if (!_skb)
 			return 0;
@@ -1063,6 +1067,7 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
 			skb_queue_tail(mc_inputq, skb);
 			return true;
 		}
+		/* else: fall through */
 	case CONN_MANAGER:
 		skb_queue_tail(inputq, skb);
 		return true;
@@ -1271,6 +1276,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
 
 		/* Forward queues and wake up waiting users */
 		if (likely(tipc_link_release_pkts(l, msg_ack(hdr)))) {
+			l->stale_cnt = 0;
 			tipc_link_advance_backlog(l, xmitq);
 			if (unlikely(!skb_queue_empty(&l->wakeupq)))
 				link_prepare_wakeup(l);
@@ -1347,6 +1353,8 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
 	msg_set_seqno(hdr, l->snd_nxt + U16_MAX / 2);
 
 	if (mtyp == STATE_MSG) {
+		if (l->peer_caps & TIPC_LINK_PROTO_SEQNO)
+			msg_set_seqno(hdr, l->snd_nxt_state++);
 		msg_set_seq_gap(hdr, rcvgap);
 		msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl));
 		msg_set_probe(hdr, probe);
@@ -1438,6 +1446,44 @@ tnl:
 	}
 }
 
+/* tipc_link_validate_msg(): validate message against current link state
+ * Returns true if message should be accepted, otherwise false
+ */
+bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr)
+{
+	u16 curr_session = l->peer_session;
+	u16 session = msg_session(hdr);
+	int mtyp = msg_type(hdr);
+
+	if (msg_user(hdr) != LINK_PROTOCOL)
+		return true;
+
+	switch (mtyp) {
+	case RESET_MSG:
+		if (!l->in_session)
+			return true;
+		/* Accept only RESET with new session number */
+		return more(session, curr_session);
+	case ACTIVATE_MSG:
+		if (!l->in_session)
+			return true;
+		/* Accept only ACTIVATE with new or current session number */
+		return !less(session, curr_session);
+	case STATE_MSG:
+		/* Accept only STATE with current session number */
+		if (!l->in_session)
+			return false;
+		if (session != curr_session)
+			return false;
+		if (!(l->peer_caps & TIPC_LINK_PROTO_SEQNO))
+			return true;
+		/* Accept only STATE with new sequence number */
+		return !less(msg_seqno(hdr), l->rcv_nxt_state);
+	default:
+		return false;
+	}
+}
+
 /* tipc_link_proto_rcv(): receive link level protocol message :
  * Note that network plane id propagates through the network, and may
  * change at any time. The node with lowest numerical id determines
@@ -1471,17 +1517,12 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
 	hdr = buf_msg(skb);
 	data = msg_data(hdr);
 
+	if (!tipc_link_validate_msg(l, hdr))
+		goto exit;
+
 	switch (mtyp) {
 	case RESET_MSG:
-
-		/* Ignore duplicate RESET with old session number */
-		if ((less_eq(msg_session(hdr), l->peer_session)) &&
-		    (l->peer_session != ANY_SESSION))
-			break;
-		/* fall thru' */
-
 	case ACTIVATE_MSG:
-
 		/* Complete own link name with peer's interface name */
 		if_name =  strrchr(l->name, ':') + 1;
 		if (sizeof(l->name) - (if_name - l->name) <= TIPC_MAX_IF_NAME)
@@ -1509,12 +1550,14 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
 			rc = TIPC_LINK_UP_EVT;
 
 		l->peer_session = msg_session(hdr);
+		l->in_session = true;
 		l->peer_bearer_id = msg_bearer_id(hdr);
 		if (l->mtu > msg_max_pkt(hdr))
 			l->mtu = msg_max_pkt(hdr);
 		break;
 
 	case STATE_MSG:
+		l->rcv_nxt_state = msg_seqno(hdr) + 1;
 
 		/* Update own tolerance if peer indicates a non-zero value */
 		if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL))
diff --git a/net/tipc/link.h b/net/tipc/link.h
index ec59348a81e8..7bc494a33fdf 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -110,6 +110,8 @@ char *tipc_link_name(struct tipc_link *l);
 char tipc_link_plane(struct tipc_link *l);
 int tipc_link_prio(struct tipc_link *l);
 int tipc_link_window(struct tipc_link *l);
+void tipc_link_update_caps(struct tipc_link *l, u16 capabilities);
+bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr);
 unsigned long tipc_link_tolerance(struct tipc_link *l);
 void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
 			     struct sk_buff_head *xmitq);
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index b6c45dccba3d..b61891054709 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -416,26 +416,31 @@ bool tipc_msg_bundle(struct sk_buff *skb, struct tipc_msg *msg, u32 mtu)
  */
 bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos)
 {
-	struct tipc_msg *msg;
-	int imsz, offset;
+	struct tipc_msg *hdr, *ihdr;
+	int imsz;
 
 	*iskb = NULL;
 	if (unlikely(skb_linearize(skb)))
 		goto none;
 
-	msg = buf_msg(skb);
-	offset = msg_hdr_sz(msg) + *pos;
-	if (unlikely(offset > (msg_size(msg) - MIN_H_SIZE)))
+	hdr = buf_msg(skb);
+	if (unlikely(*pos > (msg_data_sz(hdr) - MIN_H_SIZE)))
 		goto none;
 
-	*iskb = skb_clone(skb, GFP_ATOMIC);
-	if (unlikely(!*iskb))
+	ihdr = (struct tipc_msg *)(msg_data(hdr) + *pos);
+	imsz = msg_size(ihdr);
+
+	if ((*pos + imsz) > msg_data_sz(hdr))
 		goto none;
-	skb_pull(*iskb, offset);
-	imsz = msg_size(buf_msg(*iskb));
-	skb_trim(*iskb, imsz);
+
+	*iskb = tipc_buf_acquire(imsz, GFP_ATOMIC);
+	if (!*iskb)
+		goto none;
+
+	skb_copy_to_linear_data(*iskb, ihdr, imsz);
 	if (unlikely(!tipc_msg_validate(iskb)))
 		goto none;
+
 	*pos += align(imsz);
 	return true;
 none:
@@ -531,12 +536,6 @@ bool tipc_msg_reverse(u32 own_node,  struct sk_buff **skb, int err)
 		msg_set_hdr_sz(hdr, BASIC_H_SIZE);
 	}
 
-	if (skb_cloned(_skb) &&
-	    pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC))
-		goto exit;
-
-	/* reassign after skb header modifications */
-	hdr = buf_msg(_skb);
 	/* Now reverse the concerned fields */
 	msg_set_errcode(hdr, err);
 	msg_set_non_seq(hdr, 0);
@@ -595,10 +594,6 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
 	if (!skb_cloned(skb))
 		return true;
 
-	/* Unclone buffer in case it was bundled */
-	if (pskb_expand_head(skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC))
-		return false;
-
 	return true;
 }
 
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 0453bd451ce8..68014f1b6976 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -45,6 +45,7 @@
 #include "netlink.h"
 
 #define INVALID_NODE_SIG	0x10000
+#define NODE_CLEANUP_AFTER	300000
 
 /* Flags used to take different actions according to flag type
  * TIPC_NOTIFY_NODE_DOWN: notify node is down
@@ -96,6 +97,7 @@ struct tipc_bclink_entry {
  * @link_id: local and remote bearer ids of changing link, if any
  * @publ_list: list of publications
  * @rcu: rcu struct for tipc_node
+ * @delete_at: indicates the time for deleting a down node
  */
 struct tipc_node {
 	u32 addr;
@@ -121,6 +123,7 @@ struct tipc_node {
 	unsigned long keepalive_intv;
 	struct timer_list timer;
 	struct rcu_head rcu;
+	unsigned long delete_at;
 };
 
 /* Node FSM states and events:
@@ -160,6 +163,7 @@ static struct tipc_node *tipc_node_find(struct net *net, u32 addr);
 static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id);
 static void tipc_node_put(struct tipc_node *node);
 static bool node_is_up(struct tipc_node *n);
+static void tipc_node_delete_from_list(struct tipc_node *node);
 
 struct tipc_sock_conn {
 	u32 port;
@@ -359,13 +363,24 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
 {
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
 	struct tipc_node *n, *temp_node;
+	struct tipc_link *l;
+	int bearer_id;
 	int i;
 
 	spin_lock_bh(&tn->node_list_lock);
 	n = tipc_node_find(net, addr);
 	if (n) {
+		if (n->capabilities == capabilities)
+			goto exit;
 		/* Same node may come back with new capabilities */
+		write_lock_bh(&n->lock);
 		n->capabilities = capabilities;
+		for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
+			l = n->links[bearer_id].link;
+			if (l)
+				tipc_link_update_caps(l, capabilities);
+		}
+		write_unlock_bh(&n->lock);
 		goto exit;
 	}
 	n = kzalloc(sizeof(*n), GFP_ATOMIC);
@@ -390,6 +405,7 @@ static struct tipc_node *tipc_node_create(struct net *net, u32 addr,
 	for (i = 0; i < MAX_BEARERS; i++)
 		spin_lock_init(&n->links[i].lock);
 	n->state = SELF_DOWN_PEER_LEAVING;
+	n->delete_at = jiffies + msecs_to_jiffies(NODE_CLEANUP_AFTER);
 	n->signature = INVALID_NODE_SIG;
 	n->active_links[0] = INVALID_BEARER_ID;
 	n->active_links[1] = INVALID_BEARER_ID;
@@ -433,11 +449,16 @@ static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l)
 	tipc_link_set_abort_limit(l, tol / n->keepalive_intv);
 }
 
-static void tipc_node_delete(struct tipc_node *node)
+static void tipc_node_delete_from_list(struct tipc_node *node)
 {
 	list_del_rcu(&node->list);
 	hlist_del_rcu(&node->hash);
 	tipc_node_put(node);
+}
+
+static void tipc_node_delete(struct tipc_node *node)
+{
+	tipc_node_delete_from_list(node);
 
 	del_timer_sync(&node->timer);
 	tipc_node_put(node);
@@ -544,6 +565,42 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port)
 	tipc_node_put(node);
 }
 
+static void  tipc_node_clear_links(struct tipc_node *node)
+{
+	int i;
+
+	for (i = 0; i < MAX_BEARERS; i++) {
+		struct tipc_link_entry *le = &node->links[i];
+
+		if (le->link) {
+			kfree(le->link);
+			le->link = NULL;
+			node->link_cnt--;
+		}
+	}
+}
+
+/* tipc_node_cleanup - delete nodes that does not
+ * have active links for NODE_CLEANUP_AFTER time
+ */
+static int tipc_node_cleanup(struct tipc_node *peer)
+{
+	struct tipc_net *tn = tipc_net(peer->net);
+	bool deleted = false;
+
+	spin_lock_bh(&tn->node_list_lock);
+	tipc_node_write_lock(peer);
+
+	if (!node_is_up(peer) && time_after(jiffies, peer->delete_at)) {
+		tipc_node_clear_links(peer);
+		tipc_node_delete_from_list(peer);
+		deleted = true;
+	}
+	tipc_node_write_unlock(peer);
+	spin_unlock_bh(&tn->node_list_lock);
+	return deleted;
+}
+
 /* tipc_node_timeout - handle expiration of node timer
  */
 static void tipc_node_timeout(struct timer_list *t)
@@ -551,21 +608,29 @@ static void tipc_node_timeout(struct timer_list *t)
 	struct tipc_node *n = from_timer(n, t, timer);
 	struct tipc_link_entry *le;
 	struct sk_buff_head xmitq;
+	int remains = n->link_cnt;
 	int bearer_id;
 	int rc = 0;
 
+	if (!node_is_up(n) && tipc_node_cleanup(n)) {
+		/*Removing the reference of Timer*/
+		tipc_node_put(n);
+		return;
+	}
+
 	__skb_queue_head_init(&xmitq);
 
-	for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
+	for (bearer_id = 0; remains && (bearer_id < MAX_BEARERS); bearer_id++) {
 		tipc_node_read_lock(n);
 		le = &n->links[bearer_id];
-		spin_lock_bh(&le->lock);
 		if (le->link) {
+			spin_lock_bh(&le->lock);
 			/* Link tolerance may change asynchronously: */
 			tipc_node_calculate_timer(n, le->link);
 			rc = tipc_link_timeout(le->link, &xmitq);
+			spin_unlock_bh(&le->lock);
+			remains--;
 		}
-		spin_unlock_bh(&le->lock);
 		tipc_node_read_unlock(n);
 		tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr);
 		if (rc & TIPC_LINK_DOWN_EVT)
@@ -1174,6 +1239,7 @@ static void node_lost_contact(struct tipc_node *n,
 	uint i;
 
 	pr_debug("Lost contact with %x\n", n->addr);
+	n->delete_at = jiffies + msecs_to_jiffies(NODE_CLEANUP_AFTER);
 
 	/* Clean up broadcast state */
 	tipc_bcast_remove_peer(n->net, n->bc_entry.link);
@@ -1481,7 +1547,7 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id
  * tipc_node_check_state - check and if necessary update node state
  * @skb: TIPC packet
  * @bearer_id: identity of bearer delivering the packet
- * Returns true if state is ok, otherwise consumes buffer and returns false
+ * Returns true if state and msg are ok, otherwise false
  */
 static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
 				  int bearer_id, struct sk_buff_head *xmitq)
@@ -1515,6 +1581,9 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
 		}
 	}
 
+	if (!tipc_link_validate_msg(l, hdr))
+		return false;
+
 	/* Check and update node accesibility if applicable */
 	if (state == SELF_UP_PEER_COMING) {
 		if (!tipc_link_is_up(l))
@@ -1743,7 +1812,6 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
 	struct tipc_node *peer;
 	u32 addr;
 	int err;
-	int i;
 
 	/* We identify the peer by its net */
 	if (!info->attrs[TIPC_NLA_NET])
@@ -1778,15 +1846,7 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
 		goto err_out;
 	}
 
-	for (i = 0; i < MAX_BEARERS; i++) {
-		struct tipc_link_entry *le = &peer->links[i];
-
-		if (le->link) {
-			kfree(le->link);
-			le->link = NULL;
-			peer->link_cnt--;
-		}
-	}
+	tipc_node_clear_links(peer);
 	tipc_node_write_unlock(peer);
 	tipc_node_delete(peer);
 
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 846c8f240872..48b3298a248d 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -49,14 +49,16 @@ enum {
 	TIPC_BCAST_STATE_NACK = (1 << 2),
 	TIPC_BLOCK_FLOWCTL    = (1 << 3),
 	TIPC_BCAST_RCAST      = (1 << 4),
-	TIPC_NODE_ID128       = (1 << 5)
+	TIPC_NODE_ID128       = (1 << 5),
+	TIPC_LINK_PROTO_SEQNO = (1 << 6)
 };
 
-#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \
-				TIPC_BCAST_STATE_NACK | \
-				TIPC_BCAST_RCAST | \
-				TIPC_BLOCK_FLOWCTL | \
-				TIPC_NODE_ID128)
+#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH       |  \
+				TIPC_BCAST_STATE_NACK  |  \
+				TIPC_BCAST_RCAST       |  \
+				TIPC_BLOCK_FLOWCTL     |  \
+				TIPC_NODE_ID128        |  \
+				TIPC_LINK_PROTO_SEQNO)
 #define INVALID_BEARER_ID -1
 
 void tipc_node_stop(struct net *net);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 930852c54d7a..3d21414ba357 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -3320,6 +3320,11 @@ int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb,
 		goto stat_msg_cancel;
 
 	nla_nest_end(skb, stat);
+
+	if (tsk->group)
+		if (tipc_group_fill_sock_diag(tsk->group, skb))
+			goto stat_msg_cancel;
+
 	nla_nest_end(skb, attrs);
 
 	return 0;
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index a7a8f8e20ff3..1e968d238adf 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -52,9 +52,12 @@ static DEFINE_SPINLOCK(tls_device_lock);
 
 static void tls_device_free_ctx(struct tls_context *ctx)
 {
-	struct tls_offload_context *offload_ctx = tls_offload_ctx(ctx);
+	if (ctx->tx_conf == TLS_HW)
+		kfree(tls_offload_ctx_tx(ctx));
+
+	if (ctx->rx_conf == TLS_HW)
+		kfree(tls_offload_ctx_rx(ctx));
 
-	kfree(offload_ctx);
 	kfree(ctx);
 }
 
@@ -71,10 +74,11 @@ static void tls_device_gc_task(struct work_struct *work)
 	list_for_each_entry_safe(ctx, tmp, &gc_list, list) {
 		struct net_device *netdev = ctx->netdev;
 
-		if (netdev) {
+		if (netdev && ctx->tx_conf == TLS_HW) {
 			netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
 							TLS_OFFLOAD_CTX_DIR_TX);
 			dev_put(netdev);
+			ctx->netdev = NULL;
 		}
 
 		list_del(&ctx->list);
@@ -82,6 +86,22 @@ static void tls_device_gc_task(struct work_struct *work)
 	}
 }
 
+static void tls_device_attach(struct tls_context *ctx, struct sock *sk,
+			      struct net_device *netdev)
+{
+	if (sk->sk_destruct != tls_device_sk_destruct) {
+		refcount_set(&ctx->refcount, 1);
+		dev_hold(netdev);
+		ctx->netdev = netdev;
+		spin_lock_irq(&tls_device_lock);
+		list_add_tail(&ctx->list, &tls_device_list);
+		spin_unlock_irq(&tls_device_lock);
+
+		ctx->sk_destruct = sk->sk_destruct;
+		sk->sk_destruct = tls_device_sk_destruct;
+	}
+}
+
 static void tls_device_queue_ctx_destruction(struct tls_context *ctx)
 {
 	unsigned long flags;
@@ -125,7 +145,7 @@ static void destroy_record(struct tls_record_info *record)
 	kfree(record);
 }
 
-static void delete_all_records(struct tls_offload_context *offload_ctx)
+static void delete_all_records(struct tls_offload_context_tx *offload_ctx)
 {
 	struct tls_record_info *info, *temp;
 
@@ -141,14 +161,14 @@ static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_record_info *info, *temp;
-	struct tls_offload_context *ctx;
+	struct tls_offload_context_tx *ctx;
 	u64 deleted_records = 0;
 	unsigned long flags;
 
 	if (!tls_ctx)
 		return;
 
-	ctx = tls_offload_ctx(tls_ctx);
+	ctx = tls_offload_ctx_tx(tls_ctx);
 
 	spin_lock_irqsave(&ctx->lock, flags);
 	info = ctx->retransmit_hint;
@@ -179,15 +199,17 @@ static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq)
 void tls_device_sk_destruct(struct sock *sk)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
-	struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx);
+	struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
 
-	if (ctx->open_record)
-		destroy_record(ctx->open_record);
+	tls_ctx->sk_destruct(sk);
 
-	delete_all_records(ctx);
-	crypto_free_aead(ctx->aead_send);
-	ctx->sk_destruct(sk);
-	clean_acked_data_disable(inet_csk(sk));
+	if (tls_ctx->tx_conf == TLS_HW) {
+		if (ctx->open_record)
+			destroy_record(ctx->open_record);
+		delete_all_records(ctx);
+		crypto_free_aead(ctx->aead_send);
+		clean_acked_data_disable(inet_csk(sk));
+	}
 
 	if (refcount_dec_and_test(&tls_ctx->refcount))
 		tls_device_queue_ctx_destruction(tls_ctx);
@@ -219,7 +241,7 @@ static void tls_append_frag(struct tls_record_info *record,
 
 static int tls_push_record(struct sock *sk,
 			   struct tls_context *ctx,
-			   struct tls_offload_context *offload_ctx,
+			   struct tls_offload_context_tx *offload_ctx,
 			   struct tls_record_info *record,
 			   struct page_frag *pfrag,
 			   int flags,
@@ -264,7 +286,7 @@ static int tls_push_record(struct sock *sk,
 	return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags);
 }
 
-static int tls_create_new_record(struct tls_offload_context *offload_ctx,
+static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx,
 				 struct page_frag *pfrag,
 				 size_t prepend_size)
 {
@@ -290,7 +312,7 @@ static int tls_create_new_record(struct tls_offload_context *offload_ctx,
 }
 
 static int tls_do_allocation(struct sock *sk,
-			     struct tls_offload_context *offload_ctx,
+			     struct tls_offload_context_tx *offload_ctx,
 			     struct page_frag *pfrag,
 			     size_t prepend_size)
 {
@@ -324,7 +346,7 @@ static int tls_push_data(struct sock *sk,
 			 unsigned char record_type)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
-	struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx);
+	struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
 	int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
 	int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE);
 	struct tls_record_info *record = ctx->open_record;
@@ -477,7 +499,7 @@ out:
 	return rc;
 }
 
-struct tls_record_info *tls_get_record(struct tls_offload_context *context,
+struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
 				       u32 seq, u64 *p_record_sn)
 {
 	u64 record_sn = context->hint_record_sn;
@@ -520,11 +542,123 @@ static int tls_device_push_pending_record(struct sock *sk, int flags)
 	return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA);
 }
 
+void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct net_device *netdev = tls_ctx->netdev;
+	struct tls_offload_context_rx *rx_ctx;
+	u32 is_req_pending;
+	s64 resync_req;
+	u32 req_seq;
+
+	if (tls_ctx->rx_conf != TLS_HW)
+		return;
+
+	rx_ctx = tls_offload_ctx_rx(tls_ctx);
+	resync_req = atomic64_read(&rx_ctx->resync_req);
+	req_seq = ntohl(resync_req >> 32) - ((u32)TLS_HEADER_SIZE - 1);
+	is_req_pending = resync_req;
+
+	if (unlikely(is_req_pending) && req_seq == seq &&
+	    atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0))
+		netdev->tlsdev_ops->tls_dev_resync_rx(netdev, sk,
+						      seq + TLS_HEADER_SIZE - 1,
+						      rcd_sn);
+}
+
+static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb)
+{
+	struct strp_msg *rxm = strp_msg(skb);
+	int err = 0, offset = rxm->offset, copy, nsg;
+	struct sk_buff *skb_iter, *unused;
+	struct scatterlist sg[1];
+	char *orig_buf, *buf;
+
+	orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE +
+			   TLS_CIPHER_AES_GCM_128_IV_SIZE, sk->sk_allocation);
+	if (!orig_buf)
+		return -ENOMEM;
+	buf = orig_buf;
+
+	nsg = skb_cow_data(skb, 0, &unused);
+	if (unlikely(nsg < 0)) {
+		err = nsg;
+		goto free_buf;
+	}
+
+	sg_init_table(sg, 1);
+	sg_set_buf(&sg[0], buf,
+		   rxm->full_len + TLS_HEADER_SIZE +
+		   TLS_CIPHER_AES_GCM_128_IV_SIZE);
+	skb_copy_bits(skb, offset, buf,
+		      TLS_HEADER_SIZE + TLS_CIPHER_AES_GCM_128_IV_SIZE);
+
+	/* We are interested only in the decrypted data not the auth */
+	err = decrypt_skb(sk, skb, sg);
+	if (err != -EBADMSG)
+		goto free_buf;
+	else
+		err = 0;
+
+	copy = min_t(int, skb_pagelen(skb) - offset,
+		     rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE);
+
+	if (skb->decrypted)
+		skb_store_bits(skb, offset, buf, copy);
+
+	offset += copy;
+	buf += copy;
+
+	skb_walk_frags(skb, skb_iter) {
+		copy = min_t(int, skb_iter->len,
+			     rxm->full_len - offset + rxm->offset -
+			     TLS_CIPHER_AES_GCM_128_TAG_SIZE);
+
+		if (skb_iter->decrypted)
+			skb_store_bits(skb_iter, offset, buf, copy);
+
+		offset += copy;
+		buf += copy;
+	}
+
+free_buf:
+	kfree(orig_buf);
+	return err;
+}
+
+int tls_device_decrypted(struct sock *sk, struct sk_buff *skb)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_offload_context_rx *ctx = tls_offload_ctx_rx(tls_ctx);
+	int is_decrypted = skb->decrypted;
+	int is_encrypted = !is_decrypted;
+	struct sk_buff *skb_iter;
+
+	/* Skip if it is already decrypted */
+	if (ctx->sw.decrypted)
+		return 0;
+
+	/* Check if all the data is decrypted already */
+	skb_walk_frags(skb, skb_iter) {
+		is_decrypted &= skb_iter->decrypted;
+		is_encrypted &= !skb_iter->decrypted;
+	}
+
+	ctx->sw.decrypted |= is_decrypted;
+
+	/* Return immedeatly if the record is either entirely plaintext or
+	 * entirely ciphertext. Otherwise handle reencrypt partially decrypted
+	 * record.
+	 */
+	return (is_encrypted || is_decrypted) ? 0 :
+		tls_device_reencrypt(sk, skb);
+}
+
 int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 {
 	u16 nonce_size, tag_size, iv_size, rec_seq_size;
 	struct tls_record_info *start_marker_record;
-	struct tls_offload_context *offload_ctx;
+	struct tls_offload_context_tx *offload_ctx;
 	struct tls_crypto_info *crypto_info;
 	struct net_device *netdev;
 	char *iv, *rec_seq;
@@ -546,7 +680,7 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 		goto out;
 	}
 
-	offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE, GFP_KERNEL);
+	offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_TX, GFP_KERNEL);
 	if (!offload_ctx) {
 		rc = -ENOMEM;
 		goto free_marker_record;
@@ -609,7 +743,6 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 
 	clean_acked_data_enable(inet_csk(sk), &tls_icsk_clean_acked);
 	ctx->push_pending_record = tls_device_push_pending_record;
-	offload_ctx->sk_destruct = sk->sk_destruct;
 
 	/* TLS offload is greatly simplified if we don't send
 	 * SKBs where only part of the payload needs to be encrypted.
@@ -619,8 +752,6 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 	if (skb)
 		TCP_SKB_CB(skb)->eor = 1;
 
-	refcount_set(&ctx->refcount, 1);
-
 	/* We support starting offload on multiple sockets
 	 * concurrently, so we only need a read lock here.
 	 * This lock must precede get_netdev_for_sock to prevent races between
@@ -655,19 +786,14 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 	if (rc)
 		goto release_netdev;
 
-	ctx->netdev = netdev;
-
-	spin_lock_irq(&tls_device_lock);
-	list_add_tail(&ctx->list, &tls_device_list);
-	spin_unlock_irq(&tls_device_lock);
+	tls_device_attach(ctx, sk, netdev);
 
-	sk->sk_validate_xmit_skb = tls_validate_xmit_skb;
 	/* following this assignment tls_is_sk_tx_device_offloaded
 	 * will return true and the context might be accessed
 	 * by the netdev's xmit function.
 	 */
-	smp_store_release(&sk->sk_destruct,
-			  &tls_device_sk_destruct);
+	smp_store_release(&sk->sk_validate_xmit_skb, tls_validate_xmit_skb);
+	dev_put(netdev);
 	up_read(&device_offload_lock);
 	goto out;
 
@@ -690,6 +816,105 @@ out:
 	return rc;
 }
 
+int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
+{
+	struct tls_offload_context_rx *context;
+	struct net_device *netdev;
+	int rc = 0;
+
+	/* We support starting offload on multiple sockets
+	 * concurrently, so we only need a read lock here.
+	 * This lock must precede get_netdev_for_sock to prevent races between
+	 * NETDEV_DOWN and setsockopt.
+	 */
+	down_read(&device_offload_lock);
+	netdev = get_netdev_for_sock(sk);
+	if (!netdev) {
+		pr_err_ratelimited("%s: netdev not found\n", __func__);
+		rc = -EINVAL;
+		goto release_lock;
+	}
+
+	if (!(netdev->features & NETIF_F_HW_TLS_RX)) {
+		pr_err_ratelimited("%s: netdev %s with no TLS offload\n",
+				   __func__, netdev->name);
+		rc = -ENOTSUPP;
+		goto release_netdev;
+	}
+
+	/* Avoid offloading if the device is down
+	 * We don't want to offload new flows after
+	 * the NETDEV_DOWN event
+	 */
+	if (!(netdev->flags & IFF_UP)) {
+		rc = -EINVAL;
+		goto release_netdev;
+	}
+
+	context = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_RX, GFP_KERNEL);
+	if (!context) {
+		rc = -ENOMEM;
+		goto release_netdev;
+	}
+
+	ctx->priv_ctx_rx = context;
+	rc = tls_set_sw_offload(sk, ctx, 0);
+	if (rc)
+		goto release_ctx;
+
+	rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_RX,
+					     &ctx->crypto_recv,
+					     tcp_sk(sk)->copied_seq);
+	if (rc) {
+		pr_err_ratelimited("%s: The netdev has refused to offload this socket\n",
+				   __func__);
+		goto free_sw_resources;
+	}
+
+	tls_device_attach(ctx, sk, netdev);
+	goto release_netdev;
+
+free_sw_resources:
+	tls_sw_free_resources_rx(sk);
+release_ctx:
+	ctx->priv_ctx_rx = NULL;
+release_netdev:
+	dev_put(netdev);
+release_lock:
+	up_read(&device_offload_lock);
+	return rc;
+}
+
+void tls_device_offload_cleanup_rx(struct sock *sk)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct net_device *netdev;
+
+	down_read(&device_offload_lock);
+	netdev = tls_ctx->netdev;
+	if (!netdev)
+		goto out;
+
+	if (!(netdev->features & NETIF_F_HW_TLS_RX)) {
+		pr_err_ratelimited("%s: device is missing NETIF_F_HW_TLS_RX cap\n",
+				   __func__);
+		goto out;
+	}
+
+	netdev->tlsdev_ops->tls_dev_del(netdev, tls_ctx,
+					TLS_OFFLOAD_CTX_DIR_RX);
+
+	if (tls_ctx->tx_conf != TLS_HW) {
+		dev_put(netdev);
+		tls_ctx->netdev = NULL;
+	}
+out:
+	up_read(&device_offload_lock);
+	kfree(tls_ctx->rx.rec_seq);
+	kfree(tls_ctx->rx.iv);
+	tls_sw_release_resources_rx(sk);
+}
+
 static int tls_device_down(struct net_device *netdev)
 {
 	struct tls_context *ctx, *tmp;
@@ -710,8 +935,12 @@ static int tls_device_down(struct net_device *netdev)
 	spin_unlock_irqrestore(&tls_device_lock, flags);
 
 	list_for_each_entry_safe(ctx, tmp, &list, list)	{
-		netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
-						TLS_OFFLOAD_CTX_DIR_TX);
+		if (ctx->tx_conf == TLS_HW)
+			netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
+							TLS_OFFLOAD_CTX_DIR_TX);
+		if (ctx->rx_conf == TLS_HW)
+			netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
+							TLS_OFFLOAD_CTX_DIR_RX);
 		ctx->netdev = NULL;
 		dev_put(netdev);
 		list_del_init(&ctx->list);
@@ -732,12 +961,16 @@ static int tls_dev_event(struct notifier_block *this, unsigned long event,
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 
-	if (!(dev->features & NETIF_F_HW_TLS_TX))
+	if (!(dev->features & (NETIF_F_HW_TLS_RX | NETIF_F_HW_TLS_TX)))
 		return NOTIFY_DONE;
 
 	switch (event) {
 	case NETDEV_REGISTER:
 	case NETDEV_FEAT_CHANGE:
+		if ((dev->features & NETIF_F_HW_TLS_RX) &&
+		    !dev->tlsdev_ops->tls_dev_resync_rx)
+			return NOTIFY_BAD;
+
 		if  (dev->tlsdev_ops &&
 		     dev->tlsdev_ops->tls_dev_add &&
 		     dev->tlsdev_ops->tls_dev_del)
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index 748914abdb60..e3313c45663f 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -214,7 +214,7 @@ static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln)
 
 static int fill_sg_in(struct scatterlist *sg_in,
 		      struct sk_buff *skb,
-		      struct tls_offload_context *ctx,
+		      struct tls_offload_context_tx *ctx,
 		      u64 *rcd_sn,
 		      s32 *sync_size,
 		      int *resync_sgs)
@@ -299,7 +299,7 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx,
 				   s32 sync_size, u64 rcd_sn)
 {
 	int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb);
-	struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx);
+	struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
 	int payload_len = skb->len - tcp_payload_offset;
 	void *buf, *iv, *aad, *dummy_buf;
 	struct aead_request *aead_req;
@@ -361,7 +361,7 @@ static struct sk_buff *tls_sw_fallback(struct sock *sk, struct sk_buff *skb)
 {
 	int tcp_payload_offset = skb_transport_offset(skb) + tcp_hdrlen(skb);
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
-	struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx);
+	struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
 	int payload_len = skb->len - tcp_payload_offset;
 	struct scatterlist *sg_in, sg_out[3];
 	struct sk_buff *nskb = NULL;
@@ -413,9 +413,10 @@ struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
 
 	return tls_sw_fallback(sk, skb);
 }
+EXPORT_SYMBOL_GPL(tls_validate_xmit_skb);
 
 int tls_sw_fallback_init(struct sock *sk,
-			 struct tls_offload_context *offload_ctx,
+			 struct tls_offload_context_tx *offload_ctx,
 			 struct tls_crypto_info *crypto_info)
 {
 	const u8 *key;
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 301f22430469..b09867c8b817 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -51,15 +51,6 @@ enum {
 	TLSV6,
 	TLS_NUM_PROTS,
 };
-enum {
-	TLS_BASE,
-	TLS_SW,
-#ifdef CONFIG_TLS_DEVICE
-	TLS_HW,
-#endif
-	TLS_HW_RECORD,
-	TLS_NUM_CONFIG,
-};
 
 static struct proto *saved_tcpv6_prot;
 static DEFINE_MUTEX(tcpv6_prot_mutex);
@@ -290,7 +281,10 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 	}
 
 #ifdef CONFIG_TLS_DEVICE
-	if (ctx->tx_conf != TLS_HW) {
+	if (ctx->rx_conf == TLS_HW)
+		tls_device_offload_cleanup_rx(sk);
+
+	if (ctx->tx_conf != TLS_HW && ctx->rx_conf != TLS_HW) {
 #else
 	{
 #endif
@@ -470,8 +464,16 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
 			conf = TLS_SW;
 		}
 	} else {
-		rc = tls_set_sw_offload(sk, ctx, 0);
-		conf = TLS_SW;
+#ifdef CONFIG_TLS_DEVICE
+		rc = tls_set_device_offload_rx(sk, ctx);
+		conf = TLS_HW;
+		if (rc) {
+#else
+		{
+#endif
+			rc = tls_set_sw_offload(sk, ctx, 0);
+			conf = TLS_SW;
+		}
 	}
 
 	if (rc)
@@ -629,6 +631,12 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
 	prot[TLS_HW][TLS_SW] = prot[TLS_BASE][TLS_SW];
 	prot[TLS_HW][TLS_SW].sendmsg		= tls_device_sendmsg;
 	prot[TLS_HW][TLS_SW].sendpage		= tls_device_sendpage;
+
+	prot[TLS_BASE][TLS_HW] = prot[TLS_BASE][TLS_SW];
+
+	prot[TLS_SW][TLS_HW] = prot[TLS_SW][TLS_SW];
+
+	prot[TLS_HW][TLS_HW] = prot[TLS_HW][TLS_SW];
 #endif
 
 	prot[TLS_HW_RECORD][TLS_HW_RECORD] = *base;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 4618f1c31137..0c2d029c9d4c 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -53,18 +53,14 @@ static int tls_do_decryption(struct sock *sk,
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
-	struct strp_msg *rxm = strp_msg(skb);
 	struct aead_request *aead_req;
 
 	int ret;
-	unsigned int req_size = sizeof(struct aead_request) +
-		crypto_aead_reqsize(ctx->aead_recv);
 
-	aead_req = kzalloc(req_size, flags);
+	aead_req = aead_request_alloc(ctx->aead_recv, flags);
 	if (!aead_req)
 		return -ENOMEM;
 
-	aead_request_set_tfm(aead_req, ctx->aead_recv);
 	aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
 	aead_request_set_crypt(aead_req, sgin, sgout,
 			       data_len + tls_ctx->rx.tag_size,
@@ -74,19 +70,7 @@ static int tls_do_decryption(struct sock *sk,
 
 	ret = crypto_wait_req(crypto_aead_decrypt(aead_req), &ctx->async_wait);
 
-	if (ret < 0)
-		goto out;
-
-	rxm->offset += tls_ctx->rx.prepend_size;
-	rxm->full_len -= tls_ctx->rx.overhead_size;
-	tls_advance_record_sn(sk, &tls_ctx->rx);
-
-	ctx->decrypted = true;
-
-	ctx->saved_data_ready(sk);
-
-out:
-	kfree(aead_req);
+	aead_request_free(aead_req);
 	return ret;
 }
 
@@ -224,8 +208,7 @@ static int tls_push_record(struct sock *sk, int flags,
 	struct aead_request *req;
 	int rc;
 
-	req = kzalloc(sizeof(struct aead_request) +
-		      crypto_aead_reqsize(ctx->aead_send), sk->sk_allocation);
+	req = aead_request_alloc(ctx->aead_send, sk->sk_allocation);
 	if (!req)
 		return -ENOMEM;
 
@@ -267,7 +250,7 @@ static int tls_push_record(struct sock *sk, int flags,
 
 	tls_advance_record_sn(sk, &tls_ctx->tx);
 out_req:
-	kfree(req);
+	aead_request_free(req);
 	return rc;
 }
 
@@ -280,7 +263,7 @@ static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
 			      int length, int *pages_used,
 			      unsigned int *size_used,
 			      struct scatterlist *to, int to_max_pages,
-			      bool charge)
+			      bool charge, bool revert)
 {
 	struct page *pages[MAX_SKB_FRAGS];
 
@@ -331,6 +314,8 @@ static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
 out:
 	*size_used = size;
 	*pages_used = num_elem;
+	if (revert)
+		iov_iter_revert(from, size);
 
 	return rc;
 }
@@ -432,7 +417,7 @@ alloc_encrypted:
 				&ctx->sg_plaintext_size,
 				ctx->sg_plaintext_data,
 				ARRAY_SIZE(ctx->sg_plaintext_data),
-				true);
+				true, false);
 			if (ret)
 				goto fallback_to_reg_send;
 
@@ -670,8 +655,38 @@ static struct sk_buff *tls_wait_data(struct sock *sk, int flags,
 	return skb;
 }
 
-static int decrypt_skb(struct sock *sk, struct sk_buff *skb,
-		       struct scatterlist *sgout)
+static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
+			      struct scatterlist *sgout, bool *zc)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+	struct strp_msg *rxm = strp_msg(skb);
+	int err = 0;
+
+#ifdef CONFIG_TLS_DEVICE
+	err = tls_device_decrypted(sk, skb);
+	if (err < 0)
+		return err;
+#endif
+	if (!ctx->decrypted) {
+		err = decrypt_skb(sk, skb, sgout);
+		if (err < 0)
+			return err;
+	} else {
+		*zc = false;
+	}
+
+	rxm->offset += tls_ctx->rx.prepend_size;
+	rxm->full_len -= tls_ctx->rx.overhead_size;
+	tls_advance_record_sn(sk, &tls_ctx->rx);
+	ctx->decrypted = true;
+	ctx->saved_data_ready(sk);
+
+	return err;
+}
+
+int decrypt_skb(struct sock *sk, struct sk_buff *skb,
+		struct scatterlist *sgout)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
@@ -817,11 +832,11 @@ int tls_sw_recvmsg(struct sock *sk,
 				err = zerocopy_from_iter(sk, &msg->msg_iter,
 							 to_copy, &pages,
 							 &chunk, &sgin[1],
-							 MAX_SKB_FRAGS,	false);
+							 MAX_SKB_FRAGS,	false, true);
 				if (err < 0)
 					goto fallback_to_reg_recv;
 
-				err = decrypt_skb(sk, skb, sgin);
+				err = decrypt_skb_update(sk, skb, sgin, &zc);
 				for (; pages > 0; pages--)
 					put_page(sg_page(&sgin[pages]));
 				if (err < 0) {
@@ -830,7 +845,7 @@ int tls_sw_recvmsg(struct sock *sk,
 				}
 			} else {
 fallback_to_reg_recv:
-				err = decrypt_skb(sk, skb, NULL);
+				err = decrypt_skb_update(sk, skb, NULL, &zc);
 				if (err < 0) {
 					tls_err_abort(sk, EBADMSG);
 					goto recv_end;
@@ -885,6 +900,7 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
 	int err = 0;
 	long timeo;
 	int chunk;
+	bool zc;
 
 	lock_sock(sk);
 
@@ -901,7 +917,7 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
 	}
 
 	if (!ctx->decrypted) {
-		err = decrypt_skb(sk, skb, NULL);
+		err = decrypt_skb_update(sk, skb, NULL, &zc);
 
 		if (err < 0) {
 			tls_err_abort(sk, EBADMSG);
@@ -947,7 +963,7 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
-	char header[tls_ctx->rx.prepend_size];
+	char header[TLS_HEADER_SIZE + MAX_IV_SIZE];
 	struct strp_msg *rxm = strp_msg(skb);
 	size_t cipher_overhead;
 	size_t data_len = 0;
@@ -957,6 +973,12 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
 	if (rxm->offset + tls_ctx->rx.prepend_size > skb->len)
 		return 0;
 
+	/* Sanity-check size of on-stack buffer. */
+	if (WARN_ON(tls_ctx->rx.prepend_size > sizeof(header))) {
+		ret = -EINVAL;
+		goto read_failure;
+	}
+
 	/* Linearize header to local buffer */
 	ret = skb_copy_bits(skb, rxm->offset, header, tls_ctx->rx.prepend_size);
 
@@ -984,6 +1006,10 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
 		goto read_failure;
 	}
 
+#ifdef CONFIG_TLS_DEVICE
+	handle_device_resync(strp->sk, TCP_SKB_CB(skb)->seq + rxm->offset,
+			     *(u64*)tls_ctx->rx.rec_seq);
+#endif
 	return data_len + TLS_HEADER_SIZE;
 
 read_failure:
@@ -996,9 +1022,6 @@ static void tls_queue(struct strparser *strp, struct sk_buff *skb)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
-	struct strp_msg *rxm;
-
-	rxm = strp_msg(skb);
 
 	ctx->decrypted = false;
 
@@ -1028,7 +1051,7 @@ void tls_sw_free_resources_tx(struct sock *sk)
 	kfree(ctx);
 }
 
-void tls_sw_free_resources_rx(struct sock *sk)
+void tls_sw_release_resources_rx(struct sock *sk)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
@@ -1047,6 +1070,14 @@ void tls_sw_free_resources_rx(struct sock *sk)
 		strp_done(&ctx->strp);
 		lock_sock(sk);
 	}
+}
+
+void tls_sw_free_resources_rx(struct sock *sk)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+
+	tls_sw_release_resources_rx(sk);
 
 	kfree(ctx);
 }
@@ -1071,28 +1102,38 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 	}
 
 	if (tx) {
-		sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL);
-		if (!sw_ctx_tx) {
-			rc = -ENOMEM;
-			goto out;
+		if (!ctx->priv_ctx_tx) {
+			sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL);
+			if (!sw_ctx_tx) {
+				rc = -ENOMEM;
+				goto out;
+			}
+			ctx->priv_ctx_tx = sw_ctx_tx;
+		} else {
+			sw_ctx_tx =
+				(struct tls_sw_context_tx *)ctx->priv_ctx_tx;
 		}
-		crypto_init_wait(&sw_ctx_tx->async_wait);
-		ctx->priv_ctx_tx = sw_ctx_tx;
 	} else {
-		sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL);
-		if (!sw_ctx_rx) {
-			rc = -ENOMEM;
-			goto out;
+		if (!ctx->priv_ctx_rx) {
+			sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL);
+			if (!sw_ctx_rx) {
+				rc = -ENOMEM;
+				goto out;
+			}
+			ctx->priv_ctx_rx = sw_ctx_rx;
+		} else {
+			sw_ctx_rx =
+				(struct tls_sw_context_rx *)ctx->priv_ctx_rx;
 		}
-		crypto_init_wait(&sw_ctx_rx->async_wait);
-		ctx->priv_ctx_rx = sw_ctx_rx;
 	}
 
 	if (tx) {
+		crypto_init_wait(&sw_ctx_tx->async_wait);
 		crypto_info = &ctx->crypto_send;
 		cctx = &ctx->tx;
 		aead = &sw_ctx_tx->aead_send;
 	} else {
+		crypto_init_wait(&sw_ctx_rx->async_wait);
 		crypto_info = &ctx->crypto_recv;
 		cctx = &ctx->rx;
 		aead = &sw_ctx_rx->aead_recv;
@@ -1117,7 +1158,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 	}
 
 	/* Sanity-check the IV size for stack allocations. */
-	if (iv_size > MAX_IV_SIZE) {
+	if (iv_size > MAX_IV_SIZE || nonce_size > MAX_IV_SIZE) {
 		rc = -EINVAL;
 		goto free_priv;
 	}
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 48e8097339ab..a88551f3bc43 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -3,7 +3,7 @@
  *
  * Copyright 2006-2010		Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
- * Copyright 2015	Intel Deutschland GmbH
+ * Copyright 2015-2017	Intel Deutschland GmbH
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -744,6 +744,8 @@ int wiphy_register(struct wiphy *wiphy)
 
 	/* sanity check supported bands/channels */
 	for (band = 0; band < NUM_NL80211_BANDS; band++) {
+		u16 types = 0;
+
 		sband = wiphy->bands[band];
 		if (!sband)
 			continue;
@@ -788,6 +790,23 @@ int wiphy_register(struct wiphy *wiphy)
 			sband->channels[i].band = band;
 		}
 
+		for (i = 0; i < sband->n_iftype_data; i++) {
+			const struct ieee80211_sband_iftype_data *iftd;
+
+			iftd = &sband->iftype_data[i];
+
+			if (WARN_ON(!iftd->types_mask))
+				return -EINVAL;
+			if (WARN_ON(types & iftd->types_mask))
+				return -EINVAL;
+
+			/* at least one piece of information must be present */
+			if (WARN_ON(!iftd->he_cap.has_he))
+				return -EINVAL;
+
+			types |= iftd->types_mask;
+		}
+
 		have_band = true;
 	}
 
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 63eb1b5fdd04..7f52ef569320 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -76,7 +76,7 @@ struct cfg80211_registered_device {
 	struct cfg80211_scan_request *scan_req; /* protected by RTNL */
 	struct sk_buff *scan_msg;
 	struct list_head sched_scan_req_list;
-	unsigned long suspend_at;
+	time64_t suspend_at;
 	struct work_struct scan_done_wk;
 
 	struct genl_info *cur_cmd_info;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 4eece06be1e7..e4e5f025d16b 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -428,6 +428,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_TXQ_LIMIT] = { .type = NLA_U32 },
 	[NL80211_ATTR_TXQ_MEMORY_LIMIT] = { .type = NLA_U32 },
 	[NL80211_ATTR_TXQ_QUANTUM] = { .type = NLA_U32 },
+	[NL80211_ATTR_HE_CAPABILITY] = { .type = NLA_BINARY,
+					 .len = NL80211_HE_MAX_CAPABILITY_LEN },
 };
 
 /* policy for the key attributes */
@@ -1324,6 +1326,34 @@ static int nl80211_send_coalesce(struct sk_buff *msg,
 	return 0;
 }
 
+static int
+nl80211_send_iftype_data(struct sk_buff *msg,
+			 const struct ieee80211_sband_iftype_data *iftdata)
+{
+	const struct ieee80211_sta_he_cap *he_cap = &iftdata->he_cap;
+
+	if (nl80211_put_iftypes(msg, NL80211_BAND_IFTYPE_ATTR_IFTYPES,
+				iftdata->types_mask))
+		return -ENOBUFS;
+
+	if (he_cap->has_he) {
+		if (nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_MAC,
+			    sizeof(he_cap->he_cap_elem.mac_cap_info),
+			    he_cap->he_cap_elem.mac_cap_info) ||
+		    nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_PHY,
+			    sizeof(he_cap->he_cap_elem.phy_cap_info),
+			    he_cap->he_cap_elem.phy_cap_info) ||
+		    nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET,
+			    sizeof(he_cap->he_mcs_nss_supp),
+			    &he_cap->he_mcs_nss_supp) ||
+		    nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE,
+			    sizeof(he_cap->ppe_thres), he_cap->ppe_thres))
+			return -ENOBUFS;
+	}
+
+	return 0;
+}
+
 static int nl80211_send_band_rateinfo(struct sk_buff *msg,
 				      struct ieee80211_supported_band *sband)
 {
@@ -1353,6 +1383,32 @@ static int nl80211_send_band_rateinfo(struct sk_buff *msg,
 			 sband->vht_cap.cap)))
 		return -ENOBUFS;
 
+	if (sband->n_iftype_data) {
+		struct nlattr *nl_iftype_data =
+			nla_nest_start(msg, NL80211_BAND_ATTR_IFTYPE_DATA);
+		int err;
+
+		if (!nl_iftype_data)
+			return -ENOBUFS;
+
+		for (i = 0; i < sband->n_iftype_data; i++) {
+			struct nlattr *iftdata;
+
+			iftdata = nla_nest_start(msg, i + 1);
+			if (!iftdata)
+				return -ENOBUFS;
+
+			err = nl80211_send_iftype_data(msg,
+						       &sband->iftype_data[i]);
+			if (err)
+				return err;
+
+			nla_nest_end(msg, iftdata);
+		}
+
+		nla_nest_end(msg, nl_iftype_data);
+	}
+
 	/* add bitrates */
 	nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES);
 	if (!nl_rates)
@@ -2757,7 +2813,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
 	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, wdev_address(wdev)) ||
 	    nla_put_u32(msg, NL80211_ATTR_GENERATION,
 			rdev->devlist_generation ^
-			(cfg80211_rdev_list_generation << 2)))
+			(cfg80211_rdev_list_generation << 2)) ||
+	    nla_put_u8(msg, NL80211_ATTR_4ADDR, wdev->use_4addr))
 		goto nla_put_failure;
 
 	if (rdev->ops->get_channel) {
@@ -4471,6 +4528,9 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info,
 	case RATE_INFO_BW_160:
 		rate_flg = NL80211_RATE_INFO_160_MHZ_WIDTH;
 		break;
+	case RATE_INFO_BW_HE_RU:
+		rate_flg = 0;
+		WARN_ON(!(info->flags & RATE_INFO_FLAGS_HE_MCS));
 	}
 
 	if (rate_flg && nla_put_flag(msg, rate_flg))
@@ -4490,6 +4550,19 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info,
 		if (info->flags & RATE_INFO_FLAGS_SHORT_GI &&
 		    nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI))
 			return false;
+	} else if (info->flags & RATE_INFO_FLAGS_HE_MCS) {
+		if (nla_put_u8(msg, NL80211_RATE_INFO_HE_MCS, info->mcs))
+			return false;
+		if (nla_put_u8(msg, NL80211_RATE_INFO_HE_NSS, info->nss))
+			return false;
+		if (nla_put_u8(msg, NL80211_RATE_INFO_HE_GI, info->he_gi))
+			return false;
+		if (nla_put_u8(msg, NL80211_RATE_INFO_HE_DCM, info->he_dcm))
+			return false;
+		if (info->bw == RATE_INFO_BW_HE_RU &&
+		    nla_put_u8(msg, NL80211_RATE_INFO_HE_RU_ALLOC,
+			       info->he_ru_alloc))
+			return false;
 	}
 
 	nla_nest_end(msg, rate);
@@ -4546,13 +4619,13 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
 
 #define PUT_SINFO(attr, memb, type) do {				\
 	BUILD_BUG_ON(sizeof(type) == sizeof(u64));			\
-	if (sinfo->filled & (1ULL << NL80211_STA_INFO_ ## attr) &&	\
+	if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_ ## attr) &&	\
 	    nla_put_ ## type(msg, NL80211_STA_INFO_ ## attr,		\
 			     sinfo->memb))				\
 		goto nla_put_failure;					\
 	} while (0)
 #define PUT_SINFO_U64(attr, memb) do {					\
-	if (sinfo->filled & (1ULL << NL80211_STA_INFO_ ## attr) &&	\
+	if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_ ## attr) &&	\
 	    nla_put_u64_64bit(msg, NL80211_STA_INFO_ ## attr,		\
 			      sinfo->memb, NL80211_STA_INFO_PAD))	\
 		goto nla_put_failure;					\
@@ -4561,14 +4634,14 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
 	PUT_SINFO(CONNECTED_TIME, connected_time, u32);
 	PUT_SINFO(INACTIVE_TIME, inactive_time, u32);
 
-	if (sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES) |
-			     BIT(NL80211_STA_INFO_RX_BYTES64)) &&
+	if (sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES) |
+			     BIT_ULL(NL80211_STA_INFO_RX_BYTES64)) &&
 	    nla_put_u32(msg, NL80211_STA_INFO_RX_BYTES,
 			(u32)sinfo->rx_bytes))
 		goto nla_put_failure;
 
-	if (sinfo->filled & (BIT(NL80211_STA_INFO_TX_BYTES) |
-			     BIT(NL80211_STA_INFO_TX_BYTES64)) &&
+	if (sinfo->filled & (BIT_ULL(NL80211_STA_INFO_TX_BYTES) |
+			     BIT_ULL(NL80211_STA_INFO_TX_BYTES64)) &&
 	    nla_put_u32(msg, NL80211_STA_INFO_TX_BYTES,
 			(u32)sinfo->tx_bytes))
 		goto nla_put_failure;
@@ -4588,24 +4661,24 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
 	default:
 		break;
 	}
-	if (sinfo->filled & BIT(NL80211_STA_INFO_CHAIN_SIGNAL)) {
+	if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL)) {
 		if (!nl80211_put_signal(msg, sinfo->chains,
 					sinfo->chain_signal,
 					NL80211_STA_INFO_CHAIN_SIGNAL))
 			goto nla_put_failure;
 	}
-	if (sinfo->filled & BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) {
+	if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) {
 		if (!nl80211_put_signal(msg, sinfo->chains,
 					sinfo->chain_signal_avg,
 					NL80211_STA_INFO_CHAIN_SIGNAL_AVG))
 			goto nla_put_failure;
 	}
-	if (sinfo->filled & BIT(NL80211_STA_INFO_TX_BITRATE)) {
+	if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE)) {
 		if (!nl80211_put_sta_rate(msg, &sinfo->txrate,
 					  NL80211_STA_INFO_TX_BITRATE))
 			goto nla_put_failure;
 	}
-	if (sinfo->filled & BIT(NL80211_STA_INFO_RX_BITRATE)) {
+	if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_BITRATE)) {
 		if (!nl80211_put_sta_rate(msg, &sinfo->rxrate,
 					  NL80211_STA_INFO_RX_BITRATE))
 			goto nla_put_failure;
@@ -4621,7 +4694,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
 	PUT_SINFO(PEER_PM, peer_pm, u32);
 	PUT_SINFO(NONPEER_PM, nonpeer_pm, u32);
 
-	if (sinfo->filled & BIT(NL80211_STA_INFO_BSS_PARAM)) {
+	if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_BSS_PARAM)) {
 		bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM);
 		if (!bss_param)
 			goto nla_put_failure;
@@ -4640,7 +4713,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
 
 		nla_nest_end(msg, bss_param);
 	}
-	if ((sinfo->filled & BIT(NL80211_STA_INFO_STA_FLAGS)) &&
+	if ((sinfo->filled & BIT_ULL(NL80211_STA_INFO_STA_FLAGS)) &&
 	    nla_put(msg, NL80211_STA_INFO_STA_FLAGS,
 		    sizeof(struct nl80211_sta_flag_update),
 		    &sinfo->sta_flags))
@@ -4886,7 +4959,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
 			return -EINVAL;
 		if (params->supported_rates)
 			return -EINVAL;
-		if (params->ext_capab || params->ht_capa || params->vht_capa)
+		if (params->ext_capab || params->ht_capa || params->vht_capa ||
+		    params->he_capa)
 			return -EINVAL;
 	}
 
@@ -5092,6 +5166,15 @@ static int nl80211_set_station_tdls(struct genl_info *info,
 	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY])
 		params->vht_capa =
 			nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
+	if (info->attrs[NL80211_ATTR_HE_CAPABILITY]) {
+		params->he_capa =
+			nla_data(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
+		params->he_capa_len =
+			nla_len(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
+
+		if (params->he_capa_len < NL80211_HE_MIN_CAPABILITY_LEN)
+			return -EINVAL;
+	}
 
 	err = nl80211_parse_sta_channel_info(info, params);
 	if (err)
@@ -5319,6 +5402,17 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 		params.vht_capa =
 			nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
 
+	if (info->attrs[NL80211_ATTR_HE_CAPABILITY]) {
+		params.he_capa =
+			nla_data(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
+		params.he_capa_len =
+			nla_len(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
+
+		/* max len is validated in nla policy */
+		if (params.he_capa_len < NL80211_HE_MIN_CAPABILITY_LEN)
+			return -EINVAL;
+	}
+
 	if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) {
 		params.opmode_notif_used = true;
 		params.opmode_notif =
@@ -5351,6 +5445,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 	if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_WME))) {
 		params.ht_capa = NULL;
 		params.vht_capa = NULL;
+
+		/* HE requires WME */
+		if (params.he_capa_len)
+			return -EINVAL;
 	}
 
 	/* When you run into this, adjust the code below for the new flag */
@@ -6848,6 +6946,16 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev)
 	return regulatory_pre_cac_allowed(wdev->wiphy);
 }
 
+static bool nl80211_check_scan_feat(struct wiphy *wiphy, u32 flags, u32 flag,
+				    enum nl80211_ext_feature_index feat)
+{
+	if (!(flags & flag))
+		return true;
+	if (wiphy_ext_feature_isset(wiphy, feat))
+		return true;
+	return false;
+}
+
 static int
 nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
 			 void *request, struct nlattr **attrs,
@@ -6882,15 +6990,33 @@ nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
 
 	if (((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
 	     !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) ||
-	    ((*flags & NL80211_SCAN_FLAG_LOW_SPAN) &&
-	     !wiphy_ext_feature_isset(wiphy,
-				      NL80211_EXT_FEATURE_LOW_SPAN_SCAN)) ||
-	    ((*flags & NL80211_SCAN_FLAG_LOW_POWER) &&
-	     !wiphy_ext_feature_isset(wiphy,
-				      NL80211_EXT_FEATURE_LOW_POWER_SCAN)) ||
-	    ((*flags & NL80211_SCAN_FLAG_HIGH_ACCURACY) &&
-	     !wiphy_ext_feature_isset(wiphy,
-				      NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN)))
+	    !nl80211_check_scan_feat(wiphy, *flags,
+				     NL80211_SCAN_FLAG_LOW_SPAN,
+				     NL80211_EXT_FEATURE_LOW_SPAN_SCAN) ||
+	    !nl80211_check_scan_feat(wiphy, *flags,
+				     NL80211_SCAN_FLAG_LOW_POWER,
+				     NL80211_EXT_FEATURE_LOW_POWER_SCAN) ||
+	    !nl80211_check_scan_feat(wiphy, *flags,
+				     NL80211_SCAN_FLAG_HIGH_ACCURACY,
+				     NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN) ||
+	    !nl80211_check_scan_feat(wiphy, *flags,
+				     NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME,
+				     NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME) ||
+	    !nl80211_check_scan_feat(wiphy, *flags,
+				     NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP,
+				     NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP) ||
+	    !nl80211_check_scan_feat(wiphy, *flags,
+				     NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION,
+				     NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION) ||
+	    !nl80211_check_scan_feat(wiphy, *flags,
+				     NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE,
+				     NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE) ||
+	    !nl80211_check_scan_feat(wiphy, *flags,
+				     NL80211_SCAN_FLAG_RANDOM_SN,
+				     NL80211_EXT_FEATURE_SCAN_RANDOM_SN) ||
+	    !nl80211_check_scan_feat(wiphy, *flags,
+				     NL80211_SCAN_FLAG_MIN_PREQ_CONTENT,
+				     NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT))
 		return -EOPNOTSUPP;
 
 	if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
@@ -6905,26 +7031,6 @@ nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
 			return err;
 	}
 
-	if ((*flags & NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME) &&
-	    !wiphy_ext_feature_isset(wiphy,
-				     NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME))
-		return -EOPNOTSUPP;
-
-	if ((*flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP) &&
-	   !wiphy_ext_feature_isset(wiphy,
-				    NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP))
-		return -EOPNOTSUPP;
-
-	if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION) &&
-	    !wiphy_ext_feature_isset(wiphy,
-				     NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION))
-		return -EOPNOTSUPP;
-
-	if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE) &&
-	    !wiphy_ext_feature_isset(wiphy,
-				     NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE))
-		return -EOPNOTSUPP;
-
 	return 0;
 }
 
@@ -10147,7 +10253,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
 		if (err)
 			return err;
 
-		if (sinfo.filled & BIT(NL80211_STA_INFO_BEACON_SIGNAL_AVG))
+		if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG))
 			wdev->cqm_config->last_rssi_event_value =
 				(s8) sinfo.rx_beacon_signal_avg;
 	}
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 570a2b67ca10..6ab32f6a1961 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -102,7 +102,7 @@ static int wiphy_suspend(struct device *dev)
 	struct cfg80211_registered_device *rdev = dev_to_rdev(dev);
 	int ret = 0;
 
-	rdev->suspend_at = get_seconds();
+	rdev->suspend_at = ktime_get_boottime_seconds();
 
 	rtnl_lock();
 	if (rdev->wiphy.registered) {
@@ -130,7 +130,7 @@ static int wiphy_resume(struct device *dev)
 	int ret = 0;
 
 	/* Age scan results with time spent in suspend */
-	cfg80211_bss_age(rdev, get_seconds() - rdev->suspend_at);
+	cfg80211_bss_age(rdev, ktime_get_boottime_seconds() - rdev->suspend_at);
 
 	rtnl_lock();
 	if (rdev->wiphy.registered && rdev->ops->resume)
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 3c654cd7ba56..e0825a019e9f 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -4,6 +4,7 @@
  *
  * Copyright 2007-2009	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
+ * Copyright 2017	Intel Deutschland GmbH
  */
 #include <linux/export.h>
 #include <linux/bitops.h>
@@ -1142,6 +1143,85 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)
 	return 0;
 }
 
+static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate)
+{
+#define SCALE 2048
+	u16 mcs_divisors[12] = {
+		34133, /* 16.666666... */
+		17067, /*  8.333333... */
+		11378, /*  5.555555... */
+		 8533, /*  4.166666... */
+		 5689, /*  2.777777... */
+		 4267, /*  2.083333... */
+		 3923, /*  1.851851... */
+		 3413, /*  1.666666... */
+		 2844, /*  1.388888... */
+		 2560, /*  1.250000... */
+		 2276, /*  1.111111... */
+		 2048, /*  1.000000... */
+	};
+	u32 rates_160M[3] = { 960777777, 907400000, 816666666 };
+	u32 rates_969[3] =  { 480388888, 453700000, 408333333 };
+	u32 rates_484[3] =  { 229411111, 216666666, 195000000 };
+	u32 rates_242[3] =  { 114711111, 108333333,  97500000 };
+	u32 rates_106[3] =  {  40000000,  37777777,  34000000 };
+	u32 rates_52[3]  =  {  18820000,  17777777,  16000000 };
+	u32 rates_26[3]  =  {   9411111,   8888888,   8000000 };
+	u64 tmp;
+	u32 result;
+
+	if (WARN_ON_ONCE(rate->mcs > 11))
+		return 0;
+
+	if (WARN_ON_ONCE(rate->he_gi > NL80211_RATE_INFO_HE_GI_3_2))
+		return 0;
+	if (WARN_ON_ONCE(rate->he_ru_alloc >
+			 NL80211_RATE_INFO_HE_RU_ALLOC_2x996))
+		return 0;
+	if (WARN_ON_ONCE(rate->nss < 1 || rate->nss > 8))
+		return 0;
+
+	if (rate->bw == RATE_INFO_BW_160)
+		result = rates_160M[rate->he_gi];
+	else if (rate->bw == RATE_INFO_BW_80 ||
+		 (rate->bw == RATE_INFO_BW_HE_RU &&
+		  rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_996))
+		result = rates_969[rate->he_gi];
+	else if (rate->bw == RATE_INFO_BW_40 ||
+		 (rate->bw == RATE_INFO_BW_HE_RU &&
+		  rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_484))
+		result = rates_484[rate->he_gi];
+	else if (rate->bw == RATE_INFO_BW_20 ||
+		 (rate->bw == RATE_INFO_BW_HE_RU &&
+		  rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_242))
+		result = rates_242[rate->he_gi];
+	else if (rate->bw == RATE_INFO_BW_HE_RU &&
+		 rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_106)
+		result = rates_106[rate->he_gi];
+	else if (rate->bw == RATE_INFO_BW_HE_RU &&
+		 rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_52)
+		result = rates_52[rate->he_gi];
+	else if (rate->bw == RATE_INFO_BW_HE_RU &&
+		 rate->he_ru_alloc == NL80211_RATE_INFO_HE_RU_ALLOC_26)
+		result = rates_26[rate->he_gi];
+	else if (WARN(1, "invalid HE MCS: bw:%d, ru:%d\n",
+		      rate->bw, rate->he_ru_alloc))
+		return 0;
+
+	/* now scale to the appropriate MCS */
+	tmp = result;
+	tmp *= SCALE;
+	do_div(tmp, mcs_divisors[rate->mcs]);
+	result = tmp;
+
+	/* and take NSS, DCM into account */
+	result = (result * rate->nss) / 8;
+	if (rate->he_dcm)
+		result /= 2;
+
+	return result;
+}
+
 u32 cfg80211_calculate_bitrate(struct rate_info *rate)
 {
 	if (rate->flags & RATE_INFO_FLAGS_MCS)
@@ -1150,6 +1230,8 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate)
 		return cfg80211_calculate_bitrate_60g(rate);
 	if (rate->flags & RATE_INFO_FLAGS_VHT_MCS)
 		return cfg80211_calculate_bitrate_vht(rate);
+	if (rate->flags & RATE_INFO_FLAGS_HE_MCS)
+		return cfg80211_calculate_bitrate_he(rate);
 
 	return rate->legacy;
 }
@@ -1791,8 +1873,9 @@ bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range,
 
 int cfg80211_sinfo_alloc_tid_stats(struct station_info *sinfo, gfp_t gfp)
 {
-	sinfo->pertid = kcalloc(sizeof(*(sinfo->pertid)),
-				IEEE80211_NUM_TIDS + 1, gfp);
+	sinfo->pertid = kcalloc(IEEE80211_NUM_TIDS + 1,
+				sizeof(*(sinfo->pertid)),
+				gfp);
 	if (!sinfo->pertid)
 		return -ENOMEM;
 
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 05186a47878f..167f7025ac98 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -1278,7 +1278,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev,
 	if (err)
 		return err;
 
-	if (!(sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE)))
+	if (!(sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE)))
 		return -EOPNOTSUPP;
 
 	rate->value = 100000 * cfg80211_calculate_bitrate(&sinfo.txrate);
@@ -1320,7 +1320,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
 
 	switch (rdev->wiphy.signal_type) {
 	case CFG80211_SIGNAL_TYPE_MBM:
-		if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL)) {
+		if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_SIGNAL)) {
 			int sig = sinfo.signal;
 			wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED;
 			wstats.qual.updated |= IW_QUAL_QUAL_UPDATED;
@@ -1334,7 +1334,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
 			break;
 		}
 	case CFG80211_SIGNAL_TYPE_UNSPEC:
-		if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL)) {
+		if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_SIGNAL)) {
 			wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED;
 			wstats.qual.updated |= IW_QUAL_QUAL_UPDATED;
 			wstats.qual.level = sinfo.signal;
@@ -1347,9 +1347,9 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
 	}
 
 	wstats.qual.updated |= IW_QUAL_NOISE_INVALID;
-	if (sinfo.filled & BIT(NL80211_STA_INFO_RX_DROP_MISC))
+	if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC))
 		wstats.discard.misc = sinfo.rx_dropped_misc;
-	if (sinfo.filled & BIT(NL80211_STA_INFO_TX_FAILED))
+	if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED))
 		wstats.discard.retries = sinfo.tx_failed;
 
 	return &wstats;