271 files changed, 13589 insertions, 4434 deletions
diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index 723408e399ab..7985c6615f3c 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -135,6 +135,30 @@ Either way, the returned buffer is either NULL, or of size buffer_szk. Without t annotation, the verifier will reject the program if a null pointer is passed in with a nonzero size. +2.2.5 __str Annotation +---------------------------- +This annotation is used to indicate that the argument is a constant string. + +An example is given below:: + + __bpf_kfunc bpf_get_file_xattr(..., const char *name__str, ...) + { + ... + } + +In this case, ``bpf_get_file_xattr()`` can be called as:: + + bpf_get_file_xattr(..., "xattr_name", ...); + +Or:: + + const char name[] = "xattr_name"; /* This need to be global */ + int BPF_PROG(...) + { + ... + bpf_get_file_xattr(..., name, ...); + ... + } .. _BPF_kfunc_nodef: diff --git a/Documentation/devicetree/bindings/net/marvell,aquantia.yaml b/Documentation/devicetree/bindings/net/marvell,aquantia.yaml new file mode 100644 index 000000000000..9854fab4c4db --- /dev/null +++ b/Documentation/devicetree/bindings/net/marvell,aquantia.yaml @@ -0,0 +1,116 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/marvell,aquantia.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Marvell Aquantia Ethernet PHY + +maintainers: + - Christian Marangi <ansuelsmth@gmail.com> + +description: | + Marvell Aquantia Ethernet PHY require a firmware to be loaded to actually + work. + + This can be done and is implemented by OEM in 3 different way: + - Attached SPI flash directly to the PHY with the firmware. The PHY + will self load the firmware in the presence of this configuration. + - Read from a dedicated partition on system NAND declared in an + NVMEM cell, and loaded to the PHY using its mailbox interface. + - Manually provided firmware loaded from a file in the filesystem. + +allOf: + - $ref: ethernet-phy.yaml# + +select: + properties: + compatible: + contains: + enum: + - ethernet-phy-id03a1.b445 + - ethernet-phy-id03a1.b460 + - ethernet-phy-id03a1.b4a2 + - ethernet-phy-id03a1.b4d0 + - ethernet-phy-id03a1.b4e0 + - ethernet-phy-id03a1.b5c2 + - ethernet-phy-id03a1.b4b0 + - ethernet-phy-id03a1.b662 + - ethernet-phy-id03a1.b712 + - ethernet-phy-id31c3.1c12 + required: + - compatible + +properties: + reg: + maxItems: 1 + + firmware-name: + description: specify the name of PHY firmware to load + + nvmem-cells: + description: phandle to the firmware nvmem cell + maxItems: 1 + + nvmem-cell-names: + const: firmware + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethernet-phy@0 { + compatible = "ethernet-phy-id31c3.1c12", + "ethernet-phy-ieee802.3-c45"; + + reg = <0>; + firmware-name = "AQR-G4_v5.4.C-AQR_CIG_WF-1945_0x8_ID44776_VER1630.cld"; + }; + + ethernet-phy@1 { + compatible = "ethernet-phy-id31c3.1c12", + "ethernet-phy-ieee802.3-c45"; + + reg = <1>; + nvmem-cells = <&aqr_fw>; + nvmem-cell-names = "firmware"; + }; + }; + + flash { + compatible = "jedec,spi-nor"; + #address-cells = <1>; + #size-cells = <1>; + + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + /* ... 
*/ + + partition@650000 { + compatible = "nvmem-cells"; + label = "0:ethphyfw"; + reg = <0x650000 0x80000>; + read-only; + #address-cells = <1>; + #size-cells = <1>; + + aqr_fw: aqr_fw@0 { + reg = <0x0 0x5f42a>; + }; + }; + + /* ... */ + + }; + }; diff --git a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml index 5d074f27d462..d3306b186000 100644 --- a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml +++ b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml @@ -55,7 +55,7 @@ properties: - items: - enum: - - renesas,r9a07g043-gbeth # RZ/G2UL + - renesas,r9a07g043-gbeth # RZ/G2UL and RZ/Five - renesas,r9a07g044-gbeth # RZ/G2{L,LC} - renesas,r9a07g054-gbeth # RZ/V2L - const: renesas,rzg2l-gbeth # RZ/{G2L,G2UL,V2L} family diff --git a/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml b/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml new file mode 100644 index 000000000000..475aff7714d6 --- /dev/null +++ b/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml @@ -0,0 +1,133 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/renesas,ethertsn.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Renesas Ethernet TSN End-station + +maintainers: + - Niklas Söderlund <niklas.soderlund@ragnatech.se> + +description: + The RTSN device provides Ethernet network using a 10 Mbps, 100 Mbps, or 1 + Gbps full-duplex link via MII/GMII/RMII/RGMII. Depending on the connected PHY. + +allOf: + - $ref: ethernet-controller.yaml# + +properties: + compatible: + items: + - enum: + - renesas,r8a779g0-ethertsn # R-Car V4H + - const: renesas,rcar-gen4-ethertsn + + reg: + items: + - description: TSN End Station target + - description: generalized Precision Time Protocol target + + reg-names: + items: + - const: tsnes + - const: gptp + + interrupts: + items: + - description: TX data interrupt + - description: RX data interrupt + + interrupt-names: + items: + - const: tx + - const: rx + + clocks: + maxItems: 1 + + power-domains: + maxItems: 1 + + resets: + maxItems: 1 + + phy-mode: + contains: + enum: + - mii + - rgmii + + phy-handle: + $ref: /schemas/types.yaml#/definitions/phandle + description: + Specifies a reference to a node representing a PHY device. 
+ + rx-internal-delay-ps: + enum: [0, 1800] + + tx-internal-delay-ps: + enum: [0, 2000] + + '#address-cells': + const: 1 + + '#size-cells': + const: 0 + +patternProperties: + "^ethernet-phy@[0-9a-f]$": + type: object + $ref: ethernet-phy.yaml# + unevaluatedProperties: false + +required: + - compatible + - reg + - reg-names + - interrupts + - interrupt-names + - clocks + - power-domains + - resets + - phy-mode + - phy-handle + - '#address-cells' + - '#size-cells' + +additionalProperties: false + +examples: + - | + #include <dt-bindings/clock/r8a779g0-cpg-mssr.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> + #include <dt-bindings/power/r8a779g0-sysc.h> + #include <dt-bindings/gpio/gpio.h> + + tsn0: ethernet@e6460000 { + compatible = "renesas,r8a779g0-ethertsn", "renesas,rcar-gen4-ethertsn"; + reg = <0xe6460000 0x7000>, + <0xe6449000 0x500>; + reg-names = "tsnes", "gptp"; + interrupts = <GIC_SPI 429 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 430 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "tx", "rx"; + clocks = <&cpg CPG_MOD 2723>; + power-domains = <&sysc R8A779G0_PD_ALWAYS_ON>; + resets = <&cpg 2723>; + + phy-mode = "rgmii"; + tx-internal-delay-ps = <2000>; + phy-handle = <&phy3>; + + #address-cells = <1>; + #size-cells = <0>; + + phy3: ethernet-phy@3 { + compatible = "ethernet-phy-ieee802.3-c45"; + reg = <0>; + interrupt-parent = <&gpio4>; + interrupts = <3 IRQ_TYPE_LEVEL_LOW>; + reset-gpios = <&gpio1 23 GPIO_ACTIVE_LOW>; + }; + }; diff --git a/Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml b/Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml index 1d33d80af11c..bbe89ea9590c 100644 --- a/Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml +++ b/Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml @@ -122,6 +122,20 @@ properties: and "phy-handle" should point to an external PHY if exists. maxItems: 1 + dmas: + minItems: 2 + maxItems: 32 + description: TX and RX DMA channel phandle + + dma-names: + items: + pattern: "^[tr]x_chan([0-9]|1[0-5])$" + description: + Should be "tx_chan0", "tx_chan1" ... "tx_chan15" for DMA Tx channel + Should be "rx_chan0", "rx_chan1" ... "rx_chan15" for DMA Rx channel + minItems: 2 + maxItems: 32 + required: - compatible - interrupts @@ -143,6 +157,8 @@ examples: clocks = <&axi_clk>, <&axi_clk>, <&pl_enet_ref_clk>, <&mgt_clk>; phy-mode = "mii"; reg = <0x40c00000 0x40000>,<0x50c00000 0x40000>; + dmas = <&xilinx_dma 0>, <&xilinx_dma 1>; + dma-names = "tx_chan0", "rx_chan0"; xlnx,rxcsum = <0x2>; xlnx,rxmem = <0x800>; xlnx,txcsum = <0x2>; diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index 572d83a414d0..43067e1f63aa 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -1484,8 +1484,8 @@ operations: dont-validate: [ strict ] flags: [ admin-perm ] do: - pre: devlink-nl-pre-doit - post: devlink-nl-post-doit + pre: devlink-nl-pre-doit-dev-lock + post: devlink-nl-post-doit-dev-lock request: attributes: - bus-name diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst index cad96c8d1f97..613a818d5db6 100644 --- a/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst +++ b/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst @@ -24,6 +24,10 @@ Supported Devices Currently, this driver support following devices: * Network controller: Cavium, Inc. Device b200 * Network controller: Cavium, Inc. 
Device b400 + * Network controller: Cavium, Inc. Device b900 + * Network controller: Cavium, Inc. Device ba00 + * Network controller: Cavium, Inc. Device bc00 + * Network controller: Cavium, Inc. Device bd00 Interface Control ================= diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index dc2c7b979656..7edf0fd58c34 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -985,7 +985,8 @@ static int alb_upper_dev_walk(struct net_device *upper, if (netif_is_macvlan(upper) && !strict_match) { tags = bond_verify_device_path(bond->dev, upper, 0); if (IS_ERR_OR_NULL(tags)) - BUG(); + return -ENOMEM; + alb_send_lp_vid(slave, upper->dev_addr, tags[0].vlan_proto, tags[0].vlan_id); kfree(tags); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 8e6cc0e133b7..d987432cee3b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2973,8 +2973,11 @@ struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev, if (start_dev == end_dev) { tags = kcalloc(level + 1, sizeof(*tags), GFP_ATOMIC); - if (!tags) + if (!tags) { + net_err_ratelimited("%s: %s: Failed to allocate tags\n", + __func__, start_dev->name); return ERR_PTR(-ENOMEM); + } tags[level].vlan_proto = BOND_VLAN_PROTO_NONE; return tags; } @@ -5755,10 +5758,8 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev, { struct bonding *bond = netdev_priv(bond_dev); struct ethtool_ts_info ts_info; - const struct ethtool_ops *ops; struct net_device *real_dev; bool sw_tx_support = false; - struct phy_device *phydev; struct list_head *iter; struct slave *slave; int ret = 0; @@ -5769,29 +5770,12 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev, rcu_read_unlock(); if (real_dev) { - ops = real_dev->ethtool_ops; - phydev = real_dev->phydev; - - if (phy_has_tsinfo(phydev)) { - ret = phy_ts_info(phydev, info); - goto out; - } else if (ops->get_ts_info) { - ret = ops->get_ts_info(real_dev, info); - goto out; - } + ret = ethtool_get_ts_info_by_layer(real_dev, info); } else { /* Check if all slaves support software tx timestamping */ rcu_read_lock(); bond_for_each_slave_rcu(bond, slave, iter) { - ret = -1; - ops = slave->dev->ethtool_ops; - phydev = slave->dev->phydev; - - if (phy_has_tsinfo(phydev)) - ret = phy_ts_info(phydev, &ts_info); - else if (ops->get_ts_info) - ret = ops->get_ts_info(slave->dev, &ts_info); - + ret = ethtool_get_ts_info_by_layer(slave->dev, &ts_info); if (!ret && (ts_info.so_timestamping & SOF_TIMESTAMPING_TX_SOFTWARE)) { sw_tx_support = true; continue; @@ -5803,15 +5787,9 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev, rcu_read_unlock(); } - ret = 0; - info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; if (sw_tx_support) info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE; - info->phc_index = -1; - -out: dev_put(real_dev); return ret; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d0359b569afe..d8cf7b30bd03 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -254,22 +254,22 @@ static bool bnxt_vf_pciid(enum board_idx idx) writel(DB_CP_IRQ_DIS_FLAGS, db) #define BNXT_DB_CQ(db, idx) \ - writel(DB_CP_FLAGS | RING_CMP(idx), (db)->doorbell) + writel(DB_CP_FLAGS | DB_RING_IDX(db, idx), (db)->doorbell) #define BNXT_DB_NQ_P5(db, idx) \ - bnxt_writeq(bp, (db)->db_key64 | DBR_TYPE_NQ | RING_CMP(idx), \ + 
bnxt_writeq(bp, (db)->db_key64 | DBR_TYPE_NQ | DB_RING_IDX(db, idx),\ (db)->doorbell) #define BNXT_DB_CQ_ARM(db, idx) \ - writel(DB_CP_REARM_FLAGS | RING_CMP(idx), (db)->doorbell) + writel(DB_CP_REARM_FLAGS | DB_RING_IDX(db, idx), (db)->doorbell) #define BNXT_DB_NQ_ARM_P5(db, idx) \ - bnxt_writeq(bp, (db)->db_key64 | DBR_TYPE_NQ_ARM | RING_CMP(idx),\ - (db)->doorbell) + bnxt_writeq(bp, (db)->db_key64 | DBR_TYPE_NQ_ARM | \ + DB_RING_IDX(db, idx), (db)->doorbell) static void bnxt_db_nq(struct bnxt *bp, struct bnxt_db_info *db, u32 idx) { - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) BNXT_DB_NQ_P5(db, idx); else BNXT_DB_CQ(db, idx); @@ -277,7 +277,7 @@ static void bnxt_db_nq(struct bnxt *bp, struct bnxt_db_info *db, u32 idx) static void bnxt_db_nq_arm(struct bnxt *bp, struct bnxt_db_info *db, u32 idx) { - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) BNXT_DB_NQ_ARM_P5(db, idx); else BNXT_DB_CQ_ARM(db, idx); @@ -285,9 +285,9 @@ static void bnxt_db_nq_arm(struct bnxt *bp, struct bnxt_db_info *db, u32 idx) static void bnxt_db_cq(struct bnxt *bp, struct bnxt_db_info *db, u32 idx) { - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) bnxt_writeq(bp, db->db_key64 | DBR_TYPE_CQ_ARMALL | - RING_CMP(idx), db->doorbell); + DB_RING_IDX(db, idx), db->doorbell); else BNXT_DB_CQ(db, idx); } @@ -321,7 +321,7 @@ static void bnxt_sched_reset_rxr(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) { if (!rxr->bnapi->in_reset) { rxr->bnapi->in_reset = true; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) set_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event); else set_bit(BNXT_RST_RING_SP_EVENT, &bp->sp_event); @@ -331,16 +331,16 @@ static void bnxt_sched_reset_rxr(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) } void bnxt_sched_reset_txr(struct bnxt *bp, struct bnxt_tx_ring_info *txr, - int idx) + u16 curr) { struct bnxt_napi *bnapi = txr->bnapi; if (bnapi->tx_fault) return; - netdev_err(bp->dev, "Invalid Tx completion (ring:%d tx_pkts:%d cons:%u prod:%u i:%d)", - txr->txq_index, bnapi->tx_pkts, - txr->tx_cons, txr->tx_prod, idx); + netdev_err(bp->dev, "Invalid Tx completion (ring:%d tx_hw_cons:%u cons:%u prod:%u curr:%u)", + txr->txq_index, txr->tx_hw_cons, + txr->tx_cons, txr->tx_prod, curr); WARN_ON_ONCE(1); bnapi->tx_fault = 1; bnxt_queue_sp_work(bp, BNXT_RESET_TASK_SP_EVENT); @@ -381,6 +381,8 @@ static u16 bnxt_xmit_get_cfa_action(struct sk_buff *skb) static void bnxt_txr_db_kick(struct bnxt *bp, struct bnxt_tx_ring_info *txr, u16 prod) { + /* Sync BD data before updating doorbell */ + wmb(); bnxt_db_write(bp, &txr->tx_db, prod); txr->kick_pending = 0; } @@ -388,7 +390,7 @@ static void bnxt_txr_db_kick(struct bnxt *bp, struct bnxt_tx_ring_info *txr, static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct bnxt *bp = netdev_priv(dev); - struct tx_bd *txbd; + struct tx_bd *txbd, *txbd0; struct tx_bd_ext *txbd1; struct netdev_queue *txq; int i; @@ -430,11 +432,9 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) len = skb_headlen(skb); last_frag = skb_shinfo(skb)->nr_frags; - txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; + txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)]; - txbd->tx_bd_opaque = prod; - - tx_buf = &txr->tx_buf_ring[prod]; + tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)]; tx_buf->skb = skb; tx_buf->nr_frags = last_frag; @@ -519,12 +519,15 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, 
struct net_device *dev) txbd->tx_bd_len_flags_type = tx_push->tx_bd_len_flags_type; txbd->tx_bd_haddr = txr->data_mapping; + txbd->tx_bd_opaque = SET_TX_OPAQUE(bp, txr, prod, 2); prod = NEXT_TX(prod); - txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; + tx_push->tx_bd_opaque = txbd->tx_bd_opaque; + txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)]; memcpy(txbd, tx_push1, sizeof(*txbd)); prod = NEXT_TX(prod); tx_push->doorbell = - cpu_to_le32(DB_KEY_TX_PUSH | DB_LONG_TX_PUSH | prod); + cpu_to_le32(DB_KEY_TX_PUSH | DB_LONG_TX_PUSH | + DB_RING_IDX(&txr->tx_db, prod)); WRITE_ONCE(txr->tx_prod, prod); tx_buf->is_push = 1; @@ -562,10 +565,11 @@ normal_tx: ((last_frag + 2) << TX_BD_FLAGS_BD_CNT_SHIFT); txbd->tx_bd_haddr = cpu_to_le64(mapping); + txbd->tx_bd_opaque = SET_TX_OPAQUE(bp, txr, prod, 2 + last_frag); prod = NEXT_TX(prod); txbd1 = (struct tx_bd_ext *) - &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; + &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)]; txbd1->tx_bd_hsize_lflags = lflags; if (skb_is_gso(skb)) { @@ -601,11 +605,12 @@ normal_tx: txbd1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags); txbd1->tx_bd_cfa_action = cpu_to_le32(cfa_action << TX_BD_CFA_ACTION_SHIFT); + txbd0 = txbd; for (i = 0; i < last_frag; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; prod = NEXT_TX(prod); - txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; + txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)]; len = skb_frag_size(frag); mapping = skb_frag_dma_map(&pdev->dev, frag, 0, len, @@ -614,7 +619,7 @@ normal_tx: if (unlikely(dma_mapping_error(&pdev->dev, mapping))) goto tx_dma_error; - tx_buf = &txr->tx_buf_ring[prod]; + tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)]; dma_unmap_addr_set(tx_buf, mapping, mapping); txbd->tx_bd_haddr = cpu_to_le64(mapping); @@ -632,16 +637,17 @@ normal_tx: skb_tx_timestamp(skb); - /* Sync BD data before updating doorbell */ - wmb(); - prod = NEXT_TX(prod); WRITE_ONCE(txr->tx_prod, prod); - if (!netdev_xmit_more() || netif_xmit_stopped(txq)) + if (!netdev_xmit_more() || netif_xmit_stopped(txq)) { bnxt_txr_db_kick(bp, txr, prod); - else + } else { + if (free_size >= bp->tx_wake_thresh) + txbd0->tx_bd_len_flags_type |= + cpu_to_le32(TX_BD_FLAGS_NO_CMPL); txr->kick_pending = 1; + } tx_done: @@ -662,7 +668,7 @@ tx_dma_error: /* start back at beginning and unmap skb */ prod = txr->tx_prod; - tx_buf = &txr->tx_buf_ring[prod]; + tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)]; dma_unmap_single(&pdev->dev, dma_unmap_addr(tx_buf, mapping), skb_headlen(skb), DMA_TO_DEVICE); prod = NEXT_TX(prod); @@ -670,7 +676,7 @@ tx_dma_error: /* unmap remaining mapped pages */ for (i = 0; i < last_frag; i++) { prod = NEXT_TX(prod); - tx_buf = &txr->tx_buf_ring[prod]; + tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)]; dma_unmap_page(&pdev->dev, dma_unmap_addr(tx_buf, mapping), skb_frag_size(&skb_shinfo(skb)->frags[i]), DMA_TO_DEVICE); @@ -686,31 +692,32 @@ tx_kick_pending: return NETDEV_TX_OK; } -static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) +static void __bnxt_tx_int(struct bnxt *bp, struct bnxt_tx_ring_info *txr, + int budget) { - struct bnxt_tx_ring_info *txr = bnapi->tx_ring; struct netdev_queue *txq = netdev_get_tx_queue(bp->dev, txr->txq_index); - u16 cons = txr->tx_cons; struct pci_dev *pdev = bp->pdev; - int nr_pkts = bnapi->tx_pkts; - int i; + u16 hw_cons = txr->tx_hw_cons; unsigned int tx_bytes = 0; + u16 cons = txr->tx_cons; + int tx_pkts = 0; - for (i = 0; i < nr_pkts; i++) { + while (RING_TX(bp, cons) != hw_cons) { struct 
bnxt_sw_tx_bd *tx_buf; struct sk_buff *skb; int j, last; - tx_buf = &txr->tx_buf_ring[cons]; + tx_buf = &txr->tx_buf_ring[RING_TX(bp, cons)]; cons = NEXT_TX(cons); skb = tx_buf->skb; tx_buf->skb = NULL; if (unlikely(!skb)) { - bnxt_sched_reset_txr(bp, txr, i); + bnxt_sched_reset_txr(bp, txr, cons); return; } + tx_pkts++; tx_bytes += skb->len; if (tx_buf->is_push) { @@ -724,7 +731,7 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) for (j = 0; j < last; j++) { cons = NEXT_TX(cons); - tx_buf = &txr->tx_buf_ring[cons]; + tx_buf = &txr->tx_buf_ring[RING_TX(bp, cons)]; dma_unmap_page( &pdev->dev, dma_unmap_addr(tx_buf, mapping), @@ -732,7 +739,7 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) DMA_TO_DEVICE); } if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (BNXT_CHIP_P5(bp)) { /* PTP worker takes ownership of the skb */ if (!bnxt_get_tx_ts_p5(bp, skb)) skb = NULL; @@ -747,14 +754,25 @@ next_tx_int: dev_consume_skb_any(skb); } - bnapi->tx_pkts = 0; WRITE_ONCE(txr->tx_cons, cons); - __netif_txq_completed_wake(txq, nr_pkts, tx_bytes, + __netif_txq_completed_wake(txq, tx_pkts, tx_bytes, bnxt_tx_avail(bp, txr), bp->tx_wake_thresh, READ_ONCE(txr->dev_state) == BNXT_DEV_STATE_CLOSING); } +static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) +{ + struct bnxt_tx_ring_info *txr; + int i; + + bnxt_for_each_napi_tx(i, bnapi, txr) { + if (txr->tx_hw_cons != txr->tx_cons) + __bnxt_tx_int(bp, txr, budget); + } + bnapi->events &= ~BNXT_TX_CMP_EVENT; +} + static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping, struct bnxt_rx_ring_info *rxr, unsigned int *offset, @@ -803,8 +821,8 @@ static inline u8 *__bnxt_alloc_rx_frag(struct bnxt *bp, dma_addr_t *mapping, int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 prod, gfp_t gfp) { - struct rx_bd *rxbd = &rxr->rx_desc_ring[RX_RING(prod)][RX_IDX(prod)]; - struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[prod]; + struct rx_bd *rxbd = &rxr->rx_desc_ring[RX_RING(bp, prod)][RX_IDX(prod)]; + struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[RING_RX(bp, prod)]; dma_addr_t mapping; if (BNXT_RX_PAGE_MODE(bp)) { @@ -837,9 +855,10 @@ void bnxt_reuse_rx_data(struct bnxt_rx_ring_info *rxr, u16 cons, void *data) { u16 prod = rxr->rx_prod; struct bnxt_sw_rx_bd *cons_rx_buf, *prod_rx_buf; + struct bnxt *bp = rxr->bnapi->bp; struct rx_bd *cons_bd, *prod_bd; - prod_rx_buf = &rxr->rx_buf_ring[prod]; + prod_rx_buf = &rxr->rx_buf_ring[RING_RX(bp, prod)]; cons_rx_buf = &rxr->rx_buf_ring[cons]; prod_rx_buf->data = data; @@ -847,8 +866,8 @@ void bnxt_reuse_rx_data(struct bnxt_rx_ring_info *rxr, u16 cons, void *data) prod_rx_buf->mapping = cons_rx_buf->mapping; - prod_bd = &rxr->rx_desc_ring[RX_RING(prod)][RX_IDX(prod)]; - cons_bd = &rxr->rx_desc_ring[RX_RING(cons)][RX_IDX(cons)]; + prod_bd = &rxr->rx_desc_ring[RX_RING(bp, prod)][RX_IDX(prod)]; + cons_bd = &rxr->rx_desc_ring[RX_RING(bp, cons)][RX_IDX(cons)]; prod_bd->rx_bd_haddr = cons_bd->rx_bd_haddr; } @@ -868,7 +887,7 @@ static inline int bnxt_alloc_rx_page(struct bnxt *bp, u16 prod, gfp_t gfp) { struct rx_bd *rxbd = - &rxr->rx_agg_desc_ring[RX_RING(prod)][RX_IDX(prod)]; + &rxr->rx_agg_desc_ring[RX_AGG_RING(bp, prod)][RX_IDX(prod)]; struct bnxt_sw_rx_agg_bd *rx_agg_buf; struct page *page; dma_addr_t mapping; @@ -885,7 +904,7 @@ static inline int bnxt_alloc_rx_page(struct bnxt *bp, __set_bit(sw_prod, rxr->rx_agg_bmap); rx_agg_buf = 
&rxr->rx_agg_ring[sw_prod]; - rxr->rx_sw_agg_prod = NEXT_RX_AGG(sw_prod); + rxr->rx_sw_agg_prod = RING_RX_AGG(bp, NEXT_RX_AGG(sw_prod)); rx_agg_buf->page = page; rx_agg_buf->offset = offset; @@ -927,7 +946,7 @@ static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 idx, bool p5_tpa = false; u32 i; - if ((bp->flags & BNXT_FLAG_CHIP_P5) && tpa) + if ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && tpa) p5_tpa = true; for (i = 0; i < agg_bufs; i++) { @@ -961,13 +980,13 @@ static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 idx, prod_rx_buf->mapping = cons_rx_buf->mapping; - prod_bd = &rxr->rx_agg_desc_ring[RX_RING(prod)][RX_IDX(prod)]; + prod_bd = &rxr->rx_agg_desc_ring[RX_AGG_RING(bp, prod)][RX_IDX(prod)]; prod_bd->rx_bd_haddr = cpu_to_le64(cons_rx_buf->mapping); prod_bd->rx_bd_opaque = sw_prod; prod = NEXT_RX_AGG(prod); - sw_prod = NEXT_RX_AGG(sw_prod); + sw_prod = RING_RX_AGG(bp, NEXT_RX_AGG(sw_prod)); } rxr->rx_agg_prod = prod; rxr->rx_sw_agg_prod = sw_prod; @@ -1094,7 +1113,7 @@ static u32 __bnxt_rx_agg_pages(struct bnxt *bp, u32 i, total_frag_len = 0; bool p5_tpa = false; - if ((bp->flags & BNXT_FLAG_CHIP_P5) && tpa) + if ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && tpa) p5_tpa = true; for (i = 0; i < agg_bufs; i++) { @@ -1249,7 +1268,7 @@ static int bnxt_discard_rx(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) { struct rx_tpa_end_cmp *tpa_end = cmp; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) return 0; agg_bufs = TPA_END_AGG_BUFS(tpa_end); @@ -1300,7 +1319,7 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, struct rx_bd *prod_bd; dma_addr_t mapping; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { agg_id = TPA_START_AGG_ID_P5(tpa_start); agg_id = bnxt_alloc_agg_idx(rxr, agg_id); } else { @@ -1309,7 +1328,7 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, cons = tpa_start->rx_tpa_start_cmp_opaque; prod = rxr->rx_prod; cons_rx_buf = &rxr->rx_buf_ring[cons]; - prod_rx_buf = &rxr->rx_buf_ring[prod]; + prod_rx_buf = &rxr->rx_buf_ring[RING_RX(bp, prod)]; tpa_info = &rxr->rx_tpa[agg_id]; if (unlikely(cons != rxr->rx_next_cons || @@ -1330,7 +1349,7 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, mapping = tpa_info->mapping; prod_rx_buf->mapping = mapping; - prod_bd = &rxr->rx_desc_ring[RX_RING(prod)][RX_IDX(prod)]; + prod_bd = &rxr->rx_desc_ring[RX_RING(bp, prod)][RX_IDX(prod)]; prod_bd->rx_bd_haddr = cpu_to_le64(mapping); @@ -1363,8 +1382,8 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, tpa_info->agg_count = 0; rxr->rx_prod = NEXT_RX(prod); - cons = NEXT_RX(cons); - rxr->rx_next_cons = NEXT_RX(cons); + cons = RING_RX(bp, NEXT_RX(cons)); + rxr->rx_next_cons = RING_RX(bp, NEXT_RX(cons)); cons_rx_buf = &rxr->rx_buf_ring[cons]; bnxt_reuse_rx_data(rxr, cons, cons_rx_buf->data); @@ -1563,7 +1582,7 @@ static inline struct sk_buff *bnxt_gro_skb(struct bnxt *bp, skb_shinfo(skb)->gso_size = le32_to_cpu(tpa_end1->rx_tpa_end_cmp_seg_len); skb_shinfo(skb)->gso_type = tpa_info->gso_type; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) payload_off = TPA_END_PAYLOAD_OFF_P5(tpa_end1); else payload_off = TPA_END_PAYLOAD_OFF(tpa_end); @@ -1611,7 +1630,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, return NULL; } - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { agg_id = 
TPA_END_AGG_ID_P5(tpa_end); agg_id = bnxt_lookup_agg_idx(rxr, agg_id); agg_bufs = TPA_END_AGG_BUFS_P5(tpa_end1); @@ -1876,7 +1895,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, rc = -EIO; if (rx_err & RX_CMPL_ERRORS_BUFFER_ERROR_MASK) { bnapi->cp_ring.sw_stats.rx.rx_buf_errors++; - if (!(bp->flags & BNXT_FLAG_CHIP_P5) && + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && !(bp->fw_cap & BNXT_FW_CAP_RING_MONITOR)) { netdev_warn_once(bp->dev, "RX buffer error %x\n", rx_err); @@ -2007,7 +2026,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, if (unlikely((flags & RX_CMP_FLAGS_ITYPES_MASK) == RX_CMP_FLAGS_ITYPE_PTP_W_TS) || bp->ptp_all_rx_tstamp) { - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { u32 cmpl_ts = le32_to_cpu(rxcmp1->rx_cmp_timestamp); u64 ns, ts; @@ -2032,7 +2051,7 @@ next_rx: next_rx_no_len: rxr->rx_prod = NEXT_RX(prod); - rxr->rx_next_cons = NEXT_RX(cons); + rxr->rx_next_cons = RING_RX(bp, NEXT_RX(cons)); next_rx_no_prod_no_len: *raw_cons = tmp_raw_cons; @@ -2415,7 +2434,7 @@ static int bnxt_async_event_process(struct bnxt *bp, struct bnxt_rx_ring_info *rxr; u16 grp_idx; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) goto async_event_process_exit; netdev_warn(bp->dev, "Ring monitor event, ring type %lu id 0x%x\n", @@ -2588,7 +2607,6 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, struct bnxt_napi *bnapi = cpr->bnapi; u32 raw_cons = cpr->cp_raw_cons; u32 cons; - int tx_pkts = 0; int rx_pkts = 0; u8 event = 0; struct tx_cmp *txcmp; @@ -2609,9 +2627,17 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, */ dma_rmb(); if (TX_CMP_TYPE(txcmp) == CMP_TYPE_TX_L2_CMP) { - tx_pkts++; + u32 opaque = txcmp->tx_cmp_opaque; + struct bnxt_tx_ring_info *txr; + u16 tx_freed; + + txr = bnapi->tx_ring[TX_OPAQUE_RING(opaque)]; + event |= BNXT_TX_CMP_EVENT; + txr->tx_hw_cons = TX_OPAQUE_PROD(bp, opaque); + tx_freed = (txr->tx_hw_cons - txr->tx_cons) & + bp->tx_ring_mask; /* return full budget so NAPI will complete. 
*/ - if (unlikely(tx_pkts >= bp->tx_wake_thresh)) { + if (unlikely(tx_freed >= bp->tx_wake_thresh)) { rx_pkts = budget; raw_cons = NEXT_RAW_CMP(raw_cons); if (budget) @@ -2655,7 +2681,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, xdp_do_flush(); if (event & BNXT_TX_EVENT) { - struct bnxt_tx_ring_info *txr = bnapi->tx_ring; + struct bnxt_tx_ring_info *txr = bnapi->tx_ring[0]; u16 prod = txr->tx_prod; /* Sync BD data before updating doorbell */ @@ -2665,7 +2691,6 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } cpr->cp_raw_cons = raw_cons; - bnapi->tx_pkts += tx_pkts; bnapi->events |= event; return rx_pkts; } @@ -2673,7 +2698,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) { - if (bnapi->tx_pkts && !bnapi->tx_fault) + if ((bnapi->events & BNXT_TX_CMP_EVENT) && !bnapi->tx_fault) bnapi->tx_int(bp, bnapi, budget); if ((bnapi->events & BNXT_RX_EVENT) && !(bnapi->in_reset)) { @@ -2686,7 +2711,7 @@ static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi, bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod); } - bnapi->events = 0; + bnapi->events &= BNXT_TX_CMP_EVENT; } static int bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, @@ -2826,10 +2851,10 @@ static int __bnxt_poll_cqs(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; int i, work_done = 0; - for (i = 0; i < 2; i++) { - struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[i]; + for (i = 0; i < cpr->cp_ring_count; i++) { + struct bnxt_cp_ring_info *cpr2 = &cpr->cp_ring_arr[i]; - if (cpr2) { + if (cpr2->had_nqe_notify) { work_done += __bnxt_poll_work(bp, cpr2, budget - work_done); cpr->has_more_work |= cpr2->has_more_work; @@ -2844,15 +2869,18 @@ static void __bnxt_poll_cqs_done(struct bnxt *bp, struct bnxt_napi *bnapi, struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; int i; - for (i = 0; i < 2; i++) { - struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[i]; + for (i = 0; i < cpr->cp_ring_count; i++) { + struct bnxt_cp_ring_info *cpr2 = &cpr->cp_ring_arr[i]; struct bnxt_db_info *db; - if (cpr2 && cpr2->had_work_done) { + if (cpr2->had_work_done) { db = &cpr2->cp_db; bnxt_writeq(bp, db->db_key64 | dbr_type | - RING_CMP(cpr2->cp_raw_cons), db->doorbell); + DB_RING_IDX(db, cpr2->cp_raw_cons), + db->doorbell); cpr2->had_work_done = 0; + if (dbr_type == DBR_TYPE_CQ_ARMALL) + cpr2->had_nqe_notify = 0; } } __bnxt_poll_work_done(bp, bnapi, budget); @@ -2901,13 +2929,17 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget) if (nqcmp->type == cpu_to_le16(NQ_CN_TYPE_CQ_NOTIFICATION)) { u32 idx = le32_to_cpu(nqcmp->cq_handle_low); + u32 cq_type = BNXT_NQ_HDL_TYPE(idx); struct bnxt_cp_ring_info *cpr2; /* No more budget for RX work */ - if (budget && work_done >= budget && idx == BNXT_RX_HDL) + if (budget && work_done >= budget && + cq_type == BNXT_NQ_HDL_TYPE_RX) break; - cpr2 = cpr->cp_ring_arr[idx]; + idx = BNXT_NQ_HDL_IDX(idx); + cpr2 = &cpr->cp_ring_arr[idx]; + cpr2->had_nqe_notify = 1; work_done += __bnxt_poll_work(bp, cpr2, budget - work_done); cpr->has_more_work |= cpr2->has_more_work; @@ -2922,8 +2954,9 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget) BNXT_DB_NQ_P5(&cpr->cp_db, raw_cons); } poll_done: - cpr_rx = cpr->cp_ring_arr[BNXT_RX_HDL]; - if (cpr_rx && (bp->flags & BNXT_FLAG_DIM)) { + cpr_rx = &cpr->cp_ring_arr[0]; + if (cpr_rx->cp_ring_type == 
BNXT_NQ_HDL_TYPE_RX && + (bp->flags & BNXT_FLAG_DIM)) { struct dim_sample dim_sample = {}; dim_update_sample(cpr->event_ctr, @@ -3097,20 +3130,20 @@ static void bnxt_free_skbs(struct bnxt *bp) bnxt_free_rx_skbs(bp); } -static void bnxt_init_ctx_mem(struct bnxt_mem_init *mem_init, void *p, int len) +static void bnxt_init_ctx_mem(struct bnxt_ctx_mem_type *ctxm, void *p, int len) { - u8 init_val = mem_init->init_val; - u16 offset = mem_init->offset; + u8 init_val = ctxm->init_value; + u16 offset = ctxm->init_offset; u8 *p2 = p; int i; if (!init_val) return; - if (offset == BNXT_MEM_INVALID_OFFSET) { + if (offset == BNXT_CTX_INIT_INVALID_OFFSET) { memset(p, init_val, len); return; } - for (i = 0; i < len; i += mem_init->size) + for (i = 0; i < len; i += ctxm->entry_size) *(p2 + i + offset) = init_val; } @@ -3177,8 +3210,8 @@ static int bnxt_alloc_ring(struct bnxt *bp, struct bnxt_ring_mem_info *rmem) if (!rmem->pg_arr[i]) return -ENOMEM; - if (rmem->mem_init) - bnxt_init_ctx_mem(rmem->mem_init, rmem->pg_arr[i], + if (rmem->ctx_mem) + bnxt_init_ctx_mem(rmem->ctx_mem, rmem->pg_arr[i], rmem->page_size); if (rmem->nr_pages > 1 || rmem->depth > 0) { if (i == rmem->nr_pages - 2 && @@ -3225,7 +3258,7 @@ static int bnxt_alloc_tpa_info(struct bnxt *bp) int i, j; bp->max_tpa = MAX_TPA; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { if (!bp->max_tpa_v2) return 0; bp->max_tpa = max_t(u16, bp->max_tpa_v2, MAX_TPA_P5); @@ -3240,7 +3273,7 @@ static int bnxt_alloc_tpa_info(struct bnxt *bp) if (!rxr->rx_tpa) return -ENOMEM; - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) continue; for (j = 0; j < bp->max_tpa; j++) { agg = kcalloc(MAX_SKB_FRAGS, sizeof(*agg), GFP_KERNEL); @@ -3395,6 +3428,15 @@ static void bnxt_free_tx_rings(struct bnxt *bp) } } +#define BNXT_TC_TO_RING_BASE(bp, tc) \ + ((tc) * (bp)->tx_nr_rings_per_tc) + +#define BNXT_RING_TO_TC_OFF(bp, tx) \ + ((tx) % (bp)->tx_nr_rings_per_tc) + +#define BNXT_RING_TO_TC(bp, tx) \ + ((tx) / (bp)->tx_nr_rings_per_tc) + static int bnxt_alloc_tx_rings(struct bnxt *bp) { int i, j, rc; @@ -3450,7 +3492,7 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp) spin_lock_init(&txr->xdp_tx_lock); if (i < bp->tx_nr_rings_xdp) continue; - if (i % bp->tx_nr_rings_per_tc == (bp->tx_nr_rings_per_tc - 1)) + if (BNXT_RING_TO_TC_OFF(bp, i) == (bp->tx_nr_rings_per_tc - 1)) j++; } return 0; @@ -3533,36 +3575,33 @@ static void bnxt_free_cp_rings(struct bnxt *bp) bnxt_free_ring(bp, &ring->ring_mem); - for (j = 0; j < 2; j++) { - struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[j]; + if (!cpr->cp_ring_arr) + continue; - if (cpr2) { - ring = &cpr2->cp_ring_struct; - bnxt_free_ring(bp, &ring->ring_mem); - bnxt_free_cp_arrays(cpr2); - kfree(cpr2); - cpr->cp_ring_arr[j] = NULL; - } + for (j = 0; j < cpr->cp_ring_count; j++) { + struct bnxt_cp_ring_info *cpr2 = &cpr->cp_ring_arr[j]; + + ring = &cpr2->cp_ring_struct; + bnxt_free_ring(bp, &ring->ring_mem); + bnxt_free_cp_arrays(cpr2); } + kfree(cpr->cp_ring_arr); + cpr->cp_ring_arr = NULL; + cpr->cp_ring_count = 0; } } -static struct bnxt_cp_ring_info *bnxt_alloc_cp_sub_ring(struct bnxt *bp) +static int bnxt_alloc_cp_sub_ring(struct bnxt *bp, + struct bnxt_cp_ring_info *cpr) { struct bnxt_ring_mem_info *rmem; struct bnxt_ring_struct *ring; - struct bnxt_cp_ring_info *cpr; int rc; - cpr = kzalloc(sizeof(*cpr), GFP_KERNEL); - if (!cpr) - return NULL; - rc = bnxt_alloc_cp_arrays(cpr, bp->cp_nr_pages); if (rc) { bnxt_free_cp_arrays(cpr); - kfree(cpr); - return NULL; + 
return -ENOMEM; } ring = &cpr->cp_ring_struct; rmem = &ring->ring_mem; @@ -3575,23 +3614,26 @@ static struct bnxt_cp_ring_info *bnxt_alloc_cp_sub_ring(struct bnxt *bp) if (rc) { bnxt_free_ring(bp, rmem); bnxt_free_cp_arrays(cpr); - kfree(cpr); - cpr = NULL; } - return cpr; + return rc; } static int bnxt_alloc_cp_rings(struct bnxt *bp) { bool sh = !!(bp->flags & BNXT_FLAG_SHARED_RINGS); - int i, rc, ulp_base_vec, ulp_msix; + int i, j, rc, ulp_base_vec, ulp_msix; + int tcs = netdev_get_num_tc(bp->dev); + if (!tcs) + tcs = 1; ulp_msix = bnxt_get_ulp_msix_num(bp); ulp_base_vec = bnxt_get_ulp_msix_base(bp); - for (i = 0; i < bp->cp_nr_rings; i++) { + for (i = 0, j = 0; i < bp->cp_nr_rings; i++) { struct bnxt_napi *bnapi = bp->bnapi[i]; - struct bnxt_cp_ring_info *cpr; + struct bnxt_cp_ring_info *cpr, *cpr2; struct bnxt_ring_struct *ring; + int cp_count = 0, k; + int rx = 0, tx = 0; if (!bnapi) continue; @@ -3609,35 +3651,55 @@ static int bnxt_alloc_cp_rings(struct bnxt *bp) else ring->map_idx = i; - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) continue; if (i < bp->rx_nr_rings) { - struct bnxt_cp_ring_info *cpr2 = - bnxt_alloc_cp_sub_ring(bp); - - cpr->cp_ring_arr[BNXT_RX_HDL] = cpr2; - if (!cpr2) - return -ENOMEM; - cpr2->bnapi = bnapi; + cp_count++; + rx = 1; + } + if (i < bp->tx_nr_rings_xdp) { + cp_count++; + tx = 1; + } else if ((sh && i < bp->tx_nr_rings) || + (!sh && i >= bp->rx_nr_rings)) { + cp_count += tcs; + tx = 1; } - if ((sh && i < bp->tx_nr_rings) || - (!sh && i >= bp->rx_nr_rings)) { - struct bnxt_cp_ring_info *cpr2 = - bnxt_alloc_cp_sub_ring(bp); - cpr->cp_ring_arr[BNXT_TX_HDL] = cpr2; - if (!cpr2) - return -ENOMEM; + cpr->cp_ring_arr = kcalloc(cp_count, sizeof(*cpr), + GFP_KERNEL); + if (!cpr->cp_ring_arr) + return -ENOMEM; + cpr->cp_ring_count = cp_count; + + for (k = 0; k < cp_count; k++) { + cpr2 = &cpr->cp_ring_arr[k]; + rc = bnxt_alloc_cp_sub_ring(bp, cpr2); + if (rc) + return rc; cpr2->bnapi = bnapi; + cpr2->cp_idx = k; + if (!k && rx) { + bp->rx_ring[i].rx_cpr = cpr2; + cpr2->cp_ring_type = BNXT_NQ_HDL_TYPE_RX; + } else { + int n, tc = k - rx; + + n = BNXT_TC_TO_RING_BASE(bp, tc) + j; + bp->tx_ring[n].tx_cpr = cpr2; + cpr2->cp_ring_type = BNXT_NQ_HDL_TYPE_TX; + } } + if (tx) + j++; } return 0; } static void bnxt_init_ring_struct(struct bnxt *bp) { - int i; + int i, j; for (i = 0; i < bp->cp_nr_rings; i++) { struct bnxt_napi *bnapi = bp->bnapi[i]; @@ -3682,18 +3744,16 @@ static void bnxt_init_ring_struct(struct bnxt *bp) rmem->vmem = (void **)&rxr->rx_agg_ring; skip_rx: - txr = bnapi->tx_ring; - if (!txr) - continue; - - ring = &txr->tx_ring_struct; - rmem = &ring->ring_mem; - rmem->nr_pages = bp->tx_nr_pages; - rmem->page_size = HW_RXBD_RING_SIZE; - rmem->pg_arr = (void **)txr->tx_desc_ring; - rmem->dma_arr = txr->tx_desc_mapping; - rmem->vmem_size = SW_TXBD_RING_SIZE * bp->tx_nr_pages; - rmem->vmem = (void **)&txr->tx_buf_ring; + bnxt_for_each_napi_tx(j, bnapi, txr) { + ring = &txr->tx_ring_struct; + rmem = &ring->ring_mem; + rmem->nr_pages = bp->tx_nr_pages; + rmem->page_size = HW_TXBD_RING_SIZE; + rmem->pg_arr = (void **)txr->tx_desc_ring; + rmem->dma_arr = txr->tx_desc_mapping; + rmem->vmem_size = SW_TXBD_RING_SIZE * bp->tx_nr_pages; + rmem->vmem = (void **)&txr->tx_buf_ring; + } } } @@ -3814,11 +3874,10 @@ static void bnxt_init_cp_rings(struct bnxt *bp) ring->fw_ring_id = INVALID_HW_RING_ID; cpr->rx_ring_coal.coal_ticks = bp->rx_coal.coal_ticks; cpr->rx_ring_coal.coal_bufs = bp->rx_coal.coal_bufs; - for (j = 0; j < 2; j++) { - 
struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[j]; - - if (!cpr2) - continue; + if (!cpr->cp_ring_arr) + continue; + for (j = 0; j < cpr->cp_ring_count; j++) { + struct bnxt_cp_ring_info *cpr2 = &cpr->cp_ring_arr[j]; ring = &cpr2->cp_ring_struct; ring->fw_ring_id = INVALID_HW_RING_ID; @@ -3906,7 +3965,7 @@ static int bnxt_alloc_vnics(struct bnxt *bp) int num_vnics = 1; #ifdef CONFIG_RFS_ACCEL - if ((bp->flags & (BNXT_FLAG_RFS | BNXT_FLAG_CHIP_P5)) == BNXT_FLAG_RFS) + if ((bp->flags & (BNXT_FLAG_RFS | BNXT_FLAG_CHIP_P5_PLUS)) == BNXT_FLAG_RFS) num_vnics += bp->rx_nr_rings; #endif @@ -4179,7 +4238,7 @@ static int bnxt_alloc_vnic_attributes(struct bnxt *bp) } } - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) goto vnic_skip_grps; if (vnic->flags & BNXT_VNIC_RSS_FLAG) @@ -4199,7 +4258,7 @@ vnic_skip_grps: /* Allocate rss table and hash key */ size = L1_CACHE_ALIGN(HW_HASH_INDEX_SIZE * sizeof(u16)); - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) size = L1_CACHE_ALIGN(BNXT_MAX_RSS_TABLE_SIZE_P5); vnic->rss_table_size = size + HW_HASH_KEY_SIZE; @@ -4309,7 +4368,7 @@ static int bnxt_hwrm_func_qstat_ext(struct bnxt *bp, int rc; if (!(bp->fw_cap & BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED) || - !(bp->flags & BNXT_FLAG_CHIP_P5)) + !(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) return -EOPNOTSUPP; rc = hwrm_req_init(bp, req, HWRM_FUNC_QSTATS_EXT); @@ -4347,7 +4406,7 @@ static void bnxt_init_stats(struct bnxt *bp) stats = &cpr->stats; rc = bnxt_hwrm_func_qstat_ext(bp, stats); if (rc) { - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) mask = (1ULL << 48) - 1; else mask = -1ULL; @@ -4493,7 +4552,7 @@ alloc_tx_ext_stats: static void bnxt_clear_ring_indices(struct bnxt *bp) { - int i; + int i, j; if (!bp->bnapi) return; @@ -4510,10 +4569,10 @@ static void bnxt_clear_ring_indices(struct bnxt *bp) cpr = &bnapi->cp_ring; cpr->cp_raw_cons = 0; - txr = bnapi->tx_ring; - if (txr) { + bnxt_for_each_napi_tx(j, bnapi, txr) { txr->tx_prod = 0; txr->tx_cons = 0; + txr->tx_hw_cons = 0; } rxr = bnapi->rx_ring; @@ -4523,6 +4582,7 @@ static void bnxt_clear_ring_indices(struct bnxt *bp) rxr->rx_sw_agg_prod = 0; rxr->rx_next_cons = 0; } + bnapi->events = 0; } } @@ -4626,7 +4686,7 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool irq_re_init) bp->bnapi[i] = bnapi; bp->bnapi[i]->index = i; bp->bnapi[i]->bp = bp; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring; @@ -4644,11 +4704,13 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool irq_re_init) for (i = 0; i < bp->rx_nr_rings; i++) { struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { rxr->rx_ring_struct.ring_mem.flags = BNXT_RMEM_RING_PTE_FLAG; rxr->rx_agg_ring_struct.ring_mem.flags = BNXT_RMEM_RING_PTE_FLAG; + } else { + rxr->rx_cpr = &bp->bnapi[i]->cp_ring; } rxr->bnapi = bp->bnapi[i]; bp->bnapi[i]->rx_ring = &bp->rx_ring[i]; @@ -4671,22 +4733,33 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool irq_re_init) else j = bp->rx_nr_rings; - for (i = 0; i < bp->tx_nr_rings; i++, j++) { + for (i = 0; i < bp->tx_nr_rings; i++) { struct bnxt_tx_ring_info *txr = &bp->tx_ring[i]; + struct bnxt_napi *bnapi2; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) txr->tx_ring_struct.ring_mem.flags = BNXT_RMEM_RING_PTE_FLAG; - txr->bnapi = bp->bnapi[j]; - bp->bnapi[j]->tx_ring = txr; bp->tx_ring_map[i] = 
bp->tx_nr_rings_xdp + i; if (i >= bp->tx_nr_rings_xdp) { + int k = j + BNXT_RING_TO_TC_OFF(bp, i); + + bnapi2 = bp->bnapi[k]; txr->txq_index = i - bp->tx_nr_rings_xdp; - bp->bnapi[j]->tx_int = bnxt_tx_int; + txr->tx_napi_idx = + BNXT_RING_TO_TC(bp, txr->txq_index); + bnapi2->tx_ring[txr->tx_napi_idx] = txr; + bnapi2->tx_int = bnxt_tx_int; } else { - bp->bnapi[j]->flags |= BNXT_NAPI_FLAG_XDP; - bp->bnapi[j]->tx_int = bnxt_tx_int_xdp; + bnapi2 = bp->bnapi[j]; + bnapi2->flags |= BNXT_NAPI_FLAG_XDP; + bnapi2->tx_ring[0] = txr; + bnapi2->tx_int = bnxt_tx_int_xdp; + j++; } + txr->bnapi = bnapi2; + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) + txr->tx_cpr = &bnapi2->cp_ring; } rc = bnxt_alloc_stats(bp); @@ -5211,7 +5284,7 @@ static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags) nsegs = (MAX_SKB_FRAGS - n) / n; } - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { segs = MAX_TPA_SEGS_P5; max_aggs = bp->max_tpa; } else { @@ -5237,35 +5310,25 @@ static u16 bnxt_cp_ring_from_grp(struct bnxt *bp, struct bnxt_ring_struct *ring) static u16 bnxt_cp_ring_for_rx(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) { - if (bp->flags & BNXT_FLAG_CHIP_P5) { - struct bnxt_napi *bnapi = rxr->bnapi; - struct bnxt_cp_ring_info *cpr; - - cpr = bnapi->cp_ring.cp_ring_arr[BNXT_RX_HDL]; - return cpr->cp_ring_struct.fw_ring_id; - } else { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) + return rxr->rx_cpr->cp_ring_struct.fw_ring_id; + else return bnxt_cp_ring_from_grp(bp, &rxr->rx_ring_struct); - } } static u16 bnxt_cp_ring_for_tx(struct bnxt *bp, struct bnxt_tx_ring_info *txr) { - if (bp->flags & BNXT_FLAG_CHIP_P5) { - struct bnxt_napi *bnapi = txr->bnapi; - struct bnxt_cp_ring_info *cpr; - - cpr = bnapi->cp_ring.cp_ring_arr[BNXT_TX_HDL]; - return cpr->cp_ring_struct.fw_ring_id; - } else { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) + return txr->tx_cpr->cp_ring_struct.fw_ring_id; + else return bnxt_cp_ring_from_grp(bp, &txr->tx_ring_struct); - } } static int bnxt_alloc_rss_indir_tbl(struct bnxt *bp) { int entries; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) entries = BNXT_MAX_RSS_TABLE_ENTRIES_P5; else entries = HW_HASH_INDEX_SIZE; @@ -5315,7 +5378,7 @@ static u16 bnxt_get_max_rss_ring(struct bnxt *bp) int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings) { - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) return DIV_ROUND_UP(rx_rings, BNXT_RSS_TABLE_ENTRIES_P5); if (BNXT_CHIP_TYPE_NITRO_A0(bp)) return 2; @@ -5361,7 +5424,7 @@ static void __bnxt_hwrm_vnic_set_rss(struct bnxt *bp, struct hwrm_vnic_rss_cfg_input *req, struct bnxt_vnic_info *vnic) { - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) bnxt_fill_hw_rss_tbl_p5(bp, vnic); else bnxt_fill_hw_rss_tbl(bp, vnic); @@ -5386,7 +5449,7 @@ static int bnxt_hwrm_vnic_set_rss(struct bnxt *bp, u16 vnic_id, bool set_rss) struct hwrm_vnic_rss_cfg_input *req; int rc; - if ((bp->flags & BNXT_FLAG_CHIP_P5) || + if ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) || vnic->fw_rss_cos_lb_ctx[0] == INVALID_HW_RING_ID) return 0; @@ -5551,7 +5614,7 @@ int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id) if (rc) return rc; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { struct bnxt_rx_ring_info *rxr = &bp->rx_ring[0]; req->default_rx_ring_id = @@ -5651,7 +5714,7 @@ static int bnxt_hwrm_vnic_alloc(struct bnxt *bp, u16 vnic_id, if (rc) return rc; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) 
goto vnic_no_ring_grps; /* map ring groups to this vnic */ @@ -5699,7 +5762,7 @@ static int bnxt_hwrm_vnic_qcaps(struct bnxt *bp) if (!rc) { u32 flags = le32_to_cpu(resp->flags); - if (!(bp->flags & BNXT_FLAG_CHIP_P5) && + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && (flags & VNIC_QCAPS_RESP_FLAGS_RSS_DFLT_CR_CAP)) bp->flags |= BNXT_FLAG_NEW_RSS_CAP; if (flags & @@ -5710,14 +5773,14 @@ static int bnxt_hwrm_vnic_qcaps(struct bnxt *bp) * VLAN_STRIP_CAP properly. */ if ((flags & VNIC_QCAPS_RESP_FLAGS_VLAN_STRIP_CAP) || - (BNXT_CHIP_P5_THOR(bp) && + (BNXT_CHIP_P5(bp) && !(bp->fw_cap & BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED))) bp->fw_cap |= BNXT_FW_CAP_VLAN_RX_STRIP; if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_HASH_TYPE_DELTA_CAP) bp->fw_cap |= BNXT_FW_CAP_RSS_HASH_TYPE_DELTA; bp->max_tpa_v2 = le16_to_cpu(resp->max_aggs_supported); if (bp->max_tpa_v2) { - if (BNXT_CHIP_P5_THOR(bp)) + if (BNXT_CHIP_P5(bp)) bp->hw_ring_stats_size = BNXT_RING_STATS_SIZE_P5; else bp->hw_ring_stats_size = BNXT_RING_STATS_SIZE_P5_SR2; @@ -5734,7 +5797,7 @@ static int bnxt_hwrm_ring_grp_alloc(struct bnxt *bp) int rc; u16 i; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) return 0; rc = hwrm_req_init(bp, req, HWRM_RING_GRP_ALLOC); @@ -5767,7 +5830,7 @@ static void bnxt_hwrm_ring_grp_free(struct bnxt *bp) struct hwrm_ring_grp_free_input *req; u16 i; - if (!bp->grp_info || (bp->flags & BNXT_FLAG_CHIP_P5)) + if (!bp->grp_info || (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) return; if (hwrm_req_init(bp, req, HWRM_RING_GRP_FREE)) @@ -5832,7 +5895,7 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp, case HWRM_RING_ALLOC_RX: req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX; req->length = cpu_to_le32(bp->rx_ring_mask + 1); - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { u16 flags = 0; /* Association of rx ring with stats context */ @@ -5847,7 +5910,7 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp, } break; case HWRM_RING_ALLOC_AGG: - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX_AGG; /* Association of agg ring with rx ring */ grp_info = &bp->grp_info[ring->grp_idx]; @@ -5865,7 +5928,7 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp, case HWRM_RING_ALLOC_CMPL: req->ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL; req->length = cpu_to_le32(bp->cp_ring_mask + 1); - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { /* Association of cp ring with nq */ grp_info = &bp->grp_info[map_index]; req->nq_ring_id = cpu_to_le16(grp_info->cp_fw_ring_id); @@ -5933,10 +5996,30 @@ static int bnxt_hwrm_set_async_event_cr(struct bnxt *bp, int idx) } } +static void bnxt_set_db_mask(struct bnxt *bp, struct bnxt_db_info *db, + u32 ring_type) +{ + switch (ring_type) { + case HWRM_RING_ALLOC_TX: + db->db_ring_mask = bp->tx_ring_mask; + break; + case HWRM_RING_ALLOC_RX: + db->db_ring_mask = bp->rx_ring_mask; + break; + case HWRM_RING_ALLOC_AGG: + db->db_ring_mask = bp->rx_agg_ring_mask; + break; + case HWRM_RING_ALLOC_CMPL: + case HWRM_RING_ALLOC_NQ: + db->db_ring_mask = bp->cp_ring_mask; + break; + } +} + static void bnxt_set_db(struct bnxt *bp, struct bnxt_db_info *db, u32 ring_type, u32 map_idx, u32 xid) { - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { if (BNXT_PF(bp)) db->doorbell = bp->bar1 + DB_PF_OFFSET_P5; else @@ -5972,6 +6055,7 @@ static void bnxt_set_db(struct bnxt *bp, struct bnxt_db_info *db, u32 ring_type, break; } } + 
bnxt_set_db_mask(bp, db, ring_type); } static int bnxt_hwrm_ring_alloc(struct bnxt *bp) @@ -5980,7 +6064,7 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp) int i, rc = 0; u32 type; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) type = HWRM_RING_ALLOC_NQ; else type = HWRM_RING_ALLOC_CMPL; @@ -6016,15 +6100,13 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp) struct bnxt_ring_struct *ring; u32 map_idx; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { + struct bnxt_cp_ring_info *cpr2 = txr->tx_cpr; struct bnxt_napi *bnapi = txr->bnapi; - struct bnxt_cp_ring_info *cpr, *cpr2; u32 type2 = HWRM_RING_ALLOC_CMPL; - cpr = &bnapi->cp_ring; - cpr2 = cpr->cp_ring_arr[BNXT_TX_HDL]; ring = &cpr2->cp_ring_struct; - ring->handle = BNXT_TX_HDL; + ring->handle = BNXT_SET_NQ_HDL(cpr2); map_idx = bnapi->index; rc = hwrm_ring_alloc_send_msg(bp, ring, type2, map_idx); if (rc) @@ -6056,14 +6138,12 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp) if (!agg_rings) bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); bp->grp_info[map_idx].rx_fw_ring_id = ring->fw_ring_id; - if (bp->flags & BNXT_FLAG_CHIP_P5) { - struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { + struct bnxt_cp_ring_info *cpr2 = rxr->rx_cpr; u32 type2 = HWRM_RING_ALLOC_CMPL; - struct bnxt_cp_ring_info *cpr2; - cpr2 = cpr->cp_ring_arr[BNXT_RX_HDL]; ring = &cpr2->cp_ring_struct; - ring->handle = BNXT_RX_HDL; + ring->handle = BNXT_SET_NQ_HDL(cpr2); rc = hwrm_ring_alloc_send_msg(bp, ring, type2, map_idx); if (rc) goto err_out; @@ -6171,7 +6251,7 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path) } } - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) type = RING_FREE_REQ_RING_TYPE_RX_AGG; else type = RING_FREE_REQ_RING_TYPE_RX; @@ -6198,7 +6278,7 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path) */ bnxt_disable_int_sync(bp); - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) type = RING_FREE_REQ_RING_TYPE_NQ; else type = RING_FREE_REQ_RING_TYPE_L2_CMPL; @@ -6208,18 +6288,16 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path) struct bnxt_ring_struct *ring; int j; - for (j = 0; j < 2; j++) { - struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[j]; - - if (cpr2) { - ring = &cpr2->cp_ring_struct; - if (ring->fw_ring_id == INVALID_HW_RING_ID) - continue; - hwrm_ring_free_send_msg(bp, ring, - RING_FREE_REQ_RING_TYPE_L2_CMPL, - INVALID_HW_RING_ID); - ring->fw_ring_id = INVALID_HW_RING_ID; - } + for (j = 0; j < cpr->cp_ring_count && cpr->cp_ring_arr; j++) { + struct bnxt_cp_ring_info *cpr2 = &cpr->cp_ring_arr[j]; + + ring = &cpr2->cp_ring_struct; + if (ring->fw_ring_id == INVALID_HW_RING_ID) + continue; + hwrm_ring_free_send_msg(bp, ring, + RING_FREE_REQ_RING_TYPE_L2_CMPL, + INVALID_HW_RING_ID); + ring->fw_ring_id = INVALID_HW_RING_ID; } ring = &cpr->cp_ring_struct; if (ring->fw_ring_id != INVALID_HW_RING_ID) { @@ -6267,14 +6345,15 @@ static int bnxt_hwrm_get_rings(struct bnxt *bp) cp = le16_to_cpu(resp->alloc_cmpl_rings); stats = le16_to_cpu(resp->alloc_stat_ctx); hw_resc->resv_irqs = cp; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { int rx = hw_resc->resv_rx_rings; int tx = hw_resc->resv_tx_rings; if (bp->flags & BNXT_FLAG_AGG_RINGS) rx >>= 1; if (cp < (rx + tx)) { - bnxt_trim_rings(bp, &rx, &tx, cp, false); + rx = cp / 2; + tx = rx; if (bp->flags & BNXT_FLAG_AGG_RINGS) rx <<= 1; hw_resc->resv_rx_rings = rx; 
@@ -6331,7 +6410,7 @@ __bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings, if (BNXT_NEW_RM(bp)) { enables |= rx_rings ? FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS : 0; enables |= stats ? FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { enables |= cp_rings ? FUNC_CFG_REQ_ENABLES_NUM_MSIX : 0; enables |= tx_rings + ring_grps ? FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0; @@ -6347,7 +6426,7 @@ __bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings, enables |= vnics ? FUNC_CFG_REQ_ENABLES_NUM_VNICS : 0; req->num_rx_rings = cpu_to_le16(rx_rings); - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { req->num_cmpl_rings = cpu_to_le16(tx_rings + ring_grps); req->num_msix = cpu_to_le16(cp_rings); req->num_rsscos_ctxs = @@ -6382,7 +6461,7 @@ __bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings, enables |= rx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS | FUNC_VF_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0; enables |= stats ? FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { enables |= tx_rings + ring_grps ? FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0; } else { @@ -6397,7 +6476,7 @@ __bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings, req->num_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX); req->num_tx_rings = cpu_to_le16(tx_rings); req->num_rx_rings = cpu_to_le16(rx_rings); - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { req->num_cmpl_rings = cpu_to_le16(tx_rings + ring_grps); req->num_rsscos_ctxs = cpu_to_le16(DIV_ROUND_UP(ring_grps, 64)); } else { @@ -6493,7 +6572,7 @@ static int bnxt_cp_rings_in_use(struct bnxt *bp) { int cp; - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) return bnxt_nq_rings_in_use(bp); cp = bp->tx_nr_rings + bp->rx_nr_rings; @@ -6550,7 +6629,8 @@ static bool bnxt_need_reserve_rings(struct bnxt *bp) bnxt_check_rss_tbl_no_rmgr(bp); return false; } - if ((bp->flags & BNXT_FLAG_RFS) && !(bp->flags & BNXT_FLAG_CHIP_P5)) + if ((bp->flags & BNXT_FLAG_RFS) && + !(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) vnic = rx + 1; if (bp->flags & BNXT_FLAG_AGG_RINGS) rx <<= 1; @@ -6558,9 +6638,9 @@ static bool bnxt_need_reserve_rings(struct bnxt *bp) if (hw_resc->resv_rx_rings != rx || hw_resc->resv_cp_rings != cp || hw_resc->resv_vnics != vnic || hw_resc->resv_stat_ctxs != stat || (hw_resc->resv_hw_ring_grps != grp && - !(bp->flags & BNXT_FLAG_CHIP_P5))) + !(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))) return true; - if ((bp->flags & BNXT_FLAG_CHIP_P5) && BNXT_PF(bp) && + if ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && BNXT_PF(bp) && hw_resc->resv_irqs != nq) return true; return false; @@ -6575,13 +6655,15 @@ static int __bnxt_reserve_rings(struct bnxt *bp) int grp, rx_rings, rc; int vnic = 1, stat; bool sh = false; + int tx_cp; if (!bnxt_need_reserve_rings(bp)) return 0; if (bp->flags & BNXT_FLAG_SHARED_RINGS) sh = true; - if ((bp->flags & BNXT_FLAG_RFS) && !(bp->flags & BNXT_FLAG_CHIP_P5)) + if ((bp->flags & BNXT_FLAG_RFS) && + !(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) vnic = rx + 1; if (bp->flags & BNXT_FLAG_AGG_RINGS) rx <<= 1; @@ -6624,7 +6706,8 @@ static int __bnxt_reserve_rings(struct bnxt *bp) rc = bnxt_trim_rings(bp, &rx_rings, &tx, cp, sh); if (bp->flags & BNXT_FLAG_AGG_RINGS) rx = rx_rings << 1; - cp = sh ? max_t(int, tx, rx_rings) : tx + rx_rings; + tx_cp = bnxt_num_tx_to_cp(bp, tx); + cp = sh ? 
max_t(int, tx_cp, rx_rings) : tx_cp + rx_rings; bp->tx_nr_rings = tx; /* If we cannot reserve all the RX rings, reset the RSS map only @@ -6671,7 +6754,7 @@ static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings, FUNC_VF_CFG_REQ_FLAGS_STAT_CTX_ASSETS_TEST | FUNC_VF_CFG_REQ_FLAGS_VNIC_ASSETS_TEST | FUNC_VF_CFG_REQ_FLAGS_RSSCOS_CTX_ASSETS_TEST; - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) flags |= FUNC_VF_CFG_REQ_FLAGS_RING_GRP_ASSETS_TEST; req->flags = cpu_to_le32(flags); @@ -6693,7 +6776,7 @@ static int bnxt_hwrm_check_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings, FUNC_CFG_REQ_FLAGS_CMPL_ASSETS_TEST | FUNC_CFG_REQ_FLAGS_STAT_CTX_ASSETS_TEST | FUNC_CFG_REQ_FLAGS_VNIC_ASSETS_TEST; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) flags |= FUNC_CFG_REQ_FLAGS_RSSCOS_CTX_ASSETS_TEST | FUNC_CFG_REQ_FLAGS_NQ_ASSETS_TEST; else @@ -6887,10 +6970,40 @@ int bnxt_hwrm_set_ring_coal(struct bnxt *bp, struct bnxt_napi *bnapi) return hwrm_req_send(bp, req_rx); } +static int +bnxt_hwrm_set_rx_coal(struct bnxt *bp, struct bnxt_napi *bnapi, + struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req) +{ + u16 ring_id = bnxt_cp_ring_for_rx(bp, bnapi->rx_ring); + + req->ring_id = cpu_to_le16(ring_id); + return hwrm_req_send(bp, req); +} + +static int +bnxt_hwrm_set_tx_coal(struct bnxt *bp, struct bnxt_napi *bnapi, + struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req) +{ + struct bnxt_tx_ring_info *txr; + int i, rc; + + bnxt_for_each_napi_tx(i, bnapi, txr) { + u16 ring_id; + + ring_id = bnxt_cp_ring_for_tx(bp, txr); + req->ring_id = cpu_to_le16(ring_id); + rc = hwrm_req_send(bp, req); + if (rc) + return rc; + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) + return 0; + } + return 0; +} + int bnxt_hwrm_set_coal(struct bnxt *bp) { - struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req_rx, *req_tx, - *req; + struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req_rx, *req_tx; int i, rc; rc = hwrm_req_init(bp, req_rx, HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS); @@ -6911,29 +7024,19 @@ int bnxt_hwrm_set_coal(struct bnxt *bp) for (i = 0; i < bp->cp_nr_rings; i++) { struct bnxt_napi *bnapi = bp->bnapi[i]; struct bnxt_coal *hw_coal; - u16 ring_id; - - req = req_rx; - if (!bnapi->rx_ring) { - ring_id = bnxt_cp_ring_for_tx(bp, bnapi->tx_ring); - req = req_tx; - } else { - ring_id = bnxt_cp_ring_for_rx(bp, bnapi->rx_ring); - } - req->ring_id = cpu_to_le16(ring_id); - rc = hwrm_req_send(bp, req); + if (!bnapi->rx_ring) + rc = bnxt_hwrm_set_tx_coal(bp, bnapi, req_tx); + else + rc = bnxt_hwrm_set_rx_coal(bp, bnapi, req_rx); if (rc) break; - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) continue; - if (bnapi->rx_ring && bnapi->tx_ring) { - req = req_tx; - ring_id = bnxt_cp_ring_for_tx(bp, bnapi->tx_ring); - req->ring_id = cpu_to_le16(ring_id); - rc = hwrm_req_send(bp, req); + if (bnapi->rx_ring && bnapi->tx_ring[0]) { + rc = bnxt_hwrm_set_tx_coal(bp, bnapi, req_tx); if (rc) break; } @@ -7087,7 +7190,7 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp) if (bp->db_size) goto func_qcfg_exit; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { if (BNXT_PF(bp)) min_db_offset = DB_PF_OFFSET_P5; else @@ -7104,37 +7207,103 @@ func_qcfg_exit: return rc; } -static void bnxt_init_ctx_initializer(struct bnxt_ctx_mem_info *ctx, - struct hwrm_func_backing_store_qcaps_output *resp) +static void bnxt_init_ctx_initializer(struct bnxt_ctx_mem_type *ctxm, + u8 init_val, u8 
init_offset, + bool init_mask_set) { - struct bnxt_mem_init *mem_init; - u16 init_mask; - u8 init_val; - u8 *offset; - int i; + ctxm->init_value = init_val; + ctxm->init_offset = BNXT_CTX_INIT_INVALID_OFFSET; + if (init_mask_set) + ctxm->init_offset = init_offset * 4; + else + ctxm->init_value = 0; +} + +static int bnxt_alloc_all_ctx_pg_info(struct bnxt *bp, int ctx_max) +{ + struct bnxt_ctx_mem_info *ctx = bp->ctx; + u16 type; + + for (type = 0; type < ctx_max; type++) { + struct bnxt_ctx_mem_type *ctxm = &ctx->ctx_arr[type]; + int n = 1; - init_val = resp->ctx_kind_initializer; - init_mask = le16_to_cpu(resp->ctx_init_mask); - offset = &resp->qp_init_offset; - mem_init = &ctx->mem_init[BNXT_CTX_MEM_INIT_QP]; - for (i = 0; i < BNXT_CTX_MEM_INIT_MAX; i++, mem_init++, offset++) { - mem_init->init_val = init_val; - mem_init->offset = BNXT_MEM_INVALID_OFFSET; - if (!init_mask) + if (!ctxm->max_entries) continue; - if (i == BNXT_CTX_MEM_INIT_STAT) - offset = &resp->stat_init_offset; - if (init_mask & (1 << i)) - mem_init->offset = *offset * 4; - else - mem_init->init_val = 0; + + if (ctxm->instance_bmap) + n = hweight32(ctxm->instance_bmap); + ctxm->pg_info = kcalloc(n, sizeof(*ctxm->pg_info), GFP_KERNEL); + if (!ctxm->pg_info) + return -ENOMEM; } - ctx->mem_init[BNXT_CTX_MEM_INIT_QP].size = ctx->qp_entry_size; - ctx->mem_init[BNXT_CTX_MEM_INIT_SRQ].size = ctx->srq_entry_size; - ctx->mem_init[BNXT_CTX_MEM_INIT_CQ].size = ctx->cq_entry_size; - ctx->mem_init[BNXT_CTX_MEM_INIT_VNIC].size = ctx->vnic_entry_size; - ctx->mem_init[BNXT_CTX_MEM_INIT_STAT].size = ctx->stat_entry_size; - ctx->mem_init[BNXT_CTX_MEM_INIT_MRAV].size = ctx->mrav_entry_size; + return 0; +} + +#define BNXT_CTX_INIT_VALID(flags) \ + (!!((flags) & \ + FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_ENABLE_CTX_KIND_INIT)) + +static int bnxt_hwrm_func_backing_store_qcaps_v2(struct bnxt *bp) +{ + struct hwrm_func_backing_store_qcaps_v2_output *resp; + struct hwrm_func_backing_store_qcaps_v2_input *req; + u16 last_valid_type = BNXT_CTX_INV; + struct bnxt_ctx_mem_info *ctx; + u16 type; + int rc; + + rc = hwrm_req_init(bp, req, HWRM_FUNC_BACKING_STORE_QCAPS_V2); + if (rc) + return rc; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + bp->ctx = ctx; + + resp = hwrm_req_hold(bp, req); + + for (type = 0; type < BNXT_CTX_V2_MAX; ) { + struct bnxt_ctx_mem_type *ctxm = &ctx->ctx_arr[type]; + u8 init_val, init_off, i; + __le32 *p; + u32 flags; + + req->type = cpu_to_le16(type); + rc = hwrm_req_send(bp, req); + if (rc) + goto ctx_done; + flags = le32_to_cpu(resp->flags); + type = le16_to_cpu(resp->next_valid_type); + if (!(flags & FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_TYPE_VALID)) + continue; + + ctxm->type = le16_to_cpu(resp->type); + last_valid_type = ctxm->type; + ctxm->entry_size = le16_to_cpu(resp->entry_size); + ctxm->flags = flags; + ctxm->instance_bmap = le32_to_cpu(resp->instance_bit_map); + ctxm->entry_multiple = resp->entry_multiple; + ctxm->max_entries = le32_to_cpu(resp->max_num_entries); + ctxm->min_entries = le32_to_cpu(resp->min_num_entries); + init_val = resp->ctx_init_value; + init_off = resp->ctx_init_offset; + bnxt_init_ctx_initializer(ctxm, init_val, init_off, + BNXT_CTX_INIT_VALID(flags)); + ctxm->split_entry_cnt = min_t(u8, resp->subtype_valid_cnt, + BNXT_MAX_SPLIT_ENTRY); + for (i = 0, p = &resp->split_entry_0; i < ctxm->split_entry_cnt; + i++, p++) + ctxm->split[i] = le32_to_cpu(*p); + } + if (last_valid_type < BNXT_CTX_V2_MAX) + ctx->ctx_arr[last_valid_type].last = true; + rc = 
bnxt_alloc_all_ctx_pg_info(bp, BNXT_CTX_V2_MAX); + +ctx_done: + hwrm_req_drop(bp, req); + return rc; } static int bnxt_hwrm_func_backing_store_qcaps(struct bnxt *bp) @@ -7146,6 +7315,9 @@ static int bnxt_hwrm_func_backing_store_qcaps(struct bnxt *bp) if (bp->hwrm_spec_code < 0x10902 || BNXT_VF(bp) || bp->ctx) return 0; + if (bp->fw_cap & BNXT_FW_CAP_BACKING_STORE_V2) + return bnxt_hwrm_func_backing_store_qcaps_v2(bp); + rc = hwrm_req_init(bp, req, HWRM_FUNC_BACKING_STORE_QCAPS); if (rc) return rc; @@ -7153,48 +7325,83 @@ static int bnxt_hwrm_func_backing_store_qcaps(struct bnxt *bp) resp = hwrm_req_hold(bp, req); rc = hwrm_req_send_silent(bp, req); if (!rc) { - struct bnxt_ctx_pg_info *ctx_pg; + struct bnxt_ctx_mem_type *ctxm; struct bnxt_ctx_mem_info *ctx; - int i, tqm_rings; + u8 init_val, init_idx = 0; + u16 init_mask; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + ctx = bp->ctx; if (!ctx) { - rc = -ENOMEM; - goto ctx_err; - } - ctx->qp_max_entries = le32_to_cpu(resp->qp_max_entries); - ctx->qp_min_qp1_entries = le16_to_cpu(resp->qp_min_qp1_entries); - ctx->qp_max_l2_entries = le16_to_cpu(resp->qp_max_l2_entries); - ctx->qp_entry_size = le16_to_cpu(resp->qp_entry_size); - ctx->srq_max_l2_entries = le16_to_cpu(resp->srq_max_l2_entries); - ctx->srq_max_entries = le32_to_cpu(resp->srq_max_entries); - ctx->srq_entry_size = le16_to_cpu(resp->srq_entry_size); - ctx->cq_max_l2_entries = le16_to_cpu(resp->cq_max_l2_entries); - ctx->cq_max_entries = le32_to_cpu(resp->cq_max_entries); - ctx->cq_entry_size = le16_to_cpu(resp->cq_entry_size); - ctx->vnic_max_vnic_entries = - le16_to_cpu(resp->vnic_max_vnic_entries); - ctx->vnic_max_ring_table_entries = + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) { + rc = -ENOMEM; + goto ctx_err; + } + bp->ctx = ctx; + } + init_val = resp->ctx_kind_initializer; + init_mask = le16_to_cpu(resp->ctx_init_mask); + + ctxm = &ctx->ctx_arr[BNXT_CTX_QP]; + ctxm->max_entries = le32_to_cpu(resp->qp_max_entries); + ctxm->qp_qp1_entries = le16_to_cpu(resp->qp_min_qp1_entries); + ctxm->qp_l2_entries = le16_to_cpu(resp->qp_max_l2_entries); + ctxm->entry_size = le16_to_cpu(resp->qp_entry_size); + bnxt_init_ctx_initializer(ctxm, init_val, resp->qp_init_offset, + (init_mask & (1 << init_idx++)) != 0); + + ctxm = &ctx->ctx_arr[BNXT_CTX_SRQ]; + ctxm->srq_l2_entries = le16_to_cpu(resp->srq_max_l2_entries); + ctxm->max_entries = le32_to_cpu(resp->srq_max_entries); + ctxm->entry_size = le16_to_cpu(resp->srq_entry_size); + bnxt_init_ctx_initializer(ctxm, init_val, resp->srq_init_offset, + (init_mask & (1 << init_idx++)) != 0); + + ctxm = &ctx->ctx_arr[BNXT_CTX_CQ]; + ctxm->cq_l2_entries = le16_to_cpu(resp->cq_max_l2_entries); + ctxm->max_entries = le32_to_cpu(resp->cq_max_entries); + ctxm->entry_size = le16_to_cpu(resp->cq_entry_size); + bnxt_init_ctx_initializer(ctxm, init_val, resp->cq_init_offset, + (init_mask & (1 << init_idx++)) != 0); + + ctxm = &ctx->ctx_arr[BNXT_CTX_VNIC]; + ctxm->vnic_entries = le16_to_cpu(resp->vnic_max_vnic_entries); + ctxm->max_entries = ctxm->vnic_entries + le16_to_cpu(resp->vnic_max_ring_table_entries); - ctx->vnic_entry_size = le16_to_cpu(resp->vnic_entry_size); - ctx->stat_max_entries = le32_to_cpu(resp->stat_max_entries); - ctx->stat_entry_size = le16_to_cpu(resp->stat_entry_size); - ctx->tqm_entry_size = le16_to_cpu(resp->tqm_entry_size); - ctx->tqm_min_entries_per_ring = - le32_to_cpu(resp->tqm_min_entries_per_ring); - ctx->tqm_max_entries_per_ring = - le32_to_cpu(resp->tqm_max_entries_per_ring); - ctx->tqm_entries_multiple = 
resp->tqm_entries_multiple; - if (!ctx->tqm_entries_multiple) - ctx->tqm_entries_multiple = 1; - ctx->mrav_max_entries = le32_to_cpu(resp->mrav_max_entries); - ctx->mrav_entry_size = le16_to_cpu(resp->mrav_entry_size); - ctx->mrav_num_entries_units = + ctxm->entry_size = le16_to_cpu(resp->vnic_entry_size); + bnxt_init_ctx_initializer(ctxm, init_val, + resp->vnic_init_offset, + (init_mask & (1 << init_idx++)) != 0); + + ctxm = &ctx->ctx_arr[BNXT_CTX_STAT]; + ctxm->max_entries = le32_to_cpu(resp->stat_max_entries); + ctxm->entry_size = le16_to_cpu(resp->stat_entry_size); + bnxt_init_ctx_initializer(ctxm, init_val, + resp->stat_init_offset, + (init_mask & (1 << init_idx++)) != 0); + + ctxm = &ctx->ctx_arr[BNXT_CTX_STQM]; + ctxm->entry_size = le16_to_cpu(resp->tqm_entry_size); + ctxm->min_entries = le32_to_cpu(resp->tqm_min_entries_per_ring); + ctxm->max_entries = le32_to_cpu(resp->tqm_max_entries_per_ring); + ctxm->entry_multiple = resp->tqm_entries_multiple; + if (!ctxm->entry_multiple) + ctxm->entry_multiple = 1; + + memcpy(&ctx->ctx_arr[BNXT_CTX_FTQM], ctxm, sizeof(*ctxm)); + + ctxm = &ctx->ctx_arr[BNXT_CTX_MRAV]; + ctxm->max_entries = le32_to_cpu(resp->mrav_max_entries); + ctxm->entry_size = le16_to_cpu(resp->mrav_entry_size); + ctxm->mrav_num_entries_units = le16_to_cpu(resp->mrav_num_entries_units); - ctx->tim_entry_size = le16_to_cpu(resp->tim_entry_size); - ctx->tim_max_entries = le32_to_cpu(resp->tim_max_entries); + bnxt_init_ctx_initializer(ctxm, init_val, + resp->mrav_init_offset, + (init_mask & (1 << init_idx++)) != 0); - bnxt_init_ctx_initializer(ctx, resp); + ctxm = &ctx->ctx_arr[BNXT_CTX_TIM]; + ctxm->entry_size = le16_to_cpu(resp->tim_entry_size); + ctxm->max_entries = le32_to_cpu(resp->tim_max_entries); ctx->tqm_fp_rings_count = resp->tqm_fp_rings_count; if (!ctx->tqm_fp_rings_count) @@ -7202,16 +7409,11 @@ static int bnxt_hwrm_func_backing_store_qcaps(struct bnxt *bp) else if (ctx->tqm_fp_rings_count > BNXT_MAX_TQM_FP_RINGS) ctx->tqm_fp_rings_count = BNXT_MAX_TQM_FP_RINGS; - tqm_rings = ctx->tqm_fp_rings_count + BNXT_MAX_TQM_SP_RINGS; - ctx_pg = kcalloc(tqm_rings, sizeof(*ctx_pg), GFP_KERNEL); - if (!ctx_pg) { - kfree(ctx); - rc = -ENOMEM; - goto ctx_err; - } - for (i = 0; i < tqm_rings; i++, ctx_pg++) - ctx->tqm_mem[i] = ctx_pg; - bp->ctx = ctx; + ctxm = &ctx->ctx_arr[BNXT_CTX_FTQM]; + memcpy(ctxm, &ctx->ctx_arr[BNXT_CTX_STQM], sizeof(*ctxm)); + ctxm->instance_bmap = (1 << ctx->tqm_fp_rings_count) - 1; + + rc = bnxt_alloc_all_ctx_pg_info(bp, BNXT_CTX_MAX); } else { rc = 0; } @@ -7250,6 +7452,7 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables) struct hwrm_func_backing_store_cfg_input *req; struct bnxt_ctx_mem_info *ctx = bp->ctx; struct bnxt_ctx_pg_info *ctx_pg; + struct bnxt_ctx_mem_type *ctxm; void **__req = (void **)&req; u32 req_len = sizeof(*req); __le32 *num_entries; @@ -7271,82 +7474,99 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables) req->enables = cpu_to_le32(enables); if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_QP) { - ctx_pg = &ctx->qp_mem; + ctxm = &ctx->ctx_arr[BNXT_CTX_QP]; + ctx_pg = ctxm->pg_info; req->qp_num_entries = cpu_to_le32(ctx_pg->entries); - req->qp_num_qp1_entries = cpu_to_le16(ctx->qp_min_qp1_entries); - req->qp_num_l2_entries = cpu_to_le16(ctx->qp_max_l2_entries); - req->qp_entry_size = cpu_to_le16(ctx->qp_entry_size); + req->qp_num_qp1_entries = cpu_to_le16(ctxm->qp_qp1_entries); + req->qp_num_l2_entries = cpu_to_le16(ctxm->qp_l2_entries); + req->qp_entry_size = 
cpu_to_le16(ctxm->entry_size); bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, &req->qpc_pg_size_qpc_lvl, &req->qpc_page_dir); } if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_SRQ) { - ctx_pg = &ctx->srq_mem; + ctxm = &ctx->ctx_arr[BNXT_CTX_SRQ]; + ctx_pg = ctxm->pg_info; req->srq_num_entries = cpu_to_le32(ctx_pg->entries); - req->srq_num_l2_entries = cpu_to_le16(ctx->srq_max_l2_entries); - req->srq_entry_size = cpu_to_le16(ctx->srq_entry_size); + req->srq_num_l2_entries = cpu_to_le16(ctxm->srq_l2_entries); + req->srq_entry_size = cpu_to_le16(ctxm->entry_size); bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, &req->srq_pg_size_srq_lvl, &req->srq_page_dir); } if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_CQ) { - ctx_pg = &ctx->cq_mem; + ctxm = &ctx->ctx_arr[BNXT_CTX_CQ]; + ctx_pg = ctxm->pg_info; req->cq_num_entries = cpu_to_le32(ctx_pg->entries); - req->cq_num_l2_entries = cpu_to_le16(ctx->cq_max_l2_entries); - req->cq_entry_size = cpu_to_le16(ctx->cq_entry_size); + req->cq_num_l2_entries = cpu_to_le16(ctxm->cq_l2_entries); + req->cq_entry_size = cpu_to_le16(ctxm->entry_size); bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, &req->cq_pg_size_cq_lvl, &req->cq_page_dir); } if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_VNIC) { - ctx_pg = &ctx->vnic_mem; - req->vnic_num_vnic_entries = - cpu_to_le16(ctx->vnic_max_vnic_entries); + ctxm = &ctx->ctx_arr[BNXT_CTX_VNIC]; + ctx_pg = ctxm->pg_info; + req->vnic_num_vnic_entries = cpu_to_le16(ctxm->vnic_entries); req->vnic_num_ring_table_entries = - cpu_to_le16(ctx->vnic_max_ring_table_entries); - req->vnic_entry_size = cpu_to_le16(ctx->vnic_entry_size); + cpu_to_le16(ctxm->max_entries - ctxm->vnic_entries); + req->vnic_entry_size = cpu_to_le16(ctxm->entry_size); bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, &req->vnic_pg_size_vnic_lvl, &req->vnic_page_dir); } if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_STAT) { - ctx_pg = &ctx->stat_mem; - req->stat_num_entries = cpu_to_le32(ctx->stat_max_entries); - req->stat_entry_size = cpu_to_le16(ctx->stat_entry_size); + ctxm = &ctx->ctx_arr[BNXT_CTX_STAT]; + ctx_pg = ctxm->pg_info; + req->stat_num_entries = cpu_to_le32(ctxm->max_entries); + req->stat_entry_size = cpu_to_le16(ctxm->entry_size); bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, &req->stat_pg_size_stat_lvl, &req->stat_page_dir); } if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV) { - ctx_pg = &ctx->mrav_mem; + u32 units; + + ctxm = &ctx->ctx_arr[BNXT_CTX_MRAV]; + ctx_pg = ctxm->pg_info; req->mrav_num_entries = cpu_to_le32(ctx_pg->entries); - if (ctx->mrav_num_entries_units) - flags |= - FUNC_BACKING_STORE_CFG_REQ_FLAGS_MRAV_RESERVATION_SPLIT; - req->mrav_entry_size = cpu_to_le16(ctx->mrav_entry_size); + units = ctxm->mrav_num_entries_units; + if (units) { + u32 num_mr, num_ah = ctxm->mrav_av_entries; + u32 entries; + + num_mr = ctx_pg->entries - num_ah; + entries = ((num_mr / units) << 16) | (num_ah / units); + req->mrav_num_entries = cpu_to_le32(entries); + flags |= FUNC_BACKING_STORE_CFG_REQ_FLAGS_MRAV_RESERVATION_SPLIT; + } + req->mrav_entry_size = cpu_to_le16(ctxm->entry_size); bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, &req->mrav_pg_size_mrav_lvl, &req->mrav_page_dir); } if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM) { - ctx_pg = &ctx->tim_mem; + ctxm = &ctx->ctx_arr[BNXT_CTX_TIM]; + ctx_pg = ctxm->pg_info; req->tim_num_entries = cpu_to_le32(ctx_pg->entries); - req->tim_entry_size = cpu_to_le16(ctx->tim_entry_size); + req->tim_entry_size = cpu_to_le16(ctxm->entry_size); bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, &req->tim_pg_size_tim_lvl, &req->tim_page_dir); } 
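
In the MRAV block above, the reservation split is now computed in bnxt_hwrm_func_backing_store_cfg() itself: when firmware reports an entries unit, the MR count goes into the upper 16 bits and the AH count into the lower 16 bits of mrav_num_entries, both scaled by that unit. A small worked example of the packing; the counts and unit size below are made up, only the formula comes from the hunk:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Assumed reservation: 256K MR entries, 128K AH entries,
	 * firmware reporting 64 entries per unit.
	 */
	uint32_t num_mr = 1024 * 256;
	uint32_t num_ah = 1024 * 128;
	uint32_t units = 64;

	/* Same packing as the driver: MR units in bits 31:16,
	 * AH units in bits 15:0.
	 */
	uint32_t entries = ((num_mr / units) << 16) | (num_ah / units);

	printf("mrav_num_entries=0x%08x (mr=%u, ah=%u units)\n",
	       entries, entries >> 16, entries & 0xffff);
	return 0;
}

This is the same encoding the old bnxt_alloc_ctx_mem() applied to ctx_pg->entries (removed further down in this patch); it simply moves next to the request that consumes it.
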
+ ctxm = &ctx->ctx_arr[BNXT_CTX_STQM]; for (i = 0, num_entries = &req->tqm_sp_num_entries, pg_attr = &req->tqm_sp_pg_size_tqm_sp_lvl, pg_dir = &req->tqm_sp_page_dir, - ena = FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP; + ena = FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP, + ctx_pg = ctxm->pg_info; i < BNXT_MAX_TQM_RINGS; + ctx_pg = &ctx->ctx_arr[BNXT_CTX_FTQM].pg_info[i], i++, num_entries++, pg_attr++, pg_dir++, ena <<= 1) { if (!(enables & ena)) continue; - req->tqm_entry_size = cpu_to_le16(ctx->tqm_entry_size); - ctx_pg = ctx->tqm_mem[i]; + req->tqm_entry_size = cpu_to_le16(ctxm->entry_size); *num_entries = cpu_to_le32(ctx_pg->entries); bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, pg_attr, pg_dir); } @@ -7370,7 +7590,7 @@ static int bnxt_alloc_ctx_mem_blk(struct bnxt *bp, static int bnxt_alloc_ctx_pg_tbls(struct bnxt *bp, struct bnxt_ctx_pg_info *ctx_pg, u32 mem_size, - u8 depth, struct bnxt_mem_init *mem_init) + u8 depth, struct bnxt_ctx_mem_type *ctxm) { struct bnxt_ring_mem_info *rmem = &ctx_pg->ring_mem; int rc; @@ -7408,7 +7628,7 @@ static int bnxt_alloc_ctx_pg_tbls(struct bnxt *bp, rmem->pg_tbl_map = ctx_pg->ctx_dma_arr[i]; rmem->depth = 1; rmem->nr_pages = MAX_CTX_PAGES; - rmem->mem_init = mem_init; + rmem->ctx_mem = ctxm; if (i == (nr_tbls - 1)) { int rem = ctx_pg->nr_pages % MAX_CTX_PAGES; @@ -7423,7 +7643,7 @@ static int bnxt_alloc_ctx_pg_tbls(struct bnxt *bp, rmem->nr_pages = DIV_ROUND_UP(mem_size, BNXT_PAGE_SIZE); if (rmem->nr_pages > 1 || depth) rmem->depth = 1; - rmem->mem_init = mem_init; + rmem->ctx_mem = ctxm; rc = bnxt_alloc_ctx_mem_blk(bp, ctx_pg); } return rc; @@ -7458,38 +7678,131 @@ static void bnxt_free_ctx_pg_tbls(struct bnxt *bp, ctx_pg->nr_pages = 0; } +static int bnxt_setup_ctxm_pg_tbls(struct bnxt *bp, + struct bnxt_ctx_mem_type *ctxm, u32 entries, + u8 pg_lvl) +{ + struct bnxt_ctx_pg_info *ctx_pg = ctxm->pg_info; + int i, rc = 0, n = 1; + u32 mem_size; + + if (!ctxm->entry_size || !ctx_pg) + return -EINVAL; + if (ctxm->instance_bmap) + n = hweight32(ctxm->instance_bmap); + if (ctxm->entry_multiple) + entries = roundup(entries, ctxm->entry_multiple); + entries = clamp_t(u32, entries, ctxm->min_entries, ctxm->max_entries); + mem_size = entries * ctxm->entry_size; + for (i = 0; i < n && !rc; i++) { + ctx_pg[i].entries = entries; + rc = bnxt_alloc_ctx_pg_tbls(bp, &ctx_pg[i], mem_size, pg_lvl, + ctxm->init_value ? 
ctxm : NULL); + } + return rc; +} + +static int bnxt_hwrm_func_backing_store_cfg_v2(struct bnxt *bp, + struct bnxt_ctx_mem_type *ctxm, + bool last) +{ + struct hwrm_func_backing_store_cfg_v2_input *req; + u32 instance_bmap = ctxm->instance_bmap; + int i, j, rc = 0, n = 1; + __le32 *p; + + if (!(ctxm->flags & BNXT_CTX_MEM_TYPE_VALID) || !ctxm->pg_info) + return 0; + + if (instance_bmap) + n = hweight32(ctxm->instance_bmap); + else + instance_bmap = 1; + + rc = hwrm_req_init(bp, req, HWRM_FUNC_BACKING_STORE_CFG_V2); + if (rc) + return rc; + hwrm_req_hold(bp, req); + req->type = cpu_to_le16(ctxm->type); + req->entry_size = cpu_to_le16(ctxm->entry_size); + req->subtype_valid_cnt = ctxm->split_entry_cnt; + for (i = 0, p = &req->split_entry_0; i < ctxm->split_entry_cnt; i++) + p[i] = cpu_to_le32(ctxm->split[i]); + for (i = 0, j = 0; j < n && !rc; i++) { + struct bnxt_ctx_pg_info *ctx_pg; + + if (!(instance_bmap & (1 << i))) + continue; + req->instance = cpu_to_le16(i); + ctx_pg = &ctxm->pg_info[j++]; + if (!ctx_pg->entries) + continue; + req->num_entries = cpu_to_le32(ctx_pg->entries); + bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, + &req->page_size_pbl_level, + &req->page_dir); + if (last && j == n) + req->flags = + cpu_to_le32(FUNC_BACKING_STORE_CFG_V2_REQ_FLAGS_BS_CFG_ALL_DONE); + rc = hwrm_req_send(bp, req); + } + hwrm_req_drop(bp, req); + return rc; +} + +static int bnxt_backing_store_cfg_v2(struct bnxt *bp) +{ + struct bnxt_ctx_mem_info *ctx = bp->ctx; + struct bnxt_ctx_mem_type *ctxm; + int rc = 0; + u16 type; + + for (type = 0 ; type < BNXT_CTX_V2_MAX; type++) { + ctxm = &ctx->ctx_arr[type]; + + rc = bnxt_hwrm_func_backing_store_cfg_v2(bp, ctxm, ctxm->last); + if (rc) + return rc; + } + return 0; +} + void bnxt_free_ctx_mem(struct bnxt *bp) { struct bnxt_ctx_mem_info *ctx = bp->ctx; - int i; + u16 type; if (!ctx) return; - if (ctx->tqm_mem[0]) { - for (i = 0; i < ctx->tqm_fp_rings_count + 1; i++) - bnxt_free_ctx_pg_tbls(bp, ctx->tqm_mem[i]); - kfree(ctx->tqm_mem[0]); - ctx->tqm_mem[0] = NULL; + for (type = 0; type < BNXT_CTX_V2_MAX; type++) { + struct bnxt_ctx_mem_type *ctxm = &ctx->ctx_arr[type]; + struct bnxt_ctx_pg_info *ctx_pg = ctxm->pg_info; + int i, n = 1; + + if (!ctx_pg) + continue; + if (ctxm->instance_bmap) + n = hweight32(ctxm->instance_bmap); + for (i = 0; i < n; i++) + bnxt_free_ctx_pg_tbls(bp, &ctx_pg[i]); + + kfree(ctx_pg); + ctxm->pg_info = NULL; } - bnxt_free_ctx_pg_tbls(bp, &ctx->tim_mem); - bnxt_free_ctx_pg_tbls(bp, &ctx->mrav_mem); - bnxt_free_ctx_pg_tbls(bp, &ctx->stat_mem); - bnxt_free_ctx_pg_tbls(bp, &ctx->vnic_mem); - bnxt_free_ctx_pg_tbls(bp, &ctx->cq_mem); - bnxt_free_ctx_pg_tbls(bp, &ctx->srq_mem); - bnxt_free_ctx_pg_tbls(bp, &ctx->qp_mem); ctx->flags &= ~BNXT_CTX_FLAG_INITED; + kfree(ctx); + bp->ctx = NULL; } static int bnxt_alloc_ctx_mem(struct bnxt *bp) { - struct bnxt_ctx_pg_info *ctx_pg; + struct bnxt_ctx_mem_type *ctxm; struct bnxt_ctx_mem_info *ctx; - struct bnxt_mem_init *init; - u32 mem_size, ena, entries; - u32 entries_sp, min; + u32 l2_qps, qp1_qps, max_qps; + u32 ena, entries_sp, entries; + u32 srqs, max_srqs, min; u32 num_mr, num_ah; u32 extra_srqs = 0; u32 extra_qps = 0; @@ -7506,120 +7819,93 @@ static int bnxt_alloc_ctx_mem(struct bnxt *bp) if (!ctx || (ctx->flags & BNXT_CTX_FLAG_INITED)) return 0; + ctxm = &ctx->ctx_arr[BNXT_CTX_QP]; + l2_qps = ctxm->qp_l2_entries; + qp1_qps = ctxm->qp_qp1_entries; + max_qps = ctxm->max_entries; + ctxm = &ctx->ctx_arr[BNXT_CTX_SRQ]; + srqs = ctxm->srq_l2_entries; + max_srqs = ctxm->max_entries; if ((bp->flags & 
BNXT_FLAG_ROCE_CAP) && !is_kdump_kernel()) { pg_lvl = 2; - extra_qps = 65536; - extra_srqs = 8192; + extra_qps = min_t(u32, 65536, max_qps - l2_qps - qp1_qps); + extra_srqs = min_t(u32, 8192, max_srqs - srqs); } - ctx_pg = &ctx->qp_mem; - ctx_pg->entries = ctx->qp_min_qp1_entries + ctx->qp_max_l2_entries + - extra_qps; - if (ctx->qp_entry_size) { - mem_size = ctx->qp_entry_size * ctx_pg->entries; - init = &ctx->mem_init[BNXT_CTX_MEM_INIT_QP]; - rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, pg_lvl, init); - if (rc) - return rc; - } + ctxm = &ctx->ctx_arr[BNXT_CTX_QP]; + rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, l2_qps + qp1_qps + extra_qps, + pg_lvl); + if (rc) + return rc; - ctx_pg = &ctx->srq_mem; - ctx_pg->entries = ctx->srq_max_l2_entries + extra_srqs; - if (ctx->srq_entry_size) { - mem_size = ctx->srq_entry_size * ctx_pg->entries; - init = &ctx->mem_init[BNXT_CTX_MEM_INIT_SRQ]; - rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, pg_lvl, init); - if (rc) - return rc; - } + ctxm = &ctx->ctx_arr[BNXT_CTX_SRQ]; + rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, srqs + extra_srqs, pg_lvl); + if (rc) + return rc; - ctx_pg = &ctx->cq_mem; - ctx_pg->entries = ctx->cq_max_l2_entries + extra_qps * 2; - if (ctx->cq_entry_size) { - mem_size = ctx->cq_entry_size * ctx_pg->entries; - init = &ctx->mem_init[BNXT_CTX_MEM_INIT_CQ]; - rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, pg_lvl, init); - if (rc) - return rc; - } + ctxm = &ctx->ctx_arr[BNXT_CTX_CQ]; + rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, ctxm->cq_l2_entries + + extra_qps * 2, pg_lvl); + if (rc) + return rc; - ctx_pg = &ctx->vnic_mem; - ctx_pg->entries = ctx->vnic_max_vnic_entries + - ctx->vnic_max_ring_table_entries; - if (ctx->vnic_entry_size) { - mem_size = ctx->vnic_entry_size * ctx_pg->entries; - init = &ctx->mem_init[BNXT_CTX_MEM_INIT_VNIC]; - rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 1, init); - if (rc) - return rc; - } + ctxm = &ctx->ctx_arr[BNXT_CTX_VNIC]; + rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, ctxm->max_entries, 1); + if (rc) + return rc; - ctx_pg = &ctx->stat_mem; - ctx_pg->entries = ctx->stat_max_entries; - if (ctx->stat_entry_size) { - mem_size = ctx->stat_entry_size * ctx_pg->entries; - init = &ctx->mem_init[BNXT_CTX_MEM_INIT_STAT]; - rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 1, init); - if (rc) - return rc; - } + ctxm = &ctx->ctx_arr[BNXT_CTX_STAT]; + rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, ctxm->max_entries, 1); + if (rc) + return rc; ena = 0; if (!(bp->flags & BNXT_FLAG_ROCE_CAP)) goto skip_rdma; - ctx_pg = &ctx->mrav_mem; + ctxm = &ctx->ctx_arr[BNXT_CTX_MRAV]; /* 128K extra is needed to accommodate static AH context * allocation by f/w. 
*/ - num_mr = 1024 * 256; - num_ah = 1024 * 128; - ctx_pg->entries = num_mr + num_ah; - if (ctx->mrav_entry_size) { - mem_size = ctx->mrav_entry_size * ctx_pg->entries; - init = &ctx->mem_init[BNXT_CTX_MEM_INIT_MRAV]; - rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 2, init); - if (rc) - return rc; - } + num_mr = min_t(u32, ctxm->max_entries / 2, 1024 * 256); + num_ah = min_t(u32, num_mr, 1024 * 128); + ctxm->split_entry_cnt = BNXT_CTX_MRAV_AV_SPLIT_ENTRY + 1; + if (!ctxm->mrav_av_entries || ctxm->mrav_av_entries > num_ah) + ctxm->mrav_av_entries = num_ah; + + rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, num_mr + num_ah, 2); + if (rc) + return rc; ena = FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV; - if (ctx->mrav_num_entries_units) - ctx_pg->entries = - ((num_mr / ctx->mrav_num_entries_units) << 16) | - (num_ah / ctx->mrav_num_entries_units); - - ctx_pg = &ctx->tim_mem; - ctx_pg->entries = ctx->qp_mem.entries; - if (ctx->tim_entry_size) { - mem_size = ctx->tim_entry_size * ctx_pg->entries; - rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 1, NULL); - if (rc) - return rc; - } + + ctxm = &ctx->ctx_arr[BNXT_CTX_TIM]; + rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, l2_qps + qp1_qps + extra_qps, 1); + if (rc) + return rc; ena |= FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM; skip_rdma: - min = ctx->tqm_min_entries_per_ring; - entries_sp = ctx->vnic_max_vnic_entries + ctx->qp_max_l2_entries + - 2 * (extra_qps + ctx->qp_min_qp1_entries) + min; - entries_sp = roundup(entries_sp, ctx->tqm_entries_multiple); - entries = ctx->qp_max_l2_entries + 2 * (extra_qps + ctx->qp_min_qp1_entries); - entries = roundup(entries, ctx->tqm_entries_multiple); - entries = clamp_t(u32, entries, min, ctx->tqm_max_entries_per_ring); - for (i = 0; i < ctx->tqm_fp_rings_count + 1; i++) { - ctx_pg = ctx->tqm_mem[i]; - ctx_pg->entries = i ? 
entries : entries_sp; - if (ctx->tqm_entry_size) { - mem_size = ctx->tqm_entry_size * ctx_pg->entries; - rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 1, - NULL); - if (rc) - return rc; - } + ctxm = &ctx->ctx_arr[BNXT_CTX_STQM]; + min = ctxm->min_entries; + entries_sp = ctx->ctx_arr[BNXT_CTX_VNIC].vnic_entries + l2_qps + + 2 * (extra_qps + qp1_qps) + min; + rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, entries_sp, 2); + if (rc) + return rc; + + ctxm = &ctx->ctx_arr[BNXT_CTX_FTQM]; + entries = l2_qps + 2 * (extra_qps + qp1_qps); + rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, entries, 2); + if (rc) + return rc; + for (i = 0; i < ctx->tqm_fp_rings_count + 1; i++) ena |= FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP << i; - } ena |= FUNC_BACKING_STORE_CFG_REQ_DFLT_ENABLES; - rc = bnxt_hwrm_func_backing_store_cfg(bp, ena); + + if (bp->fw_cap & BNXT_FW_CAP_BACKING_STORE_V2) + rc = bnxt_backing_store_cfg_v2(bp); + else + rc = bnxt_hwrm_func_backing_store_cfg(bp, ena); if (rc) { netdev_err(bp->dev, "Failed configuring context mem, rc = %d.\n", rc); @@ -7667,7 +7953,7 @@ int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all) hw_resc->min_stat_ctxs = le16_to_cpu(resp->min_stat_ctx); hw_resc->max_stat_ctxs = le16_to_cpu(resp->max_stat_ctx); - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { u16 max_msix = le16_to_cpu(resp->max_msix); hw_resc->max_nqs = max_msix; @@ -7696,7 +7982,7 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp) u8 flags; int rc; - if (bp->hwrm_spec_code < 0x10801 || !BNXT_CHIP_P5_THOR(bp)) { + if (bp->hwrm_spec_code < 0x10801 || !BNXT_CHIP_P5(bp)) { rc = -ENODEV; goto no_ptp; } @@ -7728,7 +8014,7 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp) if (flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_PARTIAL_DIRECT_ACCESS_REF_CLOCK) { ptp->refclk_regs[0] = le32_to_cpu(resp->ts_ref_clock_reg_lower); ptp->refclk_regs[1] = le32_to_cpu(resp->ts_ref_clock_reg_upper); - } else if (bp->flags & BNXT_FLAG_CHIP_P5) { + } else if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { ptp->refclk_regs[0] = BNXT_TS_REG_TIMESYNC_TS0_LOWER; ptp->refclk_regs[1] = BNXT_TS_REG_TIMESYNC_TS0_UPPER; } else { @@ -7800,6 +8086,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) bp->fw_cap |= BNXT_FW_CAP_HOT_RESET_IF; if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_FW_LIVEPATCH_SUPPORTED)) bp->fw_cap |= BNXT_FW_CAP_LIVEPATCH; + if (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_BS_V2_SUPPORTED) + bp->fw_cap |= BNXT_FW_CAP_BACKING_STORE_V2; flags_ext2 = le32_to_cpu(resp->flags_ext2); if (flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_RX_ALL_PKTS_TIMESTAMPS_SUPPORTED) @@ -8435,7 +8723,7 @@ static void bnxt_accumulate_all_stats(struct bnxt *bp) int i; /* Chip bug. Counter intermittently becomes 0. 
*/ - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) ignore_zero = true; for (i = 0; i < bp->cp_nr_rings; i++) { @@ -8629,7 +8917,7 @@ static void bnxt_clear_vnic(struct bnxt *bp) return; bnxt_hwrm_clear_vnic_filter(bp); - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) { + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) { /* clear all RSS setting before free vnic ctx */ bnxt_hwrm_clear_vnic_rss(bp); bnxt_hwrm_vnic_ctx_free(bp); @@ -8638,7 +8926,7 @@ static void bnxt_clear_vnic(struct bnxt *bp) if (bp->flags & BNXT_FLAG_TPA) bnxt_set_tpa(bp, false); bnxt_hwrm_vnic_free(bp); - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) bnxt_hwrm_vnic_ctx_free(bp); } @@ -8795,7 +9083,7 @@ static int __bnxt_setup_vnic_p5(struct bnxt *bp, u16 vnic_id) static int bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id) { - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) return __bnxt_setup_vnic_p5(bp, vnic_id); else return __bnxt_setup_vnic(bp, vnic_id); @@ -8806,7 +9094,7 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp) #ifdef CONFIG_RFS_ACCEL int i, rc = 0; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) return 0; for (i = 0; i < bp->rx_nr_rings; i++) { @@ -9031,8 +9319,8 @@ static int bnxt_set_real_num_queues(struct bnxt *bp) return rc; } -static int bnxt_trim_rings(struct bnxt *bp, int *rx, int *tx, int max, - bool shared) +static int __bnxt_trim_rings(struct bnxt *bp, int *rx, int *tx, int max, + bool shared) { int _rx = *rx, _tx = *tx; @@ -9055,6 +9343,46 @@ static int bnxt_trim_rings(struct bnxt *bp, int *rx, int *tx, int max, return 0; } +static int __bnxt_num_tx_to_cp(struct bnxt *bp, int tx, int tx_sets, int tx_xdp) +{ + return (tx - tx_xdp) / tx_sets + tx_xdp; +} + +int bnxt_num_tx_to_cp(struct bnxt *bp, int tx) +{ + int tcs = netdev_get_num_tc(bp->dev); + + if (!tcs) + tcs = 1; + return __bnxt_num_tx_to_cp(bp, tx, tcs, bp->tx_nr_rings_xdp); +} + +static int bnxt_num_cp_to_tx(struct bnxt *bp, int tx_cp) +{ + int tcs = netdev_get_num_tc(bp->dev); + + return (tx_cp - bp->tx_nr_rings_xdp) * tcs + + bp->tx_nr_rings_xdp; +} + +static int bnxt_trim_rings(struct bnxt *bp, int *rx, int *tx, int max, + bool sh) +{ + int tx_cp = bnxt_num_tx_to_cp(bp, *tx); + + if (tx_cp != *tx) { + int tx_saved = tx_cp, rc; + + rc = __bnxt_trim_rings(bp, rx, &tx_cp, max, sh); + if (rc) + return rc; + if (tx_cp != tx_saved) + *tx = bnxt_num_cp_to_tx(bp, tx_cp); + return 0; + } + return __bnxt_trim_rings(bp, rx, tx, max, sh); +} + static void bnxt_setup_msix(struct bnxt *bp) { const int len = sizeof(bp->irq_tbl[0].name); @@ -9067,7 +9395,7 @@ static void bnxt_setup_msix(struct bnxt *bp) for (i = 0; i < tcs; i++) { count = bp->tx_nr_rings_per_tc; - off = i * count; + off = BNXT_TC_TO_RING_BASE(bp, i); netdev_set_tc_queue(dev, i, count, off); } } @@ -9148,7 +9476,7 @@ static unsigned int bnxt_get_max_func_cp_rings_for_en(struct bnxt *bp) { unsigned int cp = bp->hw_resc.max_cp_rings; - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) cp -= bnxt_get_ulp_msix_num(bp); return cp; @@ -9158,7 +9486,7 @@ static unsigned int bnxt_get_max_func_irqs(struct bnxt *bp) { struct bnxt_hw_resc *hw_resc = &bp->hw_resc; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) return min_t(unsigned int, hw_resc->max_irqs, hw_resc->max_nqs); return min_t(unsigned int, hw_resc->max_irqs, hw_resc->max_cp_rings); @@ -9174,7 +9502,7 @@ unsigned int bnxt_get_avail_cp_rings_for_en(struct bnxt *bp) 
unsigned int cp; cp = bnxt_get_max_func_cp_rings_for_en(bp); - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) return cp - bp->rx_nr_rings - bp->tx_nr_rings; else return cp - bp->cp_nr_rings; @@ -9193,7 +9521,7 @@ int bnxt_get_avail_msix(struct bnxt *bp, int num) int max_idx, avail_msix; max_idx = bp->total_irqs; - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) max_idx = min_t(int, bp->total_irqs, max_cp); avail_msix = max_idx - bp->cp_nr_rings; if (!BNXT_NEW_RM(bp) || avail_msix >= num) @@ -9217,7 +9545,7 @@ static int bnxt_get_num_msix(struct bnxt *bp) static int bnxt_init_msix(struct bnxt *bp) { - int i, total_vecs, max, rc = 0, min = 1, ulp_msix; + int i, total_vecs, max, rc = 0, min = 1, ulp_msix, tx_cp; struct msix_entry *msix_ent; total_vecs = bnxt_get_num_msix(bp); @@ -9259,9 +9587,10 @@ static int bnxt_init_msix(struct bnxt *bp) if (rc) goto msix_setup_exit; + tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings); bp->cp_nr_rings = (min == 1) ? - max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) : - bp->tx_nr_rings + bp->rx_nr_rings; + max_t(int, tx_cp, bp->rx_nr_rings) : + tx_cp + bp->rx_nr_rings; } else { rc = -ENOMEM; @@ -9471,7 +9800,7 @@ static void bnxt_init_napi(struct bnxt *bp) if (bp->flags & BNXT_FLAG_USING_MSIX) { int (*poll_fn)(struct napi_struct *, int) = bnxt_poll; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) poll_fn = bnxt_poll_p5; else if (BNXT_CHIP_TYPE_NITRO_A0(bp)) cp_nr_rings--; @@ -9527,8 +9856,6 @@ static void bnxt_enable_napi(struct bnxt *bp) cpr = &bnapi->cp_ring; bnapi->in_reset = false; - bnapi->tx_pkts = 0; - if (bnapi->rx_ring) { INIT_WORK(&cpr->dim.work, bnxt_dim_work); cpr->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; @@ -10211,8 +10538,6 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) bnxt_ulp_stop(bp); bnxt_free_ctx_mem(bp); - kfree(bp->ctx); - bp->ctx = NULL; bnxt_dcb_free(bp); rc = bnxt_fw_init_one(bp); if (rc) { @@ -11194,7 +11519,7 @@ static bool bnxt_can_reserve_rings(struct bnxt *bp) /* If the chip and firmware supports RFS */ static bool bnxt_rfs_supported(struct bnxt *bp) { - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { if (bp->fw_cap & BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V2) return true; return false; @@ -11215,7 +11540,7 @@ static bool bnxt_rfs_capable(struct bnxt *bp) #ifdef CONFIG_RFS_ACCEL int vnics, max_vnics, max_rss_ctxs; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) return bnxt_rfs_supported(bp); if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp) || !bp->rx_nr_rings) return false; @@ -11317,7 +11642,7 @@ static int bnxt_set_features(struct net_device *dev, netdev_features_t features) update_tpa = true; if ((bp->flags & BNXT_FLAG_TPA) == 0 || (flags & BNXT_FLAG_TPA) == 0 || - (bp->flags & BNXT_FLAG_CHIP_P5)) + (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) re_init = true; } @@ -11559,15 +11884,13 @@ static int bnxt_dbg_hwrm_ring_info_get(struct bnxt *bp, u8 ring_type, static void bnxt_dump_tx_sw_state(struct bnxt_napi *bnapi) { - struct bnxt_tx_ring_info *txr = bnapi->tx_ring; - int i = bnapi->index; - - if (!txr) - return; + struct bnxt_tx_ring_info *txr; + int i = bnapi->index, j; - netdev_info(bnapi->bp->dev, "[%d]: tx{fw_ring: %d prod: %x cons: %x}\n", - i, txr->tx_ring_struct.fw_ring_id, txr->tx_prod, - txr->tx_cons); + bnxt_for_each_napi_tx(j, bnapi, txr) + netdev_info(bnapi->bp->dev, "[%d.%d]: tx{fw_ring: 
%d prod: %x cons: %x}\n", + i, j, txr->tx_ring_struct.fw_ring_id, txr->tx_prod, + txr->tx_cons); } static void bnxt_dump_rx_sw_state(struct bnxt_napi *bnapi) @@ -11730,8 +12053,7 @@ static void bnxt_timer(struct timer_list *t) if (test_bit(BNXT_STATE_L2_FILTER_RETRY, &bp->state)) bnxt_queue_sp_work(bp, BNXT_RX_MASK_SP_EVENT); - if ((bp->flags & BNXT_FLAG_CHIP_P5) && !bp->chip_rev && - netif_carrier_ok(dev)) + if ((BNXT_CHIP_P5(bp)) && !bp->chip_rev && netif_carrier_ok(dev)) bnxt_queue_sp_work(bp, BNXT_RING_COAL_NOW_SP_EVENT); bnxt_restart_timer: @@ -11840,8 +12162,6 @@ static void bnxt_fw_reset_close(struct bnxt *bp) if (pci_is_enabled(bp->pdev)) pci_disable_device(bp->pdev); bnxt_free_ctx_mem(bp); - kfree(bp->ctx); - bp->ctx = NULL; } static bool is_bnxt_fw_ok(struct bnxt *bp) @@ -11984,7 +12304,7 @@ static void bnxt_chk_missed_irq(struct bnxt *bp) { int i; - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) return; for (i = 0; i < bp->cp_nr_rings; i++) { @@ -11997,12 +12317,11 @@ static void bnxt_chk_missed_irq(struct bnxt *bp) continue; cpr = &bnapi->cp_ring; - for (j = 0; j < 2; j++) { - struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[j]; + for (j = 0; j < cpr->cp_ring_count; j++) { + struct bnxt_cp_ring_info *cpr2 = &cpr->cp_ring_arr[j]; u32 val[2]; - if (!cpr2 || cpr2->has_more_work || - !bnxt_has_work(bp, cpr2)) + if (cpr2->has_more_work || !bnxt_has_work(bp, cpr2)) continue; if (cpr2->cp_raw_cons != cpr2->last_cp_raw_cons) { @@ -12161,23 +12480,27 @@ static void bnxt_sp_task(struct work_struct *work) clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state); } +static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx, + int *max_cp); + /* Under rtnl_lock */ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, int tx_xdp) { - int max_rx, max_tx, tx_sets = 1; + int max_rx, max_tx, max_cp, tx_sets = 1, tx_cp; int tx_rings_needed, stats; int rx_rings = rx; - int cp, vnics, rc; + int cp, vnics; if (tcs) tx_sets = tcs; - rc = bnxt_get_max_rings(bp, &max_rx, &max_tx, sh); - if (rc) - return rc; + if (bp->flags & BNXT_FLAG_AGG_RINGS) + rx_rings <<= 1; + + _bnxt_get_max_rings(bp, &max_rx, &max_tx, &max_cp); - if (max_rx < rx) + if (max_rx < rx_rings) return -ENOMEM; tx_rings_needed = tx * tx_sets + tx_xdp; @@ -12185,12 +12508,14 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, return -ENOMEM; vnics = 1; - if ((bp->flags & (BNXT_FLAG_RFS | BNXT_FLAG_CHIP_P5)) == BNXT_FLAG_RFS) - vnics += rx_rings; + if ((bp->flags & (BNXT_FLAG_RFS | BNXT_FLAG_CHIP_P5_PLUS)) == + BNXT_FLAG_RFS) + vnics += rx; - if (bp->flags & BNXT_FLAG_AGG_RINGS) - rx_rings <<= 1; - cp = sh ? max_t(int, tx_rings_needed, rx) : tx_rings_needed + rx; + tx_cp = __bnxt_num_tx_to_cp(bp, tx_rings_needed, tx_sets, tx_xdp); + cp = sh ? 
max_t(int, tx_cp, rx) : tx_cp + rx; + if (max_cp < cp) + return -ENOMEM; stats = cp; if (BNXT_NEW_RM(bp)) { cp += bnxt_get_ulp_msix_num(bp); @@ -12265,10 +12590,10 @@ static bool bnxt_fw_pre_resv_vnics(struct bnxt *bp) { u16 fw_maj = BNXT_FW_MAJ(bp), fw_bld = BNXT_FW_BLD(bp); - if (!(bp->flags & BNXT_FLAG_CHIP_P5) && + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && (fw_maj > 218 || (fw_maj == 218 && fw_bld >= 18))) return true; - if ((bp->flags & BNXT_FLAG_CHIP_P5) && + if ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && (fw_maj > 216 || (fw_maj == 216 && fw_bld >= 172))) return true; return false; @@ -12824,7 +13149,7 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc) { struct bnxt *bp = netdev_priv(dev); bool sh = false; - int rc; + int rc, tx_cp; if (tc > bp->max_tc) { netdev_err(dev, "Too many traffic classes requested: %d. Max supported is %d.\n", @@ -12855,8 +13180,9 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc) netdev_reset_tc(dev); } bp->tx_nr_rings += bp->tx_nr_rings_xdp; - bp->cp_nr_rings = sh ? max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) : - bp->tx_nr_rings + bp->rx_nr_rings; + tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings); + bp->cp_nr_rings = sh ? max_t(int, tx_cp, bp->rx_nr_rings) : + tx_cp + bp->rx_nr_rings; if (netif_running(bp->dev)) return bnxt_open_nic(bp, true, false); @@ -13255,8 +13581,6 @@ static void bnxt_remove_one(struct pci_dev *pdev) bp->fw_health = NULL; bnxt_cleanup_pci(bp); bnxt_free_ctx_mem(bp); - kfree(bp->ctx); - bp->ctx = NULL; kfree(bp->rss_indir_tbl); bp->rss_indir_tbl = NULL; bnxt_free_port_stats(bp); @@ -13325,7 +13649,7 @@ static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx, max_irq = min_t(int, bnxt_get_max_func_irqs(bp) - bnxt_get_ulp_msix_num(bp), hw_resc->max_stat_ctxs - bnxt_get_ulp_stat_ctxs(bp)); - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) *max_cp = min_t(int, *max_cp, max_irq); max_ring_grps = hw_resc->max_hw_ring_grps; if (BNXT_CHIP_TYPE_NITRO_A0(bp) && BNXT_PF(bp)) { @@ -13334,8 +13658,11 @@ static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx, } if (bp->flags & BNXT_FLAG_AGG_RINGS) *max_rx >>= 1; - if (bp->flags & BNXT_FLAG_CHIP_P5) { - bnxt_trim_rings(bp, max_rx, max_tx, *max_cp, false); + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { + if (*max_cp < (*max_rx + *max_tx)) { + *max_rx = *max_cp / 2; + *max_tx = *max_rx; + } /* On P5 chips, max_cp output param should be available NQs */ *max_cp = max_irq; } @@ -13636,7 +13963,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } max_irqs = bnxt_get_max_irq(pdev); - dev = alloc_etherdev_mq(sizeof(*bp), max_irqs); + dev = alloc_etherdev_mqs(sizeof(*bp), max_irqs * BNXT_MAX_QUEUE, + max_irqs); if (!dev) return -ENOMEM; @@ -13678,8 +14006,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (BNXT_PF(bp)) bnxt_vpd_read_info(bp); - if (BNXT_CHIP_P5(bp)) { - bp->flags |= BNXT_FLAG_CHIP_P5; + if (BNXT_CHIP_P5_PLUS(bp)) { + bp->flags |= BNXT_FLAG_CHIP_P5_PLUS; if (BNXT_CHIP_SR2(bp)) bp->flags |= BNXT_FLAG_CHIP_SR2; } @@ -13744,7 +14072,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) bp->gro_func = bnxt_gro_func_5730x; if (BNXT_CHIP_P4(bp)) bp->gro_func = bnxt_gro_func_5731x; - else if (BNXT_CHIP_P5(bp)) + else if (BNXT_CHIP_P5_PLUS(bp)) bp->gro_func = bnxt_gro_func_5750x; } if (!BNXT_CHIP_P4_PLUS(bp)) @@ -13852,8 +14180,6 @@ init_err_pci_clean: bp->fw_health = NULL; bnxt_cleanup_pci(bp); bnxt_free_ctx_mem(bp); - 
kfree(bp->ctx); - bp->ctx = NULL; kfree(bp->rss_indir_tbl); bp->rss_indir_tbl = NULL; @@ -13906,8 +14232,6 @@ static int bnxt_suspend(struct device *device) bnxt_hwrm_func_drv_unrgtr(bp); pci_disable_device(bp->pdev); bnxt_free_ctx_mem(bp); - kfree(bp->ctx); - bp->ctx = NULL; rtnl_unlock(); return rc; } @@ -14004,8 +14328,6 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, if (pci_is_enabled(pdev)) pci_disable_device(pdev); bnxt_free_ctx_mem(bp); - kfree(bp->ctx); - bp->ctx = NULL; rtnl_unlock(); /* Request a slot slot reset. */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index e702dbc3e6b1..94b3627406c4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -61,6 +61,24 @@ struct tx_bd { __le64 tx_bd_haddr; } __packed; +#define TX_OPAQUE_IDX_MASK 0x0000ffff +#define TX_OPAQUE_BDS_MASK 0x00ff0000 +#define TX_OPAQUE_BDS_SHIFT 16 +#define TX_OPAQUE_RING_MASK 0xff000000 +#define TX_OPAQUE_RING_SHIFT 24 + +#define SET_TX_OPAQUE(bp, txr, idx, bds) \ + (((txr)->tx_napi_idx << TX_OPAQUE_RING_SHIFT) | \ + ((bds) << TX_OPAQUE_BDS_SHIFT) | ((idx) & (bp)->tx_ring_mask)) + +#define TX_OPAQUE_IDX(opq) ((opq) & TX_OPAQUE_IDX_MASK) +#define TX_OPAQUE_RING(opq) (((opq) & TX_OPAQUE_RING_MASK) >> \ + TX_OPAQUE_RING_SHIFT) +#define TX_OPAQUE_BDS(opq) (((opq) & TX_OPAQUE_BDS_MASK) >> \ + TX_OPAQUE_BDS_SHIFT) +#define TX_OPAQUE_PROD(bp, opq) ((TX_OPAQUE_IDX(opq) + TX_OPAQUE_BDS(opq)) &\ + (bp)->tx_ring_mask) + struct tx_bd_ext { __le32 tx_bd_hsize_lflags; #define TX_BD_FLAGS_TCP_UDP_CHKSUM (1 << 0) @@ -530,6 +548,19 @@ struct nqe_cn { __le32 cq_handle_high; }; +#define BNXT_NQ_HDL_IDX_MASK 0x00ffffff +#define BNXT_NQ_HDL_TYPE_MASK 0xff000000 +#define BNXT_NQ_HDL_TYPE_SHIFT 24 +#define BNXT_NQ_HDL_TYPE_RX 0x00 +#define BNXT_NQ_HDL_TYPE_TX 0x01 + +#define BNXT_NQ_HDL_IDX(hdl) ((hdl) & BNXT_NQ_HDL_IDX_MASK) +#define BNXT_NQ_HDL_TYPE(hdl) (((hdl) & BNXT_NQ_HDL_TYPE_MASK) >> \ + BNXT_NQ_HDL_TYPE_SHIFT) + +#define BNXT_SET_NQ_HDL(cpr) \ + (((cpr)->cp_ring_type << BNXT_NQ_HDL_TYPE_SHIFT) | (cpr)->cp_idx) + #define DB_IDX_MASK 0xffffff #define DB_IDX_VALID (0x1 << 26) #define DB_IRQ_DIS (0x1 << 27) @@ -655,10 +686,12 @@ struct nqe_cn { */ #define BNXT_MIN_TX_DESC_CNT (MAX_SKB_FRAGS + 2) -#define RX_RING(x) (((x) & ~(RX_DESC_CNT - 1)) >> (BNXT_PAGE_SHIFT - 4)) +#define RX_RING(bp, x) (((x) & (bp)->rx_ring_mask) >> (BNXT_PAGE_SHIFT - 4)) +#define RX_AGG_RING(bp, x) (((x) & (bp)->rx_agg_ring_mask) >> \ + (BNXT_PAGE_SHIFT - 4)) #define RX_IDX(x) ((x) & (RX_DESC_CNT - 1)) -#define TX_RING(x) (((x) & ~(TX_DESC_CNT - 1)) >> (BNXT_PAGE_SHIFT - 4)) +#define TX_RING(bp, x) (((x) & (bp)->tx_ring_mask) >> (BNXT_PAGE_SHIFT - 4)) #define TX_IDX(x) ((x) & (TX_DESC_CNT - 1)) #define CP_RING(x) (((x) & ~(CP_DESC_CNT - 1)) >> (BNXT_PAGE_SHIFT - 4)) @@ -685,11 +718,14 @@ struct nqe_cn { #define RX_CMP_TYPE(rxcmp) \ (le32_to_cpu((rxcmp)->rx_cmp_len_flags_type) & RX_CMP_CMP_TYPE) -#define NEXT_RX(idx) (((idx) + 1) & bp->rx_ring_mask) +#define RING_RX(bp, idx) ((idx) & (bp)->rx_ring_mask) +#define NEXT_RX(idx) ((idx) + 1) -#define NEXT_RX_AGG(idx) (((idx) + 1) & bp->rx_agg_ring_mask) +#define RING_RX_AGG(bp, idx) ((idx) & (bp)->rx_agg_ring_mask) +#define NEXT_RX_AGG(idx) ((idx) + 1) -#define NEXT_TX(idx) (((idx) + 1) & bp->tx_ring_mask) +#define RING_TX(bp, idx) ((idx) & (bp)->tx_ring_mask) +#define NEXT_TX(idx) ((idx) + 1) #define ADV_RAW_CMP(idx, n) ((idx) + (n)) #define NEXT_RAW_CMP(idx) ADV_RAW_CMP(idx, 1) @@ 
-702,6 +738,7 @@ struct nqe_cn { #define BNXT_AGG_EVENT 2 #define BNXT_TX_EVENT 4 #define BNXT_REDIRECT_EVENT 8 +#define BNXT_TX_CMP_EVENT 0x10 struct bnxt_sw_tx_bd { union { @@ -730,13 +767,6 @@ struct bnxt_sw_rx_agg_bd { dma_addr_t mapping; }; -struct bnxt_mem_init { - u8 init_val; - u16 offset; -#define BNXT_MEM_INVALID_OFFSET 0xffff - u16 size; -}; - struct bnxt_ring_mem_info { int nr_pages; int page_size; @@ -746,7 +776,7 @@ struct bnxt_ring_mem_info { #define BNXT_RMEM_USE_FULL_PAGE_FLAG 4 u16 depth; - struct bnxt_mem_init *mem_init; + struct bnxt_ctx_mem_type *ctx_mem; void **pg_arr; dma_addr_t *dma_arr; @@ -788,13 +818,19 @@ struct bnxt_db_info { u64 db_key64; u32 db_key32; }; + u32 db_ring_mask; }; +#define DB_RING_IDX(db, idx) ((idx) & (db)->db_ring_mask) + struct bnxt_tx_ring_info { struct bnxt_napi *bnapi; + struct bnxt_cp_ring_info *tx_cpr; u16 tx_prod; u16 tx_cons; + u16 tx_hw_cons; u16 txq_index; + u8 tx_napi_idx; u8 kick_pending; struct bnxt_db_info tx_db; @@ -901,6 +937,7 @@ struct bnxt_tpa_idx_map { struct bnxt_rx_ring_info { struct bnxt_napi *bnapi; + struct bnxt_cp_ring_info *rx_cpr; u16 rx_prod; u16 rx_agg_prod; u16 rx_sw_agg_prod; @@ -980,6 +1017,10 @@ struct bnxt_cp_ring_info { u8 had_work_done:1; u8 has_more_work:1; + u8 had_nqe_notify:1; + + u8 cp_ring_type; + u8 cp_idx; u32 last_cp_raw_cons; @@ -1004,11 +1045,18 @@ struct bnxt_cp_ring_info { struct bnxt_ring_struct cp_ring_struct; - struct bnxt_cp_ring_info *cp_ring_arr[2]; -#define BNXT_RX_HDL 0 -#define BNXT_TX_HDL 1 + int cp_ring_count; + struct bnxt_cp_ring_info *cp_ring_arr; }; +#define BNXT_MAX_QUEUE 8 +#define BNXT_MAX_TXR_PER_NAPI BNXT_MAX_QUEUE + +#define bnxt_for_each_napi_tx(iter, bnapi, txr) \ + for (iter = 0, txr = (bnapi)->tx_ring[0]; txr; \ + txr = (iter < BNXT_MAX_TXR_PER_NAPI - 1) ? \ + (bnapi)->tx_ring[++iter] : NULL) + struct bnxt_napi { struct napi_struct napi; struct bnxt *bp; @@ -1016,11 +1064,10 @@ struct bnxt_napi { int index; struct bnxt_cp_ring_info cp_ring; struct bnxt_rx_ring_info *rx_ring; - struct bnxt_tx_ring_info *tx_ring; + struct bnxt_tx_ring_info *tx_ring[BNXT_MAX_TXR_PER_NAPI]; void (*tx_int)(struct bnxt *, struct bnxt_napi *, int budget); - int tx_pkts; u8 events; u8 tx_fault:1; @@ -1355,8 +1402,6 @@ struct bnxt_link_info { (PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE74_DISABLE | \ BNXT_FEC_RS_OFF(link_info)) -#define BNXT_MAX_QUEUE 8 - struct bnxt_queue_info { u8 queue_id; u8 queue_profile; @@ -1385,7 +1430,7 @@ struct bnxt_test_info { }; #define CHIMP_REG_VIEW_ADDR \ - ((bp->flags & BNXT_FLAG_CHIP_P5) ? 0x80000000 : 0xb1000000) + ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) ? 
0x80000000 : 0xb1000000) #define BNXT_GRCPF_REG_CHIMP_COMM 0x0 #define BNXT_GRCPF_REG_CHIMP_COMM_TRIGGER 0x100 @@ -1509,53 +1554,72 @@ do { \ attr = FUNC_BACKING_STORE_CFG_REQ_QPC_PG_SIZE_PG_4K; \ } while (0) +struct bnxt_ctx_mem_type { + u16 type; + u16 entry_size; + u32 flags; +#define BNXT_CTX_MEM_TYPE_VALID FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_TYPE_VALID + u32 instance_bmap; + u8 init_value; + u8 entry_multiple; + u16 init_offset; +#define BNXT_CTX_INIT_INVALID_OFFSET 0xffff + u32 max_entries; + u32 min_entries; + u8 last:1; + u8 split_entry_cnt; +#define BNXT_MAX_SPLIT_ENTRY 4 + union { + struct { + u32 qp_l2_entries; + u32 qp_qp1_entries; + u32 qp_fast_qpmd_entries; + }; + u32 srq_l2_entries; + u32 cq_l2_entries; + u32 vnic_entries; + struct { + u32 mrav_av_entries; + u32 mrav_num_entries_units; + }; + u32 split[BNXT_MAX_SPLIT_ENTRY]; + }; + struct bnxt_ctx_pg_info *pg_info; +}; + +#define BNXT_CTX_MRAV_AV_SPLIT_ENTRY 0 + +#define BNXT_CTX_QP FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QP +#define BNXT_CTX_SRQ FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ +#define BNXT_CTX_CQ FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ +#define BNXT_CTX_VNIC FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_VNIC +#define BNXT_CTX_STAT FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_STAT +#define BNXT_CTX_STQM FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SP_TQM_RING +#define BNXT_CTX_FTQM FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_FP_TQM_RING +#define BNXT_CTX_MRAV FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MRAV +#define BNXT_CTX_TIM FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TIM +#define BNXT_CTX_TKC FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TKC +#define BNXT_CTX_RKC FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RKC +#define BNXT_CTX_MTQM FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MP_TQM_RING +#define BNXT_CTX_SQDBS FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SQ_DB_SHADOW +#define BNXT_CTX_RQDBS FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RQ_DB_SHADOW +#define BNXT_CTX_SRQDBS FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ_DB_SHADOW +#define BNXT_CTX_CQDBS FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ_DB_SHADOW +#define BNXT_CTX_QTKC FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QUIC_TKC +#define BNXT_CTX_QRKC FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QUIC_RKC +#define BNXT_CTX_TBLSC FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TBL_SCOPE +#define BNXT_CTX_XPAR FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_XID_PARTITION + +#define BNXT_CTX_MAX (BNXT_CTX_TIM + 1) +#define BNXT_CTX_V2_MAX (BNXT_CTX_XPAR + 1) +#define BNXT_CTX_INV ((u16)-1) + struct bnxt_ctx_mem_info { - u32 qp_max_entries; - u16 qp_min_qp1_entries; - u16 qp_max_l2_entries; - u16 qp_entry_size; - u16 srq_max_l2_entries; - u32 srq_max_entries; - u16 srq_entry_size; - u16 cq_max_l2_entries; - u32 cq_max_entries; - u16 cq_entry_size; - u16 vnic_max_vnic_entries; - u16 vnic_max_ring_table_entries; - u16 vnic_entry_size; - u32 stat_max_entries; - u16 stat_entry_size; - u16 tqm_entry_size; - u32 tqm_min_entries_per_ring; - u32 tqm_max_entries_per_ring; - u32 mrav_max_entries; - u16 mrav_entry_size; - u16 tim_entry_size; - u32 tim_max_entries; - u16 mrav_num_entries_units; - u8 tqm_entries_multiple; u8 tqm_fp_rings_count; u32 flags; #define BNXT_CTX_FLAG_INITED 0x01 - - struct bnxt_ctx_pg_info qp_mem; - struct bnxt_ctx_pg_info srq_mem; - struct bnxt_ctx_pg_info cq_mem; - struct bnxt_ctx_pg_info vnic_mem; - struct bnxt_ctx_pg_info stat_mem; - struct bnxt_ctx_pg_info mrav_mem; - struct bnxt_ctx_pg_info tim_mem; - struct bnxt_ctx_pg_info *tqm_mem[BNXT_MAX_TQM_RINGS]; - -#define BNXT_CTX_MEM_INIT_QP 0 -#define BNXT_CTX_MEM_INIT_SRQ 1 -#define BNXT_CTX_MEM_INIT_CQ 2 -#define 
BNXT_CTX_MEM_INIT_VNIC 3 -#define BNXT_CTX_MEM_INIT_STAT 4 -#define BNXT_CTX_MEM_INIT_MRAV 5 -#define BNXT_CTX_MEM_INIT_MAX 6 - struct bnxt_mem_init mem_init[BNXT_CTX_MEM_INIT_MAX]; + struct bnxt_ctx_mem_type ctx_arr[BNXT_CTX_V2_MAX]; }; enum bnxt_health_severity { @@ -1795,7 +1859,7 @@ struct bnxt { atomic_t intr_sem; u32 flags; - #define BNXT_FLAG_CHIP_P5 0x1 + #define BNXT_FLAG_CHIP_P5_PLUS 0x1 #define BNXT_FLAG_VF 0x2 #define BNXT_FLAG_LRO 0x4 #ifdef CONFIG_INET @@ -1849,21 +1913,21 @@ struct bnxt { #define BNXT_CHIP_TYPE_NITRO_A0(bp) ((bp)->flags & BNXT_FLAG_CHIP_NITRO_A0) #define BNXT_RX_PAGE_MODE(bp) ((bp)->flags & BNXT_FLAG_RX_PAGE_MODE) #define BNXT_SUPPORTS_TPA(bp) (!BNXT_CHIP_TYPE_NITRO_A0(bp) && \ - (!((bp)->flags & BNXT_FLAG_CHIP_P5) || \ + (!((bp)->flags & BNXT_FLAG_CHIP_P5_PLUS) ||\ (bp)->max_tpa_v2) && !is_kdump_kernel()) #define BNXT_RX_JUMBO_MODE(bp) ((bp)->flags & BNXT_FLAG_JUMBO) #define BNXT_CHIP_SR2(bp) \ ((bp)->chip_num == CHIP_NUM_58818) -#define BNXT_CHIP_P5_THOR(bp) \ +#define BNXT_CHIP_P5(bp) \ ((bp)->chip_num == CHIP_NUM_57508 || \ (bp)->chip_num == CHIP_NUM_57504 || \ (bp)->chip_num == CHIP_NUM_57502) /* Chip class phase 5 */ -#define BNXT_CHIP_P5(bp) \ - (BNXT_CHIP_P5_THOR(bp) || BNXT_CHIP_SR2(bp)) +#define BNXT_CHIP_P5_PLUS(bp) \ + (BNXT_CHIP_P5(bp) || BNXT_CHIP_SR2(bp)) /* Chip class phase 4.x */ #define BNXT_CHIP_P4(bp) \ @@ -1874,7 +1938,7 @@ struct bnxt { !BNXT_CHIP_TYPE_NITRO_A0(bp))) #define BNXT_CHIP_P4_PLUS(bp) \ - (BNXT_CHIP_P4(bp) || BNXT_CHIP_P5(bp)) + (BNXT_CHIP_P4(bp) || BNXT_CHIP_P5_PLUS(bp)) struct bnxt_aux_priv *aux_priv; struct bnxt_en_dev *edev; @@ -2017,6 +2081,7 @@ struct bnxt { #define BNXT_FW_CAP_THRESHOLD_TEMP_SUPPORTED BIT_ULL(33) #define BNXT_FW_CAP_DFLT_VLAN_TPID_PCP BIT_ULL(34) #define BNXT_FW_CAP_PRE_RESV_VNICS BIT_ULL(35) + #define BNXT_FW_CAP_BACKING_STORE_V2 BIT_ULL(36) u32 fw_dbg_cap; @@ -2297,10 +2362,11 @@ static inline void bnxt_writeq_relaxed(struct bnxt *bp, u64 val, static inline void bnxt_db_write_relaxed(struct bnxt *bp, struct bnxt_db_info *db, u32 idx) { - if (bp->flags & BNXT_FLAG_CHIP_P5) { - bnxt_writeq_relaxed(bp, db->db_key64 | idx, db->doorbell); + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { + bnxt_writeq_relaxed(bp, db->db_key64 | DB_RING_IDX(db, idx), + db->doorbell); } else { - u32 db_val = db->db_key32 | idx; + u32 db_val = db->db_key32 | DB_RING_IDX(db, idx); writel_relaxed(db_val, db->doorbell); if (bp->flags & BNXT_FLAG_DOUBLE_DB) @@ -2312,10 +2378,11 @@ static inline void bnxt_db_write_relaxed(struct bnxt *bp, static inline void bnxt_db_write(struct bnxt *bp, struct bnxt_db_info *db, u32 idx) { - if (bp->flags & BNXT_FLAG_CHIP_P5) { - bnxt_writeq(bp, db->db_key64 | idx, db->doorbell); + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { + bnxt_writeq(bp, db->db_key64 | DB_RING_IDX(db, idx), + db->doorbell); } else { - u32 db_val = db->db_key32 | idx; + u32 db_val = db->db_key32 | DB_RING_IDX(db, idx); writel(db_val, db->doorbell); if (bp->flags & BNXT_FLAG_DOUBLE_DB) @@ -2351,6 +2418,7 @@ int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings); int bnxt_nq_rings_in_use(struct bnxt *bp); int bnxt_hwrm_set_coal(struct bnxt *); void bnxt_free_ctx_mem(struct bnxt *bp); +int bnxt_num_tx_to_cp(struct bnxt *bp, int tx); unsigned int bnxt_get_max_func_stat_ctxs(struct bnxt *bp); unsigned int bnxt_get_avail_stat_ctxs_for_en(struct bnxt *bp); unsigned int bnxt_get_max_func_cp_rings(struct bnxt *bp); @@ -2360,7 +2428,7 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init); void bnxt_tx_disable(struct bnxt 
*bp); void bnxt_tx_enable(struct bnxt *bp); void bnxt_sched_reset_txr(struct bnxt *bp, struct bnxt_tx_ring_info *txr, - int idx); + u16 curr); void bnxt_report_link(struct bnxt *bp); int bnxt_update_link(struct bnxt *bp, bool chng_link_state); int bnxt_hwrm_set_pause(struct bnxt *); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index f302dac56599..ae1bdda56d22 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -469,8 +469,6 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change, } bnxt_cancel_reservations(bp, false); bnxt_free_ctx_mem(bp); - kfree(bp->ctx); - bp->ctx = NULL; break; } case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: { @@ -741,7 +739,7 @@ static int bnxt_hwrm_get_nvm_cfg_ver(struct bnxt *bp, u32 *nvm_cfg_ver) } /* earlier devices present as an array of raw bytes */ - if (!BNXT_CHIP_P5(bp)) { + if (!BNXT_CHIP_P5_PLUS(bp)) { dim = 0; i = 0; bits *= 3; /* array of 3 version components */ @@ -761,7 +759,7 @@ static int bnxt_hwrm_get_nvm_cfg_ver(struct bnxt *bp, u32 *nvm_cfg_ver) goto exit; bnxt_copy_from_nvm_data(&ver, data, bits, bytes); - if (BNXT_CHIP_P5(bp)) { + if (BNXT_CHIP_P5_PLUS(bp)) { *nvm_cfg_ver <<= 8; *nvm_cfg_ver |= ver.vu8; } else { @@ -781,7 +779,7 @@ static int bnxt_dl_info_put(struct bnxt *bp, struct devlink_info_req *req, if (!strlen(buf)) return 0; - if ((bp->flags & BNXT_FLAG_CHIP_P5) && + if ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && (!strcmp(key, DEVLINK_INFO_VERSION_GENERIC_FW_NCSI) || !strcmp(key, DEVLINK_INFO_VERSION_GENERIC_FW_ROCE))) return 0; @@ -1007,7 +1005,7 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, if (rc) return rc; - if (BNXT_CHIP_P5(bp)) { + if (BNXT_CHIP_P5_PLUS(bp)) { rc = bnxt_dl_livepatch_info_put(bp, req, BNXT_FW_SRT_PATCH); if (rc) return rc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index f3f384773ac0..b0cea5b600cc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -511,7 +511,7 @@ static int bnxt_get_num_tpa_ring_stats(struct bnxt *bp) { if (BNXT_SUPPORTS_TPA(bp)) { if (bp->max_tpa_v2) { - if (BNXT_CHIP_P5_THOR(bp)) + if (BNXT_CHIP_P5(bp)) return BNXT_NUM_TPA_RING_STATS_P5; return BNXT_NUM_TPA_RING_STATS_P5_SR2; } @@ -528,7 +528,8 @@ static int bnxt_get_num_ring_stats(struct bnxt *bp) bnxt_get_num_tpa_ring_stats(bp); tx = NUM_RING_TX_HW_STATS; cmn = NUM_RING_CMN_SW_STATS; - return rx * bp->rx_nr_rings + tx * bp->tx_nr_rings + + return rx * bp->rx_nr_rings + + tx * (bp->tx_nr_rings_xdp + bp->tx_nr_rings_per_tc) + cmn * bp->cp_nr_rings; } @@ -923,6 +924,7 @@ static int bnxt_set_channels(struct net_device *dev, bool sh = false; int tx_xdp = 0; int rc = 0; + int tx_cp; if (channel->other_count) return -EINVAL; @@ -994,8 +996,9 @@ static int bnxt_set_channels(struct net_device *dev, if (tcs > 1) bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tcs + tx_xdp; - bp->cp_nr_rings = sh ? max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) : - bp->tx_nr_rings + bp->rx_nr_rings; + tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings); + bp->cp_nr_rings = sh ? max_t(int, tx_cp, bp->rx_nr_rings) : + tx_cp + bp->rx_nr_rings; /* After changing number of rx channels, update NTUPLE feature. 
*/ netdev_update_features(dev); @@ -1319,7 +1322,7 @@ u32 bnxt_get_rxfh_indir_size(struct net_device *dev) { struct bnxt *bp = netdev_priv(dev); - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) return ALIGN(bp->rx_nr_rings, BNXT_RSS_TABLE_ENTRIES_P5); return HW_HASH_INDEX_SIZE; } @@ -3940,8 +3943,8 @@ static int bnxt_run_loopback(struct bnxt *bp) int rc; cpr = &rxr->bnapi->cp_ring; - if (bp->flags & BNXT_FLAG_CHIP_P5) - cpr = cpr->cp_ring_arr[BNXT_RX_HDL]; + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) + cpr = rxr->rx_cpr; pkt_size = min(bp->dev->mtu + ETH_HLEN, bp->rx_copy_thresh); skb = netdev_alloc_skb(bp->dev, pkt_size); if (!skb) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index f3886710e778..a1ec39b46518 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -650,7 +650,7 @@ static int bnxt_map_ptp_regs(struct bnxt *bp) int rc, i; reg_arr = ptp->refclk_regs; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (BNXT_CHIP_P5(bp)) { rc = bnxt_map_regs(bp, reg_arr, 2, BNXT_PTP_GRC_WIN); if (rc) return rc; @@ -967,7 +967,7 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg) rc = err; goto out; } - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (BNXT_CHIP_P5(bp)) { spin_lock_bh(&ptp->ptp_lock); bnxt_refclk_read(bp, NULL, &ptp->current_time); WRITE_ONCE(ptp->old_time, ptp->current_time); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index c722b3b41730..175192ebaa77 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -536,7 +536,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset) if (rc) return rc; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { vf_msix = hw_resc->max_nqs - bnxt_nq_rings_in_use(bp); vf_ring_grps = 0; } else { @@ -565,7 +565,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset) req->min_l2_ctxs = cpu_to_le16(min); req->min_vnics = cpu_to_le16(min); req->min_stat_ctx = cpu_to_le16(min); - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) req->min_hw_ring_grps = cpu_to_le16(min); } else { vf_cp_rings /= num_vfs; @@ -602,7 +602,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset) req->max_stat_ctx = cpu_to_le16(vf_stat_ctx); req->max_hw_ring_grps = cpu_to_le16(vf_ring_grps); req->max_rsscos_ctx = cpu_to_le16(vf_rss); - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) req->max_msix = cpu_to_le16(vf_msix / num_vfs); hwrm_req_hold(bp, req); @@ -630,7 +630,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset) le16_to_cpu(req->min_rsscos_ctx) * n; hw_resc->max_stat_ctxs -= le16_to_cpu(req->min_stat_ctx) * n; hw_resc->max_vnics -= le16_to_cpu(req->min_vnics) * n; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) hw_resc->max_nqs -= vf_msix; rc = pf->active_vfs; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index 6ba2b9398633..e89731492f5e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -42,7 +42,7 @@ static void bnxt_fill_msix_vecs(struct bnxt *bp, struct bnxt_msix_entry *ent) for (i = 0; i < num_msix; i++) { ent[i].vector = bp->irq_tbl[idx + i].vector; 
ent[i].ring_idx = idx + i; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { ent[i].db_offset = DB_PF_OFFSET_P5; if (BNXT_VF(bp)) ent[i].db_offset = DB_VF_OFFSET_P5; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 96f5ca778c67..4791f6a14e55 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -42,17 +42,17 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, /* fill up the first buffer */ prod = txr->tx_prod; - tx_buf = &txr->tx_buf_ring[prod]; + tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)]; tx_buf->nr_frags = num_frags; if (xdp) tx_buf->page = virt_to_head_page(xdp->data); - txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; + txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)]; flags = (len << TX_BD_LEN_SHIFT) | ((num_frags + 1) << TX_BD_FLAGS_BD_CNT_SHIFT) | bnxt_lhint_arr[len >> 9]; txbd->tx_bd_len_flags_type = cpu_to_le32(flags); - txbd->tx_bd_opaque = prod; + txbd->tx_bd_opaque = SET_TX_OPAQUE(bp, txr, prod, 1 + num_frags); txbd->tx_bd_haddr = cpu_to_le64(mapping); /* now let us fill up the frags into the next buffers */ @@ -67,10 +67,10 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, WRITE_ONCE(txr->tx_prod, prod); /* first fill up the first buffer */ - frag_tx_buf = &txr->tx_buf_ring[prod]; + frag_tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)]; frag_tx_buf->page = skb_frag_page(frag); - txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; + txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)]; frag_len = skb_frag_size(frag); frag_mapping = skb_frag_dma_map(&pdev->dev, frag, 0, @@ -127,20 +127,20 @@ static void __bnxt_xmit_xdp_redirect(struct bnxt *bp, void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) { - struct bnxt_tx_ring_info *txr = bnapi->tx_ring; + struct bnxt_tx_ring_info *txr = bnapi->tx_ring[0]; struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; + u16 tx_hw_cons = txr->tx_hw_cons; bool rx_doorbell_needed = false; - int nr_pkts = bnapi->tx_pkts; struct bnxt_sw_tx_bd *tx_buf; u16 tx_cons = txr->tx_cons; u16 last_tx_cons = tx_cons; - int i, j, frags; + int j, frags; if (!budget) return; - for (i = 0; i < nr_pkts; i++) { - tx_buf = &txr->tx_buf_ring[tx_cons]; + while (RING_TX(bp, tx_cons) != tx_hw_cons) { + tx_buf = &txr->tx_buf_ring[RING_TX(bp, tx_cons)]; if (tx_buf->action == XDP_REDIRECT) { struct pci_dev *pdev = bp->pdev; @@ -160,17 +160,17 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) frags = tx_buf->nr_frags; for (j = 0; j < frags; j++) { tx_cons = NEXT_TX(tx_cons); - tx_buf = &txr->tx_buf_ring[tx_cons]; + tx_buf = &txr->tx_buf_ring[RING_TX(bp, tx_cons)]; page_pool_recycle_direct(rxr->page_pool, tx_buf->page); } } else { - bnxt_sched_reset_txr(bp, txr, i); + bnxt_sched_reset_txr(bp, txr, tx_cons); return; } tx_cons = NEXT_TX(tx_cons); } - bnapi->tx_pkts = 0; + bnapi->events &= ~BNXT_TX_CMP_EVENT; WRITE_ONCE(txr->tx_cons, tx_cons); if (rx_doorbell_needed) { tx_buf = &txr->tx_buf_ring[last_tx_cons]; @@ -249,7 +249,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, pdev = bp->pdev; offset = bp->rx_offset; - txr = rxr->bnapi->tx_ring; + txr = rxr->bnapi->tx_ring[0]; /* BNXT_RX_PAGE_MODE(bp) when XDP enabled */ orig_data = xdp.data; @@ -275,7 +275,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, case XDP_TX: rx_buf = &rxr->rx_buf_ring[cons]; mapping = rx_buf->mapping - 
bp->rx_dma_offset; - *event = 0; + *event &= BNXT_TX_CMP_EVENT; if (unlikely(xdp_buff_has_frags(&xdp))) { struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(&xdp); @@ -398,7 +398,7 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames, static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) { struct net_device *dev = bp->dev; - int tx_xdp = 0, rc, tc; + int tx_xdp = 0, tx_cp, rc, tc; struct bpf_prog *old; if (prog && !prog->aux->xdp_has_frags && @@ -446,7 +446,8 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) } bp->tx_nr_rings_xdp = tx_xdp; bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tc + tx_xdp; - bp->cp_nr_rings = max_t(int, bp->tx_nr_rings, bp->rx_nr_rings); + tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings); + bp->cp_nr_rings = max_t(int, tx_cp, bp->rx_nr_rings); bnxt_set_tpa_flags(bp); bnxt_set_ring_params(bp); diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 78c972bb1d96..aa5700ac9c00 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -1165,9 +1165,10 @@ struct macb_ptp_info { int (*get_ts_info)(struct net_device *dev, struct ethtool_ts_info *info); int (*get_hwtst)(struct net_device *netdev, - struct ifreq *ifr); + struct kernel_hwtstamp_config *tstamp_config); int (*set_hwtst)(struct net_device *netdev, - struct ifreq *ifr, int cmd); + struct kernel_hwtstamp_config *tstamp_config, + struct netlink_ext_ack *extack); }; struct macb_pm_data { @@ -1314,7 +1315,7 @@ struct macb { struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_clock_info; struct tsu_incr tsu_incr; - struct hwtstamp_config tstamp_config; + struct kernel_hwtstamp_config tstamp_config; /* RX queue filer rule set*/ struct ethtool_rx_fs_list rx_fs_list; @@ -1363,8 +1364,12 @@ static inline void gem_ptp_do_rxstamp(struct macb *bp, struct sk_buff *skb, stru gem_ptp_rxstamp(bp, skb, desc); } -int gem_get_hwtst(struct net_device *dev, struct ifreq *rq); -int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd); + +int gem_get_hwtst(struct net_device *dev, + struct kernel_hwtstamp_config *tstamp_config); +int gem_set_hwtst(struct net_device *dev, + struct kernel_hwtstamp_config *tstamp_config, + struct netlink_ext_ack *extack); #else static inline void gem_ptp_init(struct net_device *ndev) { } static inline void gem_ptp_remove(struct net_device *ndev) { } diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index cebae0f418f2..898debfd4db3 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3773,18 +3773,38 @@ static int macb_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (!netif_running(dev)) return -EINVAL; - if (bp->ptp_info) { - switch (cmd) { - case SIOCSHWTSTAMP: - return bp->ptp_info->set_hwtst(dev, rq, cmd); - case SIOCGHWTSTAMP: - return bp->ptp_info->get_hwtst(dev, rq); - } - } - return phylink_mii_ioctl(bp->phylink, rq, cmd); } +static int macb_hwtstamp_get(struct net_device *dev, + struct kernel_hwtstamp_config *cfg) +{ + struct macb *bp = netdev_priv(dev); + + if (!netif_running(dev)) + return -EINVAL; + + if (!bp->ptp_info) + return -EOPNOTSUPP; + + return bp->ptp_info->get_hwtst(dev, cfg); +} + +static int macb_hwtstamp_set(struct net_device *dev, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) +{ + struct macb *bp = netdev_priv(dev); + + if (!netif_running(dev)) + return -EINVAL; + + if (!bp->ptp_info) + return -EOPNOTSUPP; + + return 
bp->ptp_info->set_hwtst(dev, cfg, extack); +} + static inline void macb_set_txcsum_feature(struct macb *bp, netdev_features_t features) { @@ -3884,6 +3904,8 @@ static const struct net_device_ops macb_netdev_ops = { #endif .ndo_set_features = macb_set_features, .ndo_features_check = macb_features_check, + .ndo_hwtstamp_set = macb_hwtstamp_set, + .ndo_hwtstamp_get = macb_hwtstamp_get, }; /* Configure peripheral capabilities according to device tree @@ -4539,6 +4561,8 @@ static const struct net_device_ops at91ether_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = at91ether_poll_controller, #endif + .ndo_hwtstamp_set = macb_hwtstamp_set, + .ndo_hwtstamp_get = macb_hwtstamp_get, }; static int at91ether_clk_init(struct platform_device *pdev, struct clk **pclk, diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c index 51d26fa190d7..a63bf29c4fa8 100644 --- a/drivers/net/ethernet/cadence/macb_ptp.c +++ b/drivers/net/ethernet/cadence/macb_ptp.c @@ -374,19 +374,16 @@ static int gem_ptp_set_ts_mode(struct macb *bp, return 0; } -int gem_get_hwtst(struct net_device *dev, struct ifreq *rq) +int gem_get_hwtst(struct net_device *dev, + struct kernel_hwtstamp_config *tstamp_config) { - struct hwtstamp_config *tstamp_config; struct macb *bp = netdev_priv(dev); - tstamp_config = &bp->tstamp_config; + *tstamp_config = bp->tstamp_config; if ((bp->hw_dma_cap & HW_DMA_CAP_PTP) == 0) return -EOPNOTSUPP; - if (copy_to_user(rq->ifr_data, tstamp_config, sizeof(*tstamp_config))) - return -EFAULT; - else - return 0; + return 0; } static void gem_ptp_set_one_step_sync(struct macb *bp, u8 enable) @@ -401,22 +398,18 @@ static void gem_ptp_set_one_step_sync(struct macb *bp, u8 enable) macb_writel(bp, NCR, reg_val & ~MACB_BIT(OSSMODE)); } -int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd) +int gem_set_hwtst(struct net_device *dev, + struct kernel_hwtstamp_config *tstamp_config, + struct netlink_ext_ack *extack) { enum macb_bd_control tx_bd_control = TSTAMP_DISABLED; enum macb_bd_control rx_bd_control = TSTAMP_DISABLED; - struct hwtstamp_config *tstamp_config; struct macb *bp = netdev_priv(dev); u32 regval; - tstamp_config = &bp->tstamp_config; if ((bp->hw_dma_cap & HW_DMA_CAP_PTP) == 0) return -EOPNOTSUPP; - if (copy_from_user(tstamp_config, ifr->ifr_data, - sizeof(*tstamp_config))) - return -EFAULT; - switch (tstamp_config->tx_type) { case HWTSTAMP_TX_OFF: break; @@ -463,12 +456,11 @@ int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd) return -ERANGE; } + bp->tstamp_config = *tstamp_config; + if (gem_ptp_set_ts_mode(bp, tx_bd_control, rx_bd_control) != 0) return -ERANGE; - if (copy_to_user(ifr->ifr_data, tstamp_config, sizeof(*tstamp_config))) - return -EFAULT; - else - return 0; + return 0; } diff --git a/drivers/net/ethernet/google/gve/gve_dqo.h b/drivers/net/ethernet/google/gve/gve_dqo.h index 1eb4d5fd8561..c36b93f0de15 100644 --- a/drivers/net/ethernet/google/gve/gve_dqo.h +++ b/drivers/net/ethernet/google/gve/gve_dqo.h @@ -33,6 +33,9 @@ #define GVE_DEALLOCATE_COMPL_TIMEOUT 60 netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev); +netdev_features_t gve_features_check_dqo(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features); bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean); int gve_rx_poll_dqo(struct gve_notify_block *block, int budget); int gve_tx_alloc_rings_dqo(struct gve_priv *priv); diff --git a/drivers/net/ethernet/google/gve/gve_main.c 
b/drivers/net/ethernet/google/gve/gve_main.c index 2d42e733837b..cc169748ffa2 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -79,6 +79,18 @@ static int gve_verify_driver_compatibility(struct gve_priv *priv) return err; } +static netdev_features_t gve_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + struct gve_priv *priv = netdev_priv(dev); + + if (!gve_is_gqi(priv)) + return gve_features_check_dqo(skb, dev, features); + + return features; +} + static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct gve_priv *priv = netdev_priv(dev); @@ -1879,6 +1891,7 @@ err: static const struct net_device_ops gve_netdev_ops = { .ndo_start_xmit = gve_start_xmit, + .ndo_features_check = gve_features_check, .ndo_open = gve_open, .ndo_stop = gve_close, .ndo_get_stats64 = gve_get_stats, diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index 1e19b834a613..f59c4710f118 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -843,6 +843,16 @@ static bool gve_can_send_tso(const struct sk_buff *skb) return true; } +netdev_features_t gve_features_check_dqo(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + if (skb_is_gso(skb) && !gve_can_send_tso(skb)) + return features & ~NETIF_F_GSO_MASK; + + return features; +} + /* Attempt to transmit specified SKB. * * Returns 0 if the SKB was transmitted or dropped. @@ -854,11 +864,10 @@ static int gve_try_tx_skb(struct gve_priv *priv, struct gve_tx_ring *tx, int num_buffer_descs; int total_num_descs; - if (tx->dqo.qpl) { - if (skb_is_gso(skb)) - if (unlikely(ipv6_hopopt_jumbo_remove(skb))) - goto drop; + if (skb_is_gso(skb) && unlikely(ipv6_hopopt_jumbo_remove(skb))) + goto drop; + if (tx->dqo.qpl) { /* We do not need to verify the number of buffers used per * packet or per segment in case of TSO as with 2K size buffers * none of the TX packet rules would be violated. @@ -868,24 +877,8 @@ static int gve_try_tx_skb(struct gve_priv *priv, struct gve_tx_ring *tx, */ num_buffer_descs = DIV_ROUND_UP(skb->len, GVE_TX_BUF_SIZE_DQO); } else { - if (skb_is_gso(skb)) { - /* If TSO doesn't meet HW requirements, attempt to linearize the - * packet. 
- */ - if (unlikely(!gve_can_send_tso(skb) && - skb_linearize(skb) < 0)) { - net_err_ratelimited("%s: Failed to transmit TSO packet\n", - priv->dev->name); - goto drop; - } - - if (unlikely(ipv6_hopopt_jumbo_remove(skb))) - goto drop; - - num_buffer_descs = gve_num_buffer_descs_needed(skb); - } else { - num_buffer_descs = gve_num_buffer_descs_needed(skb); - + num_buffer_descs = gve_num_buffer_descs_needed(skb); + if (!skb_is_gso(skb)) { if (unlikely(num_buffer_descs > GVE_TX_MAX_DATA_DESCS)) { if (unlikely(skb_linearize(skb) < 0)) goto drop; diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 1bf424ac3145..6022944d04f8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -24,6 +24,7 @@ #define I40E_MAX_VEB 16 #define I40E_MAX_NUM_DESCRIPTORS 4096 +#define I40E_MAX_NUM_DESCRIPTORS_XL710 8160 #define I40E_MAX_CSR_SPACE (4 * 1024 * 1024 - 64 * 1024) #define I40E_DEFAULT_NUM_DESCRIPTORS 512 #define I40E_REQ_DESCRIPTOR_MULTIPLE 32 @@ -33,11 +34,11 @@ #define I40E_MIN_VSI_ALLOC 83 /* LAN, ATR, FCOE, 64 VF */ /* max 16 qps */ #define i40e_default_queues_per_vmdq(pf) \ - (((pf)->hw_features & I40E_HW_RSS_AQ_CAPABLE) ? 4 : 1) + (test_bit(I40E_HW_CAP_RSS_AQ, (pf)->hw.caps) ? 4 : 1) #define I40E_DEFAULT_QUEUES_PER_VF 4 #define I40E_MAX_VF_QUEUES 16 #define i40e_pf_get_max_q_per_tc(pf) \ - (((pf)->hw_features & I40E_HW_128_QP_RSS_CAPABLE) ? 128 : 64) + (test_bit(I40E_HW_CAP_128_QP_RSS, (pf)->hw.caps) ? 128 : 64) #define I40E_FDIR_RING_COUNT 32 #define I40E_MAX_AQ_BUF_SIZE 4096 #define I40E_AQ_LEN 256 @@ -78,7 +79,7 @@ #define I40E_MAX_BW_INACTIVE_ACCUM 4 /* accumulate 4 credits max */ /* driver state flags */ -enum i40e_state_t { +enum i40e_state { __I40E_TESTING, __I40E_CONFIG_BUSY, __I40E_CONFIG_DONE, @@ -126,7 +127,7 @@ enum i40e_state_t { BIT_ULL(__I40E_PF_RESET_AND_REBUILD_REQUESTED) /* VSI state flags */ -enum i40e_vsi_state_t { +enum i40e_vsi_state { __I40E_VSI_DOWN, __I40E_VSI_NEEDS_RESTART, __I40E_VSI_SYNCING_FILTERS, @@ -138,6 +139,60 @@ enum i40e_vsi_state_t { __I40E_VSI_STATE_SIZE__, }; +enum i40e_pf_flags { + I40E_FLAG_MSI_ENA, + I40E_FLAG_MSIX_ENA, + I40E_FLAG_RSS_ENA, + I40E_FLAG_VMDQ_ENA, + I40E_FLAG_SRIOV_ENA, + I40E_FLAG_DCB_CAPABLE, + I40E_FLAG_DCB_ENA, + I40E_FLAG_FD_SB_ENA, + I40E_FLAG_FD_ATR_ENA, + I40E_FLAG_MFP_ENA, + I40E_FLAG_HW_ATR_EVICT_ENA, + I40E_FLAG_VEB_MODE_ENA, + I40E_FLAG_VEB_STATS_ENA, + I40E_FLAG_LINK_POLLING_ENA, + I40E_FLAG_TRUE_PROMISC_ENA, + I40E_FLAG_LEGACY_RX_ENA, + I40E_FLAG_PTP_ENA, + I40E_FLAG_IWARP_ENA, + I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, + I40E_FLAG_SOURCE_PRUNING_DIS, + I40E_FLAG_TC_MQPRIO_ENA, + I40E_FLAG_FD_SB_INACTIVE, + I40E_FLAG_FD_SB_TO_CLOUD_FILTER, + I40E_FLAG_FW_LLDP_DIS, + I40E_FLAG_RS_FEC, + I40E_FLAG_BASE_R_FEC, + /* TOTAL_PORT_SHUTDOWN_ENA + * Allows to physically disable the link on the NIC's port. + * If enabled, (after link down request from the OS) + * no link, traffic or led activity is possible on that port. + * + * If I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA is set, the + * I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA must be explicitly forced + * to true and cannot be disabled by system admin at that time. 
+ * The functionalities are exclusive in terms of configuration, but + * they also have similar behavior (allowing to disable physical + * link of the port), with following differences: + * - LINK_DOWN_ON_CLOSE_ENA is configurable at host OS run-time and + * is supported by whole family of 7xx Intel Ethernet Controllers + * - TOTAL_PORT_SHUTDOWN_ENA may be enabled only before OS loads + * (in BIOS) only if motherboard's BIOS and NIC's FW has support of it + * - when LINK_DOWN_ON_CLOSE_ENABLED is used, the link is being brought + * down by sending phy_type=0 to NIC's FW + * - when TOTAL_PORT_SHUTDOWN_ENA is used, phy_type is not altered, + * instead the link is being brought down by clearing + * bit (I40E_AQ_PHY_ENABLE_LINK) in abilities field of + * i40e_aq_set_phy_config structure + */ + I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, + I40E_FLAG_VF_VLAN_PRUNING_ENA, + I40E_PF_FLAGS_NBITS, /* must be last */ +}; + enum i40e_interrupt_policy { I40E_INTERRUPT_BEST_CASE, I40E_INTERRUPT_MEDIUM, @@ -480,78 +535,7 @@ struct i40e_pf { struct timer_list service_timer; struct work_struct service_task; - u32 hw_features; -#define I40E_HW_RSS_AQ_CAPABLE BIT(0) -#define I40E_HW_128_QP_RSS_CAPABLE BIT(1) -#define I40E_HW_ATR_EVICT_CAPABLE BIT(2) -#define I40E_HW_WB_ON_ITR_CAPABLE BIT(3) -#define I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE BIT(4) -#define I40E_HW_NO_PCI_LINK_CHECK BIT(5) -#define I40E_HW_100M_SGMII_CAPABLE BIT(6) -#define I40E_HW_NO_DCB_SUPPORT BIT(7) -#define I40E_HW_USE_SET_LLDP_MIB BIT(8) -#define I40E_HW_GENEVE_OFFLOAD_CAPABLE BIT(9) -#define I40E_HW_PTP_L4_CAPABLE BIT(10) -#define I40E_HW_WOL_MC_MAGIC_PKT_WAKE BIT(11) -#define I40E_HW_HAVE_CRT_RETIMER BIT(13) -#define I40E_HW_OUTER_UDP_CSUM_CAPABLE BIT(14) -#define I40E_HW_PHY_CONTROLS_LEDS BIT(15) -#define I40E_HW_STOP_FW_LLDP BIT(16) -#define I40E_HW_PORT_ID_VALID BIT(17) -#define I40E_HW_RESTART_AUTONEG BIT(18) - - u32 flags; -#define I40E_FLAG_RX_CSUM_ENABLED BIT(0) -#define I40E_FLAG_MSI_ENABLED BIT(1) -#define I40E_FLAG_MSIX_ENABLED BIT(2) -#define I40E_FLAG_RSS_ENABLED BIT(3) -#define I40E_FLAG_VMDQ_ENABLED BIT(4) -#define I40E_FLAG_SRIOV_ENABLED BIT(5) -#define I40E_FLAG_DCB_CAPABLE BIT(6) -#define I40E_FLAG_DCB_ENABLED BIT(7) -#define I40E_FLAG_FD_SB_ENABLED BIT(8) -#define I40E_FLAG_FD_ATR_ENABLED BIT(9) -#define I40E_FLAG_MFP_ENABLED BIT(10) -#define I40E_FLAG_HW_ATR_EVICT_ENABLED BIT(11) -#define I40E_FLAG_VEB_MODE_ENABLED BIT(12) -#define I40E_FLAG_VEB_STATS_ENABLED BIT(13) -#define I40E_FLAG_LINK_POLLING_ENABLED BIT(14) -#define I40E_FLAG_TRUE_PROMISC_SUPPORT BIT(15) -#define I40E_FLAG_LEGACY_RX BIT(16) -#define I40E_FLAG_PTP BIT(17) -#define I40E_FLAG_IWARP_ENABLED BIT(18) -#define I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED BIT(19) -#define I40E_FLAG_SOURCE_PRUNING_DISABLED BIT(20) -#define I40E_FLAG_TC_MQPRIO BIT(21) -#define I40E_FLAG_FD_SB_INACTIVE BIT(22) -#define I40E_FLAG_FD_SB_TO_CLOUD_FILTER BIT(23) -#define I40E_FLAG_DISABLE_FW_LLDP BIT(24) -#define I40E_FLAG_RS_FEC BIT(25) -#define I40E_FLAG_BASE_R_FEC BIT(26) -/* TOTAL_PORT_SHUTDOWN - * Allows to physically disable the link on the NIC's port. - * If enabled, (after link down request from the OS) - * no link, traffic or led activity is possible on that port. - * - * If I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED is set, the - * I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED must be explicitly forced to true - * and cannot be disabled by system admin at that time. 
- * The functionalities are exclusive in terms of configuration, but they also - * have similar behavior (allowing to disable physical link of the port), - * with following differences: - * - LINK_DOWN_ON_CLOSE_ENABLED is configurable at host OS run-time and is - * supported by whole family of 7xx Intel Ethernet Controllers - * - TOTAL_PORT_SHUTDOWN may be enabled only before OS loads (in BIOS) - * only if motherboard's BIOS and NIC's FW has support of it - * - when LINK_DOWN_ON_CLOSE_ENABLED is used, the link is being brought down - * by sending phy_type=0 to NIC's FW - * - when TOTAL_PORT_SHUTDOWN is used, phy_type is not altered, instead - * the link is being brought down by clearing bit (I40E_AQ_PHY_ENABLE_LINK) - * in abilities field of i40e_aq_set_phy_config structure - */ -#define I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED BIT(27) -#define I40E_FLAG_VF_VLAN_PRUNING BIT(28) - + DECLARE_BITMAP(flags, I40E_PF_FLAGS_NBITS); struct i40e_client_instance *cinst; bool stat_offsets_loaded; struct i40e_hw_port_stats stats; @@ -1267,7 +1251,7 @@ struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr); void i40e_vlan_stripping_enable(struct i40e_vsi *vsi); static inline bool i40e_is_sw_dcb(struct i40e_pf *pf) { - return !!(pf->flags & I40E_FLAG_DISABLE_FW_LLDP); + return test_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags); } #ifdef CONFIG_I40E_DCB @@ -1301,7 +1285,7 @@ int i40e_set_partition_bw_setting(struct i40e_pf *pf); int i40e_commit_partition_bw_setting(struct i40e_pf *pf); void i40e_print_link_message(struct i40e_vsi *vsi, bool isup); -void i40e_set_fec_in_flags(u8 fec_cfg, u32 *flags); +void i40e_set_fec_in_flags(u8 fec_cfg, unsigned long *flags); static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi) { @@ -1321,13 +1305,13 @@ int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, * i40e_is_tc_mqprio_enabled - check if TC MQPRIO is enabled on PF * @pf: pointer to a pf. * - * Check and return value of flag I40E_FLAG_TC_MQPRIO. + * Check and return state of flag I40E_FLAG_TC_MQPRIO. * - * Return: I40E_FLAG_TC_MQPRIO set state. 
+ * Return: true/false if I40E_FLAG_TC_MQPRIO is set or not **/ -static inline u32 i40e_is_tc_mqprio_enabled(struct i40e_pf *pf) +static inline bool i40e_is_tc_mqprio_enabled(struct i40e_pf *pf) { - return pf->flags & I40E_FLAG_TC_MQPRIO; + return test_bit(I40E_FLAG_TC_MQPRIO_ENA, pf->flags); } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 9ce6e633cc2f..896c43905309 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -17,29 +17,16 @@ static void i40e_resume_aq(struct i40e_hw *hw); static void i40e_adminq_init_regs(struct i40e_hw *hw) { /* set head and tail registers in our local struct */ - if (i40e_is_vf(hw)) { - hw->aq.asq.tail = I40E_VF_ATQT1; - hw->aq.asq.head = I40E_VF_ATQH1; - hw->aq.asq.len = I40E_VF_ATQLEN1; - hw->aq.asq.bal = I40E_VF_ATQBAL1; - hw->aq.asq.bah = I40E_VF_ATQBAH1; - hw->aq.arq.tail = I40E_VF_ARQT1; - hw->aq.arq.head = I40E_VF_ARQH1; - hw->aq.arq.len = I40E_VF_ARQLEN1; - hw->aq.arq.bal = I40E_VF_ARQBAL1; - hw->aq.arq.bah = I40E_VF_ARQBAH1; - } else { - hw->aq.asq.tail = I40E_PF_ATQT; - hw->aq.asq.head = I40E_PF_ATQH; - hw->aq.asq.len = I40E_PF_ATQLEN; - hw->aq.asq.bal = I40E_PF_ATQBAL; - hw->aq.asq.bah = I40E_PF_ATQBAH; - hw->aq.arq.tail = I40E_PF_ARQT; - hw->aq.arq.head = I40E_PF_ARQH; - hw->aq.arq.len = I40E_PF_ARQLEN; - hw->aq.arq.bal = I40E_PF_ARQBAL; - hw->aq.arq.bah = I40E_PF_ARQBAH; - } + hw->aq.asq.tail = I40E_PF_ATQT; + hw->aq.asq.head = I40E_PF_ATQH; + hw->aq.asq.len = I40E_PF_ATQLEN; + hw->aq.asq.bal = I40E_PF_ATQBAL; + hw->aq.asq.bah = I40E_PF_ATQBAH; + hw->aq.arq.tail = I40E_PF_ARQT; + hw->aq.arq.head = I40E_PF_ARQH; + hw->aq.arq.len = I40E_PF_ARQLEN; + hw->aq.arq.bal = I40E_PF_ARQBAL; + hw->aq.arq.bah = I40E_PF_ARQBAH; } /** @@ -503,44 +490,76 @@ shutdown_arq_out: } /** - * i40e_set_hw_flags - set HW flags + * i40e_set_hw_caps - set HW flags * @hw: pointer to the hardware structure **/ -static void i40e_set_hw_flags(struct i40e_hw *hw) +static void i40e_set_hw_caps(struct i40e_hw *hw) { - struct i40e_adminq_info *aq = &hw->aq; - - hw->flags = 0; + bitmap_zero(hw->caps, I40E_HW_CAPS_NBITS); switch (hw->mac.type) { case I40E_MAC_XL710: - if (aq->api_maj_ver > 1 || - (aq->api_maj_ver == 1 && - aq->api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710)) { - hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE; - hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE; + if (i40e_is_aq_api_ver_ge(hw, 1, + I40E_MINOR_VER_GET_LINK_INFO_XL710)) { + set_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps); + set_bit(I40E_HW_CAP_FW_LLDP_STOPPABLE, hw->caps); /* The ability to RX (not drop) 802.1ad frames */ - hw->flags |= I40E_HW_FLAG_802_1AD_CAPABLE; + set_bit(I40E_HW_CAP_802_1AD, hw->caps); + } + if (i40e_is_aq_api_ver_ge(hw, 1, 5)) { + /* Supported in FW API version higher than 1.4 */ + set_bit(I40E_HW_CAP_GENEVE_OFFLOAD, hw->caps); + } + if (i40e_is_fw_ver_lt(hw, 4, 33)) { + set_bit(I40E_HW_CAP_RESTART_AUTONEG, hw->caps); + /* No DCB support for FW < v4.33 */ + set_bit(I40E_HW_CAP_NO_DCB_SUPPORT, hw->caps); + } + if (i40e_is_fw_ver_lt(hw, 4, 3)) { + /* Disable FW LLDP if FW < v4.3 */ + set_bit(I40E_HW_CAP_STOP_FW_LLDP, hw->caps); + } + if (i40e_is_fw_ver_ge(hw, 4, 40)) { + /* Use the FW Set LLDP MIB API if FW >= v4.40 */ + set_bit(I40E_HW_CAP_USE_SET_LLDP_MIB, hw->caps); + } + if (i40e_is_fw_ver_ge(hw, 6, 0)) { + /* Enable PTP L4 if FW > v6.0 */ + set_bit(I40E_HW_CAP_PTP_L4, hw->caps); } break; case I40E_MAC_X722: - hw->flags |= 
I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE | - I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK; + set_bit(I40E_HW_CAP_AQ_SRCTL_ACCESS_ENABLE, hw->caps); + set_bit(I40E_HW_CAP_NVM_READ_REQUIRES_LOCK, hw->caps); + set_bit(I40E_HW_CAP_RSS_AQ, hw->caps); + set_bit(I40E_HW_CAP_128_QP_RSS, hw->caps); + set_bit(I40E_HW_CAP_ATR_EVICT, hw->caps); + set_bit(I40E_HW_CAP_WB_ON_ITR, hw->caps); + set_bit(I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE, hw->caps); + set_bit(I40E_HW_CAP_NO_PCI_LINK_CHECK, hw->caps); + set_bit(I40E_HW_CAP_USE_SET_LLDP_MIB, hw->caps); + set_bit(I40E_HW_CAP_GENEVE_OFFLOAD, hw->caps); + set_bit(I40E_HW_CAP_PTP_L4, hw->caps); + set_bit(I40E_HW_CAP_WOL_MC_MAGIC_PKT_WAKE, hw->caps); + set_bit(I40E_HW_CAP_OUTER_UDP_CSUM, hw->caps); + + if (rd32(hw, I40E_GLQF_FDEVICTENA(1)) != + I40E_FDEVICT_PCTYPE_DEFAULT) { + hw_warn(hw, "FD EVICT PCTYPES are not right, disable FD HW EVICT\n"); + clear_bit(I40E_HW_CAP_ATR_EVICT, hw->caps); + } - if (aq->api_maj_ver > 1 || - (aq->api_maj_ver == 1 && - aq->api_min_ver >= I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722)) - hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE; + if (i40e_is_aq_api_ver_ge(hw, 1, + I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722)) + set_bit(I40E_HW_CAP_FW_LLDP_STOPPABLE, hw->caps); - if (aq->api_maj_ver > 1 || - (aq->api_maj_ver == 1 && - aq->api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_X722)) - hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE; + if (i40e_is_aq_api_ver_ge(hw, 1, + I40E_MINOR_VER_GET_LINK_INFO_X722)) + set_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps); - if (aq->api_maj_ver > 1 || - (aq->api_maj_ver == 1 && - aq->api_min_ver >= I40E_MINOR_VER_FW_REQUEST_FEC_X722)) - hw->flags |= I40E_HW_FLAG_X722_FEC_REQUEST_CAPABLE; + if (i40e_is_aq_api_ver_ge(hw, 1, + I40E_MINOR_VER_FW_REQUEST_FEC_X722)) + set_bit(I40E_HW_CAP_X722_FEC_REQUEST, hw->caps); fallthrough; default: @@ -548,22 +567,18 @@ static void i40e_set_hw_flags(struct i40e_hw *hw) } /* Newer versions of firmware require lock when reading the NVM */ - if (aq->api_maj_ver > 1 || - (aq->api_maj_ver == 1 && - aq->api_min_ver >= 5)) - hw->flags |= I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK; - - if (aq->api_maj_ver > 1 || - (aq->api_maj_ver == 1 && - aq->api_min_ver >= 8)) { - hw->flags |= I40E_HW_FLAG_FW_LLDP_PERSISTENT; - hw->flags |= I40E_HW_FLAG_DROP_MODE; - } + if (i40e_is_aq_api_ver_ge(hw, 1, 5)) + set_bit(I40E_HW_CAP_NVM_READ_REQUIRES_LOCK, hw->caps); + + /* The ability to RX (not drop) 802.1ad frames was added in API 1.7 */ + if (i40e_is_aq_api_ver_ge(hw, 1, 7)) + set_bit(I40E_HW_CAP_802_1AD, hw->caps); - if (aq->api_maj_ver > 1 || - (aq->api_maj_ver == 1 && - aq->api_min_ver >= 9)) - hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_EXTENDED; + if (i40e_is_aq_api_ver_ge(hw, 1, 8)) + set_bit(I40E_HW_CAP_FW_LLDP_PERSISTENT, hw->caps); + + if (i40e_is_aq_api_ver_ge(hw, 1, 9)) + set_bit(I40E_HW_CAP_AQ_PHY_ACCESS_EXTENDED, hw->caps); } /** @@ -633,7 +648,7 @@ int i40e_init_adminq(struct i40e_hw *hw) /* Some features were introduced in different FW API version * for different MAC type. 
*/ - i40e_set_hw_flags(hw); + i40e_set_hw_caps(hw); /* get the NVM version info */ i40e_read_nvm_word(hw, I40E_SR_NVM_DEV_STARTER_VERSION, @@ -648,25 +663,7 @@ int i40e_init_adminq(struct i40e_hw *hw) &oem_lo); hw->nvm.oem_ver = ((u32)oem_hi << 16) | oem_lo; - if (hw->mac.type == I40E_MAC_XL710 && - hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && - hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) { - hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE; - hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE; - } - if (hw->mac.type == I40E_MAC_X722 && - hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && - hw->aq.api_min_ver >= I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722) { - hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE; - } - - /* The ability to RX (not drop) 802.1ad frames was added in API 1.7 */ - if (hw->aq.api_maj_ver > 1 || - (hw->aq.api_maj_ver == 1 && - hw->aq.api_min_ver >= 7)) - hw->flags |= I40E_HW_FLAG_802_1AD_CAPABLE; - - if (hw->aq.api_maj_ver > I40E_FW_API_VERSION_MAJOR) { + if (i40e_is_aq_api_ver_ge(hw, I40E_FW_API_VERSION_MAJOR + 1, 0)) { ret_code = -EIO; goto init_adminq_free_arq; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index d7e24d661724..e171f4814e21 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1374,8 +1374,8 @@ i40e_aq_get_phy_capabilities(struct i40e_hw *hw, if (report_init) { if (hw->mac.type == I40E_MAC_XL710 && - hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && - hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) { + i40e_is_aq_api_ver_ge(hw, I40E_FW_API_VERSION_MAJOR, + I40E_MINOR_VER_GET_LINK_INFO_XL710)) { status = i40e_aq_get_link_info(hw, true, NULL, NULL); } else { hw->phy.phy_types = le32_to_cpu(abilities->phy_type); @@ -1645,12 +1645,11 @@ int i40e_aq_get_link_info(struct i40e_hw *hw, else hw_link_info->lse_enable = false; - if ((hw->mac.type == I40E_MAC_XL710) && - (hw->aq.fw_maj_ver < 4 || (hw->aq.fw_maj_ver == 4 && - hw->aq.fw_min_ver < 40)) && hw_link_info->phy_type == 0xE) + if (hw->mac.type == I40E_MAC_XL710 && i40e_is_fw_ver_lt(hw, 4, 40) && + hw_link_info->phy_type == 0xE) hw_link_info->phy_type = I40E_PHY_TYPE_10GBASE_SFPP_CU; - if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE && + if (test_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps) && hw->mac.type != I40E_MAC_X722) { __le32 tmp; @@ -1750,21 +1749,6 @@ int i40e_aq_set_phy_debug(struct i40e_hw *hw, u8 cmd_flags, } /** - * i40e_is_aq_api_ver_ge - * @aq: pointer to AdminQ info containing HW API version to compare - * @maj: API major value - * @min: API minor value - * - * Assert whether current HW API version is greater/equal than provided. 
- **/ -static bool i40e_is_aq_api_ver_ge(struct i40e_adminq_info *aq, u16 maj, - u16 min) -{ - return (aq->api_maj_ver > maj || - (aq->api_maj_ver == maj && aq->api_min_ver >= min)); -} - -/** * i40e_aq_add_vsi * @hw: pointer to the hw struct * @vsi_ctx: pointer to a vsi context struct @@ -1890,14 +1874,14 @@ int i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw, if (set) { flags |= I40E_AQC_SET_VSI_PROMISC_UNICAST; - if (rx_only_promisc && i40e_is_aq_api_ver_ge(&hw->aq, 1, 5)) + if (rx_only_promisc && i40e_is_aq_api_ver_ge(hw, 1, 5)) flags |= I40E_AQC_SET_VSI_PROMISC_RX_ONLY; } cmd->promiscuous_flags = cpu_to_le16(flags); cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_UNICAST); - if (i40e_is_aq_api_ver_ge(&hw->aq, 1, 5)) + if (i40e_is_aq_api_ver_ge(hw, 1, 5)) cmd->valid_flags |= cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_RX_ONLY); @@ -2000,13 +1984,13 @@ int i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw, if (enable) { flags |= I40E_AQC_SET_VSI_PROMISC_UNICAST; - if (i40e_is_aq_api_ver_ge(&hw->aq, 1, 5)) + if (i40e_is_aq_api_ver_ge(hw, 1, 5)) flags |= I40E_AQC_SET_VSI_PROMISC_RX_ONLY; } cmd->promiscuous_flags = cpu_to_le16(flags); cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_UNICAST); - if (i40e_is_aq_api_ver_ge(&hw->aq, 1, 5)) + if (i40e_is_aq_api_ver_ge(hw, 1, 5)) cmd->valid_flags |= cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_RX_ONLY); cmd->seid = cpu_to_le16(seid); @@ -2253,7 +2237,7 @@ int i40e_aq_set_switch_config(struct i40e_hw *hw, scfg->flags = cpu_to_le16(flags); scfg->valid_flags = cpu_to_le16(valid_flags); scfg->mode = mode; - if (hw->flags & I40E_HW_FLAG_802_1AD_CAPABLE) { + if (test_bit(I40E_HW_CAP_802_1AD, hw->caps)) { scfg->switch_tag = cpu_to_le16(hw->switch_tag); scfg->first_tag = cpu_to_le16(hw->first_tag); scfg->second_tag = cpu_to_le16(hw->second_tag); @@ -3637,7 +3621,7 @@ i40e_aq_restore_lldp(struct i40e_hw *hw, u8 *setting, bool restore, (struct i40e_aqc_lldp_restore *)&desc.params.raw; int status; - if (!(hw->flags & I40E_HW_FLAG_FW_LLDP_PERSISTENT)) { + if (!test_bit(I40E_HW_CAP_FW_LLDP_PERSISTENT, hw->caps)) { i40e_debug(hw, I40E_DEBUG_ALL, "Restore LLDP not supported by current FW version.\n"); return -ENODEV; @@ -3680,7 +3664,7 @@ int i40e_aq_stop_lldp(struct i40e_hw *hw, bool shutdown_agent, cmd->command |= I40E_AQ_LLDP_AGENT_SHUTDOWN; if (persist) { - if (hw->flags & I40E_HW_FLAG_FW_LLDP_PERSISTENT) + if (test_bit(I40E_HW_CAP_FW_LLDP_PERSISTENT, hw->caps)) cmd->command |= I40E_AQ_LLDP_AGENT_STOP_PERSIST; else i40e_debug(hw, I40E_DEBUG_ALL, @@ -3713,7 +3697,7 @@ int i40e_aq_start_lldp(struct i40e_hw *hw, bool persist, cmd->command = I40E_AQ_LLDP_AGENT_START; if (persist) { - if (hw->flags & I40E_HW_FLAG_FW_LLDP_PERSISTENT) + if (test_bit(I40E_HW_CAP_FW_LLDP_PERSISTENT, hw->caps)) cmd->command |= I40E_AQ_LLDP_AGENT_START_PERSIST; else i40e_debug(hw, I40E_DEBUG_ALL, @@ -3741,7 +3725,7 @@ i40e_aq_set_dcb_parameters(struct i40e_hw *hw, bool dcb_enable, (struct i40e_aqc_set_dcb_parameters *)&desc.params.raw; int status; - if (!(hw->flags & I40E_HW_FLAG_FW_LLDP_STOPPABLE)) + if (!test_bit(I40E_HW_CAP_FW_LLDP_STOPPABLE, hw->caps)) return -ENODEV; i40e_fill_default_direct_cmd_desc(&desc, @@ -5043,7 +5027,7 @@ static int i40e_led_get_reg(struct i40e_hw *hw, u16 led_addr, u32 i; *reg_val = 0; - if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) { + if (test_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps)) { status = i40e_aq_get_phy_register(hw, I40E_AQ_PHY_REG_ACCESS_EXTERNAL, @@ -5076,7 +5060,7 @@ static int i40e_led_set_reg(struct i40e_hw *hw, u16 
led_addr, int status; u32 i; - if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) { + if (test_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps)) { status = i40e_aq_set_phy_register(hw, I40E_AQ_PHY_REG_ACCESS_EXTERNAL, @@ -5115,7 +5099,7 @@ int i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr, u8 port_num; u32 i; - if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) { + if (test_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps)) { status = i40e_aq_get_phy_register(hw, I40E_AQ_PHY_REG_ACCESS_EXTERNAL, @@ -5238,14 +5222,14 @@ int i40e_aq_rx_ctl_read_register(struct i40e_hw *hw, **/ u32 i40e_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr) { - bool use_register; + bool use_register = false; int status = 0; int retry = 5; u32 val = 0; - use_register = (((hw->aq.api_maj_ver == 1) && - (hw->aq.api_min_ver < 5)) || - (hw->mac.type == I40E_MAC_X722)); + if (i40e_is_aq_api_ver_lt(hw, 1, 5) || hw->mac.type == I40E_MAC_X722) + use_register = true; + if (!use_register) { do_retry: status = i40e_aq_rx_ctl_read_register(hw, reg_addr, &val, NULL); @@ -5300,13 +5284,13 @@ int i40e_aq_rx_ctl_write_register(struct i40e_hw *hw, **/ void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val) { - bool use_register; + bool use_register = false; int status = 0; int retry = 5; - use_register = (((hw->aq.api_maj_ver == 1) && - (hw->aq.api_min_ver < 5)) || - (hw->mac.type == I40E_MAC_X722)); + if (i40e_is_aq_api_ver_lt(hw, 1, 5) || hw->mac.type == I40E_MAC_X722) + use_register = true; + if (!use_register) { do_retry: status = i40e_aq_rx_ctl_write_register(hw, reg_addr, @@ -5335,7 +5319,7 @@ static void i40e_mdio_if_number_selection(struct i40e_hw *hw, bool set_mdio, struct i40e_aqc_phy_register_access *cmd) { if (set_mdio && cmd->phy_interface == I40E_AQ_PHY_REG_ACCESS_EXTERNAL) { - if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_EXTENDED) + if (test_bit(I40E_HW_CAP_AQ_PHY_ACCESS_EXTENDED, hw->caps)) cmd->cmd_flags |= I40E_AQ_PHY_REG_ACCESS_SET_MDIO_IF_NUMBER | ((mdio_num << diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c index 68602fc375f6..498728e16a37 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c @@ -804,14 +804,11 @@ int i40e_get_dcb_config(struct i40e_hw *hw) int ret = 0; /* If Firmware version < v4.33 on X710/XL710, IEEE only */ - if ((hw->mac.type == I40E_MAC_XL710) && - (((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver < 33)) || - (hw->aq.fw_maj_ver < 4))) + if (hw->mac.type == I40E_MAC_XL710 && i40e_is_fw_ver_lt(hw, 4, 33)) return i40e_get_ieee_dcb_config(hw); /* If Firmware version == v4.33 on X710/XL710, use old CEE struct */ - if ((hw->mac.type == I40E_MAC_XL710) && - ((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver == 33))) { + if (hw->mac.type == I40E_MAC_XL710 && i40e_is_fw_ver_eq(hw, 4, 33)) { ret = i40e_aq_get_cee_dcb_config(hw, &cee_v1_cfg, sizeof(cee_v1_cfg), NULL); if (!ret) { @@ -877,7 +874,7 @@ int i40e_init_dcb(struct i40e_hw *hw, bool enable_mib_change) return -EOPNOTSUPP; /* Read LLDP NVM area */ - if (hw->flags & I40E_HW_FLAG_FW_LLDP_PERSISTENT) { + if (test_bit(I40E_HW_CAP_FW_LLDP_PERSISTENT, hw->caps)) { u8 offset = 0; if (hw->mac.type == I40E_MAC_XL710) diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c index 077a95dad32c..4721845fda6e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c @@ -310,8 +310,8 @@ static u8 i40e_dcbnl_getstate(struct net_device *netdev) struct i40e_pf 
*pf = i40e_netdev_to_pf(netdev); dev_dbg(&pf->pdev->dev, "DCB state=%d\n", - !!(pf->flags & I40E_FLAG_DCB_ENABLED)); - return !!(pf->flags & I40E_FLAG_DCB_ENABLED); + test_bit(I40E_FLAG_DCB_ENA, pf->flags) ? 1 : 0); + return test_bit(I40E_FLAG_DCB_ENA, pf->flags) ? 1 : 0; } /** @@ -331,19 +331,19 @@ static u8 i40e_dcbnl_setstate(struct net_device *netdev, u8 state) return ret; dev_dbg(&pf->pdev->dev, "new state=%d current state=%d\n", - state, (pf->flags & I40E_FLAG_DCB_ENABLED) ? 1 : 0); + state, test_bit(I40E_FLAG_DCB_ENA, pf->flags) ? 1 : 0); /* Nothing to do */ - if (!state == !(pf->flags & I40E_FLAG_DCB_ENABLED)) + if (!state == !test_bit(I40E_FLAG_DCB_ENA, pf->flags)) return ret; if (i40e_is_sw_dcb(pf)) { if (state) { - pf->flags |= I40E_FLAG_DCB_ENABLED; + set_bit(I40E_FLAG_DCB_ENA, pf->flags); memcpy(&pf->hw.desired_dcbx_config, &pf->hw.local_dcbx_config, sizeof(struct i40e_dcbx_config)); } else { - pf->flags &= ~I40E_FLAG_DCB_ENABLED; + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); } } else { /* Cannot directly manipulate FW LLDP Agent */ @@ -653,7 +653,7 @@ static u8 i40e_dcbnl_get_cap(struct net_device *netdev, int capid, u8 *cap) { struct i40e_pf *pf = i40e_netdev_to_pf(netdev); - if (!(pf->flags & I40E_FLAG_DCB_CAPABLE)) + if (!test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) return I40E_DCBNL_STATUS_ERROR; switch (capid) { @@ -693,7 +693,7 @@ static int i40e_dcbnl_getnumtcs(struct net_device *netdev, int tcid, u8 *num) { struct i40e_pf *pf = i40e_netdev_to_pf(netdev); - if (!(pf->flags & I40E_FLAG_DCB_CAPABLE)) + if (!test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) return -EINVAL; *num = I40E_MAX_TRAFFIC_CLASS; @@ -827,15 +827,12 @@ static void i40e_dcbnl_get_perm_hw_addr(struct net_device *dev, u8 *perm_addr) { struct i40e_pf *pf = i40e_netdev_to_pf(dev); - int i, j; + int i; memset(perm_addr, 0xff, MAX_ADDR_LEN); for (i = 0; i < dev->addr_len; i++) perm_addr[i] = pf->hw.mac.perm_addr[i]; - - for (j = 0; j < dev->addr_len; j++, i++) - perm_addr[i] = pf->hw.mac.san_addr[j]; } static const struct dcbnl_rtnl_ops dcbnl_ops = { @@ -891,11 +888,11 @@ void i40e_dcbnl_set_all(struct i40e_vsi *vsi) return; /* DCB not enabled */ - if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) + if (!test_bit(I40E_FLAG_DCB_ENA, pf->flags)) return; /* MFP mode but not an iSCSI PF so return */ - if ((pf->flags & I40E_FLAG_MFP_ENABLED) && !(hw->func_caps.iscsi)) + if (test_bit(I40E_FLAG_MFP_ENA, pf->flags) && !(hw->func_caps.iscsi)) return; dcbxcfg = &hw->local_dcbx_config; @@ -1002,7 +999,7 @@ void i40e_dcbnl_flush_apps(struct i40e_pf *pf, int i; /* MFP mode but not an iSCSI PF so return */ - if ((pf->flags & I40E_FLAG_MFP_ENABLED) && !(pf->hw.func_caps.iscsi)) + if (test_bit(I40E_FLAG_MFP_ENA, pf->flags) && !(pf->hw.func_caps.iscsi)) return; for (i = 0; i < old_cfg->numapps; i++) { @@ -1025,7 +1022,7 @@ void i40e_dcbnl_setup(struct i40e_vsi *vsi) struct i40e_pf *pf = i40e_netdev_to_pf(dev); /* Not DCB capable */ - if (!(pf->flags & I40E_FLAG_DCB_CAPABLE)) + if (!test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) return; dev->dcbnl_ops = &dcbnl_ops; diff --git a/drivers/net/ethernet/intel/i40e/i40e_debug.h b/drivers/net/ethernet/intel/i40e/i40e_debug.h index 27ebc72d8bfe..e9871dfb32bd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debug.h +++ b/drivers/net/ethernet/intel/i40e/i40e_debug.h @@ -37,6 +37,7 @@ struct i40e_hw; struct device *i40e_hw_to_dev(struct i40e_hw *hw); #define hw_dbg(hw, S, A...) dev_dbg(i40e_hw_to_dev(hw), S, ##A) +#define hw_warn(hw, S, A...) 
dev_warn(i40e_hw_to_dev(hw), S, ##A) #define i40e_debug(h, m, s, ...) \ do { \ diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 999c9708def5..88240571721a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -147,9 +147,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) " state[%d] = %08lx\n", i, vsi->state[i]); if (vsi == pf->vsi[pf->lan_vsi]) - dev_info(&pf->pdev->dev, " MAC address: %pM SAN MAC: %pM Port MAC: %pM\n", + dev_info(&pf->pdev->dev, " MAC address: %pM Port MAC: %pM\n", pf->hw.mac.addr, - pf->hw.mac.san_addr, pf->hw.mac.port_addr); hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) { dev_info(&pf->pdev->dev, @@ -820,8 +819,8 @@ static ssize_t i40e_dbg_command_write(struct file *filp, /* By default we are in VEPA mode, if this is the first VF/VMDq * VSI to be added switch to VEB mode. */ - if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { - pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; + if (!test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) { + set_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index fd7163128c4d..eb9a7b32af73 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -430,35 +430,35 @@ static const char i40e_gstrings_test[][ETH_GSTRING_LEN] = { struct i40e_priv_flags { char flag_string[ETH_GSTRING_LEN]; - u64 flag; + u8 bitno; bool read_only; }; -#define I40E_PRIV_FLAG(_name, _flag, _read_only) { \ +#define I40E_PRIV_FLAG(_name, _bitno, _read_only) { \ .flag_string = _name, \ - .flag = _flag, \ + .bitno = _bitno, \ .read_only = _read_only, \ } static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = { /* NOTE: MFP setting cannot be changed */ - I40E_PRIV_FLAG("MFP", I40E_FLAG_MFP_ENABLED, 1), + I40E_PRIV_FLAG("MFP", I40E_FLAG_MFP_ENA, 1), I40E_PRIV_FLAG("total-port-shutdown", - I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED, 1), - I40E_PRIV_FLAG("LinkPolling", I40E_FLAG_LINK_POLLING_ENABLED, 0), - I40E_PRIV_FLAG("flow-director-atr", I40E_FLAG_FD_ATR_ENABLED, 0), - I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0), - I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0), + I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, 1), + I40E_PRIV_FLAG("LinkPolling", I40E_FLAG_LINK_POLLING_ENA, 0), + I40E_PRIV_FLAG("flow-director-atr", I40E_FLAG_FD_ATR_ENA, 0), + I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENA, 0), + I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENA, 0), I40E_PRIV_FLAG("link-down-on-close", - I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED, 0), - I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0), + I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, 0), + I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX_ENA, 0), I40E_PRIV_FLAG("disable-source-pruning", - I40E_FLAG_SOURCE_PRUNING_DISABLED, 0), - I40E_PRIV_FLAG("disable-fw-lldp", I40E_FLAG_DISABLE_FW_LLDP, 0), + I40E_FLAG_SOURCE_PRUNING_DIS, 0), + I40E_PRIV_FLAG("disable-fw-lldp", I40E_FLAG_FW_LLDP_DIS, 0), I40E_PRIV_FLAG("rs-fec", I40E_FLAG_RS_FEC, 0), I40E_PRIV_FLAG("base-r-fec", I40E_FLAG_BASE_R_FEC, 0), I40E_PRIV_FLAG("vf-vlan-pruning", - I40E_FLAG_VF_VLAN_PRUNING, 0), + I40E_FLAG_VF_VLAN_PRUNING_ENA, 0), }; #define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags) @@ -466,7 +466,7 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = { /* 
Private flags with a global effect, restricted to PF 0 */ static const struct i40e_priv_flags i40e_gl_gstrings_priv_flags[] = { I40E_PRIV_FLAG("vf-true-promisc-support", - I40E_FLAG_TRUE_PROMISC_SUPPORT, 0), + I40E_FLAG_TRUE_PROMISC_ENA, 0), }; #define I40E_GL_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gl_gstrings_priv_flags) @@ -502,7 +502,7 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 1000baseT_Full); - if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) { + if (test_bit(I40E_HW_CAP_100M_SGMII, pf->hw.caps)) { ethtool_link_ksettings_add_link_mode(ks, supported, 100baseT_Full); ethtool_link_ksettings_add_link_mode(ks, advertising, @@ -601,7 +601,7 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, 10000baseKX4_Full); } if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR && - !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) { + !test_bit(I40E_HW_CAP_CRT_RETIMER, pf->hw.caps)) { ethtool_link_ksettings_add_link_mode(ks, supported, 10000baseKR_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) @@ -609,7 +609,7 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, 10000baseKR_Full); } if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX && - !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) { + !test_bit(I40E_HW_CAP_CRT_RETIMER, pf->hw.caps)) { ethtool_link_ksettings_add_link_mode(ks, supported, 1000baseKX_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) @@ -917,7 +917,7 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 1000baseT_Full); - if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) { + if (test_bit(I40E_HW_CAP_100M_SGMII, pf->hw.caps)) { ethtool_link_ksettings_add_link_mode(ks, supported, 100baseT_Full); if (hw_link_info->requested_speeds & @@ -1488,12 +1488,8 @@ static int i40e_set_fec_cfg(struct net_device *netdev, u8 fec_cfg) struct i40e_pf *pf = np->vsi->back; struct i40e_hw *hw = &pf->hw; int status = 0; - u32 flags = 0; int err = 0; - flags = READ_ONCE(pf->flags); - i40e_set_fec_in_flags(fec_cfg, &flags); - /* Get the current phy config */ memset(&abilities, 0, sizeof(abilities)); status = i40e_aq_get_phy_capabilities(hw, false, false, &abilities, @@ -1525,7 +1521,7 @@ static int i40e_set_fec_cfg(struct net_device *netdev, u8 fec_cfg) err = -EAGAIN; goto done; } - pf->flags = flags; + i40e_set_fec_in_flags(fec_cfg, pf->flags); status = i40e_update_link_info(hw); if (status) /* debug level message only due to relation to the link @@ -1599,7 +1595,7 @@ static int i40e_set_fec_param(struct net_device *netdev, return -EPERM; if (hw->mac.type == I40E_MAC_X722 && - !(hw->flags & I40E_HW_FLAG_X722_FEC_REQUEST_CAPABLE)) { + !test_bit(I40E_HW_CAP_X722_FEC_REQUEST, hw->caps)) { netdev_err(netdev, "Setting FEC encoding not supported by firmware. 
Please update the NVM image.\n"); return -EOPNOTSUPP; } @@ -2015,6 +2011,18 @@ static void i40e_get_drvinfo(struct net_device *netdev, drvinfo->n_priv_flags += I40E_GL_PRIV_FLAGS_STR_LEN; } +static u32 i40e_get_max_num_descriptors(struct i40e_pf *pf) +{ + struct i40e_hw *hw = &pf->hw; + + switch (hw->mac.type) { + case I40E_MAC_XL710: + return I40E_MAX_NUM_DESCRIPTORS_XL710; + default: + return I40E_MAX_NUM_DESCRIPTORS; + } +} + static void i40e_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, struct kernel_ethtool_ringparam *kernel_ring, @@ -2024,8 +2032,8 @@ static void i40e_get_ringparam(struct net_device *netdev, struct i40e_pf *pf = np->vsi->back; struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; - ring->rx_max_pending = I40E_MAX_NUM_DESCRIPTORS; - ring->tx_max_pending = I40E_MAX_NUM_DESCRIPTORS; + ring->rx_max_pending = i40e_get_max_num_descriptors(pf); + ring->tx_max_pending = i40e_get_max_num_descriptors(pf); ring->rx_mini_max_pending = 0; ring->rx_jumbo_max_pending = 0; ring->rx_pending = vsi->rx_rings[0]->count; @@ -2050,12 +2058,12 @@ static int i40e_set_ringparam(struct net_device *netdev, struct kernel_ethtool_ringparam *kernel_ring, struct netlink_ext_ack *extack) { + u32 new_rx_count, new_tx_count, max_num_descriptors; struct i40e_ring *tx_rings = NULL, *rx_rings = NULL; struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_hw *hw = &np->vsi->back->hw; struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; - u32 new_rx_count, new_tx_count; u16 tx_alloc_queue_pairs; int timeout = 50; int i, err = 0; @@ -2063,14 +2071,15 @@ static int i40e_set_ringparam(struct net_device *netdev, if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) return -EINVAL; - if (ring->tx_pending > I40E_MAX_NUM_DESCRIPTORS || + max_num_descriptors = i40e_get_max_num_descriptors(pf); + if (ring->tx_pending > max_num_descriptors || ring->tx_pending < I40E_MIN_NUM_DESCRIPTORS || - ring->rx_pending > I40E_MAX_NUM_DESCRIPTORS || + ring->rx_pending > max_num_descriptors || ring->rx_pending < I40E_MIN_NUM_DESCRIPTORS) { netdev_info(netdev, "Descriptors requested (Tx: %d / Rx: %d) out of range [%d-%d]\n", ring->tx_pending, ring->rx_pending, - I40E_MIN_NUM_DESCRIPTORS, I40E_MAX_NUM_DESCRIPTORS); + I40E_MIN_NUM_DESCRIPTORS, max_num_descriptors); return -EINVAL; } @@ -2419,7 +2428,7 @@ static void i40e_get_ethtool_stats(struct net_device *netdev, veb_stats = ((pf->lan_veb != I40E_NO_VEB) && (pf->lan_veb < I40E_MAX_VEB) && - (pf->flags & I40E_FLAG_VEB_STATS_ENABLED)); + test_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags)); if (veb_stats) { veb = pf->veb[pf->lan_veb]; @@ -2548,7 +2557,7 @@ static int i40e_get_ts_info(struct net_device *dev, struct i40e_pf *pf = i40e_netdev_to_pf(dev); /* only report HW timestamping if PTP is enabled */ - if (!(pf->flags & I40E_FLAG_PTP)) + if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags)) return ethtool_op_get_ts_info(dev, info); info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | @@ -2570,7 +2579,7 @@ static int i40e_get_ts_info(struct net_device *dev, BIT(HWTSTAMP_FILTER_PTP_V2_L2_SYNC) | BIT(HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ); - if (pf->hw_features & I40E_HW_PTP_L4_CAPABLE) + if (test_bit(I40E_HW_CAP_PTP_L4, pf->hw.caps)) info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V1_L4_SYNC) | BIT(HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) | BIT(HWTSTAMP_FILTER_PTP_V2_EVENT) | @@ -2819,10 +2828,10 @@ static int i40e_set_phys_id(struct net_device *netdev, switch (state) { case ETHTOOL_ID_ACTIVE: - if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS)) { + if 
(!test_bit(I40E_HW_CAP_PHY_CONTROLS_LEDS, pf->hw.caps)) { pf->led_status = i40e_led_get(hw); } else { - if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE)) + if (!test_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps)) i40e_aq_set_phy_debug(hw, I40E_PHY_DEBUG_ALL, NULL); ret = i40e_led_get_phy(hw, &temp_status, @@ -2831,25 +2840,25 @@ static int i40e_set_phys_id(struct net_device *netdev, } return blink_freq; case ETHTOOL_ID_ON: - if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS)) + if (!test_bit(I40E_HW_CAP_PHY_CONTROLS_LEDS, pf->hw.caps)) i40e_led_set(hw, 0xf, false); else ret = i40e_led_set_phy(hw, true, pf->led_status, 0); break; case ETHTOOL_ID_OFF: - if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS)) + if (!test_bit(I40E_HW_CAP_PHY_CONTROLS_LEDS, pf->hw.caps)) i40e_led_set(hw, 0x0, false); else ret = i40e_led_set_phy(hw, false, pf->led_status, 0); break; case ETHTOOL_ID_INACTIVE: - if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS)) { + if (!test_bit(I40E_HW_CAP_PHY_CONTROLS_LEDS, pf->hw.caps)) { i40e_led_set(hw, pf->led_status, false); } else { ret = i40e_led_set_phy(hw, false, pf->led_status, (pf->phy_led_val | I40E_PHY_LED_MODE_ORIG)); - if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE)) + if (!test_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps)) i40e_aq_set_phy_debug(hw, 0, NULL); } break; @@ -3628,7 +3637,7 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) bitmap_zero(flow_pctypes, FLOW_PCTYPES_SIZE); - if (pf->flags & I40E_FLAG_MFP_ENABLED) { + if (test_bit(I40E_FLAG_MFP_ENA, pf->flags)) { dev_err(&pf->pdev->dev, "Change of RSS hash input set is not supported when MFP mode is enabled\n"); return -EOPNOTSUPP; @@ -3644,19 +3653,22 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) switch (nfc->flow_type) { case TCP_V4_FLOW: set_bit(I40E_FILTER_PCTYPE_NONF_IPV4_TCP, flow_pctypes); - if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) + if (test_bit(I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE, + pf->hw.caps)) set_bit(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK, flow_pctypes); break; case TCP_V6_FLOW: set_bit(I40E_FILTER_PCTYPE_NONF_IPV6_TCP, flow_pctypes); - if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) + if (test_bit(I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE, + pf->hw.caps)) set_bit(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK, flow_pctypes); break; case UDP_V4_FLOW: set_bit(I40E_FILTER_PCTYPE_NONF_IPV4_UDP, flow_pctypes); - if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) { + if (test_bit(I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE, + pf->hw.caps)) { set_bit(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP, flow_pctypes); set_bit(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP, @@ -3666,7 +3678,8 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) break; case UDP_V6_FLOW: set_bit(I40E_FILTER_PCTYPE_NONF_IPV6_UDP, flow_pctypes); - if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) { + if (test_bit(I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE, + pf->hw.caps)) { set_bit(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP, flow_pctypes); set_bit(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP, @@ -4644,7 +4657,7 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi, * main port cannot change them when in MFP mode as this would impact * any filters on the other ports. 
*/ - if (pf->flags & I40E_FLAG_MFP_ENABLED) { + if (test_bit(I40E_FLAG_MFP_ENA, pf->flags)) { netif_err(pf, drv, vsi->netdev, "Cannot change Flow Director input sets while MFP is enabled\n"); return -EOPNOTSUPP; } @@ -4804,7 +4817,7 @@ static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi, return -EINVAL; pf = vsi->back; - if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED)) + if (!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) return -EOPNOTSUPP; if (test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state)) @@ -5001,7 +5014,7 @@ static void i40e_get_channels(struct net_device *dev, ch->max_combined = i40e_max_channels(vsi); /* report info for other vector */ - ch->other_count = (pf->flags & I40E_FLAG_FD_SB_ENABLED) ? 1 : 0; + ch->other_count = test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) ? 1 : 0; ch->max_other = ch->other_count; /* Note: This code assumes DCB is disabled for now. */ @@ -5044,7 +5057,7 @@ static int i40e_set_channels(struct net_device *dev, return -EINVAL; /* verify other_count has not changed */ - if (ch->other_count != ((pf->flags & I40E_FLAG_FD_SB_ENABLED) ? 1 : 0)) + if (ch->other_count != (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) ? 1 : 0)) return -EINVAL; /* verify the number of channels does not exceed hardware limits */ @@ -5215,11 +5228,11 @@ static u32 i40e_get_priv_flags(struct net_device *dev) u32 i, j, ret_flags = 0; for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) { - const struct i40e_priv_flags *priv_flags; + const struct i40e_priv_flags *priv_flag; - priv_flags = &i40e_gstrings_priv_flags[i]; + priv_flag = &i40e_gstrings_priv_flags[i]; - if (priv_flags->flag & pf->flags) + if (test_bit(priv_flag->bitno, pf->flags)) ret_flags |= BIT(i); } @@ -5227,11 +5240,11 @@ static u32 i40e_get_priv_flags(struct net_device *dev) return ret_flags; for (j = 0; j < I40E_GL_PRIV_FLAGS_STR_LEN; j++) { - const struct i40e_priv_flags *priv_flags; + const struct i40e_priv_flags *priv_flag; - priv_flags = &i40e_gl_gstrings_priv_flags[j]; + priv_flag = &i40e_gl_gstrings_priv_flags[j]; - if (priv_flags->flag & pf->flags) + if (test_bit(priv_flag->bitno, pf->flags)) ret_flags |= BIT(i + j); } @@ -5245,8 +5258,10 @@ static u32 i40e_get_priv_flags(struct net_device *dev) **/ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) { + DECLARE_BITMAP(changed_flags, I40E_PF_FLAGS_NBITS); + DECLARE_BITMAP(orig_flags, I40E_PF_FLAGS_NBITS); + DECLARE_BITMAP(new_flags, I40E_PF_FLAGS_NBITS); struct i40e_netdev_priv *np = netdev_priv(dev); - u64 orig_flags, new_flags, changed_flags; enum i40e_admin_queue_err adq_err; struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; @@ -5254,51 +5269,57 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) int status; u32 i, j; - orig_flags = READ_ONCE(pf->flags); - new_flags = orig_flags; + bitmap_copy(orig_flags, pf->flags, I40E_PF_FLAGS_NBITS); + bitmap_copy(new_flags, pf->flags, I40E_PF_FLAGS_NBITS); for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) { - const struct i40e_priv_flags *priv_flags; - - priv_flags = &i40e_gstrings_priv_flags[i]; + const struct i40e_priv_flags *priv_flag; + bool new_val; - if (flags & BIT(i)) - new_flags |= priv_flags->flag; - else - new_flags &= ~(priv_flags->flag); + priv_flag = &i40e_gstrings_priv_flags[i]; + new_val = (flags & BIT(i)) ? 
true : false; /* If this is a read-only flag, it can't be changed */ - if (priv_flags->read_only && - ((orig_flags ^ new_flags) & ~BIT(i))) + if (priv_flag->read_only && + test_bit(priv_flag->bitno, orig_flags) != new_val) return -EOPNOTSUPP; + + if (new_val) + set_bit(priv_flag->bitno, new_flags); + else + clear_bit(priv_flag->bitno, new_flags); } if (pf->hw.pf_id != 0) goto flags_complete; for (j = 0; j < I40E_GL_PRIV_FLAGS_STR_LEN; j++) { - const struct i40e_priv_flags *priv_flags; + const struct i40e_priv_flags *priv_flag; + bool new_val; - priv_flags = &i40e_gl_gstrings_priv_flags[j]; - - if (flags & BIT(i + j)) - new_flags |= priv_flags->flag; - else - new_flags &= ~(priv_flags->flag); + priv_flag = &i40e_gl_gstrings_priv_flags[j]; + new_val = (flags & BIT(i + j)) ? true : false; /* If this is a read-only flag, it can't be changed */ - if (priv_flags->read_only && - ((orig_flags ^ new_flags) & ~BIT(i))) + if (priv_flag->read_only && + test_bit(priv_flag->bitno, orig_flags) != new_val) return -EOPNOTSUPP; + + if (new_val) + set_bit(priv_flag->bitno, new_flags); + else + clear_bit(priv_flag->bitno, new_flags); } flags_complete: - changed_flags = orig_flags ^ new_flags; + bitmap_xor(changed_flags, pf->flags, orig_flags, I40E_PF_FLAGS_NBITS); - if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) + if (test_bit(I40E_FLAG_FW_LLDP_DIS, changed_flags)) reset_needed = I40E_PF_RESET_AND_REBUILD_FLAG; - if (changed_flags & (I40E_FLAG_VEB_STATS_ENABLED | - I40E_FLAG_LEGACY_RX | I40E_FLAG_SOURCE_PRUNING_DISABLED)) + + if (test_bit(I40E_FLAG_VEB_STATS_ENA, changed_flags) || + test_bit(I40E_FLAG_LEGACY_RX_ENA, changed_flags) || + test_bit(I40E_FLAG_SOURCE_PRUNING_DIS, changed_flags)) reset_needed = BIT(__I40E_PF_RESET_REQUESTED); /* Before we finalize any flag changes, we need to perform some @@ -5306,8 +5327,8 @@ flags_complete: */ /* ATR eviction is not supported on all devices */ - if ((new_flags & I40E_FLAG_HW_ATR_EVICT_ENABLED) && - !(pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE)) + if (test_bit(I40E_FLAG_HW_ATR_EVICT_ENA, new_flags) && + !test_bit(I40E_HW_CAP_ATR_EVICT, pf->hw.caps)) return -EOPNOTSUPP; /* If the driver detected FW LLDP was disabled on init, this flag could @@ -5318,15 +5339,14 @@ flags_complete: * disable LLDP, however we _must_ not allow the user to enable/disable * LLDP with this flag on unsupported FW versions. 
*/ - if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) { - if (!(pf->hw.flags & I40E_HW_FLAG_FW_LLDP_STOPPABLE)) { - dev_warn(&pf->pdev->dev, - "Device does not support changing FW LLDP\n"); - return -EOPNOTSUPP; - } + if (test_bit(I40E_FLAG_FW_LLDP_DIS, changed_flags) && + !test_bit(I40E_HW_CAP_FW_LLDP_STOPPABLE, pf->hw.caps)) { + dev_warn(&pf->pdev->dev, + "Device does not support changing FW LLDP\n"); + return -EOPNOTSUPP; } - if (changed_flags & I40E_FLAG_RS_FEC && + if (test_bit(I40E_FLAG_RS_FEC, changed_flags) && pf->hw.device_id != I40E_DEV_ID_25G_SFP28 && pf->hw.device_id != I40E_DEV_ID_25G_B) { dev_warn(&pf->pdev->dev, @@ -5334,7 +5354,7 @@ flags_complete: return -EOPNOTSUPP; } - if (changed_flags & I40E_FLAG_BASE_R_FEC && + if (test_bit(I40E_FLAG_BASE_R_FEC, changed_flags) && pf->hw.device_id != I40E_DEV_ID_25G_SFP28 && pf->hw.device_id != I40E_DEV_ID_25G_B && pf->hw.device_id != I40E_DEV_ID_KX_X722) { @@ -5349,17 +5369,17 @@ flags_complete: */ /* Flush current ATR settings if ATR was disabled */ - if ((changed_flags & I40E_FLAG_FD_ATR_ENABLED) && - !(new_flags & I40E_FLAG_FD_ATR_ENABLED)) { + if (test_bit(I40E_FLAG_FD_ATR_ENA, changed_flags) && + !test_bit(I40E_FLAG_FD_ATR_ENA, new_flags)) { set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state); set_bit(__I40E_FD_FLUSH_REQUESTED, pf->state); } - if (changed_flags & I40E_FLAG_TRUE_PROMISC_SUPPORT) { + if (test_bit(I40E_FLAG_TRUE_PROMISC_ENA, changed_flags)) { u16 sw_flags = 0, valid_flags = 0; int ret; - if (!(new_flags & I40E_FLAG_TRUE_PROMISC_SUPPORT)) + if (!test_bit(I40E_FLAG_TRUE_PROMISC_ENA, new_flags)) sw_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC; valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC; ret = i40e_aq_set_switch_config(&pf->hw, sw_flags, valid_flags, @@ -5374,17 +5394,17 @@ flags_complete: } } - if ((changed_flags & I40E_FLAG_RS_FEC) || - (changed_flags & I40E_FLAG_BASE_R_FEC)) { + if (test_bit(I40E_FLAG_RS_FEC, changed_flags) || + test_bit(I40E_FLAG_BASE_R_FEC, changed_flags)) { u8 fec_cfg = 0; - if (new_flags & I40E_FLAG_RS_FEC && - new_flags & I40E_FLAG_BASE_R_FEC) { + if (test_bit(I40E_FLAG_RS_FEC, new_flags) && + test_bit(I40E_FLAG_BASE_R_FEC, new_flags)) { fec_cfg = I40E_AQ_SET_FEC_AUTO; - } else if (new_flags & I40E_FLAG_RS_FEC) { + } else if (test_bit(I40E_FLAG_RS_FEC, new_flags)) { fec_cfg = (I40E_AQ_SET_FEC_REQUEST_RS | I40E_AQ_SET_FEC_ABILITY_RS); - } else if (new_flags & I40E_FLAG_BASE_R_FEC) { + } else if (test_bit(I40E_FLAG_BASE_R_FEC, new_flags)) { fec_cfg = (I40E_AQ_SET_FEC_REQUEST_KR | I40E_AQ_SET_FEC_ABILITY_KR); } @@ -5392,35 +5412,35 @@ flags_complete: dev_warn(&pf->pdev->dev, "Cannot change FEC config\n"); } - if ((changed_flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) && - (orig_flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED)) { + if (test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, changed_flags) && + test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, orig_flags)) { dev_err(&pf->pdev->dev, "Setting link-down-on-close not supported on this port (because total-port-shutdown is enabled)\n"); return -EOPNOTSUPP; } - if ((changed_flags & I40E_FLAG_VF_VLAN_PRUNING) && + if (test_bit(I40E_FLAG_VF_VLAN_PRUNING_ENA, changed_flags) && pf->num_alloc_vfs) { dev_warn(&pf->pdev->dev, "Changing vf-vlan-pruning flag while VF(s) are active is not supported\n"); return -EOPNOTSUPP; } - if ((changed_flags & I40E_FLAG_LEGACY_RX) && + if (test_bit(I40E_FLAG_LEGACY_RX_ENA, changed_flags) && I40E_2K_TOO_SMALL_WITH_PADDING) { dev_warn(&pf->pdev->dev, "2k Rx buffer is too small to fit standard MTU and skb_shared_info\n"); return -EOPNOTSUPP; } - 
if ((changed_flags & new_flags & - I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) && - (new_flags & I40E_FLAG_MFP_ENABLED)) + if (test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, changed_flags) && + test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, new_flags) && + test_bit(I40E_FLAG_MFP_ENA, new_flags)) dev_warn(&pf->pdev->dev, "Turning on link-down-on-close flag may affect other partitions\n"); - if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) { - if (new_flags & I40E_FLAG_DISABLE_FW_LLDP) { + if (test_bit(I40E_FLAG_FW_LLDP_DIS, changed_flags)) { + if (test_bit(I40E_FLAG_FW_LLDP_DIS, new_flags)) { #ifdef CONFIG_I40E_DCB i40e_dcb_sw_default_config(pf); #endif /* CONFIG_I40E_DCB */ @@ -5461,7 +5481,7 @@ flags_complete: * initialization or (b) while holding the RTNL lock, we don't need * anything fancy here. */ - pf->flags = new_flags; + bitmap_copy(pf->flags, new_flags, I40E_PF_FLAGS_NBITS); /* Issue reset to cause things to take effect, as additional bits * are added we will need to create a mask of bits requiring reset @@ -5491,7 +5511,7 @@ static int i40e_get_module_info(struct net_device *netdev, int status; /* Check if firmware supports reading module EEPROM. */ - if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE)) { + if (!test_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps)) { netdev_err(vsi->netdev, "Module EEPROM memory read not supported. Please update the NVM image.\n"); return -EINVAL; } @@ -5571,8 +5591,8 @@ static int i40e_get_module_info(struct net_device *netdev, modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN; break; default: - netdev_err(vsi->netdev, "Module type unrecognized\n"); - return -EINVAL; + netdev_dbg(vsi->netdev, "SFP module type unrecognized or no SFP connector used.\n"); + return -EOPNOTSUPP; } return 0; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f7a332e51524..7ded598a68e6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -371,7 +371,7 @@ static void i40e_tx_timeout(struct net_device *netdev, unsigned int txqueue) if (tx_ring) { head = i40e_get_head(tx_ring); /* Read interrupt register */ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) val = rd32(&pf->hw, I40E_PFINT_DYN_CTLN(tx_ring->q_vector->v_idx + tx_ring->vsi->base_vector - 1)); @@ -1209,13 +1209,13 @@ static void i40e_update_pf_stats(struct i40e_pf *pf) pf->stat_offsets_loaded, &osd->rx_lpi_count, &nsd->rx_lpi_count); - if (pf->flags & I40E_FLAG_FD_SB_ENABLED && + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) && !test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state)) nsd->fd_sb_status = true; else nsd->fd_sb_status = false; - if (pf->flags & I40E_FLAG_FD_ATR_ENABLED && + if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) && !test_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state)) nsd->fd_atr_status = true; else @@ -1485,7 +1485,7 @@ static s16 i40e_get_vf_new_vlan(struct i40e_vsi *vsi, return pvid; is_any = (trusted || - !(pf->flags & I40E_FLAG_VF_VLAN_PRUNING)); + !test_bit(I40E_FLAG_VF_VLAN_PRUNING_ENA, pf->flags)); if ((vlan_filters && f->vlan == I40E_VLAN_ANY) || (!is_any && !vlan_filters && f->vlan == I40E_VLAN_ANY) || @@ -1890,7 +1890,7 @@ static int i40e_vsi_config_rss(struct i40e_vsi *vsi) u8 *lut; int ret; - if (!(pf->hw_features & I40E_HW_RSS_AQ_CAPABLE)) + if (!test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps)) return 0; if (!vsi->rss_size) vsi->rss_size = min_t(int, pf->alloc_rss_size, @@ -2045,7 +2045,7 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, */ if 
(vsi->req_queue_pairs > 0) vsi->num_queue_pairs = vsi->req_queue_pairs; - else if (pf->flags & I40E_FLAG_MSIX_ENABLED) + else if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) vsi->num_queue_pairs = pf->num_lan_msix; else vsi->num_queue_pairs = 1; @@ -2058,7 +2058,7 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, else num_tc_qps = vsi->alloc_queue_pairs; - if (enabled_tc && (vsi->back->flags & I40E_FLAG_DCB_ENABLED)) { + if (enabled_tc && test_bit(I40E_FLAG_DCB_ENA, vsi->back->flags)) { /* Find numtc from enabled TC bitmap */ for (i = 0, numtc = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { if (enabled_tc & BIT(i)) /* TC is enabled */ @@ -2077,7 +2077,7 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1; /* Do not allow use more TC queue pairs than MSI-X vectors exist */ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) num_tc_qps = min_t(int, num_tc_qps, pf->num_lan_msix); /* Setup queue offset/count for all TCs for given VSI */ @@ -2089,8 +2089,10 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, switch (vsi->type) { case I40E_VSI_MAIN: - if (!(pf->flags & (I40E_FLAG_FD_SB_ENABLED | - I40E_FLAG_FD_ATR_ENABLED)) || + if ((!test_bit(I40E_FLAG_FD_SB_ENA, + pf->flags) && + !test_bit(I40E_FLAG_FD_ATR_ENA, + pf->flags)) || vsi->tc_config.enabled_tc != 1) { qcount = min_t(int, pf->alloc_rss_size, num_tc_qps); @@ -2476,7 +2478,7 @@ static int i40e_set_promiscuous(struct i40e_pf *pf, bool promisc) if (vsi->type == I40E_VSI_MAIN && pf->lan_veb != I40E_NO_VEB && - !(pf->flags & I40E_FLAG_MFP_ENABLED)) { + !test_bit(I40E_FLAG_MFP_ENA, pf->flags)) { /* set defport ON for Main VSI instead of true promisc * this way we will get all unicast/multicast and VLAN * promisc behavior but will not get VF or VMDq traffic @@ -2907,7 +2909,7 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf) */ static u16 i40e_calculate_vsi_rx_buf_len(struct i40e_vsi *vsi) { - if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) + if (!vsi->netdev || test_bit(I40E_FLAG_LEGACY_RX_ENA, vsi->back->flags)) return SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048); return PAGE_SIZE < 8192 ? 
I40E_RXBUFFER_3072 : I40E_RXBUFFER_2048; @@ -3462,7 +3464,7 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring) ring->xsk_pool = i40e_xsk_pool(ring); /* some ATR related tx ring init */ - if (vsi->back->flags & I40E_FLAG_FD_ATR_ENABLED) { + if (test_bit(I40E_FLAG_FD_ATR_ENA, vsi->back->flags)) { ring->atr_sample_rate = vsi->back->atr_sample_rate; ring->atr_count = 0; } else { @@ -3478,9 +3480,11 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring) tx_ctx.new_context = 1; tx_ctx.base = (ring->dma / 128); tx_ctx.qlen = ring->count; - tx_ctx.fd_ena = !!(vsi->back->flags & (I40E_FLAG_FD_SB_ENABLED | - I40E_FLAG_FD_ATR_ENABLED)); - tx_ctx.timesync_ena = !!(vsi->back->flags & I40E_FLAG_PTP); + if (test_bit(I40E_FLAG_FD_SB_ENA, vsi->back->flags) || + test_bit(I40E_FLAG_FD_ATR_ENA, vsi->back->flags)) + tx_ctx.fd_ena = 1; + if (test_bit(I40E_FLAG_PTP_ENA, vsi->back->flags)) + tx_ctx.timesync_ena = 1; /* FDIR VSI tx ring can still use RS bit and writebacks */ if (vsi->type != I40E_VSI_FDIR) tx_ctx.head_wb_ena = 1; @@ -3663,7 +3667,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) } /* configure Rx buffer alignment */ - if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) { + if (!vsi->netdev || test_bit(I40E_FLAG_LEGACY_RX_ENA, vsi->back->flags)) { if (I40E_2K_TOO_SMALL_WITH_PADDING) { dev_info(&vsi->back->pdev->dev, "2k Rx buffer is too small to fit standard MTU and skb_shared_info\n"); @@ -3761,7 +3765,7 @@ static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi) u16 qoffset, qcount; int i, n; - if (!(vsi->back->flags & I40E_FLAG_DCB_ENABLED)) { + if (!test_bit(I40E_FLAG_DCB_ENA, vsi->back->flags)) { /* Reset the TC information */ for (i = 0; i < vsi->num_queue_pairs; i++) { rx_ring = vsi->rx_rings[i]; @@ -3828,7 +3832,7 @@ static void i40e_fdir_filter_restore(struct i40e_vsi *vsi) struct i40e_pf *pf = vsi->back; struct hlist_node *node; - if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED)) + if (!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) return; /* Reset FDir counters as we're replaying all existing filters */ @@ -3966,10 +3970,10 @@ static void i40e_enable_misc_int_causes(struct i40e_pf *pf) I40E_PFINT_ICR0_ENA_VFLR_MASK | I40E_PFINT_ICR0_ENA_ADMINQ_MASK; - if (pf->flags & I40E_FLAG_IWARP_ENABLED) + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) val |= I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK; - if (pf->flags & I40E_FLAG_PTP) + if (test_bit(I40E_FLAG_PTP_ENA, pf->flags)) val |= I40E_PFINT_ICR0_ENA_TIMESYNC_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, val); @@ -4205,7 +4209,7 @@ static void i40e_vsi_disable_irq(struct i40e_vsi *vsi) } /* disable each interrupt */ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { for (i = vsi->base_vector; i < (vsi->num_q_vectors + vsi->base_vector); i++) wr32(hw, I40E_PFINT_DYN_CTLN(i - 1), 0); @@ -4231,7 +4235,7 @@ static int i40e_vsi_enable_irq(struct i40e_vsi *vsi) struct i40e_pf *pf = vsi->back; int i; - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { for (i = 0; i < vsi->num_q_vectors; i++) i40e_irq_dynamic_enable(vsi, i); } else { @@ -4252,7 +4256,7 @@ static void i40e_free_misc_vector(struct i40e_pf *pf) wr32(&pf->hw, I40E_PFINT_ICR0_ENA, 0); i40e_flush(&pf->hw); - if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags) && pf->msix_entries) { free_irq(pf->msix_entries[0].vector, pf); clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state); } @@ -4287,7 +4291,7 @@ static irqreturn_t i40e_intr(int irq, void 
*data) (icr0 & I40E_PFINT_ICR0_SWINT_MASK)) pf->sw_int_count++; - if ((pf->flags & I40E_FLAG_IWARP_ENABLED) && + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags) && (icr0 & I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK)) { ena_mask &= ~I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK; dev_dbg(&pf->pdev->dev, "cleared PE_CRITERR\n"); @@ -4480,7 +4484,7 @@ static bool i40e_clean_fdir_tx_irq(struct i40e_ring *tx_ring, int budget) i += tx_ring->count; tx_ring->next_to_clean = i; - if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) + if (test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags)) i40e_irq_dynamic_enable(vsi, tx_ring->q_vector->v_idx); return budget > 0; @@ -4593,9 +4597,9 @@ static int i40e_vsi_request_irq(struct i40e_vsi *vsi, char *basename) struct i40e_pf *pf = vsi->back; int err; - if (pf->flags & I40E_FLAG_MSIX_ENABLED) + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) err = i40e_vsi_request_irq_msix(vsi, basename); - else if (pf->flags & I40E_FLAG_MSI_ENABLED) + else if (test_bit(I40E_FLAG_MSI_ENA, pf->flags)) err = request_irq(pf->pdev->irq, i40e_intr, 0, pf->int_name, pf); else @@ -4627,7 +4631,7 @@ static void i40e_netpoll(struct net_device *netdev) if (test_bit(__I40E_VSI_DOWN, vsi->state)) return; - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { for (i = 0; i < vsi->num_q_vectors; i++) i40e_msix_clean_rings(0, vsi->q_vectors[i]); } else { @@ -4967,7 +4971,7 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi) u32 val, qp; int i; - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { if (!vsi->q_vectors) return; @@ -5129,16 +5133,17 @@ static void i40e_vsi_free_q_vectors(struct i40e_vsi *vsi) static void i40e_reset_interrupt_capability(struct i40e_pf *pf) { /* If we're in Legacy mode, the interrupt was cleaned in vsi_close */ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { pci_disable_msix(pf->pdev); kfree(pf->msix_entries); pf->msix_entries = NULL; kfree(pf->irq_pile); pf->irq_pile = NULL; - } else if (pf->flags & I40E_FLAG_MSI_ENABLED) { + } else if (test_bit(I40E_FLAG_MSI_ENA, pf->flags)) { pci_disable_msi(pf->pdev); } - pf->flags &= ~(I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED); + clear_bit(I40E_FLAG_MSI_ENA, pf->flags); + clear_bit(I40E_FLAG_MSIX_ENA, pf->flags); } /** @@ -5478,11 +5483,11 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) return pf->vsi[pf->lan_vsi]->mqprio_qopt.qopt.num_tc; /* If neither MQPRIO nor DCB is enabled, then always use single TC */ - if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) + if (!test_bit(I40E_FLAG_DCB_ENA, pf->flags)) return 1; /* SFP mode will be enabled for all TCs on port */ - if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) + if (!test_bit(I40E_FLAG_MFP_ENA, pf->flags)) return i40e_dcb_get_num_tc(dcbcfg); /* MFP mode return count of enabled TCs for this PF */ @@ -5512,11 +5517,11 @@ static u8 i40e_pf_get_tc_map(struct i40e_pf *pf) /* If neither MQPRIO nor DCB is enabled for this PF then just return * default TC */ - if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) + if (!test_bit(I40E_FLAG_DCB_ENA, pf->flags)) return I40E_DEFAULT_TRAFFIC_CLASS; /* SFP mode we want PF to be enabled for all TCs */ - if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) + if (!test_bit(I40E_FLAG_MFP_ENA, pf->flags)) return i40e_dcb_get_enabled_tc(&pf->hw.local_dcbx_config); /* MFP enabled and iSCSI PF type */ @@ -5605,7 +5610,7 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, /* There is no need to reset BW when mqprio mode is on. 
*/ if (i40e_is_tc_mqprio_enabled(pf)) return 0; - if (!vsi->mqprio_qopt.qopt.hw && !(pf->flags & I40E_FLAG_DCB_ENABLED)) { + if (!vsi->mqprio_qopt.qopt.hw && !test_bit(I40E_FLAG_DCB_ENA, pf->flags)) { ret = i40e_set_bw_limit(vsi, vsi->seid, 0); if (ret) dev_info(&pf->pdev->dev, @@ -5858,7 +5863,7 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc) } vsi->reconfig_rss = false; } - if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) { + if (test_bit(I40E_FLAG_IWARP_ENA, vsi->back->flags)) { ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID); ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA; @@ -6271,7 +6276,7 @@ static int i40e_add_channel(struct i40e_pf *pf, u16 uplink_seid, if (ch->type == I40E_VSI_VMDQ2) ctxt.flags = I40E_AQ_VSI_TYPE_VMDQ2; - if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) { + if (test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) { ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); ctxt.info.switch_id = @@ -6576,8 +6581,8 @@ int i40e_create_queue_channel(struct i40e_vsi *vsi, * VSI to be added switch to VEB mode. */ - if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { - pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; + if (!test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) { + set_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); if (vsi->type == I40E_VSI_MAIN) { if (i40e_is_tc_mqprio_enabled(pf)) @@ -6988,9 +6993,9 @@ int i40e_hw_dcb_config(struct i40e_pf *pf, struct i40e_dcbx_config *new_cfg) if (need_reconfig) { /* Enable DCB tagging only when more than one TC */ if (new_numtc > 1) - pf->flags |= I40E_FLAG_DCB_ENABLED; + set_bit(I40E_FLAG_DCB_ENA, pf->flags); else - pf->flags &= ~I40E_FLAG_DCB_ENABLED; + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); set_bit(__I40E_PORT_SUSPENDED, pf->state); /* Reconfiguration needed quiesce all VSIs */ @@ -7080,7 +7085,7 @@ out: set_bit(__I40E_CLIENT_L2_CHANGE, pf->state); } /* registers are set, lets apply */ - if (pf->hw_features & I40E_HW_USE_SET_LLDP_MIB) + if (test_bit(I40E_HW_CAP_USE_SET_LLDP_MIB, pf->hw.caps)) ret = i40e_hw_set_dcb_config(pf, new_cfg); } @@ -7101,7 +7106,7 @@ int i40e_dcb_sw_default_config(struct i40e_pf *pf) struct i40e_hw *hw = &pf->hw; int err; - if (pf->hw_features & I40E_HW_USE_SET_LLDP_MIB) { + if (test_bit(I40E_HW_CAP_USE_SET_LLDP_MIB, pf->hw.caps)) { /* Update the local cached instance with TC0 ETS */ memset(&pf->tmp_cfg, 0, sizeof(struct i40e_dcbx_config)); pf->tmp_cfg.etscfg.willing = I40E_IEEE_DEFAULT_ETS_WILLING; @@ -7162,12 +7167,12 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf) /* Do not enable DCB for SW1 and SW2 images even if the FW is capable * Also do not enable DCBx if FW LLDP agent is disabled */ - if (pf->hw_features & I40E_HW_NO_DCB_SUPPORT) { + if (test_bit(I40E_HW_CAP_NO_DCB_SUPPORT, pf->hw.caps)) { dev_info(&pf->pdev->dev, "DCB is not supported.\n"); err = -EOPNOTSUPP; goto out; } - if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) { + if (test_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags)) { dev_info(&pf->pdev->dev, "FW LLDP is disabled, attempting SW DCB\n"); err = i40e_dcb_sw_default_config(pf); if (err) { @@ -7178,8 +7183,8 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf) pf->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE; /* at init capable but disabled */ - pf->flags |= I40E_FLAG_DCB_CAPABLE; - pf->flags &= ~I40E_FLAG_DCB_ENABLED; + set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); goto out; } err = i40e_init_dcb(hw, true); @@ -7194,20 +7199,20 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf) pf->dcbx_cap = 
DCB_CAP_DCBX_LLD_MANAGED | DCB_CAP_DCBX_VER_IEEE; - pf->flags |= I40E_FLAG_DCB_CAPABLE; + set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); /* Enable DCB tagging only when more than one TC * or explicitly disable if only one TC */ if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1) - pf->flags |= I40E_FLAG_DCB_ENABLED; + set_bit(I40E_FLAG_DCB_ENA, pf->flags); else - pf->flags &= ~I40E_FLAG_DCB_ENABLED; + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); dev_dbg(&pf->pdev->dev, "DCBX offload is supported for this PF.\n"); } } else if (pf->hw.aq.asq_last_status == I40E_AQ_RC_EPERM) { dev_info(&pf->pdev->dev, "FW LLDP disabled for this PF.\n"); - pf->flags |= I40E_FLAG_DISABLE_FW_LLDP; + set_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags); } else { dev_info(&pf->pdev->dev, "Query for DCB configuration failed, err %pe aq_err %s\n", @@ -7367,7 +7372,7 @@ static int i40e_up_complete(struct i40e_vsi *vsi) struct i40e_pf *pf = vsi->back; int err; - if (pf->flags & I40E_FLAG_MSIX_ENABLED) + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) i40e_vsi_configure_msix(vsi); else i40e_configure_msi_and_legacy(vsi); @@ -7471,7 +7476,7 @@ static int i40e_force_link_state(struct i40e_pf *pf, bool is_up) * and its speed values are OK, no need for a flap * if non_zero_phy_type was set, still need to force up */ - if (pf->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED) + if (test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags)) non_zero_phy_type = true; else if (is_up && abilities.phy_type != 0 && abilities.link_speed != 0) return 0; @@ -7487,7 +7492,7 @@ static int i40e_force_link_state(struct i40e_pf *pf, bool is_up) non_zero_phy_type ? (u8)((mask >> 32) & 0xff) : 0; /* Copy the old settings, except of phy_type */ config.abilities = abilities.abilities; - if (pf->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED) { + if (test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags)) { if (is_up) config.abilities |= I40E_AQ_PHY_ENABLE_LINK; else @@ -7537,8 +7542,8 @@ int i40e_up(struct i40e_vsi *vsi) int err; if (vsi->type == I40E_VSI_MAIN && - (vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED || - vsi->back->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED)) + (test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags) || + test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, vsi->back->flags))) i40e_force_link_state(vsi->back, true); err = i40e_vsi_configure(vsi); @@ -7566,8 +7571,8 @@ void i40e_down(struct i40e_vsi *vsi) i40e_vsi_disable_irq(vsi); i40e_vsi_stop_rings(vsi); if (vsi->type == I40E_VSI_MAIN && - (vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED || - vsi->back->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED)) + (test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags) || + test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, vsi->back->flags))) i40e_force_link_state(vsi->back, false); i40e_napi_disable_all(vsi); @@ -7973,7 +7978,7 @@ static void *i40e_fwd_add(struct net_device *netdev, struct net_device *vdev) struct i40e_fwd_adapter *fwd; int avail_macvlan, ret; - if ((pf->flags & I40E_FLAG_DCB_ENABLED)) { + if (test_bit(I40E_FLAG_DCB_ENA, pf->flags)) { netdev_info(netdev, "Macvlans are not supported when DCB is enabled\n"); return ERR_PTR(-EINVAL); } @@ -8168,23 +8173,23 @@ static int i40e_setup_tc(struct net_device *netdev, void *type_data) hw = mqprio_qopt->qopt.hw; mode = mqprio_qopt->mode; if (!hw) { - pf->flags &= ~I40E_FLAG_TC_MQPRIO; + clear_bit(I40E_FLAG_TC_MQPRIO_ENA, pf->flags); memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt)); goto config_tc; } /* Check if MFP enabled */ - if (pf->flags & I40E_FLAG_MFP_ENABLED) 
{ + if (test_bit(I40E_FLAG_MFP_ENA, pf->flags)) { netdev_info(netdev, "Configuring TC not supported in MFP mode\n"); return ret; } switch (mode) { case TC_MQPRIO_MODE_DCB: - pf->flags &= ~I40E_FLAG_TC_MQPRIO; + clear_bit(I40E_FLAG_TC_MQPRIO_ENA, pf->flags); /* Check if DCB enabled to continue */ - if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) { + if (!test_bit(I40E_FLAG_DCB_ENA, pf->flags)) { netdev_info(netdev, "DCB is not enabled for adapter\n"); return ret; @@ -8198,20 +8203,20 @@ static int i40e_setup_tc(struct net_device *netdev, void *type_data) } break; case TC_MQPRIO_MODE_CHANNEL: - if (pf->flags & I40E_FLAG_DCB_ENABLED) { + if (test_bit(I40E_FLAG_DCB_ENA, pf->flags)) { netdev_info(netdev, "Full offload of TC Mqprio options is not supported when DCB is enabled\n"); return ret; } - if (!(pf->flags & I40E_FLAG_MSIX_ENABLED)) + if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) return ret; ret = i40e_validate_mqprio_qopt(vsi, mqprio_qopt); if (ret) return ret; memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt)); - pf->flags |= I40E_FLAG_TC_MQPRIO; - pf->flags &= ~I40E_FLAG_DCB_ENABLED; + set_bit(I40E_FLAG_TC_MQPRIO_ENA, pf->flags); + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); break; default: return -EINVAL; @@ -8795,11 +8800,11 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi, return -EINVAL; } - if (vsi->back->flags & I40E_FLAG_FD_SB_ENABLED) { + if (test_bit(I40E_FLAG_FD_SB_ENA, vsi->back->flags)) { dev_err(&vsi->back->pdev->dev, "Disable Flow Director Sideband, configuring Cloud filters via tc-flower\n"); - vsi->back->flags &= ~I40E_FLAG_FD_SB_ENABLED; - vsi->back->flags |= I40E_FLAG_FD_SB_TO_CLOUD_FILTER; + clear_bit(I40E_FLAG_FD_SB_ENA, vsi->back->flags); + clear_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, vsi->back->flags); } filter = kzalloc(sizeof(*filter), GFP_KERNEL); @@ -8895,11 +8900,11 @@ static int i40e_delete_clsflower(struct i40e_vsi *vsi, pf->num_cloud_filters--; if (!pf->num_cloud_filters) - if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) && - !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) { - pf->flags |= I40E_FLAG_FD_SB_ENABLED; - pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER; - pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE; + if (test_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, pf->flags) && + !test_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags)) { + set_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + clear_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, pf->flags); + clear_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); } return 0; } @@ -9200,11 +9205,11 @@ static void i40e_cloud_filter_exit(struct i40e_pf *pf) } pf->num_cloud_filters = 0; - if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) && - !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) { - pf->flags |= I40E_FLAG_FD_SB_ENABLED; - pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER; - pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE; + if (test_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, pf->flags) && + !test_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags)) { + set_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + clear_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, pf->flags); + clear_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); } } @@ -9292,7 +9297,7 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired) i40e_prep_for_reset(pf); i40e_reset_and_rebuild(pf, true, lock_acquired); dev_info(&pf->pdev->dev, - pf->flags & I40E_FLAG_DISABLE_FW_LLDP ? + test_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags) ? 
"FW LLDP is disabled\n" : "FW LLDP is enabled\n"); @@ -9407,12 +9412,12 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, if (I40E_IS_X710TL_DEVICE(hw->device_id) && (hw->phy.link_info.link_speed & ~(I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB)) && - !(pf->flags & I40E_FLAG_DCB_CAPABLE)) + !test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) /* let firmware decide if the DCB should be disabled */ - pf->flags |= I40E_FLAG_DCB_CAPABLE; + set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); /* Not DCB capable or capability disabled */ - if (!(pf->flags & I40E_FLAG_DCB_CAPABLE)) + if (!test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) return ret; /* Ignore if event is not for Nearest Bridge */ @@ -9448,7 +9453,7 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, (I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB))) { dev_warn(&pf->pdev->dev, "DCB is not supported for X710-T*L 2.5/5G speeds\n"); - pf->flags &= ~I40E_FLAG_DCB_CAPABLE; + clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); } else { dev_info(&pf->pdev->dev, "Failed querying DCB configuration data from firmware, err %pe aq_err %s\n", @@ -9476,9 +9481,9 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, /* Enable DCB tagging only when more than one TC */ if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1) - pf->flags |= I40E_FLAG_DCB_ENABLED; + set_bit(I40E_FLAG_DCB_ENA, pf->flags); else - pf->flags &= ~I40E_FLAG_DCB_ENABLED; + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); set_bit(__I40E_PORT_SUSPENDED, pf->state); /* Reconfiguration needed quiesce all VSIs */ @@ -9610,7 +9615,7 @@ u32 i40e_get_global_fd_count(struct i40e_pf *pf) static void i40e_reenable_fdir_sb(struct i40e_pf *pf) { if (test_and_clear_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state)) - if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) && + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) && (I40E_DEBUG_FD & pf->hw.debug_mask)) dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n"); } @@ -9631,7 +9636,7 @@ static void i40e_reenable_fdir_atr(struct i40e_pf *pf) I40E_L3_SRC_MASK | I40E_L3_DST_MASK | I40E_L4_SRC_MASK | I40E_L4_DST_MASK); - if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && + if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) && (I40E_DEBUG_FD & pf->hw.debug_mask)) dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n"); } @@ -9946,7 +9951,7 @@ static void i40e_link_event(struct i40e_pf *pf) if (pf->vf) i40e_vc_notify_link_state(pf); - if (pf->flags & I40E_FLAG_PTP) + if (test_bit(I40E_FLAG_PTP_ENA, pf->flags)) i40e_ptp_set_increment(pf); #ifdef CONFIG_I40E_DCB if (new_link == old_link) @@ -9963,13 +9968,13 @@ static void i40e_link_event(struct i40e_pf *pf) memset(&pf->tmp_cfg, 0, sizeof(pf->tmp_cfg)); err = i40e_dcb_sw_default_config(pf); if (err) { - pf->flags &= ~(I40E_FLAG_DCB_CAPABLE | - I40E_FLAG_DCB_ENABLED); + clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); } else { pf->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE; - pf->flags |= I40E_FLAG_DCB_CAPABLE; - pf->flags &= ~I40E_FLAG_DCB_ENABLED; + set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); } } #endif /* CONFIG_I40E_DCB */ @@ -9994,7 +9999,7 @@ static void i40e_watchdog_subtask(struct i40e_pf *pf) return; pf->service_timer_previous = jiffies; - if ((pf->flags & I40E_FLAG_LINK_POLLING_ENABLED) || + if (test_bit(I40E_FLAG_LINK_POLLING_ENA, pf->flags) || test_bit(__I40E_TEMP_LINK_POLLING, pf->state)) i40e_link_event(pf); @@ -10005,7 +10010,7 @@ 
static void i40e_watchdog_subtask(struct i40e_pf *pf) if (pf->vsi[i] && pf->vsi[i]->netdev) i40e_update_stats(pf->vsi[i]); - if (pf->flags & I40E_FLAG_VEB_STATS_ENABLED) { + if (test_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags)) { /* Update the stats for the active switching components */ for (i = 0; i < I40E_MAX_VEB; i++) if (pf->veb[i]) @@ -10094,7 +10099,7 @@ static void i40e_handle_link_event(struct i40e_pf *pf, if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) && (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) && (!(status->link_info & I40E_AQ_LINK_UP)) && - (!(pf->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED))) { + (!test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags))) { dev_err(&pf->pdev->dev, "Rx/Tx is disabled on this device because an unsupported SFP module type was detected.\n"); dev_err(&pf->pdev->dev, @@ -10400,7 +10405,7 @@ static int i40e_reconstitute_veb(struct i40e_veb *veb) if (ret) goto end_reconstitute; - if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) + if (test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) veb->bridge_mode = BRIDGE_MODE_VEB; else veb->bridge_mode = BRIDGE_MODE_VEPA; @@ -10545,7 +10550,7 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf) wr32(&pf->hw, I40E_GLQF_HKEY(i), hkey[i]); } - if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED)) + if (!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) return; /* find existing VSI and see if it needs configuring */ @@ -10557,8 +10562,8 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf) pf->vsi[pf->lan_vsi]->seid, 0); if (!vsi) { dev_info(&pf->pdev->dev, "Couldn't create FDir VSI\n"); - pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; - pf->flags |= I40E_FLAG_FD_SB_INACTIVE; + clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); return; } } @@ -10936,14 +10941,14 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) i40e_aq_set_dcb_parameters(hw, false, NULL); dev_warn(&pf->pdev->dev, "DCB is not supported for X710-T*L 2.5/5G speeds\n"); - pf->flags &= ~I40E_FLAG_DCB_CAPABLE; + clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); } else { i40e_aq_set_dcb_parameters(hw, true, NULL); ret = i40e_init_pf_dcb(pf); if (ret) { dev_info(&pf->pdev->dev, "DCB init failed %d, disabled\n", ret); - pf->flags &= ~I40E_FLAG_DCB_CAPABLE; + clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); /* Continue without DCB enabled */ } } @@ -11064,7 +11069,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) wr32(hw, I40E_REG_MSS, val); } - if (pf->hw_features & I40E_HW_RESTART_AUTONEG) { + if (test_bit(I40E_HW_CAP_RESTART_AUTONEG, pf->hw.caps)) { msleep(75); ret = i40e_aq_set_link_restart_an(&pf->hw, true, NULL); if (ret) @@ -11074,7 +11079,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) pf->hw.aq.asq_last_status)); } /* reinit the misc interrupt */ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { ret = i40e_setup_misc_vector(pf); if (ret) goto end_unlock; @@ -11349,7 +11354,7 @@ static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi) if (!vsi->num_rx_desc) vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, I40E_REQ_DESCRIPTOR_MULTIPLE); - if (pf->flags & I40E_FLAG_MSIX_ENABLED) + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) vsi->num_q_vectors = pf->num_lan_msix; else vsi->num_q_vectors = 1; @@ -11667,7 +11672,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) ring->count = vsi->num_tx_desc; ring->size = 0; ring->dcb_tc = 0; - if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) + if 
(test_bit(I40E_HW_CAP_WB_ON_ITR, vsi->back->hw.caps)) ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; ring->itr_setting = pf->tx_itr_default; WRITE_ONCE(vsi->tx_rings[i], ring++); @@ -11684,7 +11689,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) ring->count = vsi->num_tx_desc; ring->size = 0; ring->dcb_tc = 0; - if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) + if (test_bit(I40E_HW_CAP_WB_ON_ITR, vsi->back->hw.caps)) ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; set_ring_xdp(ring); ring->itr_setting = pf->tx_itr_default; @@ -11748,7 +11753,7 @@ static int i40e_init_msix(struct i40e_pf *pf) int v_actual; int iwarp_requested = 0; - if (!(pf->flags & I40E_FLAG_MSIX_ENABLED)) + if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) return -ENODEV; /* The number of vectors we'll request will be comprised of: @@ -11787,7 +11792,7 @@ static int i40e_init_msix(struct i40e_pf *pf) vectors_left -= pf->num_lan_msix; /* reserve one vector for sideband flow director */ - if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) { if (vectors_left) { pf->num_fdsb_msix = 1; v_budget++; @@ -11798,7 +11803,7 @@ static int i40e_init_msix(struct i40e_pf *pf) } /* can we reserve enough for iWARP? */ - if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) { iwarp_requested = pf->num_iwarp_msix; if (!vectors_left) @@ -11810,7 +11815,7 @@ static int i40e_init_msix(struct i40e_pf *pf) } /* any vectors left over go for VMDq support */ - if (pf->flags & I40E_FLAG_VMDQ_ENABLED) { + if (test_bit(I40E_FLAG_VMDQ_ENA, pf->flags)) { if (!vectors_left) { pf->num_vmdq_msix = 0; pf->num_vmdq_qps = 0; @@ -11867,7 +11872,7 @@ static int i40e_init_msix(struct i40e_pf *pf) v_actual = i40e_reserve_msix_vectors(pf, v_budget); if (v_actual < I40E_MIN_MSIX) { - pf->flags &= ~I40E_FLAG_MSIX_ENABLED; + clear_bit(I40E_FLAG_MSIX_ENA, pf->flags); kfree(pf->msix_entries); pf->msix_entries = NULL; pci_disable_msix(pf->pdev); @@ -11905,7 +11910,7 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->num_lan_msix = 1; break; case 3: - if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) { pf->num_lan_msix = 1; pf->num_iwarp_msix = 1; } else { @@ -11913,7 +11918,7 @@ static int i40e_init_msix(struct i40e_pf *pf) } break; default: - if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) { pf->num_iwarp_msix = min_t(int, (vec / 3), iwarp_requested); pf->num_vmdq_vsis = min_t(int, (vec / 3), @@ -11922,7 +11927,7 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->num_vmdq_vsis = min_t(int, (vec / 2), I40E_DEFAULT_NUM_VMDQ_VSI); } - if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) { pf->num_fdsb_msix = 1; vec--; } @@ -11934,22 +11939,20 @@ static int i40e_init_msix(struct i40e_pf *pf) } } - if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) && - (pf->num_fdsb_msix == 0)) { + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) && pf->num_fdsb_msix == 0) { dev_info(&pf->pdev->dev, "Sideband Flowdir disabled, not enough MSI-X vectors\n"); - pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; - pf->flags |= I40E_FLAG_FD_SB_INACTIVE; + clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); } - if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) && - (pf->num_vmdq_msix == 0)) { + if (test_bit(I40E_FLAG_VMDQ_ENA, pf->flags) && pf->num_vmdq_msix == 0) { dev_info(&pf->pdev->dev, "VMDq disabled, not enough MSI-X vectors\n"); - pf->flags &= ~I40E_FLAG_VMDQ_ENABLED; + 
clear_bit(I40E_FLAG_VMDQ_ENA, pf->flags); } - if ((pf->flags & I40E_FLAG_IWARP_ENABLED) && - (pf->num_iwarp_msix == 0)) { + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags) && + pf->num_iwarp_msix == 0) { dev_info(&pf->pdev->dev, "IWARP disabled, not enough MSI-X vectors\n"); - pf->flags &= ~I40E_FLAG_IWARP_ENABLED; + clear_bit(I40E_FLAG_IWARP_ENA, pf->flags); } i40e_debug(&pf->hw, I40E_DEBUG_INIT, "MSI-X vector distribution: PF %d, VMDq %d, FDSB %d, iWARP %d\n", @@ -12003,7 +12006,7 @@ static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi) int err, v_idx, num_q_vectors; /* if not MSIX, give the one vector only to the LAN VSI */ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) num_q_vectors = vsi->num_q_vectors; else if (vsi == pf->vsi[pf->lan_vsi]) num_q_vectors = 1; @@ -12034,38 +12037,39 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf) int vectors = 0; ssize_t size; - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { vectors = i40e_init_msix(pf); if (vectors < 0) { - pf->flags &= ~(I40E_FLAG_MSIX_ENABLED | - I40E_FLAG_IWARP_ENABLED | - I40E_FLAG_RSS_ENABLED | - I40E_FLAG_DCB_CAPABLE | - I40E_FLAG_DCB_ENABLED | - I40E_FLAG_SRIOV_ENABLED | - I40E_FLAG_FD_SB_ENABLED | - I40E_FLAG_FD_ATR_ENABLED | - I40E_FLAG_VMDQ_ENABLED); - pf->flags |= I40E_FLAG_FD_SB_INACTIVE; + clear_bit(I40E_FLAG_MSIX_ENA, pf->flags); + clear_bit(I40E_FLAG_IWARP_ENA, pf->flags); + clear_bit(I40E_FLAG_RSS_ENA, pf->flags); + clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); + clear_bit(I40E_FLAG_SRIOV_ENA, pf->flags); + clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + clear_bit(I40E_FLAG_FD_ATR_ENA, pf->flags); + clear_bit(I40E_FLAG_VMDQ_ENA, pf->flags); + set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); /* rework the queue expectations without MSIX */ i40e_determine_queue_usage(pf); } } - if (!(pf->flags & I40E_FLAG_MSIX_ENABLED) && - (pf->flags & I40E_FLAG_MSI_ENABLED)) { + if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags) && + test_bit(I40E_FLAG_MSI_ENA, pf->flags)) { dev_info(&pf->pdev->dev, "MSI-X not available, trying MSI\n"); vectors = pci_enable_msi(pf->pdev); if (vectors < 0) { dev_info(&pf->pdev->dev, "MSI init failed - %d\n", vectors); - pf->flags &= ~I40E_FLAG_MSI_ENABLED; + clear_bit(I40E_FLAG_MSI_ENA, pf->flags); } vectors = 1; /* one MSI or Legacy vector */ } - if (!(pf->flags & (I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED))) + if (!test_bit(I40E_FLAG_MSI_ENA, pf->flags) && + !test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) dev_info(&pf->pdev->dev, "MSI-X and MSI not available, falling back to Legacy IRQ\n"); /* set up vector assignment tracking */ @@ -12098,7 +12102,8 @@ static int i40e_restore_interrupt_scheme(struct i40e_pf *pf) * scheme. 
We need to re-enabled them here in order to attempt to * re-acquire the MSI or MSI-X vectors */ - pf->flags |= (I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED); + set_bit(I40E_FLAG_MSI_ENA, pf->flags); + set_bit(I40E_FLAG_MSIX_ENA, pf->flags); err = i40e_init_interrupt_scheme(pf); if (err) @@ -12120,7 +12125,7 @@ static int i40e_restore_interrupt_scheme(struct i40e_pf *pf) if (err) goto err_unwind; - if (pf->flags & I40E_FLAG_IWARP_ENABLED) + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) i40e_client_update_msix_info(pf); return 0; @@ -12148,7 +12153,7 @@ static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf) { int err; - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { err = i40e_setup_misc_vector(pf); if (err) { @@ -12158,7 +12163,7 @@ static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf) return err; } } else { - u32 flags = pf->flags & I40E_FLAG_MSI_ENABLED ? 0 : IRQF_SHARED; + u32 flags = test_bit(I40E_FLAG_MSI_ENA, pf->flags) ? 0 : IRQF_SHARED; err = request_irq(pf->pdev->irq, i40e_intr, flags, pf->int_name, pf); @@ -12362,7 +12367,7 @@ int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) { struct i40e_pf *pf = vsi->back; - if (pf->hw_features & I40E_HW_RSS_AQ_CAPABLE) + if (test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps)) return i40e_config_rss_aq(vsi, seed, lut, lut_size); else return i40e_config_rss_reg(vsi, seed, lut, lut_size); @@ -12381,7 +12386,7 @@ int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) { struct i40e_pf *pf = vsi->back; - if (pf->hw_features & I40E_HW_RSS_AQ_CAPABLE) + if (test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps)) return i40e_get_rss_aq(vsi, seed, lut, lut_size); else return i40e_get_rss_reg(vsi, seed, lut, lut_size); @@ -12484,7 +12489,7 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count) struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; int new_rss_size; - if (!(pf->flags & I40E_FLAG_RSS_ENABLED)) + if (!test_bit(I40E_FLAG_RSS_ENA, pf->flags)) return 0; queue_count = min_t(int, queue_count, num_online_cpus()); @@ -12717,9 +12722,9 @@ static int i40e_sw_init(struct i40e_pf *pf) u16 pow; /* Set default capability flags */ - pf->flags = I40E_FLAG_RX_CSUM_ENABLED | - I40E_FLAG_MSI_ENABLED | - I40E_FLAG_MSIX_ENABLED; + bitmap_zero(pf->flags, I40E_PF_FLAGS_NBITS); + set_bit(I40E_FLAG_MSI_ENA, pf->flags); + set_bit(I40E_FLAG_MSIX_ENA, pf->flags); /* Set default ITR */ pf->rx_itr_default = I40E_ITR_RX_DEF; @@ -12739,14 +12744,14 @@ static int i40e_sw_init(struct i40e_pf *pf) pf->rss_size_max = min_t(int, pf->rss_size_max, pow); if (pf->hw.func_caps.rss) { - pf->flags |= I40E_FLAG_RSS_ENABLED; + set_bit(I40E_FLAG_RSS_ENA, pf->flags); pf->alloc_rss_size = min_t(int, pf->rss_size_max, num_online_cpus()); } /* MFP mode enabled */ if (pf->hw.func_caps.npar_enable || pf->hw.func_caps.flex10_enable) { - pf->flags |= I40E_FLAG_MFP_ENABLED; + set_bit(I40E_FLAG_MFP_ENA, pf->flags); dev_info(&pf->pdev->dev, "MFP mode Enabled\n"); if (i40e_get_partition_bw_setting(pf)) { dev_warn(&pf->pdev->dev, @@ -12763,84 +12768,32 @@ static int i40e_sw_init(struct i40e_pf *pf) if ((pf->hw.func_caps.fd_filters_guaranteed > 0) || (pf->hw.func_caps.fd_filters_best_effort > 0)) { - pf->flags |= I40E_FLAG_FD_ATR_ENABLED; + set_bit(I40E_FLAG_FD_ATR_ENA, pf->flags); pf->atr_sample_rate = I40E_DEFAULT_ATR_SAMPLE_RATE; - if (pf->flags & I40E_FLAG_MFP_ENABLED && + if (test_bit(I40E_FLAG_MFP_ENA, pf->flags) && pf->hw.num_partitions > 1) dev_info(&pf->pdev->dev, "Flow Director 
Sideband mode Disabled in MFP mode\n"); else - pf->flags |= I40E_FLAG_FD_SB_ENABLED; + set_bit(I40E_FLAG_FD_SB_ENA, pf->flags); pf->fdir_pf_filter_count = pf->hw.func_caps.fd_filters_guaranteed; pf->hw.fdir_shared_filter_count = pf->hw.func_caps.fd_filters_best_effort; } - if (pf->hw.mac.type == I40E_MAC_X722) { - pf->hw_features |= (I40E_HW_RSS_AQ_CAPABLE | - I40E_HW_128_QP_RSS_CAPABLE | - I40E_HW_ATR_EVICT_CAPABLE | - I40E_HW_WB_ON_ITR_CAPABLE | - I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE | - I40E_HW_NO_PCI_LINK_CHECK | - I40E_HW_USE_SET_LLDP_MIB | - I40E_HW_GENEVE_OFFLOAD_CAPABLE | - I40E_HW_PTP_L4_CAPABLE | - I40E_HW_WOL_MC_MAGIC_PKT_WAKE | - I40E_HW_OUTER_UDP_CSUM_CAPABLE); - -#define I40E_FDEVICT_PCTYPE_DEFAULT 0xc03 - if (rd32(&pf->hw, I40E_GLQF_FDEVICTENA(1)) != - I40E_FDEVICT_PCTYPE_DEFAULT) { - dev_warn(&pf->pdev->dev, - "FD EVICT PCTYPES are not right, disable FD HW EVICT\n"); - pf->hw_features &= ~I40E_HW_ATR_EVICT_CAPABLE; - } - } else if ((pf->hw.aq.api_maj_ver > 1) || - ((pf->hw.aq.api_maj_ver == 1) && - (pf->hw.aq.api_min_ver > 4))) { - /* Supported in FW API version higher than 1.4 */ - pf->hw_features |= I40E_HW_GENEVE_OFFLOAD_CAPABLE; - } - /* Enable HW ATR eviction if possible */ - if (pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE) - pf->flags |= I40E_FLAG_HW_ATR_EVICT_ENABLED; - - if ((pf->hw.mac.type == I40E_MAC_XL710) && - (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) || - (pf->hw.aq.fw_maj_ver < 4))) { - pf->hw_features |= I40E_HW_RESTART_AUTONEG; - /* No DCB support for FW < v4.33 */ - pf->hw_features |= I40E_HW_NO_DCB_SUPPORT; - } - - /* Disable FW LLDP if FW < v4.3 */ - if ((pf->hw.mac.type == I40E_MAC_XL710) && - (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) || - (pf->hw.aq.fw_maj_ver < 4))) - pf->hw_features |= I40E_HW_STOP_FW_LLDP; - - /* Use the FW Set LLDP MIB API if FW > v4.40 */ - if ((pf->hw.mac.type == I40E_MAC_XL710) && - (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver >= 40)) || - (pf->hw.aq.fw_maj_ver >= 5))) - pf->hw_features |= I40E_HW_USE_SET_LLDP_MIB; - - /* Enable PTP L4 if FW > v6.0 */ - if (pf->hw.mac.type == I40E_MAC_XL710 && - pf->hw.aq.fw_maj_ver >= 6) - pf->hw_features |= I40E_HW_PTP_L4_CAPABLE; + if (test_bit(I40E_HW_CAP_ATR_EVICT, pf->hw.caps)) + set_bit(I40E_FLAG_HW_ATR_EVICT_ENA, pf->flags); if (pf->hw.func_caps.vmdq && num_online_cpus() != 1) { pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI; - pf->flags |= I40E_FLAG_VMDQ_ENABLED; + set_bit(I40E_FLAG_VMDQ_ENA, pf->flags); pf->num_vmdq_qps = i40e_default_queues_per_vmdq(pf); } if (pf->hw.func_caps.iwarp && num_online_cpus() != 1) { - pf->flags |= I40E_FLAG_IWARP_ENABLED; + set_bit(I40E_FLAG_IWARP_ENA, pf->flags); /* IWARP needs one extra vector for CQP just like MISC.*/ pf->num_iwarp_msix = (int)num_online_cpus() + 1; } @@ -12850,14 +12803,13 @@ static int i40e_sw_init(struct i40e_pf *pf) * if NPAR is functioning so unset this hw flag in this case. 
*/ if (pf->hw.mac.type == I40E_MAC_XL710 && - pf->hw.func_caps.npar_enable && - (pf->hw.flags & I40E_HW_FLAG_FW_LLDP_STOPPABLE)) - pf->hw.flags &= ~I40E_HW_FLAG_FW_LLDP_STOPPABLE; + pf->hw.func_caps.npar_enable) + clear_bit(I40E_HW_CAP_FW_LLDP_STOPPABLE, pf->hw.caps); #ifdef CONFIG_PCI_IOV if (pf->hw.func_caps.num_vfs && pf->hw.partition_id == 1) { pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF; - pf->flags |= I40E_FLAG_SRIOV_ENABLED; + set_bit(I40E_FLAG_SRIOV_ENA, pf->flags); pf->num_req_vfs = min_t(int, pf->hw.func_caps.num_vfs, I40E_MAX_VF_COUNT); @@ -12868,7 +12820,7 @@ static int i40e_sw_init(struct i40e_pf *pf) pf->lan_vsi = I40E_NO_VSI; /* By default FW has this off for performance reasons */ - pf->flags &= ~I40E_FLAG_VEB_STATS_ENABLED; + clear_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags); /* set up queue assignment tracking */ size = sizeof(struct i40e_lump_tracking) @@ -12887,8 +12839,8 @@ static int i40e_sw_init(struct i40e_pf *pf) /* Link down on close must be on when total port shutdown * is enabled for a given port */ - pf->flags |= (I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED | - I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED); + set_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags); + set_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags); dev_info(&pf->pdev->dev, "total-port-shutdown was enabled, link-down-on-close is forced on\n"); } @@ -12914,31 +12866,31 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features) */ if (features & NETIF_F_NTUPLE) { /* Enable filters and mark for reset */ - if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED)) + if (!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) need_reset = true; /* enable FD_SB only if there is MSI-X vector and no cloud * filters exist */ if (pf->num_fdsb_msix > 0 && !pf->num_cloud_filters) { - pf->flags |= I40E_FLAG_FD_SB_ENABLED; - pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE; + set_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + clear_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); } } else { /* turn off filters, mark for reset and clear SW filter list */ - if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) { need_reset = true; i40e_fdir_filter_exit(pf); } - pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; + clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); clear_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state); - pf->flags |= I40E_FLAG_FD_SB_INACTIVE; + set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); /* reset fd counters */ pf->fd_add_err = 0; pf->fd_atr_cnt = 0; /* if ATR was auto disabled it can be re-enabled. 
*/ if (test_and_clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state)) - if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && + if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) && (I40E_DEBUG_FD & pf->hw.debug_mask)) dev_info(&pf->pdev->dev, "ATR re-enabled.\n"); } @@ -13087,7 +13039,7 @@ static int i40e_get_phys_port_id(struct net_device *netdev, struct i40e_pf *pf = np->vsi->back; struct i40e_hw *hw = &pf->hw; - if (!(pf->hw_features & I40E_HW_PORT_ID_VALID)) + if (!test_bit(I40E_HW_CAP_PORT_ID_VALID, pf->hw.caps)) return -EOPNOTSUPP; ppid->id_len = min_t(int, sizeof(hw->mac.port_addr), sizeof(ppid->id)); @@ -13116,7 +13068,7 @@ static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct i40e_pf *pf = np->vsi->back; int err = 0; - if (!(pf->flags & I40E_FLAG_SRIOV_ENABLED)) + if (!test_bit(I40E_FLAG_SRIOV_ENA, pf->flags)) return -EOPNOTSUPP; if (vid) { @@ -13216,9 +13168,9 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev, veb->bridge_mode = mode; /* TODO: If no VFs or VMDq VSIs, disallow VEB mode */ if (mode == BRIDGE_MODE_VEB) - pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; + set_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); else - pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED; + clear_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); i40e_do_reset(pf, I40E_PF_RESET_FLAG, true); break; } @@ -13552,7 +13504,7 @@ static void i40e_queue_pair_enable_irq(struct i40e_vsi *vsi, int queue_pair) struct i40e_hw *hw = &pf->hw; /* All rings in a qp belong to the same qvector. */ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) i40e_irq_dynamic_enable(vsi, rxr->q_vector->v_idx); else i40e_irq_dynamic_enable_icr0(pf); @@ -13577,7 +13529,7 @@ static void i40e_queue_pair_disable_irq(struct i40e_vsi *vsi, int queue_pair) * * All rings in a qp belong to the same qvector. */ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { u32 intpf = vsi->base_vector + rxr->q_vector->v_idx; wr32(hw, I40E_PFINT_DYN_CTLN(intpf - 1), 0); @@ -13762,7 +13714,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) NETIF_F_RXCSUM | 0; - if (!(pf->hw_features & I40E_HW_OUTER_UDP_CSUM_CAPABLE)) + if (!test_bit(I40E_HW_CAP_OUTER_UDP_CSUM, pf->hw.caps)) netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; netdev->udp_tunnel_nic_info = &pf->udp_tunnel_nic; @@ -13798,7 +13750,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; - if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) + if (!test_bit(I40E_FLAG_MFP_ENA, pf->flags)) hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC; netdev->hw_features |= hw_features | NETIF_F_LOOPBACK; @@ -13989,7 +13941,7 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) * negative logic - if it's set, we need to fiddle with * the VSI to disable source pruning. 
*/ - if (pf->flags & I40E_FLAG_SOURCE_PRUNING_DISABLED) { + if (test_bit(I40E_FLAG_SOURCE_PRUNING_DIS, pf->flags)) { memset(&ctxt, 0, sizeof(ctxt)); ctxt.seid = pf->main_vsi_seid; ctxt.pf_num = pf->hw.pf_id; @@ -14011,7 +13963,7 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) } /* MFP mode setup queue map and update VSI */ - if ((pf->flags & I40E_FLAG_MFP_ENABLED) && + if (test_bit(I40E_FLAG_MFP_ENA, pf->flags) && !(pf->hw.func_caps.iscsi)) { /* NIC type PF */ memset(&ctxt, 0, sizeof(ctxt)); ctxt.seid = pf->main_vsi_seid; @@ -14059,7 +14011,7 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) ctxt.uplink_seid = vsi->uplink_seid; ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL; ctxt.flags = I40E_AQ_VSI_TYPE_PF; - if ((pf->flags & I40E_FLAG_VEB_MODE_ENABLED) && + if (test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags) && (i40e_is_vsi_uplink_mode_veb(vsi))) { ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); @@ -14107,7 +14059,7 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB); } - if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) { + if (test_bit(I40E_FLAG_IWARP_ENA, vsi->back->flags)) { ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID); ctxt.info.queueing_opt_flags |= @@ -14323,7 +14275,7 @@ static int i40e_vsi_setup_vectors(struct i40e_vsi *vsi) /* In Legacy mode, we do not have to get any other vector since we * piggyback on the misc/ICR0 for queue interrupts. */ - if (!(pf->flags & I40E_FLAG_MSIX_ENABLED)) + if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) return ret; if (vsi->num_q_vectors) vsi->base_vector = i40e_get_lump(pf, pf->irq_pile, @@ -14490,9 +14442,9 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, * already enabled, in which case we can't force VEPA * mode. 
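	 * Editorial illustration of the matching pf->flags conversion (a
	 * sketch, not code from this series): pf->flags is now an unsigned
	 * long bitmap (it is passed straight to
	 * i40e_set_fec_in_flags(..., pf->flags) later in this patch), so the
	 * check and clear below take the form
	 *	if (!test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags))
	 *		clear_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags);
	 * in place of the old
	 *	if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED))
	 *		pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;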
*/ - if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { + if (!test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) { veb->bridge_mode = BRIDGE_MODE_VEPA; - pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED; + clear_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); } i40e_config_bridge_mode(veb); } @@ -14588,12 +14540,16 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, break; } - if ((pf->hw_features & I40E_HW_RSS_AQ_CAPABLE) && - (vsi->type == I40E_VSI_VMDQ2)) { + if (test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps) && + vsi->type == I40E_VSI_VMDQ2) { ret = i40e_vsi_config_rss(vsi); + if (ret) + goto err_config; } return vsi; +err_config: + i40e_vsi_clear_rings(vsi); err_rings: i40e_vsi_free_q_vectors(vsi); err_msix: @@ -14831,7 +14787,7 @@ void i40e_veb_release(struct i40e_veb *veb) static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi) { struct i40e_pf *pf = veb->pf; - bool enable_stats = !!(pf->flags & I40E_FLAG_VEB_STATS_ENABLED); + bool enable_stats = !!test_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags); int ret; ret = i40e_aq_add_veb(&pf->hw, veb->uplink_seid, vsi->seid, @@ -15131,7 +15087,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acqui */ if ((pf->hw.pf_id == 0) && - !(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT)) { + !test_bit(I40E_FLAG_TRUE_PROMISC_ENA, pf->flags)) { flags = I40E_AQ_SET_SWITCH_CFG_PROMISC; pf->last_sw_conf_flags = flags; } @@ -15198,7 +15154,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acqui /* enable RSS in the HW, even for only one queue, as the stack can use * the hash */ - if ((pf->flags & I40E_FLAG_RSS_ENABLED)) + if (test_bit(I40E_FLAG_RSS_ENA, pf->flags)) i40e_pf_config_rss(pf); /* fill in link information and enable LSE reporting */ @@ -15240,42 +15196,42 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) queues_left = pf->hw.func_caps.num_tx_qp; if ((queues_left == 1) || - !(pf->flags & I40E_FLAG_MSIX_ENABLED)) { + !test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { /* one qp for PF, no queues for anything else */ queues_left = 0; pf->alloc_rss_size = pf->num_lan_qps = 1; /* make sure all the fancies are disabled */ - pf->flags &= ~(I40E_FLAG_RSS_ENABLED | - I40E_FLAG_IWARP_ENABLED | - I40E_FLAG_FD_SB_ENABLED | - I40E_FLAG_FD_ATR_ENABLED | - I40E_FLAG_DCB_CAPABLE | - I40E_FLAG_DCB_ENABLED | - I40E_FLAG_SRIOV_ENABLED | - I40E_FLAG_VMDQ_ENABLED); - pf->flags |= I40E_FLAG_FD_SB_INACTIVE; - } else if (!(pf->flags & (I40E_FLAG_RSS_ENABLED | - I40E_FLAG_FD_SB_ENABLED | - I40E_FLAG_FD_ATR_ENABLED | - I40E_FLAG_DCB_CAPABLE))) { + clear_bit(I40E_FLAG_RSS_ENA, pf->flags); + clear_bit(I40E_FLAG_IWARP_ENA, pf->flags); + clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + clear_bit(I40E_FLAG_FD_ATR_ENA, pf->flags); + clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); + clear_bit(I40E_FLAG_SRIOV_ENA, pf->flags); + clear_bit(I40E_FLAG_VMDQ_ENA, pf->flags); + set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); + } else if (!test_bit(I40E_FLAG_RSS_ENA, pf->flags) && + !test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) && + !test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) && + !test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) { /* one qp for PF */ pf->alloc_rss_size = pf->num_lan_qps = 1; queues_left -= pf->num_lan_qps; - pf->flags &= ~(I40E_FLAG_RSS_ENABLED | - I40E_FLAG_IWARP_ENABLED | - I40E_FLAG_FD_SB_ENABLED | - I40E_FLAG_FD_ATR_ENABLED | - I40E_FLAG_DCB_ENABLED | - I40E_FLAG_VMDQ_ENABLED); - pf->flags |= I40E_FLAG_FD_SB_INACTIVE; + clear_bit(I40E_FLAG_RSS_ENA, pf->flags); + 
clear_bit(I40E_FLAG_IWARP_ENA, pf->flags); + clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + clear_bit(I40E_FLAG_FD_ATR_ENA, pf->flags); + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); + clear_bit(I40E_FLAG_VMDQ_ENA, pf->flags); + set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); } else { /* Not enough queues for all TCs */ - if ((pf->flags & I40E_FLAG_DCB_CAPABLE) && - (queues_left < I40E_MAX_TRAFFIC_CLASS)) { - pf->flags &= ~(I40E_FLAG_DCB_CAPABLE | - I40E_FLAG_DCB_ENABLED); + if (test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags) && + queues_left < I40E_MAX_TRAFFIC_CLASS) { + clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); dev_info(&pf->pdev->dev, "not enough queues for DCB. DCB is disabled.\n"); } @@ -15288,24 +15244,24 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) queues_left -= pf->num_lan_qps; } - if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) { if (queues_left > 1) { queues_left -= 1; /* save 1 queue for FD */ } else { - pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; - pf->flags |= I40E_FLAG_FD_SB_INACTIVE; + clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); dev_info(&pf->pdev->dev, "not enough queues for Flow Director. Flow Director feature is disabled\n"); } } - if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) && + if (test_bit(I40E_FLAG_SRIOV_ENA, pf->flags) && pf->num_vf_qps && pf->num_req_vfs && queues_left) { pf->num_req_vfs = min_t(int, pf->num_req_vfs, (queues_left / pf->num_vf_qps)); queues_left -= (pf->num_req_vfs * pf->num_vf_qps); } - if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) && + if (test_bit(I40E_FLAG_VMDQ_ENA, pf->flags) && pf->num_vmdq_vsis && pf->num_vmdq_qps && queues_left) { pf->num_vmdq_vsis = min_t(int, pf->num_vmdq_vsis, (queues_left / pf->num_vmdq_qps)); @@ -15316,7 +15272,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) dev_dbg(&pf->pdev->dev, "qs_avail=%d FD SB=%d lan_qs=%d lan_tc0=%d vf=%d*%d vmdq=%d*%d, remaining=%d\n", pf->hw.func_caps.num_tx_qp, - !!(pf->flags & I40E_FLAG_FD_SB_ENABLED), + !!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags), pf->num_lan_qps, pf->alloc_rss_size, pf->num_req_vfs, pf->num_vf_qps, pf->num_vmdq_vsis, pf->num_vmdq_qps, queues_left); @@ -15340,7 +15296,8 @@ static int i40e_setup_pf_filter_control(struct i40e_pf *pf) settings->hash_lut_size = I40E_HASH_LUT_SIZE_128; /* Flow Director is enabled */ - if (pf->flags & (I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED)) + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) || + test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags)) settings->enable_fdir = true; /* Ethtype and MACVLAN filters enabled for PF */ @@ -15372,21 +15329,21 @@ static void i40e_print_features(struct i40e_pf *pf) i += scnprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d", pf->hw.func_caps.num_vsis, pf->vsi[pf->lan_vsi]->num_queue_pairs); - if (pf->flags & I40E_FLAG_RSS_ENABLED) + if (test_bit(I40E_FLAG_RSS_ENA, pf->flags)) i += scnprintf(&buf[i], REMAIN(i), " RSS"); - if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) + if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags)) i += scnprintf(&buf[i], REMAIN(i), " FD_ATR"); - if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) { i += scnprintf(&buf[i], REMAIN(i), " FD_SB"); i += scnprintf(&buf[i], REMAIN(i), " NTUPLE"); } - if (pf->flags & I40E_FLAG_DCB_CAPABLE) + if (test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) i += scnprintf(&buf[i], REMAIN(i), " DCB"); i += scnprintf(&buf[i], REMAIN(i), " VxLAN"); i += scnprintf(&buf[i], REMAIN(i), " Geneve"); - if 
(pf->flags & I40E_FLAG_PTP) + if (test_bit(I40E_FLAG_PTP_ENA, pf->flags)) i += scnprintf(&buf[i], REMAIN(i), " PTP"); - if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) + if (test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) i += scnprintf(&buf[i], REMAIN(i), " VEB"); else i += scnprintf(&buf[i], REMAIN(i), " VEPA"); @@ -15417,22 +15374,26 @@ static void i40e_get_platform_mac_addr(struct pci_dev *pdev, struct i40e_pf *pf) * @fec_cfg: FEC option to set in flags * @flags: ptr to flags in which we set FEC option **/ -void i40e_set_fec_in_flags(u8 fec_cfg, u32 *flags) +void i40e_set_fec_in_flags(u8 fec_cfg, unsigned long *flags) { - if (fec_cfg & I40E_AQ_SET_FEC_AUTO) - *flags |= I40E_FLAG_RS_FEC | I40E_FLAG_BASE_R_FEC; + if (fec_cfg & I40E_AQ_SET_FEC_AUTO) { + set_bit(I40E_FLAG_RS_FEC, flags); + set_bit(I40E_FLAG_BASE_R_FEC, flags); + } if ((fec_cfg & I40E_AQ_SET_FEC_REQUEST_RS) || (fec_cfg & I40E_AQ_SET_FEC_ABILITY_RS)) { - *flags |= I40E_FLAG_RS_FEC; - *flags &= ~I40E_FLAG_BASE_R_FEC; + set_bit(I40E_FLAG_RS_FEC, flags); + clear_bit(I40E_FLAG_BASE_R_FEC, flags); } if ((fec_cfg & I40E_AQ_SET_FEC_REQUEST_KR) || (fec_cfg & I40E_AQ_SET_FEC_ABILITY_KR)) { - *flags |= I40E_FLAG_BASE_R_FEC; - *flags &= ~I40E_FLAG_RS_FEC; + set_bit(I40E_FLAG_BASE_R_FEC, flags); + clear_bit(I40E_FLAG_RS_FEC, flags); + } + if (fec_cfg == 0) { + clear_bit(I40E_FLAG_RS_FEC, flags); + clear_bit(I40E_FLAG_BASE_R_FEC, flags); } - if (fec_cfg == 0) - *flags &= ~(I40E_FLAG_RS_FEC | I40E_FLAG_BASE_R_FEC); } /** @@ -15858,15 +15819,15 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->vendor_id, hw->device_id, hw->subsystem_vendor_id, hw->subsystem_device_id); - if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && - hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw)) + if (i40e_is_aq_api_ver_ge(hw, I40E_FW_API_VERSION_MAJOR, + I40E_FW_MINOR_VERSION(hw) + 1)) dev_dbg(&pdev->dev, "The driver for the device detected a newer version of the NVM image v%u.%u than v%u.%u.\n", hw->aq.api_maj_ver, hw->aq.api_min_ver, I40E_FW_API_VERSION_MAJOR, I40E_FW_MINOR_VERSION(hw)); - else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4) + else if (i40e_is_aq_api_ver_lt(hw, 1, 4)) dev_info(&pdev->dev, "The driver for the device detected an older version of the NVM image v%u.%u than expected v%u.%u. Please update the NVM image.\n", hw->aq.api_maj_ver, @@ -15913,7 +15874,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * Ignore error return codes because if it was already disabled via * hardware settings this will fail */ - if (pf->hw_features & I40E_HW_STOP_FW_LLDP) { + if (test_bit(I40E_HW_CAP_STOP_FW_LLDP, pf->hw.caps)) { dev_info(&pdev->dev, "Stopping firmware LLDP agent.\n"); i40e_aq_stop_lldp(hw, true, false, NULL); } @@ -15930,7 +15891,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ether_addr_copy(hw->mac.perm_addr, hw->mac.addr); i40e_get_port_mac_addr(hw, hw->mac.port_addr); if (is_valid_ether_addr(hw->mac.port_addr)) - pf->hw_features |= I40E_HW_PORT_ID_VALID; + set_bit(I40E_HW_CAP_PORT_ID_VALID, pf->hw.caps); i40e_ptp_alloc_pins(pf); pci_set_drvdata(pdev, pf); @@ -15940,10 +15901,10 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) status = i40e_get_fw_lldp_status(&pf->hw, &lldp_status); (!status && lldp_status == I40E_GET_FW_LLDP_STATUS_ENABLED) ? 
- (pf->flags &= ~I40E_FLAG_DISABLE_FW_LLDP) : - (pf->flags |= I40E_FLAG_DISABLE_FW_LLDP); + (clear_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags)) : + (set_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags)); dev_info(&pdev->dev, - (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) ? + test_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags) ? "FW LLDP is disabled\n" : "FW LLDP is enabled\n"); @@ -15953,7 +15914,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = i40e_init_pf_dcb(pf); if (err) { dev_info(&pdev->dev, "DCB init failed %d, disabled\n", err); - pf->flags &= ~(I40E_FLAG_DCB_CAPABLE | I40E_FLAG_DCB_ENABLED); + clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); /* Continue without DCB enabled */ } #endif /* CONFIG_I40E_DCB */ @@ -16021,11 +15983,11 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) #ifdef CONFIG_PCI_IOV /* prep for VF support */ - if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) && - (pf->flags & I40E_FLAG_MSIX_ENABLED) && + if (test_bit(I40E_FLAG_SRIOV_ENA, pf->flags) && + test_bit(I40E_FLAG_MSIX_ENA, pf->flags) && !test_bit(__I40E_BAD_EEPROM, pf->state)) { if (pci_num_vf(pdev)) - pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; + set_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); } #endif err = i40e_setup_pf_switch(pf, false, false); @@ -16066,7 +16028,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) wr32(hw, I40E_REG_MSS, val); } - if (pf->hw_features & I40E_HW_RESTART_AUTONEG) { + if (test_bit(I40E_HW_CAP_RESTART_AUTONEG, pf->hw.caps)) { msleep(75); err = i40e_aq_set_link_restart_an(&pf->hw, true, NULL); if (err) @@ -16086,7 +16048,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * the misc functionality and queue processing is combined in * the same vector and that gets setup at open. 
*/ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { err = i40e_setup_misc_vector(pf); if (err) { dev_info(&pdev->dev, @@ -16099,8 +16061,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) #ifdef CONFIG_PCI_IOV /* prep for VF support */ - if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) && - (pf->flags & I40E_FLAG_MSIX_ENABLED) && + if (test_bit(I40E_FLAG_SRIOV_ENA, pf->flags) && + test_bit(I40E_FLAG_MSIX_ENA, pf->flags) && !test_bit(__I40E_BAD_EEPROM, pf->state)) { /* disable link interrupts for VFs */ val = rd32(hw, I40E_PFGEN_PORTMDIO_NUM); @@ -16120,7 +16082,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } #endif /* CONFIG_PCI_IOV */ - if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) { pf->iwarp_base_vector = i40e_get_lump(pf, pf->irq_pile, pf->num_iwarp_msix, I40E_IWARP_IRQ_PILE_ID); @@ -16128,7 +16090,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_info(&pdev->dev, "failed to get tracking for %d vectors for IWARP err=%d\n", pf->num_iwarp_msix, pf->iwarp_base_vector); - pf->flags &= ~I40E_FLAG_IWARP_ENABLED; + clear_bit(I40E_FLAG_IWARP_ENA, pf->flags); } } @@ -16142,7 +16104,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) round_jiffies(jiffies + pf->service_timer_period)); /* add this PF to client device list and launch a client service task */ - if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) { err = i40e_lan_add_device(pf); if (err) dev_info(&pdev->dev, "Failed to add PF to client API service list: %d\n", @@ -16155,7 +16117,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * and will report PCI Gen 1 x 1 by default so don't bother * checking them. 
*/ - if (!(pf->hw_features & I40E_HW_NO_PCI_LINK_CHECK)) { + if (!test_bit(I40E_HW_CAP_NO_PCI_LINK_CHECK, pf->hw.caps)) { char speed[PCI_SPEED_SIZE] = "Unknown"; char width[PCI_WIDTH_SIZE] = "Unknown"; @@ -16209,7 +16171,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pf->hw.phy.link_info.requested_speeds = abilities.link_speed; /* set the FEC config due to the board capabilities */ - i40e_set_fec_in_flags(abilities.fec_cfg_curr_mod_ext_info, &pf->flags); + i40e_set_fec_in_flags(abilities.fec_cfg_curr_mod_ext_info, pf->flags); /* get the supported phy types from the fw */ err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities, NULL); @@ -16236,10 +16198,10 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pf->main_vsi_seid); if ((pf->hw.device_id == I40E_DEV_ID_10G_BASE_T) || - (pf->hw.device_id == I40E_DEV_ID_10G_BASE_T4)) - pf->hw_features |= I40E_HW_PHY_CONTROLS_LEDS; + (pf->hw.device_id == I40E_DEV_ID_10G_BASE_T4)) + set_bit(I40E_HW_CAP_PHY_CONTROLS_LEDS, pf->hw.caps); if (pf->hw.device_id == I40E_DEV_ID_SFP_I_X722) - pf->hw_features |= I40E_HW_HAVE_CRT_RETIMER; + set_bit(I40E_HW_CAP_CRT_RETIMER, pf->hw.caps); /* print a string summarizing features */ i40e_print_features(pf); @@ -16308,10 +16270,10 @@ static void i40e_remove(struct pci_dev *pdev) usleep_range(1000, 2000); set_bit(__I40E_IN_REMOVE, pf->state); - if (pf->flags & I40E_FLAG_SRIOV_ENABLED) { + if (test_bit(I40E_FLAG_SRIOV_ENA, pf->flags)) { set_bit(__I40E_VF_RESETS_DISABLED, pf->state); i40e_free_vfs(pf); - pf->flags &= ~I40E_FLAG_SRIOV_ENABLED; + clear_bit(I40E_FLAG_SRIOV_ENA, pf->flags); } /* no more scheduling of any task */ set_bit(__I40E_SUSPENDED, pf->state); @@ -16366,7 +16328,7 @@ static void i40e_remove(struct pci_dev *pdev) i40e_cloud_filter_exit(pf); /* remove attached clients */ - if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) { ret_code = i40e_lan_del_device(pf); if (ret_code) dev_warn(&pdev->dev, "Failed to delete client device: %d\n", @@ -16385,7 +16347,7 @@ static void i40e_remove(struct pci_dev *pdev) unmap: /* Free MSI/legacy interrupt 0 when in recovery mode. */ if (test_bit(__I40E_RECOVERY_MODE, pf->state) && - !(pf->flags & I40E_FLAG_MSIX_ENABLED)) + !test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) free_irq(pf->pdev->irq, pf); /* shutdown the adminq */ @@ -16601,7 +16563,8 @@ static void i40e_shutdown(struct pci_dev *pdev) */ i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false); - if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE)) + if (test_bit(I40E_HW_CAP_WOL_MC_MAGIC_PKT_WAKE, pf->hw.caps) && + pf->wol_en) i40e_enable_mc_magic_wake(pf); i40e_prep_for_reset(pf); @@ -16613,7 +16576,7 @@ static void i40e_shutdown(struct pci_dev *pdev) /* Free MSI/legacy interrupt 0 when in recovery mode. 
*/ if (test_bit(__I40E_RECOVERY_MODE, pf->state) && - !(pf->flags & I40E_FLAG_MSIX_ENABLED)) + !test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) free_irq(pf->pdev->irq, pf); /* Since we're going to destroy queues during the @@ -16654,7 +16617,8 @@ static int __maybe_unused i40e_suspend(struct device *dev) */ i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false); - if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE)) + if (test_bit(I40E_HW_CAP_WOL_MC_MAGIC_PKT_WAKE, pf->hw.caps) && + pf->wol_en) i40e_enable_mc_magic_wake(pf); /* Since we're going to destroy queues during the diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index 77cdbfc19d47..62eb34871094 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -291,7 +291,7 @@ static int i40e_read_nvm_word_aq(struct i40e_hw *hw, u16 offset, static int __i40e_read_nvm_word(struct i40e_hw *hw, u16 offset, u16 *data) { - if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) + if (test_bit(I40E_HW_CAP_AQ_SRCTL_ACCESS_ENABLE, hw->caps)) return i40e_read_nvm_word_aq(hw, offset, data); return i40e_read_nvm_word_srctl(hw, offset, data); @@ -310,14 +310,14 @@ int i40e_read_nvm_word(struct i40e_hw *hw, u16 offset, { int ret_code = 0; - if (hw->flags & I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK) + if (test_bit(I40E_HW_CAP_NVM_READ_REQUIRES_LOCK, hw->caps)) ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ); if (ret_code) return ret_code; ret_code = __i40e_read_nvm_word(hw, offset, data); - if (hw->flags & I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK) + if (test_bit(I40E_HW_CAP_NVM_READ_REQUIRES_LOCK, hw->caps)) i40e_release_nvm(hw); return ret_code; @@ -499,7 +499,7 @@ static int __i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset, u16 *words, u16 *data) { - if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) + if (test_bit(I40E_HW_CAP_AQ_SRCTL_ACCESS_ENABLE, hw->caps)) return i40e_read_nvm_buffer_aq(hw, offset, words, data); return i40e_read_nvm_buffer_srctl(hw, offset, words, data); @@ -521,7 +521,7 @@ int i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset, { int ret_code = 0; - if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) { + if (test_bit(I40E_HW_CAP_AQ_SRCTL_ACCESS_ENABLE, hw->caps)) { ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ); if (!ret_code) { ret_code = i40e_read_nvm_buffer_aq(hw, offset, words, diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index 001162042050..af4269330581 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -501,4 +501,74 @@ i40e_add_pinfo_to_list(struct i40e_hw *hw, /* i40e_ddp */ int i40e_ddp_flash(struct net_device *netdev, struct ethtool_flash *flash); +/* Firmware and AdminQ version check helpers */ + +/** + * i40e_is_aq_api_ver_ge + * @hw: pointer to i40e_hw structure + * @maj: API major value to compare + * @min: API minor value to compare + * + * Assert whether current HW API version is greater/equal than provided. + **/ +static inline bool i40e_is_aq_api_ver_ge(struct i40e_hw *hw, u16 maj, u16 min) +{ + return (hw->aq.api_maj_ver > maj || + (hw->aq.api_maj_ver == maj && hw->aq.api_min_ver >= min)); +} + +/** + * i40e_is_aq_api_ver_lt + * @hw: pointer to i40e_hw structure + * @maj: API major value to compare + * @min: API minor value to compare + * + * Assert whether current HW API version is less than provided. 
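 * Illustrative usage (editorial sketch, mirroring the i40e_probe() change
 * earlier in this patch):
 *	old: else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4)
 *	new: else if (i40e_is_aq_api_ver_lt(hw, 1, 4))
 *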
+ **/ +static inline bool i40e_is_aq_api_ver_lt(struct i40e_hw *hw, u16 maj, u16 min) +{ + return !i40e_is_aq_api_ver_ge(hw, maj, min); +} + +/** + * i40e_is_fw_ver_ge + * @hw: pointer to i40e_hw structure + * @maj: API major value to compare + * @min: API minor value to compare + * + * Assert whether current firmware version is greater/equal than provided. + **/ +static inline bool i40e_is_fw_ver_ge(struct i40e_hw *hw, u16 maj, u16 min) +{ + return (hw->aq.fw_maj_ver > maj || + (hw->aq.fw_maj_ver == maj && hw->aq.fw_min_ver >= min)); +} + +/** + * i40e_is_fw_ver_lt + * @hw: pointer to i40e_hw structure + * @maj: API major value to compare + * @min: API minor value to compare + * + * Assert whether current firmware version is less than provided. + **/ +static inline bool i40e_is_fw_ver_lt(struct i40e_hw *hw, u16 maj, u16 min) +{ + return !i40e_is_fw_ver_ge(hw, maj, min); +} + +/** + * i40e_is_fw_ver_eq + * @hw: pointer to i40e_hw structure + * @maj: API major value to compare + * @min: API minor value to compare + * + * Assert whether current firmware version is equal to provided. + **/ +static inline bool i40e_is_fw_ver_eq(struct i40e_hw *hw, u16 maj, u16 min) +{ + return (hw->aq.fw_maj_ver > maj || + (hw->aq.fw_maj_ver == maj && hw->aq.fw_min_ver == min)); +} + #endif /* _I40E_PROTOTYPE_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 20b77398f060..1cf993a79438 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -35,7 +35,7 @@ enum i40e_ptp_pin { GPIO_4 }; -enum i40e_can_set_pins_t { +enum i40e_can_set_pins { CANT_DO_PINS = -1, CAN_SET_PINS, CAN_DO_PINS @@ -193,7 +193,7 @@ static bool i40e_is_ptp_pin_dev(struct i40e_hw *hw) * return CAN_DO_PINS if pins can be manipulated within a NIC or * return CANT_DO_PINS otherwise. **/ -static enum i40e_can_set_pins_t i40e_can_set_pins(struct i40e_pf *pf) +static enum i40e_can_set_pins i40e_can_set_pins(struct i40e_pf *pf) { if (!i40e_is_ptp_pin_dev(&pf->hw)) { dev_warn(&pf->pdev->dev, @@ -680,7 +680,7 @@ void i40e_ptp_rx_hang(struct i40e_pf *pf) * configured. We don't want to spuriously warn about Rx timestamp * hangs if we don't care about the timestamps. */ - if (!(pf->flags & I40E_FLAG_PTP) || !pf->ptp_rx) + if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags) || !pf->ptp_rx) return; spin_lock_bh(&pf->ptp_rx_lock); @@ -733,7 +733,7 @@ void i40e_ptp_tx_hang(struct i40e_pf *pf) { struct sk_buff *skb; - if (!(pf->flags & I40E_FLAG_PTP) || !pf->ptp_tx) + if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags) || !pf->ptp_tx) return; /* Nothing to do if we're not already waiting for a timestamp */ @@ -771,7 +771,7 @@ void i40e_ptp_tx_hwtstamp(struct i40e_pf *pf) u32 hi, lo; u64 ns; - if (!(pf->flags & I40E_FLAG_PTP) || !pf->ptp_tx) + if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags) || !pf->ptp_tx) return; /* don't attempt to timestamp if we don't have an skb */ @@ -818,7 +818,7 @@ void i40e_ptp_rx_hwtstamp(struct i40e_pf *pf, struct sk_buff *skb, u8 index) /* Since we cannot turn off the Rx timestamp logic if the device is * doing Tx timestamping, check if Rx timestamping is configured. 
*/ - if (!(pf->flags & I40E_FLAG_PTP) || !pf->ptp_rx) + if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags) || !pf->ptp_rx) return; hw = &pf->hw; @@ -924,7 +924,7 @@ int i40e_ptp_get_ts_config(struct i40e_pf *pf, struct ifreq *ifr) { struct hwtstamp_config *config = &pf->tstamp_config; - if (!(pf->flags & I40E_FLAG_PTP)) + if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags)) return -EOPNOTSUPP; return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ? @@ -1071,7 +1071,7 @@ static void i40e_ptp_set_pins_hw(struct i40e_pf *pf) static int i40e_ptp_set_pins(struct i40e_pf *pf, struct i40e_ptp_pins_settings *pins) { - enum i40e_can_set_pins_t pin_caps = i40e_can_set_pins(pf); + enum i40e_can_set_pins pin_caps = i40e_can_set_pins(pf); int i = 0; if (pin_caps == CANT_DO_PINS) @@ -1211,7 +1211,7 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf, case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - if (!(pf->hw_features & I40E_HW_PTP_L4_CAPABLE)) + if (!test_bit(I40E_HW_CAP_PTP_L4, pf->hw.caps)) return -ERANGE; pf->ptp_rx = true; tsyntype = I40E_PRTTSYN_CTL1_V1MESSTYPE0_MASK | @@ -1225,7 +1225,7 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf, case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: - if (!(pf->hw_features & I40E_HW_PTP_L4_CAPABLE)) + if (!test_bit(I40E_HW_CAP_PTP_L4, pf->hw.caps)) return -ERANGE; fallthrough; case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: @@ -1234,7 +1234,7 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf, pf->ptp_rx = true; tsyntype = I40E_PRTTSYN_CTL1_V2MESSTYPE0_MASK | I40E_PRTTSYN_CTL1_TSYNTYPE_V2; - if (pf->hw_features & I40E_HW_PTP_L4_CAPABLE) { + if (test_bit(I40E_HW_CAP_PTP_L4, pf->hw.caps)) { tsyntype |= I40E_PRTTSYN_CTL1_UDP_ENA_MASK; config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; } else { @@ -1308,7 +1308,7 @@ int i40e_ptp_set_ts_config(struct i40e_pf *pf, struct ifreq *ifr) struct hwtstamp_config config; int err; - if (!(pf->flags & I40E_FLAG_PTP)) + if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags)) return -EOPNOTSUPP; if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) @@ -1426,7 +1426,7 @@ static long i40e_ptp_create_clock(struct i40e_pf *pf) void i40e_ptp_save_hw_time(struct i40e_pf *pf) { /* don't try to access the PTP clock if it's not enabled */ - if (!(pf->flags & I40E_FLAG_PTP)) + if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags)) return; i40e_ptp_gettimex(&pf->ptp_caps, &pf->ptp_prev_hw_time, NULL); @@ -1483,7 +1483,7 @@ void i40e_ptp_init(struct i40e_pf *pf) pf_id = (rd32(hw, I40E_PRTTSYN_CTL0) & I40E_PRTTSYN_CTL0_PF_ID_MASK) >> I40E_PRTTSYN_CTL0_PF_ID_SHIFT; if (hw->pf_id != pf_id) { - pf->flags &= ~I40E_FLAG_PTP; + clear_bit(I40E_FLAG_PTP_ENA, pf->flags); dev_info(&pf->pdev->dev, "%s: PTP not supported on %s\n", __func__, netdev->name); @@ -1504,7 +1504,7 @@ void i40e_ptp_init(struct i40e_pf *pf) if (pf->hw.debug_mask & I40E_DEBUG_LAN) dev_info(&pf->pdev->dev, "PHC enabled\n"); - pf->flags |= I40E_FLAG_PTP; + set_bit(I40E_FLAG_PTP_ENA, pf->flags); /* Ensure the clocks are running. 
*/ regval = rd32(hw, I40E_PRTTSYN_CTL0); @@ -1539,7 +1539,7 @@ void i40e_ptp_stop(struct i40e_pf *pf) struct i40e_hw *hw = &pf->hw; u32 regval; - pf->flags &= ~I40E_FLAG_PTP; + clear_bit(I40E_FLAG_PTP_ENA, pf->flags); pf->ptp_tx = false; pf->ptp_rx = false; diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index f408fcf23ce8..d561687303ea 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -899,6 +899,7 @@ #define I40E_GLQF_ORT_FLX_PAYLOAD_SHIFT 7 #define I40E_GLQF_ORT_FLX_PAYLOAD_MASK I40E_MASK(0x1, I40E_GLQF_ORT_FLX_PAYLOAD_SHIFT) #define I40E_GLQF_FDEVICTENA(_i) (0x00270384 + ((_i) * 4)) /* _i=0...1 */ /* Reset: CORER */ +#define I40E_FDEVICT_PCTYPE_DEFAULT 0xc03 /* Redefined for X722 family */ #define I40E_GLGEN_STAT_CLEAR 0x00390004 /* Reset: CORER */ #endif /* _I40E_REGISTER_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index dd410b15000f..b82df5bdfac0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -464,7 +464,7 @@ static int i40e_add_del_fdir_tcp(struct i40e_vsi *vsi, &pf->fd_tcp6_filter_cnt); if (add) { - if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && + if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) && I40E_DEBUG_FD & pf->hw.debug_mask) dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n"); set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state); @@ -734,7 +734,7 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u64 qword0_raw, * FD ATR/SB and then re-enable it when there is room. */ if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) { - if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) && + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) && !test_and_set_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state)) if (I40E_DEBUG_FD & pf->hw.debug_mask) @@ -1071,7 +1071,7 @@ static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi, if (q_vector->arm_wb_state) return; - if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags)) { val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK | I40E_PFINT_DYN_CTLN_ITR_INDX_MASK; /* set noitr */ @@ -1095,7 +1095,7 @@ static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi, **/ void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) { - if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags)) { u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */ I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK | @@ -2699,7 +2699,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, u32 intval; /* If we don't have MSIX, then we only need to re-enable icr0 */ - if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) { + if (!test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags)) { i40e_irq_dynamic_enable_icr0(vsi->back); return; } @@ -2888,7 +2888,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, u16 i; /* make sure ATR is enabled */ - if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED)) + if (!test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags)) return; if (test_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state)) @@ -2933,7 +2933,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, /* Due to lack of space, no more new filters can be programmed */ if (th->syn && test_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state)) return; - if (pf->flags & 
I40E_FLAG_HW_ATR_EVICT_ENABLED) { + if (test_bit(I40E_FLAG_HW_ATR_EVICT_ENA, pf->flags)) { /* HW ATR eviction will take care of removing filters on FIN * and RST packets. */ @@ -2995,7 +2995,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & I40E_TXD_FLTR_QW1_CNTINDEX_MASK; - if (pf->flags & I40E_FLAG_HW_ATR_EVICT_ENABLED) + if (test_bit(I40E_FLAG_HW_ATR_EVICT_ENA, pf->flags)) dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK; fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype); @@ -3053,7 +3053,7 @@ static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, tx_flags |= I40E_TX_FLAGS_SW_VLAN; } - if (!(tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED)) + if (!test_bit(I40E_FLAG_DCB_ENA, tx_ring->vsi->back->flags)) goto out; /* Insert 802.1p priority into VLAN header */ @@ -3229,7 +3229,7 @@ static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb, * we are not already transmitting a packet to be timestamped */ pf = i40e_netdev_to_pf(tx_ring->netdev); - if (!(pf->flags & I40E_FLAG_PTP)) + if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags)) return 0; if (pf->ptp_tx && diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 421fe5675584..abf15067eb5d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -58,7 +58,7 @@ static inline u16 i40e_intrl_usec_to_reg(int intrl) * mentioning ITR_INDX, ITR_NONE cannot be used as an index 'n' into any * register but instead is a special value meaning "don't update" ITR0/1/2. */ -enum i40e_dyn_idx_t { +enum i40e_dyn_idx { I40E_IDX_ITR0 = 0, I40E_IDX_ITR1 = 1, I40E_IDX_ITR2 = 2, @@ -92,8 +92,8 @@ enum i40e_dyn_idx_t { BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP)) #define i40e_pf_get_default_rss_hena(pf) \ - (((pf)->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) ? \ - I40E_DEFAULT_RSS_HENA_EXPANDED : I40E_DEFAULT_RSS_HENA) + (test_bit(I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE, (pf)->hw.caps) ? 
\ + I40E_DEFAULT_RSS_HENA_EXPANDED : I40E_DEFAULT_RSS_HENA) /* Supported Rx Buffer Sizes (a multiple of 128) */ #define I40E_RXBUFFER_256 256 @@ -306,7 +306,7 @@ struct i40e_rx_queue_stats { u64 page_busy_count; }; -enum i40e_ring_state_t { +enum i40e_ring_state { __I40E_TX_FDIR_INIT_DONE, __I40E_TX_XPS_INIT_DONE, __I40E_RING_STATE_NBITS /* must be last */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index aff6dc6afbe2..de69c2e22448 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -64,9 +64,7 @@ typedef void (*I40E_ADMINQ_CALLBACK)(struct i40e_hw *, struct i40e_aq_desc *); enum i40e_mac_type { I40E_MAC_UNKNOWN = 0, I40E_MAC_XL710, - I40E_MAC_VF, I40E_MAC_X722, - I40E_MAC_X722_VF, I40E_MAC_GENERIC, }; @@ -272,9 +270,7 @@ struct i40e_mac_info { enum i40e_mac_type type; u8 addr[ETH_ALEN]; u8 perm_addr[ETH_ALEN]; - u8 san_addr[ETH_ALEN]; u8 port_addr[ETH_ALEN]; - u16 max_fcoeq; }; enum i40e_aq_resources_ids { @@ -482,6 +478,36 @@ struct i40e_dcbx_config { struct i40e_dcb_app_priority_table app[I40E_DCBX_MAX_APPS]; }; +enum i40e_hw_flags { + I40E_HW_CAP_AQ_SRCTL_ACCESS_ENABLE, + I40E_HW_CAP_802_1AD, + I40E_HW_CAP_AQ_PHY_ACCESS, + I40E_HW_CAP_NVM_READ_REQUIRES_LOCK, + I40E_HW_CAP_FW_LLDP_STOPPABLE, + I40E_HW_CAP_FW_LLDP_PERSISTENT, + I40E_HW_CAP_AQ_PHY_ACCESS_EXTENDED, + I40E_HW_CAP_X722_FEC_REQUEST, + I40E_HW_CAP_RSS_AQ, + I40E_HW_CAP_128_QP_RSS, + I40E_HW_CAP_ATR_EVICT, + I40E_HW_CAP_WB_ON_ITR, + I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE, + I40E_HW_CAP_NO_PCI_LINK_CHECK, + I40E_HW_CAP_100M_SGMII, + I40E_HW_CAP_NO_DCB_SUPPORT, + I40E_HW_CAP_USE_SET_LLDP_MIB, + I40E_HW_CAP_GENEVE_OFFLOAD, + I40E_HW_CAP_PTP_L4, + I40E_HW_CAP_WOL_MC_MAGIC_PKT_WAKE, + I40E_HW_CAP_CRT_RETIMER, + I40E_HW_CAP_OUTER_UDP_CSUM, + I40E_HW_CAP_PHY_CONTROLS_LEDS, + I40E_HW_CAP_STOP_FW_LLDP, + I40E_HW_CAP_PORT_ID_VALID, + I40E_HW_CAP_RESTART_AUTONEG, + I40E_HW_CAPS_NBITS, +}; + /* Port hardware description */ struct i40e_hw { u8 __iomem *hw_addr; @@ -546,16 +572,7 @@ struct i40e_hw { struct i40e_dcbx_config remote_dcbx_config; /* Peer Cfg */ struct i40e_dcbx_config desired_dcbx_config; /* CEE Desired Cfg */ -#define I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE BIT_ULL(0) -#define I40E_HW_FLAG_802_1AD_CAPABLE BIT_ULL(1) -#define I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE BIT_ULL(2) -#define I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK BIT_ULL(3) -#define I40E_HW_FLAG_FW_LLDP_STOPPABLE BIT_ULL(4) -#define I40E_HW_FLAG_FW_LLDP_PERSISTENT BIT_ULL(5) -#define I40E_HW_FLAG_AQ_PHY_ACCESS_EXTENDED BIT_ULL(6) -#define I40E_HW_FLAG_DROP_MODE BIT_ULL(7) -#define I40E_HW_FLAG_X722_FEC_REQUEST_CAPABLE BIT_ULL(8) - u64 flags; + DECLARE_BITMAP(caps, I40E_HW_CAPS_NBITS); /* Used in set switch config AQ command */ u16 switch_tag; @@ -567,12 +584,6 @@ struct i40e_hw { char err_str[16]; }; -static inline bool i40e_is_vf(struct i40e_hw *hw) -{ - return (hw->mac.type == I40E_MAC_VF || - hw->mac.type == I40E_MAC_X722_VF); -} - struct i40e_driver_version { u8 major_version; u8 minor_version; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 3f99eb198245..37cca484abb8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1808,7 +1808,7 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs) if (pci_num_vf(pf->pdev) != num_alloc_vfs) { ret = pci_enable_sriov(pf->pdev, num_alloc_vfs); if (ret) { - pf->flags &= 
~I40E_FLAG_VEB_MODE_ENABLED; + clear_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); pf->num_alloc_vfs = 0; goto err_iov; } @@ -1919,8 +1919,8 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) } if (num_vfs) { - if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { - pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; + if (!test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) { + set_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); i40e_do_reset_safe(pf, I40E_PF_RESET_AND_REBUILD_FLAG); } ret = i40e_pci_sriov_enable(pdev, num_vfs); @@ -1929,7 +1929,7 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!pci_vfs_assigned(pf->pdev)) { i40e_free_vfs(pf); - pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED; + clear_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); i40e_do_reset_safe(pf, I40E_PF_RESET_AND_REBUILD_FLAG); } else { dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n"); @@ -2137,14 +2137,14 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) { vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF; } else { - if ((pf->hw_features & I40E_HW_RSS_AQ_CAPABLE) && + if (test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps) && (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_AQ)) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_AQ; else vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG; } - if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) { + if (test_bit(I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE, pf->hw.caps)) { if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2; @@ -2153,12 +2153,12 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ENCAP) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ENCAP; - if ((pf->hw_features & I40E_HW_OUTER_UDP_CSUM_CAPABLE) && + if (test_bit(I40E_HW_CAP_OUTER_UDP_CSUM, pf->hw.caps) && (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM)) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM; if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_POLLING) { - if (pf->flags & I40E_FLAG_MFP_ENABLED) { + if (test_bit(I40E_FLAG_MFP_ENA, pf->flags)) { dev_err(&pf->pdev->dev, "VF %d requested polling mode: this feature is supported only when the device is running in single function per port (SFP) mode\n", vf->vf_id); @@ -2168,7 +2168,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RX_POLLING; } - if (pf->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) { + if (test_bit(I40E_HW_CAP_WB_ON_ITR, pf->hw.caps)) { if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_WB_ON_ITR; @@ -4843,7 +4843,7 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting) goto out; } - if (pf->flags & I40E_FLAG_MFP_ENABLED) { + if (test_bit(I40E_FLAG_MFP_ENA, pf->flags)) { dev_err(&pf->pdev->dev, "Trusted VF not supported in MFP mode.\n"); ret = -EINVAL; goto out; diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 351e0d36df44..cd7dcd0fa7f2 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -522,11 +522,18 @@ enum ice_misc_thread_tasks { ICE_MISC_THREAD_NBITS /* must be last */ }; -struct ice_switchdev_info { +struct ice_eswitch { struct ice_vsi *control_vsi; struct ice_vsi *uplink_vsi; struct ice_esw_br_offloads *br_offloads; + struct xarray reprs; bool is_running; + /* struct to allow cp queues management optimization */ + 
struct { + int to_reach; + int value; + bool is_reaching; + } qs; }; struct ice_agg_node { @@ -637,7 +644,7 @@ struct ice_pf { struct ice_link_default_override_tlv link_dflt_override; struct ice_lag *lag; /* Link Aggregation information */ - struct ice_switchdev_info switchdev; + struct ice_eswitch eswitch; struct ice_esw_br_port *br_port; #define ICE_INVALID_AGG_NODE_ID 0 @@ -846,7 +853,7 @@ static inline struct ice_vsi *ice_find_vsi(struct ice_pf *pf, u16 vsi_num) */ static inline bool ice_is_switchdev_running(struct ice_pf *pf) { - return pf->switchdev.is_running; + return pf->eswitch.is_running; } #define ICE_FD_STAT_CTR_BLOCK_COUNT 256 diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 80dc5445b50d..f4e24d11ebd0 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -810,6 +810,10 @@ static void ice_traverse_tx_tree(struct devlink *devlink, struct ice_sched_node struct ice_vf *vf; int i; + if (node->rate_node) + /* already added, skip to the next */ + goto traverse_children; + if (node->parent == tc_node) { /* create root node */ rate_node = devl_rate_node_create(devlink, node, node->name, NULL); @@ -831,6 +835,7 @@ static void ice_traverse_tx_tree(struct devlink *devlink, struct ice_sched_node if (rate_node && !IS_ERR(rate_node)) node->rate_node = rate_node; +traverse_children: for (i = 0; i < node->num_children; i++) ice_traverse_tx_tree(devlink, node->children[i], tc_node, pf); } @@ -861,6 +866,30 @@ int ice_devlink_rate_init_tx_topology(struct devlink *devlink, struct ice_vsi *v return 0; } +static void ice_clear_rate_nodes(struct ice_sched_node *node) +{ + node->rate_node = NULL; + + for (int i = 0; i < node->num_children; i++) + ice_clear_rate_nodes(node->children[i]); +} + +/** + * ice_devlink_rate_clear_tx_topology - clear node->rate_node + * @vsi: main vsi struct + * + * Clear rate_node to cleanup creation of Tx topology. + * + */ +void ice_devlink_rate_clear_tx_topology(struct ice_vsi *vsi) +{ + struct ice_port_info *pi = vsi->port_info; + + mutex_lock(&pi->sched_lock); + ice_clear_rate_nodes(pi->root->children[0]); + mutex_unlock(&pi->sched_lock); +} + /** * ice_set_object_tx_share - sets node scheduling parameter * @pi: devlink struct instance diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.h b/drivers/net/ethernet/intel/ice/ice_devlink.h index 6ec96779f52e..d291c0e2e17b 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.h +++ b/drivers/net/ethernet/intel/ice/ice_devlink.h @@ -20,5 +20,6 @@ void ice_devlink_destroy_regions(struct ice_pf *pf); int ice_devlink_rate_init_tx_topology(struct devlink *devlink, struct ice_vsi *vsi); void ice_tear_down_devlink_rate_tree(struct ice_pf *pf); +void ice_devlink_rate_clear_tx_topology(struct ice_vsi *vsi); #endif /* _ICE_DEVLINK_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index a655d499abfa..3f80e2081e5d 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -11,17 +11,34 @@ #include "ice_tc_lib.h" /** - * ice_eswitch_add_vf_sp_rule - add adv rule with VF's VSI index + * ice_eswitch_del_sp_rules - delete adv rules added on PRs + * @pf: pointer to the PF struct + * + * Delete all advanced rules that were used to forward packets with the + * device's VSI index to the corresponding eswitch ctrl VSI queue. 
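 * Editorial sketch of the new iteration idiom (not code added by this
 * patch): port representors now live in the pf->eswitch.reprs xarray,
 * initialized with xa_init_flags(&pf->eswitch.reprs, XA_FLAGS_ALLOC) when
 * switchdev mode is entered, so walks like the one below use
 *	struct ice_repr *repr;
 *	unsigned long id;
 *
 *	xa_for_each(&pf->eswitch.reprs, id, repr)
 *		ice_repr_stop_tx_queues(repr);
 * instead of the ice_for_each_vf()/vf->repr walks they replace.
 *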
+ */ +static void ice_eswitch_del_sp_rules(struct ice_pf *pf) +{ + struct ice_repr *repr; + unsigned long id; + + xa_for_each(&pf->eswitch.reprs, id, repr) { + if (repr->sp_rule.rid) + ice_rem_adv_rule_by_id(&pf->hw, &repr->sp_rule); + } +} + +/** + * ice_eswitch_add_sp_rule - add adv rule with device's VSI index * @pf: pointer to PF struct - * @vf: pointer to VF struct + * @repr: pointer to the repr struct * * This function adds advanced rule that forwards packets with - * VF's VSI index to the corresponding switchdev ctrl VSI queue. + * device's VSI index to the corresponding eswitch ctrl VSI queue. */ -static int -ice_eswitch_add_vf_sp_rule(struct ice_pf *pf, struct ice_vf *vf) +static int ice_eswitch_add_sp_rule(struct ice_pf *pf, struct ice_repr *repr) { - struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi; + struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi; struct ice_adv_rule_info rule_info = { 0 }; struct ice_adv_lkup_elem *list; struct ice_hw *hw = &pf->hw; @@ -38,39 +55,42 @@ ice_eswitch_add_vf_sp_rule(struct ice_pf *pf, struct ice_vf *vf) rule_info.sw_act.vsi_handle = ctrl_vsi->idx; rule_info.sw_act.fltr_act = ICE_FWD_TO_Q; rule_info.sw_act.fwd_id.q_id = hw->func_caps.common_cap.rxq_first_id + - ctrl_vsi->rxq_map[vf->vf_id]; + ctrl_vsi->rxq_map[repr->q_id]; rule_info.flags_info.act |= ICE_SINGLE_ACT_LB_ENABLE; rule_info.flags_info.act_valid = true; rule_info.tun_type = ICE_SW_TUN_AND_NON_TUN; - rule_info.src_vsi = vf->lan_vsi_idx; + rule_info.src_vsi = repr->src_vsi->idx; err = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, - &vf->repr->sp_rule); + &repr->sp_rule); if (err) - dev_err(ice_pf_to_dev(pf), "Unable to add VF slow-path rule in switchdev mode for VF %d", - vf->vf_id); + dev_err(ice_pf_to_dev(pf), "Unable to add slow-path rule for eswitch for PR %d", + repr->id); kfree(list); return err; } -/** - * ice_eswitch_del_vf_sp_rule - delete adv rule with VF's VSI index - * @vf: pointer to the VF struct - * - * Delete the advanced rule that was used to forward packets with the VF's VSI - * index to the corresponding switchdev ctrl VSI queue. 
- */ -static void ice_eswitch_del_vf_sp_rule(struct ice_vf *vf) +static int +ice_eswitch_add_sp_rules(struct ice_pf *pf) { - if (!vf->repr) - return; + struct ice_repr *repr; + unsigned long id; + int err; - ice_rem_adv_rule_by_id(&vf->pf->hw, &vf->repr->sp_rule); + xa_for_each(&pf->eswitch.reprs, id, repr) { + err = ice_eswitch_add_sp_rule(pf, repr); + if (err) { + ice_eswitch_del_sp_rules(pf); + return err; + } + } + + return 0; } /** - * ice_eswitch_setup_env - configure switchdev HW filters + * ice_eswitch_setup_env - configure eswitch HW filters * @pf: pointer to PF struct * * This function adds HW filters configuration specific for switchdev @@ -78,18 +98,18 @@ static void ice_eswitch_del_vf_sp_rule(struct ice_vf *vf) */ static int ice_eswitch_setup_env(struct ice_pf *pf) { - struct ice_vsi *uplink_vsi = pf->switchdev.uplink_vsi; - struct net_device *uplink_netdev = uplink_vsi->netdev; - struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi; + struct ice_vsi *uplink_vsi = pf->eswitch.uplink_vsi; + struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi; + struct net_device *netdev = uplink_vsi->netdev; struct ice_vsi_vlan_ops *vlan_ops; bool rule_added = false; ice_remove_vsi_fltr(&pf->hw, uplink_vsi->idx); - netif_addr_lock_bh(uplink_netdev); - __dev_uc_unsync(uplink_netdev, NULL); - __dev_mc_unsync(uplink_netdev, NULL); - netif_addr_unlock_bh(uplink_netdev); + netif_addr_lock_bh(netdev); + __dev_uc_unsync(netdev, NULL); + __dev_mc_unsync(netdev, NULL); + netif_addr_unlock_bh(netdev); if (ice_vsi_add_vlan_zero(uplink_vsi)) goto err_def_rx; @@ -132,19 +152,20 @@ err_def_rx: } /** - * ice_eswitch_remap_rings_to_vectors - reconfigure rings of switchdev ctrl VSI - * @pf: pointer to PF struct + * ice_eswitch_remap_rings_to_vectors - reconfigure rings of eswitch ctrl VSI + * @eswitch: pointer to eswitch struct * - * In switchdev number of allocated Tx/Rx rings is equal. + * In eswitch number of allocated Tx/Rx rings is equal. * * This function fills q_vectors structures associated with representor and * move each ring pairs to port representor netdevs. Each port representor * will have dedicated 1 Tx/Rx ring pair, so number of rings pair is equal to * number of VFs. 
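 * Editorial note (an illustrative sketch, not from this patch): the loop
 * below now locates each port representor through the xarray cursor API
 * rather than by VF id, roughly
 *	unsigned long repr_id = 0;
 *	struct ice_repr *repr;
 *
 *	repr = xa_find(&eswitch->reprs, &repr_id, U32_MAX, XA_PRESENT);
 *	repr_id += 1;		/* advance the cursor past this entry */
 *	repr->q_id = q_id;	/* this representor owns ring pair q_id */
 * so one Tx/Rx ring pair is handed to each representor present in
 * pf->eswitch.reprs.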
*/ -static void ice_eswitch_remap_rings_to_vectors(struct ice_pf *pf) +static void ice_eswitch_remap_rings_to_vectors(struct ice_eswitch *eswitch) { - struct ice_vsi *vsi = pf->switchdev.control_vsi; + struct ice_vsi *vsi = eswitch->control_vsi; + unsigned long repr_id = 0; int q_id; ice_for_each_txq(vsi, q_id) { @@ -152,13 +173,14 @@ static void ice_eswitch_remap_rings_to_vectors(struct ice_pf *pf) struct ice_tx_ring *tx_ring; struct ice_rx_ring *rx_ring; struct ice_repr *repr; - struct ice_vf *vf; - vf = ice_get_vf_by_id(pf, q_id); - if (WARN_ON(!vf)) - continue; + repr = xa_find(&eswitch->reprs, &repr_id, U32_MAX, + XA_PRESENT); + if (!repr) + break; - repr = vf->repr; + repr_id += 1; + repr->q_id = q_id; q_vector = repr->q_vector; tx_ring = vsi->tx_rings[q_id]; rx_ring = vsi->rx_rings[q_id]; @@ -181,136 +203,96 @@ static void ice_eswitch_remap_rings_to_vectors(struct ice_pf *pf) rx_ring->q_vector = q_vector; rx_ring->next = NULL; rx_ring->netdev = repr->netdev; - - ice_put_vf(vf); } } /** - * ice_eswitch_release_reprs - clear PR VSIs configuration + * ice_eswitch_release_repr - clear PR VSI configuration * @pf: poiner to PF struct - * @ctrl_vsi: pointer to switchdev control VSI + * @repr: pointer to PR */ static void -ice_eswitch_release_reprs(struct ice_pf *pf, struct ice_vsi *ctrl_vsi) +ice_eswitch_release_repr(struct ice_pf *pf, struct ice_repr *repr) { - struct ice_vf *vf; - unsigned int bkt; + struct ice_vsi *vsi = repr->src_vsi; - lockdep_assert_held(&pf->vfs.table_lock); - - ice_for_each_vf(pf, bkt, vf) { - struct ice_vsi *vsi = vf->repr->src_vsi; - - /* Skip VFs that aren't configured */ - if (!vf->repr->dst) - continue; + /* Skip representors that aren't configured */ + if (!repr->dst) + return; - ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); - metadata_dst_free(vf->repr->dst); - vf->repr->dst = NULL; - ice_eswitch_del_vf_sp_rule(vf); - ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, - ICE_FWD_TO_VSI); + ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); + metadata_dst_free(repr->dst); + repr->dst = NULL; + ice_fltr_add_mac_and_broadcast(vsi, repr->parent_mac, + ICE_FWD_TO_VSI); - netif_napi_del(&vf->repr->q_vector->napi); - } + netif_napi_del(&repr->q_vector->napi); } /** - * ice_eswitch_setup_reprs - configure port reprs to run in switchdev mode + * ice_eswitch_setup_repr - configure PR to run in switchdev mode * @pf: pointer to PF struct + * @repr: pointer to PR struct */ -static int ice_eswitch_setup_reprs(struct ice_pf *pf) +static int ice_eswitch_setup_repr(struct ice_pf *pf, struct ice_repr *repr) { - struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi; - int max_vsi_num = 0; - struct ice_vf *vf; - unsigned int bkt; - - lockdep_assert_held(&pf->vfs.table_lock); - - ice_for_each_vf(pf, bkt, vf) { - struct ice_vsi *vsi = vf->repr->src_vsi; - - ice_remove_vsi_fltr(&pf->hw, vsi->idx); - vf->repr->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, - GFP_KERNEL); - if (!vf->repr->dst) { - ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, - ICE_FWD_TO_VSI); - goto err; - } - - if (ice_eswitch_add_vf_sp_rule(pf, vf)) { - ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, - ICE_FWD_TO_VSI); - goto err; - } - - if (ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof)) { - ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, - ICE_FWD_TO_VSI); - ice_eswitch_del_vf_sp_rule(vf); - metadata_dst_free(vf->repr->dst); - vf->repr->dst = NULL; - goto err; - } + struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi; + struct ice_vsi *vsi = repr->src_vsi; + 
struct metadata_dst *dst; - if (ice_vsi_add_vlan_zero(vsi)) { - ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, - ICE_FWD_TO_VSI); - ice_eswitch_del_vf_sp_rule(vf); - metadata_dst_free(vf->repr->dst); - vf->repr->dst = NULL; - ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); - goto err; - } + ice_remove_vsi_fltr(&pf->hw, vsi->idx); + repr->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, + GFP_KERNEL); + if (!repr->dst) + goto err_add_mac_fltr; - if (max_vsi_num < vsi->vsi_num) - max_vsi_num = vsi->vsi_num; + if (ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof)) + goto err_dst_free; - netif_napi_add(vf->repr->netdev, &vf->repr->q_vector->napi, - ice_napi_poll); + if (ice_vsi_add_vlan_zero(vsi)) + goto err_update_security; - netif_keep_dst(vf->repr->netdev); - } + netif_napi_add(repr->netdev, &repr->q_vector->napi, + ice_napi_poll); - ice_for_each_vf(pf, bkt, vf) { - struct ice_repr *repr = vf->repr; - struct ice_vsi *vsi = repr->src_vsi; - struct metadata_dst *dst; + netif_keep_dst(repr->netdev); - dst = repr->dst; - dst->u.port_info.port_id = vsi->vsi_num; - dst->u.port_info.lower_dev = repr->netdev; - ice_repr_set_traffic_vsi(repr, ctrl_vsi); - } + dst = repr->dst; + dst->u.port_info.port_id = vsi->vsi_num; + dst->u.port_info.lower_dev = repr->netdev; + ice_repr_set_traffic_vsi(repr, ctrl_vsi); return 0; -err: - ice_eswitch_release_reprs(pf, ctrl_vsi); +err_update_security: + ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); +err_dst_free: + metadata_dst_free(repr->dst); + repr->dst = NULL; +err_add_mac_fltr: + ice_fltr_add_mac_and_broadcast(vsi, repr->parent_mac, ICE_FWD_TO_VSI); return -ENODEV; } /** - * ice_eswitch_update_repr - reconfigure VF port representor - * @vsi: VF VSI for which port representor is configured + * ice_eswitch_update_repr - reconfigure port representor + * @repr_id: representor ID + * @vsi: VSI for which port representor is configured */ -void ice_eswitch_update_repr(struct ice_vsi *vsi) +void ice_eswitch_update_repr(unsigned long repr_id, struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; struct ice_repr *repr; - struct ice_vf *vf; int ret; if (!ice_is_switchdev_running(pf)) return; - vf = vsi->vf; - repr = vf->repr; + repr = xa_load(&pf->eswitch.reprs, repr_id); + if (!repr) + return; + repr->src_vsi = vsi; repr->dst->u.port_info.port_id = vsi->vsi_num; @@ -319,9 +301,10 @@ void ice_eswitch_update_repr(struct ice_vsi *vsi) ret = ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof); if (ret) { - ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, ICE_FWD_TO_VSI); - dev_err(ice_pf_to_dev(pf), "Failed to update VF %d port representor", - vsi->vf->vf_id); + ice_fltr_add_mac_and_broadcast(vsi, repr->parent_mac, + ICE_FWD_TO_VSI); + dev_err(ice_pf_to_dev(pf), "Failed to update VSI of port representor %d", + repr->id); } } @@ -353,13 +336,13 @@ ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev) skb_dst_drop(skb); dst_hold((struct dst_entry *)repr->dst); skb_dst_set(skb, (struct dst_entry *)repr->dst); - skb->queue_mapping = repr->vf->vf_id; + skb->queue_mapping = repr->q_id; return ice_start_xmit(skb, netdev); } /** - * ice_eswitch_set_target_vsi - set switchdev context in Tx context descriptor + * ice_eswitch_set_target_vsi - set eswitch context in Tx context descriptor * @skb: pointer to send buffer * @off: pointer to offload struct */ @@ -382,7 +365,7 @@ ice_eswitch_set_target_vsi(struct sk_buff *skb, } /** - * ice_eswitch_release_env - clear switchdev HW filters + * ice_eswitch_release_env - clear 
eswitch HW filters * @pf: pointer to PF struct * * This function removes HW filters configuration specific for switchdev @@ -390,8 +373,8 @@ ice_eswitch_set_target_vsi(struct sk_buff *skb, */ static void ice_eswitch_release_env(struct ice_pf *pf) { - struct ice_vsi *uplink_vsi = pf->switchdev.uplink_vsi; - struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi; + struct ice_vsi *uplink_vsi = pf->eswitch.uplink_vsi; + struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi; struct ice_vsi_vlan_ops *vlan_ops; vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi); @@ -407,7 +390,7 @@ static void ice_eswitch_release_env(struct ice_pf *pf) } /** - * ice_eswitch_vsi_setup - configure switchdev control VSI + * ice_eswitch_vsi_setup - configure eswitch control VSI * @pf: pointer to PF structure * @pi: pointer to port_info structure */ @@ -424,48 +407,29 @@ ice_eswitch_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) } /** - * ice_eswitch_napi_del - remove NAPI handle for all port representors - * @pf: pointer to PF structure - */ -static void ice_eswitch_napi_del(struct ice_pf *pf) -{ - struct ice_vf *vf; - unsigned int bkt; - - lockdep_assert_held(&pf->vfs.table_lock); - - ice_for_each_vf(pf, bkt, vf) - netif_napi_del(&vf->repr->q_vector->napi); -} - -/** * ice_eswitch_napi_enable - enable NAPI for all port representors - * @pf: pointer to PF structure + * @reprs: xarray of reprs */ -static void ice_eswitch_napi_enable(struct ice_pf *pf) +static void ice_eswitch_napi_enable(struct xarray *reprs) { - struct ice_vf *vf; - unsigned int bkt; - - lockdep_assert_held(&pf->vfs.table_lock); + struct ice_repr *repr; + unsigned long id; - ice_for_each_vf(pf, bkt, vf) - napi_enable(&vf->repr->q_vector->napi); + xa_for_each(reprs, id, repr) + napi_enable(&repr->q_vector->napi); } /** * ice_eswitch_napi_disable - disable NAPI for all port representors - * @pf: pointer to PF structure + * @reprs: xarray of reprs */ -static void ice_eswitch_napi_disable(struct ice_pf *pf) +static void ice_eswitch_napi_disable(struct xarray *reprs) { - struct ice_vf *vf; - unsigned int bkt; - - lockdep_assert_held(&pf->vfs.table_lock); + struct ice_repr *repr; + unsigned long id; - ice_for_each_vf(pf, bkt, vf) - napi_disable(&vf->repr->q_vector->napi); + xa_for_each(reprs, id, repr) + napi_disable(&repr->q_vector->napi); } /** @@ -486,39 +450,26 @@ static int ice_eswitch_enable_switchdev(struct ice_pf *pf) return -EINVAL; } - pf->switchdev.control_vsi = ice_eswitch_vsi_setup(pf, pf->hw.port_info); - if (!pf->switchdev.control_vsi) + pf->eswitch.control_vsi = ice_eswitch_vsi_setup(pf, pf->hw.port_info); + if (!pf->eswitch.control_vsi) return -ENODEV; - ctrl_vsi = pf->switchdev.control_vsi; - pf->switchdev.uplink_vsi = uplink_vsi; + ctrl_vsi = pf->eswitch.control_vsi; + /* cp VSI is createad with 1 queue as default */ + pf->eswitch.qs.value = 1; + pf->eswitch.uplink_vsi = uplink_vsi; if (ice_eswitch_setup_env(pf)) goto err_vsi; - if (ice_repr_add_for_all_vfs(pf)) - goto err_repr_add; - - if (ice_eswitch_setup_reprs(pf)) - goto err_setup_reprs; - - ice_eswitch_remap_rings_to_vectors(pf); - - if (ice_vsi_open(ctrl_vsi)) - goto err_setup_reprs; - if (ice_eswitch_br_offloads_init(pf)) goto err_br_offloads; - ice_eswitch_napi_enable(pf); + pf->eswitch.is_running = true; return 0; err_br_offloads: - ice_vsi_close(ctrl_vsi); -err_setup_reprs: - ice_repr_rem_from_all_vfs(pf); -err_repr_add: ice_eswitch_release_env(pf); err_vsi: ice_vsi_release(ctrl_vsi); @@ -526,19 +477,19 @@ err_vsi: } /** - * ice_eswitch_disable_switchdev - disable 
switchdev resources + * ice_eswitch_disable_switchdev - disable eswitch resources * @pf: pointer to PF structure */ static void ice_eswitch_disable_switchdev(struct ice_pf *pf) { - struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi; + struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi; - ice_eswitch_napi_disable(pf); ice_eswitch_br_offloads_deinit(pf); ice_eswitch_release_env(pf); - ice_eswitch_release_reprs(pf, ctrl_vsi); ice_vsi_release(ctrl_vsi); - ice_repr_rem_from_all_vfs(pf); + + pf->eswitch.is_running = false; + pf->eswitch.qs.is_reaching = false; } /** @@ -566,6 +517,7 @@ ice_eswitch_mode_set(struct devlink *devlink, u16 mode, case DEVLINK_ESWITCH_MODE_LEGACY: dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to legacy", pf->hw.pf_id); + xa_destroy(&pf->eswitch.reprs); NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to legacy"); break; case DEVLINK_ESWITCH_MODE_SWITCHDEV: @@ -578,6 +530,7 @@ ice_eswitch_mode_set(struct devlink *devlink, u16 mode, dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to switchdev", pf->hw.pf_id); + xa_init_flags(&pf->eswitch.reprs, XA_FLAGS_ALLOC); NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to switchdev"); break; } @@ -616,74 +569,166 @@ bool ice_is_eswitch_mode_switchdev(struct ice_pf *pf) } /** - * ice_eswitch_release - cleanup eswitch + * ice_eswitch_start_all_tx_queues - start Tx queues of all port representors * @pf: pointer to PF structure */ -void ice_eswitch_release(struct ice_pf *pf) +static void ice_eswitch_start_all_tx_queues(struct ice_pf *pf) { - if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY) + struct ice_repr *repr; + unsigned long id; + + if (test_bit(ICE_DOWN, pf->state)) return; - ice_eswitch_disable_switchdev(pf); - pf->switchdev.is_running = false; + xa_for_each(&pf->eswitch.reprs, id, repr) + ice_repr_start_tx_queues(repr); } /** - * ice_eswitch_configure - configure eswitch + * ice_eswitch_stop_all_tx_queues - stop Tx queues of all port representors * @pf: pointer to PF structure */ -int ice_eswitch_configure(struct ice_pf *pf) +void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf) { - int status; - - if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY || pf->switchdev.is_running) - return 0; + struct ice_repr *repr; + unsigned long id; - status = ice_eswitch_enable_switchdev(pf); - if (status) - return status; + if (test_bit(ICE_DOWN, pf->state)) + return; - pf->switchdev.is_running = true; - return 0; + xa_for_each(&pf->eswitch.reprs, id, repr) + ice_repr_stop_tx_queues(repr); } -/** - * ice_eswitch_start_all_tx_queues - start Tx queues of all port representors - * @pf: pointer to PF structure - */ -static void ice_eswitch_start_all_tx_queues(struct ice_pf *pf) +static void ice_eswitch_stop_reprs(struct ice_pf *pf) { - struct ice_vf *vf; - unsigned int bkt; + ice_eswitch_del_sp_rules(pf); + ice_eswitch_stop_all_tx_queues(pf); + ice_eswitch_napi_disable(&pf->eswitch.reprs); +} - lockdep_assert_held(&pf->vfs.table_lock); +static void ice_eswitch_start_reprs(struct ice_pf *pf) +{ + ice_eswitch_napi_enable(&pf->eswitch.reprs); + ice_eswitch_start_all_tx_queues(pf); + ice_eswitch_add_sp_rules(pf); +} - if (test_bit(ICE_DOWN, pf->state)) - return; +static void +ice_eswitch_cp_change_queues(struct ice_eswitch *eswitch, int change) +{ + struct ice_vsi *cp = eswitch->control_vsi; + int queues = 0; + + if (eswitch->qs.is_reaching) { + if (eswitch->qs.to_reach >= eswitch->qs.value + change) { + queues = eswitch->qs.to_reach; + eswitch->qs.is_reaching = false; + } else { + queues = 0; + } + } else if ((change > 0 
&& cp->alloc_txq <= eswitch->qs.value) || + change < 0) { + queues = cp->alloc_txq + change; + } - ice_for_each_vf(pf, bkt, vf) { - if (vf->repr) - ice_repr_start_tx_queues(vf->repr); + if (queues) { + cp->req_txq = queues; + cp->req_rxq = queues; + ice_vsi_close(cp); + ice_vsi_rebuild(cp, ICE_VSI_FLAG_NO_INIT); + ice_vsi_open(cp); + } else if (!change) { + /* change == 0 means that VSI wasn't open, open it here */ + ice_vsi_open(cp); } + + eswitch->qs.value += change; + ice_eswitch_remap_rings_to_vectors(eswitch); } -/** - * ice_eswitch_stop_all_tx_queues - stop Tx queues of all port representors - * @pf: pointer to PF structure - */ -void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf) +int +ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf) { - struct ice_vf *vf; - unsigned int bkt; + struct ice_repr *repr; + int change = 1; + int err; - lockdep_assert_held(&pf->vfs.table_lock); + if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY) + return 0; - if (test_bit(ICE_DOWN, pf->state)) + if (xa_empty(&pf->eswitch.reprs)) { + err = ice_eswitch_enable_switchdev(pf); + if (err) + return err; + /* Control plane VSI is created with 1 queue as default */ + pf->eswitch.qs.to_reach -= 1; + change = 0; + } + + ice_eswitch_stop_reprs(pf); + + repr = ice_repr_add_vf(vf); + if (IS_ERR(repr)) + goto err_create_repr; + + err = ice_eswitch_setup_repr(pf, repr); + if (err) + goto err_setup_repr; + + err = xa_alloc(&pf->eswitch.reprs, &repr->id, repr, + XA_LIMIT(1, INT_MAX), GFP_KERNEL); + if (err) + goto err_xa_alloc; + + vf->repr_id = repr->id; + + ice_eswitch_cp_change_queues(&pf->eswitch, change); + ice_eswitch_start_reprs(pf); + + return 0; + +err_xa_alloc: + ice_eswitch_release_repr(pf, repr); +err_setup_repr: + ice_repr_rem_vf(repr); +err_create_repr: + if (xa_empty(&pf->eswitch.reprs)) + ice_eswitch_disable_switchdev(pf); + ice_eswitch_start_reprs(pf); + + return err; +} + +void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf) +{ + struct ice_repr *repr = xa_load(&pf->eswitch.reprs, vf->repr_id); + struct devlink *devlink = priv_to_devlink(pf); + + if (!repr) return; - ice_for_each_vf(pf, bkt, vf) { - if (vf->repr) - ice_repr_stop_tx_queues(vf->repr); + ice_eswitch_stop_reprs(pf); + xa_erase(&pf->eswitch.reprs, repr->id); + + if (xa_empty(&pf->eswitch.reprs)) + ice_eswitch_disable_switchdev(pf); + else + ice_eswitch_cp_change_queues(&pf->eswitch, -1); + + ice_eswitch_release_repr(pf, repr); + ice_repr_rem_vf(repr); + + if (xa_empty(&pf->eswitch.reprs)) { + /* since all port representors are destroyed, there is + * no point in keeping the nodes + */ + ice_devlink_rate_clear_tx_topology(ice_get_main_vsi(pf)); + devl_lock(devlink); + devl_rate_nodes_destroy(devlink); + devl_unlock(devlink); + } else { + ice_eswitch_start_reprs(pf); } } @@ -693,30 +738,35 @@ void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf) */ int ice_eswitch_rebuild(struct ice_pf *pf) { - struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi; - int status; - - ice_eswitch_napi_disable(pf); - ice_eswitch_napi_del(pf); - - status = ice_eswitch_setup_env(pf); - if (status) - return status; + struct ice_repr *repr; + unsigned long id; + int err; - status = ice_eswitch_setup_reprs(pf); - if (status) - return status; + if (!ice_is_switchdev_running(pf)) + return 0; - ice_eswitch_remap_rings_to_vectors(pf); + err = ice_vsi_rebuild(pf->eswitch.control_vsi, ICE_VSI_FLAG_INIT); + if (err) + return err; - ice_replay_tc_fltrs(pf); + xa_for_each(&pf->eswitch.reprs, id, repr) + ice_eswitch_detach(pf, repr->vf); - status = 
ice_vsi_open(ctrl_vsi); - if (status) - return status; + return 0; +} - ice_eswitch_napi_enable(pf); - ice_eswitch_start_all_tx_queues(pf); +/** + * ice_eswitch_reserve_cp_queues - reserve control plane VSI queues + * @pf: pointer to PF structure + * @change: how many more (or less) queues is needed + * + * Remember to call ice_eswitch_attach/detach() the "change" times. + */ +void ice_eswitch_reserve_cp_queues(struct ice_pf *pf, int change) +{ + if (pf->eswitch.qs.value + change < 0) + return; - return 0; + pf->eswitch.qs.to_reach = pf->eswitch.qs.value + change; + pf->eswitch.qs.is_reaching = true; } diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.h b/drivers/net/ethernet/intel/ice/ice_eswitch.h index b18bf83a2f5b..1a288a03a79a 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.h +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.h @@ -7,8 +7,9 @@ #include <net/devlink.h> #ifdef CONFIG_ICE_SWITCHDEV -void ice_eswitch_release(struct ice_pf *pf); -int ice_eswitch_configure(struct ice_pf *pf); +void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf); +int +ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf); int ice_eswitch_rebuild(struct ice_pf *pf); int ice_eswitch_mode_get(struct devlink *devlink, u16 *mode); @@ -17,7 +18,7 @@ ice_eswitch_mode_set(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack); bool ice_is_eswitch_mode_switchdev(struct ice_pf *pf); -void ice_eswitch_update_repr(struct ice_vsi *vsi); +void ice_eswitch_update_repr(unsigned long repr_id, struct ice_vsi *vsi); void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf); @@ -25,8 +26,15 @@ void ice_eswitch_set_target_vsi(struct sk_buff *skb, struct ice_tx_offload_params *off); netdev_tx_t ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev); +void ice_eswitch_reserve_cp_queues(struct ice_pf *pf, int change); #else /* CONFIG_ICE_SWITCHDEV */ -static inline void ice_eswitch_release(struct ice_pf *pf) { } +static inline void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf) { } + +static inline int +ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf) +{ + return -EOPNOTSUPP; +} static inline void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf) { } @@ -34,7 +42,8 @@ static inline void ice_eswitch_set_target_vsi(struct sk_buff *skb, struct ice_tx_offload_params *off) { } -static inline void ice_eswitch_update_repr(struct ice_vsi *vsi) { } +static inline void +ice_eswitch_update_repr(unsigned long repr_id, struct ice_vsi *vsi) { } static inline int ice_eswitch_configure(struct ice_pf *pf) { @@ -68,5 +77,8 @@ ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev) { return NETDEV_TX_BUSY; } + +static inline void +ice_eswitch_reserve_cp_queues(struct ice_pf *pf, int change) { } #endif /* CONFIG_ICE_SWITCHDEV */ #endif /* _ICE_ESWITCH_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch_br.c b/drivers/net/ethernet/intel/ice/ice_eswitch_br.c index 6ae0269bdf73..ac5beecd028b 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch_br.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch_br.c @@ -893,10 +893,14 @@ ice_eswitch_br_port_deinit(struct ice_esw_br *bridge, ice_eswitch_br_fdb_entry_delete(bridge, fdb_entry); } - if (br_port->type == ICE_ESWITCH_BR_UPLINK_PORT && vsi->back) + if (br_port->type == ICE_ESWITCH_BR_UPLINK_PORT && vsi->back) { vsi->back->br_port = NULL; - else if (vsi->vf && vsi->vf->repr) - vsi->vf->repr->br_port = NULL; + } else { + struct ice_repr *repr = ice_repr_get_by_vsi(vsi); + + if (repr) + repr->br_port = 
NULL; + } xa_erase(&bridge->ports, br_port->vsi_idx); ice_eswitch_br_port_vlans_flush(br_port); @@ -947,7 +951,7 @@ ice_eswitch_br_vf_repr_port_init(struct ice_esw_br *bridge, static int ice_eswitch_br_uplink_port_init(struct ice_esw_br *bridge, struct ice_pf *pf) { - struct ice_vsi *vsi = pf->switchdev.uplink_vsi; + struct ice_vsi *vsi = pf->eswitch.uplink_vsi; struct ice_esw_br_port *br_port; int err; @@ -1185,7 +1189,7 @@ ice_eswitch_br_port_event(struct notifier_block *nb, static void ice_eswitch_br_offloads_dealloc(struct ice_pf *pf) { - struct ice_esw_br_offloads *br_offloads = pf->switchdev.br_offloads; + struct ice_esw_br_offloads *br_offloads = pf->eswitch.br_offloads; ASSERT_RTNL(); @@ -1194,7 +1198,7 @@ ice_eswitch_br_offloads_dealloc(struct ice_pf *pf) ice_eswitch_br_deinit(br_offloads, br_offloads->bridge); - pf->switchdev.br_offloads = NULL; + pf->eswitch.br_offloads = NULL; kfree(br_offloads); } @@ -1205,14 +1209,14 @@ ice_eswitch_br_offloads_alloc(struct ice_pf *pf) ASSERT_RTNL(); - if (pf->switchdev.br_offloads) + if (pf->eswitch.br_offloads) return ERR_PTR(-EEXIST); br_offloads = kzalloc(sizeof(*br_offloads), GFP_KERNEL); if (!br_offloads) return ERR_PTR(-ENOMEM); - pf->switchdev.br_offloads = br_offloads; + pf->eswitch.br_offloads = br_offloads; br_offloads->pf = pf; return br_offloads; @@ -1223,7 +1227,7 @@ ice_eswitch_br_offloads_deinit(struct ice_pf *pf) { struct ice_esw_br_offloads *br_offloads; - br_offloads = pf->switchdev.br_offloads; + br_offloads = pf->eswitch.br_offloads; if (!br_offloads) return; diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 4b1e56396293..d826b5afa143 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -212,11 +212,18 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi) vsi->alloc_txq)); break; case ICE_VSI_SWITCHDEV_CTRL: - /* The number of queues for ctrl VSI is equal to number of VFs. + /* The number of queues for ctrl VSI is equal to number of PRs * Each ring is associated to the corresponding VF_PR netdev. 
+ * Tx and Rx rings are always equal */ - vsi->alloc_txq = ice_get_num_vfs(pf); - vsi->alloc_rxq = vsi->alloc_txq; + if (vsi->req_txq && vsi->req_rxq) { + vsi->alloc_txq = vsi->req_txq; + vsi->alloc_rxq = vsi->req_rxq; + } else { + vsi->alloc_txq = 1; + vsi->alloc_rxq = 1; + } + vsi->num_q_vectors = 1; break; case ICE_VSI_VF: @@ -519,16 +526,14 @@ static irqreturn_t ice_eswitch_msix_clean_rings(int __always_unused irq, void *d { struct ice_q_vector *q_vector = (struct ice_q_vector *)data; struct ice_pf *pf = q_vector->vsi->back; - struct ice_vf *vf; - unsigned int bkt; + struct ice_repr *repr; + unsigned long id; if (!q_vector->tx.tx_ring && !q_vector->rx.rx_ring) return IRQ_HANDLED; - rcu_read_lock(); - ice_for_each_vf_rcu(pf, bkt, vf) - napi_schedule(&vf->repr->q_vector->napi); - rcu_read_unlock(); + xa_for_each(&pf->eswitch.reprs, id, repr) + napi_schedule(&repr->q_vector->napi); return IRQ_HANDLED; } @@ -3071,27 +3076,26 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, } /** - * ice_vsi_realloc_stat_arrays - Frees unused stat structures + * ice_vsi_realloc_stat_arrays - Frees unused stat structures or alloc new ones * @vsi: VSI pointer - * @prev_txq: Number of Tx rings before ring reallocation - * @prev_rxq: Number of Rx rings before ring reallocation */ -static void -ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi, int prev_txq, int prev_rxq) +static int +ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi) { + u16 req_txq = vsi->req_txq ? vsi->req_txq : vsi->alloc_txq; + u16 req_rxq = vsi->req_rxq ? vsi->req_rxq : vsi->alloc_rxq; + struct ice_ring_stats **tx_ring_stats; + struct ice_ring_stats **rx_ring_stats; struct ice_vsi_stats *vsi_stat; struct ice_pf *pf = vsi->back; + u16 prev_txq = vsi->alloc_txq; + u16 prev_rxq = vsi->alloc_rxq; int i; - if (!prev_txq || !prev_rxq) - return; - if (vsi->type == ICE_VSI_CHNL) - return; - vsi_stat = pf->vsi_stats[vsi->idx]; - if (vsi->num_txq < prev_txq) { - for (i = vsi->num_txq; i < prev_txq; i++) { + if (req_txq < prev_txq) { + for (i = req_txq; i < prev_txq; i++) { if (vsi_stat->tx_ring_stats[i]) { kfree_rcu(vsi_stat->tx_ring_stats[i], rcu); WRITE_ONCE(vsi_stat->tx_ring_stats[i], NULL); @@ -3099,14 +3103,36 @@ ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi, int prev_txq, int prev_rxq) } } - if (vsi->num_rxq < prev_rxq) { - for (i = vsi->num_rxq; i < prev_rxq; i++) { + tx_ring_stats = vsi_stat->rx_ring_stats; + vsi_stat->tx_ring_stats = + krealloc_array(vsi_stat->tx_ring_stats, req_txq, + sizeof(*vsi_stat->tx_ring_stats), + GFP_KERNEL | __GFP_ZERO); + if (!vsi_stat->tx_ring_stats) { + vsi_stat->tx_ring_stats = tx_ring_stats; + return -ENOMEM; + } + + if (req_rxq < prev_rxq) { + for (i = req_rxq; i < prev_rxq; i++) { if (vsi_stat->rx_ring_stats[i]) { kfree_rcu(vsi_stat->rx_ring_stats[i], rcu); WRITE_ONCE(vsi_stat->rx_ring_stats[i], NULL); } } } + + rx_ring_stats = vsi_stat->rx_ring_stats; + vsi_stat->rx_ring_stats = + krealloc_array(vsi_stat->rx_ring_stats, req_rxq, + sizeof(*vsi_stat->rx_ring_stats), + GFP_KERNEL | __GFP_ZERO); + if (!vsi_stat->rx_ring_stats) { + vsi_stat->rx_ring_stats = rx_ring_stats; + return -ENOMEM; + } + + return 0; } /** @@ -3123,9 +3149,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) { struct ice_vsi_cfg_params params = {}; struct ice_coalesce_stored *coalesce; - int ret, prev_txq, prev_rxq; int prev_num_q_vectors = 0; struct ice_pf *pf; + int ret; if (!vsi) return -EINVAL; @@ -3144,8 +3170,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, 
coalesce); - prev_txq = vsi->num_txq; - prev_rxq = vsi->num_rxq; + ret = ice_vsi_realloc_stat_arrays(vsi); + if (ret) + goto err_vsi_cfg; ice_vsi_decfg(vsi); ret = ice_vsi_cfg_def(vsi, ¶ms); @@ -3163,8 +3190,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) return ice_schedule_reset(pf, ICE_RESET_PFR); } - ice_vsi_realloc_stat_arrays(vsi, prev_txq, prev_rxq); - ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors); kfree(coalesce); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index fb9c93f37e84..43ba3e55b8c1 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4702,6 +4702,8 @@ static void ice_deinit_features(struct ice_pf *pf) ice_ptp_release(pf); if (test_bit(ICE_FLAG_DPLL, pf->flags)) ice_dpll_deinit(pf); + if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) + xa_destroy(&pf->eswitch.reprs); } static void ice_init_wakeup(struct ice_pf *pf) @@ -7401,9 +7403,9 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) goto err_vsi_rebuild; } - err = ice_vsi_rebuild_by_type(pf, ICE_VSI_SWITCHDEV_CTRL); + err = ice_eswitch_rebuild(pf); if (err) { - dev_err(dev, "Switchdev CTRL VSI rebuild failed: %d\n", err); + dev_err(dev, "Switchdev rebuild failed: %d\n", err); goto err_vsi_rebuild; } diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index c686ac0935eb..5f30fb131f74 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -14,7 +14,7 @@ */ static int ice_repr_get_sw_port_id(struct ice_repr *repr) { - return repr->vf->pf->hw.port_info->lport; + return repr->src_vsi->back->hw.port_info->lport; } /** @@ -35,7 +35,7 @@ ice_repr_get_phys_port_name(struct net_device *netdev, char *buf, size_t len) return -EOPNOTSUPP; res = snprintf(buf, len, "pf%dvfr%d", ice_repr_get_sw_port_id(repr), - repr->vf->vf_id); + repr->id); if (res <= 0) return -EOPNOTSUPP; return 0; @@ -278,25 +278,67 @@ ice_repr_reg_netdev(struct net_device *netdev) return register_netdev(netdev); } +static void ice_repr_remove_node(struct devlink_port *devlink_port) +{ + devl_lock(devlink_port->devlink); + devl_rate_leaf_destroy(devlink_port); + devl_unlock(devlink_port->devlink); +} + /** - * ice_repr_add - add representor for VF - * @vf: pointer to VF structure + * ice_repr_rem - remove representor from VF + * @repr: pointer to representor structure */ -static int ice_repr_add(struct ice_vf *vf) +static void ice_repr_rem(struct ice_repr *repr) +{ + kfree(repr->q_vector); + free_netdev(repr->netdev); + kfree(repr); +} + +/** + * ice_repr_rem_vf - remove representor from VF + * @repr: pointer to representor structure + */ +void ice_repr_rem_vf(struct ice_repr *repr) +{ + ice_repr_remove_node(&repr->vf->devlink_port); + unregister_netdev(repr->netdev); + ice_devlink_destroy_vf_port(repr->vf); + ice_virtchnl_set_dflt_ops(repr->vf); + ice_repr_rem(repr); +} + +static void ice_repr_set_tx_topology(struct ice_pf *pf) +{ + struct devlink *devlink; + + /* only export if ADQ and DCB disabled and eswitch enabled*/ + if (ice_is_adq_active(pf) || ice_is_dcb_active(pf) || + !ice_is_switchdev_running(pf)) + return; + + devlink = priv_to_devlink(pf); + ice_devlink_rate_init_tx_topology(devlink, ice_get_main_vsi(pf)); +} + +/** + * ice_repr_add - add representor for generic VSI + * @pf: pointer to PF structure + * @src_vsi: pointer to VSI structure of device to represent + * @parent_mac: device MAC 
address + */ +static struct ice_repr * +ice_repr_add(struct ice_pf *pf, struct ice_vsi *src_vsi, const u8 *parent_mac) { struct ice_q_vector *q_vector; struct ice_netdev_priv *np; struct ice_repr *repr; - struct ice_vsi *vsi; int err; - vsi = ice_get_vf_vsi(vf); - if (!vsi) - return -EINVAL; - repr = kzalloc(sizeof(*repr), GFP_KERNEL); if (!repr) - return -ENOMEM; + return ERR_PTR(-ENOMEM); repr->netdev = alloc_etherdev(sizeof(struct ice_netdev_priv)); if (!repr->netdev) { @@ -304,9 +346,7 @@ static int ice_repr_add(struct ice_vf *vf) goto err_alloc; } - repr->src_vsi = vsi; - repr->vf = vf; - vf->repr = repr; + repr->src_vsi = src_vsi; np = netdev_priv(repr->netdev); np->repr = repr; @@ -316,10 +356,40 @@ static int ice_repr_add(struct ice_vf *vf) goto err_alloc_q_vector; } repr->q_vector = q_vector; + repr->q_id = repr->id; + + ether_addr_copy(repr->parent_mac, parent_mac); + + return repr; + +err_alloc_q_vector: + free_netdev(repr->netdev); +err_alloc: + kfree(repr); + return ERR_PTR(err); +} + +struct ice_repr *ice_repr_add_vf(struct ice_vf *vf) +{ + struct ice_repr *repr; + struct ice_vsi *vsi; + int err; + + vsi = ice_get_vf_vsi(vf); + if (!vsi) + return ERR_PTR(-ENOENT); err = ice_devlink_create_vf_port(vf); if (err) - goto err_devlink; + return ERR_PTR(err); + + repr = ice_repr_add(vf->pf, vsi, vf->hw_lan_addr); + if (IS_ERR(repr)) { + err = PTR_ERR(repr); + goto err_repr_add; + } + + repr->vf = vf; repr->netdev->min_mtu = ETH_MIN_MTU; repr->netdev->max_mtu = ICE_MAX_MTU; @@ -331,100 +401,23 @@ static int ice_repr_add(struct ice_vf *vf) goto err_netdev; ice_virtchnl_set_repr_ops(vf); + ice_repr_set_tx_topology(vf->pf); - return 0; + return repr; err_netdev: + ice_repr_rem(repr); +err_repr_add: ice_devlink_destroy_vf_port(vf); -err_devlink: - kfree(repr->q_vector); - vf->repr->q_vector = NULL; -err_alloc_q_vector: - free_netdev(repr->netdev); - repr->netdev = NULL; -err_alloc: - kfree(repr); - vf->repr = NULL; - return err; -} - -/** - * ice_repr_rem - remove representor from VF - * @vf: pointer to VF structure - */ -static void ice_repr_rem(struct ice_vf *vf) -{ - if (!vf->repr) - return; - - kfree(vf->repr->q_vector); - vf->repr->q_vector = NULL; - unregister_netdev(vf->repr->netdev); - ice_devlink_destroy_vf_port(vf); - free_netdev(vf->repr->netdev); - vf->repr->netdev = NULL; - kfree(vf->repr); - vf->repr = NULL; - - ice_virtchnl_set_dflt_ops(vf); -} - -/** - * ice_repr_rem_from_all_vfs - remove port representor for all VFs - * @pf: pointer to PF structure - */ -void ice_repr_rem_from_all_vfs(struct ice_pf *pf) -{ - struct devlink *devlink; - struct ice_vf *vf; - unsigned int bkt; - - lockdep_assert_held(&pf->vfs.table_lock); - - ice_for_each_vf(pf, bkt, vf) - ice_repr_rem(vf); - - /* since all port representors are destroyed, there is - * no point in keeping the nodes - */ - devlink = priv_to_devlink(pf); - devl_lock(devlink); - devl_rate_nodes_destroy(devlink); - devl_unlock(devlink); + return ERR_PTR(err); } -/** - * ice_repr_add_for_all_vfs - add port representor for all VFs - * @pf: pointer to PF structure - */ -int ice_repr_add_for_all_vfs(struct ice_pf *pf) +struct ice_repr *ice_repr_get_by_vsi(struct ice_vsi *vsi) { - struct devlink *devlink; - struct ice_vf *vf; - unsigned int bkt; - int err; - - lockdep_assert_held(&pf->vfs.table_lock); - - ice_for_each_vf(pf, bkt, vf) { - err = ice_repr_add(vf); - if (err) - goto err; - } - - /* only export if ADQ and DCB disabled */ - if (ice_is_adq_active(pf) || ice_is_dcb_active(pf)) - return 0; - - devlink = priv_to_devlink(pf); - 
ice_devlink_rate_init_tx_topology(devlink, ice_get_main_vsi(pf)); - - return 0; - -err: - ice_repr_rem_from_all_vfs(pf); + if (!vsi->vf) + return NULL; - return err; + return xa_load(&vsi->back->eswitch.reprs, vsi->vf->repr_id); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_repr.h b/drivers/net/ethernet/intel/ice/ice_repr.h index e1ee2d2c1d2d..f9aede315716 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.h +++ b/drivers/net/ethernet/intel/ice/ice_repr.h @@ -13,14 +13,17 @@ struct ice_repr { struct net_device *netdev; struct metadata_dst *dst; struct ice_esw_br_port *br_port; + int q_id; + u32 id; + u8 parent_mac[ETH_ALEN]; #ifdef CONFIG_ICE_SWITCHDEV /* info about slow path rule */ struct ice_rule_query_data sp_rule; #endif }; -int ice_repr_add_for_all_vfs(struct ice_pf *pf); -void ice_repr_rem_from_all_vfs(struct ice_pf *pf); +struct ice_repr *ice_repr_add_vf(struct ice_vf *vf); +void ice_repr_rem_vf(struct ice_repr *repr); void ice_repr_start_tx_queues(struct ice_repr *repr); void ice_repr_stop_tx_queues(struct ice_repr *repr); @@ -29,4 +32,6 @@ void ice_repr_set_traffic_vsi(struct ice_repr *repr, struct ice_vsi *vsi); struct ice_repr *ice_netdev_to_repr(struct net_device *netdev); bool ice_is_port_repr_netdev(const struct net_device *netdev); + +struct ice_repr *ice_repr_get_by_vsi(struct ice_vsi *vsi); #endif diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 2a5e6616cc0a..5a45bd5ce6ad 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -172,13 +172,14 @@ void ice_free_vfs(struct ice_pf *pf) else dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n"); - mutex_lock(&vfs->table_lock); + ice_eswitch_reserve_cp_queues(pf, -ice_get_num_vfs(pf)); - ice_eswitch_release(pf); + mutex_lock(&vfs->table_lock); ice_for_each_vf(pf, bkt, vf) { mutex_lock(&vf->cfg_lock); + ice_eswitch_detach(pf, vf); ice_dis_vf_qs(vf); if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { @@ -614,6 +615,14 @@ static int ice_start_vfs(struct ice_pf *pf) goto teardown; } + retval = ice_eswitch_attach(pf, vf); + if (retval) { + dev_err(ice_pf_to_dev(pf), "Failed to attach VF %d to eswitch, error %d", + vf->vf_id, retval); + ice_vf_vsi_release(vf); + goto teardown; + } + set_bit(ICE_VF_STATE_INIT, vf->vf_states); ice_ena_vf_mappings(vf); wr32(hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_VFACTIVE); @@ -923,6 +932,7 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) goto err_unroll_sriov; } + ice_eswitch_reserve_cp_queues(pf, num_vfs); ret = ice_start_vfs(pf); if (ret) { dev_err(dev, "Failed to start %d VFs, err %d\n", num_vfs, ret); @@ -932,12 +942,6 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) clear_bit(ICE_VF_DIS, pf->state); - ret = ice_eswitch_configure(pf); - if (ret) { - dev_err(dev, "Failed to configure eswitch, err %d\n", ret); - goto err_unroll_sriov; - } - /* rearm global interrupts */ if (test_and_clear_bit(ICE_OICR_INTR_DIS, pf->state)) ice_irq_dynamic_ena(hw, NULL, NULL); diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index dd03cb69ad26..08d3bbf4b44c 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -653,7 +653,7 @@ static int ice_tc_setup_redirect_action(struct net_device *filter_dev, ice_tc_is_dev_uplink(target_dev)) { repr = ice_netdev_to_repr(filter_dev); - fltr->dest_vsi = repr->src_vsi->back->switchdev.uplink_vsi; + fltr->dest_vsi = 
repr->src_vsi->back->eswitch.uplink_vsi; fltr->direction = ICE_ESWITCH_FLTR_EGRESS; } else if (ice_tc_is_dev_uplink(filter_dev) && ice_is_port_repr_netdev(target_dev)) { @@ -765,7 +765,7 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) rule_info.sw_act.src = hw->pf_id; rule_info.flags_info.act = ICE_SINGLE_ACT_LB_ENABLE; } else if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS && - fltr->dest_vsi == vsi->back->switchdev.uplink_vsi) { + fltr->dest_vsi == vsi->back->eswitch.uplink_vsi) { /* VF to Uplink */ rule_info.sw_act.flag |= ICE_FLTR_TX; rule_info.sw_act.src = vsi->idx; diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index aca1f2ea5034..d2a99a20c4ad 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -760,6 +760,7 @@ void ice_reset_all_vfs(struct ice_pf *pf) ice_for_each_vf(pf, bkt, vf) { mutex_lock(&vf->cfg_lock); + ice_eswitch_detach(pf, vf); vf->driver_caps = 0; ice_vc_set_default_allowlist(vf); @@ -775,13 +776,11 @@ void ice_reset_all_vfs(struct ice_pf *pf) ice_vf_rebuild_vsi(vf); ice_vf_post_vsi_rebuild(vf); + ice_eswitch_attach(pf, vf); + mutex_unlock(&vf->cfg_lock); } - if (ice_is_eswitch_mode_switchdev(pf)) - if (ice_eswitch_rebuild(pf)) - dev_warn(dev, "eswitch rebuild failed\n"); - ice_flush(hw); clear_bit(ICE_VF_DIS, pf->state); @@ -928,7 +927,7 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) goto out_unlock; } - ice_eswitch_update_repr(vsi); + ice_eswitch_update_repr(vf->repr_id, vsi); /* if the VF has been reset allow it to come up again */ ice_mbx_clear_malvf(&vf->mbx_info); diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h index 93c774f2f437..35866553f288 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h @@ -130,7 +130,7 @@ struct ice_vf { struct ice_mdd_vf_events mdd_tx_events; DECLARE_BITMAP(opcodes_allowlist, VIRTCHNL_OP_MAX); - struct ice_repr *repr; + unsigned long repr_id; const struct ice_virtchnl_ops *virtchnl_ops; const struct ice_vf_ops *vf_ops; diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index f48f82d5e274..ac7c861e83a0 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -81,6 +81,21 @@ struct igc_tx_timestamp_request { u32 flags; /* flags that should be added to the tx_buffer */ }; +struct igc_inline_rx_tstamps { + /* Timestamps are saved in little endian at the beginning of the packet + * buffer following the layout: + * + * DWORD: | 0 | 1 | 2 | 3 | + * Field: | Timer1 SYSTIML | Timer1 SYSTIMH | Timer0 SYSTIML | Timer0 SYSTIMH | + * + * SYSTIML holds the nanoseconds part while SYSTIMH holds the seconds + * part of the timestamp. 
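 * (Per the igc_ptp changes later in this patch, Timer0 is the
 * adjustable PTP clock and Timer1 the free-running clock;
 * igc_get_tstamp() picks between the two based on its "cycles"
 * argument.)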
+ * + */ + __le32 timer1[2]; + __le32 timer0[2]; +}; + struct igc_ring_container { struct igc_ring *ring; /* pointer to linked list of rings */ unsigned int total_bytes; /* total bytes processed this int */ @@ -261,6 +276,8 @@ struct igc_adapter { unsigned int ptp_flags; /* System time value lock */ spinlock_t tmreg_lock; + /* Free-running timer lock */ + spinlock_t free_timer_lock; struct cyclecounter cc; struct timecounter tc; struct timespec64 prev_ptp_time; /* Pre-reset PTP clock */ @@ -469,6 +486,8 @@ enum igc_tx_flags { IGC_TX_FLAGS_TSTAMP_1 = 0x100, IGC_TX_FLAGS_TSTAMP_2 = 0x200, IGC_TX_FLAGS_TSTAMP_3 = 0x400, + + IGC_TX_FLAGS_TSTAMP_TIMER_1 = 0x800, }; enum igc_boards { @@ -531,7 +550,7 @@ struct igc_rx_buffer { struct igc_xdp_buff { struct xdp_buff xdp; union igc_adv_rx_desc *rx_desc; - ktime_t rx_ts; /* data indication bit IGC_RXDADV_STAT_TSIP */ + struct igc_inline_rx_tstamps *rx_ts; /* data indication bit IGC_RXDADV_STAT_TSIP */ }; struct igc_q_vector { diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h index f7d6491d4c60..bf8cdfbba9ff 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.h +++ b/drivers/net/ethernet/intel/igc/igc_base.h @@ -37,6 +37,10 @@ struct igc_adv_tx_context_desc { #define IGC_ADVTXD_TSTAMP_REG_1 0x00010000 /* Select register 1 for timestamp */ #define IGC_ADVTXD_TSTAMP_REG_2 0x00020000 /* Select register 2 for timestamp */ #define IGC_ADVTXD_TSTAMP_REG_3 0x00030000 /* Select register 3 for timestamp */ +#define IGC_ADVTXD_TSTAMP_TIMER_1 0x00010000 /* Select timer 1 for timestamp */ +#define IGC_ADVTXD_TSTAMP_TIMER_2 0x00020000 /* Select timer 2 for timestamp */ +#define IGC_ADVTXD_TSTAMP_TIMER_3 0x00030000 /* Select timer 3 for timestamp */ + #define IGC_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */ #define IGC_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */ #define IGC_ADVTXD_DCMD_EOP 0x01000000 /* End of Packet */ diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index b3037016f31d..5f92b3c7c3d4 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -317,6 +317,8 @@ #define IGC_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */ #define IGC_TXD_EXTCMD_TSTAMP 0x00000010 /* IEEE1588 Timestamp packet */ +#define IGC_TXD_PTP2_TIMER_1 0x00000020 + /* IPSec Encrypt Enable */ #define IGC_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ #define IGC_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index e9bb403bbacf..61db1d3bfa0b 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1299,14 +1299,16 @@ static void igc_tx_olinfo_status(struct igc_ring *tx_ring, u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT; /* insert L4 checksum */ - olinfo_status |= (tx_flags & IGC_TX_FLAGS_CSUM) * - ((IGC_TXD_POPTS_TXSM << 8) / - IGC_TX_FLAGS_CSUM); + olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_CSUM, + (IGC_TXD_POPTS_TXSM << 8)); /* insert IPv4 checksum */ - olinfo_status |= (tx_flags & IGC_TX_FLAGS_IPV4) * - (((IGC_TXD_POPTS_IXSM << 8)) / - IGC_TX_FLAGS_IPV4); + olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_IPV4, + (IGC_TXD_POPTS_IXSM << 8)); + + /* Use the second timer (free running, in general) for the timestamp */ + olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_TIMER_1, + IGC_TXD_PTP2_TIMER_1); 
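	/* The IGC_SET_FLAG() calls above map a tx_flags bit onto the
	 * corresponding olinfo_status bit, i.e. they contribute the third
	 * argument only when the named flag is set, replacing the older
	 * multiply/divide construction removed in this hunk.
	 */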
tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); } @@ -1651,6 +1653,8 @@ done: if (igc_request_tx_tstamp(adapter, skb, &tstamp_flags)) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= IGC_TX_FLAGS_TSTAMP | tstamp_flags; + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_USE_CYCLES) + tx_flags |= IGC_TX_FLAGS_TSTAMP_TIMER_1; } else { adapter->tx_hwtstamp_skipped++; } @@ -1963,9 +1967,9 @@ static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring, static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, struct igc_rx_buffer *rx_buffer, - struct xdp_buff *xdp, - ktime_t timestamp) + struct igc_xdp_buff *ctx) { + struct xdp_buff *xdp = &ctx->xdp; unsigned int metasize = xdp->data - xdp->data_meta; unsigned int size = xdp->data_end - xdp->data; unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size); @@ -1982,8 +1986,10 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, if (unlikely(!skb)) return NULL; - if (timestamp) - skb_hwtstamps(skb)->hwtstamp = timestamp; + if (ctx->rx_ts) { + skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP_NETDEV; + skb_hwtstamps(skb)->netdev_data = ctx->rx_ts; + } /* Determine available headroom for copy */ headlen = size; @@ -2583,11 +2589,10 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) int xdp_status = 0, rx_buffer_pgcnt; while (likely(total_packets < budget)) { - union igc_adv_rx_desc *rx_desc; + struct igc_xdp_buff ctx = { .rx_ts = NULL }; struct igc_rx_buffer *rx_buffer; + union igc_adv_rx_desc *rx_desc; unsigned int size, truesize; - struct igc_xdp_buff ctx; - ktime_t timestamp = 0; int pkt_offset = 0; void *pktbuf; @@ -2614,9 +2619,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset; if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) { - timestamp = igc_ptp_rx_pktstamp(q_vector->adapter, - pktbuf); - ctx.rx_ts = timestamp; + ctx.rx_ts = pktbuf; pkt_offset = IGC_TS_HDR_LEN; size -= IGC_TS_HDR_LEN; } @@ -2653,8 +2656,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) else if (ring_uses_build_skb(rx_ring)) skb = igc_build_skb(rx_ring, rx_buffer, &ctx.xdp); else - skb = igc_construct_skb(rx_ring, rx_buffer, &ctx.xdp, - timestamp); + skb = igc_construct_skb(rx_ring, rx_buffer, &ctx); /* exit if we failed to retrieve a buffer */ if (!skb) { @@ -2803,9 +2805,7 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) ctx->rx_desc = desc; if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) { - timestamp = igc_ptp_rx_pktstamp(q_vector->adapter, - bi->xdp->data); - ctx->rx_ts = timestamp; + ctx->rx_ts = bi->xdp->data; bi->xdp->data += IGC_TS_HDR_LEN; @@ -6562,6 +6562,24 @@ int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) return 0; } +static ktime_t igc_get_tstamp(struct net_device *dev, + const struct skb_shared_hwtstamps *hwtstamps, + bool cycles) +{ + struct igc_adapter *adapter = netdev_priv(dev); + struct igc_inline_rx_tstamps *tstamp; + ktime_t timestamp; + + tstamp = hwtstamps->netdev_data; + + if (cycles) + timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer1); + else + timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer0); + + return timestamp; +} + static const struct net_device_ops igc_netdev_ops = { .ndo_open = igc_open, .ndo_stop = igc_close, @@ -6579,6 +6597,7 @@ static const struct net_device_ops igc_netdev_ops = { .ndo_bpf = igc_bpf, .ndo_xdp_xmit = igc_xdp_xmit, .ndo_xsk_wakeup = 
igc_xsk_wakeup, + .ndo_get_tstamp = igc_get_tstamp, }; /* PCIe configuration access */ @@ -6682,9 +6701,11 @@ static int igc_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, static int igc_xdp_rx_timestamp(const struct xdp_md *_ctx, u64 *timestamp) { const struct igc_xdp_buff *ctx = (void *)_ctx; + struct igc_adapter *adapter = netdev_priv(ctx->xdp.rxq->dev); + struct igc_inline_rx_tstamps *tstamp = ctx->rx_ts; if (igc_test_staterr(ctx->rx_desc, IGC_RXDADV_STAT_TSIP)) { - *timestamp = ctx->rx_ts; + *timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer0); return 0; } diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 928f38792203..885faaa7b9de 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -459,12 +459,10 @@ static int igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter, /** * igc_ptp_rx_pktstamp - Retrieve timestamp from Rx packet buffer * @adapter: Pointer to adapter the packet buffer belongs to - * @buf: Pointer to packet buffer + * @buf: Pointer to start of timestamp in HW format (2 32-bit words) * - * This function retrieves the timestamp saved in the beginning of packet - * buffer. While two timestamps are available, one in timer0 reference and the - * other in timer1 reference, this function considers only the timestamp in - * timer0 reference. + * This function retrieves and converts the timestamp stored at @buf + * to ktime_t, adjusting for hardware latencies. * * Returns timestamp value. */ @@ -474,17 +472,8 @@ ktime_t igc_ptp_rx_pktstamp(struct igc_adapter *adapter, __le32 *buf) u32 secs, nsecs; int adjust; - /* Timestamps are saved in little endian at the beginning of the packet - * buffer following the layout: - * - * DWORD: | 0 | 1 | 2 | 3 | - * Field: | Timer1 SYSTIML | Timer1 SYSTIMH | Timer0 SYSTIML | Timer0 SYSTIMH | - * - * SYSTIML holds the nanoseconds part while SYSTIMH holds the seconds - * part of the timestamp. - */ - nsecs = le32_to_cpu(buf[2]); - secs = le32_to_cpu(buf[3]); + nsecs = le32_to_cpu(buf[0]); + secs = le32_to_cpu(buf[1]); timestamp = ktime_set(secs, nsecs); @@ -542,10 +531,11 @@ static void igc_ptp_enable_rx_timestamp(struct igc_adapter *adapter) for (i = 0; i < adapter->num_rx_queues; i++) { val = rd32(IGC_SRRCTL(i)); - /* FIXME: For now, only support retrieving RX timestamps from - * timer 0. + /* Enable retrieving timestamps from timer 0, the + * "adjustable clock" and timer 1 the "free running + * clock". 
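 * (TIMER1SEL is switched from 0 to 1 below, so the inline Rx
 * timestamp block carries both time references.)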
*/ - val |= IGC_SRRCTL_TIMER1SEL(0) | IGC_SRRCTL_TIMER0SEL(0) | + val |= IGC_SRRCTL_TIMER1SEL(1) | IGC_SRRCTL_TIMER0SEL(0) | IGC_SRRCTL_TIMESTAMP; wr32(IGC_SRRCTL(i), val); } @@ -1035,6 +1025,26 @@ static int igc_ptp_getcrosststamp(struct ptp_clock_info *ptp, adapter, &adapter->snapshot, cts); } +static int igc_ptp_getcyclesx64(struct ptp_clock_info *ptp, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + struct igc_adapter *igc = container_of(ptp, struct igc_adapter, ptp_caps); + struct igc_hw *hw = &igc->hw; + unsigned long flags; + + spin_lock_irqsave(&igc->free_timer_lock, flags); + + ptp_read_system_prets(sts); + ts->tv_nsec = rd32(IGC_SYSTIML_1); + ts->tv_sec = rd32(IGC_SYSTIMH_1); + ptp_read_system_postts(sts); + + spin_unlock_irqrestore(&igc->free_timer_lock, flags); + + return 0; +} + /** * igc_ptp_init - Initialize PTP functionality * @adapter: Board private structure @@ -1088,6 +1098,7 @@ void igc_ptp_init(struct igc_adapter *adapter) adapter->ptp_caps.adjfine = igc_ptp_adjfine_i225; adapter->ptp_caps.adjtime = igc_ptp_adjtime_i225; adapter->ptp_caps.gettimex64 = igc_ptp_gettimex64_i225; + adapter->ptp_caps.getcyclesx64 = igc_ptp_getcyclesx64; adapter->ptp_caps.settime64 = igc_ptp_settime_i225; adapter->ptp_caps.enable = igc_ptp_feature_enable_i225; adapter->ptp_caps.pps = 1; @@ -1108,6 +1119,7 @@ void igc_ptp_init(struct igc_adapter *adapter) } spin_lock_init(&adapter->ptp_tx_lock); + spin_lock_init(&adapter->free_timer_lock); spin_lock_init(&adapter->tmreg_lock); adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index 20e17f5fbce3..d38c87d7e5e8 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -243,6 +243,11 @@ #define IGC_SYSTIMR 0x0B6F8 /* System time register Residue */ #define IGC_TIMINCA 0x0B608 /* Increment attributes register - RW */ +#define IGC_SYSTIML_1 0x0B688 /* System time register Low - RO (timer 1) */ +#define IGC_SYSTIMH_1 0x0B68C /* System time register High - RO (timer 1) */ +#define IGC_SYSTIMR_1 0x0B684 /* System time register Residue (timer 1) */ +#define IGC_TIMINCA_1 0x0B690 /* Increment attributes register - RW (timer 1) */ + /* TX Timestamp Low */ #define IGC_TXSTMPL_0 0x0B618 #define IGC_TXSTMPL_1 0x0B698 diff --git a/drivers/net/ethernet/marvell/octeon_ep/Makefile b/drivers/net/ethernet/marvell/octeon_ep/Makefile index 2026c8118158..02a4a21bc298 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/Makefile +++ b/drivers/net/ethernet/marvell/octeon_ep/Makefile @@ -6,4 +6,5 @@ obj-$(CONFIG_OCTEON_EP) += octeon_ep.o octeon_ep-y := octep_main.o octep_cn9k_pf.o octep_tx.o octep_rx.o \ - octep_ethtool.o octep_ctrl_mbox.o octep_ctrl_net.o + octep_ethtool.o octep_ctrl_mbox.o octep_ctrl_net.o \ + octep_cnxk_pf.o diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c b/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c new file mode 100644 index 000000000000..abb03e9119e7 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c @@ -0,0 +1,886 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell Octeon EP (EndPoint) Ethernet Driver + * + * Copyright (C) 2020 Marvell. 
+ * + */ + +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> + +#include "octep_config.h" +#include "octep_main.h" +#include "octep_regs_cnxk_pf.h" + +/* We will support 128 pf's in control mbox */ +#define CTRL_MBOX_MAX_PF 128 +#define CTRL_MBOX_SZ ((size_t)(0x400000 / CTRL_MBOX_MAX_PF)) + +/* Names of Hardware non-queue generic interrupts */ +static char *cnxk_non_ioq_msix_names[] = { + "epf_ire_rint", + "epf_ore_rint", + "epf_vfire_rint", + "epf_rsvd0", + "epf_vfore_rint", + "epf_rsvd1", + "epf_mbox_rint", + "epf_rsvd2_0", + "epf_rsvd2_1", + "epf_dma_rint", + "epf_dma_vf_rint", + "epf_rsvd3", + "epf_pp_vf_rint", + "epf_rsvd3", + "epf_misc_rint", + "epf_rsvd5", + /* Next 16 are for OEI_RINT */ + "epf_oei_rint0", + "epf_oei_rint1", + "epf_oei_rint2", + "epf_oei_rint3", + "epf_oei_rint4", + "epf_oei_rint5", + "epf_oei_rint6", + "epf_oei_rint7", + "epf_oei_rint8", + "epf_oei_rint9", + "epf_oei_rint10", + "epf_oei_rint11", + "epf_oei_rint12", + "epf_oei_rint13", + "epf_oei_rint14", + "epf_oei_rint15", + /* IOQ interrupt */ + "octeon_ep" +}; + +/* Dump useful hardware CSRs for debug purpose */ +static void cnxk_dump_regs(struct octep_device *oct, int qno) +{ + struct device *dev = &oct->pdev->dev; + + dev_info(dev, "IQ-%d register dump\n", qno); + dev_info(dev, "R[%d]_IN_INSTR_DBELL[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_INSTR_DBELL(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_INSTR_DBELL(qno))); + dev_info(dev, "R[%d]_IN_CONTROL[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_CONTROL(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_CONTROL(qno))); + dev_info(dev, "R[%d]_IN_ENABLE[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_ENABLE(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_ENABLE(qno))); + dev_info(dev, "R[%d]_IN_INSTR_BADDR[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_INSTR_BADDR(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_INSTR_BADDR(qno))); + dev_info(dev, "R[%d]_IN_INSTR_RSIZE[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_INSTR_RSIZE(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_INSTR_RSIZE(qno))); + dev_info(dev, "R[%d]_IN_CNTS[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_CNTS(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_CNTS(qno))); + dev_info(dev, "R[%d]_IN_INT_LEVELS[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_INT_LEVELS(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_INT_LEVELS(qno))); + dev_info(dev, "R[%d]_IN_PKT_CNT[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_PKT_CNT(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_PKT_CNT(qno))); + dev_info(dev, "R[%d]_IN_BYTE_CNT[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_BYTE_CNT(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_BYTE_CNT(qno))); + + dev_info(dev, "OQ-%d register dump\n", qno); + dev_info(dev, "R[%d]_OUT_SLIST_DBELL[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_SLIST_DBELL(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_SLIST_DBELL(qno))); + dev_info(dev, "R[%d]_OUT_CONTROL[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_CONTROL(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_CONTROL(qno))); + dev_info(dev, "R[%d]_OUT_ENABLE[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_ENABLE(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_ENABLE(qno))); + dev_info(dev, "R[%d]_OUT_SLIST_BADDR[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_SLIST_BADDR(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_SLIST_BADDR(qno))); + dev_info(dev, "R[%d]_OUT_SLIST_RSIZE[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_SLIST_RSIZE(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_SLIST_RSIZE(qno))); + dev_info(dev, "R[%d]_OUT_CNTS[0x%llx]: 0x%016llx\n", + qno, 
CNXK_SDP_R_OUT_CNTS(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_CNTS(qno))); + dev_info(dev, "R[%d]_OUT_INT_LEVELS[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_INT_LEVELS(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_INT_LEVELS(qno))); + dev_info(dev, "R[%d]_OUT_PKT_CNT[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_PKT_CNT(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_PKT_CNT(qno))); + dev_info(dev, "R[%d]_OUT_BYTE_CNT[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_BYTE_CNT(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_BYTE_CNT(qno))); + dev_info(dev, "R[%d]_ERR_TYPE[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_ERR_TYPE(qno), + octep_read_csr64(oct, CNXK_SDP_R_ERR_TYPE(qno))); +} + +/* Reset Hardware Tx queue */ +static int cnxk_reset_iq(struct octep_device *oct, int q_no) +{ + struct octep_config *conf = oct->conf; + u64 val = 0ULL; + + dev_dbg(&oct->pdev->dev, "Reset PF IQ-%d\n", q_no); + + /* Get absolute queue number */ + q_no += conf->pf_ring_cfg.srn; + + /* Disable the Tx/Instruction Ring */ + octep_write_csr64(oct, CNXK_SDP_R_IN_ENABLE(q_no), val); + + /* clear the Instruction Ring packet/byte counts and doorbell CSRs */ + octep_write_csr64(oct, CNXK_SDP_R_IN_CNTS(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_IN_INT_LEVELS(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_IN_PKT_CNT(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_IN_BYTE_CNT(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_BADDR(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_RSIZE(q_no), val); + + val = 0xFFFFFFFF; + octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_DBELL(q_no), val); + + return 0; +} + +/* Reset Hardware Rx queue */ +static void cnxk_reset_oq(struct octep_device *oct, int q_no) +{ + u64 val = 0ULL; + + q_no += CFG_GET_PORTS_PF_SRN(oct->conf); + + /* Disable Output (Rx) Ring */ + octep_write_csr64(oct, CNXK_SDP_R_OUT_ENABLE(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_BADDR(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_RSIZE(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_OUT_INT_LEVELS(q_no), val); + + /* Clear count CSRs */ + val = octep_read_csr(oct, CNXK_SDP_R_OUT_CNTS(q_no)); + octep_write_csr(oct, CNXK_SDP_R_OUT_CNTS(q_no), val); + + octep_write_csr64(oct, CNXK_SDP_R_OUT_PKT_CNT(q_no), 0xFFFFFFFFFULL); + octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_DBELL(q_no), 0xFFFFFFFF); +} + +/* Reset all hardware Tx/Rx queues */ +static void octep_reset_io_queues_cnxk_pf(struct octep_device *oct) +{ + struct pci_dev *pdev = oct->pdev; + int q; + + dev_dbg(&pdev->dev, "Reset OCTEP_CNXK PF IO Queues\n"); + + for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) { + cnxk_reset_iq(oct, q); + cnxk_reset_oq(oct, q); + } +} + +/* Initialize windowed addresses to access some hardware registers */ +static void octep_setup_pci_window_regs_cnxk_pf(struct octep_device *oct) +{ + u8 __iomem *bar0_pciaddr = oct->mmio[0].hw_addr; + + oct->pci_win_regs.pci_win_wr_addr = (u8 __iomem *)(bar0_pciaddr + CNXK_SDP_WIN_WR_ADDR64); + oct->pci_win_regs.pci_win_rd_addr = (u8 __iomem *)(bar0_pciaddr + CNXK_SDP_WIN_RD_ADDR64); + oct->pci_win_regs.pci_win_wr_data = (u8 __iomem *)(bar0_pciaddr + CNXK_SDP_WIN_WR_DATA64); + oct->pci_win_regs.pci_win_rd_data = (u8 __iomem *)(bar0_pciaddr + CNXK_SDP_WIN_RD_DATA64); +} + +/* Configure Hardware mapping: inform hardware which rings belong to PF. 
*/ +static void octep_configure_ring_mapping_cnxk_pf(struct octep_device *oct) +{ + struct octep_config *conf = oct->conf; + struct pci_dev *pdev = oct->pdev; + u64 pf_srn = CFG_GET_PORTS_PF_SRN(oct->conf); + int q; + + for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(conf); q++) { + u64 regval = 0; + + if (oct->pcie_port) + regval = 8 << CNXK_SDP_FUNC_SEL_EPF_BIT_POS; + + octep_write_csr64(oct, CNXK_SDP_EPVF_RING(pf_srn + q), regval); + + regval = octep_read_csr64(oct, CNXK_SDP_EPVF_RING(pf_srn + q)); + dev_dbg(&pdev->dev, "Write SDP_EPVF_RING[0x%llx] = 0x%llx\n", + CNXK_SDP_EPVF_RING(pf_srn + q), regval); + } +} + +/* Initialize configuration limits and initial active config */ +static void octep_init_config_cnxk_pf(struct octep_device *oct) +{ + struct octep_config *conf = oct->conf; + struct pci_dev *pdev = oct->pdev; + u8 link = 0; + u64 val; + int pos; + + /* Read ring configuration: + * PF ring count, number of VFs and rings per VF supported + */ + val = octep_read_csr64(oct, CNXK_SDP_EPF_RINFO); + dev_info(&pdev->dev, "SDP_EPF_RINFO[0x%x]:0x%llx\n", CNXK_SDP_EPF_RINFO, val); + conf->sriov_cfg.max_rings_per_vf = CNXK_SDP_EPF_RINFO_RPVF(val); + conf->sriov_cfg.active_rings_per_vf = conf->sriov_cfg.max_rings_per_vf; + conf->sriov_cfg.max_vfs = CNXK_SDP_EPF_RINFO_NVFS(val); + conf->sriov_cfg.active_vfs = conf->sriov_cfg.max_vfs; + conf->sriov_cfg.vf_srn = CNXK_SDP_EPF_RINFO_SRN(val); + + val = octep_read_csr64(oct, CNXK_SDP_MAC_PF_RING_CTL(oct->pcie_port)); + dev_info(&pdev->dev, "SDP_MAC_PF_RING_CTL[%d]:0x%llx\n", oct->pcie_port, val); + conf->pf_ring_cfg.srn = CNXK_SDP_MAC_PF_RING_CTL_SRN(val); + conf->pf_ring_cfg.max_io_rings = CNXK_SDP_MAC_PF_RING_CTL_RPPF(val); + conf->pf_ring_cfg.active_io_rings = conf->pf_ring_cfg.max_io_rings; + dev_info(&pdev->dev, "pf_srn=%u rpvf=%u nvfs=%u rppf=%u\n", + conf->pf_ring_cfg.srn, conf->sriov_cfg.active_rings_per_vf, + conf->sriov_cfg.active_vfs, conf->pf_ring_cfg.active_io_rings); + + conf->iq.num_descs = OCTEP_IQ_MAX_DESCRIPTORS; + conf->iq.instr_type = OCTEP_64BYTE_INSTR; + conf->iq.db_min = OCTEP_DB_MIN; + conf->iq.intr_threshold = OCTEP_IQ_INTR_THRESHOLD; + + conf->oq.num_descs = OCTEP_OQ_MAX_DESCRIPTORS; + conf->oq.buf_size = OCTEP_OQ_BUF_SIZE; + conf->oq.refill_threshold = OCTEP_OQ_REFILL_THRESHOLD; + conf->oq.oq_intr_pkt = OCTEP_OQ_INTR_PKT_THRESHOLD; + conf->oq.oq_intr_time = OCTEP_OQ_INTR_TIME_THRESHOLD; + + conf->msix_cfg.non_ioq_msix = CNXK_NUM_NON_IOQ_INTR; + conf->msix_cfg.ioq_msix = conf->pf_ring_cfg.active_io_rings; + conf->msix_cfg.non_ioq_msix_names = cnxk_non_ioq_msix_names; + + pos = pci_find_ext_capability(oct->pdev, PCI_EXT_CAP_ID_SRIOV); + if (pos) { + pci_read_config_byte(oct->pdev, + pos + PCI_SRIOV_FUNC_LINK, + &link); + link = PCI_DEVFN(PCI_SLOT(oct->pdev->devfn), link); + } + conf->ctrl_mbox_cfg.barmem_addr = (void __iomem *)oct->mmio[2].hw_addr + + CNXK_PEM_BAR4_INDEX_OFFSET + + (link * CTRL_MBOX_SZ); + + conf->fw_info.hb_interval = OCTEP_DEFAULT_FW_HB_INTERVAL; + conf->fw_info.hb_miss_count = OCTEP_DEFAULT_FW_HB_MISS_COUNT; +} + +/* Setup registers for a hardware Tx Queue */ +static void octep_setup_iq_regs_cnxk_pf(struct octep_device *oct, int iq_no) +{ + struct octep_iq *iq = oct->iq[iq_no]; + u32 reset_instr_cnt; + u64 reg_val; + + iq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + reg_val = octep_read_csr64(oct, CNXK_SDP_R_IN_CONTROL(iq_no)); + + /* wait for IDLE to set to 1 */ + if (!(reg_val & CNXK_R_IN_CTL_IDLE)) { + do { + reg_val = octep_read_csr64(oct, CNXK_SDP_R_IN_CONTROL(iq_no)); + } while (!(reg_val & 
CNXK_R_IN_CTL_IDLE)); + } + + reg_val |= CNXK_R_IN_CTL_RDSIZE; + reg_val |= CNXK_R_IN_CTL_IS_64B; + reg_val |= CNXK_R_IN_CTL_ESR; + octep_write_csr64(oct, CNXK_SDP_R_IN_CONTROL(iq_no), reg_val); + + /* Write the start of the input queue's ring and its size */ + octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_BADDR(iq_no), + iq->desc_ring_dma); + octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_RSIZE(iq_no), + iq->max_count); + + /* Remember the doorbell & instruction count register addr + * for this queue + */ + iq->doorbell_reg = oct->mmio[0].hw_addr + + CNXK_SDP_R_IN_INSTR_DBELL(iq_no); + iq->inst_cnt_reg = oct->mmio[0].hw_addr + + CNXK_SDP_R_IN_CNTS(iq_no); + iq->intr_lvl_reg = oct->mmio[0].hw_addr + + CNXK_SDP_R_IN_INT_LEVELS(iq_no); + + /* Store the current instruction counter (used in flush_iq calculation) */ + reset_instr_cnt = readl(iq->inst_cnt_reg); + writel(reset_instr_cnt, iq->inst_cnt_reg); + + /* INTR_THRESHOLD is set to max(FFFFFFFF) to disable the INTR */ + reg_val = CFG_GET_IQ_INTR_THRESHOLD(oct->conf) & 0xffffffff; + octep_write_csr64(oct, CNXK_SDP_R_IN_INT_LEVELS(iq_no), reg_val); +} + +/* Setup registers for a hardware Rx Queue */ +static void octep_setup_oq_regs_cnxk_pf(struct octep_device *oct, int oq_no) +{ + u64 reg_val; + u64 oq_ctl = 0ULL; + u32 time_threshold = 0; + struct octep_oq *oq = oct->oq[oq_no]; + + oq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + reg_val = octep_read_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no)); + + /* wait for IDLE to set to 1 */ + if (!(reg_val & CNXK_R_OUT_CTL_IDLE)) { + do { + reg_val = octep_read_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no)); + } while (!(reg_val & CNXK_R_OUT_CTL_IDLE)); + } + + reg_val &= ~(CNXK_R_OUT_CTL_IMODE); + reg_val &= ~(CNXK_R_OUT_CTL_ROR_P); + reg_val &= ~(CNXK_R_OUT_CTL_NSR_P); + reg_val &= ~(CNXK_R_OUT_CTL_ROR_I); + reg_val &= ~(CNXK_R_OUT_CTL_NSR_I); + reg_val &= ~(CNXK_R_OUT_CTL_ES_I); + reg_val &= ~(CNXK_R_OUT_CTL_ROR_D); + reg_val &= ~(CNXK_R_OUT_CTL_NSR_D); + reg_val &= ~(CNXK_R_OUT_CTL_ES_D); + reg_val |= (CNXK_R_OUT_CTL_ES_P); + + octep_write_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no), reg_val); + octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_BADDR(oq_no), + oq->desc_ring_dma); + octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_RSIZE(oq_no), + oq->max_count); + + oq_ctl = octep_read_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no)); + + /* Clear the ISIZE and BSIZE (22-0) */ + oq_ctl &= ~0x7fffffULL; + + /* Populate the BSIZE (15-0) */ + oq_ctl |= (oq->buffer_size & 0xffff); + octep_write_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no), oq_ctl); + + /* Get the mapped address of the pkt_sent and pkts_credit regs */ + oq->pkts_sent_reg = oct->mmio[0].hw_addr + CNXK_SDP_R_OUT_CNTS(oq_no); + oq->pkts_credit_reg = oct->mmio[0].hw_addr + + CNXK_SDP_R_OUT_SLIST_DBELL(oq_no); + + time_threshold = CFG_GET_OQ_INTR_TIME(oct->conf); + reg_val = ((u64)time_threshold << 32) | + CFG_GET_OQ_INTR_PKT(oct->conf); + octep_write_csr64(oct, CNXK_SDP_R_OUT_INT_LEVELS(oq_no), reg_val); +} + +/* Setup registers for a PF mailbox */ +static void octep_setup_mbox_regs_cnxk_pf(struct octep_device *oct, int q_no) +{ + struct octep_mbox *mbox = oct->mbox[q_no]; + + mbox->q_no = q_no; + + /* PF mbox interrupt reg */ + mbox->mbox_int_reg = oct->mmio[0].hw_addr + CNXK_SDP_EPF_MBOX_RINT(0); + + /* PF to VF DATA reg. PF writes into this reg */ + mbox->mbox_write_reg = oct->mmio[0].hw_addr + CNXK_SDP_R_MBOX_PF_VF_DATA(q_no); + + /* VF to PF DATA reg. 
PF reads from this reg */ + mbox->mbox_read_reg = oct->mmio[0].hw_addr + CNXK_SDP_R_MBOX_VF_PF_DATA(q_no); +} + +/* Poll OEI events like heartbeat */ +static void octep_poll_oei_cnxk_pf(struct octep_device *oct) +{ + u64 reg0; + + /* Check for OEI INTR */ + reg0 = octep_read_csr64(oct, CNXK_SDP_EPF_OEI_RINT); + if (reg0) { + octep_write_csr64(oct, CNXK_SDP_EPF_OEI_RINT, reg0); + if (reg0 & CNXK_SDP_EPF_OEI_RINT_DATA_BIT_MBOX) + queue_work(octep_wq, &oct->ctrl_mbox_task); + if (reg0 & CNXK_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT) + atomic_set(&oct->hb_miss_cnt, 0); + } +} + +/* OEI interrupt handler */ +static irqreturn_t octep_oei_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + + octep_poll_oei_cnxk_pf(oct); + return IRQ_HANDLED; +} + +/* Process non-ioq interrupts required to keep pf interface running. + * OEI_RINT is needed for control mailbox + * MBOX_RINT is needed for pfvf mailbox + */ +static void octep_poll_non_ioq_interrupts_cnxk_pf(struct octep_device *oct) +{ + octep_poll_oei_cnxk_pf(oct); +} + +/* Interrupt handler for input ring error interrupts. */ +static irqreturn_t octep_ire_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; + int i = 0; + + /* Check for IRERR INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_IRERR_RINT); + if (reg_val) { + dev_info(&pdev->dev, + "received IRERR_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CNXK_SDP_EPF_IRERR_RINT, reg_val); + + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) { + reg_val = octep_read_csr64(oct, + CNXK_SDP_R_ERR_TYPE(i)); + if (reg_val) { + dev_info(&pdev->dev, + "Received err type on IQ-%d: 0x%llx\n", + i, reg_val); + octep_write_csr64(oct, CNXK_SDP_R_ERR_TYPE(i), + reg_val); + } + } + } + return IRQ_HANDLED; +} + +/* Interrupt handler for output ring error interrupts. */ +static irqreturn_t octep_ore_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; + int i = 0; + + /* Check for ORERR INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_ORERR_RINT); + if (reg_val) { + dev_info(&pdev->dev, + "Received ORERR_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CNXK_SDP_EPF_ORERR_RINT, reg_val); + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) { + reg_val = octep_read_csr64(oct, CNXK_SDP_R_ERR_TYPE(i)); + if (reg_val) { + dev_info(&pdev->dev, + "Received err type on OQ-%d: 0x%llx\n", + i, reg_val); + octep_write_csr64(oct, CNXK_SDP_R_ERR_TYPE(i), + reg_val); + } + } + } + return IRQ_HANDLED; +} + +/* Interrupt handler for vf input ring error interrupts. */ +static irqreturn_t octep_vfire_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; + + /* Check for VFIRE INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_VFIRE_RINT(0)); + if (reg_val) { + dev_info(&pdev->dev, + "Received VFIRE_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CNXK_SDP_EPF_VFIRE_RINT(0), reg_val); + } + return IRQ_HANDLED; +} + +/* Interrupt handler for vf output ring error interrupts. 
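+ * Any set bits in the VFORE status register are logged and written back
+ * to the register to acknowledge them.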
*/ +static irqreturn_t octep_vfore_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; + + /* Check for VFORE INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_VFORE_RINT(0)); + if (reg_val) { + dev_info(&pdev->dev, + "Received VFORE_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CNXK_SDP_EPF_VFORE_RINT(0), reg_val); + } + return IRQ_HANDLED; +} + +/* Interrupt handler for dpi dma related interrupts. */ +static irqreturn_t octep_dma_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + u64 reg_val = 0; + + /* Check for DMA INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_DMA_RINT); + if (reg_val) + octep_write_csr64(oct, CNXK_SDP_EPF_DMA_RINT, reg_val); + + return IRQ_HANDLED; +} + +/* Interrupt handler for dpi dma transaction error interrupts for VFs */ +static irqreturn_t octep_dma_vf_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; + + /* Check for DMA VF INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_DMA_VF_RINT(0)); + if (reg_val) { + dev_info(&pdev->dev, + "Received DMA_VF_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CNXK_SDP_EPF_DMA_VF_RINT(0), reg_val); + } + return IRQ_HANDLED; +} + +/* Interrupt handler for pp transaction error interrupts for VFs */ +static irqreturn_t octep_pp_vf_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; + + /* Check for PPVF INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_PP_VF_RINT(0)); + if (reg_val) { + dev_info(&pdev->dev, + "Received PP_VF_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CNXK_SDP_EPF_PP_VF_RINT(0), reg_val); + } + return IRQ_HANDLED; +} + +/* Interrupt handler for mac related interrupts. */ +static irqreturn_t octep_misc_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; + + /* Check for MISC INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_MISC_RINT); + if (reg_val) { + dev_info(&pdev->dev, + "Received MISC_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CNXK_SDP_EPF_MISC_RINT, reg_val); + } + return IRQ_HANDLED; +} + +/* Interrupts handler for all reserved interrupts. */ +static irqreturn_t octep_rsvd_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + + dev_info(&pdev->dev, "Reserved interrupts raised; Ignore\n"); + return IRQ_HANDLED; +} + +/* Tx/Rx queue interrupt handler */ +static irqreturn_t octep_ioq_intr_handler_cnxk_pf(void *data) +{ + struct octep_ioq_vector *vector = (struct octep_ioq_vector *)data; + struct octep_oq *oq = vector->oq; + + napi_schedule_irqoff(oq->napi); + return IRQ_HANDLED; +} + +/* soft reset */ +static int octep_soft_reset_cnxk_pf(struct octep_device *oct) +{ + dev_info(&oct->pdev->dev, "CNXKXX: Doing soft reset\n"); + + octep_write_csr64(oct, CNXK_SDP_WIN_WR_MASK_REG, 0xFF); + + /* Firmware status CSR is supposed to be cleared by + * core domain reset, but due to a hw bug, it is not. + * Set it to RUNNING right before reset so that it is not + * left in READY (1) state after a reset. 
This is required + * in addition to the early setting to handle the case where + * the OcteonTX is unexpectedly reset, reboots, and then + * the module is removed. + */ + OCTEP_PCI_WIN_WRITE(oct, CNXK_PEMX_PFX_CSX_PFCFGX(0, 0, CNXK_PCIEEP_VSECST_CTL), + FW_STATUS_RUNNING); + + /* Set chip domain reset bit */ + OCTEP_PCI_WIN_WRITE(oct, CNXK_RST_CHIP_DOMAIN_W1S, 1); + /* Wait till Octeon resets. */ + mdelay(10); + /* restore the reset value */ + octep_write_csr64(oct, CNXK_SDP_WIN_WR_MASK_REG, 0xFF); + + return 0; +} + +/* Re-initialize Octeon hardware registers */ +static void octep_reinit_regs_cnxk_pf(struct octep_device *oct) +{ + u32 i; + + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) + oct->hw_ops.setup_iq_regs(oct, i); + + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) + oct->hw_ops.setup_oq_regs(oct, i); + + oct->hw_ops.enable_interrupts(oct); + oct->hw_ops.enable_io_queues(oct); + + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) + writel(oct->oq[i]->max_count, oct->oq[i]->pkts_credit_reg); +} + +/* Enable all interrupts */ +static void octep_enable_interrupts_cnxk_pf(struct octep_device *oct) +{ + u64 intr_mask = 0ULL; + int srn, num_rings, i; + + srn = CFG_GET_PORTS_PF_SRN(oct->conf); + num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); + + for (i = 0; i < num_rings; i++) + intr_mask |= (0x1ULL << (srn + i)); + + octep_write_csr64(oct, CNXK_SDP_EPF_IRERR_RINT_ENA_W1S, intr_mask); + octep_write_csr64(oct, CNXK_SDP_EPF_ORERR_RINT_ENA_W1S, intr_mask); + octep_write_csr64(oct, CNXK_SDP_EPF_OEI_RINT_ENA_W1S, -1ULL); + + octep_write_csr64(oct, CNXK_SDP_EPF_VFIRE_RINT_ENA_W1S(0), -1ULL); + octep_write_csr64(oct, CNXK_SDP_EPF_VFORE_RINT_ENA_W1S(0), -1ULL); + + octep_write_csr64(oct, CNXK_SDP_EPF_MISC_RINT_ENA_W1S, intr_mask); + octep_write_csr64(oct, CNXK_SDP_EPF_DMA_RINT_ENA_W1S, intr_mask); + + octep_write_csr64(oct, CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1S(0), -1ULL); + octep_write_csr64(oct, CNXK_SDP_EPF_PP_VF_RINT_ENA_W1S(0), -1ULL); +} + +/* Disable all interrupts */ +static void octep_disable_interrupts_cnxk_pf(struct octep_device *oct) +{ + u64 intr_mask = 0ULL; + int srn, num_rings, i; + + srn = CFG_GET_PORTS_PF_SRN(oct->conf); + num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); + + for (i = 0; i < num_rings; i++) + intr_mask |= (0x1ULL << (srn + i)); + + octep_write_csr64(oct, CNXK_SDP_EPF_IRERR_RINT_ENA_W1C, intr_mask); + octep_write_csr64(oct, CNXK_SDP_EPF_ORERR_RINT_ENA_W1C, intr_mask); + octep_write_csr64(oct, CNXK_SDP_EPF_OEI_RINT_ENA_W1C, -1ULL); + + octep_write_csr64(oct, CNXK_SDP_EPF_VFIRE_RINT_ENA_W1C(0), -1ULL); + octep_write_csr64(oct, CNXK_SDP_EPF_VFORE_RINT_ENA_W1C(0), -1ULL); + + octep_write_csr64(oct, CNXK_SDP_EPF_MISC_RINT_ENA_W1C, intr_mask); + octep_write_csr64(oct, CNXK_SDP_EPF_DMA_RINT_ENA_W1C, intr_mask); + + octep_write_csr64(oct, CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1C(0), -1ULL); + octep_write_csr64(oct, CNXK_SDP_EPF_PP_VF_RINT_ENA_W1C(0), -1ULL); +} + +/* Get new Octeon Read Index: index of descriptor that Octeon reads next. 
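+ * The hardware instruction count register accumulates; the difference from
+ * the value seen on the previous call (u32 arithmetic handles counter wrap)
+ * gives the number of descriptors consumed since then, and the read index
+ * advances by that amount modulo the ring size.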
*/ +static u32 octep_update_iq_read_index_cnxk_pf(struct octep_iq *iq) +{ + u32 pkt_in_done = readl(iq->inst_cnt_reg); + u32 last_done, new_idx; + + last_done = pkt_in_done - iq->pkt_in_done; + iq->pkt_in_done = pkt_in_done; + + new_idx = (iq->octep_read_index + last_done) % iq->max_count; + + return new_idx; +} + +/* Enable a hardware Tx Queue */ +static void octep_enable_iq_cnxk_pf(struct octep_device *oct, int iq_no) +{ + u64 loop = HZ; + u64 reg_val; + + iq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + + octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_DBELL(iq_no), 0xFFFFFFFF); + + while (octep_read_csr64(oct, CNXK_SDP_R_IN_INSTR_DBELL(iq_no)) && + loop--) { + schedule_timeout_interruptible(1); + } + + reg_val = octep_read_csr64(oct, CNXK_SDP_R_IN_INT_LEVELS(iq_no)); + reg_val |= (0x1ULL << 62); + octep_write_csr64(oct, CNXK_SDP_R_IN_INT_LEVELS(iq_no), reg_val); + + reg_val = octep_read_csr64(oct, CNXK_SDP_R_IN_ENABLE(iq_no)); + reg_val |= 0x1ULL; + octep_write_csr64(oct, CNXK_SDP_R_IN_ENABLE(iq_no), reg_val); +} + +/* Enable a hardware Rx Queue */ +static void octep_enable_oq_cnxk_pf(struct octep_device *oct, int oq_no) +{ + u64 reg_val = 0ULL; + + oq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + + reg_val = octep_read_csr64(oct, CNXK_SDP_R_OUT_INT_LEVELS(oq_no)); + reg_val |= (0x1ULL << 62); + octep_write_csr64(oct, CNXK_SDP_R_OUT_INT_LEVELS(oq_no), reg_val); + + octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_DBELL(oq_no), 0xFFFFFFFF); + + reg_val = octep_read_csr64(oct, CNXK_SDP_R_OUT_ENABLE(oq_no)); + reg_val |= 0x1ULL; + octep_write_csr64(oct, CNXK_SDP_R_OUT_ENABLE(oq_no), reg_val); +} + +/* Enable all hardware Tx/Rx Queues assined to PF */ +static void octep_enable_io_queues_cnxk_pf(struct octep_device *oct) +{ + u8 q; + + for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) { + octep_enable_iq_cnxk_pf(oct, q); + octep_enable_oq_cnxk_pf(oct, q); + } +} + +/* Disable a hardware Tx Queue assined to PF */ +static void octep_disable_iq_cnxk_pf(struct octep_device *oct, int iq_no) +{ + u64 reg_val = 0ULL; + + iq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + + reg_val = octep_read_csr64(oct, CNXK_SDP_R_IN_ENABLE(iq_no)); + reg_val &= ~0x1ULL; + octep_write_csr64(oct, CNXK_SDP_R_IN_ENABLE(iq_no), reg_val); +} + +/* Disable a hardware Rx Queue assined to PF */ +static void octep_disable_oq_cnxk_pf(struct octep_device *oct, int oq_no) +{ + u64 reg_val = 0ULL; + + oq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + reg_val = octep_read_csr64(oct, CNXK_SDP_R_OUT_ENABLE(oq_no)); + reg_val &= ~0x1ULL; + octep_write_csr64(oct, CNXK_SDP_R_OUT_ENABLE(oq_no), reg_val); +} + +/* Disable all hardware Tx/Rx Queues assined to PF */ +static void octep_disable_io_queues_cnxk_pf(struct octep_device *oct) +{ + int q = 0; + + for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) { + octep_disable_iq_cnxk_pf(oct, q); + octep_disable_oq_cnxk_pf(oct, q); + } +} + +/* Dump hardware registers (including Tx/Rx queues) for debugging. */ +static void octep_dump_registers_cnxk_pf(struct octep_device *oct) +{ + u8 srn, num_rings, q; + + srn = CFG_GET_PORTS_PF_SRN(oct->conf); + num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); + + for (q = srn; q < srn + num_rings; q++) + cnxk_dump_regs(oct, q); +} + +/** + * octep_device_setup_cnxk_pf() - Setup Octeon device. + * + * @oct: Octeon device private data structure. + * + * - initialize hardware operations. + * - get target side pcie port number for the device. + * - setup window access to hardware registers. + * - set initial configuration and max limits. 
+ * - setup hardware mapping of rings to the PF device. + */ +void octep_device_setup_cnxk_pf(struct octep_device *oct) +{ + oct->hw_ops.setup_iq_regs = octep_setup_iq_regs_cnxk_pf; + oct->hw_ops.setup_oq_regs = octep_setup_oq_regs_cnxk_pf; + oct->hw_ops.setup_mbox_regs = octep_setup_mbox_regs_cnxk_pf; + + oct->hw_ops.oei_intr_handler = octep_oei_intr_handler_cnxk_pf; + oct->hw_ops.ire_intr_handler = octep_ire_intr_handler_cnxk_pf; + oct->hw_ops.ore_intr_handler = octep_ore_intr_handler_cnxk_pf; + oct->hw_ops.vfire_intr_handler = octep_vfire_intr_handler_cnxk_pf; + oct->hw_ops.vfore_intr_handler = octep_vfore_intr_handler_cnxk_pf; + oct->hw_ops.dma_intr_handler = octep_dma_intr_handler_cnxk_pf; + oct->hw_ops.dma_vf_intr_handler = octep_dma_vf_intr_handler_cnxk_pf; + oct->hw_ops.pp_vf_intr_handler = octep_pp_vf_intr_handler_cnxk_pf; + oct->hw_ops.misc_intr_handler = octep_misc_intr_handler_cnxk_pf; + oct->hw_ops.rsvd_intr_handler = octep_rsvd_intr_handler_cnxk_pf; + oct->hw_ops.ioq_intr_handler = octep_ioq_intr_handler_cnxk_pf; + oct->hw_ops.soft_reset = octep_soft_reset_cnxk_pf; + oct->hw_ops.reinit_regs = octep_reinit_regs_cnxk_pf; + + oct->hw_ops.enable_interrupts = octep_enable_interrupts_cnxk_pf; + oct->hw_ops.disable_interrupts = octep_disable_interrupts_cnxk_pf; + oct->hw_ops.poll_non_ioq_interrupts = octep_poll_non_ioq_interrupts_cnxk_pf; + + oct->hw_ops.update_iq_read_idx = octep_update_iq_read_index_cnxk_pf; + + oct->hw_ops.enable_iq = octep_enable_iq_cnxk_pf; + oct->hw_ops.enable_oq = octep_enable_oq_cnxk_pf; + oct->hw_ops.enable_io_queues = octep_enable_io_queues_cnxk_pf; + + oct->hw_ops.disable_iq = octep_disable_iq_cnxk_pf; + oct->hw_ops.disable_oq = octep_disable_oq_cnxk_pf; + oct->hw_ops.disable_io_queues = octep_disable_io_queues_cnxk_pf; + oct->hw_ops.reset_io_queues = octep_reset_io_queues_cnxk_pf; + + oct->hw_ops.dump_registers = octep_dump_registers_cnxk_pf; + + octep_setup_pci_window_regs_cnxk_pf(oct); + + oct->pcie_port = octep_read_csr64(oct, CNXK_SDP_MAC_NUMBER) & 0xff; + dev_info(&oct->pdev->dev, + "Octeon device using PCIE Port %d\n", oct->pcie_port); + + octep_init_config_cnxk_pf(oct); + octep_configure_ring_mapping_cnxk_pf(oct); + + /* Firmware status CSR is supposed to be cleared by + * core domain reset, but due to IPBUPEM-38842, it is not. + * Set it to RUNNING early in boot, so that unexpected resets + * leave it in a state that is not READY (1). 
+ */ + OCTEP_PCI_WIN_WRITE(oct, CNXK_PEMX_PFX_CSX_PFCFGX(0, 0, CNXK_PCIEEP_VSECST_CTL), + FW_STATUS_RUNNING); +} diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h index 1622a6ebf036..91cfa19c65b9 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h @@ -13,9 +13,10 @@ #define OCTEP_64BYTE_INSTR 64 /* Tx Queue: maximum descriptors per ring */ +/* This needs to be a power of 2 */ #define OCTEP_IQ_MAX_DESCRIPTORS 1024 /* Minimum input (Tx) requests to be enqueued to ring doorbell */ -#define OCTEP_DB_MIN 1 +#define OCTEP_DB_MIN 8 /* Packet threshold for Tx queue interrupt */ #define OCTEP_IQ_INTR_THRESHOLD 0x0 diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 552970c7dec0..2da00a701df2 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -24,6 +24,10 @@ struct workqueue_struct *octep_wq; static const struct pci_device_id octep_pci_id_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN93_PF)}, {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CNF95N_PF)}, + {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN10KA_PF)}, + {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CNF10KA_PF)}, + {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CNF10KB_PF)}, + {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN10KB_PF)}, {0, }, }; MODULE_DEVICE_TABLE(pci, octep_pci_id_tbl); @@ -777,17 +781,24 @@ static int octep_stop(struct net_device *netdev) */ static inline int octep_iq_full_check(struct octep_iq *iq) { - if (likely((iq->max_count - atomic_read(&iq->instr_pending)) >= + if (likely((IQ_INSTR_SPACE(iq)) > OCTEP_WAKE_QUEUE_THRESHOLD)) return 0; /* Stop the queue if unable to send */ netif_stop_subqueue(iq->netdev, iq->q_no); + /* Allow for pending updates in write index + * from iq_process_completion in other cpus + * to reflect, in case queue gets free + * entries. + */ + smp_mb(); + /* check again and restart the queue, in case NAPI has just freed * enough Tx ring entries. 
*/ - if (unlikely((iq->max_count - atomic_read(&iq->instr_pending)) >= + if (unlikely(IQ_INSTR_SPACE(iq) > OCTEP_WAKE_QUEUE_THRESHOLD)) { netif_start_subqueue(iq->netdev, iq->q_no); iq->stats.restart_cnt++; @@ -818,8 +829,12 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb, struct octep_iq *iq; skb_frag_t *frag; u16 nr_frags, si; + int xmit_more; u16 q_no, wi; + if (skb_put_padto(skb, ETH_ZLEN)) + return NETDEV_TX_OK; + q_no = skb_get_queue_mapping(skb); if (q_no >= oct->num_iqs) { netdev_err(netdev, "Invalid Tx skb->queue_mapping=%d\n", q_no); @@ -827,10 +842,6 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb, } iq = oct->iq[q_no]; - if (octep_iq_full_check(iq)) { - iq->stats.tx_busy++; - return NETDEV_TX_BUSY; - } shinfo = skb_shinfo(skb); nr_frags = shinfo->nr_frags; @@ -869,9 +880,6 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb, if (dma_mapping_error(iq->dev, dma)) goto dma_map_err; - dma_sync_single_for_cpu(iq->dev, tx_buffer->sglist_dma, - OCTEP_SGLIST_SIZE_PER_PKT, - DMA_TO_DEVICE); memset(sglist, 0, OCTEP_SGLIST_SIZE_PER_PKT); sglist[0].len[3] = len; sglist[0].dma_ptr[0] = dma; @@ -891,26 +899,33 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb, frag++; si++; } - dma_sync_single_for_device(iq->dev, tx_buffer->sglist_dma, - OCTEP_SGLIST_SIZE_PER_PKT, - DMA_TO_DEVICE); - hw_desc->dptr = tx_buffer->sglist_dma; } - netdev_tx_sent_queue(iq->netdev_q, skb->len); + xmit_more = netdev_xmit_more(); + + __netdev_tx_sent_queue(iq->netdev_q, skb->len, xmit_more); + skb_tx_timestamp(skb); - atomic_inc(&iq->instr_pending); + iq->fill_cnt++; wi++; - if (wi == iq->max_count) - wi = 0; - iq->host_write_index = wi; + iq->host_write_index = wi & iq->ring_size_mask; + + /* octep_iq_full_check stops the queue and returns + * true if so, in case the queue has become full + * by inserting current packet. If so, we can + * go ahead and ring doorbell. 
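+ * Otherwise, when the stack signals xmit_more and fewer than fill_threshold
+ * descriptors have been queued since the last doorbell, the doorbell write
+ * is deferred so that several packets are posted with a single write.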
+ */ + if (!octep_iq_full_check(iq) && xmit_more && + iq->fill_cnt < iq->fill_threshold) + return NETDEV_TX_OK; + /* Flush the hw descriptor before writing to doorbell */ wmb(); - - /* Ring Doorbell to notify the NIC there is a new packet */ - writel(1, iq->doorbell_reg); - iq->stats.instr_posted++; + /* Ring Doorbell to notify the NIC of new packets */ + writel(iq->fill_cnt, iq->doorbell_reg); + iq->stats.instr_posted += iq->fill_cnt; + iq->fill_cnt = 0; return NETDEV_TX_OK; dma_map_sg_err: @@ -1136,6 +1151,14 @@ static const char *octep_devid_to_str(struct octep_device *oct) return "CN93XX"; case OCTEP_PCI_DEVICE_ID_CNF95N_PF: return "CNF95N"; + case OCTEP_PCI_DEVICE_ID_CN10KA_PF: + return "CN10KA"; + case OCTEP_PCI_DEVICE_ID_CNF10KA_PF: + return "CNF10KA"; + case OCTEP_PCI_DEVICE_ID_CNF10KB_PF: + return "CNF10KB"; + case OCTEP_PCI_DEVICE_ID_CN10KB_PF: + return "CN10KB"; default: return "Unsupported"; } @@ -1181,6 +1204,14 @@ int octep_device_setup(struct octep_device *oct) OCTEP_MINOR_REV(oct)); octep_device_setup_cn93_pf(oct); break; + case OCTEP_PCI_DEVICE_ID_CNF10KA_PF: + case OCTEP_PCI_DEVICE_ID_CN10KA_PF: + case OCTEP_PCI_DEVICE_ID_CNF10KB_PF: + case OCTEP_PCI_DEVICE_ID_CN10KB_PF: + dev_info(&pdev->dev, "Setting up OCTEON %s PF PASS%d.%d\n", + octep_devid_to_str(oct), OCTEP_MAJOR_REV(oct), OCTEP_MINOR_REV(oct)); + octep_device_setup_cnxk_pf(oct); + break; default: dev_err(&pdev->dev, "%s: unsupported device\n", __func__); diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h index 6df902ebb7f3..e2fe8b28eb0e 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h @@ -23,6 +23,11 @@ #define OCTEP_PCI_DEVICE_ID_CNF95N_PF 0xB400 //95N PF +#define OCTEP_PCI_DEVICE_ID_CN10KA_PF 0xB900 //CN10KA PF +#define OCTEP_PCI_DEVICE_ID_CNF10KA_PF 0xBA00 //CNF10KA PF +#define OCTEP_PCI_DEVICE_ID_CNF10KB_PF 0xBC00 //CNF10KB PF +#define OCTEP_PCI_DEVICE_ID_CN10KB_PF 0xBD00 //CN10KB PF + #define OCTEP_MAX_QUEUES 63 #define OCTEP_MAX_IQ OCTEP_MAX_QUEUES #define OCTEP_MAX_OQ OCTEP_MAX_QUEUES @@ -40,6 +45,15 @@ #define OCTEP_OQ_INTR_RESEND_BIT 59 #define OCTEP_MMIO_REGIONS 3 + +#define IQ_INSTR_PENDING(iq) ({ typeof(iq) iq__ = (iq); \ + ((iq__)->host_write_index - (iq__)->flush_index) & \ + (iq__)->ring_size_mask; \ + }) +#define IQ_INSTR_SPACE(iq) ({ typeof(iq) iq_ = (iq); \ + (iq_)->max_count - IQ_INSTR_PENDING(iq_); \ + }) + /* PCI address space mapping information. * Each of the 3 address spaces given by BAR0, BAR2 and BAR4 of * Octeon gets mapped to different physical address spaces in @@ -377,6 +391,7 @@ int octep_setup_oqs(struct octep_device *oct); void octep_free_oqs(struct octep_device *oct); void octep_oq_dbell_init(struct octep_device *oct); void octep_device_setup_cn93_pf(struct octep_device *oct); +void octep_device_setup_cnxk_pf(struct octep_device *oct); int octep_iq_process_completions(struct octep_iq *iq, u16 budget); int octep_oq_process_rx(struct octep_oq *oq, int budget); void octep_set_ethtool_ops(struct net_device *netdev); diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h new file mode 100644 index 000000000000..abe02df8af11 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h @@ -0,0 +1,400 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell Octeon EP (EndPoint) Ethernet Driver + * + * Copyright (C) 2020 Marvell. 
+ * + */ + +#ifndef _OCTEP_REGS_CNXK_PF_H_ +#define _OCTEP_REGS_CNXK_PF_H_ + +/* ############################ RST ######################### */ +#define CNXK_RST_BOOT 0x000087E006001600ULL +#define CNXK_RST_CHIP_DOMAIN_W1S 0x000087E006001810ULL +#define CNXK_RST_CORE_DOMAIN_W1S 0x000087E006001820ULL +#define CNXK_RST_CORE_DOMAIN_W1C 0x000087E006001828ULL + +#define CNXK_CONFIG_XPANSION_BAR 0x38 +#define CNXK_CONFIG_PCIE_CAP 0x70 +#define CNXK_CONFIG_PCIE_DEVCAP 0x74 +#define CNXK_CONFIG_PCIE_DEVCTL 0x78 +#define CNXK_CONFIG_PCIE_LINKCAP 0x7C +#define CNXK_CONFIG_PCIE_LINKCTL 0x80 +#define CNXK_CONFIG_PCIE_SLOTCAP 0x84 +#define CNXK_CONFIG_PCIE_SLOTCTL 0x88 + +#define CNXK_PCIE_SRIOV_FDL 0x188 /* 0x98 */ +#define CNXK_PCIE_SRIOV_FDL_BIT_POS 0x10 +#define CNXK_PCIE_SRIOV_FDL_MASK 0xFF + +#define CNXK_CONFIG_PCIE_FLTMSK 0x720 + +/* ################# Offsets of RING, EPF, MAC ######################### */ +#define CNXK_RING_OFFSET (0x1ULL << 17) +#define CNXK_EPF_OFFSET (0x1ULL << 25) +#define CNXK_MAC_OFFSET (0x1ULL << 4) +#define CNXK_BIT_ARRAY_OFFSET (0x1ULL << 4) +#define CNXK_EPVF_RING_OFFSET (0x1ULL << 4) + +/* ################# Scratch Registers ######################### */ +#define CNXK_SDP_EPF_SCRATCH 0x209E0 + +/* ################# Window Registers ######################### */ +#define CNXK_SDP_WIN_WR_ADDR64 0x20000 +#define CNXK_SDP_WIN_RD_ADDR64 0x20010 +#define CNXK_SDP_WIN_WR_DATA64 0x20020 +#define CNXK_SDP_WIN_WR_MASK_REG 0x20030 +#define CNXK_SDP_WIN_RD_DATA64 0x20040 + +#define CNXK_SDP_MAC_NUMBER 0x2C100 + +/* ################# Global Previliged registers ######################### */ +#define CNXK_SDP_EPF_RINFO 0x209F0 + +#define CNXK_SDP_EPF_RINFO_SRN(val) ((val) & 0x7F) +#define CNXK_SDP_EPF_RINFO_RPVF(val) (((val) >> 32) & 0xF) +#define CNXK_SDP_EPF_RINFO_NVFS(val) (((val) >> 48) & 0x7F) + +/* SDP Function select */ +#define CNXK_SDP_FUNC_SEL_EPF_BIT_POS 7 +#define CNXK_SDP_FUNC_SEL_FUNC_BIT_POS 0 + +/* ##### RING IN (Into device from PCI: Tx Ring) REGISTERS #### */ +#define CNXK_SDP_R_IN_CONTROL_START 0x10000 +#define CNXK_SDP_R_IN_ENABLE_START 0x10010 +#define CNXK_SDP_R_IN_INSTR_BADDR_START 0x10020 +#define CNXK_SDP_R_IN_INSTR_RSIZE_START 0x10030 +#define CNXK_SDP_R_IN_INSTR_DBELL_START 0x10040 +#define CNXK_SDP_R_IN_CNTS_START 0x10050 +#define CNXK_SDP_R_IN_INT_LEVELS_START 0x10060 +#define CNXK_SDP_R_IN_PKT_CNT_START 0x10080 +#define CNXK_SDP_R_IN_BYTE_CNT_START 0x10090 + +#define CNXK_SDP_R_IN_CONTROL(ring) \ + (CNXK_SDP_R_IN_CONTROL_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_ENABLE(ring) \ + (CNXK_SDP_R_IN_ENABLE_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_INSTR_BADDR(ring) \ + (CNXK_SDP_R_IN_INSTR_BADDR_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_INSTR_RSIZE(ring) \ + (CNXK_SDP_R_IN_INSTR_RSIZE_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_INSTR_DBELL(ring) \ + (CNXK_SDP_R_IN_INSTR_DBELL_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_CNTS(ring) \ + (CNXK_SDP_R_IN_CNTS_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_INT_LEVELS(ring) \ + (CNXK_SDP_R_IN_INT_LEVELS_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_PKT_CNT(ring) \ + (CNXK_SDP_R_IN_PKT_CNT_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_BYTE_CNT(ring) \ + (CNXK_SDP_R_IN_BYTE_CNT_START + ((ring) * CNXK_RING_OFFSET)) + +/* Rings per Virtual Function */ +#define CNXK_R_IN_CTL_RPVF_MASK (0xF) +#define CNXK_R_IN_CTL_RPVF_POS (48) + +/* Number of instructions to be read in one MAC read 
request. + * setting to Max value(4) + */ +#define CNXK_R_IN_CTL_IDLE (0x1ULL << 28) +#define CNXK_R_IN_CTL_RDSIZE (0x3ULL << 25) +#define CNXK_R_IN_CTL_IS_64B (0x1ULL << 24) +#define CNXK_R_IN_CTL_D_NSR (0x1ULL << 8) +#define CNXK_R_IN_CTL_D_ESR (0x1ULL << 6) +#define CNXK_R_IN_CTL_D_ROR (0x1ULL << 5) +#define CNXK_R_IN_CTL_NSR (0x1ULL << 3) +#define CNXK_R_IN_CTL_ESR (0x1ULL << 1) +#define CNXK_R_IN_CTL_ROR (0x1ULL << 0) + +#define CNXK_R_IN_CTL_MASK (CNXK_R_IN_CTL_RDSIZE | CNXK_R_IN_CTL_IS_64B) + +/* ##### RING OUT (out from device to PCI host: Rx Ring) REGISTERS #### */ +#define CNXK_SDP_R_OUT_CNTS_START 0x10100 +#define CNXK_SDP_R_OUT_INT_LEVELS_START 0x10110 +#define CNXK_SDP_R_OUT_SLIST_BADDR_START 0x10120 +#define CNXK_SDP_R_OUT_SLIST_RSIZE_START 0x10130 +#define CNXK_SDP_R_OUT_SLIST_DBELL_START 0x10140 +#define CNXK_SDP_R_OUT_CONTROL_START 0x10150 +#define CNXK_SDP_R_OUT_WMARK_START 0x10160 +#define CNXK_SDP_R_OUT_ENABLE_START 0x10170 +#define CNXK_SDP_R_OUT_PKT_CNT_START 0x10180 +#define CNXK_SDP_R_OUT_BYTE_CNT_START 0x10190 + +#define CNXK_SDP_R_OUT_CONTROL(ring) \ + (CNXK_SDP_R_OUT_CONTROL_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_ENABLE(ring) \ + (CNXK_SDP_R_OUT_ENABLE_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_SLIST_BADDR(ring) \ + (CNXK_SDP_R_OUT_SLIST_BADDR_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_SLIST_RSIZE(ring) \ + (CNXK_SDP_R_OUT_SLIST_RSIZE_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_SLIST_DBELL(ring) \ + (CNXK_SDP_R_OUT_SLIST_DBELL_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_CNTS(ring) \ + (CNXK_SDP_R_OUT_CNTS_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_INT_LEVELS(ring) \ + (CNXK_SDP_R_OUT_INT_LEVELS_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_PKT_CNT(ring) \ + (CNXK_SDP_R_OUT_PKT_CNT_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_BYTE_CNT(ring) \ + (CNXK_SDP_R_OUT_BYTE_CNT_START + ((ring) * CNXK_RING_OFFSET)) + +/*------------------ R_OUT Masks ----------------*/ +#define CNXK_R_OUT_INT_LEVELS_BMODE BIT_ULL(63) +#define CNXK_R_OUT_INT_LEVELS_TIMET (32) + +#define CNXK_R_OUT_CTL_IDLE BIT_ULL(40) +#define CNXK_R_OUT_CTL_ES_I BIT_ULL(34) +#define CNXK_R_OUT_CTL_NSR_I BIT_ULL(33) +#define CNXK_R_OUT_CTL_ROR_I BIT_ULL(32) +#define CNXK_R_OUT_CTL_ES_D BIT_ULL(30) +#define CNXK_R_OUT_CTL_NSR_D BIT_ULL(29) +#define CNXK_R_OUT_CTL_ROR_D BIT_ULL(28) +#define CNXK_R_OUT_CTL_ES_P BIT_ULL(26) +#define CNXK_R_OUT_CTL_NSR_P BIT_ULL(25) +#define CNXK_R_OUT_CTL_ROR_P BIT_ULL(24) +#define CNXK_R_OUT_CTL_IMODE BIT_ULL(23) + +/* ############### Interrupt Moderation Registers ############### */ +#define CNXK_SDP_R_IN_INT_MDRT_CTL0_START 0x10280 +#define CNXK_SDP_R_IN_INT_MDRT_CTL1_START 0x102A0 +#define CNXK_SDP_R_IN_INT_MDRT_DBG_START 0x102C0 + +#define CNXK_SDP_R_OUT_INT_MDRT_CTL0_START 0x10380 +#define CNXK_SDP_R_OUT_INT_MDRT_CTL1_START 0x103A0 +#define CNXK_SDP_R_OUT_INT_MDRT_DBG_START 0x103C0 + +#define CNXK_SDP_R_OUT_CNTS_ISM_START 0x10510 +#define CNXK_SDP_R_IN_CNTS_ISM_START 0x10520 + +#define CNXK_SDP_R_IN_INT_MDRT_CTL0(ring) \ + (CNXK_SDP_R_IN_INT_MDRT_CTL0_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_INT_MDRT_CTL1(ring) \ + (CNXK_SDP_R_IN_INT_MDRT_CTL1_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_INT_MDRT_DBG(ring) \ + (CNXK_SDP_R_IN_INT_MDRT_DBG_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_INT_MDRT_CTL0(ring) \ + (CNXK_SDP_R_OUT_INT_MDRT_CTL0_START + ((ring) * 
CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_INT_MDRT_CTL1(ring) \ + (CNXK_SDP_R_OUT_INT_MDRT_CTL1_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_INT_MDRT_DBG(ring) \ + (CNXK_SDP_R_OUT_INT_MDRT_DBG_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_CNTS_ISM(ring) \ + (CNXK_SDP_R_OUT_CNTS_ISM_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_CNTS_ISM(ring) \ + (CNXK_SDP_R_IN_CNTS_ISM_START + ((ring) * CNXK_RING_OFFSET)) + +/* ##################### Mail Box Registers ########################## */ +/* INT register for VF. when a MBOX write from PF happed to a VF, + * corresponding bit will be set in this register as well as in + * PF_VF_INT register. + * + * This is a RO register, the int can be cleared by writing 1 to PF_VF_INT + */ +/* Basically first 3 are from PF to VF. The last one is data from VF to PF */ +#define CNXK_SDP_R_MBOX_PF_VF_DATA_START 0x10210 +#define CNXK_SDP_R_MBOX_PF_VF_INT_START 0x10220 +#define CNXK_SDP_R_MBOX_VF_PF_DATA_START 0x10230 + +#define CNXK_SDP_R_MBOX_PF_VF_DATA(ring) \ + (CNXK_SDP_R_MBOX_PF_VF_DATA_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_MBOX_PF_VF_INT(ring) \ + (CNXK_SDP_R_MBOX_PF_VF_INT_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_MBOX_VF_PF_DATA(ring) \ + (CNXK_SDP_R_MBOX_VF_PF_DATA_START + ((ring) * CNXK_RING_OFFSET)) + +/* ##################### Interrupt Registers ########################## */ +#define CNXK_SDP_R_ERR_TYPE_START 0x10400 + +#define CNXK_SDP_R_ERR_TYPE(ring) \ + (CNXK_SDP_R_ERR_TYPE_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_MBOX_ISM_START 0x10500 +#define CNXK_SDP_R_OUT_CNTS_ISM_START 0x10510 +#define CNXK_SDP_R_IN_CNTS_ISM_START 0x10520 + +#define CNXK_SDP_R_MBOX_ISM(ring) \ + (CNXK_SDP_R_MBOX_ISM_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_CNTS_ISM(ring) \ + (CNXK_SDP_R_OUT_CNTS_ISM_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_CNTS_ISM(ring) \ + (CNXK_SDP_R_IN_CNTS_ISM_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_EPF_MBOX_RINT_START 0x20100 +#define CNXK_SDP_EPF_MBOX_RINT_W1S_START 0x20120 +#define CNXK_SDP_EPF_MBOX_RINT_ENA_W1C_START 0x20140 +#define CNXK_SDP_EPF_MBOX_RINT_ENA_W1S_START 0x20160 + +#define CNXK_SDP_EPF_VFIRE_RINT_START 0x20180 +#define CNXK_SDP_EPF_VFIRE_RINT_W1S_START 0x201A0 +#define CNXK_SDP_EPF_VFIRE_RINT_ENA_W1C_START 0x201C0 +#define CNXK_SDP_EPF_VFIRE_RINT_ENA_W1S_START 0x201E0 + +#define CNXK_SDP_EPF_IRERR_RINT 0x20200 +#define CNXK_SDP_EPF_IRERR_RINT_W1S 0x20210 +#define CNXK_SDP_EPF_IRERR_RINT_ENA_W1C 0x20220 +#define CNXK_SDP_EPF_IRERR_RINT_ENA_W1S 0x20230 + +#define CNXK_SDP_EPF_VFORE_RINT_START 0x20240 +#define CNXK_SDP_EPF_VFORE_RINT_W1S_START 0x20260 +#define CNXK_SDP_EPF_VFORE_RINT_ENA_W1C_START 0x20280 +#define CNXK_SDP_EPF_VFORE_RINT_ENA_W1S_START 0x202A0 + +#define CNXK_SDP_EPF_ORERR_RINT 0x20320 +#define CNXK_SDP_EPF_ORERR_RINT_W1S 0x20330 +#define CNXK_SDP_EPF_ORERR_RINT_ENA_W1C 0x20340 +#define CNXK_SDP_EPF_ORERR_RINT_ENA_W1S 0x20350 + +#define CNXK_SDP_EPF_OEI_RINT 0x20400 +#define CNXK_SDP_EPF_OEI_RINT_W1S 0x20500 +#define CNXK_SDP_EPF_OEI_RINT_ENA_W1C 0x20600 +#define CNXK_SDP_EPF_OEI_RINT_ENA_W1S 0x20700 + +#define CNXK_SDP_EPF_DMA_RINT 0x20800 +#define CNXK_SDP_EPF_DMA_RINT_W1S 0x20810 +#define CNXK_SDP_EPF_DMA_RINT_ENA_W1C 0x20820 +#define CNXK_SDP_EPF_DMA_RINT_ENA_W1S 0x20830 + +#define CNXK_SDP_EPF_DMA_INT_LEVEL_START 0x20840 +#define CNXK_SDP_EPF_DMA_CNT_START 0x20860 +#define CNXK_SDP_EPF_DMA_TIM_START 0x20880 + +#define CNXK_SDP_EPF_MISC_RINT 0x208A0 
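+
+/* For each interrupt source, the *_ENA_W1S register is written to enable it
+ * and the *_ENA_W1C register to disable it (see
+ * octep_enable_interrupts_cnxk_pf() / octep_disable_interrupts_cnxk_pf());
+ * the per-VF-group registers below are indexed in steps of
+ * CNXK_BIT_ARRAY_OFFSET.
+ */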
+#define CNXK_SDP_EPF_MISC_RINT_W1S 0x208B0 +#define CNXK_SDP_EPF_MISC_RINT_ENA_W1C 0x208C0 +#define CNXK_SDP_EPF_MISC_RINT_ENA_W1S 0x208D0 + +#define CNXK_SDP_EPF_DMA_VF_RINT_START 0x208E0 +#define CNXK_SDP_EPF_DMA_VF_RINT_W1S_START 0x20900 +#define CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1C_START 0x20920 +#define CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1S_START 0x20940 + +#define CNXK_SDP_EPF_PP_VF_RINT_START 0x20960 +#define CNXK_SDP_EPF_PP_VF_RINT_W1S_START 0x20980 +#define CNXK_SDP_EPF_PP_VF_RINT_ENA_W1C_START 0x209A0 +#define CNXK_SDP_EPF_PP_VF_RINT_ENA_W1S_START 0x209C0 + +#define CNXK_SDP_EPF_MBOX_RINT(index) \ + (CNXK_SDP_EPF_MBOX_RINT_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_MBOX_RINT_W1S(index) \ + (CNXK_SDP_EPF_MBOX_RINT_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_MBOX_RINT_ENA_W1C(index) \ + (CNXK_SDP_EPF_MBOX_RINT_ENA_W1C_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_MBOX_RINT_ENA_W1S(index) \ + (CNXK_SDP_EPF_MBOX_RINT_ENA_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) + +#define CNXK_SDP_EPF_VFIRE_RINT(index) \ + (CNXK_SDP_EPF_VFIRE_RINT_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_VFIRE_RINT_W1S(index) \ + (CNXK_SDP_EPF_VFIRE_RINT_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_VFIRE_RINT_ENA_W1C(index) \ + (CNXK_SDP_EPF_VFIRE_RINT_ENA_W1C_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_VFIRE_RINT_ENA_W1S(index) \ + (CNXK_SDP_EPF_VFIRE_RINT_ENA_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) + +#define CNXK_SDP_EPF_VFORE_RINT(index) \ + (CNXK_SDP_EPF_VFORE_RINT_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_VFORE_RINT_W1S(index) \ + (CNXK_SDP_EPF_VFORE_RINT_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_VFORE_RINT_ENA_W1C(index) \ + (CNXK_SDP_EPF_VFORE_RINT_ENA_W1C_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_VFORE_RINT_ENA_W1S(index) \ + (CNXK_SDP_EPF_VFORE_RINT_ENA_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) + +#define CNXK_SDP_EPF_DMA_VF_RINT(index) \ + (CNXK_SDP_EPF_DMA_VF_RINT_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_DMA_VF_RINT_W1S(index) \ + (CNXK_SDP_EPF_DMA_VF_RINT_W1S_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1C(index) \ + (CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1C_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1S(index) \ + (CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1S_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) + +#define CNXK_SDP_EPF_PP_VF_RINT(index) \ + (CNXK_SDP_EPF_PP_VF_RINT_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_PP_VF_RINT_W1S(index) \ + (CNXK_SDP_EPF_PP_VF_RINT_W1S_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_PP_VF_RINT_ENA_W1C(index) \ + (CNXK_SDP_EPF_PP_VF_RINT_ENA_W1C_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_PP_VF_RINT_ENA_W1S(index) \ + (CNXK_SDP_EPF_PP_VF_RINT_ENA_W1S_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) + +/*------------------ Interrupt Masks ----------------*/ +#define CNXK_INTR_R_SEND_ISM BIT_ULL(63) +#define CNXK_INTR_R_OUT_INT BIT_ULL(62) +#define CNXK_INTR_R_IN_INT BIT_ULL(61) +#define CNXK_INTR_R_MBOX_INT BIT_ULL(60) +#define CNXK_INTR_R_RESEND BIT_ULL(59) +#define CNXK_INTR_R_CLR_TIM BIT_ULL(58) + +/* ####################### Ring Mapping Registers ################################## */ +#define CNXK_SDP_EPVF_RING_START 0x26000 +#define CNXK_SDP_IN_RING_TB_MAP_START 0x28000 +#define CNXK_SDP_IN_RATE_LIMIT_START 0x2A000 +#define 
CNXK_SDP_MAC_PF_RING_CTL_START 0x2C000 + +#define CNXK_SDP_EPVF_RING(ring) \ + (CNXK_SDP_EPVF_RING_START + ((ring) * CNXK_EPVF_RING_OFFSET)) +#define CNXK_SDP_IN_RING_TB_MAP(ring) \ + (CNXK_SDP_N_RING_TB_MAP_START + ((ring) * CNXK_EPVF_RING_OFFSET)) +#define CNXK_SDP_IN_RATE_LIMIT(ring) \ + (CNXK_SDP_IN_RATE_LIMIT_START + ((ring) * CNXK_EPVF_RING_OFFSET)) +#define CNXK_SDP_MAC_PF_RING_CTL(mac) \ + (CNXK_SDP_MAC_PF_RING_CTL_START + ((mac) * CNXK_MAC_OFFSET)) + +#define CNXK_SDP_MAC_PF_RING_CTL_NPFS(val) ((val) & 0x3) +#define CNXK_SDP_MAC_PF_RING_CTL_SRN(val) (((val) >> 8) & 0x7F) +#define CNXK_SDP_MAC_PF_RING_CTL_RPPF(val) (((val) >> 16) & 0x3F) + +/* Number of non-queue interrupts in CNXKxx */ +#define CNXK_NUM_NON_IOQ_INTR 32 + +/* bit 0 for control mbox interrupt */ +#define CNXK_SDP_EPF_OEI_RINT_DATA_BIT_MBOX BIT_ULL(0) +/* bit 1 for firmware heartbeat interrupt */ +#define CNXK_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT BIT_ULL(1) +#define FW_STATUS_RUNNING 2ULL +#define CNXK_PEMX_PFX_CSX_PFCFGX(pem, pf, offset) ({ typeof(offset) _off = (offset); \ + ((0x8e0000008000 | \ + (uint64_t)(pem) << 36 \ + | (pf) << 18 \ + | ((_off >> 16) & 1) << 16 \ + | (_off >> 3) << 3) \ + + (((_off >> 2) & 1) << 2)); \ + }) + +/* Register defines for use with CNXK_PEMX_PFX_CSX_PFCFGX */ +#define CNXK_PCIEEP_VSECST_CTL 0x418 + +#define CNXK_PEM_BAR4_INDEX 7 +#define CNXK_PEM_BAR4_INDEX_SIZE 0x400000ULL +#define CNXK_PEM_BAR4_INDEX_OFFSET (CNXK_PEM_BAR4_INDEX * CNXK_PEM_BAR4_INDEX_SIZE) + +#endif /* _OCTEP_REGS_CNXK_PF_H_ */ diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c index d0adb82d65c3..06851b78aa28 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c @@ -21,7 +21,6 @@ static void octep_iq_reset_indices(struct octep_iq *iq) iq->flush_index = 0; iq->pkts_processed = 0; iq->pkt_in_done = 0; - atomic_set(&iq->instr_pending, 0); } /** @@ -82,7 +81,6 @@ int octep_iq_process_completions(struct octep_iq *iq, u16 budget) } iq->pkts_processed += compl_pkts; - atomic_sub(compl_pkts, &iq->instr_pending); iq->stats.instr_completed += compl_pkts; iq->stats.bytes_sent += compl_bytes; iq->stats.sgentry_sent += compl_sg; @@ -91,7 +89,7 @@ int octep_iq_process_completions(struct octep_iq *iq, u16 budget) netdev_tx_completed_queue(iq->netdev_q, compl_pkts, compl_bytes); if (unlikely(__netif_subqueue_stopped(iq->netdev, iq->q_no)) && - ((iq->max_count - atomic_read(&iq->instr_pending)) > + (IQ_INSTR_SPACE(iq) > OCTEP_WAKE_QUEUE_THRESHOLD)) netif_wake_subqueue(iq->netdev, iq->q_no); return !budget; @@ -144,7 +142,6 @@ static void octep_iq_free_pending(struct octep_iq *iq) dev_kfree_skb_any(skb); } - atomic_set(&iq->instr_pending, 0); iq->flush_index = fi; netdev_tx_reset_queue(netdev_get_tx_queue(iq->netdev, iq->q_no)); } diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h index 86c98b13fc44..1ba4ff65e54d 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h @@ -172,9 +172,6 @@ struct octep_iq { /* Statistics for this input queue. */ struct octep_iq_stats stats; - /* This field keeps track of the instructions pending in this queue. */ - atomic_t instr_pending; - /* Pointer to the Virtual Base addr of the input ring. 
*/ struct octep_tx_desc_hw *desc_ring; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 3cf6589cfdac..a6e91573f8da 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1159,15 +1159,18 @@ static int mtk_init_fq_dma(struct mtk_eth *eth) phy_ring_tail = eth->phy_scratch_ring + soc->txrx.txd_size * (cnt - 1); for (i = 0; i < cnt; i++) { + dma_addr_t addr = dma_addr + i * MTK_QDMA_PAGE_SIZE; struct mtk_tx_dma_v2 *txd; txd = eth->scratch_ring + i * soc->txrx.txd_size; - txd->txd1 = dma_addr + i * MTK_QDMA_PAGE_SIZE; + txd->txd1 = addr; if (i < cnt - 1) txd->txd2 = eth->phy_scratch_ring + (i + 1) * soc->txrx.txd_size; txd->txd3 = TX_DMA_PLEN0(MTK_QDMA_PAGE_SIZE); + if (MTK_HAS_CAPS(soc->caps, MTK_36BIT_DMA)) + txd->txd3 |= TX_DMA_PREP_ADDR64(addr); txd->txd4 = 0; if (mtk_is_netsys_v2_or_greater(eth)) { txd->txd5 = 0; diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 9a6744c0d458..c895e265ae0e 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -670,7 +670,7 @@ mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev) void *buf; int s; - page = __dev_alloc_pages(GFP_KERNEL, 0); + page = __dev_alloc_page(GFP_KERNEL); if (!page) return -ENOMEM; @@ -691,10 +691,11 @@ mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev) for (s = 0; s < MTK_WED_BUF_PER_PAGE; s++) { struct mtk_wdma_desc *desc = desc_ptr; + u32 ctrl; desc->buf0 = cpu_to_le32(buf_phys); if (!mtk_wed_is_v3_or_greater(dev->hw)) { - u32 txd_size, ctrl; + u32 txd_size; txd_size = dev->wlan.init_buf(buf, buf_phys, token++); @@ -708,11 +709,11 @@ mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev) ctrl |= MTK_WDMA_DESC_CTRL_LAST_SEG0 | FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1_V2, MTK_WED_BUF_SIZE - txd_size); - desc->ctrl = cpu_to_le32(ctrl); desc->info = 0; } else { - desc->ctrl = cpu_to_le32(token << 16); + ctrl = token << 16 | TX_DMA_PREP_ADDR64(buf_phys); } + desc->ctrl = cpu_to_le32(ctrl); desc_ptr += desc_size; buf += MTK_WED_BUF_SIZE; @@ -811,6 +812,7 @@ mtk_wed_hwrro_buffer_alloc(struct mtk_wed_device *dev) buf_phys = page_phys; for (s = 0; s < MTK_WED_RX_BUF_PER_PAGE; s++) { desc->buf0 = cpu_to_le32(buf_phys); + desc->token = cpu_to_le32(RX_DMA_PREP_ADDR64(buf_phys)); buf_phys += MTK_WED_PAGE_BUF_SIZE; desc++; } diff --git a/drivers/net/ethernet/mediatek/mtk_wed_wo.c b/drivers/net/ethernet/mediatek/mtk_wed_wo.c index 3bd51a3d6650..7ffbd4fca881 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed_wo.c +++ b/drivers/net/ethernet/mediatek/mtk_wed_wo.c @@ -142,7 +142,8 @@ mtk_wed_wo_queue_refill(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q, dma_addr_t addr; void *buf; - buf = page_frag_alloc(&q->cache, q->buf_size, GFP_ATOMIC); + buf = page_frag_alloc(&q->cache, q->buf_size, + GFP_ATOMIC | GFP_DMA32); if (!buf) break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c index 28d02749d3c4..7659ad21e6e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c @@ -55,7 +55,10 @@ int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data) ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, MLX5_VSC_LOCK); if (ret) { - mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n"); + if (ret == -EBUSY) + mlx5_core_info(dev, "SW reset semaphore is already in use\n"); + else + 
mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n"); goto unlock_gw; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index b12fe3c5a258..a55452c69f06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -147,6 +147,20 @@ mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv, } } +static void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + u64 dbytes; + u64 dpkts; + + dpkts = priv->stats.rep_stats.vport_rx_packets - rpriv->prev_vf_vport_stats.rx_packets; + dbytes = priv->stats.rep_stats.vport_rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes; + mlx5e_stats_copy_rep_stats(&rpriv->prev_vf_vport_stats, &priv->stats.rep_stats); + flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies, + FLOW_ACTION_HW_STATS_DELAYED); +} + static int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv, struct tc_cls_matchall_offload *ma) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c index 368a95fa77d3..b14cd62edffc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c @@ -48,7 +48,8 @@ mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv, struct pedit_headers_action *hdrs, struct netlink_ext_ack *extack) { - u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1; + u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? TCA_PEDIT_KEY_EX_CMD_SET : + TCA_PEDIT_KEY_EX_CMD_ADD; u8 htype = act->mangle.htype; int err = -EOPNOTSUPP; u32 mask, val, offset; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ea58c6917433..3aecdf099a2f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5244,7 +5244,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->gso_partial_features |= NETIF_F_GSO_UDP_L4; netdev->hw_features |= NETIF_F_GSO_UDP_L4; - netdev->features |= NETIF_F_GSO_UDP_L4; mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 3ab682bbcf86..fe0726c7b847 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -112,8 +112,18 @@ static const struct counter_desc vport_rep_stats_desc[] = { tx_vport_rdma_multicast_bytes) }, }; +static const struct counter_desc vport_rep_loopback_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, + vport_loopback_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, + vport_loopback_bytes) }, +}; + #define NUM_VPORT_REP_SW_COUNTERS ARRAY_SIZE(sw_rep_stats_desc) #define NUM_VPORT_REP_HW_COUNTERS ARRAY_SIZE(vport_rep_stats_desc) +#define NUM_VPORT_REP_LOOPBACK_COUNTERS(dev) \ + (MLX5_CAP_GEN(dev, vport_counter_local_loopback) ? 
\ + ARRAY_SIZE(vport_rep_loopback_stats_desc) : 0) static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(sw_rep) { @@ -157,7 +167,8 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw_rep) static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vport_rep) { - return NUM_VPORT_REP_HW_COUNTERS; + return NUM_VPORT_REP_HW_COUNTERS + + NUM_VPORT_REP_LOOPBACK_COUNTERS(priv->mdev); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vport_rep) @@ -166,6 +177,9 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vport_rep) for (i = 0; i < NUM_VPORT_REP_HW_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, vport_rep_stats_desc[i].format); + for (i = 0; i < NUM_VPORT_REP_LOOPBACK_COUNTERS(priv->mdev); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vport_rep_loopback_stats_desc[i].format); return idx; } @@ -176,6 +190,9 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vport_rep) for (i = 0; i < NUM_VPORT_REP_HW_COUNTERS; i++) data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.rep_stats, vport_rep_stats_desc, i); + for (i = 0; i < NUM_VPORT_REP_LOOPBACK_COUNTERS(priv->mdev); i++) + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.rep_stats, + vport_rep_loopback_stats_desc, i); return idx; } @@ -247,6 +264,13 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport_rep) rep_stats->tx_vport_rdma_multicast_bytes = MLX5_GET_CTR(out, received_ib_multicast.octets); + if (MLX5_CAP_GEN(priv->mdev, vport_counter_local_loopback)) { + rep_stats->vport_loopback_packets = + MLX5_GET_CTR(out, local_loopback.packets); + rep_stats->vport_loopback_bytes = + MLX5_GET_CTR(out, local_loopback.octets); + } + out: kvfree(out); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 477c547dcc04..12b3607afecd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -476,6 +476,8 @@ struct mlx5e_rep_stats { u64 tx_vport_rdma_multicast_packets; u64 rx_vport_rdma_multicast_bytes; u64 tx_vport_rdma_multicast_bytes; + u64 vport_loopback_packets; + u64 vport_loopback_bytes; }; struct mlx5e_stats { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 7ca9e5b86778..9dca2809b46f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3206,10 +3206,10 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, headers_c = mlx5e_get_match_headers_criteria(*action_flags, &parse_attr->spec); headers_v = mlx5e_get_match_headers_value(*action_flags, &parse_attr->spec); - set_masks = &hdrs[0].masks; - add_masks = &hdrs[1].masks; - set_vals = &hdrs[0].vals; - add_vals = &hdrs[1].vals; + set_masks = &hdrs[TCA_PEDIT_KEY_EX_CMD_SET].masks; + add_masks = &hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].masks; + set_vals = &hdrs[TCA_PEDIT_KEY_EX_CMD_SET].vals; + add_vals = &hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].vals; for (i = 0; i < ARRAY_SIZE(fields); i++) { bool skip; @@ -5011,22 +5011,6 @@ int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv, return apply_police_params(priv, 0, extack); } -void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, - struct tc_cls_matchall_offload *ma) -{ - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct rtnl_link_stats64 cur_stats; - u64 dbytes; - u64 dpkts; - - mlx5e_stats_copy_rep_stats(&cur_stats, &priv->stats.rep_stats); - dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets; - dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes; - rpriv->prev_vf_vport_stats = cur_stats; 
- flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies, - FLOW_ACTION_HW_STATS_DELAYED); -} - static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index adb39e30f90f..c24bda56b2b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -203,8 +203,6 @@ int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, struct tc_cls_matchall_offload *f); int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv, struct tc_cls_matchall_offload *f); -void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, - struct tc_cls_matchall_offload *ma); struct mlx5e_encap_entry; void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 4aed1768b85f..78eb6b7097e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -181,7 +181,7 @@ struct mlx5_flow_rule { struct mlx5_flow_handle { int num_rules; - struct mlx5_flow_rule *rule[]; + struct mlx5_flow_rule *rule[] __counted_by(num_rules); }; /* Type of children is mlx5_flow_group */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 17fe30a4c06c..0c26d707eed2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -539,7 +539,7 @@ struct mlx5_fc_bulk { u32 base_id; int bulk_len; unsigned long *bitmask; - struct mlx5_fc fcs[]; + struct mlx5_fc fcs[] __counted_by(bulk_len); }; static void mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index b568988e92e3..4b8cb120362b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -325,6 +325,25 @@ static void mlx5_fw_live_patch_event(struct work_struct *work) mlx5_core_err(dev, "Failed to reload FW tracer\n"); } +static const struct pci_device_id mgt_ifc_device_ids[] = { + { PCI_VDEVICE(MELLANOX, 0xc2d2) }, /* BlueField1 MGT interface device ID */ + { PCI_VDEVICE(MELLANOX, 0xc2d3) }, /* BlueField2 MGT interface device ID */ + { PCI_VDEVICE(MELLANOX, 0xc2d4) }, /* BlueField3-Lx MGT interface device ID */ + { PCI_VDEVICE(MELLANOX, 0xc2d5) }, /* BlueField3 MGT interface device ID */ + { PCI_VDEVICE(MELLANOX, 0xc2d6) }, /* BlueField4 MGT interface device ID */ +}; + +static bool mlx5_is_mgt_ifc_pci_device(struct mlx5_core_dev *dev, u16 dev_id) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mgt_ifc_device_ids); ++i) + if (mgt_ifc_device_ids[i].device == dev_id) + return true; + + return false; +} + static int mlx5_check_dev_ids(struct mlx5_core_dev *dev, u16 dev_id) { struct pci_bus *bridge_bus = dev->pdev->bus; @@ -339,10 +358,15 @@ static int mlx5_check_dev_ids(struct mlx5_core_dev *dev, u16 dev_id) err = pci_read_config_word(sdev, PCI_DEVICE_ID, &sdev_id); if (err) return pcibios_err_to_errno(err); - if (sdev_id != dev_id) { - mlx5_core_warn(dev, "unrecognized dev_id (0x%x)\n", sdev_id); - return -EPERM; - } + + if (sdev_id == dev_id) + continue; + + if (mlx5_is_mgt_ifc_pci_device(dev, sdev_id)) + continue; + + mlx5_core_warn(dev, "unrecognized dev_id (0x%x)\n", sdev_id); + return 
-EPERM; } return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 0c83ef174275..0361741632a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -266,9 +266,6 @@ static int mlx5_ptp_settime_real_time(struct mlx5_core_dev *mdev, { u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {}; - if (!mlx5_modify_mtutc_allowed(mdev)) - return 0; - if (ts->tv_sec < 0 || ts->tv_sec > U32_MAX || ts->tv_nsec < 0 || ts->tv_nsec > NSEC_PER_SEC) return -EINVAL; @@ -286,12 +283,15 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 struct mlx5_timer *timer = &clock->timer; struct mlx5_core_dev *mdev; unsigned long flags; - int err; mdev = container_of(clock, struct mlx5_core_dev, clock); - err = mlx5_ptp_settime_real_time(mdev, ts); - if (err) - return err; + + if (mlx5_modify_mtutc_allowed(mdev)) { + int err = mlx5_ptp_settime_real_time(mdev, ts); + + if (err) + return err; + } write_seqlock_irqsave(&clock->lock, flags); timecounter_init(&timer->tc, &timer->cycles, timespec64_to_ns(ts)); @@ -341,9 +341,6 @@ static int mlx5_ptp_adjtime_real_time(struct mlx5_core_dev *mdev, s64 delta) { u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {}; - if (!mlx5_modify_mtutc_allowed(mdev)) - return 0; - /* HW time adjustment range is checked. If out of range, settime instead */ if (!mlx5_is_mtutc_time_adj_cap(mdev, delta)) { struct timespec64 ts; @@ -367,13 +364,16 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) struct mlx5_timer *timer = &clock->timer; struct mlx5_core_dev *mdev; unsigned long flags; - int err; mdev = container_of(clock, struct mlx5_core_dev, clock); - err = mlx5_ptp_adjtime_real_time(mdev, delta); - if (err) - return err; + if (mlx5_modify_mtutc_allowed(mdev)) { + int err = mlx5_ptp_adjtime_real_time(mdev, delta); + + if (err) + return err; + } + write_seqlock_irqsave(&clock->lock, flags); timecounter_adjtime(&timer->tc, delta); mlx5_update_clock_info_page(mdev); @@ -396,15 +396,14 @@ static int mlx5_ptp_freq_adj_real_time(struct mlx5_core_dev *mdev, long scaled_p { u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {}; - if (!mlx5_modify_mtutc_allowed(mdev)) - return 0; - MLX5_SET(mtutc_reg, in, operation, MLX5_MTUTC_OPERATION_ADJUST_FREQ_UTC); - if (MLX5_CAP_MCAM_FEATURE(mdev, mtutc_freq_adj_units)) { + if (MLX5_CAP_MCAM_FEATURE(mdev, mtutc_freq_adj_units) && + scaled_ppm <= S32_MAX && scaled_ppm >= S32_MIN) { + /* HW scaled_ppm support on mlx5 devices only supports a 32-bit value */ MLX5_SET(mtutc_reg, in, freq_adj_units, MLX5_MTUTC_FREQ_ADJ_UNITS_SCALED_PPM); - MLX5_SET(mtutc_reg, in, freq_adjustment, scaled_ppm); + MLX5_SET(mtutc_reg, in, freq_adjustment, (s32)scaled_ppm); } else { MLX5_SET(mtutc_reg, in, freq_adj_units, MLX5_MTUTC_FREQ_ADJ_UNITS_PPB); MLX5_SET(mtutc_reg, in, freq_adjustment, scaled_ppm_to_ppb(scaled_ppm)); @@ -420,13 +419,15 @@ static int mlx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) struct mlx5_core_dev *mdev; unsigned long flags; u32 mult; - int err; mdev = container_of(clock, struct mlx5_core_dev, clock); - err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm); - if (err) - return err; + if (mlx5_modify_mtutc_allowed(mdev)) { + int err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm); + + if (err) + return err; + } mult = (u32)adjust_by_scaled_ppm(timer->nominal_c_mult, scaled_ppm); @@ -1004,14 +1005,38 @@ static void mlx5_init_clock_info(struct mlx5_core_dev *mdev) info->frac = timer->tc.frac; } +static 
void mlx5_init_timer_max_freq_adjustment(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + u32 out[MLX5_ST_SZ_DW(mtutc_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {}; + u8 log_max_freq_adjustment = 0; + int err; + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MTUTC, 0, 0); + if (!err) + log_max_freq_adjustment = + MLX5_GET(mtutc_reg, out, log_max_freq_adjustment); + + if (log_max_freq_adjustment) + clock->ptp_info.max_adj = + min(S32_MAX, 1 << log_max_freq_adjustment); +} + static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev) { struct mlx5_clock *clock = &mdev->clock; + /* Configure the PHC */ + clock->ptp_info = mlx5_ptp_clock_info; + + if (MLX5_CAP_MCAM_REG(mdev, mtutc)) + mlx5_init_timer_max_freq_adjustment(mdev); + mlx5_timecounter_init(mdev); mlx5_init_clock_info(mdev); mlx5_init_overflow_period(clock); - clock->ptp_info = mlx5_ptp_clock_info; if (mlx5_real_time_mode(mdev)) { struct timespec64 ts; @@ -1042,11 +1067,10 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) } seqlock_init(&clock->lock); - mlx5_init_timer_clock(mdev); INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out); - /* Configure the PHC */ - clock->ptp_info = mlx5_ptp_clock_info; + /* Initialize the device clock */ + mlx5_init_timer_clock(mdev); /* Initialize 1PPS data structures */ mlx5_init_pps(mdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index a17152c1cbb2..bccf6e53556c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -219,7 +219,6 @@ static void mlx5_set_driver_version(struct mlx5_core_dev *dev) int driver_ver_sz = MLX5_FLD_SZ_BYTES(set_driver_version_in, driver_version); u8 in[MLX5_ST_SZ_BYTES(set_driver_version_in)] = {}; - int remaining_size = driver_ver_sz; char *string; if (!MLX5_CAP_GEN(dev, driver_version)) @@ -227,22 +226,9 @@ static void mlx5_set_driver_version(struct mlx5_core_dev *dev) string = MLX5_ADDR_OF(set_driver_version_in, in, driver_version); - strncpy(string, "Linux", remaining_size); - - remaining_size = max_t(int, 0, driver_ver_sz - strlen(string)); - strncat(string, ",", remaining_size); - - remaining_size = max_t(int, 0, driver_ver_sz - strlen(string)); - strncat(string, KBUILD_MODNAME, remaining_size); - - remaining_size = max_t(int, 0, driver_ver_sz - strlen(string)); - strncat(string, ",", remaining_size); - - remaining_size = max_t(int, 0, driver_ver_sz - strlen(string)); - - snprintf(string + strlen(string), remaining_size, "%u.%u.%u", - LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL, - LINUX_VERSION_SUBLEVEL); + snprintf(string, driver_ver_sz, "Linux,%s,%u.%u.%u", + KBUILD_MODNAME, LINUX_VERSION_MAJOR, + LINUX_VERSION_PATCHLEVEL, LINUX_VERSION_SUBLEVEL); /*Send the command*/ MLX5_SET(set_driver_version_in, in, opcode, diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h index e827c78be114..e3271c845ee6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h +++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h @@ -282,6 +282,12 @@ MLXSW_ITEM32(cmd_mbox, query_fw, fw_day, 0x14, 0, 8); */ MLXSW_ITEM32(cmd_mbox, query_fw, lag_mode_support, 0x18, 1, 1); +/* cmd_mbox_query_fw_cff_support + * 0: CONFIG_PROFILE.flood_mode = 5 (CFF) is not supported by FW + * 1: CONFIG_PROFILE.flood_mode = 5 (CFF) is supported by FW + */ +MLXSW_ITEM32(cmd_mbox, query_fw, cff_support, 0x18, 2, 1); + /* cmd_mbox_query_fw_clr_int_base_offset * Clear Interrupt 
register's offset from clr_int_bar register * in PCI address space. @@ -779,6 +785,11 @@ enum mlxsw_cmd_mbox_config_profile_flood_mode { * used. */ MLXSW_CMD_MBOX_CONFIG_PROFILE_FLOOD_MODE_CONTROLLED = 4, + /* CFF - Compressed FID Flood (CFF) mode. + * Reserved when legacy bridge model is used. + * Supported only by Spectrum-2+. + */ + MLXSW_CMD_MBOX_CONFIG_PROFILE_FLOOD_MODE_CFF = 5, }; /* cmd_mbox_config_profile_flood_mode diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index f23421f038f3..e4d7739bd7c8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -211,6 +211,13 @@ mlxsw_core_lag_mode(struct mlxsw_core *mlxsw_core) } EXPORT_SYMBOL(mlxsw_core_lag_mode); +enum mlxsw_cmd_mbox_config_profile_flood_mode +mlxsw_core_flood_mode(struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->bus->flood_mode(mlxsw_core->bus_priv); +} +EXPORT_SYMBOL(mlxsw_core_flood_mode); + void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core) { return mlxsw_core->driver_priv; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 764d14bd5bc0..6d11225594dd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -38,6 +38,8 @@ unsigned int mlxsw_core_max_ports(const struct mlxsw_core *mlxsw_core); int mlxsw_core_max_lag(struct mlxsw_core *mlxsw_core, u16 *p_max_lag); enum mlxsw_cmd_mbox_config_profile_lag_mode mlxsw_core_lag_mode(struct mlxsw_core *mlxsw_core); +enum mlxsw_cmd_mbox_config_profile_flood_mode +mlxsw_core_flood_mode(struct mlxsw_core *mlxsw_core); void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core); @@ -322,7 +324,12 @@ struct mlxsw_config_profile { u16 max_regions; u8 max_flood_tables; u8 max_vid_flood_tables; + + /* Flood mode to use if used_flood_mode. If flood_mode_prefer_cff, + * the backup flood mode (if any) when CFF unsupported. 
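 * In mlxsw_pci_config_profile() CFF is chosen when both
 * flood_mode_prefer_cff and the firmware cff_support bit are set; this
 * flood_mode is used as the fallback when only used_flood_mode is set,
 * and the profile is rejected (-EINVAL) when neither is set.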
+ */ u8 flood_mode; + u8 max_fid_offset_flood_tables; u16 fid_offset_flood_table_size; u8 max_fid_flood_tables; @@ -338,6 +345,7 @@ struct mlxsw_config_profile { u8 kvd_hash_double_parts; u8 cqe_time_stamp_type; bool lag_mode_prefer_sw; + bool flood_mode_prefer_cff; struct mlxsw_swid_config swid_config[MLXSW_CONFIG_PROFILE_SWID_COUNT]; }; @@ -489,6 +497,7 @@ struct mlxsw_bus { u32 (*read_utc_sec)(void *bus_priv); u32 (*read_utc_nsec)(void *bus_priv); enum mlxsw_cmd_mbox_config_profile_lag_mode (*lag_mode)(void *bus_priv); + enum mlxsw_cmd_mbox_config_profile_flood_mode (*flood_mode)(void *priv); u8 features; }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index e4b25e187467..0d58f13a7c7d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -106,7 +106,9 @@ struct mlxsw_pci { u64 utc_sec_offset; u64 utc_nsec_offset; bool lag_mode_support; + bool cff_support; enum mlxsw_cmd_mbox_config_profile_lag_mode lag_mode; + enum mlxsw_cmd_mbox_config_profile_flood_mode flood_mode; struct mlxsw_pci_queue_type_group queues[MLXSW_PCI_QUEUE_TYPE_COUNT]; u32 doorbell_offset; struct mlxsw_core *core; @@ -130,6 +132,7 @@ struct mlxsw_pci { const struct pci_device_id *id; enum mlxsw_pci_cqe_v max_cqe_ver; /* Maximal supported CQE version */ u8 num_sdq_cqs; /* Number of CQs used for SDQs */ + bool skip_reset; }; static void mlxsw_pci_queue_tasklet_schedule(struct mlxsw_pci_queue *q) @@ -1245,11 +1248,22 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, mlxsw_cmd_mbox_config_profile_fid_flood_table_size_set( mbox, profile->fid_flood_table_size); } - if (profile->used_flood_mode) { + if (profile->flood_mode_prefer_cff && mlxsw_pci->cff_support) { + enum mlxsw_cmd_mbox_config_profile_flood_mode flood_mode = + MLXSW_CMD_MBOX_CONFIG_PROFILE_FLOOD_MODE_CFF; + + mlxsw_cmd_mbox_config_profile_set_flood_mode_set(mbox, 1); + mlxsw_cmd_mbox_config_profile_flood_mode_set(mbox, flood_mode); + mlxsw_pci->flood_mode = flood_mode; + } else if (profile->used_flood_mode) { mlxsw_cmd_mbox_config_profile_set_flood_mode_set( mbox, 1); mlxsw_cmd_mbox_config_profile_flood_mode_set( mbox, profile->flood_mode); + mlxsw_pci->flood_mode = profile->flood_mode; + } else { + WARN_ON(1); + return -EINVAL; } if (profile->used_max_ib_mc) { mlxsw_cmd_mbox_config_profile_set_max_ib_mc_set( @@ -1476,11 +1490,47 @@ static int mlxsw_pci_sys_ready_wait(struct mlxsw_pci *mlxsw_pci, return -EBUSY; } -static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci, - const struct pci_device_id *id) +static int mlxsw_pci_reset_at_pci_disable(struct mlxsw_pci *mlxsw_pci) { struct pci_dev *pdev = mlxsw_pci->pdev; char mrsr_pl[MLXSW_REG_MRSR_LEN]; + int err; + + mlxsw_reg_mrsr_pack(mrsr_pl, + MLXSW_REG_MRSR_COMMAND_RESET_AT_PCI_DISABLE); + err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl); + if (err) + return err; + + device_lock_assert(&pdev->dev); + + pci_cfg_access_lock(pdev); + pci_save_state(pdev); + + err = __pci_reset_function_locked(pdev); + if (err) + pci_err(pdev, "PCI function reset failed with %d\n", err); + + pci_restore_state(pdev); + pci_cfg_access_unlock(pdev); + + return err; +} + +static int mlxsw_pci_reset_sw(struct mlxsw_pci *mlxsw_pci) +{ + char mrsr_pl[MLXSW_REG_MRSR_LEN]; + + mlxsw_reg_mrsr_pack(mrsr_pl, MLXSW_REG_MRSR_COMMAND_SOFTWARE_RESET); + return mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl); +} + +static int +mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct 
pci_device_id *id) +{ + struct pci_dev *pdev = mlxsw_pci->pdev; + char mcam_pl[MLXSW_REG_MCAM_LEN]; + bool pci_reset_supported; u32 sys_status; int err; @@ -1491,11 +1541,27 @@ static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci, return err; } - mlxsw_reg_mrsr_pack(mrsr_pl); - err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl); + /* PCI core already issued a PCI reset, do not issue another reset. */ + if (mlxsw_pci->skip_reset) + return 0; + + mlxsw_reg_mcam_pack(mcam_pl, + MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES); + err = mlxsw_reg_query(mlxsw_pci->core, MLXSW_REG(mcam), mcam_pl); if (err) return err; + mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET, + &pci_reset_supported); + + if (pci_reset_supported) { + pci_dbg(pdev, "Starting PCI reset flow\n"); + err = mlxsw_pci_reset_at_pci_disable(mlxsw_pci); + } else { + pci_dbg(pdev, "Starting software reset flow\n"); + err = mlxsw_pci_reset_sw(mlxsw_pci); + } + err = mlxsw_pci_sys_ready_wait(mlxsw_pci, id, &sys_status); if (err) { dev_err(&pdev->dev, "Failed to reach system ready status after reset. Status is 0x%x\n", @@ -1537,9 +1603,9 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, if (!mbox) return -ENOMEM; - err = mlxsw_pci_sw_reset(mlxsw_pci, mlxsw_pci->id); + err = mlxsw_pci_reset(mlxsw_pci, mlxsw_pci->id); if (err) - goto err_sw_reset; + goto err_reset; err = mlxsw_pci_alloc_irq_vectors(mlxsw_pci); if (err < 0) { @@ -1601,6 +1667,9 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, mlxsw_pci->lag_mode_support = mlxsw_cmd_mbox_query_fw_lag_mode_support_get(mbox); + mlxsw_pci->cff_support = + mlxsw_cmd_mbox_query_fw_cff_support_get(mbox); + num_pages = mlxsw_cmd_mbox_query_fw_fw_pages_get(mbox); err = mlxsw_pci_fw_area_init(mlxsw_pci, mbox, num_pages); if (err) @@ -1672,7 +1741,7 @@ err_iface_rev: err_query_fw: mlxsw_pci_free_irq_vectors(mlxsw_pci); err_alloc_irq: -err_sw_reset: +err_reset: mbox_put: mlxsw_cmd_mbox_free(mbox); return err; @@ -1917,6 +1986,14 @@ mlxsw_pci_lag_mode(void *bus_priv) return mlxsw_pci->lag_mode; } +static enum mlxsw_cmd_mbox_config_profile_flood_mode +mlxsw_pci_flood_mode(void *bus_priv) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + + return mlxsw_pci->flood_mode; +} + static const struct mlxsw_bus mlxsw_pci_bus = { .kind = "pci", .init = mlxsw_pci_init, @@ -1929,6 +2006,7 @@ static const struct mlxsw_bus mlxsw_pci_bus = { .read_utc_sec = mlxsw_pci_read_utc_sec, .read_utc_nsec = mlxsw_pci_read_utc_nsec, .lag_mode = mlxsw_pci_lag_mode, + .flood_mode = mlxsw_pci_flood_mode, .features = MLXSW_BUS_F_TXRX | MLXSW_BUS_F_RESET, }; @@ -2059,11 +2137,34 @@ static void mlxsw_pci_remove(struct pci_dev *pdev) kfree(mlxsw_pci); } +static void mlxsw_pci_reset_prepare(struct pci_dev *pdev) +{ + struct mlxsw_pci *mlxsw_pci = pci_get_drvdata(pdev); + + mlxsw_core_bus_device_unregister(mlxsw_pci->core, false); +} + +static void mlxsw_pci_reset_done(struct pci_dev *pdev) +{ + struct mlxsw_pci *mlxsw_pci = pci_get_drvdata(pdev); + + mlxsw_pci->skip_reset = true; + mlxsw_core_bus_device_register(&mlxsw_pci->bus_info, &mlxsw_pci_bus, + mlxsw_pci, false, NULL, NULL); + mlxsw_pci->skip_reset = false; +} + +static const struct pci_error_handlers mlxsw_pci_err_handler = { + .reset_prepare = mlxsw_pci_reset_prepare, + .reset_done = mlxsw_pci_reset_done, +}; + int mlxsw_pci_driver_register(struct pci_driver *pci_driver) { pci_driver->probe = mlxsw_pci_probe; pci_driver->remove = mlxsw_pci_remove; pci_driver->shutdown = mlxsw_pci_remove; + 
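/* reset_prepare/reset_done let the PCI core drive a function reset: the
 * device is unregistered before the reset and re-registered afterwards
 * with skip_reset set, so mlxsw_pci_reset() does not issue a second
 * device reset during the re-probe.
 */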
pci_driver->err_handler = &mlxsw_pci_err_handler; return pci_register_driver(pci_driver); } EXPORT_SYMBOL(mlxsw_pci_driver_register); diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 25b294fdeb3d..3aae4467e431 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -1024,6 +1024,8 @@ static inline void mlxsw_reg_spaft_pack(char *payload, u16 local_port, * ------------------------------------------ * The following register controls the association of flooding tables and MIDs * to packet types used for flooding. + * + * Reserved when CONFIG_PROFILE.flood_mode = CFF. */ #define MLXSW_REG_SFGC_ID 0x2011 #define MLXSW_REG_SFGC_LEN 0x14 @@ -1862,6 +1864,7 @@ MLXSW_ITEM32(reg, sfmr, fid, 0x00, 0, 16); * Access: RW * * Note: Reserved when legacy bridge model is used. + * Reserved when CONFIG_PROFILE.flood_mode = CFF. */ MLXSW_ITEM32(reg, sfmr, flood_rsp, 0x08, 31, 1); @@ -1872,6 +1875,7 @@ MLXSW_ITEM32(reg, sfmr, flood_rsp, 0x08, 31, 1); * Access: RW * * Note: Reserved when legacy bridge model is used and when flood_rsp=1. + * Reserved when CONFIG_PROFILE.flood_mode = CFF */ MLXSW_ITEM32(reg, sfmr, flood_bridge_type, 0x08, 28, 1); @@ -1880,6 +1884,8 @@ MLXSW_ITEM32(reg, sfmr, flood_bridge_type, 0x08, 28, 1); * Used to point into the flooding table selected by SFGC register if * the table is of type FID-Offset. Otherwise, this field is reserved. * Access: RW + * + * Note: Reserved when CONFIG_PROFILE.flood_mode = CFF */ MLXSW_ITEM32(reg, sfmr, fid_offset, 0x08, 0, 16); @@ -1938,6 +1944,26 @@ MLXSW_ITEM32(reg, sfmr, irif_v, 0x14, 24, 1); */ MLXSW_ITEM32(reg, sfmr, irif, 0x14, 0, 16); +/* reg_sfmr_cff_mid_base + * Pointer to PGT table. + * Range: 0..(cap_max_pgt-1) + * Access: RW + * + * Note: Reserved when SwitchX/-2 and Spectrum-1. + * Supported when CONFIG_PROFILE.flood_mode = CFF. + */ +MLXSW_ITEM32(reg, sfmr, cff_mid_base, 0x20, 0, 16); + +/* reg_sfmr_cff_prf_id + * Compressed Fid Flooding profile_id + * Range 0..(max_cap_nve_flood_prf-1) + * Access: RW + * + * Note: Reserved when SwitchX/-2 and Spectrum-1 + * Supported only when CONFIG_PROFLE.flood_mode = CFF. + */ +MLXSW_ITEM32(reg, sfmr, cff_prf_id, 0x24, 0, 2); + /* reg_sfmr_smpe_valid * SMPE is valid. * Access: RW @@ -1959,18 +1985,11 @@ MLXSW_ITEM32(reg, sfmr, smpe, 0x28, 0, 16); static inline void mlxsw_reg_sfmr_pack(char *payload, enum mlxsw_reg_sfmr_op op, u16 fid, - u16 fid_offset, bool flood_rsp, - enum mlxsw_reg_bridge_type bridge_type, bool smpe_valid, u16 smpe) { MLXSW_REG_ZERO(sfmr, payload); mlxsw_reg_sfmr_op_set(payload, op); mlxsw_reg_sfmr_fid_set(payload, fid); - mlxsw_reg_sfmr_fid_offset_set(payload, fid_offset); - mlxsw_reg_sfmr_vtfp_set(payload, false); - mlxsw_reg_sfmr_vv_set(payload, false); - mlxsw_reg_sfmr_flood_rsp_set(payload, flood_rsp); - mlxsw_reg_sfmr_flood_bridge_type_set(payload, bridge_type); mlxsw_reg_sfmr_smpe_valid_set(payload, smpe_valid); mlxsw_reg_sfmr_smpe_set(payload, smpe); } @@ -2168,6 +2187,50 @@ static inline void mlxsw_reg_spvc_pack(char *payload, u16 local_port, bool et1, mlxsw_reg_spvc_et0_set(payload, et0); } +/* SFFP - Switch FID Flooding Profiles Register + * -------------------------------------------- + * The SFFP register populates the fid flooding profile tables used for the NVE + * flooding and Compressed-FID Flooding (CFF). + * + * Reserved on Spectrum-1. 
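 * Each (profile_id, traffic type) pair maps to a flood_offset that is
 * added to SFMR.cff_mid_base (CFF FID flood) or to the NVE tunnel flood
 * pointer to reach the final flooding table entry.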
+ */ +#define MLXSW_REG_SFFP_ID 0x2029 +#define MLXSW_REG_SFFP_LEN 0x0C + +MLXSW_REG_DEFINE(sffp, MLXSW_REG_SFFP_ID, MLXSW_REG_SFFP_LEN); + +/* reg_sffp_profile_id + * Profile ID a.k.a. SFMR.nve_flood_prf_id or SFMR.cff_prf_id + * Range 0..max_cap_nve_flood_prf-1 + * Access: Index + */ +MLXSW_ITEM32(reg, sffp, profile_id, 0x00, 16, 2); + +/* reg_sffp_type + * The traffic type to reach the flooding table. + * Same as SFGC.type + * Access: Index + */ +MLXSW_ITEM32(reg, sffp, type, 0x00, 0, 4); + +/* reg_sffp_flood_offset + * Flood offset. Offset to add to SFMR.cff_mid_base to get the final PGT address + * for FID flood; or offset to add to SFMR.nve_tunnel_flood_ptr to get KVD + * pointer for NVE underlay. + * Access: RW + */ +MLXSW_ITEM32(reg, sffp, flood_offset, 0x04, 0, 3); + +static inline void mlxsw_reg_sffp_pack(char *payload, u8 profile_id, + enum mlxsw_reg_sfgc_type type, + u8 flood_offset) +{ + MLXSW_REG_ZERO(sffp, payload); + mlxsw_reg_sffp_profile_id_set(payload, profile_id); + mlxsw_reg_sffp_type_set(payload, type); + mlxsw_reg_sffp_flood_offset_set(payload, flood_offset); +} + /* SPEVET - Switch Port Egress VLAN EtherType * ------------------------------------------ * The switch port egress VLAN EtherType configures which EtherType to push at @@ -10122,6 +10185,15 @@ mlxsw_reg_mgir_unpack(char *payload, u32 *hw_rev, char *fw_info_psid, MLXSW_REG_DEFINE(mrsr, MLXSW_REG_MRSR_ID, MLXSW_REG_MRSR_LEN); +enum mlxsw_reg_mrsr_command { + /* Switch soft reset, does not reset PCI firmware. */ + MLXSW_REG_MRSR_COMMAND_SOFTWARE_RESET = 1, + /* Reset will be done when PCI link will be disabled. + * This command will reset PCI firmware also. + */ + MLXSW_REG_MRSR_COMMAND_RESET_AT_PCI_DISABLE = 6, +}; + /* reg_mrsr_command * Reset/shutdown command * 0 - do nothing @@ -10130,10 +10202,11 @@ MLXSW_REG_DEFINE(mrsr, MLXSW_REG_MRSR_ID, MLXSW_REG_MRSR_LEN); */ MLXSW_ITEM32(reg, mrsr, command, 0x00, 0, 4); -static inline void mlxsw_reg_mrsr_pack(char *payload) +static inline void mlxsw_reg_mrsr_pack(char *payload, + enum mlxsw_reg_mrsr_command command) { MLXSW_REG_ZERO(mrsr, payload); - mlxsw_reg_mrsr_command_set(payload, 1); + mlxsw_reg_mrsr_command_set(payload, command); } /* MLCR - Management LED Control Register @@ -10584,6 +10657,8 @@ MLXSW_ITEM32(reg, mcam, feature_group, 0x00, 16, 8); enum mlxsw_reg_mcam_mng_feature_cap_mask_bits { /* If set, MCIA supports 128 bytes payloads. Otherwise, 48 bytes. */ MLXSW_REG_MCAM_MCIA_128B = 34, + /* If set, MRSR.command=6 is supported. 
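 * (MRSR command 6 is MLXSW_REG_MRSR_COMMAND_RESET_AT_PCI_DISABLE; the
 * driver queries this bit through MCAM before choosing between the PCI
 * reset flow and the legacy software reset.)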
*/ + MLXSW_REG_MCAM_PCI_RESET = 48, }; #define MLXSW_REG_BYTES_PER_DWORD 0x4 @@ -12934,6 +13009,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(spvmlr), MLXSW_REG(spfsr), MLXSW_REG(spvc), + MLXSW_REG(sffp), MLXSW_REG(spevet), MLXSW_REG(smpe), MLXSW_REG(smid2), diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h index 89dd2777ec4d..9d7977ebe186 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/resources.h +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -27,6 +27,7 @@ enum mlxsw_res_id { MLXSW_RES_ID_FID, MLXSW_RES_ID_MAX_LAG, MLXSW_RES_ID_MAX_LAG_MEMBERS, + MLXSW_RES_ID_MAX_NVE_FLOOD_PRF, MLXSW_RES_ID_GUARANTEED_SHARED_BUFFER, MLXSW_RES_ID_CELL_SIZE, MLXSW_RES_ID_MAX_HEADROOM_SIZE, @@ -88,6 +89,7 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_FID] = 0x2512, [MLXSW_RES_ID_MAX_LAG] = 0x2520, [MLXSW_RES_ID_MAX_LAG_MEMBERS] = 0x2521, + [MLXSW_RES_ID_MAX_NVE_FLOOD_PRF] = 0x2522, [MLXSW_RES_ID_GUARANTEED_SHARED_BUFFER] = 0x2805, /* Bytes */ [MLXSW_RES_ID_CELL_SIZE] = 0x2803, /* Bytes */ [MLXSW_RES_ID_MAX_HEADROOM_SIZE] = 0x2811, /* Bytes */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index c70333b460ea..800c461deefa 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -753,6 +753,8 @@ union mlxsw_sp_l3addr { }; u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif); +int mlxsw_sp_rif_subport_port(const struct mlxsw_sp_rif *rif, + u16 *port, bool *is_lag); int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, struct netlink_ext_ack *extack); void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c index e954b8cd2ee8..aad4bb17dfb1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -424,18 +424,35 @@ static enum mlxsw_reg_sfmr_op mlxsw_sp_sfmr_op(bool valid) MLXSW_REG_SFMR_OP_DESTROY_FID; } -static int mlxsw_sp_fid_op(const struct mlxsw_sp_fid *fid, bool valid) +static void mlxsw_sp_fid_pack(char *sfmr_pl, + const struct mlxsw_sp_fid *fid, + enum mlxsw_reg_sfmr_op op) { - struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; - char sfmr_pl[MLXSW_REG_SFMR_LEN]; u16 smpe; smpe = fid->fid_family->smpe_index_valid ? 
fid->fid_index : 0; - mlxsw_reg_sfmr_pack(sfmr_pl, mlxsw_sp_sfmr_op(valid), fid->fid_index, - fid->fid_offset, fid->fid_family->flood_rsp, - fid->fid_family->bridge_type, + mlxsw_reg_sfmr_pack(sfmr_pl, op, fid->fid_index, fid->fid_family->smpe_index_valid, smpe); +} + +static void mlxsw_sp_fid_pack_ctl(char *sfmr_pl, + const struct mlxsw_sp_fid *fid, + enum mlxsw_reg_sfmr_op op) +{ + mlxsw_sp_fid_pack(sfmr_pl, fid, op); + mlxsw_reg_sfmr_fid_offset_set(sfmr_pl, fid->fid_offset); + mlxsw_reg_sfmr_flood_rsp_set(sfmr_pl, fid->fid_family->flood_rsp); + mlxsw_reg_sfmr_flood_bridge_type_set(sfmr_pl, + fid->fid_family->bridge_type); +} + +static int mlxsw_sp_fid_op(const struct mlxsw_sp_fid *fid, bool valid) +{ + struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; + char sfmr_pl[MLXSW_REG_SFMR_LEN]; + + mlxsw_sp_fid_pack_ctl(sfmr_pl, fid, mlxsw_sp_sfmr_op(valid)); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); } @@ -444,15 +461,8 @@ static int mlxsw_sp_fid_edit_op(const struct mlxsw_sp_fid *fid, { struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; char sfmr_pl[MLXSW_REG_SFMR_LEN]; - u16 smpe; - - smpe = fid->fid_family->smpe_index_valid ? fid->fid_index : 0; - mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, - fid->fid_index, fid->fid_offset, - fid->fid_family->flood_rsp, - fid->fid_family->bridge_type, - fid->fid_family->smpe_index_valid, smpe); + mlxsw_sp_fid_pack_ctl(sfmr_pl, fid, MLXSW_REG_SFMR_OP_CREATE_FID); mlxsw_reg_sfmr_vv_set(sfmr_pl, fid->vni_valid); mlxsw_reg_sfmr_vni_set(sfmr_pl, be32_to_cpu(fid->vni)); mlxsw_reg_sfmr_vtfp_set(sfmr_pl, fid->nve_flood_index_valid); @@ -1687,9 +1697,6 @@ mlxsw_sp_fid_flood_tables_init(struct mlxsw_sp_fid_family *fid_family) int err; int i; - if (!fid_family->nr_flood_tables) - return 0; - pgt_size = mlxsw_sp_fid_family_pgt_size(fid_family); err = mlxsw_sp_pgt_mid_alloc_range(mlxsw_sp, &fid_family->pgt_base, pgt_size); @@ -1718,9 +1725,6 @@ mlxsw_sp_fid_flood_tables_fini(struct mlxsw_sp_fid_family *fid_family) struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp; u16 pgt_size; - if (!fid_family->nr_flood_tables) - return; - pgt_size = mlxsw_sp_fid_family_pgt_size(fid_family); mlxsw_sp_pgt_mid_free_range(mlxsw_sp, fid_family->pgt_base, pgt_size); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 82a95125d9ca..2c255ed9b8a9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -8419,6 +8419,9 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, rif->ops = ops; rif->rif_entries = rif_entries; + if (ops->setup) + ops->setup(rif, params); + if (ops->fid_get) { fid = ops->fid_get(rif, params, extack); if (IS_ERR(fid)) { @@ -8428,9 +8431,6 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, rif->fid = fid; } - if (ops->setup) - ops->setup(rif, params); - err = ops->configure(rif, extack); if (err) goto err_configure; @@ -8660,6 +8660,20 @@ mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif) return container_of(rif, struct mlxsw_sp_rif_subport, common); } +int mlxsw_sp_rif_subport_port(const struct mlxsw_sp_rif *rif, + u16 *port, bool *is_lag) +{ + struct mlxsw_sp_rif_subport *rif_subport; + + if (WARN_ON(rif->ops->type != MLXSW_SP_RIF_TYPE_SUBPORT)) + return -EINVAL; + + rif_subport = mlxsw_sp_rif_subport_rif(rif); + *is_lag = rif_subport->lag; + *port = *is_lag ? 
rif_subport->lag_id : rif_subport->system_port; + return 0; +} + static struct mlxsw_sp_rif * mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_rif_params *params, diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index b648461787d2..be79cb0ae5af 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -1075,7 +1075,7 @@ struct lan743x_adapter { #define DMA_DESCRIPTOR_SPACING_32 (32) #define DMA_DESCRIPTOR_SPACING_64 (64) #define DMA_DESCRIPTOR_SPACING_128 (128) -#define DEFAULT_DMA_DESCRIPTOR_SPACING (L1_CACHE_BYTES) +#define DEFAULT_DMA_DESCRIPTOR_SPACING (DMA_DESCRIPTOR_SPACING_16) #define DMAC_CHANNEL_STATE_SET(start_bit, stop_bit) \ (((start_bit) ? 2 : 0) | ((stop_bit) ? 1 : 0)) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 939cfce15830..bd0e26524417 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -621,6 +621,9 @@ struct nfp_net_dp { * @mbox_amsg.lock: Protect message list * @mbox_amsg.list: List of message to process * @mbox_amsg.work: Work to process message asynchronously + * @fs: Flow steering + * @fs.count: Flow count + * @fs.list: List of flows * @app_priv: APP private data for this vNIC */ struct nfp_net { @@ -728,9 +731,39 @@ struct nfp_net { struct work_struct work; } mbox_amsg; + struct { + u16 count; + struct list_head list; + } fs; + void *app_priv; }; +struct nfp_fs_entry { + struct list_head node; + u32 flow_type; + u32 loc; + struct { + union { + struct { + __be32 sip4; + __be32 dip4; + }; + struct { + __be32 sip6[4]; + __be32 dip6[4]; + }; + }; + union { + __be16 l3_proto; + u8 l4_proto; + }; + __be16 sport; + __be16 dport; + } key, msk; + u64 action; +}; + struct nfp_mbox_amsg_entry { struct list_head list; int (*cfg)(struct nfp_net *nn, struct nfp_mbox_amsg_entry *entry); @@ -987,6 +1020,9 @@ struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn); int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *new, struct netlink_ext_ack *extack); +int nfp_net_fs_add_hw(struct nfp_net *nn, struct nfp_fs_entry *entry); +int nfp_net_fs_del_hw(struct nfp_net *nn, struct nfp_fs_entry *entry); + #ifdef CONFIG_NFP_DEBUG void nfp_net_debugfs_create(void); void nfp_net_debugfs_destroy(void); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index de0a5d5ded30..ac1f4514b1d0 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1763,6 +1763,186 @@ nfp_net_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) return nfp_net_mbox_reconfig_and_unlock(nn, cmd); } +static void +nfp_net_fs_fill_v4(struct nfp_net *nn, struct nfp_fs_entry *entry, u32 op, u32 *addr) +{ + unsigned int i; + + union { + struct { + __be16 loc; + u8 k_proto, m_proto; + __be32 k_sip, m_sip, k_dip, m_dip; + __be16 k_sport, m_sport, k_dport, m_dport; + }; + __be32 val[7]; + } v4_rule; + + nn_writel(nn, *addr, op); + *addr += sizeof(u32); + + v4_rule.loc = cpu_to_be16(entry->loc); + v4_rule.k_proto = entry->key.l4_proto; + v4_rule.m_proto = entry->msk.l4_proto; + v4_rule.k_sip = entry->key.sip4; + v4_rule.m_sip = entry->msk.sip4; + v4_rule.k_dip = entry->key.dip4; + v4_rule.m_dip = entry->msk.dip4; + v4_rule.k_sport = entry->key.sport; + v4_rule.m_sport = entry->msk.sport; + v4_rule.k_dport = 
entry->key.dport; + v4_rule.m_dport = entry->msk.dport; + + for (i = 0; i < ARRAY_SIZE(v4_rule.val); i++, *addr += sizeof(__be32)) + nn_writel(nn, *addr, be32_to_cpu(v4_rule.val[i])); +} + +static void +nfp_net_fs_fill_v6(struct nfp_net *nn, struct nfp_fs_entry *entry, u32 op, u32 *addr) +{ + unsigned int i; + + union { + struct { + __be16 loc; + u8 k_proto, m_proto; + __be32 k_sip[4], m_sip[4], k_dip[4], m_dip[4]; + __be16 k_sport, m_sport, k_dport, m_dport; + }; + __be32 val[19]; + } v6_rule; + + nn_writel(nn, *addr, op); + *addr += sizeof(u32); + + v6_rule.loc = cpu_to_be16(entry->loc); + v6_rule.k_proto = entry->key.l4_proto; + v6_rule.m_proto = entry->msk.l4_proto; + for (i = 0; i < 4; i++) { + v6_rule.k_sip[i] = entry->key.sip6[i]; + v6_rule.m_sip[i] = entry->msk.sip6[i]; + v6_rule.k_dip[i] = entry->key.dip6[i]; + v6_rule.m_dip[i] = entry->msk.dip6[i]; + } + v6_rule.k_sport = entry->key.sport; + v6_rule.m_sport = entry->msk.sport; + v6_rule.k_dport = entry->key.dport; + v6_rule.m_dport = entry->msk.dport; + + for (i = 0; i < ARRAY_SIZE(v6_rule.val); i++, *addr += sizeof(__be32)) + nn_writel(nn, *addr, be32_to_cpu(v6_rule.val[i])); +} + +#define NFP_FS_QUEUE_ID GENMASK(22, 16) +#define NFP_FS_ACT GENMASK(15, 0) +#define NFP_FS_ACT_DROP BIT(0) +#define NFP_FS_ACT_Q BIT(1) +static void +nfp_net_fs_fill_act(struct nfp_net *nn, struct nfp_fs_entry *entry, u32 addr) +{ + u32 action = 0; /* 0 means default passthrough */ + + if (entry->action == RX_CLS_FLOW_DISC) + action = NFP_FS_ACT_DROP; + else if (!(entry->flow_type & FLOW_RSS)) + action = FIELD_PREP(NFP_FS_QUEUE_ID, entry->action) | NFP_FS_ACT_Q; + + nn_writel(nn, addr, action); +} + +int nfp_net_fs_add_hw(struct nfp_net *nn, struct nfp_fs_entry *entry) +{ + u32 addr = nn->tlv_caps.mbox_off + NFP_NET_CFG_MBOX_SIMPLE_VAL; + int err; + + err = nfp_net_mbox_lock(nn, NFP_NET_CFG_FS_SZ); + if (err) + return err; + + switch (entry->flow_type & ~FLOW_RSS) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case SCTP_V4_FLOW: + case IPV4_USER_FLOW: + nfp_net_fs_fill_v4(nn, entry, NFP_NET_CFG_MBOX_CMD_FS_ADD_V4, &addr); + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case SCTP_V6_FLOW: + case IPV6_USER_FLOW: + nfp_net_fs_fill_v6(nn, entry, NFP_NET_CFG_MBOX_CMD_FS_ADD_V6, &addr); + break; + case ETHER_FLOW: + nn_writel(nn, addr, NFP_NET_CFG_MBOX_CMD_FS_ADD_ETHTYPE); + addr += sizeof(u32); + nn_writew(nn, addr, be16_to_cpu(entry->key.l3_proto)); + addr += sizeof(u32); + break; + } + + nfp_net_fs_fill_act(nn, entry, addr); + + err = nfp_net_mbox_reconfig_and_unlock(nn, NFP_NET_CFG_MBOX_CMD_FLOW_STEER); + if (err) { + nn_err(nn, "Add new fs rule failed with %d\n", err); + return -EIO; + } + + return 0; +} + +int nfp_net_fs_del_hw(struct nfp_net *nn, struct nfp_fs_entry *entry) +{ + u32 addr = nn->tlv_caps.mbox_off + NFP_NET_CFG_MBOX_SIMPLE_VAL; + int err; + + err = nfp_net_mbox_lock(nn, NFP_NET_CFG_FS_SZ); + if (err) + return err; + + switch (entry->flow_type & ~FLOW_RSS) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case SCTP_V4_FLOW: + case IPV4_USER_FLOW: + nfp_net_fs_fill_v4(nn, entry, NFP_NET_CFG_MBOX_CMD_FS_DEL_V4, &addr); + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case SCTP_V6_FLOW: + case IPV6_USER_FLOW: + nfp_net_fs_fill_v6(nn, entry, NFP_NET_CFG_MBOX_CMD_FS_DEL_V6, &addr); + break; + case ETHER_FLOW: + nn_writel(nn, addr, NFP_NET_CFG_MBOX_CMD_FS_DEL_ETHTYPE); + addr += sizeof(u32); + nn_writew(nn, addr, be16_to_cpu(entry->key.l3_proto)); + addr += sizeof(u32); + break; + } + + nfp_net_fs_fill_act(nn, entry, addr); + + err = 
nfp_net_mbox_reconfig_and_unlock(nn, NFP_NET_CFG_MBOX_CMD_FLOW_STEER); + if (err) { + nn_err(nn, "Delete fs rule failed with %d\n", err); + return -EIO; + } + + return 0; +} + +static void nfp_net_fs_clean(struct nfp_net *nn) +{ + struct nfp_fs_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &nn->fs.list, node) { + nfp_net_fs_del_hw(nn, entry); + list_del(&entry->node); + kfree(entry); + } +} + static void nfp_net_stat64(struct net_device *netdev, struct rtnl_link_stats64 *stats) { @@ -2740,6 +2920,8 @@ int nfp_net_init(struct nfp_net *nn) INIT_LIST_HEAD(&nn->mbox_amsg.list); INIT_WORK(&nn->mbox_amsg.work, nfp_net_mbox_amsg_work); + INIT_LIST_HEAD(&nn->fs.list); + return register_netdev(nn->dp.netdev); err_clean_mbox: @@ -2759,6 +2941,7 @@ void nfp_net_clean(struct nfp_net *nn) unregister_netdev(nn->dp.netdev); nfp_net_ipsec_clean(nn); nfp_ccm_mbox_clean(nn); + nfp_net_fs_clean(nn); flush_work(&nn->mbox_amsg.work); nfp_net_reconfig_wait_posted(nn); } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index 3e63f6d6a563..eaf4d3c499d1 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -269,6 +269,7 @@ #define NFP_NET_CFG_CTRL_IPSEC (0x1 << 1) /* IPsec offload */ #define NFP_NET_CFG_CTRL_MCAST_FILTER (0x1 << 2) /* Multicast Filter */ #define NFP_NET_CFG_CTRL_FREELIST_EN (0x1 << 6) /* Freelist enable flag bit */ +#define NFP_NET_CFG_CTRL_FLOW_STEER (0x1 << 8) /* Flow steering */ #define NFP_NET_CFG_CAP_WORD1 0x00a4 @@ -418,6 +419,8 @@ #define NFP_NET_CFG_MBOX_CMD_MULTICAST_ADD 8 #define NFP_NET_CFG_MBOX_CMD_MULTICAST_DEL 9 +#define NFP_NET_CFG_MBOX_CMD_FLOW_STEER 10 + /* VLAN filtering using general use mailbox * %NFP_NET_CFG_VLAN_FILTER: Base address of VLAN filter mailbox * %NFP_NET_CFG_VLAN_FILTER_VID: VLAN ID to filter @@ -440,6 +443,18 @@ #define NFP_NET_CFG_MULTICAST_MAC_LO (NFP_NET_CFG_MULTICAST + 6) #define NFP_NET_CFG_MULTICAST_SZ 0x0006 +/* Max size of FS rules in bytes */ +#define NFP_NET_CFG_FS_SZ 0x0054 +/* Sub commands for FS */ +enum { + NFP_NET_CFG_MBOX_CMD_FS_ADD_V4, + NFP_NET_CFG_MBOX_CMD_FS_DEL_V4, + NFP_NET_CFG_MBOX_CMD_FS_ADD_V6, + NFP_NET_CFG_MBOX_CMD_FS_DEL_V6, + NFP_NET_CFG_MBOX_CMD_FS_ADD_ETHTYPE, + NFP_NET_CFG_MBOX_CMD_FS_DEL_ETHTYPE, +}; + /* TLV capabilities * %NFP_NET_CFG_TLV_TYPE: Offset of type within the TLV * %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index e75cbb287625..d7896391b8ba 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -1317,6 +1317,116 @@ static int nfp_net_get_rss_hash_opts(struct nfp_net *nn, return 0; } +#define NFP_FS_MAX_ENTRY 1024 + +static int nfp_net_fs_to_ethtool(struct nfp_fs_entry *entry, struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fs = &cmd->fs; + unsigned int i; + + switch (entry->flow_type & ~FLOW_RSS) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case SCTP_V4_FLOW: + fs->h_u.tcp_ip4_spec.ip4src = entry->key.sip4; + fs->h_u.tcp_ip4_spec.ip4dst = entry->key.dip4; + fs->h_u.tcp_ip4_spec.psrc = entry->key.sport; + fs->h_u.tcp_ip4_spec.pdst = entry->key.dport; + fs->m_u.tcp_ip4_spec.ip4src = entry->msk.sip4; + fs->m_u.tcp_ip4_spec.ip4dst = entry->msk.dip4; + fs->m_u.tcp_ip4_spec.psrc = entry->msk.sport; + fs->m_u.tcp_ip4_spec.pdst = entry->msk.dport; + 
break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case SCTP_V6_FLOW: + for (i = 0; i < 4; i++) { + fs->h_u.tcp_ip6_spec.ip6src[i] = entry->key.sip6[i]; + fs->h_u.tcp_ip6_spec.ip6dst[i] = entry->key.dip6[i]; + fs->m_u.tcp_ip6_spec.ip6src[i] = entry->msk.sip6[i]; + fs->m_u.tcp_ip6_spec.ip6dst[i] = entry->msk.dip6[i]; + } + fs->h_u.tcp_ip6_spec.psrc = entry->key.sport; + fs->h_u.tcp_ip6_spec.pdst = entry->key.dport; + fs->m_u.tcp_ip6_spec.psrc = entry->msk.sport; + fs->m_u.tcp_ip6_spec.pdst = entry->msk.dport; + break; + case IPV4_USER_FLOW: + fs->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4; + fs->h_u.usr_ip4_spec.ip4src = entry->key.sip4; + fs->h_u.usr_ip4_spec.ip4dst = entry->key.dip4; + fs->h_u.usr_ip4_spec.proto = entry->key.l4_proto; + fs->m_u.usr_ip4_spec.ip4src = entry->msk.sip4; + fs->m_u.usr_ip4_spec.ip4dst = entry->msk.dip4; + fs->m_u.usr_ip4_spec.proto = entry->msk.l4_proto; + break; + case IPV6_USER_FLOW: + for (i = 0; i < 4; i++) { + fs->h_u.usr_ip6_spec.ip6src[i] = entry->key.sip6[i]; + fs->h_u.usr_ip6_spec.ip6dst[i] = entry->key.dip6[i]; + fs->m_u.usr_ip6_spec.ip6src[i] = entry->msk.sip6[i]; + fs->m_u.usr_ip6_spec.ip6dst[i] = entry->msk.dip6[i]; + } + fs->h_u.usr_ip6_spec.l4_proto = entry->key.l4_proto; + fs->m_u.usr_ip6_spec.l4_proto = entry->msk.l4_proto; + break; + case ETHER_FLOW: + fs->h_u.ether_spec.h_proto = entry->key.l3_proto; + fs->m_u.ether_spec.h_proto = entry->msk.l3_proto; + break; + default: + return -EINVAL; + } + + fs->flow_type = entry->flow_type; + fs->ring_cookie = entry->action; + + if (fs->flow_type & FLOW_RSS) { + /* Only rss_context of 0 is supported. */ + cmd->rss_context = 0; + /* RSS is used, mask the ring. */ + fs->ring_cookie |= ETHTOOL_RX_FLOW_SPEC_RING; + } + + return 0; +} + +static int nfp_net_get_fs_rule(struct nfp_net *nn, struct ethtool_rxnfc *cmd) +{ + struct nfp_fs_entry *entry; + + if (!(nn->cap_w1 & NFP_NET_CFG_CTRL_FLOW_STEER)) + return -EOPNOTSUPP; + + if (cmd->fs.location >= NFP_FS_MAX_ENTRY) + return -EINVAL; + + list_for_each_entry(entry, &nn->fs.list, node) { + if (entry->loc == cmd->fs.location) + return nfp_net_fs_to_ethtool(entry, cmd); + + if (entry->loc > cmd->fs.location) + /* no need to continue */ + return -ENOENT; + } + + return -ENOENT; +} + +static int nfp_net_get_fs_loc(struct nfp_net *nn, u32 *rule_locs) +{ + struct nfp_fs_entry *entry; + u32 count = 0; + + if (!(nn->cap_w1 & NFP_NET_CFG_CTRL_FLOW_STEER)) + return -EOPNOTSUPP; + + list_for_each_entry(entry, &nn->fs.list, node) + rule_locs[count++] = entry->loc; + + return 0; +} + static int nfp_net_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, u32 *rule_locs) { @@ -1326,6 +1436,14 @@ static int nfp_net_get_rxnfc(struct net_device *netdev, case ETHTOOL_GRXRINGS: cmd->data = nn->dp.num_rx_rings; return 0; + case ETHTOOL_GRXCLSRLCNT: + cmd->rule_cnt = nn->fs.count; + return 0; + case ETHTOOL_GRXCLSRULE: + return nfp_net_get_fs_rule(nn, cmd); + case ETHTOOL_GRXCLSRLALL: + cmd->data = NFP_FS_MAX_ENTRY; + return nfp_net_get_fs_loc(nn, rule_locs); case ETHTOOL_GRXFH: return nfp_net_get_rss_hash_opts(nn, cmd); default: @@ -1385,6 +1503,253 @@ static int nfp_net_set_rss_hash_opt(struct nfp_net *nn, return 0; } +static int nfp_net_fs_from_ethtool(struct nfp_fs_entry *entry, struct ethtool_rx_flow_spec *fs) +{ + unsigned int i; + + /* FLOW_EXT/FLOW_MAC_EXT is not supported. 
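 * The user-supplied keys are ANDed with their masks below, so only the
 * bits covered by each mask are programmed; TCP/UDP/SCTP flow types also
 * get l4_proto forced to the matching IPPROTO value (mask 0xff) in the
 * second switch.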
*/ + switch (fs->flow_type & ~FLOW_RSS) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case SCTP_V4_FLOW: + entry->msk.sip4 = fs->m_u.tcp_ip4_spec.ip4src; + entry->msk.dip4 = fs->m_u.tcp_ip4_spec.ip4dst; + entry->msk.sport = fs->m_u.tcp_ip4_spec.psrc; + entry->msk.dport = fs->m_u.tcp_ip4_spec.pdst; + entry->key.sip4 = fs->h_u.tcp_ip4_spec.ip4src & entry->msk.sip4; + entry->key.dip4 = fs->h_u.tcp_ip4_spec.ip4dst & entry->msk.dip4; + entry->key.sport = fs->h_u.tcp_ip4_spec.psrc & entry->msk.sport; + entry->key.dport = fs->h_u.tcp_ip4_spec.pdst & entry->msk.dport; + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case SCTP_V6_FLOW: + for (i = 0; i < 4; i++) { + entry->msk.sip6[i] = fs->m_u.tcp_ip6_spec.ip6src[i]; + entry->msk.dip6[i] = fs->m_u.tcp_ip6_spec.ip6dst[i]; + entry->key.sip6[i] = fs->h_u.tcp_ip6_spec.ip6src[i] & entry->msk.sip6[i]; + entry->key.dip6[i] = fs->h_u.tcp_ip6_spec.ip6dst[i] & entry->msk.dip6[i]; + } + entry->msk.sport = fs->m_u.tcp_ip6_spec.psrc; + entry->msk.dport = fs->m_u.tcp_ip6_spec.pdst; + entry->key.sport = fs->h_u.tcp_ip6_spec.psrc & entry->msk.sport; + entry->key.dport = fs->h_u.tcp_ip6_spec.pdst & entry->msk.dport; + break; + case IPV4_USER_FLOW: + entry->msk.sip4 = fs->m_u.usr_ip4_spec.ip4src; + entry->msk.dip4 = fs->m_u.usr_ip4_spec.ip4dst; + entry->msk.l4_proto = fs->m_u.usr_ip4_spec.proto; + entry->key.sip4 = fs->h_u.usr_ip4_spec.ip4src & entry->msk.sip4; + entry->key.dip4 = fs->h_u.usr_ip4_spec.ip4dst & entry->msk.dip4; + entry->key.l4_proto = fs->h_u.usr_ip4_spec.proto & entry->msk.l4_proto; + break; + case IPV6_USER_FLOW: + for (i = 0; i < 4; i++) { + entry->msk.sip6[i] = fs->m_u.usr_ip6_spec.ip6src[i]; + entry->msk.dip6[i] = fs->m_u.usr_ip6_spec.ip6dst[i]; + entry->key.sip6[i] = fs->h_u.usr_ip6_spec.ip6src[i] & entry->msk.sip6[i]; + entry->key.dip6[i] = fs->h_u.usr_ip6_spec.ip6dst[i] & entry->msk.dip6[i]; + } + entry->msk.l4_proto = fs->m_u.usr_ip6_spec.l4_proto; + entry->key.l4_proto = fs->h_u.usr_ip6_spec.l4_proto & entry->msk.l4_proto; + break; + case ETHER_FLOW: + entry->msk.l3_proto = fs->m_u.ether_spec.h_proto; + entry->key.l3_proto = fs->h_u.ether_spec.h_proto & entry->msk.l3_proto; + break; + default: + return -EINVAL; + } + + switch (fs->flow_type & ~FLOW_RSS) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + entry->key.l4_proto = IPPROTO_TCP; + entry->msk.l4_proto = 0xff; + break; + case UDP_V4_FLOW: + case UDP_V6_FLOW: + entry->key.l4_proto = IPPROTO_UDP; + entry->msk.l4_proto = 0xff; + break; + case SCTP_V4_FLOW: + case SCTP_V6_FLOW: + entry->key.l4_proto = IPPROTO_SCTP; + entry->msk.l4_proto = 0xff; + break; + } + + entry->flow_type = fs->flow_type; + entry->action = fs->ring_cookie; + entry->loc = fs->location; + + return 0; +} + +static int nfp_net_fs_check_existing(struct nfp_net *nn, struct nfp_fs_entry *new) +{ + struct nfp_fs_entry *entry; + + list_for_each_entry(entry, &nn->fs.list, node) { + if (new->loc != entry->loc && + !((new->flow_type ^ entry->flow_type) & ~FLOW_RSS) && + !memcmp(&new->key, &entry->key, sizeof(new->key)) && + !memcmp(&new->msk, &entry->msk, sizeof(new->msk))) + return entry->loc; + } + + /* -1 means no duplicates */ + return -1; +} + +static int nfp_net_fs_add(struct nfp_net *nn, struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fs = &cmd->fs; + struct nfp_fs_entry *new, *entry; + bool unsupp_mask; + int err, id; + + if (!(nn->cap_w1 & NFP_NET_CFG_CTRL_FLOW_STEER)) + return -EOPNOTSUPP; + + /* Only default RSS context(0) is supported. 
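 * A rule of this kind would typically be installed from user space with
 * something like (hypothetical interface name and addresses):
 *   ethtool -N eth0 flow-type tcp4 dst-ip 192.0.2.1 dst-port 80 action 2 loc 0
 * where "action 2" steers to RX queue 2 and "action -1" drops the flow.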
*/ + if ((fs->flow_type & FLOW_RSS) && cmd->rss_context) + return -EOPNOTSUPP; + + if (fs->location >= NFP_FS_MAX_ENTRY) + return -EINVAL; + + if (fs->ring_cookie != RX_CLS_FLOW_DISC && + fs->ring_cookie >= nn->dp.num_rx_rings) + return -EINVAL; + + /* FLOW_EXT/FLOW_MAC_EXT is not supported. */ + switch (fs->flow_type & ~FLOW_RSS) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case SCTP_V4_FLOW: + unsupp_mask = !!fs->m_u.tcp_ip4_spec.tos; + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case SCTP_V6_FLOW: + unsupp_mask = !!fs->m_u.tcp_ip6_spec.tclass; + break; + case IPV4_USER_FLOW: + unsupp_mask = !!fs->m_u.usr_ip4_spec.l4_4_bytes || + !!fs->m_u.usr_ip4_spec.tos || + !!fs->m_u.usr_ip4_spec.ip_ver; + /* ip_ver must be ETH_RX_NFC_IP4. */ + unsupp_mask |= fs->h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4; + break; + case IPV6_USER_FLOW: + unsupp_mask = !!fs->m_u.usr_ip6_spec.l4_4_bytes || + !!fs->m_u.usr_ip6_spec.tclass; + break; + case ETHER_FLOW: + if (fs->h_u.ether_spec.h_proto == htons(ETH_P_IP) || + fs->h_u.ether_spec.h_proto == htons(ETH_P_IPV6)) { + nn_err(nn, "Please use ip4/ip6 flow type instead.\n"); + return -EOPNOTSUPP; + } + /* Only unmasked ethtype is supported. */ + unsupp_mask = !is_zero_ether_addr(fs->m_u.ether_spec.h_dest) || + !is_zero_ether_addr(fs->m_u.ether_spec.h_source) || + (fs->m_u.ether_spec.h_proto != htons(0xffff)); + break; + default: + return -EOPNOTSUPP; + } + + if (unsupp_mask) + return -EOPNOTSUPP; + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return -ENOMEM; + + nfp_net_fs_from_ethtool(new, fs); + + id = nfp_net_fs_check_existing(nn, new); + if (id >= 0) { + nn_err(nn, "Identical rule is existing in %d.\n", id); + err = -EINVAL; + goto err; + } + + /* Insert to list in ascending order of location. */ + list_for_each_entry(entry, &nn->fs.list, node) { + if (entry->loc == fs->location) { + err = nfp_net_fs_del_hw(nn, entry); + if (err) + goto err; + + nn->fs.count--; + err = nfp_net_fs_add_hw(nn, new); + if (err) + goto err; + + nn->fs.count++; + list_replace(&entry->node, &new->node); + kfree(entry); + + return 0; + } + + if (entry->loc > fs->location) + break; + } + + if (nn->fs.count == NFP_FS_MAX_ENTRY) { + err = -ENOSPC; + goto err; + } + + err = nfp_net_fs_add_hw(nn, new); + if (err) + goto err; + + list_add_tail(&new->node, &entry->node); + nn->fs.count++; + + return 0; + +err: + kfree(new); + return err; +} + +static int nfp_net_fs_del(struct nfp_net *nn, struct ethtool_rxnfc *cmd) +{ + struct nfp_fs_entry *entry; + int err; + + if (!(nn->cap_w1 & NFP_NET_CFG_CTRL_FLOW_STEER)) + return -EOPNOTSUPP; + + if (!nn->fs.count || cmd->fs.location >= NFP_FS_MAX_ENTRY) + return -EINVAL; + + list_for_each_entry(entry, &nn->fs.list, node) { + if (entry->loc == cmd->fs.location) { + err = nfp_net_fs_del_hw(nn, entry); + if (err) + return err; + + list_del(&entry->node); + kfree(entry); + nn->fs.count--; + + return 0; + } else if (entry->loc > cmd->fs.location) { + /* no need to continue */ + break; + } + } + + return -ENOENT; +} + static int nfp_net_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) { @@ -1393,6 +1758,10 @@ static int nfp_net_set_rxnfc(struct net_device *netdev, switch (cmd->cmd) { case ETHTOOL_SRXFH: return nfp_net_set_rss_hash_opt(nn, cmd); + case ETHTOOL_SRXCLSRLINS: + return nfp_net_fs_add(nn, cmd); + case ETHTOOL_SRXCLSRLDEL: + return nfp_net_fs_del(nn, cmd); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 295366a85c63..dbc5c9d354c6 
100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -3100,6 +3100,33 @@ static void rtl_hw_start_8168g_2(struct rtl8169_private *tp) rtl_ephy_init(tp, e_info_8168g_2); } +static void rtl8411b_fix_phy_down(struct rtl8169_private *tp) +{ + static const u16 fix_data[] = { +/* 0xf800 */ 0xe008, 0xe00a, 0xe00c, 0xe00e, 0xe027, 0xe04f, 0xe05e, 0xe065, +/* 0xf810 */ 0xc602, 0xbe00, 0x0000, 0xc502, 0xbd00, 0x074c, 0xc302, 0xbb00, +/* 0xf820 */ 0x080a, 0x6420, 0x48c2, 0x8c20, 0xc516, 0x64a4, 0x49c0, 0xf009, +/* 0xf830 */ 0x74a2, 0x8ca5, 0x74a0, 0xc50e, 0x9ca2, 0x1c11, 0x9ca0, 0xe006, +/* 0xf840 */ 0x74f8, 0x48c4, 0x8cf8, 0xc404, 0xbc00, 0xc403, 0xbc00, 0x0bf2, +/* 0xf850 */ 0x0c0a, 0xe434, 0xd3c0, 0x49d9, 0xf01f, 0xc526, 0x64a5, 0x1400, +/* 0xf860 */ 0xf007, 0x0c01, 0x8ca5, 0x1c15, 0xc51b, 0x9ca0, 0xe013, 0xc519, +/* 0xf870 */ 0x74a0, 0x48c4, 0x8ca0, 0xc516, 0x74a4, 0x48c8, 0x48ca, 0x9ca4, +/* 0xf880 */ 0xc512, 0x1b00, 0x9ba0, 0x1b1c, 0x483f, 0x9ba2, 0x1b04, 0xc508, +/* 0xf890 */ 0x9ba0, 0xc505, 0xbd00, 0xc502, 0xbd00, 0x0300, 0x051e, 0xe434, +/* 0xf8a0 */ 0xe018, 0xe092, 0xde20, 0xd3c0, 0xc50f, 0x76a4, 0x49e3, 0xf007, +/* 0xf8b0 */ 0x49c0, 0xf103, 0xc607, 0xbe00, 0xc606, 0xbe00, 0xc602, 0xbe00, +/* 0xf8c0 */ 0x0c4c, 0x0c28, 0x0c2c, 0xdc00, 0xc707, 0x1d00, 0x8de2, 0x48c1, +/* 0xf8d0 */ 0xc502, 0xbd00, 0x00aa, 0xe0c0, 0xc502, 0xbd00, 0x0132 + }; + unsigned long flags; + int i; + + raw_spin_lock_irqsave(&tp->mac_ocp_lock, flags); + for (i = 0; i < ARRAY_SIZE(fix_data); i++) + __r8168_mac_ocp_write(tp, 0xf800 + 2 * i, fix_data[i]); + raw_spin_unlock_irqrestore(&tp->mac_ocp_lock, flags); +} + static void rtl_hw_start_8411_2(struct rtl8169_private *tp) { static const struct ephy_info e_info_8411_2[] = { @@ -3133,117 +3160,7 @@ static void rtl_hw_start_8411_2(struct rtl8169_private *tp) mdelay(3); r8168_mac_ocp_write(tp, 0xFC26, 0x0000); - r8168_mac_ocp_write(tp, 0xF800, 0xE008); - r8168_mac_ocp_write(tp, 0xF802, 0xE00A); - r8168_mac_ocp_write(tp, 0xF804, 0xE00C); - r8168_mac_ocp_write(tp, 0xF806, 0xE00E); - r8168_mac_ocp_write(tp, 0xF808, 0xE027); - r8168_mac_ocp_write(tp, 0xF80A, 0xE04F); - r8168_mac_ocp_write(tp, 0xF80C, 0xE05E); - r8168_mac_ocp_write(tp, 0xF80E, 0xE065); - r8168_mac_ocp_write(tp, 0xF810, 0xC602); - r8168_mac_ocp_write(tp, 0xF812, 0xBE00); - r8168_mac_ocp_write(tp, 0xF814, 0x0000); - r8168_mac_ocp_write(tp, 0xF816, 0xC502); - r8168_mac_ocp_write(tp, 0xF818, 0xBD00); - r8168_mac_ocp_write(tp, 0xF81A, 0x074C); - r8168_mac_ocp_write(tp, 0xF81C, 0xC302); - r8168_mac_ocp_write(tp, 0xF81E, 0xBB00); - r8168_mac_ocp_write(tp, 0xF820, 0x080A); - r8168_mac_ocp_write(tp, 0xF822, 0x6420); - r8168_mac_ocp_write(tp, 0xF824, 0x48C2); - r8168_mac_ocp_write(tp, 0xF826, 0x8C20); - r8168_mac_ocp_write(tp, 0xF828, 0xC516); - r8168_mac_ocp_write(tp, 0xF82A, 0x64A4); - r8168_mac_ocp_write(tp, 0xF82C, 0x49C0); - r8168_mac_ocp_write(tp, 0xF82E, 0xF009); - r8168_mac_ocp_write(tp, 0xF830, 0x74A2); - r8168_mac_ocp_write(tp, 0xF832, 0x8CA5); - r8168_mac_ocp_write(tp, 0xF834, 0x74A0); - r8168_mac_ocp_write(tp, 0xF836, 0xC50E); - r8168_mac_ocp_write(tp, 0xF838, 0x9CA2); - r8168_mac_ocp_write(tp, 0xF83A, 0x1C11); - r8168_mac_ocp_write(tp, 0xF83C, 0x9CA0); - r8168_mac_ocp_write(tp, 0xF83E, 0xE006); - r8168_mac_ocp_write(tp, 0xF840, 0x74F8); - r8168_mac_ocp_write(tp, 0xF842, 0x48C4); - r8168_mac_ocp_write(tp, 0xF844, 0x8CF8); - r8168_mac_ocp_write(tp, 0xF846, 0xC404); - r8168_mac_ocp_write(tp, 0xF848, 0xBC00); - r8168_mac_ocp_write(tp, 0xF84A, 0xC403); - 
r8168_mac_ocp_write(tp, 0xF84C, 0xBC00); - r8168_mac_ocp_write(tp, 0xF84E, 0x0BF2); - r8168_mac_ocp_write(tp, 0xF850, 0x0C0A); - r8168_mac_ocp_write(tp, 0xF852, 0xE434); - r8168_mac_ocp_write(tp, 0xF854, 0xD3C0); - r8168_mac_ocp_write(tp, 0xF856, 0x49D9); - r8168_mac_ocp_write(tp, 0xF858, 0xF01F); - r8168_mac_ocp_write(tp, 0xF85A, 0xC526); - r8168_mac_ocp_write(tp, 0xF85C, 0x64A5); - r8168_mac_ocp_write(tp, 0xF85E, 0x1400); - r8168_mac_ocp_write(tp, 0xF860, 0xF007); - r8168_mac_ocp_write(tp, 0xF862, 0x0C01); - r8168_mac_ocp_write(tp, 0xF864, 0x8CA5); - r8168_mac_ocp_write(tp, 0xF866, 0x1C15); - r8168_mac_ocp_write(tp, 0xF868, 0xC51B); - r8168_mac_ocp_write(tp, 0xF86A, 0x9CA0); - r8168_mac_ocp_write(tp, 0xF86C, 0xE013); - r8168_mac_ocp_write(tp, 0xF86E, 0xC519); - r8168_mac_ocp_write(tp, 0xF870, 0x74A0); - r8168_mac_ocp_write(tp, 0xF872, 0x48C4); - r8168_mac_ocp_write(tp, 0xF874, 0x8CA0); - r8168_mac_ocp_write(tp, 0xF876, 0xC516); - r8168_mac_ocp_write(tp, 0xF878, 0x74A4); - r8168_mac_ocp_write(tp, 0xF87A, 0x48C8); - r8168_mac_ocp_write(tp, 0xF87C, 0x48CA); - r8168_mac_ocp_write(tp, 0xF87E, 0x9CA4); - r8168_mac_ocp_write(tp, 0xF880, 0xC512); - r8168_mac_ocp_write(tp, 0xF882, 0x1B00); - r8168_mac_ocp_write(tp, 0xF884, 0x9BA0); - r8168_mac_ocp_write(tp, 0xF886, 0x1B1C); - r8168_mac_ocp_write(tp, 0xF888, 0x483F); - r8168_mac_ocp_write(tp, 0xF88A, 0x9BA2); - r8168_mac_ocp_write(tp, 0xF88C, 0x1B04); - r8168_mac_ocp_write(tp, 0xF88E, 0xC508); - r8168_mac_ocp_write(tp, 0xF890, 0x9BA0); - r8168_mac_ocp_write(tp, 0xF892, 0xC505); - r8168_mac_ocp_write(tp, 0xF894, 0xBD00); - r8168_mac_ocp_write(tp, 0xF896, 0xC502); - r8168_mac_ocp_write(tp, 0xF898, 0xBD00); - r8168_mac_ocp_write(tp, 0xF89A, 0x0300); - r8168_mac_ocp_write(tp, 0xF89C, 0x051E); - r8168_mac_ocp_write(tp, 0xF89E, 0xE434); - r8168_mac_ocp_write(tp, 0xF8A0, 0xE018); - r8168_mac_ocp_write(tp, 0xF8A2, 0xE092); - r8168_mac_ocp_write(tp, 0xF8A4, 0xDE20); - r8168_mac_ocp_write(tp, 0xF8A6, 0xD3C0); - r8168_mac_ocp_write(tp, 0xF8A8, 0xC50F); - r8168_mac_ocp_write(tp, 0xF8AA, 0x76A4); - r8168_mac_ocp_write(tp, 0xF8AC, 0x49E3); - r8168_mac_ocp_write(tp, 0xF8AE, 0xF007); - r8168_mac_ocp_write(tp, 0xF8B0, 0x49C0); - r8168_mac_ocp_write(tp, 0xF8B2, 0xF103); - r8168_mac_ocp_write(tp, 0xF8B4, 0xC607); - r8168_mac_ocp_write(tp, 0xF8B6, 0xBE00); - r8168_mac_ocp_write(tp, 0xF8B8, 0xC606); - r8168_mac_ocp_write(tp, 0xF8BA, 0xBE00); - r8168_mac_ocp_write(tp, 0xF8BC, 0xC602); - r8168_mac_ocp_write(tp, 0xF8BE, 0xBE00); - r8168_mac_ocp_write(tp, 0xF8C0, 0x0C4C); - r8168_mac_ocp_write(tp, 0xF8C2, 0x0C28); - r8168_mac_ocp_write(tp, 0xF8C4, 0x0C2C); - r8168_mac_ocp_write(tp, 0xF8C6, 0xDC00); - r8168_mac_ocp_write(tp, 0xF8C8, 0xC707); - r8168_mac_ocp_write(tp, 0xF8CA, 0x1D00); - r8168_mac_ocp_write(tp, 0xF8CC, 0x8DE2); - r8168_mac_ocp_write(tp, 0xF8CE, 0x48C1); - r8168_mac_ocp_write(tp, 0xF8D0, 0xC502); - r8168_mac_ocp_write(tp, 0xF8D2, 0xBD00); - r8168_mac_ocp_write(tp, 0xF8D4, 0x00AA); - r8168_mac_ocp_write(tp, 0xF8D6, 0xE0C0); - r8168_mac_ocp_write(tp, 0xF8D8, 0xC502); - r8168_mac_ocp_write(tp, 0xF8DA, 0xBD00); - r8168_mac_ocp_write(tp, 0xF8DC, 0x0132); + rtl8411b_fix_phy_down(tp); r8168_mac_ocp_write(tp, 0xFC26, 0x8000); diff --git a/drivers/net/ethernet/renesas/Kconfig b/drivers/net/ethernet/renesas/Kconfig index 8ef5b0241e64..733cbb6eb3ed 100644 --- a/drivers/net/ethernet/renesas/Kconfig +++ b/drivers/net/ethernet/renesas/Kconfig @@ -44,7 +44,16 @@ config RENESAS_ETHER_SWITCH select CRC32 select MII select PHYLINK + select RENESAS_GEN4_PTP help Renesas 
Ethernet Switch device driver. +config RENESAS_GEN4_PTP + tristate "Renesas R-Car Gen4 gPTP support" if COMPILE_TEST + select CRC32 + select MII + select PHYLIB + help + Renesas R-Car Gen4 gPTP device driver. + endif # NET_VENDOR_RENESAS diff --git a/drivers/net/ethernet/renesas/Makefile b/drivers/net/ethernet/renesas/Makefile index e8fd85b5fe8f..9070acfd6aaf 100644 --- a/drivers/net/ethernet/renesas/Makefile +++ b/drivers/net/ethernet/renesas/Makefile @@ -8,5 +8,6 @@ obj-$(CONFIG_SH_ETH) += sh_eth.o ravb-objs := ravb_main.o ravb_ptp.o obj-$(CONFIG_RAVB) += ravb.o -rswitch_drv-objs := rswitch.o rcar_gen4_ptp.o -obj-$(CONFIG_RENESAS_ETHER_SWITCH) += rswitch_drv.o +obj-$(CONFIG_RENESAS_ETHER_SWITCH) += rswitch.o + +obj-$(CONFIG_RENESAS_GEN4_PTP) += rcar_gen4_ptp.o diff --git a/drivers/net/ethernet/renesas/rcar_gen4_ptp.c b/drivers/net/ethernet/renesas/rcar_gen4_ptp.c index c007e33c47e1..72e7fcc56693 100644 --- a/drivers/net/ethernet/renesas/rcar_gen4_ptp.c +++ b/drivers/net/ethernet/renesas/rcar_gen4_ptp.c @@ -14,7 +14,7 @@ #include "rcar_gen4_ptp.h" #define ptp_to_priv(ptp) container_of(ptp, struct rcar_gen4_ptp_private, info) -static const struct rcar_gen4_ptp_reg_offset s4_offs = { +static const struct rcar_gen4_ptp_reg_offset gen4_offs = { .enable = PTPTMEC, .disable = PTPTMDC, .increment = PTPTIVC0, @@ -130,25 +130,42 @@ static struct ptp_clock_info rcar_gen4_ptp_info = { .enable = rcar_gen4_ptp_enable, }; -static void rcar_gen4_ptp_set_offs(struct rcar_gen4_ptp_private *ptp_priv, - enum rcar_gen4_ptp_reg_layout layout) +static int rcar_gen4_ptp_set_offs(struct rcar_gen4_ptp_private *ptp_priv, + enum rcar_gen4_ptp_reg_layout layout) { - WARN_ON(layout != RCAR_GEN4_PTP_REG_LAYOUT_S4); + if (layout != RCAR_GEN4_PTP_REG_LAYOUT) + return -EINVAL; - ptp_priv->offs = &s4_offs; + ptp_priv->offs = &gen4_offs; + + return 0; +} + +static s64 rcar_gen4_ptp_rate_to_increment(u32 rate) +{ + /* Timer increment in ns. 
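 * (Worked example: for the 320 MHz clock that used to be hard-coded as
 *  PTPTIVC_INIT, (10^9 << 27) / 320000000 = 0x19000000, i.e. the old
 *  constant.)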
+ * bit[31:27] - integer + * bit[26:0] - decimal + * increment[ns] = perid[ns] * 2^27 => (1ns * 2^27) / rate[hz] + */ + return div_s64(1000000000LL << 27, rate); } int rcar_gen4_ptp_register(struct rcar_gen4_ptp_private *ptp_priv, - enum rcar_gen4_ptp_reg_layout layout, u32 clock) + enum rcar_gen4_ptp_reg_layout layout, u32 rate) { + int ret; + if (ptp_priv->initialized) return 0; spin_lock_init(&ptp_priv->lock); - rcar_gen4_ptp_set_offs(ptp_priv, layout); + ret = rcar_gen4_ptp_set_offs(ptp_priv, layout); + if (ret) + return ret; - ptp_priv->default_addend = clock; + ptp_priv->default_addend = rcar_gen4_ptp_rate_to_increment(rate); iowrite32(ptp_priv->default_addend, ptp_priv->addr + ptp_priv->offs->increment); ptp_priv->clock = ptp_clock_register(&ptp_priv->info, NULL); if (IS_ERR(ptp_priv->clock)) @@ -159,6 +176,7 @@ int rcar_gen4_ptp_register(struct rcar_gen4_ptp_private *ptp_priv, return 0; } +EXPORT_SYMBOL_GPL(rcar_gen4_ptp_register); int rcar_gen4_ptp_unregister(struct rcar_gen4_ptp_private *ptp_priv) { @@ -166,6 +184,7 @@ int rcar_gen4_ptp_unregister(struct rcar_gen4_ptp_private *ptp_priv) return ptp_clock_unregister(ptp_priv->clock); } +EXPORT_SYMBOL_GPL(rcar_gen4_ptp_unregister); struct rcar_gen4_ptp_private *rcar_gen4_ptp_alloc(struct platform_device *pdev) { @@ -179,3 +198,8 @@ struct rcar_gen4_ptp_private *rcar_gen4_ptp_alloc(struct platform_device *pdev) return ptp; } +EXPORT_SYMBOL_GPL(rcar_gen4_ptp_alloc); + +MODULE_AUTHOR("Yoshihiro Shimoda"); +MODULE_DESCRIPTION("Renesas R-Car Gen4 gPTP driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/renesas/rcar_gen4_ptp.h b/drivers/net/ethernet/renesas/rcar_gen4_ptp.h index b1bbea8d3a52..e22da5acd53d 100644 --- a/drivers/net/ethernet/renesas/rcar_gen4_ptp.h +++ b/drivers/net/ethernet/renesas/rcar_gen4_ptp.h @@ -9,13 +9,10 @@ #include <linux/ptp_clock_kernel.h> -#define PTPTIVC_INIT 0x19000000 /* 320MHz */ -#define RCAR_GEN4_PTP_CLOCK_S4 PTPTIVC_INIT #define RCAR_GEN4_GPTP_OFFSET_S4 0x00018000 -/* for rcar_gen4_ptp_init */ enum rcar_gen4_ptp_reg_layout { - RCAR_GEN4_PTP_REG_LAYOUT_S4 + RCAR_GEN4_PTP_REG_LAYOUT }; /* driver's definitions */ @@ -28,7 +25,7 @@ enum rcar_gen4_ptp_reg_layout { #define PTPRO 0 -enum rcar_gen4_ptp_reg_s4 { +enum rcar_gen4_ptp_reg { PTPTMEC = PTPRO + 0x0010, PTPTMDC = PTPRO + 0x0014, PTPTIVC0 = PTPRO + 0x0020, @@ -65,7 +62,7 @@ struct rcar_gen4_ptp_private { }; int rcar_gen4_ptp_register(struct rcar_gen4_ptp_private *ptp_priv, - enum rcar_gen4_ptp_reg_layout layout, u32 clock); + enum rcar_gen4_ptp_reg_layout layout, u32 rate); int rcar_gen4_ptp_unregister(struct rcar_gen4_ptp_private *ptp_priv); struct rcar_gen4_ptp_private *rcar_gen4_ptp_alloc(struct platform_device *pdev); diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 43a7795d6591..d6089429f654 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1828,8 +1828,8 @@ static int rswitch_init(struct rswitch_private *priv) rswitch_fwd_init(priv); - err = rcar_gen4_ptp_register(priv->ptp_priv, RCAR_GEN4_PTP_REG_LAYOUT_S4, - RCAR_GEN4_PTP_CLOCK_S4); + err = rcar_gen4_ptp_register(priv->ptp_priv, RCAR_GEN4_PTP_REG_LAYOUT, + clk_get_rate(priv->clk)); if (err < 0) goto err_ptp_register; diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index e3f650e88f82..6b935922054d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -580,6 +580,7 @@ 
struct mac_device_info { u32 vlan_filter[32]; bool vlan_fail_q_en; u8 vlan_fail_q; + bool hw_vlan_en; }; struct stmmac_rx_routing { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index c6ff1fa0e04d..5f35faf90963 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -1134,6 +1134,35 @@ static int dwmac4_config_l4_filter(struct mac_device_info *hw, u32 filter_no, return 0; } +static void dwmac4_rx_hw_vlan(struct mac_device_info *hw, + struct dma_desc *rx_desc, struct sk_buff *skb) +{ + if (hw->desc->get_rx_vlan_valid(rx_desc)) { + u16 vid = hw->desc->get_rx_vlan_tci(rx_desc); + + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); + } +} + +static void dwmac4_set_hw_vlan_mode(struct mac_device_info *hw) +{ + void __iomem *ioaddr = hw->pcsr; + u32 value = readl(ioaddr + GMAC_VLAN_TAG); + + value &= ~GMAC_VLAN_TAG_CTRL_EVLS_MASK; + + if (hw->hw_vlan_en) + /* Always strip VLAN on Receive */ + value |= GMAC_VLAN_TAG_STRIP_ALL; + else + /* Do not strip VLAN on Receive */ + value |= GMAC_VLAN_TAG_STRIP_NONE; + + /* Enable outer VLAN Tag in Rx DMA descriptor */ + value |= GMAC_VLAN_TAG_CTRL_EVLRXS; + writel(value, ioaddr + GMAC_VLAN_TAG); +} + const struct stmmac_ops dwmac4_ops = { .core_init = dwmac4_core_init, .phylink_get_caps = dwmac4_phylink_get_caps, @@ -1175,6 +1204,8 @@ const struct stmmac_ops dwmac4_ops = { .add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr, .del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr, .restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr, + .rx_hw_vlan = dwmac4_rx_hw_vlan, + .set_hw_vlan_mode = dwmac4_set_hw_vlan_mode, }; const struct stmmac_ops dwmac410_ops = { @@ -1224,6 +1255,8 @@ const struct stmmac_ops dwmac410_ops = { .add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr, .del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr, .restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr, + .rx_hw_vlan = dwmac4_rx_hw_vlan, + .set_hw_vlan_mode = dwmac4_set_hw_vlan_mode, }; const struct stmmac_ops dwmac510_ops = { @@ -1277,6 +1310,8 @@ const struct stmmac_ops dwmac510_ops = { .add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr, .del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr, .restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr, + .rx_hw_vlan = dwmac4_rx_hw_vlan, + .set_hw_vlan_mode = dwmac4_set_hw_vlan_mode, }; static u32 dwmac4_get_num_vlan(void __iomem *ioaddr) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 89a14084c611..1c5802e0d7f4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -198,6 +198,17 @@ static int dwmac4_get_tx_ls(struct dma_desc *p) >> TDES3_LAST_DESCRIPTOR_SHIFT; } +static u16 dwmac4_wrback_get_rx_vlan_tci(struct dma_desc *p) +{ + return (le32_to_cpu(p->des0) & RDES0_VLAN_TAG_MASK); +} + +static bool dwmac4_wrback_get_rx_vlan_valid(struct dma_desc *p) +{ + return ((le32_to_cpu(p->des3) & RDES3_LAST_DESCRIPTOR) && + (le32_to_cpu(p->des3) & RDES3_RDES0_VALID)); +} + static int dwmac4_wrback_get_rx_frame_len(struct dma_desc *p, int rx_coe) { return (le32_to_cpu(p->des3) & RDES3_PACKET_SIZE_MASK); @@ -551,6 +562,8 @@ const struct stmmac_desc_ops dwmac4_desc_ops = { .set_tx_owner = dwmac4_set_tx_owner, .set_rx_owner = dwmac4_set_rx_owner, .get_tx_ls = dwmac4_get_tx_ls, + .get_rx_vlan_tci = dwmac4_wrback_get_rx_vlan_tci, + .get_rx_vlan_valid = 
dwmac4_wrback_get_rx_vlan_valid, .get_rx_frame_len = dwmac4_wrback_get_rx_frame_len, .enable_tx_timestamp = dwmac4_rd_enable_tx_timestamp, .get_tx_timestamp_status = dwmac4_wrback_get_tx_timestamp_status, diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index b95d3e137813..1d424c9bf037 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -56,6 +56,10 @@ struct stmmac_desc_ops { void (*set_tx_ic)(struct dma_desc *p); /* Last tx segment reports the transmit status */ int (*get_tx_ls)(struct dma_desc *p); + /* Get the tag of the descriptor */ + u16 (*get_rx_vlan_tci)(struct dma_desc *p); + /* Get the valid status of descriptor */ + bool (*get_rx_vlan_valid)(struct dma_desc *p); /* Return the transmit status looking at the TDES1 */ int (*tx_status)(struct stmmac_extra_stats *x, struct dma_desc *p, void __iomem *ioaddr); @@ -117,6 +121,10 @@ struct stmmac_desc_ops { stmmac_do_void_callback(__priv, desc, set_tx_ic, __args) #define stmmac_get_tx_ls(__priv, __args...) \ stmmac_do_callback(__priv, desc, get_tx_ls, __args) +#define stmmac_get_rx_vlan_tci(__priv, __args...) \ + stmmac_do_callback(__priv, desc, get_rx_vlan_tci, __args) +#define stmmac_get_rx_vlan_valid(__priv, __args...) \ + stmmac_do_callback(__priv, desc, get_rx_vlan_valid, __args) #define stmmac_tx_status(__priv, __args...) \ stmmac_do_callback(__priv, desc, tx_status, __args) #define stmmac_get_tx_len(__priv, __args...) \ @@ -388,6 +396,9 @@ struct stmmac_ops { void (*update_vlan_hash)(struct mac_device_info *hw, u32 hash, __le16 perfect_match, bool is_double); void (*enable_vlan)(struct mac_device_info *hw, u32 type); + void (*rx_hw_vlan)(struct mac_device_info *hw, struct dma_desc *rx_desc, + struct sk_buff *skb); + void (*set_hw_vlan_mode)(struct mac_device_info *hw); int (*add_hw_vlan_rx_fltr)(struct net_device *dev, struct mac_device_info *hw, __be16 proto, u16 vid); @@ -497,6 +508,10 @@ struct stmmac_ops { stmmac_do_void_callback(__priv, mac, update_vlan_hash, __args) #define stmmac_enable_vlan(__priv, __args...) \ stmmac_do_void_callback(__priv, mac, enable_vlan, __args) +#define stmmac_rx_hw_vlan(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, rx_hw_vlan, __args) +#define stmmac_set_hw_vlan_mode(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, set_hw_vlan_mode, __args) #define stmmac_add_hw_vlan_rx_fltr(__priv, __args...) \ stmmac_do_callback(__priv, mac, add_hw_vlan_rx_fltr, __args) #define stmmac_del_hw_vlan_rx_fltr(__priv, __args...) \ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 2afb2bd25977..8964fc8a955f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3469,6 +3469,8 @@ static int stmmac_hw_setup(struct net_device *dev, bool ptp_register) /* Start the ball rolling... */ stmmac_start_all_dma(priv); + stmmac_set_hw_vlan_mode(priv, priv->hw); + if (priv->dma_cap.fpesel) { stmmac_fpe_start_wq(priv); @@ -4993,7 +4995,12 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue, } stmmac_get_rx_hwtstamp(priv, p, np, skb); - stmmac_rx_vlan(priv->dev, skb); + if (priv->hw->hw_vlan_en) + /* MAC level stripping. */ + stmmac_rx_hw_vlan(priv, priv->hw, p, skb); + else + /* Driver level stripping. 
*/ + stmmac_rx_vlan(priv->dev, skb); skb->protocol = eth_type_trans(skb, priv->dev); if (unlikely(!coe)) @@ -5509,7 +5516,14 @@ drain_data: /* Got entire packet into SKB. Finish it. */ stmmac_get_rx_hwtstamp(priv, p, np, skb); - stmmac_rx_vlan(priv->dev, skb); + + if (priv->hw->hw_vlan_en) + /* MAC level stripping. */ + stmmac_rx_hw_vlan(priv, priv->hw, p, skb); + else + /* Driver level stripping. */ + stmmac_rx_vlan(priv->dev, skb); + skb->protocol = eth_type_trans(skb, priv->dev); if (unlikely(!coe)) @@ -5818,6 +5832,13 @@ static int stmmac_set_features(struct net_device *netdev, stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan); } + if (features & NETIF_F_HW_VLAN_CTAG_RX) + priv->hw->hw_vlan_en = true; + else + priv->hw->hw_vlan_en = false; + + stmmac_set_hw_vlan_mode(priv, priv->hw); + return 0; } @@ -6180,30 +6201,23 @@ static struct dentry *stmmac_fs_dir; static void sysfs_display_ring(void *head, int size, int extend_desc, struct seq_file *seq, dma_addr_t dma_phy_addr) { - int i; struct dma_extended_desc *ep = (struct dma_extended_desc *)head; struct dma_desc *p = (struct dma_desc *)head; + unsigned int desc_size; dma_addr_t dma_addr; + int i; + desc_size = extend_desc ? sizeof(*ep) : sizeof(*p); for (i = 0; i < size; i++) { - if (extend_desc) { - dma_addr = dma_phy_addr + i * sizeof(*ep); - seq_printf(seq, "%d [%pad]: 0x%x 0x%x 0x%x 0x%x\n", - i, &dma_addr, - le32_to_cpu(ep->basic.des0), - le32_to_cpu(ep->basic.des1), - le32_to_cpu(ep->basic.des2), - le32_to_cpu(ep->basic.des3)); - ep++; - } else { - dma_addr = dma_phy_addr + i * sizeof(*p); - seq_printf(seq, "%d [%pad]: 0x%x 0x%x 0x%x 0x%x\n", - i, &dma_addr, - le32_to_cpu(p->des0), le32_to_cpu(p->des1), - le32_to_cpu(p->des2), le32_to_cpu(p->des3)); + dma_addr = dma_phy_addr + i * desc_size; + seq_printf(seq, "%d [%pad]: 0x%x 0x%x 0x%x 0x%x\n", + i, &dma_addr, + le32_to_cpu(p->des0), le32_to_cpu(p->des1), + le32_to_cpu(p->des2), le32_to_cpu(p->des3)); + if (extend_desc) + p = &(++ep)->basic; + else p++; - } - seq_printf(seq, "\n"); } } @@ -7516,6 +7530,9 @@ int stmmac_dvr_probe(struct device *device, #ifdef STMMAC_VLAN_TAG_USED /* Both mac100 and gmac support receive VLAN tag detection */ ndev->features |= NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX; + ndev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; + priv->hw->hw_vlan_en = true; + if (priv->dma_cap.vlhash) { ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ndev->features |= NETIF_F_HW_VLAN_STAG_FILTER; diff --git a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c index c51e2af91f69..b9e1d568604b 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c +++ b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c @@ -662,6 +662,31 @@ static void am65_cpsw_get_ethtool_stats(struct net_device *ndev, hw_stats[i].offset); } +static void am65_cpsw_get_eth_mac_stats(struct net_device *ndev, + struct ethtool_eth_mac_stats *s) +{ + struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + struct am65_cpsw_stats_regs __iomem *stats; + + stats = port->stat_base; + + s->FramesTransmittedOK = readl_relaxed(&stats->tx_good_frames); + s->SingleCollisionFrames = readl_relaxed(&stats->tx_single_coll_frames); + s->MultipleCollisionFrames = readl_relaxed(&stats->tx_mult_coll_frames); + s->FramesReceivedOK = readl_relaxed(&stats->rx_good_frames); + s->FrameCheckSequenceErrors = readl_relaxed(&stats->rx_crc_errors); + s->AlignmentErrors = readl_relaxed(&stats->rx_align_code_errors); + s->OctetsTransmittedOK = readl_relaxed(&stats->tx_octets); + 
s->FramesWithDeferredXmissions = readl_relaxed(&stats->tx_deferred_frames); + s->LateCollisions = readl_relaxed(&stats->tx_late_collisions); + s->CarrierSenseErrors = readl_relaxed(&stats->tx_carrier_sense_errors); + s->OctetsReceivedOK = readl_relaxed(&stats->rx_octets); + s->MulticastFramesXmittedOK = readl_relaxed(&stats->tx_multicast_frames); + s->BroadcastFramesXmittedOK = readl_relaxed(&stats->tx_broadcast_frames); + s->MulticastFramesReceivedOK = readl_relaxed(&stats->rx_multicast_frames); + s->BroadcastFramesReceivedOK = readl_relaxed(&stats->rx_broadcast_frames); +}; + static int am65_cpsw_get_ethtool_ts_info(struct net_device *ndev, struct ethtool_ts_info *info) { @@ -729,6 +754,7 @@ const struct ethtool_ops am65_cpsw_ethtool_ops_slave = { .get_sset_count = am65_cpsw_get_sset_count, .get_strings = am65_cpsw_get_strings, .get_ethtool_stats = am65_cpsw_get_ethtool_stats, + .get_eth_mac_stats = am65_cpsw_get_eth_mac_stats, .get_ts_info = am65_cpsw_get_ethtool_ts_info, .get_priv_flags = am65_cpsw_get_ethtool_priv_flags, .set_priv_flags = am65_cpsw_set_ethtool_priv_flags, diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index ece9f8df98ae..7992a76ed4d8 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -136,6 +136,8 @@ NETIF_MSG_IFUP | NETIF_MSG_PROBE | NETIF_MSG_IFDOWN | \ NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) +#define AM65_CPSW_DEFAULT_TX_CHNS 8 + static void am65_cpsw_port_set_sl_mac(struct am65_cpsw_port *slave, const u8 *dev_addr) { @@ -367,10 +369,81 @@ static void am65_cpsw_init_host_port_emac(struct am65_cpsw_common *common); static void am65_cpsw_init_port_switch_ale(struct am65_cpsw_port *port); static void am65_cpsw_init_port_emac_ale(struct am65_cpsw_port *port); +static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma) +{ + struct am65_cpsw_rx_chn *rx_chn = data; + struct cppi5_host_desc_t *desc_rx; + struct sk_buff *skb; + dma_addr_t buf_dma; + u32 buf_dma_len; + void **swdata; + + desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); + swdata = cppi5_hdesc_get_swdata(desc_rx); + skb = *swdata; + cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); + k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma); + + dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE); + k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); + + dev_kfree_skb_any(skb); +} + +static void am65_cpsw_nuss_xmit_free(struct am65_cpsw_tx_chn *tx_chn, + struct cppi5_host_desc_t *desc) +{ + struct cppi5_host_desc_t *first_desc, *next_desc; + dma_addr_t buf_dma, next_desc_dma; + u32 buf_dma_len; + + first_desc = desc; + next_desc = first_desc; + + cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len); + k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma); + + dma_unmap_single(tx_chn->dma_dev, buf_dma, buf_dma_len, DMA_TO_DEVICE); + + next_desc_dma = cppi5_hdesc_get_next_hbdesc(first_desc); + k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &next_desc_dma); + while (next_desc_dma) { + next_desc = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, + next_desc_dma); + cppi5_hdesc_get_obuf(next_desc, &buf_dma, &buf_dma_len); + k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma); + + dma_unmap_page(tx_chn->dma_dev, buf_dma, buf_dma_len, + DMA_TO_DEVICE); + + next_desc_dma = cppi5_hdesc_get_next_hbdesc(next_desc); + k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &next_desc_dma); + + k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc); + } + + 
k3_cppi_desc_pool_free(tx_chn->desc_pool, first_desc); +} + +static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma) +{ + struct am65_cpsw_tx_chn *tx_chn = data; + struct cppi5_host_desc_t *desc_tx; + struct sk_buff *skb; + void **swdata; + + desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma); + swdata = cppi5_hdesc_get_swdata(desc_tx); + skb = *(swdata); + am65_cpsw_nuss_xmit_free(tx_chn, desc_tx); + + dev_kfree_skb_any(skb); +} + static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) { struct am65_cpsw_host *host_p = am65_common_get_host(common); - int port_idx, i, ret; + int port_idx, i, ret, tx; struct sk_buff *skb; u32 val, port_mask; @@ -437,8 +510,12 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) AM65_CPSW_MAX_PACKET_SIZE, GFP_KERNEL); if (!skb) { + ret = -ENOMEM; dev_err(common->dev, "cannot allocate skb\n"); - return -ENOMEM; + if (i) + goto fail_rx; + + return ret; } ret = am65_cpsw_nuss_rx_push(common, skb); @@ -447,17 +524,28 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) "cannot submit skb to channel rx, error %d\n", ret); kfree_skb(skb); + if (i) + goto fail_rx; + return ret; } - kmemleak_not_leak(skb); } - k3_udma_glue_enable_rx_chn(common->rx_chns.rx_chn); - for (i = 0; i < common->tx_ch_num; i++) { - ret = k3_udma_glue_enable_tx_chn(common->tx_chns[i].tx_chn); - if (ret) - return ret; - napi_enable(&common->tx_chns[i].napi_tx); + ret = k3_udma_glue_enable_rx_chn(common->rx_chns.rx_chn); + if (ret) { + dev_err(common->dev, "couldn't enable rx chn: %d\n", ret); + goto fail_rx; + } + + for (tx = 0; tx < common->tx_ch_num; tx++) { + ret = k3_udma_glue_enable_tx_chn(common->tx_chns[tx].tx_chn); + if (ret) { + dev_err(common->dev, "couldn't enable tx chn %d: %d\n", + tx, ret); + tx--; + goto fail_tx; + } + napi_enable(&common->tx_chns[tx].napi_tx); } napi_enable(&common->napi_rx); @@ -468,10 +556,22 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) dev_dbg(common->dev, "cpsw_nuss started\n"); return 0; -} -static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma); -static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma); +fail_tx: + while (tx >= 0) { + napi_disable(&common->tx_chns[tx].napi_tx); + k3_udma_glue_disable_tx_chn(common->tx_chns[tx].tx_chn); + tx--; + } + + k3_udma_glue_disable_rx_chn(common->rx_chns.rx_chn); + +fail_rx: + k3_udma_glue_reset_rx_chn(common->rx_chns.rx_chn, 0, + &common->rx_chns, + am65_cpsw_nuss_rx_cleanup, 0); + return ret; +} static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common) { @@ -646,27 +746,6 @@ runtime_put: return ret; } -static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma) -{ - struct am65_cpsw_rx_chn *rx_chn = data; - struct cppi5_host_desc_t *desc_rx; - struct sk_buff *skb; - dma_addr_t buf_dma; - u32 buf_dma_len; - void **swdata; - - desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); - swdata = cppi5_hdesc_get_swdata(desc_rx); - skb = *swdata; - cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); - k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma); - - dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE); - k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); - - dev_kfree_skb_any(skb); -} - static void am65_cpsw_nuss_rx_ts(struct sk_buff *skb, u32 *psdata) { struct skb_shared_hwtstamps *ssh; @@ -840,56 +919,6 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) return num_rx; } 
-static void am65_cpsw_nuss_xmit_free(struct am65_cpsw_tx_chn *tx_chn, - struct cppi5_host_desc_t *desc) -{ - struct cppi5_host_desc_t *first_desc, *next_desc; - dma_addr_t buf_dma, next_desc_dma; - u32 buf_dma_len; - - first_desc = desc; - next_desc = first_desc; - - cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len); - k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma); - - dma_unmap_single(tx_chn->dma_dev, buf_dma, buf_dma_len, DMA_TO_DEVICE); - - next_desc_dma = cppi5_hdesc_get_next_hbdesc(first_desc); - k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &next_desc_dma); - while (next_desc_dma) { - next_desc = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, - next_desc_dma); - cppi5_hdesc_get_obuf(next_desc, &buf_dma, &buf_dma_len); - k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma); - - dma_unmap_page(tx_chn->dma_dev, buf_dma, buf_dma_len, - DMA_TO_DEVICE); - - next_desc_dma = cppi5_hdesc_get_next_hbdesc(next_desc); - k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &next_desc_dma); - - k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc); - } - - k3_cppi_desc_pool_free(tx_chn->desc_pool, first_desc); -} - -static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma) -{ - struct am65_cpsw_tx_chn *tx_chn = data; - struct cppi5_host_desc_t *desc_tx; - struct sk_buff *skb; - void **swdata; - - desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma); - swdata = cppi5_hdesc_get_swdata(desc_tx); - skb = *(swdata); - am65_cpsw_nuss_xmit_free(tx_chn, desc_tx); - - dev_kfree_skb_any(skb); -} - static struct sk_buff * am65_cpsw_nuss_tx_compl_packet(struct am65_cpsw_tx_chn *tx_chn, dma_addr_t desc_dma) @@ -2897,7 +2926,7 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) common->rx_flow_id_base = -1; init_completion(&common->tdown_complete); - common->tx_ch_num = 1; + common->tx_ch_num = AM65_CPSW_DEFAULT_TX_CHNS; common->pf_p0_rx_ptype_rrobin = false; common->default_vlan = 1; diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig index 0014729b8865..35d96c633a33 100644 --- a/drivers/net/ethernet/xilinx/Kconfig +++ b/drivers/net/ethernet/xilinx/Kconfig @@ -26,6 +26,7 @@ config XILINX_EMACLITE config XILINX_AXI_EMAC tristate "Xilinx 10/100/1000 AXI Ethernet support" depends on HAS_IOMEM + depends on XILINX_DMA select PHYLINK help This driver supports the 10/100/1000 Ethernet from Xilinx for the diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index 575ff9de8985..807ead678551 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -14,6 +14,7 @@ #include <linux/interrupt.h> #include <linux/if_vlan.h> #include <linux/phylink.h> +#include <linux/skbuff.h> /* Packet size info */ #define XAE_HDR_SIZE 14 /* Size of Ethernet header */ @@ -379,6 +380,22 @@ struct axidma_bd { #define XAE_NUM_MISC_CLOCKS 3 /** + * struct skbuf_dma_descriptor - skb for each dma descriptor + * @sgl: Pointer for sglist. + * @desc: Pointer to dma descriptor. + * @dma_address: dma address of sglist. + * @skb: Pointer to SKB transferred using DMA + * @sg_len: number of entries in the sglist. + */ +struct skbuf_dma_descriptor { + struct scatterlist sgl[MAX_SKB_FRAGS + 1]; + struct dma_async_tx_descriptor *desc; + dma_addr_t dma_address; + struct sk_buff *skb; + int sg_len; +}; + +/** * struct axienet_local - axienet private per device data * @ndev: Pointer for net_device to which it will be attached. 
* @dev: Pointer to device structure @@ -435,6 +452,15 @@ struct axidma_bd { * @coalesce_usec_rx: IRQ coalesce delay for RX * @coalesce_count_tx: Store the irq coalesce on TX side. * @coalesce_usec_tx: IRQ coalesce delay for TX + * @use_dmaengine: flag to check dmaengine framework usage. + * @tx_chan: TX DMA channel. + * @rx_chan: RX DMA channel. + * @tx_skb_ring: Pointer to TX skb ring buffer array. + * @rx_skb_ring: Pointer to RX skb ring buffer array. + * @tx_ring_head: TX skb ring buffer head index. + * @tx_ring_tail: TX skb ring buffer tail index. + * @rx_ring_head: RX skb ring buffer head index. + * @rx_ring_tail: RX skb ring buffer tail index. */ struct axienet_local { struct net_device *ndev; @@ -499,6 +525,15 @@ struct axienet_local { u32 coalesce_usec_rx; u32 coalesce_count_tx; u32 coalesce_usec_tx; + u8 use_dmaengine; + struct dma_chan *tx_chan; + struct dma_chan *rx_chan; + struct skbuf_dma_descriptor **tx_skb_ring; + struct skbuf_dma_descriptor **rx_skb_ring; + int tx_ring_head; + int tx_ring_tail; + int rx_ring_head; + int rx_ring_tail; }; /** diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index bf6e33990490..aaf780fd4f5e 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -38,6 +38,11 @@ #include <linux/phy.h> #include <linux/mii.h> #include <linux/ethtool.h> +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/dma/xilinx_dma.h> +#include <linux/circ_buf.h> +#include <net/netdev_queues.h> #include "xilinx_axienet.h" @@ -47,6 +52,9 @@ #define TX_BD_NUM_MIN (MAX_SKB_FRAGS + 1) #define TX_BD_NUM_MAX 4096 #define RX_BD_NUM_MAX 4096 +#define DMA_NUM_APP_WORDS 5 +#define LEN_APP 4 +#define RX_BUF_NUM_DEFAULT 128 /* Must be shorter than length of ethtool_drvinfo.driver field to fit */ #define DRIVER_NAME "xaxienet" @@ -55,6 +63,8 @@ #define AXIENET_REGS_N 40 +static void axienet_rx_submit_desc(struct net_device *ndev); + /* Match table for of_platform binding */ static const struct of_device_id axienet_of_match[] = { { .compatible = "xlnx,axi-ethernet-1.00.a", }, @@ -120,6 +130,16 @@ static struct axienet_option axienet_options[] = { {} }; +static struct skbuf_dma_descriptor *axienet_get_rx_desc(struct axienet_local *lp, int i) +{ + return lp->rx_skb_ring[i & (RX_BUF_NUM_DEFAULT - 1)]; +} + +static struct skbuf_dma_descriptor *axienet_get_tx_desc(struct axienet_local *lp, int i) +{ + return lp->tx_skb_ring[i & (TX_BD_NUM_MAX - 1)]; +} + /** * axienet_dma_in32 - Memory mapped Axi DMA register read * @lp: Pointer to axienet local structure @@ -589,10 +609,6 @@ static int axienet_device_reset(struct net_device *ndev) struct axienet_local *lp = netdev_priv(ndev); int ret; - ret = __axienet_device_reset(lp); - if (ret) - return ret; - lp->max_frm_size = XAE_MAX_VLAN_FRAME_SIZE; lp->options |= XAE_OPTION_VLAN; lp->options &= (~XAE_OPTION_JUMBO); @@ -606,11 +622,17 @@ static int axienet_device_reset(struct net_device *ndev) lp->options |= XAE_OPTION_JUMBO; } - ret = axienet_dma_bd_init(ndev); - if (ret) { - netdev_err(ndev, "%s: descriptor allocation failed\n", - __func__); - return ret; + if (!lp->use_dmaengine) { + ret = __axienet_device_reset(lp); + if (ret) + return ret; + + ret = axienet_dma_bd_init(ndev); + if (ret) { + netdev_err(ndev, "%s: descriptor allocation failed\n", + __func__); + return ret; + } } axienet_status = axienet_ior(lp, XAE_RCW1_OFFSET); @@ -726,6 +748,128 @@ static inline int 
axienet_check_tx_bd_space(struct axienet_local *lp, } /** + * axienet_dma_tx_cb - DMA engine callback for TX channel. + * @data: Pointer to the axienet_local structure. + * @result: error reporting through dmaengine_result. + * This function is called by dmaengine driver for TX channel to notify + * that the transmit is done. + */ +static void axienet_dma_tx_cb(void *data, const struct dmaengine_result *result) +{ + struct skbuf_dma_descriptor *skbuf_dma; + struct axienet_local *lp = data; + struct netdev_queue *txq; + int len; + + skbuf_dma = axienet_get_tx_desc(lp, lp->tx_ring_tail++); + len = skbuf_dma->skb->len; + txq = skb_get_tx_queue(lp->ndev, skbuf_dma->skb); + u64_stats_update_begin(&lp->tx_stat_sync); + u64_stats_add(&lp->tx_bytes, len); + u64_stats_add(&lp->tx_packets, 1); + u64_stats_update_end(&lp->tx_stat_sync); + dma_unmap_sg(lp->dev, skbuf_dma->sgl, skbuf_dma->sg_len, DMA_TO_DEVICE); + dev_consume_skb_any(skbuf_dma->skb); + netif_txq_completed_wake(txq, 1, len, + CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX), + 2 * MAX_SKB_FRAGS); +} + +/** + * axienet_start_xmit_dmaengine - Starts the transmission. + * @skb: sk_buff pointer that contains data to be Txed. + * @ndev: Pointer to net_device structure. + * + * Return: NETDEV_TX_OK on success or any non space errors. + * NETDEV_TX_BUSY when free element in TX skb ring buffer + * is not available. + * + * This function is invoked to initiate transmission. The + * function sets the skbs, register dma callback API and submit + * the dma transaction. + * Additionally if checksum offloading is supported, + * it populates AXI Stream Control fields with appropriate values. + */ +static netdev_tx_t +axienet_start_xmit_dmaengine(struct sk_buff *skb, struct net_device *ndev) +{ + struct dma_async_tx_descriptor *dma_tx_desc = NULL; + struct axienet_local *lp = netdev_priv(ndev); + u32 app_metadata[DMA_NUM_APP_WORDS] = {0}; + struct skbuf_dma_descriptor *skbuf_dma; + struct dma_device *dma_dev; + struct netdev_queue *txq; + u32 csum_start_off; + u32 csum_index_off; + int sg_len; + int ret; + + dma_dev = lp->tx_chan->device; + sg_len = skb_shinfo(skb)->nr_frags + 1; + if (CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX) <= sg_len) { + netif_stop_queue(ndev); + if (net_ratelimit()) + netdev_warn(ndev, "TX ring unexpectedly full\n"); + return NETDEV_TX_BUSY; + } + + skbuf_dma = axienet_get_tx_desc(lp, lp->tx_ring_head); + if (!skbuf_dma) + goto xmit_error_drop_skb; + + lp->tx_ring_head++; + sg_init_table(skbuf_dma->sgl, sg_len); + ret = skb_to_sgvec(skb, skbuf_dma->sgl, 0, skb->len); + if (ret < 0) + goto xmit_error_drop_skb; + + ret = dma_map_sg(lp->dev, skbuf_dma->sgl, sg_len, DMA_TO_DEVICE); + if (!ret) + goto xmit_error_drop_skb; + + /* Fill up app fields for checksum */ + if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (lp->features & XAE_FEATURE_FULL_TX_CSUM) { + /* Tx Full Checksum Offload Enabled */ + app_metadata[0] |= 2; + } else if (lp->features & XAE_FEATURE_PARTIAL_TX_CSUM) { + csum_start_off = skb_transport_offset(skb); + csum_index_off = csum_start_off + skb->csum_offset; + /* Tx Partial Checksum Offload Enabled */ + app_metadata[0] |= 1; + app_metadata[1] = (csum_start_off << 16) | csum_index_off; + } + } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + app_metadata[0] |= 2; /* Tx Full Checksum Offload Enabled */ + } + + dma_tx_desc = dma_dev->device_prep_slave_sg(lp->tx_chan, skbuf_dma->sgl, + sg_len, DMA_MEM_TO_DEV, + DMA_PREP_INTERRUPT, (void *)app_metadata); + if (!dma_tx_desc) + goto 
xmit_error_unmap_sg; + + skbuf_dma->skb = skb; + skbuf_dma->sg_len = sg_len; + dma_tx_desc->callback_param = lp; + dma_tx_desc->callback_result = axienet_dma_tx_cb; + dmaengine_submit(dma_tx_desc); + dma_async_issue_pending(lp->tx_chan); + txq = skb_get_tx_queue(lp->ndev, skb); + netdev_tx_sent_queue(txq, skb->len); + netif_txq_maybe_stop(txq, CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX), + MAX_SKB_FRAGS + 1, 2 * MAX_SKB_FRAGS); + + return NETDEV_TX_OK; + +xmit_error_unmap_sg: + dma_unmap_sg(lp->dev, skbuf_dma->sgl, sg_len, DMA_TO_DEVICE); +xmit_error_drop_skb: + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +/** * axienet_tx_poll - Invoked once a transmit is completed by the * Axi DMA Tx channel. * @napi: Pointer to NAPI structure. @@ -892,6 +1036,42 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) } /** + * axienet_dma_rx_cb - DMA engine callback for RX channel. + * @data: Pointer to the skbuf_dma_descriptor structure. + * @result: error reporting through dmaengine_result. + * This function is called by dmaengine driver for RX channel to notify + * that the packet is received. + */ +static void axienet_dma_rx_cb(void *data, const struct dmaengine_result *result) +{ + struct skbuf_dma_descriptor *skbuf_dma; + size_t meta_len, meta_max_len, rx_len; + struct axienet_local *lp = data; + struct sk_buff *skb; + u32 *app_metadata; + + skbuf_dma = axienet_get_rx_desc(lp, lp->rx_ring_tail++); + skb = skbuf_dma->skb; + app_metadata = dmaengine_desc_get_metadata_ptr(skbuf_dma->desc, &meta_len, + &meta_max_len); + dma_unmap_single(lp->dev, skbuf_dma->dma_address, lp->max_frm_size, + DMA_FROM_DEVICE); + /* TODO: Derive app word index programmatically */ + rx_len = (app_metadata[LEN_APP] & 0xFFFF); + skb_put(skb, rx_len); + skb->protocol = eth_type_trans(skb, lp->ndev); + skb->ip_summed = CHECKSUM_NONE; + + __netif_rx(skb); + u64_stats_update_begin(&lp->rx_stat_sync); + u64_stats_add(&lp->rx_packets, 1); + u64_stats_add(&lp->rx_bytes, rx_len); + u64_stats_update_end(&lp->rx_stat_sync); + axienet_rx_submit_desc(lp->ndev); + dma_async_issue_pending(lp->rx_chan); +} + +/** * axienet_rx_poll - Triggered by RX ISR to complete the BD processing. * @napi: Pointer to NAPI structure. * @budget: Max number of RX packets to process. @@ -1125,40 +1305,158 @@ static irqreturn_t axienet_eth_irq(int irq, void *_ndev) static void axienet_dma_err_handler(struct work_struct *work); /** - * axienet_open - Driver open routine. - * @ndev: Pointer to net_device structure + * axienet_rx_submit_desc - Submit the rx descriptors to dmaengine. + * allocate skbuff, map the scatterlist and obtain a descriptor + * and then add the callback information and submit descriptor. 
+ * + * @ndev: net_device pointer + * + */ +static void axienet_rx_submit_desc(struct net_device *ndev) +{ + struct dma_async_tx_descriptor *dma_rx_desc = NULL; + struct axienet_local *lp = netdev_priv(ndev); + struct skbuf_dma_descriptor *skbuf_dma; + struct sk_buff *skb; + dma_addr_t addr; + + skbuf_dma = axienet_get_rx_desc(lp, lp->rx_ring_head); + if (!skbuf_dma) + return; + + lp->rx_ring_head++; + skb = netdev_alloc_skb(ndev, lp->max_frm_size); + if (!skb) + return; + + sg_init_table(skbuf_dma->sgl, 1); + addr = dma_map_single(lp->dev, skb->data, lp->max_frm_size, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(lp->dev, addr))) { + if (net_ratelimit()) + netdev_err(ndev, "DMA mapping error\n"); + goto rx_submit_err_free_skb; + } + sg_dma_address(skbuf_dma->sgl) = addr; + sg_dma_len(skbuf_dma->sgl) = lp->max_frm_size; + dma_rx_desc = dmaengine_prep_slave_sg(lp->rx_chan, skbuf_dma->sgl, + 1, DMA_DEV_TO_MEM, + DMA_PREP_INTERRUPT); + if (!dma_rx_desc) + goto rx_submit_err_unmap_skb; + + skbuf_dma->skb = skb; + skbuf_dma->dma_address = sg_dma_address(skbuf_dma->sgl); + skbuf_dma->desc = dma_rx_desc; + dma_rx_desc->callback_param = lp; + dma_rx_desc->callback_result = axienet_dma_rx_cb; + dmaengine_submit(dma_rx_desc); + + return; + +rx_submit_err_unmap_skb: + dma_unmap_single(lp->dev, addr, lp->max_frm_size, DMA_FROM_DEVICE); +rx_submit_err_free_skb: + dev_kfree_skb(skb); +} + +/** + * axienet_init_dmaengine - init the dmaengine code. + * @ndev: Pointer to net_device structure * * Return: 0, on success. - * non-zero error value on failure + * non-zero error value on failure * - * This is the driver open routine. It calls phylink_start to start the - * PHY device. - * It also allocates interrupt service routines, enables the interrupt lines - * and ISR handling. Axi Ethernet core is reset through Axi DMA core. Buffer - * descriptors are initialized. + * This is the dmaengine initialization code. */ -static int axienet_open(struct net_device *ndev) +static int axienet_init_dmaengine(struct net_device *ndev) { - int ret; struct axienet_local *lp = netdev_priv(ndev); + struct skbuf_dma_descriptor *skbuf_dma; + int i, ret; - dev_dbg(&ndev->dev, "axienet_open()\n"); + lp->tx_chan = dma_request_chan(lp->dev, "tx_chan0"); + if (IS_ERR(lp->tx_chan)) { + dev_err(lp->dev, "No Ethernet DMA (TX) channel found\n"); + return PTR_ERR(lp->tx_chan); + } - /* When we do an Axi Ethernet reset, it resets the complete core - * including the MDIO. MDIO must be disabled before resetting. - * Hold MDIO bus lock to avoid MDIO accesses during the reset. 
- */ - axienet_lock_mii(lp); - ret = axienet_device_reset(ndev); - axienet_unlock_mii(lp); + lp->rx_chan = dma_request_chan(lp->dev, "rx_chan0"); + if (IS_ERR(lp->rx_chan)) { + ret = PTR_ERR(lp->rx_chan); + dev_err(lp->dev, "No Ethernet DMA (RX) channel found\n"); + goto err_dma_release_tx; + } - ret = phylink_of_phy_connect(lp->phylink, lp->dev->of_node, 0); - if (ret) { - dev_err(lp->dev, "phylink_of_phy_connect() failed: %d\n", ret); - return ret; + lp->tx_ring_tail = 0; + lp->tx_ring_head = 0; + lp->rx_ring_tail = 0; + lp->rx_ring_head = 0; + lp->tx_skb_ring = kcalloc(TX_BD_NUM_MAX, sizeof(*lp->tx_skb_ring), + GFP_KERNEL); + if (!lp->tx_skb_ring) { + ret = -ENOMEM; + goto err_dma_release_rx; + } + for (i = 0; i < TX_BD_NUM_MAX; i++) { + skbuf_dma = kzalloc(sizeof(*skbuf_dma), GFP_KERNEL); + if (!skbuf_dma) { + ret = -ENOMEM; + goto err_free_tx_skb_ring; + } + lp->tx_skb_ring[i] = skbuf_dma; } - phylink_start(lp->phylink); + lp->rx_skb_ring = kcalloc(RX_BUF_NUM_DEFAULT, sizeof(*lp->rx_skb_ring), + GFP_KERNEL); + if (!lp->rx_skb_ring) { + ret = -ENOMEM; + goto err_free_tx_skb_ring; + } + for (i = 0; i < RX_BUF_NUM_DEFAULT; i++) { + skbuf_dma = kzalloc(sizeof(*skbuf_dma), GFP_KERNEL); + if (!skbuf_dma) { + ret = -ENOMEM; + goto err_free_rx_skb_ring; + } + lp->rx_skb_ring[i] = skbuf_dma; + } + /* TODO: Instead of BD_NUM_DEFAULT use runtime support */ + for (i = 0; i < RX_BUF_NUM_DEFAULT; i++) + axienet_rx_submit_desc(ndev); + dma_async_issue_pending(lp->rx_chan); + + return 0; + +err_free_rx_skb_ring: + for (i = 0; i < RX_BUF_NUM_DEFAULT; i++) + kfree(lp->rx_skb_ring[i]); + kfree(lp->rx_skb_ring); +err_free_tx_skb_ring: + for (i = 0; i < TX_BD_NUM_MAX; i++) + kfree(lp->tx_skb_ring[i]); + kfree(lp->tx_skb_ring); +err_dma_release_rx: + dma_release_channel(lp->rx_chan); +err_dma_release_tx: + dma_release_channel(lp->tx_chan); + return ret; +} + +/** + * axienet_init_legacy_dma - init the dma legacy code. + * @ndev: Pointer to net_device structure + * + * Return: 0, on success. + * non-zero error value on failure + * + * This is the dma initialization code. It also allocates interrupt + * service routines, enables the interrupt lines and ISR handling. + * + */ +static int axienet_init_legacy_dma(struct net_device *ndev) +{ + int ret; + struct axienet_local *lp = netdev_priv(ndev); /* Enable worker thread for Axi DMA error handling */ INIT_WORK(&lp->dma_err_task, axienet_dma_err_handler); @@ -1193,14 +1491,77 @@ err_rx_irq: err_tx_irq: napi_disable(&lp->napi_tx); napi_disable(&lp->napi_rx); - phylink_stop(lp->phylink); - phylink_disconnect_phy(lp->phylink); cancel_work_sync(&lp->dma_err_task); dev_err(lp->dev, "request_irq() failed\n"); return ret; } /** + * axienet_open - Driver open routine. + * @ndev: Pointer to net_device structure + * + * Return: 0, on success. + * non-zero error value on failure + * + * This is the driver open routine. It calls phylink_start to start the + * PHY device. + * It also allocates interrupt service routines, enables the interrupt lines + * and ISR handling. Axi Ethernet core is reset through Axi DMA core. Buffer + * descriptors are initialized. + */ +static int axienet_open(struct net_device *ndev) +{ + int ret; + struct axienet_local *lp = netdev_priv(ndev); + + dev_dbg(&ndev->dev, "%s\n", __func__); + + /* When we do an Axi Ethernet reset, it resets the complete core + * including the MDIO. MDIO must be disabled before resetting. + * Hold MDIO bus lock to avoid MDIO accesses during the reset. 
+ */ + axienet_lock_mii(lp); + ret = axienet_device_reset(ndev); + axienet_unlock_mii(lp); + + ret = phylink_of_phy_connect(lp->phylink, lp->dev->of_node, 0); + if (ret) { + dev_err(lp->dev, "phylink_of_phy_connect() failed: %d\n", ret); + return ret; + } + + phylink_start(lp->phylink); + + if (lp->use_dmaengine) { + /* Enable interrupts for Axi Ethernet core (if defined) */ + if (lp->eth_irq > 0) { + ret = request_irq(lp->eth_irq, axienet_eth_irq, IRQF_SHARED, + ndev->name, ndev); + if (ret) + goto err_phy; + } + + ret = axienet_init_dmaengine(ndev); + if (ret < 0) + goto err_free_eth_irq; + } else { + ret = axienet_init_legacy_dma(ndev); + if (ret) + goto err_phy; + } + + return 0; + +err_free_eth_irq: + if (lp->eth_irq > 0) + free_irq(lp->eth_irq, ndev); +err_phy: + phylink_stop(lp->phylink); + phylink_disconnect_phy(lp->phylink); + return ret; +} + +/** * axienet_stop - Driver stop routine. * @ndev: Pointer to net_device structure * @@ -1213,11 +1574,14 @@ err_tx_irq: static int axienet_stop(struct net_device *ndev) { struct axienet_local *lp = netdev_priv(ndev); + int i; dev_dbg(&ndev->dev, "axienet_close()\n"); - napi_disable(&lp->napi_tx); - napi_disable(&lp->napi_rx); + if (!lp->use_dmaengine) { + napi_disable(&lp->napi_tx); + napi_disable(&lp->napi_rx); + } phylink_stop(lp->phylink); phylink_disconnect_phy(lp->phylink); @@ -1225,18 +1589,33 @@ static int axienet_stop(struct net_device *ndev) axienet_setoptions(ndev, lp->options & ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN)); - axienet_dma_stop(lp); + if (!lp->use_dmaengine) { + axienet_dma_stop(lp); + cancel_work_sync(&lp->dma_err_task); + free_irq(lp->tx_irq, ndev); + free_irq(lp->rx_irq, ndev); + axienet_dma_bd_release(ndev); + } else { + dmaengine_terminate_sync(lp->tx_chan); + dmaengine_synchronize(lp->tx_chan); + dmaengine_terminate_sync(lp->rx_chan); + dmaengine_synchronize(lp->rx_chan); + + for (i = 0; i < TX_BD_NUM_MAX; i++) + kfree(lp->tx_skb_ring[i]); + kfree(lp->tx_skb_ring); + for (i = 0; i < RX_BUF_NUM_DEFAULT; i++) + kfree(lp->rx_skb_ring[i]); + kfree(lp->rx_skb_ring); + + dma_release_channel(lp->rx_chan); + dma_release_channel(lp->tx_chan); + } axienet_iow(lp, XAE_IE_OFFSET, 0); - cancel_work_sync(&lp->dma_err_task); - if (lp->eth_irq > 0) free_irq(lp->eth_irq, ndev); - free_irq(lp->tx_irq, ndev); - free_irq(lp->rx_irq, ndev); - - axienet_dma_bd_release(ndev); return 0; } @@ -1333,6 +1712,18 @@ static const struct net_device_ops axienet_netdev_ops = { #endif }; +static const struct net_device_ops axienet_netdev_dmaengine_ops = { + .ndo_open = axienet_open, + .ndo_stop = axienet_stop, + .ndo_start_xmit = axienet_start_xmit_dmaengine, + .ndo_get_stats64 = axienet_get_stats64, + .ndo_change_mtu = axienet_change_mtu, + .ndo_set_mac_address = netdev_set_mac_address, + .ndo_validate_addr = eth_validate_addr, + .ndo_eth_ioctl = axienet_ioctl, + .ndo_set_rx_mode = axienet_set_multicast_list, +}; + /** * axienet_ethtools_get_drvinfo - Get various Axi Ethernet driver information. 
* @ndev: Pointer to net_device structure @@ -1412,14 +1803,16 @@ static void axienet_ethtools_get_regs(struct net_device *ndev, data[29] = axienet_ior(lp, XAE_FMI_OFFSET); data[30] = axienet_ior(lp, XAE_AF0_OFFSET); data[31] = axienet_ior(lp, XAE_AF1_OFFSET); - data[32] = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); - data[33] = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET); - data[34] = axienet_dma_in32(lp, XAXIDMA_TX_CDESC_OFFSET); - data[35] = axienet_dma_in32(lp, XAXIDMA_TX_TDESC_OFFSET); - data[36] = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); - data[37] = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET); - data[38] = axienet_dma_in32(lp, XAXIDMA_RX_CDESC_OFFSET); - data[39] = axienet_dma_in32(lp, XAXIDMA_RX_TDESC_OFFSET); + if (!lp->use_dmaengine) { + data[32] = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); + data[33] = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET); + data[34] = axienet_dma_in32(lp, XAXIDMA_TX_CDESC_OFFSET); + data[35] = axienet_dma_in32(lp, XAXIDMA_TX_TDESC_OFFSET); + data[36] = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); + data[37] = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET); + data[38] = axienet_dma_in32(lp, XAXIDMA_RX_CDESC_OFFSET); + data[39] = axienet_dma_in32(lp, XAXIDMA_RX_TDESC_OFFSET); + } } static void @@ -1863,7 +2256,6 @@ static int axienet_probe(struct platform_device *pdev) SET_NETDEV_DEV(ndev, &pdev->dev); ndev->flags &= ~IFF_MULTICAST; /* clear multicast */ ndev->features = NETIF_F_SG; - ndev->netdev_ops = &axienet_netdev_ops; ndev->ethtool_ops = &axienet_ethtool_ops; /* MTU range: 64 - 9000 */ @@ -1880,9 +2272,6 @@ static int axienet_probe(struct platform_device *pdev) u64_stats_init(&lp->rx_stat_sync); u64_stats_init(&lp->tx_stat_sync); - netif_napi_add(ndev, &lp->napi_rx, axienet_rx_poll); - netif_napi_add(ndev, &lp->napi_tx, axienet_tx_poll); - lp->axi_clk = devm_clk_get_optional(&pdev->dev, "s_axi_lite_clk"); if (!lp->axi_clk) { /* For backward compatibility, if named AXI clock is not present, @@ -2008,82 +2397,118 @@ static int axienet_probe(struct platform_device *pdev) goto cleanup_clk; } - /* Find the DMA node, map the DMA registers, and decode the DMA IRQs */ - np = of_parse_phandle(pdev->dev.of_node, "axistream-connected", 0); - if (np) { - struct resource dmares; + if (!of_find_property(pdev->dev.of_node, "dmas", NULL)) { + /* Find the DMA node, map the DMA registers, and decode the DMA IRQs */ + np = of_parse_phandle(pdev->dev.of_node, "axistream-connected", 0); - ret = of_address_to_resource(np, 0, &dmares); - if (ret) { - dev_err(&pdev->dev, - "unable to get DMA resource\n"); + if (np) { + struct resource dmares; + + ret = of_address_to_resource(np, 0, &dmares); + if (ret) { + dev_err(&pdev->dev, + "unable to get DMA resource\n"); + of_node_put(np); + goto cleanup_clk; + } + lp->dma_regs = devm_ioremap_resource(&pdev->dev, + &dmares); + lp->rx_irq = irq_of_parse_and_map(np, 1); + lp->tx_irq = irq_of_parse_and_map(np, 0); of_node_put(np); + lp->eth_irq = platform_get_irq_optional(pdev, 0); + } else { + /* Check for these resources directly on the Ethernet node. 
*/ + lp->dma_regs = devm_platform_get_and_ioremap_resource(pdev, 1, NULL); + lp->rx_irq = platform_get_irq(pdev, 1); + lp->tx_irq = platform_get_irq(pdev, 0); + lp->eth_irq = platform_get_irq_optional(pdev, 2); + } + if (IS_ERR(lp->dma_regs)) { + dev_err(&pdev->dev, "could not map DMA regs\n"); + ret = PTR_ERR(lp->dma_regs); + goto cleanup_clk; + } + if (lp->rx_irq <= 0 || lp->tx_irq <= 0) { + dev_err(&pdev->dev, "could not determine irqs\n"); + ret = -ENOMEM; goto cleanup_clk; } - lp->dma_regs = devm_ioremap_resource(&pdev->dev, - &dmares); - lp->rx_irq = irq_of_parse_and_map(np, 1); - lp->tx_irq = irq_of_parse_and_map(np, 0); - of_node_put(np); - lp->eth_irq = platform_get_irq_optional(pdev, 0); - } else { - /* Check for these resources directly on the Ethernet node. */ - lp->dma_regs = devm_platform_get_and_ioremap_resource(pdev, 1, NULL); - lp->rx_irq = platform_get_irq(pdev, 1); - lp->tx_irq = platform_get_irq(pdev, 0); - lp->eth_irq = platform_get_irq_optional(pdev, 2); - } - if (IS_ERR(lp->dma_regs)) { - dev_err(&pdev->dev, "could not map DMA regs\n"); - ret = PTR_ERR(lp->dma_regs); - goto cleanup_clk; - } - if ((lp->rx_irq <= 0) || (lp->tx_irq <= 0)) { - dev_err(&pdev->dev, "could not determine irqs\n"); - ret = -ENOMEM; - goto cleanup_clk; - } - /* Reset core now that clocks are enabled, prior to accessing MDIO */ - ret = __axienet_device_reset(lp); - if (ret) - goto cleanup_clk; + /* Reset core now that clocks are enabled, prior to accessing MDIO */ + ret = __axienet_device_reset(lp); + if (ret) + goto cleanup_clk; + + /* Autodetect the need for 64-bit DMA pointers. + * When the IP is configured for a bus width bigger than 32 bits, + * writing the MSB registers is mandatory, even if they are all 0. + * We can detect this case by writing all 1's to one such register + * and see if that sticks: when the IP is configured for 32 bits + * only, those registers are RES0. + * Those MSB registers were introduced in IP v7.1, which we check first. + */ + if ((axienet_ior(lp, XAE_ID_OFFSET) >> 24) >= 0x9) { + void __iomem *desc = lp->dma_regs + XAXIDMA_TX_CDESC_OFFSET + 4; - /* Autodetect the need for 64-bit DMA pointers. - * When the IP is configured for a bus width bigger than 32 bits, - * writing the MSB registers is mandatory, even if they are all 0. - * We can detect this case by writing all 1's to one such register - * and see if that sticks: when the IP is configured for 32 bits - * only, those registers are RES0. - * Those MSB registers were introduced in IP v7.1, which we check first. 
- */ - if ((axienet_ior(lp, XAE_ID_OFFSET) >> 24) >= 0x9) { - void __iomem *desc = lp->dma_regs + XAXIDMA_TX_CDESC_OFFSET + 4; - - iowrite32(0x0, desc); - if (ioread32(desc) == 0) { /* sanity check */ - iowrite32(0xffffffff, desc); - if (ioread32(desc) > 0) { - lp->features |= XAE_FEATURE_DMA_64BIT; - addr_width = 64; - dev_info(&pdev->dev, - "autodetected 64-bit DMA range\n"); - } iowrite32(0x0, desc); + if (ioread32(desc) == 0) { /* sanity check */ + iowrite32(0xffffffff, desc); + if (ioread32(desc) > 0) { + lp->features |= XAE_FEATURE_DMA_64BIT; + addr_width = 64; + dev_info(&pdev->dev, + "autodetected 64-bit DMA range\n"); + } + iowrite32(0x0, desc); + } + } + if (!IS_ENABLED(CONFIG_64BIT) && lp->features & XAE_FEATURE_DMA_64BIT) { + dev_err(&pdev->dev, "64-bit addressable DMA is not compatible with 32-bit archecture\n"); + ret = -EINVAL; + goto cleanup_clk; } - } - if (!IS_ENABLED(CONFIG_64BIT) && lp->features & XAE_FEATURE_DMA_64BIT) { - dev_err(&pdev->dev, "64-bit addressable DMA is not compatible with 32-bit archecture\n"); - ret = -EINVAL; - goto cleanup_clk; - } - ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(addr_width)); - if (ret) { - dev_err(&pdev->dev, "No suitable DMA available\n"); - goto cleanup_clk; + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(addr_width)); + if (ret) { + dev_err(&pdev->dev, "No suitable DMA available\n"); + goto cleanup_clk; + } + netif_napi_add(ndev, &lp->napi_rx, axienet_rx_poll); + netif_napi_add(ndev, &lp->napi_tx, axienet_tx_poll); + } else { + struct xilinx_vdma_config cfg; + struct dma_chan *tx_chan; + + lp->eth_irq = platform_get_irq_optional(pdev, 0); + if (lp->eth_irq < 0 && lp->eth_irq != -ENXIO) { + ret = lp->eth_irq; + goto cleanup_clk; + } + tx_chan = dma_request_chan(lp->dev, "tx_chan0"); + if (IS_ERR(tx_chan)) { + ret = PTR_ERR(tx_chan); + dev_err_probe(lp->dev, ret, "No Ethernet DMA (TX) channel found\n"); + goto cleanup_clk; + } + + cfg.reset = 1; + /* As name says VDMA but it has support for DMA channel reset */ + ret = xilinx_vdma_channel_set_config(tx_chan, &cfg); + if (ret < 0) { + dev_err(&pdev->dev, "Reset channel failed\n"); + dma_release_channel(tx_chan); + goto cleanup_clk; + } + + dma_release_channel(tx_chan); + lp->use_dmaengine = 1; } + if (lp->use_dmaengine) + ndev->netdev_ops = &axienet_netdev_dmaengine_ops; + else + ndev->netdev_ops = &axienet_netdev_ops; /* Check for Ethernet core IRQ (optional) */ if (lp->eth_irq <= 0) dev_info(&pdev->dev, "Ethernet core IRQ not defined\n"); @@ -2099,8 +2524,8 @@ static int axienet_probe(struct platform_device *pdev) } lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD; - lp->coalesce_usec_rx = XAXIDMA_DFT_RX_USEC; lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD; + lp->coalesce_usec_rx = XAXIDMA_DFT_RX_USEC; lp->coalesce_usec_tx = XAXIDMA_DFT_TX_USEC; ret = axienet_mdio_setup(lp); diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index c8da94af4161..88dd8a2245b0 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1086,20 +1086,8 @@ static int macvlan_ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) { struct net_device *real_dev = macvlan_dev_real_dev(dev); - const struct ethtool_ops *ops = real_dev->ethtool_ops; - struct phy_device *phydev = real_dev->phydev; - if (phy_has_tsinfo(phydev)) { - return phy_ts_info(phydev, info); - } else if (ops->get_ts_info) { - return ops->get_ts_info(real_dev, info); - } else { - info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; - info->phc_index 
= -1; - } - - return 0; + return ethtool_get_ts_info_by_layer(real_dev, info); } static netdev_features_t macvlan_fix_features(struct net_device *dev, diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 421d2b62918f..25cfc5ded1da 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -96,10 +96,7 @@ config ADIN1100_PHY Currently supports the: - ADIN1100 - Robust,Industrial, Low Power 10BASE-T1L Ethernet PHY -config AQUANTIA_PHY - tristate "Aquantia PHYs" - help - Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405 +source "drivers/net/phy/aquantia/Kconfig" config AX88796B_PHY tristate "Asix PHYs" diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index c945ed9bd14b..f65e85c91fc1 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -35,11 +35,7 @@ obj-y += $(sfp-obj-y) $(sfp-obj-m) obj-$(CONFIG_ADIN_PHY) += adin.o obj-$(CONFIG_ADIN1100_PHY) += adin1100.o obj-$(CONFIG_AMD_PHY) += amd.o -aquantia-objs += aquantia_main.o -ifdef CONFIG_HWMON -aquantia-objs += aquantia_hwmon.o -endif -obj-$(CONFIG_AQUANTIA_PHY) += aquantia.o +obj-$(CONFIG_AQUANTIA_PHY) += aquantia/ obj-$(CONFIG_AT803X_PHY) += at803x.o obj-$(CONFIG_AX88796B_PHY) += ax88796b.o obj-$(CONFIG_BCM54140_PHY) += bcm54140.o diff --git a/drivers/net/phy/aquantia.h b/drivers/net/phy/aquantia.h deleted file mode 100644 index c684b65c642c..000000000000 --- a/drivers/net/phy/aquantia.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* HWMON driver for Aquantia PHY - * - * Author: Nikita Yushchenko <nikita.yoush@cogentembedded.com> - * Author: Andrew Lunn <andrew@lunn.ch> - * Author: Heiner Kallweit <hkallweit1@gmail.com> - */ - -#include <linux/device.h> -#include <linux/phy.h> - -#if IS_REACHABLE(CONFIG_HWMON) -int aqr_hwmon_probe(struct phy_device *phydev); -#else -static inline int aqr_hwmon_probe(struct phy_device *phydev) { return 0; } -#endif diff --git a/drivers/net/phy/aquantia/Kconfig b/drivers/net/phy/aquantia/Kconfig new file mode 100644 index 000000000000..a35de4b9b554 --- /dev/null +++ b/drivers/net/phy/aquantia/Kconfig @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +config AQUANTIA_PHY + tristate "Aquantia PHYs" + select CRC_CCITT + help + Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405 diff --git a/drivers/net/phy/aquantia/Makefile b/drivers/net/phy/aquantia/Makefile new file mode 100644 index 000000000000..aa77fb63c8ec --- /dev/null +++ b/drivers/net/phy/aquantia/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +aquantia-objs += aquantia_main.o aquantia_firmware.o +ifdef CONFIG_HWMON +aquantia-objs += aquantia_hwmon.o +endif +obj-$(CONFIG_AQUANTIA_PHY) += aquantia.o diff --git a/drivers/net/phy/aquantia/aquantia.h b/drivers/net/phy/aquantia/aquantia.h new file mode 100644 index 000000000000..9ed38972abdb --- /dev/null +++ b/drivers/net/phy/aquantia/aquantia.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* HWMON driver for Aquantia PHY + * + * Author: Nikita Yushchenko <nikita.yoush@cogentembedded.com> + * Author: Andrew Lunn <andrew@lunn.ch> + * Author: Heiner Kallweit <hkallweit1@gmail.com> + */ + +#include <linux/device.h> +#include <linux/phy.h> + +/* Vendor specific 1, MDIO_MMD_VEND1 */ +#define VEND1_GLOBAL_SC 0x0 +#define VEND1_GLOBAL_SC_SOFT_RESET BIT(15) +#define VEND1_GLOBAL_SC_LOW_POWER BIT(11) + +#define VEND1_GLOBAL_FW_ID 0x0020 +#define VEND1_GLOBAL_FW_ID_MAJOR GENMASK(15, 8) +#define VEND1_GLOBAL_FW_ID_MINOR GENMASK(7, 0) + +#define 
VEND1_GLOBAL_MAILBOX_INTERFACE1 0x0200 +#define VEND1_GLOBAL_MAILBOX_INTERFACE1_EXECUTE BIT(15) +#define VEND1_GLOBAL_MAILBOX_INTERFACE1_WRITE BIT(14) +#define VEND1_GLOBAL_MAILBOX_INTERFACE1_CRC_RESET BIT(12) +#define VEND1_GLOBAL_MAILBOX_INTERFACE1_BUSY BIT(8) + +#define VEND1_GLOBAL_MAILBOX_INTERFACE2 0x0201 +#define VEND1_GLOBAL_MAILBOX_INTERFACE3 0x0202 +#define VEND1_GLOBAL_MAILBOX_INTERFACE3_MSW_ADDR_MASK GENMASK(15, 0) +#define VEND1_GLOBAL_MAILBOX_INTERFACE3_MSW_ADDR(x) FIELD_PREP(VEND1_GLOBAL_MAILBOX_INTERFACE3_MSW_ADDR_MASK, (u16)((x) >> 16)) +#define VEND1_GLOBAL_MAILBOX_INTERFACE4 0x0203 +#define VEND1_GLOBAL_MAILBOX_INTERFACE4_LSW_ADDR_MASK GENMASK(15, 2) +#define VEND1_GLOBAL_MAILBOX_INTERFACE4_LSW_ADDR(x) FIELD_PREP(VEND1_GLOBAL_MAILBOX_INTERFACE4_LSW_ADDR_MASK, (u16)(x)) + +#define VEND1_GLOBAL_MAILBOX_INTERFACE5 0x0204 +#define VEND1_GLOBAL_MAILBOX_INTERFACE5_MSW_DATA_MASK GENMASK(15, 0) +#define VEND1_GLOBAL_MAILBOX_INTERFACE5_MSW_DATA(x) FIELD_PREP(VEND1_GLOBAL_MAILBOX_INTERFACE5_MSW_DATA_MASK, (u16)((x) >> 16)) +#define VEND1_GLOBAL_MAILBOX_INTERFACE6 0x0205 +#define VEND1_GLOBAL_MAILBOX_INTERFACE6_LSW_DATA_MASK GENMASK(15, 0) +#define VEND1_GLOBAL_MAILBOX_INTERFACE6_LSW_DATA(x) FIELD_PREP(VEND1_GLOBAL_MAILBOX_INTERFACE6_LSW_DATA_MASK, (u16)(x)) + +/* The following registers all have similar layouts; first the registers... */ +#define VEND1_GLOBAL_CFG_10M 0x0310 +#define VEND1_GLOBAL_CFG_100M 0x031b +#define VEND1_GLOBAL_CFG_1G 0x031c +#define VEND1_GLOBAL_CFG_2_5G 0x031d +#define VEND1_GLOBAL_CFG_5G 0x031e +#define VEND1_GLOBAL_CFG_10G 0x031f +/* ...and now the fields */ +#define VEND1_GLOBAL_CFG_RATE_ADAPT GENMASK(8, 7) +#define VEND1_GLOBAL_CFG_RATE_ADAPT_NONE 0 +#define VEND1_GLOBAL_CFG_RATE_ADAPT_USX 1 +#define VEND1_GLOBAL_CFG_RATE_ADAPT_PAUSE 2 + +/* Vendor specific 1, MDIO_MMD_VEND2 */ +#define VEND1_GLOBAL_CONTROL2 0xc001 +#define VEND1_GLOBAL_CONTROL2_UP_RUN_STALL_RST BIT(15) +#define VEND1_GLOBAL_CONTROL2_UP_RUN_STALL_OVD BIT(6) +#define VEND1_GLOBAL_CONTROL2_UP_RUN_STALL BIT(0) + +#define VEND1_THERMAL_PROV_HIGH_TEMP_FAIL 0xc421 +#define VEND1_THERMAL_PROV_LOW_TEMP_FAIL 0xc422 +#define VEND1_THERMAL_PROV_HIGH_TEMP_WARN 0xc423 +#define VEND1_THERMAL_PROV_LOW_TEMP_WARN 0xc424 +#define VEND1_THERMAL_STAT1 0xc820 +#define VEND1_THERMAL_STAT2 0xc821 +#define VEND1_THERMAL_STAT2_VALID BIT(0) +#define VEND1_GENERAL_STAT1 0xc830 +#define VEND1_GENERAL_STAT1_HIGH_TEMP_FAIL BIT(14) +#define VEND1_GENERAL_STAT1_LOW_TEMP_FAIL BIT(13) +#define VEND1_GENERAL_STAT1_HIGH_TEMP_WARN BIT(12) +#define VEND1_GENERAL_STAT1_LOW_TEMP_WARN BIT(11) + +#define VEND1_GLOBAL_GEN_STAT2 0xc831 +#define VEND1_GLOBAL_GEN_STAT2_OP_IN_PROG BIT(15) + +#define VEND1_GLOBAL_RSVD_STAT1 0xc885 +#define VEND1_GLOBAL_RSVD_STAT1_FW_BUILD_ID GENMASK(7, 4) +#define VEND1_GLOBAL_RSVD_STAT1_PROV_ID GENMASK(3, 0) + +#define VEND1_GLOBAL_RSVD_STAT9 0xc88d +#define VEND1_GLOBAL_RSVD_STAT9_MODE GENMASK(7, 0) +#define VEND1_GLOBAL_RSVD_STAT9_1000BT2 0x23 + +#define VEND1_GLOBAL_INT_STD_STATUS 0xfc00 +#define VEND1_GLOBAL_INT_VEND_STATUS 0xfc01 + +#define VEND1_GLOBAL_INT_STD_MASK 0xff00 +#define VEND1_GLOBAL_INT_STD_MASK_PMA1 BIT(15) +#define VEND1_GLOBAL_INT_STD_MASK_PMA2 BIT(14) +#define VEND1_GLOBAL_INT_STD_MASK_PCS1 BIT(13) +#define VEND1_GLOBAL_INT_STD_MASK_PCS2 BIT(12) +#define VEND1_GLOBAL_INT_STD_MASK_PCS3 BIT(11) +#define VEND1_GLOBAL_INT_STD_MASK_PHY_XS1 BIT(10) +#define VEND1_GLOBAL_INT_STD_MASK_PHY_XS2 BIT(9) +#define VEND1_GLOBAL_INT_STD_MASK_AN1 BIT(8) +#define VEND1_GLOBAL_INT_STD_MASK_AN2 
BIT(7) +#define VEND1_GLOBAL_INT_STD_MASK_GBE BIT(6) +#define VEND1_GLOBAL_INT_STD_MASK_ALL BIT(0) + +#define VEND1_GLOBAL_INT_VEND_MASK 0xff01 +#define VEND1_GLOBAL_INT_VEND_MASK_PMA BIT(15) +#define VEND1_GLOBAL_INT_VEND_MASK_PCS BIT(14) +#define VEND1_GLOBAL_INT_VEND_MASK_PHY_XS BIT(13) +#define VEND1_GLOBAL_INT_VEND_MASK_AN BIT(12) +#define VEND1_GLOBAL_INT_VEND_MASK_GBE BIT(11) +#define VEND1_GLOBAL_INT_VEND_MASK_GLOBAL1 BIT(2) +#define VEND1_GLOBAL_INT_VEND_MASK_GLOBAL2 BIT(1) +#define VEND1_GLOBAL_INT_VEND_MASK_GLOBAL3 BIT(0) + +#if IS_REACHABLE(CONFIG_HWMON) +int aqr_hwmon_probe(struct phy_device *phydev); +#else +static inline int aqr_hwmon_probe(struct phy_device *phydev) { return 0; } +#endif + +int aqr_firmware_load(struct phy_device *phydev); diff --git a/drivers/net/phy/aquantia/aquantia_firmware.c b/drivers/net/phy/aquantia/aquantia_firmware.c new file mode 100644 index 000000000000..c5f292b1c4c8 --- /dev/null +++ b/drivers/net/phy/aquantia/aquantia_firmware.c @@ -0,0 +1,370 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bitfield.h> +#include <linux/of.h> +#include <linux/firmware.h> +#include <linux/crc-ccitt.h> +#include <linux/nvmem-consumer.h> + +#include <asm/unaligned.h> + +#include "aquantia.h" + +#define UP_RESET_SLEEP 100 + +/* addresses of memory segments in the phy */ +#define DRAM_BASE_ADDR 0x3FFE0000 +#define IRAM_BASE_ADDR 0x40000000 + +/* firmware image format constants */ +#define VERSION_STRING_SIZE 0x40 +#define VERSION_STRING_OFFSET 0x0200 +/* primary offset is written at an offset from the start of the fw blob */ +#define PRIMARY_OFFSET_OFFSET 0x8 +/* primary offset needs to be then added to a base offset */ +#define PRIMARY_OFFSET_SHIFT 12 +#define PRIMARY_OFFSET(x) ((x) << PRIMARY_OFFSET_SHIFT) +#define HEADER_OFFSET 0x300 + +struct aqr_fw_header { + u32 padding; + u8 iram_offset[3]; + u8 iram_size[3]; + u8 dram_offset[3]; + u8 dram_size[3]; +} __packed; + +enum aqr_fw_src { + AQR_FW_SRC_NVMEM = 0, + AQR_FW_SRC_FS, +}; + +static const char * const aqr_fw_src_string[] = { + [AQR_FW_SRC_NVMEM] = "NVMEM", + [AQR_FW_SRC_FS] = "FS", +}; + +/* AQR firmware doesn't have fixed offsets for iram and dram section + * but instead provide an header with the offset to use on reading + * and parsing the firmware. + * + * AQR firmware can't be trusted and each offset is validated to be + * not negative and be in the size of the firmware itself. 
+ */ +static bool aqr_fw_validate_get(size_t size, size_t offset, size_t get_size) +{ + return offset + get_size <= size; +} + +static int aqr_fw_get_be16(const u8 *data, size_t offset, size_t size, u16 *value) +{ + if (!aqr_fw_validate_get(size, offset, sizeof(u16))) + return -EINVAL; + + *value = get_unaligned_be16(data + offset); + + return 0; +} + +static int aqr_fw_get_le16(const u8 *data, size_t offset, size_t size, u16 *value) +{ + if (!aqr_fw_validate_get(size, offset, sizeof(u16))) + return -EINVAL; + + *value = get_unaligned_le16(data + offset); + + return 0; +} + +static int aqr_fw_get_le24(const u8 *data, size_t offset, size_t size, u32 *value) +{ + if (!aqr_fw_validate_get(size, offset, sizeof(u8) * 3)) + return -EINVAL; + + *value = get_unaligned_le24(data + offset); + + return 0; +} + +/* load data into the phy's memory */ +static int aqr_fw_load_memory(struct phy_device *phydev, u32 addr, + const u8 *data, size_t len) +{ + u16 crc = 0, up_crc; + size_t pos; + + /* PHY expect addr in LE */ + addr = (__force u32)cpu_to_le32(addr); + + phy_write_mmd(phydev, MDIO_MMD_VEND1, + VEND1_GLOBAL_MAILBOX_INTERFACE1, + VEND1_GLOBAL_MAILBOX_INTERFACE1_CRC_RESET); + phy_write_mmd(phydev, MDIO_MMD_VEND1, + VEND1_GLOBAL_MAILBOX_INTERFACE3, + VEND1_GLOBAL_MAILBOX_INTERFACE3_MSW_ADDR(addr)); + phy_write_mmd(phydev, MDIO_MMD_VEND1, + VEND1_GLOBAL_MAILBOX_INTERFACE4, + VEND1_GLOBAL_MAILBOX_INTERFACE4_LSW_ADDR(addr)); + + /* We assume and enforce the size to be word aligned. + * If a firmware that is not word aligned is found, please report upstream. + */ + for (pos = 0; pos < len; pos += sizeof(u32)) { + u32 word; + + /* FW data is always stored in little-endian */ + word = get_unaligned((const u32 *)(data + pos)); + + phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_MAILBOX_INTERFACE5, + VEND1_GLOBAL_MAILBOX_INTERFACE5_MSW_DATA(word)); + phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_MAILBOX_INTERFACE6, + VEND1_GLOBAL_MAILBOX_INTERFACE6_LSW_DATA(word)); + + phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_MAILBOX_INTERFACE1, + VEND1_GLOBAL_MAILBOX_INTERFACE1_EXECUTE | + VEND1_GLOBAL_MAILBOX_INTERFACE1_WRITE); + + /* calculate CRC as we load data to the mailbox. + * We convert word to big-endian as PHY is BE and mailbox will + * return a BE CRC. + */ + word = (__force u32)cpu_to_be32(word); + crc = crc_ccitt_false(crc, (u8 *)&word, sizeof(word)); + } + + up_crc = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_MAILBOX_INTERFACE2); + if (crc != up_crc) { + phydev_err(phydev, "CRC mismatch: calculated 0x%04x PHY 0x%04x\n", + crc, up_crc); + return -EINVAL; + } + + return 0; +} + +static int aqr_fw_boot(struct phy_device *phydev, const u8 *data, size_t size, + enum aqr_fw_src fw_src) +{ + u16 calculated_crc, read_crc, read_primary_offset; + u32 iram_offset = 0, iram_size = 0; + u32 dram_offset = 0, dram_size = 0; + char version[VERSION_STRING_SIZE]; + u32 primary_offset = 0; + int ret; + + /* extract saved CRC at the end of the fw + * CRC is saved in big-endian as PHY is BE + */ + ret = aqr_fw_get_be16(data, size - sizeof(u16), size, &read_crc); + if (ret) { + phydev_err(phydev, "bad firmware CRC in firmware\n"); + return ret; + } + calculated_crc = crc_ccitt_false(0, data, size - sizeof(u16)); + if (read_crc != calculated_crc) { + phydev_err(phydev, "bad firmware CRC: file 0x%04x calculated 0x%04x\n", + read_crc, calculated_crc); + return -EINVAL; + } + + /* Get the primary offset to extract DRAM and IRAM sections. 
*/ + ret = aqr_fw_get_le16(data, PRIMARY_OFFSET_OFFSET, size, &read_primary_offset); + if (ret) { + phydev_err(phydev, "bad primary offset in firmware\n"); + return ret; + } + primary_offset = PRIMARY_OFFSET(read_primary_offset); + + /* Find the DRAM and IRAM sections within the firmware file. + * Make sure the fw_header is correctly in the firmware. + */ + if (!aqr_fw_validate_get(size, primary_offset + HEADER_OFFSET, + sizeof(struct aqr_fw_header))) { + phydev_err(phydev, "bad fw_header in firmware\n"); + return -EINVAL; + } + + /* offset are in LE and values needs to be converted to cpu endian */ + ret = aqr_fw_get_le24(data, primary_offset + HEADER_OFFSET + + offsetof(struct aqr_fw_header, iram_offset), + size, &iram_offset); + if (ret) { + phydev_err(phydev, "bad iram offset in firmware\n"); + return ret; + } + ret = aqr_fw_get_le24(data, primary_offset + HEADER_OFFSET + + offsetof(struct aqr_fw_header, iram_size), + size, &iram_size); + if (ret) { + phydev_err(phydev, "invalid iram size in firmware\n"); + return ret; + } + ret = aqr_fw_get_le24(data, primary_offset + HEADER_OFFSET + + offsetof(struct aqr_fw_header, dram_offset), + size, &dram_offset); + if (ret) { + phydev_err(phydev, "bad dram offset in firmware\n"); + return ret; + } + ret = aqr_fw_get_le24(data, primary_offset + HEADER_OFFSET + + offsetof(struct aqr_fw_header, dram_size), + size, &dram_size); + if (ret) { + phydev_err(phydev, "invalid dram size in firmware\n"); + return ret; + } + + /* Increment the offset with the primary offset. + * Validate iram/dram offset and size. + */ + iram_offset += primary_offset; + if (iram_size % sizeof(u32)) { + phydev_err(phydev, "iram size if not aligned to word size. Please report this upstream!\n"); + return -EINVAL; + } + if (!aqr_fw_validate_get(size, iram_offset, iram_size)) { + phydev_err(phydev, "invalid iram offset for iram size\n"); + return -EINVAL; + } + + dram_offset += primary_offset; + if (dram_size % sizeof(u32)) { + phydev_err(phydev, "dram size if not aligned to word size. 
Please report this upstream!\n"); + return -EINVAL; + } + if (!aqr_fw_validate_get(size, dram_offset, dram_size)) { + phydev_err(phydev, "invalid iram offset for iram size\n"); + return -EINVAL; + } + + phydev_dbg(phydev, "primary %d IRAM offset=%d size=%d DRAM offset=%d size=%d\n", + primary_offset, iram_offset, iram_size, dram_offset, dram_size); + + if (!aqr_fw_validate_get(size, dram_offset + VERSION_STRING_OFFSET, + VERSION_STRING_SIZE)) { + phydev_err(phydev, "invalid version in firmware\n"); + return -EINVAL; + } + strscpy(version, (char *)data + dram_offset + VERSION_STRING_OFFSET, + VERSION_STRING_SIZE); + if (version[0] == '\0') { + phydev_err(phydev, "invalid version in firmware\n"); + return -EINVAL; + } + phydev_info(phydev, "loading firmware version '%s' from '%s'\n", version, + aqr_fw_src_string[fw_src]); + + /* stall the microcprocessor */ + phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_CONTROL2, + VEND1_GLOBAL_CONTROL2_UP_RUN_STALL | VEND1_GLOBAL_CONTROL2_UP_RUN_STALL_OVD); + + phydev_dbg(phydev, "loading DRAM 0x%08x from offset=%d size=%d\n", + DRAM_BASE_ADDR, dram_offset, dram_size); + ret = aqr_fw_load_memory(phydev, DRAM_BASE_ADDR, data + dram_offset, + dram_size); + if (ret) + return ret; + + phydev_dbg(phydev, "loading IRAM 0x%08x from offset=%d size=%d\n", + IRAM_BASE_ADDR, iram_offset, iram_size); + ret = aqr_fw_load_memory(phydev, IRAM_BASE_ADDR, data + iram_offset, + iram_size); + if (ret) + return ret; + + /* make sure soft reset and low power mode are clear */ + phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_SC, + VEND1_GLOBAL_SC_SOFT_RESET | VEND1_GLOBAL_SC_LOW_POWER); + + /* Release the microprocessor. UP_RESET must be held for 100 usec. */ + phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_CONTROL2, + VEND1_GLOBAL_CONTROL2_UP_RUN_STALL | + VEND1_GLOBAL_CONTROL2_UP_RUN_STALL_OVD | + VEND1_GLOBAL_CONTROL2_UP_RUN_STALL_RST); + usleep_range(UP_RESET_SLEEP, UP_RESET_SLEEP * 2); + + phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_CONTROL2, + VEND1_GLOBAL_CONTROL2_UP_RUN_STALL_OVD); + + return 0; +} + +static int aqr_firmware_load_nvmem(struct phy_device *phydev) +{ + struct nvmem_cell *cell; + size_t size; + u8 *buf; + int ret; + + cell = nvmem_cell_get(&phydev->mdio.dev, "firmware"); + if (IS_ERR(cell)) + return PTR_ERR(cell); + + buf = nvmem_cell_read(cell, &size); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto exit; + } + + ret = aqr_fw_boot(phydev, buf, size, AQR_FW_SRC_NVMEM); + if (ret) + phydev_err(phydev, "firmware loading failed: %d\n", ret); + + kfree(buf); +exit: + nvmem_cell_put(cell); + + return ret; +} + +static int aqr_firmware_load_fs(struct phy_device *phydev) +{ + struct device *dev = &phydev->mdio.dev; + const struct firmware *fw; + const char *fw_name; + int ret; + + ret = of_property_read_string(dev->of_node, "firmware-name", + &fw_name); + if (ret) + return ret; + + ret = request_firmware(&fw, fw_name, dev); + if (ret) { + phydev_err(phydev, "failed to find FW file %s (%d)\n", + fw_name, ret); + return ret; + } + + ret = aqr_fw_boot(phydev, fw->data, fw->size, AQR_FW_SRC_FS); + if (ret) + phydev_err(phydev, "firmware loading failed: %d\n", ret); + + release_firmware(fw); + + return ret; +} + +int aqr_firmware_load(struct phy_device *phydev) +{ + int ret; + + /* Check if the firmware is not already loaded by pooling + * the current version returned by the PHY. If 0 is returned, + * no firmware is loaded. 
+ */ + ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_FW_ID); + if (ret > 0) + goto exit; + + ret = aqr_firmware_load_nvmem(phydev); + if (!ret) + goto exit; + + ret = aqr_firmware_load_fs(phydev); + if (ret) + return ret; + +exit: + return 0; +} diff --git a/drivers/net/phy/aquantia_hwmon.c b/drivers/net/phy/aquantia/aquantia_hwmon.c index 0da451e46f69..7b3c49c3bf49 100644 --- a/drivers/net/phy/aquantia_hwmon.c +++ b/drivers/net/phy/aquantia/aquantia_hwmon.c @@ -13,20 +13,6 @@ #include "aquantia.h" -/* Vendor specific 1, MDIO_MMD_VEND2 */ -#define VEND1_THERMAL_PROV_HIGH_TEMP_FAIL 0xc421 -#define VEND1_THERMAL_PROV_LOW_TEMP_FAIL 0xc422 -#define VEND1_THERMAL_PROV_HIGH_TEMP_WARN 0xc423 -#define VEND1_THERMAL_PROV_LOW_TEMP_WARN 0xc424 -#define VEND1_THERMAL_STAT1 0xc820 -#define VEND1_THERMAL_STAT2 0xc821 -#define VEND1_THERMAL_STAT2_VALID BIT(0) -#define VEND1_GENERAL_STAT1 0xc830 -#define VEND1_GENERAL_STAT1_HIGH_TEMP_FAIL BIT(14) -#define VEND1_GENERAL_STAT1_LOW_TEMP_FAIL BIT(13) -#define VEND1_GENERAL_STAT1_HIGH_TEMP_WARN BIT(12) -#define VEND1_GENERAL_STAT1_LOW_TEMP_WARN BIT(11) - #if IS_REACHABLE(CONFIG_HWMON) static umode_t aqr_hwmon_is_visible(const void *data, diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia/aquantia_main.c index 334a6904ca5a..cc4a97741c4a 100644 --- a/drivers/net/phy/aquantia_main.c +++ b/drivers/net/phy/aquantia/aquantia_main.c @@ -91,61 +91,6 @@ #define MDIO_C22EXT_STAT_SGMII_TX_FRAME_ALIGN_ERR 0xd31a #define MDIO_C22EXT_STAT_SGMII_TX_RUNT_FRAMES 0xd31b -/* Vendor specific 1, MDIO_MMD_VEND1 */ -#define VEND1_GLOBAL_FW_ID 0x0020 -#define VEND1_GLOBAL_FW_ID_MAJOR GENMASK(15, 8) -#define VEND1_GLOBAL_FW_ID_MINOR GENMASK(7, 0) - -#define VEND1_GLOBAL_GEN_STAT2 0xc831 -#define VEND1_GLOBAL_GEN_STAT2_OP_IN_PROG BIT(15) - -/* The following registers all have similar layouts; first the registers... 
*/ -#define VEND1_GLOBAL_CFG_10M 0x0310 -#define VEND1_GLOBAL_CFG_100M 0x031b -#define VEND1_GLOBAL_CFG_1G 0x031c -#define VEND1_GLOBAL_CFG_2_5G 0x031d -#define VEND1_GLOBAL_CFG_5G 0x031e -#define VEND1_GLOBAL_CFG_10G 0x031f -/* ...and now the fields */ -#define VEND1_GLOBAL_CFG_RATE_ADAPT GENMASK(8, 7) -#define VEND1_GLOBAL_CFG_RATE_ADAPT_NONE 0 -#define VEND1_GLOBAL_CFG_RATE_ADAPT_USX 1 -#define VEND1_GLOBAL_CFG_RATE_ADAPT_PAUSE 2 - -#define VEND1_GLOBAL_RSVD_STAT1 0xc885 -#define VEND1_GLOBAL_RSVD_STAT1_FW_BUILD_ID GENMASK(7, 4) -#define VEND1_GLOBAL_RSVD_STAT1_PROV_ID GENMASK(3, 0) - -#define VEND1_GLOBAL_RSVD_STAT9 0xc88d -#define VEND1_GLOBAL_RSVD_STAT9_MODE GENMASK(7, 0) -#define VEND1_GLOBAL_RSVD_STAT9_1000BT2 0x23 - -#define VEND1_GLOBAL_INT_STD_STATUS 0xfc00 -#define VEND1_GLOBAL_INT_VEND_STATUS 0xfc01 - -#define VEND1_GLOBAL_INT_STD_MASK 0xff00 -#define VEND1_GLOBAL_INT_STD_MASK_PMA1 BIT(15) -#define VEND1_GLOBAL_INT_STD_MASK_PMA2 BIT(14) -#define VEND1_GLOBAL_INT_STD_MASK_PCS1 BIT(13) -#define VEND1_GLOBAL_INT_STD_MASK_PCS2 BIT(12) -#define VEND1_GLOBAL_INT_STD_MASK_PCS3 BIT(11) -#define VEND1_GLOBAL_INT_STD_MASK_PHY_XS1 BIT(10) -#define VEND1_GLOBAL_INT_STD_MASK_PHY_XS2 BIT(9) -#define VEND1_GLOBAL_INT_STD_MASK_AN1 BIT(8) -#define VEND1_GLOBAL_INT_STD_MASK_AN2 BIT(7) -#define VEND1_GLOBAL_INT_STD_MASK_GBE BIT(6) -#define VEND1_GLOBAL_INT_STD_MASK_ALL BIT(0) - -#define VEND1_GLOBAL_INT_VEND_MASK 0xff01 -#define VEND1_GLOBAL_INT_VEND_MASK_PMA BIT(15) -#define VEND1_GLOBAL_INT_VEND_MASK_PCS BIT(14) -#define VEND1_GLOBAL_INT_VEND_MASK_PHY_XS BIT(13) -#define VEND1_GLOBAL_INT_VEND_MASK_AN BIT(12) -#define VEND1_GLOBAL_INT_VEND_MASK_GBE BIT(11) -#define VEND1_GLOBAL_INT_VEND_MASK_GLOBAL1 BIT(2) -#define VEND1_GLOBAL_INT_VEND_MASK_GLOBAL2 BIT(1) -#define VEND1_GLOBAL_INT_VEND_MASK_GLOBAL3 BIT(0) - /* Sleep and timeout for checking if the Processor-Intensive * MDIO operation is finished */ @@ -713,11 +658,17 @@ static int aqr107_resume(struct phy_device *phydev) static int aqr107_probe(struct phy_device *phydev) { + int ret; + phydev->priv = devm_kzalloc(&phydev->mdio.dev, sizeof(struct aqr107_priv), GFP_KERNEL); if (!phydev->priv) return -ENOMEM; + ret = aqr_firmware_load(phydev); + if (ret) + return ret; + return aqr_hwmon_probe(phydev); } diff --git a/drivers/net/phy/bcm-phy-ptp.c b/drivers/net/phy/bcm-phy-ptp.c index cb4b91af5e17..617d384d4551 100644 --- a/drivers/net/phy/bcm-phy-ptp.c +++ b/drivers/net/phy/bcm-phy-ptp.c @@ -782,16 +782,13 @@ out: } static int bcm_ptp_hwtstamp(struct mii_timestamper *mii_ts, - struct ifreq *ifr) + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { struct bcm_ptp_private *priv = mii2priv(mii_ts); - struct hwtstamp_config cfg; u16 mode, ctrl; - if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) - return -EFAULT; - - switch (cfg.rx_filter) { + switch (cfg->rx_filter) { case HWTSTAMP_FILTER_NONE: priv->hwts_rx = false; break; @@ -804,14 +801,14 @@ static int bcm_ptp_hwtstamp(struct mii_timestamper *mii_ts, case HWTSTAMP_FILTER_PTP_V2_EVENT: case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; priv->hwts_rx = true; break; default: return -ERANGE; } - priv->tx_type = cfg.tx_type; + priv->tx_type = cfg->tx_type; ctrl = priv->hwts_rx ? SLICE_RX_EN : 0; ctrl |= priv->tx_type != HWTSTAMP_TX_OFF ? 
SLICE_TX_EN : 0; @@ -840,7 +837,7 @@ static int bcm_ptp_hwtstamp(struct mii_timestamper *mii_ts, /* purge existing data */ skb_queue_purge(&priv->tx_queue); - return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; + return 0; } static int bcm_ptp_ts_info(struct mii_timestamper *mii_ts, diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 3a627105675a..312a8bb35d78 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -1135,6 +1135,8 @@ static struct phy_driver broadcom_drivers[] = { .handle_interrupt = bcm_phy_handle_interrupt, .link_change_notify = bcm54xx_link_change_notify, .led_brightness_set = bcm_phy_led_brightness_set, + .suspend = bcm54xx_suspend, + .resume = bcm54xx_resume, }, { .phy_id = PHY_ID_BCM54616S, .phy_id_mask = 0xfffffff0, diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 2657be7cc049..5c42c47dc564 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -1207,22 +1207,20 @@ static irqreturn_t dp83640_handle_interrupt(struct phy_device *phydev) return IRQ_HANDLED; } -static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) +static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { struct dp83640_private *dp83640 = container_of(mii_ts, struct dp83640_private, mii_ts); - struct hwtstamp_config cfg; u16 txcfg0, rxcfg0; - if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) - return -EFAULT; - - if (cfg.tx_type < 0 || cfg.tx_type > HWTSTAMP_TX_ONESTEP_SYNC) + if (cfg->tx_type < 0 || cfg->tx_type > HWTSTAMP_TX_ONESTEP_SYNC) return -ERANGE; - dp83640->hwts_tx_en = cfg.tx_type; + dp83640->hwts_tx_en = cfg->tx_type; - switch (cfg.rx_filter) { + switch (cfg->rx_filter) { case HWTSTAMP_FILTER_NONE: dp83640->hwts_rx_en = 0; dp83640->layer = 0; @@ -1234,7 +1232,7 @@ static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) dp83640->hwts_rx_en = 1; dp83640->layer = PTP_CLASS_L4; dp83640->version = PTP_CLASS_V1; - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT; break; case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: @@ -1242,7 +1240,7 @@ static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) dp83640->hwts_rx_en = 1; dp83640->layer = PTP_CLASS_L4; dp83640->version = PTP_CLASS_V2; - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT; break; case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: @@ -1250,7 +1248,7 @@ static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) dp83640->hwts_rx_en = 1; dp83640->layer = PTP_CLASS_L2; dp83640->version = PTP_CLASS_V2; - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; break; case HWTSTAMP_FILTER_PTP_V2_EVENT: case HWTSTAMP_FILTER_PTP_V2_SYNC: @@ -1258,7 +1256,7 @@ static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) dp83640->hwts_rx_en = 1; dp83640->layer = PTP_CLASS_L4 | PTP_CLASS_L2; dp83640->version = PTP_CLASS_V2; - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; break; default: return -ERANGE; @@ -1292,7 +1290,7 @@ static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) mutex_unlock(&dp83640->clock->extreg_lock); - return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? 
-EFAULT : 0; + return 0; } static void rx_timestamp_work(struct work_struct *work) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 08e3915001c3..bd4cd082662f 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2395,24 +2395,22 @@ static void lan8814_flush_fifo(struct phy_device *phydev, bool egress) lanphy_read_page_reg(phydev, 5, PTP_TSU_INT_STS); } -static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) +static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct kszphy_ptp_priv *ptp_priv = container_of(mii_ts, struct kszphy_ptp_priv, mii_ts); struct phy_device *phydev = ptp_priv->phydev; struct lan8814_shared_priv *shared = phydev->shared->priv; struct lan8814_ptp_rx_ts *rx_ts, *tmp; - struct hwtstamp_config config; int txcfg = 0, rxcfg = 0; int pkt_ts_enable; - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; + ptp_priv->hwts_tx_type = config->tx_type; + ptp_priv->rx_filter = config->rx_filter; - ptp_priv->hwts_tx_type = config.tx_type; - ptp_priv->rx_filter = config.rx_filter; - - switch (config.rx_filter) { + switch (config->rx_filter) { case HWTSTAMP_FILTER_NONE: ptp_priv->layer = 0; ptp_priv->version = 0; @@ -2458,13 +2456,13 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD, PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); - if (config.rx_filter != HWTSTAMP_FILTER_NONE) + if (config->rx_filter != HWTSTAMP_FILTER_NONE) lan8814_config_ts_intr(ptp_priv->phydev, true); else lan8814_config_ts_intr(ptp_priv->phydev, false); mutex_lock(&shared->shared_lock); - if (config.rx_filter != HWTSTAMP_FILTER_NONE) + if (config->rx_filter != HWTSTAMP_FILTER_NONE) shared->ref++; else shared->ref--; @@ -2488,7 +2486,7 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) lan8814_flush_fifo(ptp_priv->phydev, false); lan8814_flush_fifo(ptp_priv->phydev, true); - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? -EFAULT : 0; + return 0; } static void lan8814_txtstamp(struct mii_timestamper *mii_ts, @@ -3631,12 +3629,8 @@ static int lan8841_ts_info(struct mii_timestamper *mii_ts, info->phc_index = ptp_priv->ptp_clock ? 
ptp_clock_index(ptp_priv->ptp_clock) : -1; - if (info->phc_index == -1) { - info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; + if (info->phc_index == -1) return 0; - } info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | @@ -3703,21 +3697,19 @@ static void lan8841_ptp_enable_processing(struct kszphy_ptp_priv *ptp_priv, #define LAN8841_PTP_TX_TIMESTAMP_EN 443 #define LAN8841_PTP_TX_MOD 445 -static int lan8841_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) +static int lan8841_hwtstamp(struct mii_timestamper *mii_ts, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct kszphy_ptp_priv *ptp_priv = container_of(mii_ts, struct kszphy_ptp_priv, mii_ts); struct phy_device *phydev = ptp_priv->phydev; - struct hwtstamp_config config; int txcfg = 0, rxcfg = 0; int pkt_ts_enable; - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; + ptp_priv->hwts_tx_type = config->tx_type; + ptp_priv->rx_filter = config->rx_filter; - ptp_priv->hwts_tx_type = config.tx_type; - ptp_priv->rx_filter = config.rx_filter; - - switch (config.rx_filter) { + switch (config->rx_filter) { case HWTSTAMP_FILTER_NONE: ptp_priv->layer = 0; ptp_priv->version = 0; @@ -3771,13 +3763,13 @@ static int lan8841_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) /* Now enable/disable the timestamping */ lan8841_ptp_enable_processing(ptp_priv, - config.rx_filter != HWTSTAMP_FILTER_NONE); + config->rx_filter != HWTSTAMP_FILTER_NONE); skb_queue_purge(&ptp_priv->tx_queue); lan8841_ptp_flush_fifo(ptp_priv); - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? -EFAULT : 0; + return 0; } static bool lan8841_rxtstamp(struct mii_timestamper *mii_ts, diff --git a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c index cf728bfd83e2..eb0b032cb613 100644 --- a/drivers/net/phy/mscc/mscc_ptp.c +++ b/drivers/net/phy/mscc/mscc_ptp.c @@ -1045,19 +1045,17 @@ static void vsc85xx_ts_reset_fifo(struct phy_device *phydev) val); } -static int vsc85xx_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) +static int vsc85xx_hwtstamp(struct mii_timestamper *mii_ts, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { struct vsc8531_private *vsc8531 = container_of(mii_ts, struct vsc8531_private, mii_ts); struct phy_device *phydev = vsc8531->ptp->phydev; - struct hwtstamp_config cfg; bool one_step = false; u32 val; - if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) - return -EFAULT; - - switch (cfg.tx_type) { + switch (cfg->tx_type) { case HWTSTAMP_TX_ONESTEP_SYNC: one_step = true; break; @@ -1069,9 +1067,9 @@ static int vsc85xx_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) return -ERANGE; } - vsc8531->ptp->tx_type = cfg.tx_type; + vsc8531->ptp->tx_type = cfg->tx_type; - switch (cfg.rx_filter) { + switch (cfg->rx_filter) { case HWTSTAMP_FILTER_NONE: break; case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: @@ -1084,7 +1082,7 @@ static int vsc85xx_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) return -ERANGE; } - vsc8531->ptp->rx_filter = cfg.rx_filter; + vsc8531->ptp->rx_filter = cfg->rx_filter; mutex_lock(&vsc8531->ts_lock); @@ -1132,7 +1130,7 @@ static int vsc85xx_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) vsc8531->ptp->configured = 1; mutex_unlock(&vsc8531->ts_lock); - return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? 
-EFAULT : 0; + return 0; } static int vsc85xx_ts_info(struct mii_timestamper *mii_ts, diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index 7ab080ff02df..780ad353cf55 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -1022,24 +1022,21 @@ static bool nxp_c45_rxtstamp(struct mii_timestamper *mii_ts, } static int nxp_c45_hwtstamp(struct mii_timestamper *mii_ts, - struct ifreq *ifreq) + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { struct nxp_c45_phy *priv = container_of(mii_ts, struct nxp_c45_phy, mii_ts); struct phy_device *phydev = priv->phydev; const struct nxp_c45_phy_data *data; - struct hwtstamp_config cfg; - if (copy_from_user(&cfg, ifreq->ifr_data, sizeof(cfg))) - return -EFAULT; - - if (cfg.tx_type < 0 || cfg.tx_type > HWTSTAMP_TX_ON) + if (cfg->tx_type < 0 || cfg->tx_type > HWTSTAMP_TX_ON) return -ERANGE; data = nxp_c45_get_data(phydev); - priv->hwts_tx = cfg.tx_type; + priv->hwts_tx = cfg->tx_type; - switch (cfg.rx_filter) { + switch (cfg->rx_filter) { case HWTSTAMP_FILTER_NONE: priv->hwts_rx = 0; break; @@ -1047,7 +1044,7 @@ static int nxp_c45_hwtstamp(struct mii_timestamper *mii_ts, case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: priv->hwts_rx = 1; - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; break; default: return -ERANGE; @@ -1074,7 +1071,7 @@ static int nxp_c45_hwtstamp(struct mii_timestamper *mii_ts, nxp_c45_clear_reg_field(phydev, &data->regmap->irq_egr_ts_en); nxp_c45_no_ptp_irq: - return copy_to_user(ifreq->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; + return 0; } static int nxp_c45_ts_info(struct mii_timestamper *mii_ts, diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index a5fa077650e8..3376e58e2b88 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -325,9 +325,13 @@ EXPORT_SYMBOL(phy_ethtool_ksettings_get); int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) { struct mii_ioctl_data *mii_data = if_mii(ifr); + struct kernel_hwtstamp_config kernel_cfg; + struct netlink_ext_ack extack = {}; u16 val = mii_data->val_in; bool change_autoneg = false; + struct hwtstamp_config cfg; int prtad, devad; + int ret; switch (cmd) { case SIOCGMIIPHY: @@ -411,8 +415,21 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) return 0; case SIOCSHWTSTAMP: - if (phydev->mii_ts && phydev->mii_ts->hwtstamp) - return phydev->mii_ts->hwtstamp(phydev->mii_ts, ifr); + if (phydev->mii_ts && phydev->mii_ts->hwtstamp) { + if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) + return -EFAULT; + + hwtstamp_config_to_kernel(&kernel_cfg, &cfg); + ret = phydev->mii_ts->hwtstamp(phydev->mii_ts, &kernel_cfg, &extack); + if (ret) + return ret; + + hwtstamp_config_from_kernel(&cfg, &kernel_cfg); + if (copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg))) + return -EFAULT; + + return 0; + } fallthrough; default: @@ -469,7 +486,7 @@ int __phy_hwtstamp_get(struct phy_device *phydev, if (!phydev) return -ENODEV; - return phy_mii_ioctl(phydev, config->ifr, SIOCGHWTSTAMP); + return -EOPNOTSUPP; } /** @@ -486,7 +503,10 @@ int __phy_hwtstamp_set(struct phy_device *phydev, if (!phydev) return -ENODEV; - return phy_mii_ioctl(phydev, config->ifr, SIOCSHWTSTAMP); + if (phydev->mii_ts && phydev->mii_ts->hwtstamp) + return phydev->mii_ts->hwtstamp(phydev->mii_ts, config, extack); + + return -EOPNOTSUPP; } /** diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c 
index 25c19496a336..c276f9482f78 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -697,18 +697,16 @@ static int phylink_validate_mask(struct phylink *pl, unsigned long *supported, __ETHTOOL_DECLARE_LINK_MODE_MASK(all_s) = { 0, }; __ETHTOOL_DECLARE_LINK_MODE_MASK(s); struct phylink_link_state t; - int intf; + int interface; - for (intf = 0; intf < PHY_INTERFACE_MODE_MAX; intf++) { - if (test_bit(intf, interfaces)) { - linkmode_copy(s, supported); + for_each_set_bit(interface, interfaces, PHY_INTERFACE_MODE_MAX) { + linkmode_copy(s, supported); - t = *state; - t.interface = intf; - if (!phylink_validate_mac_and_pcs(pl, s, &t)) { - linkmode_or(all_s, all_s, s); - linkmode_or(all_adv, all_adv, t.advertising); - } + t = *state; + t.interface = interface; + if (!phylink_validate_mac_and_pcs(pl, s, &t)) { + linkmode_or(all_s, all_s, s); + linkmode_or(all_adv, all_adv, t.advertising); } } @@ -805,7 +803,7 @@ static int phylink_parse_fixedlink(struct phylink *pl, phylink_warn(pl, "fixed link specifies half duplex for %dMbps link?\n", pl->link_config.speed); - bitmap_fill(pl->supported, __ETHTOOL_LINK_MODE_MASK_NBITS); + linkmode_fill(pl->supported); linkmode_copy(pl->link_config.advertising, pl->supported); phylink_validate(pl, pl->supported, &pl->link_config); @@ -1640,7 +1638,7 @@ struct phylink *phylink_create(struct phylink_config *config, __set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state); timer_setup(&pl->link_poll, phylink_fixed_poll, 0); - bitmap_fill(pl->supported, __ETHTOOL_LINK_MODE_MASK_NBITS); + linkmode_fill(pl->supported); linkmode_copy(pl->link_config.advertising, pl->supported); phylink_validate(pl, pl->supported, &pl->link_config); diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 208a9393c2df..6fa679b36290 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -328,7 +328,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, * modules use 2500Mbaud rather than 3100 or 3200Mbaud for * 2500BASE-X, so we allow some slack here. 
*/ - if (bitmap_empty(modes, __ETHTOOL_LINK_MODE_MASK_NBITS) && br_nom) { + if (linkmode_empty(modes) && br_nom) { if (br_min <= 1300 && br_max >= 1200) { phylink_set(modes, 1000baseX_Full); __set_bit(PHY_INTERFACE_MODE_1000BASEX, interfaces); diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 4ea0e155bb0d..8d835fbc4316 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1298,8 +1298,6 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) netif_set_tso_max_size(dev->net, 16384); - ax88179_reset(dev); - return 0; } diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 412c3c0b6990..764ea02ff911 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2379,7 +2379,17 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, else udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX); #if IS_ENABLED(CONFIG_IPV6) - key.label = vxlan->cfg.label; + switch (vxlan->cfg.label_policy) { + case VXLAN_LABEL_FIXED: + key.label = vxlan->cfg.label; + break; + case VXLAN_LABEL_INHERIT: + key.label = ip_tunnel_get_flowlabel(old_iph, skb); + break; + default: + DEBUG_NET_WARN_ON_ONCE(1); + goto drop; + } #endif } else { if (!info) { @@ -3366,6 +3376,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_DF] = { .type = NLA_U8 }, [IFLA_VXLAN_VNIFILTER] = { .type = NLA_U8 }, [IFLA_VXLAN_LOCALBYPASS] = NLA_POLICY_MAX(NLA_U8, 1), + [IFLA_VXLAN_LABEL_POLICY] = NLA_POLICY_MAX(NLA_U32, VXLAN_LABEL_MAX), }; static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[], @@ -3740,6 +3751,12 @@ static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf, return -EINVAL; } + if (conf->label_policy && !use_ipv6) { + NL_SET_ERR_MSG(extack, + "Label policy only applies to IPv6 VXLAN devices"); + return -EINVAL; + } + if (conf->remote_ifindex) { struct net_device *lowerdev; @@ -4082,6 +4099,8 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], if (data[IFLA_VXLAN_LABEL]) conf->label = nla_get_be32(data[IFLA_VXLAN_LABEL]) & IPV6_FLOWLABEL_MASK; + if (data[IFLA_VXLAN_LABEL_POLICY]) + conf->label_policy = nla_get_u32(data[IFLA_VXLAN_LABEL_POLICY]); if (data[IFLA_VXLAN_LEARNING]) { err = vxlan_nl2flag(conf, data, IFLA_VXLAN_LEARNING, @@ -4398,6 +4417,7 @@ static size_t vxlan_get_size(const struct net_device *dev) nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_DF */ nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */ + nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LABEL_POLICY */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_RSC */ @@ -4471,6 +4491,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) || nla_put_u8(skb, IFLA_VXLAN_DF, vxlan->cfg.df) || nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) || + nla_put_u32(skb, IFLA_VXLAN_LABEL_POLICY, vxlan->cfg.label_policy) || nla_put_u8(skb, IFLA_VXLAN_LEARNING, !!(vxlan->cfg.flags & VXLAN_F_LEARN)) || nla_put_u8(skb, IFLA_VXLAN_PROXY, diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 55bc3576a985..69d20d585f88 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1219,6 +1219,9 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout) if (delay > PCI_RESET_WAIT) pci_info(dev, "ready %dms 
after %s\n", delay - 1, reset_type); + else + pci_dbg(dev, "ready %dms after %s\n", delay - 1, + reset_type); return 0; } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index ea476252280a..d208047d1b8f 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3787,6 +3787,19 @@ DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_ATI, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, 8, quirk_no_pm_reset); /* + * Spectrum-{1,2,3,4} devices report that a D3hot->D0 transition causes a reset + * (i.e., they advertise NoSoftRst-). However, this transition does not have + * any effect on the device: It continues to be operational and network ports + * remain up. Advertising this support makes it seem as if a PM reset is viable + * for these devices. Mark it as unavailable to skip it when testing reset + * methods. + */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcb84, quirk_no_pm_reset); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf6c, quirk_no_pm_reset); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf70, quirk_no_pm_reset); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf80, quirk_no_pm_reset); + +/* * Thunderbolt controllers with broken MSI hotplug signaling: * Entire 1st generation (Light Ridge, Eagle Ridge, Light Peak) and part * of the 2nd generation (Cactus Ridge 4C up to revision 1, Port Ridge). diff --git a/drivers/ptp/ptp_ines.c b/drivers/ptp/ptp_ines.c index ed215b458183..1d2940a78455 100644 --- a/drivers/ptp/ptp_ines.c +++ b/drivers/ptp/ptp_ines.c @@ -328,17 +328,15 @@ static u64 ines_find_txts(struct ines_port *port, struct sk_buff *skb) return ns; } -static int ines_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) +static int ines_hwtstamp(struct mii_timestamper *mii_ts, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { struct ines_port *port = container_of(mii_ts, struct ines_port, mii_ts); u32 cm_one_step = 0, port_conf, ts_stat_rx, ts_stat_tx; - struct hwtstamp_config cfg; unsigned long flags; - if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) - return -EFAULT; - - switch (cfg.tx_type) { + switch (cfg->tx_type) { case HWTSTAMP_TX_OFF: ts_stat_tx = 0; break; @@ -353,7 +351,7 @@ static int ines_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) return -ERANGE; } - switch (cfg.rx_filter) { + switch (cfg->rx_filter) { case HWTSTAMP_FILTER_NONE: ts_stat_rx = 0; break; @@ -372,7 +370,7 @@ static int ines_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: ts_stat_rx = TS_ENABLE; - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; break; default: return -ERANGE; @@ -393,7 +391,7 @@ static int ines_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) spin_unlock_irqrestore(&port->lock, flags); - return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? 
-EFAULT : 0; + return 0; } static void ines_link_state(struct mii_timestamper *mii_ts, diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6762dac3ef76..258ba232e302 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -186,8 +186,8 @@ enum btf_field_type { BPF_LIST_NODE = (1 << 6), BPF_RB_ROOT = (1 << 7), BPF_RB_NODE = (1 << 8), - BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD | - BPF_RB_NODE | BPF_RB_ROOT, + BPF_GRAPH_NODE = BPF_RB_NODE | BPF_LIST_NODE, + BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD, BPF_REFCOUNT = (1 << 9), }; @@ -1226,6 +1226,8 @@ enum bpf_dynptr_type { int bpf_dynptr_check_size(u32 size); u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); +const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len); +void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len); #ifdef CONFIG_BPF_JIT int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index aa4d19d0bc94..d99a636d36a7 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -618,6 +618,7 @@ struct bpf_verifier_env { int stack_size; /* number of states to be processed */ bool strict_alignment; /* perform strict pointer alignment checks */ bool test_state_freq; /* test verifier with different pruning frequency */ + bool test_reg_invariants; /* fail verification on register invariants violations */ struct bpf_verifier_state *cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ struct bpf_verifier_state_list *free_list; @@ -695,6 +696,10 @@ int bpf_vlog_init(struct bpf_verifier_log *log, u32 log_level, void bpf_vlog_reset(struct bpf_verifier_log *log, u64 new_pos); int bpf_vlog_finalize(struct bpf_verifier_log *log, u32 *log_size_actual); +__printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env, + u32 insn_off, + const char *prefix_fmt, ...); + static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env) { struct bpf_verifier_state *cur = env->cur_state; @@ -794,4 +799,76 @@ static inline bool bpf_type_has_unsafe_modifiers(u32 type) return type_flag(type) & ~BPF_REG_TRUSTED_MODIFIERS; } +static inline bool type_is_ptr_alloc_obj(u32 type) +{ + return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC; +} + +static inline bool type_is_non_owning_ref(u32 type) +{ + return type_is_ptr_alloc_obj(type) && type_flag(type) & NON_OWN_REF; +} + +static inline bool type_is_pkt_pointer(enum bpf_reg_type type) +{ + type = base_type(type); + return type == PTR_TO_PACKET || + type == PTR_TO_PACKET_META; +} + +static inline bool type_is_sk_pointer(enum bpf_reg_type type) +{ + return type == PTR_TO_SOCKET || + type == PTR_TO_SOCK_COMMON || + type == PTR_TO_TCP_SOCK || + type == PTR_TO_XDP_SOCK; +} + +static inline void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno) +{ + env->scratched_regs |= 1U << regno; +} + +static inline void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi) +{ + env->scratched_stack_slots |= 1ULL << spi; +} + +static inline bool reg_scratched(const struct bpf_verifier_env *env, u32 regno) +{ + return (env->scratched_regs >> regno) & 1; +} + +static inline bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno) +{ + return (env->scratched_stack_slots >> regno) & 1; +} + +static inline bool verifier_state_scratched(const struct bpf_verifier_env *env) +{ + return env->scratched_regs || 
env->scratched_stack_slots; +} + +static inline void mark_verifier_state_clean(struct bpf_verifier_env *env) +{ + env->scratched_regs = 0U; + env->scratched_stack_slots = 0ULL; +} + +/* Used for printing the entire verifier state. */ +static inline void mark_verifier_state_scratched(struct bpf_verifier_env *env) +{ + env->scratched_regs = ~0U; + env->scratched_stack_slots = ~0ULL; +} + +const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type); +const char *dynptr_type_str(enum bpf_dynptr_type type); +const char *iter_type_str(const struct btf *btf, u32 btf_id); +const char *iter_state_str(enum bpf_iter_state state); + +void print_verifier_state(struct bpf_verifier_env *env, + const struct bpf_func_state *state, bool print_all); +void print_insn_state(struct bpf_verifier_env *env, const struct bpf_func_state *state); + #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 4a6b6b77ccb6..4caab0c6b361 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -563,6 +563,7 @@ struct cgroup_root { /* A list running through the active hierarchies */ struct list_head root_list; + struct rcu_head rcu; /* Hierarchy-specific flags */ unsigned int flags; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 0ef0af66080e..34aaf0e87def 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -69,6 +69,7 @@ struct css_task_iter { extern struct file_system_type cgroup_fs_type; extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; +extern spinlock_t css_set_lock; #define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys; #include <linux/cgroup_subsys.h> @@ -386,7 +387,6 @@ static inline void cgroup_unlock(void) * as locks used during the cgroup_subsys::attach() methods. */ #ifdef CONFIG_PROVE_RCU -extern spinlock_t css_set_lock; #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ rcu_read_lock_sched_held() || \ @@ -853,4 +853,6 @@ static inline void cgroup_bpf_put(struct cgroup *cgrp) {} #endif /* CONFIG_CGROUP_BPF */ +struct cgroup *task_get_cgroup1(struct task_struct *tsk, int hierarchy_id); + #endif /* _LINUX_CGROUP_H */ diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 2ceba3fe4ec1..aebb65bf95a7 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -136,7 +136,7 @@ #endif #define __diag_ignore_all(option, comment) \ - __diag_GCC(8, ignore, option) + __diag(__diag_GCC_ignore option) /* * Prior to 9.1, -Wno-alloc-size-larger-than (and therefore the "alloc_size" diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 689028257fcc..c2bb74143eda 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1044,6 +1044,14 @@ static inline int ethtool_mm_frag_size_min_to_add(u32 val_min, u32 *val_add, } /** + * ethtool_get_ts_info_by_layer - Obtains time stamping capabilities from the MAC or PHY layer. + * @dev: pointer to net_device structure + * @info: buffer to hold the result + * Returns zero on success, non-zero otherwise. 
+ */ +int ethtool_get_ts_info_by_layer(struct net_device *dev, struct ethtool_ts_info *info); + +/** * ethtool_sprintf - Write formatted string to ethtool string data * @data: Pointer to a pointer to the start of string to update * @fmt: Format of string to write diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h index c1c76a70a6ce..adb83a42a6b9 100644 --- a/include/linux/indirect_call_wrapper.h +++ b/include/linux/indirect_call_wrapper.h @@ -11,7 +11,7 @@ * @__VA_ARGS__: arguments for @f * * Avoid retpoline overhead for known builtin, checking @f vs each of them and - * eventually invoking directly the builtin function. The functions are check + * eventually invoking directly the builtin function. The functions are checked * in the given order. Fallback to the indirect call. */ #define INDIRECT_CALL_1(f, f1, ...) \ diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index 7303b4bc2ce0..287f590ed56b 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -10,6 +10,11 @@ static inline void linkmode_zero(unsigned long *dst) bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS); } +static inline void linkmode_fill(unsigned long *dst) +{ + bitmap_fill(dst, __ETHTOOL_LINK_MODE_MASK_NBITS); +} + static inline void linkmode_copy(unsigned long *dst, const unsigned long *src) { bitmap_copy(dst, src, __ETHTOOL_LINK_MODE_MASK_NBITS); diff --git a/include/linux/mii_timestamper.h b/include/linux/mii_timestamper.h index fa940bbaf8ae..26b04f73f214 100644 --- a/include/linux/mii_timestamper.h +++ b/include/linux/mii_timestamper.h @@ -9,6 +9,7 @@ #include <linux/device.h> #include <linux/ethtool.h> #include <linux/skbuff.h> +#include <linux/net_tstamp.h> struct phy_device; @@ -51,7 +52,8 @@ struct mii_timestamper { struct sk_buff *skb, int type); int (*hwtstamp)(struct mii_timestamper *mii_ts, - struct ifreq *ifreq); + struct kernel_hwtstamp_config *kernel_config, + struct netlink_ext_ack *extack); void (*link_state)(struct mii_timestamper *mii_ts, struct phy_device *phydev); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6f3631425f38..ce2e71cd6d2a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10103,7 +10103,10 @@ enum { struct mlx5_ifc_mtutc_reg_bits { u8 reserved_at_0[0x5]; u8 freq_adj_units[0x3]; - u8 reserved_at_8[0x14]; + u8 reserved_at_8[0x3]; + u8 log_max_freq_adjustment[0x5]; + + u8 reserved_at_10[0xc]; u8 operation[0x4]; u8 freq_adjustment[0x20]; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2564e209465e..e87caa81f70c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3964,6 +3964,9 @@ int generic_hwtstamp_get_lower(struct net_device *dev, int generic_hwtstamp_set_lower(struct net_device *dev, struct kernel_hwtstamp_config *kernel_cfg, struct netlink_ext_ack *extack); +int dev_set_hwtstamp_phylib(struct net_device *dev, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack); int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata); unsigned int dev_get_flags(const struct net_device *); int __dev_change_flags(struct net_device *dev, unsigned int flags, diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 75d7de34c908..abe91ed6b9aa 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -351,5 +351,6 @@ bool netlink_ns_capable(const struct sk_buff *skb, struct user_namespace *ns, int cap); bool netlink_capable(const struct sk_buff *skb, int cap); bool 
netlink_net_capable(const struct sk_buff *skb, int cap); +struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast); #endif /* __LINUX_NETLINK_H */ diff --git a/include/linux/phy.h b/include/linux/phy.h index 3cc52826f18e..e5f1f41e399c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1560,9 +1560,11 @@ static inline bool phy_has_txtstamp(struct phy_device *phydev) return phydev && phydev->mii_ts && phydev->mii_ts->txtstamp; } -static inline int phy_hwtstamp(struct phy_device *phydev, struct ifreq *ifr) +static inline int phy_hwtstamp(struct phy_device *phydev, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { - return phydev->mii_ts->hwtstamp(phydev->mii_ts, ifr); + return phydev->mii_ts->hwtstamp(phydev->mii_ts, cfg, extack); } static inline bool phy_rxtstamp(struct phy_device *phydev, struct sk_buff *skb, diff --git a/include/linux/tnum.h b/include/linux/tnum.h index 1c3948a1d6ad..3c13240077b8 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -106,6 +106,10 @@ int tnum_sbin(char *str, size_t size, struct tnum a); struct tnum tnum_subreg(struct tnum a); /* Returns the tnum with the lower 32-bit subreg cleared */ struct tnum tnum_clear_subreg(struct tnum a); +/* Returns the tnum with the lower 32-bit subreg in *reg* set to the lower + * 32-bit subreg in *subreg* + */ +struct tnum tnum_with_subreg(struct tnum reg, struct tnum subreg); /* Returns the tnum with the lower 32-bit subreg set to value */ struct tnum tnum_const_subreg(struct tnum a, u32 value); /* Returns true if 32-bit subreg @a is a known constant*/ diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index f346b4efbc30..2d746f4c9a0a 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -416,6 +416,17 @@ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph, return 0; } +static inline __be32 ip_tunnel_get_flowlabel(const struct iphdr *iph, + const struct sk_buff *skb) +{ + __be16 payload_protocol = skb_protocol(skb, true); + + if (payload_protocol == htons(ETH_P_IPV6)) + return ip6_flowlabel((const struct ipv6hdr *)iph); + else + return 0; +} + static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph, const struct sk_buff *skb) { diff --git a/include/net/netlink.h b/include/net/netlink.h index 83bdf787aeee..167b91348e57 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -1011,6 +1011,20 @@ static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags) } /** + * nlmsg_new_large - Allocate a new netlink message with non-contiguous + * physical memory + * @payload: size of the message payload + * + * The allocated skb is unable to have frag page for shinfo->frags*, + * as the NULL setting for skb->head in netlink_skb_destructor() will + * bypass most of the handling in skb_release_data() + */ +static inline struct sk_buff *nlmsg_new_large(size_t payload) +{ + return netlink_alloc_large_skb(nlmsg_total_size(payload), 0); +} + +/** * nlmsg_end - Finalize a netlink message * @skb: socket buffer the message is stored in * @nlh: netlink message header diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 6fc5134095ed..e1bb92c192de 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -54,18 +54,22 @@ struct pp_alloc_cache { * @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV */ struct page_pool_params { - unsigned int flags; - unsigned int order; - unsigned int pool_size; - int nid; - struct device *dev; - struct napi_struct *napi; - enum 
dma_data_direction dma_dir; - unsigned int max_len; - unsigned int offset; + struct_group_tagged(page_pool_params_fast, fast, + unsigned int flags; + unsigned int order; + unsigned int pool_size; + int nid; + struct device *dev; + struct napi_struct *napi; + enum dma_data_direction dma_dir; + unsigned int max_len; + unsigned int offset; + ); + struct_group_tagged(page_pool_params_slow, slow, /* private: used by test code only */ - void (*init_callback)(struct page *page, void *arg); - void *init_arg; + void (*init_callback)(struct page *page, void *arg); + void *init_arg; + ); }; #ifdef CONFIG_PAGE_POOL_STATS @@ -119,7 +123,9 @@ struct page_pool_stats { #endif struct page_pool { - struct page_pool_params p; + struct page_pool_params_fast p; + + bool has_init_callback; long frag_users; struct page *frag_page; @@ -178,6 +184,9 @@ struct page_pool { refcount_t user_cnt; u64 destroy_cnt; + + /* Slow/Control-path information follows */ + struct page_pool_params_slow slow; }; struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 6a9f8a5f387c..33ba6fc151cf 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -210,22 +210,23 @@ struct vxlan_rdst { }; struct vxlan_config { - union vxlan_addr remote_ip; - union vxlan_addr saddr; - __be32 vni; - int remote_ifindex; - int mtu; - __be16 dst_port; - u16 port_min; - u16 port_max; - u8 tos; - u8 ttl; - __be32 label; - u32 flags; - unsigned long age_interval; - unsigned int addrmax; - bool no_share; - enum ifla_vxlan_df df; + union vxlan_addr remote_ip; + union vxlan_addr saddr; + __be32 vni; + int remote_ifindex; + int mtu; + __be16 dst_port; + u16 port_min; + u16 port_max; + u8 tos; + u8 ttl; + __be32 label; + enum ifla_vxlan_label_policy label_policy; + u32 flags; + unsigned long age_interval; + unsigned int addrmax; + bool no_share; + enum ifla_vxlan_df df; }; enum { diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h index 9204e4494b25..6e25753015df 100644 --- a/include/uapi/linux/batadv_packet.h +++ b/include/uapi/linux/batadv_packet.h @@ -116,6 +116,9 @@ enum batadv_icmp_packettype { * only need routable IPv4 multicast packets we signed up for explicitly * @BATADV_MCAST_WANT_NO_RTR6: we have no IPv6 multicast router and therefore * only need routable IPv6 multicast packets we signed up for explicitly + * @BATADV_MCAST_HAVE_MC_PTYPE_CAPA: we can parse, receive and forward + * batman-adv multicast packets with a multicast tracker TVLV. And all our + * hard interfaces have an MTU of at least 1280 bytes. 
*/ enum batadv_mcast_flags { BATADV_MCAST_WANT_ALL_UNSNOOPABLES = 1UL << 0, @@ -123,6 +126,7 @@ enum batadv_mcast_flags { BATADV_MCAST_WANT_ALL_IPV6 = 1UL << 2, BATADV_MCAST_WANT_NO_RTR4 = 1UL << 3, BATADV_MCAST_WANT_NO_RTR6 = 1UL << 4, + BATADV_MCAST_HAVE_MC_PTYPE_CAPA = 1UL << 5, }; /* tt data subtypes */ @@ -174,14 +178,16 @@ enum batadv_bla_claimframe { * @BATADV_TVLV_TT: translation table tvlv * @BATADV_TVLV_ROAM: roaming advertisement tvlv * @BATADV_TVLV_MCAST: multicast capability tvlv + * @BATADV_TVLV_MCAST_TRACKER: multicast tracker tvlv */ enum batadv_tvlv_type { - BATADV_TVLV_GW = 0x01, - BATADV_TVLV_DAT = 0x02, - BATADV_TVLV_NC = 0x03, - BATADV_TVLV_TT = 0x04, - BATADV_TVLV_ROAM = 0x05, - BATADV_TVLV_MCAST = 0x06, + BATADV_TVLV_GW = 0x01, + BATADV_TVLV_DAT = 0x02, + BATADV_TVLV_NC = 0x03, + BATADV_TVLV_TT = 0x04, + BATADV_TVLV_ROAM = 0x05, + BATADV_TVLV_MCAST = 0x06, + BATADV_TVLV_MCAST_TRACKER = 0x07, }; #pragma pack(2) @@ -488,6 +494,25 @@ struct batadv_bcast_packet { }; /** + * struct batadv_mcast_packet - multicast packet for network payload + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header + * @reserved: reserved byte for alignment + * @tvlv_len: length of the appended tvlv buffer (in bytes) + */ +struct batadv_mcast_packet { + __u8 packet_type; + __u8 version; + __u8 ttl; + __u8 reserved; + __be16 tvlv_len; + /* "4 bytes boundary + 2 bytes" long to make the payload after the + * following ethernet header again 4 bytes boundary aligned + */ +}; + +/** * struct batadv_coded_packet - network coded packet * @packet_type: batman-adv packet type, part of the general header * @version: batman-adv protocol version, part of the general header @@ -628,6 +653,14 @@ struct batadv_tvlv_mcast_data { __u8 reserved[3]; }; +/** + * struct batadv_tvlv_mcast_tracker - payload of a multicast tracker tvlv + * @num_dests: number of subsequent destination originator MAC addresses + */ +struct batadv_tvlv_mcast_tracker { + __be16 num_dests; +}; + #pragma pack() #endif /* _UAPI_LINUX_BATADV_PACKET_H_ */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0f6cdf52b1da..7a5498242eaa 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1200,6 +1200,9 @@ enum bpf_perf_event_type { */ #define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6) +/* The verifier internal test flag. Behavior is undefined */ +#define BPF_F_TEST_REG_INVARIANTS (1U << 7) + /* link_create.kprobe_multi.flags used in LINK_CREATE command for * BPF_TRACE_KPROBE_MULTI attach type to create return probe. */ @@ -4517,6 +4520,8 @@ union bpf_attr { * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. + * Note: the user stack will only be populated if the *task* is + * the current task; all other tasks will return -EOPNOTSUPP. * To achieve this, the helper needs *task*, which is a valid * pointer to **struct task_struct**. To store the stacktrace, the * bpf program provides *buf* with a nonnegative *size*. @@ -4528,6 +4533,7 @@ union bpf_attr { * * **BPF_F_USER_STACK** * Collect a user space stack instead of a kernel stack. + * The *task* must be the current task. * **BPF_F_USER_BUILD_ID** * Collect buildid+offset instead of ips for user stack, * only valid if **BPF_F_USER_STACK** is also specified. 
@@ -7151,40 +7157,31 @@ struct bpf_spin_lock { }; struct bpf_timer { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_dynptr { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_head { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_node { - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[3]; } __attribute__((aligned(8))); struct bpf_rb_root { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_rb_node { - __u64 :64; - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[4]; } __attribute__((aligned(8))); struct bpf_refcount { - __u32 :32; + __u32 __opaque[1]; } __attribute__((aligned(4))); struct bpf_sysctl { diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 29ff80da2775..8181ef23a7a2 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -856,6 +856,7 @@ enum { IFLA_VXLAN_DF, IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */ IFLA_VXLAN_LOCALBYPASS, + IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */ __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) @@ -873,6 +874,13 @@ enum ifla_vxlan_df { VXLAN_DF_MAX = __VXLAN_DF_END - 1, }; +enum ifla_vxlan_label_policy { + VXLAN_LABEL_FIXED = 0, + VXLAN_LABEL_INHERIT = 1, + __VXLAN_LABEL_END, + VXLAN_LABEL_MAX = __VXLAN_LABEL_END - 1, +}; + /* GENEVE section */ enum { IFLA_GENEVE_UNSPEC, diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 15d71d2986d3..63cf4128fc05 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3840,9 +3840,6 @@ end: return ERR_PTR(ret); } -#define GRAPH_ROOT_MASK (BPF_LIST_HEAD | BPF_RB_ROOT) -#define GRAPH_NODE_MASK (BPF_LIST_NODE | BPF_RB_NODE) - int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) { int i; @@ -3855,13 +3852,13 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) * Hence we only need to ensure that bpf_{list_head,rb_root} ownership * does not form cycles. */ - if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & GRAPH_ROOT_MASK)) + if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & BPF_GRAPH_ROOT)) return 0; for (i = 0; i < rec->cnt; i++) { struct btf_struct_meta *meta; u32 btf_id; - if (!(rec->fields[i].type & GRAPH_ROOT_MASK)) + if (!(rec->fields[i].type & BPF_GRAPH_ROOT)) continue; btf_id = rec->fields[i].graph_root.value_btf_id; meta = btf_find_struct_meta(btf, btf_id); @@ -3873,7 +3870,7 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) * to check ownership cycle for a type unless it's also a * node type. */ - if (!(rec->field_mask & GRAPH_NODE_MASK)) + if (!(rec->field_mask & BPF_GRAPH_NODE)) continue; /* We need to ensure ownership acyclicity among all types. The @@ -3909,7 +3906,7 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) * - A is both an root and node. * - B is only an node. 
*/ - if (meta->record->field_mask & GRAPH_ROOT_MASK) + if (meta->record->field_mask & BPF_GRAPH_ROOT) return -ELOOP; } return 0; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 56b0c1f678ee..b45a8381f9bd 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1937,10 +1937,7 @@ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu) ma = &bpf_global_percpu_ma; else ma = &bpf_global_ma; - if (rec && rec->refcount_off >= 0) - bpf_mem_free_rcu(ma, p); - else - bpf_mem_free(ma, p); + bpf_mem_free_rcu(ma, p); } __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) @@ -2231,6 +2228,25 @@ __bpf_kfunc long bpf_task_under_cgroup(struct task_struct *task, rcu_read_unlock(); return ret; } + +/** + * bpf_task_get_cgroup1 - Acquires the associated cgroup of a task within a + * specific cgroup1 hierarchy. The cgroup1 hierarchy is identified by its + * hierarchy ID. + * @task: The target task + * @hierarchy_id: The ID of a cgroup1 hierarchy + * + * On success, the cgroup is returen. On failure, NULL is returned. + */ +__bpf_kfunc struct cgroup * +bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) +{ + struct cgroup *cgrp = task_get_cgroup1(task, hierarchy_id); + + if (IS_ERR(cgrp)) + return NULL; + return cgrp; +} #endif /* CONFIG_CGROUPS */ /** @@ -2520,7 +2536,7 @@ BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_percpu_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE) BTF_ID_FLAGS(func, bpf_percpu_obj_drop_impl, KF_RELEASE) -BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL | KF_RCU) BTF_ID_FLAGS(func, bpf_list_push_front_impl) BTF_ID_FLAGS(func, bpf_list_push_back_impl) BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL) @@ -2537,6 +2553,7 @@ BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU) +BTF_ID_FLAGS(func, bpf_task_get_cgroup1, KF_ACQUIRE | KF_RCU | KF_RET_NULL) #endif BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_throw) @@ -2618,3 +2635,22 @@ static int __init kfunc_init(void) } late_initcall(kfunc_init); + +/* Get a pointer to dynptr data up to len bytes for read only access. If + * the dynptr doesn't have continuous data up to len bytes, return NULL. + */ +const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len) +{ + return bpf_dynptr_slice(ptr, 0, NULL, len); +} + +/* Get a pointer to dynptr data up to len bytes for read write access. If + * the dynptr doesn't have continuous data up to len bytes, or the dynptr + * is read only, return NULL. + */ +void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len) +{ + if (__bpf_dynptr_is_rdonly(ptr)) + return NULL; + return (void *)__bpf_dynptr_data(ptr, len); +} diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 850494423530..3505f3e5ae96 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -10,6 +10,8 @@ #include <linux/bpf_verifier.h> #include <linux/math64.h> +#define verbose(env, fmt, args...) 
bpf_verifier_log_write(env, fmt, ##args) + static bool bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log) { /* ubuf and len_total should both be specified (or not) together */ @@ -325,3 +327,481 @@ __printf(2, 3) void bpf_log(struct bpf_verifier_log *log, va_end(args); } EXPORT_SYMBOL_GPL(bpf_log); + +static const struct bpf_line_info * +find_linfo(const struct bpf_verifier_env *env, u32 insn_off) +{ + const struct bpf_line_info *linfo; + const struct bpf_prog *prog; + u32 i, nr_linfo; + + prog = env->prog; + nr_linfo = prog->aux->nr_linfo; + + if (!nr_linfo || insn_off >= prog->len) + return NULL; + + linfo = prog->aux->linfo; + for (i = 1; i < nr_linfo; i++) + if (insn_off < linfo[i].insn_off) + break; + + return &linfo[i - 1]; +} + +static const char *ltrim(const char *s) +{ + while (isspace(*s)) + s++; + + return s; +} + +__printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env, + u32 insn_off, + const char *prefix_fmt, ...) +{ + const struct bpf_line_info *linfo; + + if (!bpf_verifier_log_needed(&env->log)) + return; + + linfo = find_linfo(env, insn_off); + if (!linfo || linfo == env->prev_linfo) + return; + + if (prefix_fmt) { + va_list args; + + va_start(args, prefix_fmt); + bpf_verifier_vlog(&env->log, prefix_fmt, args); + va_end(args); + } + + verbose(env, "%s\n", + ltrim(btf_name_by_offset(env->prog->aux->btf, + linfo->line_off))); + + env->prev_linfo = linfo; +} + +static const char *btf_type_name(const struct btf *btf, u32 id) +{ + return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off); +} + +/* string representation of 'enum bpf_reg_type' + * + * Note that reg_type_str() can not appear more than once in a single verbose() + * statement. + */ +const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type) +{ + char postfix[16] = {0}, prefix[64] = {0}; + static const char * const str[] = { + [NOT_INIT] = "?", + [SCALAR_VALUE] = "scalar", + [PTR_TO_CTX] = "ctx", + [CONST_PTR_TO_MAP] = "map_ptr", + [PTR_TO_MAP_VALUE] = "map_value", + [PTR_TO_STACK] = "fp", + [PTR_TO_PACKET] = "pkt", + [PTR_TO_PACKET_META] = "pkt_meta", + [PTR_TO_PACKET_END] = "pkt_end", + [PTR_TO_FLOW_KEYS] = "flow_keys", + [PTR_TO_SOCKET] = "sock", + [PTR_TO_SOCK_COMMON] = "sock_common", + [PTR_TO_TCP_SOCK] = "tcp_sock", + [PTR_TO_TP_BUFFER] = "tp_buffer", + [PTR_TO_XDP_SOCK] = "xdp_sock", + [PTR_TO_BTF_ID] = "ptr_", + [PTR_TO_MEM] = "mem", + [PTR_TO_BUF] = "buf", + [PTR_TO_FUNC] = "func", + [PTR_TO_MAP_KEY] = "map_key", + [CONST_PTR_TO_DYNPTR] = "dynptr_ptr", + }; + + if (type & PTR_MAYBE_NULL) { + if (base_type(type) == PTR_TO_BTF_ID) + strncpy(postfix, "or_null_", 16); + else + strncpy(postfix, "_or_null", 16); + } + + snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s%s", + type & MEM_RDONLY ? "rdonly_" : "", + type & MEM_RINGBUF ? "ringbuf_" : "", + type & MEM_USER ? "user_" : "", + type & MEM_PERCPU ? "percpu_" : "", + type & MEM_RCU ? "rcu_" : "", + type & PTR_UNTRUSTED ? "untrusted_" : "", + type & PTR_TRUSTED ? 
"trusted_" : "" + ); + + snprintf(env->tmp_str_buf, TMP_STR_BUF_LEN, "%s%s%s", + prefix, str[base_type(type)], postfix); + return env->tmp_str_buf; +} + +const char *dynptr_type_str(enum bpf_dynptr_type type) +{ + switch (type) { + case BPF_DYNPTR_TYPE_LOCAL: + return "local"; + case BPF_DYNPTR_TYPE_RINGBUF: + return "ringbuf"; + case BPF_DYNPTR_TYPE_SKB: + return "skb"; + case BPF_DYNPTR_TYPE_XDP: + return "xdp"; + case BPF_DYNPTR_TYPE_INVALID: + return "<invalid>"; + default: + WARN_ONCE(1, "unknown dynptr type %d\n", type); + return "<unknown>"; + } +} + +const char *iter_type_str(const struct btf *btf, u32 btf_id) +{ + if (!btf || btf_id == 0) + return "<invalid>"; + + /* we already validated that type is valid and has conforming name */ + return btf_type_name(btf, btf_id) + sizeof(ITER_PREFIX) - 1; +} + +const char *iter_state_str(enum bpf_iter_state state) +{ + switch (state) { + case BPF_ITER_STATE_ACTIVE: + return "active"; + case BPF_ITER_STATE_DRAINED: + return "drained"; + case BPF_ITER_STATE_INVALID: + return "<invalid>"; + default: + WARN_ONCE(1, "unknown iter state %d\n", state); + return "<unknown>"; + } +} + +static char slot_type_char[] = { + [STACK_INVALID] = '?', + [STACK_SPILL] = 'r', + [STACK_MISC] = 'm', + [STACK_ZERO] = '0', + [STACK_DYNPTR] = 'd', + [STACK_ITER] = 'i', +}; + +static void print_liveness(struct bpf_verifier_env *env, + enum bpf_reg_liveness live) +{ + if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE)) + verbose(env, "_"); + if (live & REG_LIVE_READ) + verbose(env, "r"); + if (live & REG_LIVE_WRITTEN) + verbose(env, "w"); + if (live & REG_LIVE_DONE) + verbose(env, "D"); +} + +#define UNUM_MAX_DECIMAL U16_MAX +#define SNUM_MAX_DECIMAL S16_MAX +#define SNUM_MIN_DECIMAL S16_MIN + +static bool is_unum_decimal(u64 num) +{ + return num <= UNUM_MAX_DECIMAL; +} + +static bool is_snum_decimal(s64 num) +{ + return num >= SNUM_MIN_DECIMAL && num <= SNUM_MAX_DECIMAL; +} + +static void verbose_unum(struct bpf_verifier_env *env, u64 num) +{ + if (is_unum_decimal(num)) + verbose(env, "%llu", num); + else + verbose(env, "%#llx", num); +} + +static void verbose_snum(struct bpf_verifier_env *env, s64 num) +{ + if (is_snum_decimal(num)) + verbose(env, "%lld", num); + else + verbose(env, "%#llx", num); +} + +static void print_scalar_ranges(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, + const char **sep) +{ + /* For signed ranges, we want to unify 64-bit and 32-bit values in the + * output as much as possible, but there is a bit of a complication. + * If we choose to print values as decimals, this is natural to do, + * because negative 64-bit and 32-bit values >= -S32_MIN have the same + * representation due to sign extension. But if we choose to print + * them in hex format (see is_snum_decimal()), then sign extension is + * misleading. + * E.g., smin=-2 and smin32=-2 are exactly the same in decimal, but in + * hex they will be smin=0xfffffffffffffffe and smin32=0xfffffffe, two + * very different numbers. + * So we avoid sign extension if we choose to print values in hex. + */ + struct { + const char *name; + u64 val; + bool omit; + } minmaxs[] = { + {"smin", reg->smin_value, reg->smin_value == S64_MIN}, + {"smax", reg->smax_value, reg->smax_value == S64_MAX}, + {"umin", reg->umin_value, reg->umin_value == 0}, + {"umax", reg->umax_value, reg->umax_value == U64_MAX}, + {"smin32", + is_snum_decimal((s64)reg->s32_min_value) + ? 
(s64)reg->s32_min_value + : (u32)reg->s32_min_value, reg->s32_min_value == S32_MIN}, + {"smax32", + is_snum_decimal((s64)reg->s32_max_value) + ? (s64)reg->s32_max_value + : (u32)reg->s32_max_value, reg->s32_max_value == S32_MAX}, + {"umin32", reg->u32_min_value, reg->u32_min_value == 0}, + {"umax32", reg->u32_max_value, reg->u32_max_value == U32_MAX}, + }, *m1, *m2, *mend = &minmaxs[ARRAY_SIZE(minmaxs)]; + bool neg1, neg2; + + for (m1 = &minmaxs[0]; m1 < mend; m1++) { + if (m1->omit) + continue; + + neg1 = m1->name[0] == 's' && (s64)m1->val < 0; + + verbose(env, "%s%s=", *sep, m1->name); + *sep = ","; + + for (m2 = m1 + 2; m2 < mend; m2 += 2) { + if (m2->omit || m2->val != m1->val) + continue; + /* don't mix negatives with positives */ + neg2 = m2->name[0] == 's' && (s64)m2->val < 0; + if (neg2 != neg1) + continue; + m2->omit = true; + verbose(env, "%s=", m2->name); + } + + if (m1->name[0] == 's') + verbose_snum(env, m1->val); + else + verbose_unum(env, m1->val); + } +} + +static bool type_is_map_ptr(enum bpf_reg_type t) { + switch (base_type(t)) { + case CONST_PTR_TO_MAP: + case PTR_TO_MAP_KEY: + case PTR_TO_MAP_VALUE: + return true; + default: + return false; + } +} + +static void print_reg_state(struct bpf_verifier_env *env, + const struct bpf_func_state *state, + const struct bpf_reg_state *reg) +{ + enum bpf_reg_type t; + const char *sep = ""; + + t = reg->type; + if (t == SCALAR_VALUE && reg->precise) + verbose(env, "P"); + if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) { + /* reg->off should be 0 for SCALAR_VALUE */ + verbose_snum(env, reg->var_off.value + reg->off); + return; + } +/* + * _a stands for append, was shortened to avoid multiline statements below. + * This macro is used to output a comma separated list of attributes. + */ +#define verbose_a(fmt, ...) 
({ verbose(env, "%s" fmt, sep, ##__VA_ARGS__); sep = ","; }) + + verbose(env, "%s", reg_type_str(env, t)); + if (t == PTR_TO_STACK) { + if (state->frameno != reg->frameno) + verbose(env, "[%d]", reg->frameno); + if (tnum_is_const(reg->var_off)) { + verbose_snum(env, reg->var_off.value + reg->off); + return; + } + } + if (base_type(t) == PTR_TO_BTF_ID) + verbose(env, "%s", btf_type_name(reg->btf, reg->btf_id)); + verbose(env, "("); + if (reg->id) + verbose_a("id=%d", reg->id); + if (reg->ref_obj_id) + verbose_a("ref_obj_id=%d", reg->ref_obj_id); + if (type_is_non_owning_ref(reg->type)) + verbose_a("%s", "non_own_ref"); + if (type_is_map_ptr(t)) { + if (reg->map_ptr->name[0]) + verbose_a("map=%s", reg->map_ptr->name); + verbose_a("ks=%d,vs=%d", + reg->map_ptr->key_size, + reg->map_ptr->value_size); + } + if (t != SCALAR_VALUE && reg->off) { + verbose_a("off="); + verbose_snum(env, reg->off); + } + if (type_is_pkt_pointer(t)) { + verbose_a("r="); + verbose_unum(env, reg->range); + } + if (tnum_is_const(reg->var_off)) { + /* a pointer register with fixed offset */ + if (reg->var_off.value) { + verbose_a("imm="); + verbose_snum(env, reg->var_off.value); + } + } else { + print_scalar_ranges(env, reg, &sep); + if (!tnum_is_unknown(reg->var_off)) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose_a("var_off=%s", tn_buf); + } + } + verbose(env, ")"); + +#undef verbose_a +} + +void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_func_state *state, + bool print_all) +{ + const struct bpf_reg_state *reg; + int i; + + if (state->frameno) + verbose(env, " frame%d:", state->frameno); + for (i = 0; i < MAX_BPF_REG; i++) { + reg = &state->regs[i]; + if (reg->type == NOT_INIT) + continue; + if (!print_all && !reg_scratched(env, i)) + continue; + verbose(env, " R%d", i); + print_liveness(env, reg->live); + verbose(env, "="); + print_reg_state(env, state, reg); + } + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { + char types_buf[BPF_REG_SIZE + 1]; + bool valid = false; + u8 slot_type; + int j; + + if (!print_all && !stack_slot_scratched(env, i)) + continue; + + for (j = 0; j < BPF_REG_SIZE; j++) { + slot_type = state->stack[i].slot_type[j]; + if (slot_type != STACK_INVALID) + valid = true; + types_buf[j] = slot_type_char[slot_type]; + } + types_buf[BPF_REG_SIZE] = 0; + if (!valid) + continue; + + reg = &state->stack[i].spilled_ptr; + switch (state->stack[i].slot_type[BPF_REG_SIZE - 1]) { + case STACK_SPILL: + /* print MISC/ZERO/INVALID slots above subreg spill */ + for (j = 0; j < BPF_REG_SIZE; j++) + if (state->stack[i].slot_type[j] == STACK_SPILL) + break; + types_buf[j] = '\0'; + + verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); + print_liveness(env, reg->live); + verbose(env, "=%s", types_buf); + print_reg_state(env, state, reg); + break; + case STACK_DYNPTR: + /* skip to main dynptr slot */ + i += BPF_DYNPTR_NR_SLOTS - 1; + reg = &state->stack[i].spilled_ptr; + + verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); + print_liveness(env, reg->live); + verbose(env, "=dynptr_%s", dynptr_type_str(reg->dynptr.type)); + if (reg->ref_obj_id) + verbose(env, "(ref_id=%d)", reg->ref_obj_id); + break; + case STACK_ITER: + /* only main slot has ref_obj_id set; skip others */ + if (!reg->ref_obj_id) + continue; + + verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); + print_liveness(env, reg->live); + verbose(env, "=iter_%s(ref_id=%d,state=%s,depth=%u)", + iter_type_str(reg->iter.btf, reg->iter.btf_id), + reg->ref_obj_id, iter_state_str(reg->iter.state), + 
reg->iter.depth); + break; + case STACK_MISC: + case STACK_ZERO: + default: + verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); + print_liveness(env, reg->live); + verbose(env, "=%s", types_buf); + break; + } + } + if (state->acquired_refs && state->refs[0].id) { + verbose(env, " refs=%d", state->refs[0].id); + for (i = 1; i < state->acquired_refs; i++) + if (state->refs[i].id) + verbose(env, ",%d", state->refs[i].id); + } + if (state->in_callback_fn) + verbose(env, " cb"); + if (state->in_async_callback_fn) + verbose(env, " async_cb"); + verbose(env, "\n"); + if (!print_all) + mark_verifier_state_clean(env); +} + +static inline u32 vlog_alignment(u32 pos) +{ + return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT), + BPF_LOG_MIN_ALIGNMENT) - pos - 1; +} + +void print_insn_state(struct bpf_verifier_env *env, const struct bpf_func_state *state) +{ + if (env->prev_log_pos && env->prev_log_pos == env->log.end_pos) { + /* remove new line character */ + bpf_vlog_reset(&env->log, env->prev_log_pos - 1); + verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_pos), ' '); + } else { + verbose(env, "%d:", env->insn_idx); + } + print_verifier_state(env, state, false); +} diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 17c7e7782a1f..b32be680da6c 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -231,6 +231,9 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key) struct lpm_trie_node *node, *found = NULL; struct bpf_lpm_trie_key *key = _key; + if (key->prefixlen > trie->max_prefixlen) + return NULL; + /* Start walking the trie from the root node ... */ for (node = rcu_dereference_check(trie->root, rcu_read_lock_bh_held()); diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index d6b277482085..dff7ba539701 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -388,6 +388,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, { u32 trace_nr, copy_len, elem_size, num_elem, max_depth; bool user_build_id = flags & BPF_F_USER_BUILD_ID; + bool crosstask = task && task != current; u32 skip = flags & BPF_F_SKIP_FIELD_MASK; bool user = flags & BPF_F_USER_STACK; struct perf_callchain_entry *trace; @@ -410,6 +411,14 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, if (task && user && !user_mode(regs)) goto err_fault; + /* get_perf_callchain does not support crosstask user stack walking + * but returns an empty stack instead of NULL. 
+ */ + if (crosstask && user) { + err = -EOPNOTSUPP; + goto clear; + } + num_elem = size / elem_size; max_depth = num_elem + skip; if (sysctl_perf_event_max_stack < max_depth) @@ -421,7 +430,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, trace = get_callchain_entry_for_task(task, max_depth); else trace = get_perf_callchain(regs, 0, kernel, user, max_depth, - false, false); + crosstask, false); if (unlikely(!trace)) goto err_fault; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0ed286b8a0f0..5e43ddd1b83f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2573,7 +2573,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) BPF_F_SLEEPABLE | BPF_F_TEST_RND_HI32 | BPF_F_XDP_HAS_FRAGS | - BPF_F_XDP_DEV_BOUND_ONLY)) + BPF_F_XDP_DEV_BOUND_ONLY | + BPF_F_TEST_REG_INVARIANTS)) return -EINVAL; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 26082b97894d..e5c3500443c6 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -70,15 +70,13 @@ static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_comm return NULL; retry: - task = next_thread(task); + task = __next_thread(task); + if (!task) + return NULL; next_tid = __task_pid_nr_ns(task, PIDTYPE_PID, common->ns); - if (!next_tid || next_tid == common->pid) { - /* Run out of tasks of a process. The tasks of a - * thread_group are linked as circular linked list. - */ - return NULL; - } + if (!next_tid) + goto retry; if (skip_if_dup_files && task->files == task->group_leader->files) goto retry; @@ -980,7 +978,6 @@ __bpf_kfunc int bpf_iter_task_new(struct bpf_iter_task *it, BUILD_BUG_ON(__alignof__(struct bpf_iter_task_kern) != __alignof__(struct bpf_iter_task)); - kit->task = kit->pos = NULL; switch (flags) { case BPF_TASK_ITER_ALL_THREADS: case BPF_TASK_ITER_ALL_PROCS: @@ -1017,20 +1014,16 @@ __bpf_kfunc struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it) if (flags == BPF_TASK_ITER_ALL_PROCS) goto get_next_task; - kit->pos = next_thread(kit->pos); - if (kit->pos == kit->task) { - if (flags == BPF_TASK_ITER_PROC_THREADS) { - kit->pos = NULL; - return pos; - } - } else + kit->pos = __next_thread(kit->pos); + if (kit->pos || flags == BPF_TASK_ITER_PROC_THREADS) return pos; get_next_task: - kit->pos = next_task(kit->pos); - kit->task = kit->pos; - if (kit->pos == &init_task) + kit->task = next_task(kit->task); + if (kit->task == &init_task) kit->pos = NULL; + else + kit->pos = kit->task; return pos; } diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c index 3d7127f439a1..f4c91c9b27d7 100644 --- a/kernel/bpf/tnum.c +++ b/kernel/bpf/tnum.c @@ -208,7 +208,12 @@ struct tnum tnum_clear_subreg(struct tnum a) return tnum_lshift(tnum_rshift(a, 32), 32); } +struct tnum tnum_with_subreg(struct tnum reg, struct tnum subreg) +{ + return tnum_or(tnum_clear_subreg(reg), tnum_subreg(subreg)); +} + struct tnum tnum_const_subreg(struct tnum a, u32 value) { - return tnum_or(tnum_clear_subreg(a), tnum_const(value)); + return tnum_with_subreg(a, tnum_const(value)); } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index af2819d5c8ee..405da1f9e724 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -342,27 +342,6 @@ struct btf *btf_vmlinux; static DEFINE_MUTEX(bpf_verifier_lock); static DEFINE_MUTEX(bpf_percpu_ma_lock); -static const struct bpf_line_info * -find_linfo(const struct bpf_verifier_env *env, u32 insn_off) -{ - const struct 
bpf_line_info *linfo; - const struct bpf_prog *prog; - u32 i, nr_linfo; - - prog = env->prog; - nr_linfo = prog->aux->nr_linfo; - - if (!nr_linfo || insn_off >= prog->len) - return NULL; - - linfo = prog->aux->linfo; - for (i = 1; i < nr_linfo; i++) - if (insn_off < linfo[i].insn_off) - break; - - return &linfo[i - 1]; -} - __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...) { struct bpf_verifier_env *env = private_data; @@ -376,42 +355,6 @@ __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...) va_end(args); } -static const char *ltrim(const char *s) -{ - while (isspace(*s)) - s++; - - return s; -} - -__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env, - u32 insn_off, - const char *prefix_fmt, ...) -{ - const struct bpf_line_info *linfo; - - if (!bpf_verifier_log_needed(&env->log)) - return; - - linfo = find_linfo(env, insn_off); - if (!linfo || linfo == env->prev_linfo) - return; - - if (prefix_fmt) { - va_list args; - - va_start(args, prefix_fmt); - bpf_verifier_vlog(&env->log, prefix_fmt, args); - va_end(args); - } - - verbose(env, "%s\n", - ltrim(btf_name_by_offset(env->prog->aux->btf, - linfo->line_off))); - - env->prev_linfo = linfo; -} - static void verbose_invalid_scalar(struct bpf_verifier_env *env, struct bpf_reg_state *reg, struct tnum *range, const char *ctx, @@ -430,21 +373,6 @@ static void verbose_invalid_scalar(struct bpf_verifier_env *env, verbose(env, " should have been in %s\n", tn_buf); } -static bool type_is_pkt_pointer(enum bpf_reg_type type) -{ - type = base_type(type); - return type == PTR_TO_PACKET || - type == PTR_TO_PACKET_META; -} - -static bool type_is_sk_pointer(enum bpf_reg_type type) -{ - return type == PTR_TO_SOCKET || - type == PTR_TO_SOCK_COMMON || - type == PTR_TO_TCP_SOCK || - type == PTR_TO_XDP_SOCK; -} - static bool type_may_be_null(u32 type) { return type & PTR_MAYBE_NULL; @@ -468,16 +396,6 @@ static bool reg_not_null(const struct bpf_reg_state *reg) type == PTR_TO_MEM; } -static bool type_is_ptr_alloc_obj(u32 type) -{ - return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC; -} - -static bool type_is_non_owning_ref(u32 type) -{ - return type_is_ptr_alloc_obj(type) && type_flag(type) & NON_OWN_REF; -} - static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg) { struct btf_record *rec = NULL; @@ -605,83 +523,6 @@ static bool is_cmpxchg_insn(const struct bpf_insn *insn) insn->imm == BPF_CMPXCHG; } -/* string representation of 'enum bpf_reg_type' - * - * Note that reg_type_str() can not appear more than once in a single verbose() - * statement. 
- */ -static const char *reg_type_str(struct bpf_verifier_env *env, - enum bpf_reg_type type) -{ - char postfix[16] = {0}, prefix[64] = {0}; - static const char * const str[] = { - [NOT_INIT] = "?", - [SCALAR_VALUE] = "scalar", - [PTR_TO_CTX] = "ctx", - [CONST_PTR_TO_MAP] = "map_ptr", - [PTR_TO_MAP_VALUE] = "map_value", - [PTR_TO_STACK] = "fp", - [PTR_TO_PACKET] = "pkt", - [PTR_TO_PACKET_META] = "pkt_meta", - [PTR_TO_PACKET_END] = "pkt_end", - [PTR_TO_FLOW_KEYS] = "flow_keys", - [PTR_TO_SOCKET] = "sock", - [PTR_TO_SOCK_COMMON] = "sock_common", - [PTR_TO_TCP_SOCK] = "tcp_sock", - [PTR_TO_TP_BUFFER] = "tp_buffer", - [PTR_TO_XDP_SOCK] = "xdp_sock", - [PTR_TO_BTF_ID] = "ptr_", - [PTR_TO_MEM] = "mem", - [PTR_TO_BUF] = "buf", - [PTR_TO_FUNC] = "func", - [PTR_TO_MAP_KEY] = "map_key", - [CONST_PTR_TO_DYNPTR] = "dynptr_ptr", - }; - - if (type & PTR_MAYBE_NULL) { - if (base_type(type) == PTR_TO_BTF_ID) - strncpy(postfix, "or_null_", 16); - else - strncpy(postfix, "_or_null", 16); - } - - snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s%s", - type & MEM_RDONLY ? "rdonly_" : "", - type & MEM_RINGBUF ? "ringbuf_" : "", - type & MEM_USER ? "user_" : "", - type & MEM_PERCPU ? "percpu_" : "", - type & MEM_RCU ? "rcu_" : "", - type & PTR_UNTRUSTED ? "untrusted_" : "", - type & PTR_TRUSTED ? "trusted_" : "" - ); - - snprintf(env->tmp_str_buf, TMP_STR_BUF_LEN, "%s%s%s", - prefix, str[base_type(type)], postfix); - return env->tmp_str_buf; -} - -static char slot_type_char[] = { - [STACK_INVALID] = '?', - [STACK_SPILL] = 'r', - [STACK_MISC] = 'm', - [STACK_ZERO] = '0', - [STACK_DYNPTR] = 'd', - [STACK_ITER] = 'i', -}; - -static void print_liveness(struct bpf_verifier_env *env, - enum bpf_reg_liveness live) -{ - if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE)) - verbose(env, "_"); - if (live & REG_LIVE_READ) - verbose(env, "r"); - if (live & REG_LIVE_WRITTEN) - verbose(env, "w"); - if (live & REG_LIVE_DONE) - verbose(env, "D"); -} - static int __get_spi(s32 off) { return (-off - 1) / BPF_REG_SIZE; @@ -751,87 +592,6 @@ static const char *btf_type_name(const struct btf *btf, u32 id) return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off); } -static const char *dynptr_type_str(enum bpf_dynptr_type type) -{ - switch (type) { - case BPF_DYNPTR_TYPE_LOCAL: - return "local"; - case BPF_DYNPTR_TYPE_RINGBUF: - return "ringbuf"; - case BPF_DYNPTR_TYPE_SKB: - return "skb"; - case BPF_DYNPTR_TYPE_XDP: - return "xdp"; - case BPF_DYNPTR_TYPE_INVALID: - return "<invalid>"; - default: - WARN_ONCE(1, "unknown dynptr type %d\n", type); - return "<unknown>"; - } -} - -static const char *iter_type_str(const struct btf *btf, u32 btf_id) -{ - if (!btf || btf_id == 0) - return "<invalid>"; - - /* we already validated that type is valid and has conforming name */ - return btf_type_name(btf, btf_id) + sizeof(ITER_PREFIX) - 1; -} - -static const char *iter_state_str(enum bpf_iter_state state) -{ - switch (state) { - case BPF_ITER_STATE_ACTIVE: - return "active"; - case BPF_ITER_STATE_DRAINED: - return "drained"; - case BPF_ITER_STATE_INVALID: - return "<invalid>"; - default: - WARN_ONCE(1, "unknown iter state %d\n", state); - return "<unknown>"; - } -} - -static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno) -{ - env->scratched_regs |= 1U << regno; -} - -static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi) -{ - env->scratched_stack_slots |= 1ULL << spi; -} - -static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno) -{ - return (env->scratched_regs >> regno) & 
1; -} - -static bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno) -{ - return (env->scratched_stack_slots >> regno) & 1; -} - -static bool verifier_state_scratched(const struct bpf_verifier_env *env) -{ - return env->scratched_regs || env->scratched_stack_slots; -} - -static void mark_verifier_state_clean(struct bpf_verifier_env *env) -{ - env->scratched_regs = 0U; - env->scratched_stack_slots = 0ULL; -} - -/* Used for printing the entire verifier state. */ -static void mark_verifier_state_scratched(struct bpf_verifier_env *env) -{ - env->scratched_regs = ~0U; - env->scratched_stack_slots = ~0ULL; -} - static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type) { switch (arg_type & DYNPTR_TYPE_FLAG_MASK) { @@ -1371,226 +1131,6 @@ static void scrub_spilled_slot(u8 *stype) *stype = STACK_MISC; } -static void print_scalar_ranges(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, - const char **sep) -{ - struct { - const char *name; - u64 val; - bool omit; - } minmaxs[] = { - {"smin", reg->smin_value, reg->smin_value == S64_MIN}, - {"smax", reg->smax_value, reg->smax_value == S64_MAX}, - {"umin", reg->umin_value, reg->umin_value == 0}, - {"umax", reg->umax_value, reg->umax_value == U64_MAX}, - {"smin32", (s64)reg->s32_min_value, reg->s32_min_value == S32_MIN}, - {"smax32", (s64)reg->s32_max_value, reg->s32_max_value == S32_MAX}, - {"umin32", reg->u32_min_value, reg->u32_min_value == 0}, - {"umax32", reg->u32_max_value, reg->u32_max_value == U32_MAX}, - }, *m1, *m2, *mend = &minmaxs[ARRAY_SIZE(minmaxs)]; - bool neg1, neg2; - - for (m1 = &minmaxs[0]; m1 < mend; m1++) { - if (m1->omit) - continue; - - neg1 = m1->name[0] == 's' && (s64)m1->val < 0; - - verbose(env, "%s%s=", *sep, m1->name); - *sep = ","; - - for (m2 = m1 + 2; m2 < mend; m2 += 2) { - if (m2->omit || m2->val != m1->val) - continue; - /* don't mix negatives with positives */ - neg2 = m2->name[0] == 's' && (s64)m2->val < 0; - if (neg2 != neg1) - continue; - m2->omit = true; - verbose(env, "%s=", m2->name); - } - - verbose(env, m1->name[0] == 's' ? "%lld" : "%llu", m1->val); - } -} - -static void print_verifier_state(struct bpf_verifier_env *env, - const struct bpf_func_state *state, - bool print_all) -{ - const struct bpf_reg_state *reg; - enum bpf_reg_type t; - int i; - - if (state->frameno) - verbose(env, " frame%d:", state->frameno); - for (i = 0; i < MAX_BPF_REG; i++) { - reg = &state->regs[i]; - t = reg->type; - if (t == NOT_INIT) - continue; - if (!print_all && !reg_scratched(env, i)) - continue; - verbose(env, " R%d", i); - print_liveness(env, reg->live); - verbose(env, "="); - if (t == SCALAR_VALUE && reg->precise) - verbose(env, "P"); - if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && - tnum_is_const(reg->var_off)) { - /* reg->off should be 0 for SCALAR_VALUE */ - verbose(env, "%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t)); - verbose(env, "%lld", reg->var_off.value + reg->off); - } else { - const char *sep = ""; - - verbose(env, "%s", reg_type_str(env, t)); - if (base_type(t) == PTR_TO_BTF_ID) - verbose(env, "%s", btf_type_name(reg->btf, reg->btf_id)); - verbose(env, "("); -/* - * _a stands for append, was shortened to avoid multiline statements below. - * This macro is used to output a comma separated list of attributes. - */ -#define verbose_a(fmt, ...) 
({ verbose(env, "%s" fmt, sep, __VA_ARGS__); sep = ","; }) - - if (reg->id) - verbose_a("id=%d", reg->id); - if (reg->ref_obj_id) - verbose_a("ref_obj_id=%d", reg->ref_obj_id); - if (type_is_non_owning_ref(reg->type)) - verbose_a("%s", "non_own_ref"); - if (t != SCALAR_VALUE) - verbose_a("off=%d", reg->off); - if (type_is_pkt_pointer(t)) - verbose_a("r=%d", reg->range); - else if (base_type(t) == CONST_PTR_TO_MAP || - base_type(t) == PTR_TO_MAP_KEY || - base_type(t) == PTR_TO_MAP_VALUE) - verbose_a("ks=%d,vs=%d", - reg->map_ptr->key_size, - reg->map_ptr->value_size); - if (tnum_is_const(reg->var_off)) { - /* Typically an immediate SCALAR_VALUE, but - * could be a pointer whose offset is too big - * for reg->off - */ - verbose_a("imm=%llx", reg->var_off.value); - } else { - print_scalar_ranges(env, reg, &sep); - if (!tnum_is_unknown(reg->var_off)) { - char tn_buf[48]; - - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose_a("var_off=%s", tn_buf); - } - } -#undef verbose_a - - verbose(env, ")"); - } - } - for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { - char types_buf[BPF_REG_SIZE + 1]; - bool valid = false; - int j; - - for (j = 0; j < BPF_REG_SIZE; j++) { - if (state->stack[i].slot_type[j] != STACK_INVALID) - valid = true; - types_buf[j] = slot_type_char[state->stack[i].slot_type[j]]; - } - types_buf[BPF_REG_SIZE] = 0; - if (!valid) - continue; - if (!print_all && !stack_slot_scratched(env, i)) - continue; - switch (state->stack[i].slot_type[BPF_REG_SIZE - 1]) { - case STACK_SPILL: - reg = &state->stack[i].spilled_ptr; - t = reg->type; - - verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); - print_liveness(env, reg->live); - verbose(env, "=%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t)); - if (t == SCALAR_VALUE && reg->precise) - verbose(env, "P"); - if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) - verbose(env, "%lld", reg->var_off.value + reg->off); - break; - case STACK_DYNPTR: - i += BPF_DYNPTR_NR_SLOTS - 1; - reg = &state->stack[i].spilled_ptr; - - verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); - print_liveness(env, reg->live); - verbose(env, "=dynptr_%s", dynptr_type_str(reg->dynptr.type)); - if (reg->ref_obj_id) - verbose(env, "(ref_id=%d)", reg->ref_obj_id); - break; - case STACK_ITER: - /* only main slot has ref_obj_id set; skip others */ - reg = &state->stack[i].spilled_ptr; - if (!reg->ref_obj_id) - continue; - - verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); - print_liveness(env, reg->live); - verbose(env, "=iter_%s(ref_id=%d,state=%s,depth=%u)", - iter_type_str(reg->iter.btf, reg->iter.btf_id), - reg->ref_obj_id, iter_state_str(reg->iter.state), - reg->iter.depth); - break; - case STACK_MISC: - case STACK_ZERO: - default: - reg = &state->stack[i].spilled_ptr; - - for (j = 0; j < BPF_REG_SIZE; j++) - types_buf[j] = slot_type_char[state->stack[i].slot_type[j]]; - types_buf[BPF_REG_SIZE] = 0; - - verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); - print_liveness(env, reg->live); - verbose(env, "=%s", types_buf); - break; - } - } - if (state->acquired_refs && state->refs[0].id) { - verbose(env, " refs=%d", state->refs[0].id); - for (i = 1; i < state->acquired_refs; i++) - if (state->refs[i].id) - verbose(env, ",%d", state->refs[i].id); - } - if (state->in_callback_fn) - verbose(env, " cb"); - if (state->in_async_callback_fn) - verbose(env, " async_cb"); - verbose(env, "\n"); - if (!print_all) - mark_verifier_state_clean(env); -} - -static inline u32 vlog_alignment(u32 pos) -{ - return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT), - 
BPF_LOG_MIN_ALIGNMENT) - pos - 1; -} - -static void print_insn_state(struct bpf_verifier_env *env, - const struct bpf_func_state *state) -{ - if (env->prev_log_pos && env->prev_log_pos == env->log.end_pos) { - /* remove new line character */ - bpf_vlog_reset(&env->log, env->prev_log_pos - 1); - verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_pos), ' '); - } else { - verbose(env, "%d:", env->insn_idx); - } - print_verifier_state(env, state, false); -} - /* copy array src of length n * size bytes to dst. dst is reallocated if it's too * small to hold src. This is different from krealloc since we don't want to preserve * the contents of dst. @@ -2341,69 +1881,214 @@ static void __update_reg_bounds(struct bpf_reg_state *reg) /* Uses signed min/max values to inform unsigned, and vice-versa */ static void __reg32_deduce_bounds(struct bpf_reg_state *reg) { - /* Learn sign from signed bounds. - * If we cannot cross the sign boundary, then signed and unsigned bounds - * are the same, so combine. This works even in the negative case, e.g. - * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff. + /* If upper 32 bits of u64/s64 range don't change, we can use lower 32 + * bits to improve our u32/s32 boundaries. + * + * E.g., the case where we have upper 32 bits as zero ([10, 20] in + * u64) is pretty trivial, it's obvious that in u32 we'll also have + * [10, 20] range. But this property holds for any 64-bit range as + * long as upper 32 bits in that entire range of values stay the same. + * + * E.g., u64 range [0x10000000A, 0x10000000F] ([4294967306, 4294967311] + * in decimal) has the same upper 32 bits throughout all the values in + * that range. As such, lower 32 bits form a valid [0xA, 0xF] ([10, 15]) + * range. + * + * Note also, that [0xA, 0xF] is a valid range both in u32 and in s32, + * following the rules outlined below about u64/s64 correspondence + * (which equally applies to u32 vs s32 correspondence). In general it + * depends on actual hexadecimal values of 32-bit range. They can form + * only valid u32, or only valid s32 ranges in some cases. + * + * So we use all these insights to derive bounds for subregisters here. 
*/ - if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) { - reg->s32_min_value = reg->u32_min_value = - max_t(u32, reg->s32_min_value, reg->u32_min_value); - reg->s32_max_value = reg->u32_max_value = - min_t(u32, reg->s32_max_value, reg->u32_max_value); - return; + if ((reg->umin_value >> 32) == (reg->umax_value >> 32)) { + /* u64 to u32 casting preserves validity of low 32 bits as + * a range, if upper 32 bits are the same + */ + reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->umin_value); + reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->umax_value); + + if ((s32)reg->umin_value <= (s32)reg->umax_value) { + reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value); + reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value); + } + } + if ((reg->smin_value >> 32) == (reg->smax_value >> 32)) { + /* low 32 bits should form a proper u32 range */ + if ((u32)reg->smin_value <= (u32)reg->smax_value) { + reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->smin_value); + reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->smax_value); + } + /* low 32 bits should form a proper s32 range */ + if ((s32)reg->smin_value <= (s32)reg->smax_value) { + reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value); + reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value); + } + } + /* Special case where upper bits form a small sequence of two + * sequential numbers (in 32-bit unsigned space, so 0xffffffff to + * 0x00000000 is also valid), while lower bits form a proper s32 range + * going from negative numbers to positive numbers. E.g., let's say we + * have s64 range [-1, 1] ([0xffffffffffffffff, 0x0000000000000001]). + * Possible s64 values are {-1, 0, 1} ({0xffffffffffffffff, + * 0x0000000000000000, 0x00000000000001}). Ignoring upper 32 bits, + * we still get a valid s32 range [-1, 1] ([0xffffffff, 0x00000001]). + * Note that it doesn't have to be 0xffffffff going to 0x00000000 in + * upper 32 bits. As a random example, s64 range + * [0xfffffff0fffffff0; 0xfffffff100000010], forms a valid s32 range + * [-16, 16] ([0xfffffff0; 0x00000010]) in its 32 bit subregister. + */ + if ((u32)(reg->umin_value >> 32) + 1 == (u32)(reg->umax_value >> 32) && + (s32)reg->umin_value < 0 && (s32)reg->umax_value >= 0) { + reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value); + reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value); + } + if ((u32)(reg->smin_value >> 32) + 1 == (u32)(reg->smax_value >> 32) && + (s32)reg->smin_value < 0 && (s32)reg->smax_value >= 0) { + reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value); + reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value); + } + /* if u32 range forms a valid s32 range (due to matching sign bit), + * try to learn from that + */ + if ((s32)reg->u32_min_value <= (s32)reg->u32_max_value) { + reg->s32_min_value = max_t(s32, reg->s32_min_value, reg->u32_min_value); + reg->s32_max_value = min_t(s32, reg->s32_max_value, reg->u32_max_value); } - /* Learn sign from unsigned bounds. Signed bounds cross the sign - * boundary, so we must be careful. + /* If we cannot cross the sign boundary, then signed and unsigned bounds + * are the same, so combine. This works even in the negative case, e.g. + * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff. */ - if ((s32)reg->u32_max_value >= 0) { - /* Positive. We can't learn anything from the smin, but smax - * is positive, hence safe. 
- */ - reg->s32_min_value = reg->u32_min_value; - reg->s32_max_value = reg->u32_max_value = - min_t(u32, reg->s32_max_value, reg->u32_max_value); - } else if ((s32)reg->u32_min_value < 0) { - /* Negative. We can't learn anything from the smax, but smin - * is negative, hence safe. - */ - reg->s32_min_value = reg->u32_min_value = - max_t(u32, reg->s32_min_value, reg->u32_min_value); - reg->s32_max_value = reg->u32_max_value; + if ((u32)reg->s32_min_value <= (u32)reg->s32_max_value) { + reg->u32_min_value = max_t(u32, reg->s32_min_value, reg->u32_min_value); + reg->u32_max_value = min_t(u32, reg->s32_max_value, reg->u32_max_value); } } static void __reg64_deduce_bounds(struct bpf_reg_state *reg) { - /* Learn sign from signed bounds. - * If we cannot cross the sign boundary, then signed and unsigned bounds + /* If u64 range forms a valid s64 range (due to matching sign bit), + * try to learn from that. Let's do a bit of ASCII art to see when + * this is happening. Let's take u64 range first: + * + * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX + * |-------------------------------|--------------------------------| + * + * Valid u64 range is formed when umin and umax are anywhere in the + * range [0, U64_MAX], and umin <= umax. u64 case is simple and + * straightforward. Let's see how s64 range maps onto the same range + * of values, annotated below the line for comparison: + * + * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX + * |-------------------------------|--------------------------------| + * 0 S64_MAX S64_MIN -1 + * + * So s64 values basically start in the middle and they are logically + * contiguous to the right of it, wrapping around from -1 to 0, and + * then finishing as S64_MAX (0x7fffffffffffffff) right before + * S64_MIN. We can try drawing the continuity of u64 vs s64 values + * more visually as mapped to sign-agnostic range of hex values. + * + * u64 start u64 end + * _______________________________________________________________ + * / \ + * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX + * |-------------------------------|--------------------------------| + * 0 S64_MAX S64_MIN -1 + * / \ + * >------------------------------ -------------------------------> + * s64 continues... s64 end s64 start s64 "midpoint" + * + * What this means is that, in general, we can't always derive + * something new about u64 from any random s64 range, and vice versa. + * + * But we can do that in two particular cases. One is when entire + * u64/s64 range is *entirely* contained within left half of the above + * diagram or when it is *entirely* contained in the right half. I.e.: + * + * |-------------------------------|--------------------------------| + * ^ ^ ^ ^ + * A B C D + * + * [A, B] and [C, D] are contained entirely in their respective halves + * and form valid contiguous ranges as both u64 and s64 values. [A, B] + * will be non-negative both as u64 and s64 (and in fact it will be + * identical ranges no matter the signedness). [C, D] treated as s64 + * will be a range of negative values, while in u64 it will be + * non-negative range of values larger than 0x8000000000000000. + * + * Now, any other range here can't be represented in both u64 and s64 + * simultaneously. E.g., [A, C], [A, D], [B, C], [B, D] are valid + * contiguous u64 ranges, but they are discontinuous in s64. [B, C] + * in s64 would be properly presented as [S64_MIN, C] and [B, S64_MAX], + * for example. 
Similarly, valid s64 range [D, A] (going from negative + * to positive values), would be two separate [D, U64_MAX] and [0, A] + * ranges as u64. Currently reg_state can't represent two segments per + * numeric domain, so in such situations we can only derive maximal + * possible range ([0, U64_MAX] for u64, and [S64_MIN, S64_MAX] for s64). + * + * So we use these facts to derive umin/umax from smin/smax and vice + * versa only if they stay within the same "half". This is equivalent + * to checking sign bit: lower half will have sign bit as zero, upper + * half have sign bit 1. Below in code we simplify this by just + * casting umin/umax as smin/smax and checking if they form valid + * range, and vice versa. Those are equivalent checks. + */ + if ((s64)reg->umin_value <= (s64)reg->umax_value) { + reg->smin_value = max_t(s64, reg->smin_value, reg->umin_value); + reg->smax_value = min_t(s64, reg->smax_value, reg->umax_value); + } + /* If we cannot cross the sign boundary, then signed and unsigned bounds * are the same, so combine. This works even in the negative case, e.g. * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff. */ - if (reg->smin_value >= 0 || reg->smax_value < 0) { - reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value, - reg->umin_value); - reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value, - reg->umax_value); - return; + if ((u64)reg->smin_value <= (u64)reg->smax_value) { + reg->umin_value = max_t(u64, reg->smin_value, reg->umin_value); + reg->umax_value = min_t(u64, reg->smax_value, reg->umax_value); } - /* Learn sign from unsigned bounds. Signed bounds cross the sign - * boundary, so we must be careful. +} + +static void __reg_deduce_mixed_bounds(struct bpf_reg_state *reg) +{ + /* Try to tighten 64-bit bounds from 32-bit knowledge, using 32-bit + * values on both sides of 64-bit range in hope to have tigher range. + * E.g., if r1 is [0x1'00000000, 0x3'80000000], and we learn from + * 32-bit signed > 0 operation that s32 bounds are now [1; 0x7fffffff]. + * With this, we can substitute 1 as low 32-bits of _low_ 64-bit bound + * (0x100000000 -> 0x100000001) and 0x7fffffff as low 32-bits of + * _high_ 64-bit bound (0x380000000 -> 0x37fffffff) and arrive at a + * better overall bounds for r1 as [0x1'000000001; 0x3'7fffffff]. + * We just need to make sure that derived bounds we are intersecting + * with are well-formed ranges in respecitve s64 or u64 domain, just + * like we do with similar kinds of 32-to-64 or 64-to-32 adjustments. */ - if ((s64)reg->umax_value >= 0) { - /* Positive. We can't learn anything from the smin, but smax - * is positive, hence safe. - */ - reg->smin_value = reg->umin_value; - reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value, - reg->umax_value); - } else if ((s64)reg->umin_value < 0) { - /* Negative. We can't learn anything from the smax, but smin - * is negative, hence safe. 
- */ - reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value, - reg->umin_value); - reg->smax_value = reg->umax_value; + __u64 new_umin, new_umax; + __s64 new_smin, new_smax; + + /* u32 -> u64 tightening, it's always well-formed */ + new_umin = (reg->umin_value & ~0xffffffffULL) | reg->u32_min_value; + new_umax = (reg->umax_value & ~0xffffffffULL) | reg->u32_max_value; + reg->umin_value = max_t(u64, reg->umin_value, new_umin); + reg->umax_value = min_t(u64, reg->umax_value, new_umax); + /* u32 -> s64 tightening, u32 range embedded into s64 preserves range validity */ + new_smin = (reg->smin_value & ~0xffffffffULL) | reg->u32_min_value; + new_smax = (reg->smax_value & ~0xffffffffULL) | reg->u32_max_value; + reg->smin_value = max_t(s64, reg->smin_value, new_smin); + reg->smax_value = min_t(s64, reg->smax_value, new_smax); + + /* if s32 can be treated as valid u32 range, we can use it as well */ + if ((u32)reg->s32_min_value <= (u32)reg->s32_max_value) { + /* s32 -> u64 tightening */ + new_umin = (reg->umin_value & ~0xffffffffULL) | (u32)reg->s32_min_value; + new_umax = (reg->umax_value & ~0xffffffffULL) | (u32)reg->s32_max_value; + reg->umin_value = max_t(u64, reg->umin_value, new_umin); + reg->umax_value = min_t(u64, reg->umax_value, new_umax); + /* s32 -> s64 tightening */ + new_smin = (reg->smin_value & ~0xffffffffULL) | (u32)reg->s32_min_value; + new_smax = (reg->smax_value & ~0xffffffffULL) | (u32)reg->s32_max_value; + reg->smin_value = max_t(s64, reg->smin_value, new_smin); + reg->smax_value = min_t(s64, reg->smax_value, new_smax); } } @@ -2411,6 +2096,7 @@ static void __reg_deduce_bounds(struct bpf_reg_state *reg) { __reg32_deduce_bounds(reg); __reg64_deduce_bounds(reg); + __reg_deduce_mixed_bounds(reg); } /* Attempts to improve var_off based on unsigned min/max information */ @@ -2432,6 +2118,7 @@ static void reg_bounds_sync(struct bpf_reg_state *reg) __update_reg_bounds(reg); /* We might have learned something about the sign bit. */ __reg_deduce_bounds(reg); + __reg_deduce_bounds(reg); /* We might have learned some bits from the bounds. 
*/ __reg_bound_offset(reg); /* Intersecting with the old var_off might have improved our bounds @@ -2441,6 +2128,56 @@ static void reg_bounds_sync(struct bpf_reg_state *reg) __update_reg_bounds(reg); } +static int reg_bounds_sanity_check(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, const char *ctx) +{ + const char *msg; + + if (reg->umin_value > reg->umax_value || + reg->smin_value > reg->smax_value || + reg->u32_min_value > reg->u32_max_value || + reg->s32_min_value > reg->s32_max_value) { + msg = "range bounds violation"; + goto out; + } + + if (tnum_is_const(reg->var_off)) { + u64 uval = reg->var_off.value; + s64 sval = (s64)uval; + + if (reg->umin_value != uval || reg->umax_value != uval || + reg->smin_value != sval || reg->smax_value != sval) { + msg = "const tnum out of sync with range bounds"; + goto out; + } + } + + if (tnum_subreg_is_const(reg->var_off)) { + u32 uval32 = tnum_subreg(reg->var_off).value; + s32 sval32 = (s32)uval32; + + if (reg->u32_min_value != uval32 || reg->u32_max_value != uval32 || + reg->s32_min_value != sval32 || reg->s32_max_value != sval32) { + msg = "const subreg tnum out of sync with range bounds"; + goto out; + } + } + + return 0; +out: + verbose(env, "REG INVARIANTS VIOLATION (%s): %s u64=[%#llx, %#llx] " + "s64=[%#llx, %#llx] u32=[%#x, %#x] s32=[%#x, %#x] var_off=(%#llx, %#llx)\n", + ctx, msg, reg->umin_value, reg->umax_value, + reg->smin_value, reg->smax_value, + reg->u32_min_value, reg->u32_max_value, + reg->s32_min_value, reg->s32_max_value, + reg->var_off.value, reg->var_off.mask); + if (env->test_reg_invariants) + return -EFAULT; + __mark_reg_unbounded(reg); + return 0; +} + static bool __reg32_bound_s64(s32 a) { return a >= 0 && a <= S32_MAX; @@ -2465,51 +2202,6 @@ static void __reg_assign_32_into_64(struct bpf_reg_state *reg) } } -static void __reg_combine_32_into_64(struct bpf_reg_state *reg) -{ - /* special case when 64-bit register has upper 32-bit register - * zeroed. Typically happens after zext or <<32, >>32 sequence - * allowing us to use 32-bit bounds directly, - */ - if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) { - __reg_assign_32_into_64(reg); - } else { - /* Otherwise the best we can do is push lower 32bit known and - * unknown bits into register (var_off set from jmp logic) - * then learn as much as possible from the 64-bit tnum - * known and unknown bits. The previous smin/smax bounds are - * invalid here because of jmp32 compare so mark them unknown - * so they do not impact tnum bounds calculation. - */ - __mark_reg64_unbounded(reg); - } - reg_bounds_sync(reg); -} - -static bool __reg64_bound_s32(s64 a) -{ - return a >= S32_MIN && a <= S32_MAX; -} - -static bool __reg64_bound_u32(u64 a) -{ - return a >= U32_MIN && a <= U32_MAX; -} - -static void __reg_combine_64_into_32(struct bpf_reg_state *reg) -{ - __mark_reg32_unbounded(reg); - if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) { - reg->s32_min_value = (s32)reg->smin_value; - reg->s32_max_value = (s32)reg->smax_value; - } - if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) { - reg->u32_min_value = (u32)reg->umin_value; - reg->u32_max_value = (u32)reg->umax_value; - } - reg_bounds_sync(reg); -} - /* Mark a register as having a completely unknown (scalar) value. 
*/ static void __mark_reg_unknown(const struct bpf_verifier_env *env, struct bpf_reg_state *reg) @@ -4592,9 +4284,17 @@ static bool register_is_null(struct bpf_reg_state *reg) return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0); } -static bool register_is_const(struct bpf_reg_state *reg) +/* check if register is a constant scalar value */ +static bool is_reg_const(struct bpf_reg_state *reg, bool subreg32) { - return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off); + return reg->type == SCALAR_VALUE && + tnum_is_const(subreg32 ? tnum_subreg(reg->var_off) : reg->var_off); +} + +/* assuming is_reg_const() is true, return constant value of a register */ +static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32) +{ + return subreg32 ? tnum_subreg(reg->var_off).value : reg->var_off.value; } static bool __is_scalar_unbounded(struct bpf_reg_state *reg) @@ -5451,10 +5151,23 @@ BTF_SET_END(rcu_protected_types) static bool rcu_protected_object(const struct btf *btf, u32 btf_id) { if (!btf_is_kernel(btf)) - return false; + return true; return btf_id_set_contains(&rcu_protected_types, btf_id); } +static struct btf_record *kptr_pointee_btf_record(struct btf_field *kptr_field) +{ + struct btf_struct_meta *meta; + + if (btf_is_kernel(kptr_field->kptr.btf)) + return NULL; + + meta = btf_find_struct_meta(kptr_field->kptr.btf, + kptr_field->kptr.btf_id); + + return meta ? meta->record : NULL; +} + static bool rcu_safe_kptr(const struct btf_field *field) { const struct btf_field_kptr *kptr = &field->kptr; @@ -5465,12 +5178,25 @@ static bool rcu_safe_kptr(const struct btf_field *field) static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field) { + struct btf_record *rec; + u32 ret; + + ret = PTR_MAYBE_NULL; if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) { - if (kptr_field->type != BPF_KPTR_PERCPU) - return PTR_MAYBE_NULL | MEM_RCU; - return PTR_MAYBE_NULL | MEM_RCU | MEM_PERCPU; + ret |= MEM_RCU; + if (kptr_field->type == BPF_KPTR_PERCPU) + ret |= MEM_PERCPU; + else if (!btf_is_kernel(kptr_field->kptr.btf)) + ret |= MEM_ALLOC; + + rec = kptr_pointee_btf_record(kptr_field); + if (rec && btf_record_has_field(rec, BPF_GRAPH_NODE)) + ret |= NON_OWN_REF; + } else { + ret |= PTR_UNTRUSTED; } - return PTR_MAYBE_NULL | PTR_UNTRUSTED; + + return ret; } static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno, @@ -6244,9 +5970,10 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) * values are also truncated so we push 64-bit bounds into * 32-bit bounds. Above were truncated < 32-bits already. 
*/ - if (size >= 4) - return; - __reg_combine_64_into_32(reg); + if (size < 4) { + __mark_reg32_unbounded(reg); + reg_bounds_sync(reg); + } } static void set_sext64_default_val(struct bpf_reg_state *reg, int size) @@ -8626,6 +8353,54 @@ static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env, return state->stack[spi].spilled_ptr.dynptr.type; } +static int check_reg_const_str(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, u32 regno) +{ + struct bpf_map *map = reg->map_ptr; + int err; + int map_off; + u64 map_addr; + char *str_ptr; + + if (reg->type != PTR_TO_MAP_VALUE) + return -EINVAL; + + if (!bpf_map_is_rdonly(map)) { + verbose(env, "R%d does not point to a readonly map'\n", regno); + return -EACCES; + } + + if (!tnum_is_const(reg->var_off)) { + verbose(env, "R%d is not a constant address'\n", regno); + return -EACCES; + } + + if (!map->ops->map_direct_value_addr) { + verbose(env, "no direct value access support for this map type\n"); + return -EACCES; + } + + err = check_map_access(env, regno, reg->off, + map->value_size - reg->off, false, + ACCESS_HELPER); + if (err) + return err; + + map_off = reg->off + reg->var_off.value; + err = map->ops->map_direct_value_addr(map, &map_addr, map_off); + if (err) { + verbose(env, "direct value access on string failed\n"); + return err; + } + + str_ptr = (char *)(long)(map_addr); + if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) { + verbose(env, "string is not zero-terminated\n"); + return -EINVAL; + } + return 0; +} + static int check_func_arg(struct bpf_verifier_env *env, u32 arg, struct bpf_call_arg_meta *meta, const struct bpf_func_proto *fn, @@ -8870,44 +8645,9 @@ skip_type_check: } case ARG_PTR_TO_CONST_STR: { - struct bpf_map *map = reg->map_ptr; - int map_off; - u64 map_addr; - char *str_ptr; - - if (!bpf_map_is_rdonly(map)) { - verbose(env, "R%d does not point to a readonly map'\n", regno); - return -EACCES; - } - - if (!tnum_is_const(reg->var_off)) { - verbose(env, "R%d is not a constant address'\n", regno); - return -EACCES; - } - - if (!map->ops->map_direct_value_addr) { - verbose(env, "no direct value access support for this map type\n"); - return -EACCES; - } - - err = check_map_access(env, regno, reg->off, - map->value_size - reg->off, false, - ACCESS_HELPER); + err = check_reg_const_str(env, reg, regno); if (err) return err; - - map_off = reg->off + reg->var_off.value; - err = map->ops->map_direct_value_addr(map, &map_addr, map_off); - if (err) { - verbose(env, "direct value access on string failed\n"); - return err; - } - - str_ptr = (char *)(long)(map_addr); - if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) { - verbose(env, "string is not zero-terminated\n"); - return -EINVAL; - } break; } case ARG_PTR_TO_KPTR: @@ -9896,14 +9636,15 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) return 0; } -static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, - int func_id, - struct bpf_call_arg_meta *meta) +static int do_refine_retval_range(struct bpf_verifier_env *env, + struct bpf_reg_state *regs, int ret_type, + int func_id, + struct bpf_call_arg_meta *meta) { struct bpf_reg_state *ret_reg = ®s[BPF_REG_0]; if (ret_type != RET_INTEGER) - return; + return 0; switch (func_id) { case BPF_FUNC_get_stack: @@ -9929,6 +9670,8 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, reg_bounds_sync(ret_reg); break; } + + return reg_bounds_sanity_check(env, ret_reg, "retval"); } static int @@ -9998,7 +9741,7 @@ 
record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, val = reg->var_off.value; max = map->max_entries; - if (!(register_is_const(reg) && val < max)) { + if (!(is_reg_const(reg, false) && val < max)) { bpf_map_key_store(aux, BPF_MAP_KEY_POISON); return 0; } @@ -10593,7 +10336,9 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs[BPF_REG_0].ref_obj_id = id; } - do_refine_retval_range(regs, fn->ret_type, func_id, &meta); + err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta); + if (err) + return err; err = check_map_func_compatibility(env, meta.map_ptr, func_id); if (err) @@ -10771,6 +10516,11 @@ static bool is_kfunc_arg_nullable(const struct btf *btf, const struct btf_param return __kfunc_param_match_suffix(btf, arg, "__nullable"); } +static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param *arg) +{ + return __kfunc_param_match_suffix(btf, arg, "__str"); +} + static bool is_kfunc_arg_scalar_with_name(const struct btf *btf, const struct btf_param *arg, const char *name) @@ -10914,6 +10664,7 @@ enum kfunc_ptr_arg_type { KF_ARG_PTR_TO_RB_ROOT, KF_ARG_PTR_TO_RB_NODE, KF_ARG_PTR_TO_NULL, + KF_ARG_PTR_TO_CONST_STR, }; enum special_kfunc_type { @@ -11064,6 +10815,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, if (is_kfunc_arg_rbtree_node(meta->btf, &args[argno])) return KF_ARG_PTR_TO_RB_NODE; + if (is_kfunc_arg_const_str(meta->btf, &args[argno])) + return KF_ARG_PTR_TO_CONST_STR; + if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) { if (!btf_type_is_struct(ref_t)) { verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n", @@ -11695,6 +11449,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_MEM_SIZE: case KF_ARG_PTR_TO_CALLBACK: case KF_ARG_PTR_TO_REFCOUNTED_KPTR: + case KF_ARG_PTR_TO_CONST_STR: /* Trusted by default */ break; default: @@ -11966,6 +11721,15 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ meta->arg_btf = reg->btf; meta->arg_btf_id = reg->btf_id; break; + case KF_ARG_PTR_TO_CONST_STR: + if (reg->type != PTR_TO_MAP_VALUE) { + verbose(env, "arg#%d doesn't point to a const string\n", i); + return -EINVAL; + } + ret = check_reg_const_str(env, reg, regno); + if (ret) + return ret; + break; } } @@ -14086,13 +13850,12 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) /* check dest operand */ err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); + err = err ?: adjust_reg_min_max_vals(env, insn); if (err) return err; - - return adjust_reg_min_max_vals(env, insn); } - return 0; + return reg_bounds_sanity_check(env, ®s[insn->dst_reg], "alu"); } static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, @@ -14174,161 +13937,130 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, })); } -static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode) -{ - struct tnum subreg = tnum_subreg(reg->var_off); - s32 sval = (s32)val; - - switch (opcode) { - case BPF_JEQ: - if (tnum_is_const(subreg)) - return !!tnum_equals_const(subreg, val); - else if (val < reg->u32_min_value || val > reg->u32_max_value) - return 0; - else if (sval < reg->s32_min_value || sval > reg->s32_max_value) - return 0; - break; - case BPF_JNE: - if (tnum_is_const(subreg)) - return !tnum_equals_const(subreg, val); - else if (val < reg->u32_min_value || val > reg->u32_max_value) - return 1; - else if 
(sval < reg->s32_min_value || sval > reg->s32_max_value) - return 1; - break; - case BPF_JSET: - if ((~subreg.mask & subreg.value) & val) - return 1; - if (!((subreg.mask | subreg.value) & val)) - return 0; - break; - case BPF_JGT: - if (reg->u32_min_value > val) - return 1; - else if (reg->u32_max_value <= val) - return 0; - break; - case BPF_JSGT: - if (reg->s32_min_value > sval) - return 1; - else if (reg->s32_max_value <= sval) - return 0; - break; - case BPF_JLT: - if (reg->u32_max_value < val) - return 1; - else if (reg->u32_min_value >= val) - return 0; - break; - case BPF_JSLT: - if (reg->s32_max_value < sval) - return 1; - else if (reg->s32_min_value >= sval) - return 0; - break; - case BPF_JGE: - if (reg->u32_min_value >= val) - return 1; - else if (reg->u32_max_value < val) - return 0; - break; - case BPF_JSGE: - if (reg->s32_min_value >= sval) - return 1; - else if (reg->s32_max_value < sval) - return 0; - break; - case BPF_JLE: - if (reg->u32_max_value <= val) - return 1; - else if (reg->u32_min_value > val) - return 0; - break; - case BPF_JSLE: - if (reg->s32_max_value <= sval) - return 1; - else if (reg->s32_min_value > sval) - return 0; - break; - } - - return -1; -} - - -static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) -{ - s64 sval = (s64)val; +/* + * <reg1> <op> <reg2>, currently assuming reg2 is a constant + */ +static int is_scalar_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2, + u8 opcode, bool is_jmp32) +{ + struct tnum t1 = is_jmp32 ? tnum_subreg(reg1->var_off) : reg1->var_off; + struct tnum t2 = is_jmp32 ? tnum_subreg(reg2->var_off) : reg2->var_off; + u64 umin1 = is_jmp32 ? (u64)reg1->u32_min_value : reg1->umin_value; + u64 umax1 = is_jmp32 ? (u64)reg1->u32_max_value : reg1->umax_value; + s64 smin1 = is_jmp32 ? (s64)reg1->s32_min_value : reg1->smin_value; + s64 smax1 = is_jmp32 ? (s64)reg1->s32_max_value : reg1->smax_value; + u64 umin2 = is_jmp32 ? (u64)reg2->u32_min_value : reg2->umin_value; + u64 umax2 = is_jmp32 ? (u64)reg2->u32_max_value : reg2->umax_value; + s64 smin2 = is_jmp32 ? (s64)reg2->s32_min_value : reg2->smin_value; + s64 smax2 = is_jmp32 ? 
(s64)reg2->s32_max_value : reg2->smax_value; switch (opcode) { case BPF_JEQ: - if (tnum_is_const(reg->var_off)) - return !!tnum_equals_const(reg->var_off, val); - else if (val < reg->umin_value || val > reg->umax_value) + /* constants, umin/umax and smin/smax checks would be + * redundant in this case because they all should match + */ + if (tnum_is_const(t1) && tnum_is_const(t2)) + return t1.value == t2.value; + /* non-overlapping ranges */ + if (umin1 > umax2 || umax1 < umin2) return 0; - else if (sval < reg->smin_value || sval > reg->smax_value) + if (smin1 > smax2 || smax1 < smin2) return 0; + if (!is_jmp32) { + /* if 64-bit ranges are inconclusive, see if we can + * utilize 32-bit subrange knowledge to eliminate + * branches that can't be taken a priori + */ + if (reg1->u32_min_value > reg2->u32_max_value || + reg1->u32_max_value < reg2->u32_min_value) + return 0; + if (reg1->s32_min_value > reg2->s32_max_value || + reg1->s32_max_value < reg2->s32_min_value) + return 0; + } break; case BPF_JNE: - if (tnum_is_const(reg->var_off)) - return !tnum_equals_const(reg->var_off, val); - else if (val < reg->umin_value || val > reg->umax_value) + /* constants, umin/umax and smin/smax checks would be + * redundant in this case because they all should match + */ + if (tnum_is_const(t1) && tnum_is_const(t2)) + return t1.value != t2.value; + /* non-overlapping ranges */ + if (umin1 > umax2 || umax1 < umin2) return 1; - else if (sval < reg->smin_value || sval > reg->smax_value) + if (smin1 > smax2 || smax1 < smin2) return 1; + if (!is_jmp32) { + /* if 64-bit ranges are inconclusive, see if we can + * utilize 32-bit subrange knowledge to eliminate + * branches that can't be taken a priori + */ + if (reg1->u32_min_value > reg2->u32_max_value || + reg1->u32_max_value < reg2->u32_min_value) + return 1; + if (reg1->s32_min_value > reg2->s32_max_value || + reg1->s32_max_value < reg2->s32_min_value) + return 1; + } break; case BPF_JSET: - if ((~reg->var_off.mask & reg->var_off.value) & val) + if (!is_reg_const(reg2, is_jmp32)) { + swap(reg1, reg2); + swap(t1, t2); + } + if (!is_reg_const(reg2, is_jmp32)) + return -1; + if ((~t1.mask & t1.value) & t2.value) return 1; - if (!((reg->var_off.mask | reg->var_off.value) & val)) + if (!((t1.mask | t1.value) & t2.value)) return 0; break; case BPF_JGT: - if (reg->umin_value > val) + if (umin1 > umax2) return 1; - else if (reg->umax_value <= val) + else if (umax1 <= umin2) return 0; break; case BPF_JSGT: - if (reg->smin_value > sval) + if (smin1 > smax2) return 1; - else if (reg->smax_value <= sval) + else if (smax1 <= smin2) return 0; break; case BPF_JLT: - if (reg->umax_value < val) + if (umax1 < umin2) return 1; - else if (reg->umin_value >= val) + else if (umin1 >= umax2) return 0; break; case BPF_JSLT: - if (reg->smax_value < sval) + if (smax1 < smin2) return 1; - else if (reg->smin_value >= sval) + else if (smin1 >= smax2) return 0; break; case BPF_JGE: - if (reg->umin_value >= val) + if (umin1 >= umax2) return 1; - else if (reg->umax_value < val) + else if (umax1 < umin2) return 0; break; case BPF_JSGE: - if (reg->smin_value >= sval) + if (smin1 >= smax2) return 1; - else if (reg->smax_value < sval) + else if (smax1 < smin2) return 0; break; case BPF_JLE: - if (reg->umax_value <= val) + if (umax1 <= umin2) return 1; - else if (reg->umin_value > val) + else if (umin1 > umax2) return 0; break; case BPF_JSLE: - if (reg->smax_value <= sval) + if (smax1 <= smin2) return 1; - else if (reg->smin_value > sval) + else if (smin1 > smax2) return 0; break; } @@ -14336,41 
+14068,6 @@ static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) return -1; } -/* compute branch direction of the expression "if (reg opcode val) goto target;" - * and return: - * 1 - branch will be taken and "goto target" will be executed - * 0 - branch will not be taken and fall-through to next insn - * -1 - unknown. Example: "if (reg < 5)" is unknown when register value - * range [0,10] - */ -static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode, - bool is_jmp32) -{ - if (__is_pointer_value(false, reg)) { - if (!reg_not_null(reg)) - return -1; - - /* If pointer is valid tests against zero will fail so we can - * use this to direct branch taken. - */ - if (val != 0) - return -1; - - switch (opcode) { - case BPF_JEQ: - return 0; - case BPF_JNE: - return 1; - default: - return -1; - } - } - - if (is_jmp32) - return is_branch32_taken(reg, val, opcode); - return is_branch64_taken(reg, val, opcode); -} - static int flip_opcode(u32 opcode) { /* How can we transform "a <op> b" into "b <op> a"? */ @@ -14432,216 +14129,244 @@ static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg, return -1; } -/* Adjusts the register min/max values in the case that the dst_reg is the - * variable register that we are working on, and src_reg is a constant or we're - * simply doing a BPF_K check. - * In JEQ/JNE cases we also adjust the var_off values. +/* compute branch direction of the expression "if (<reg1> opcode <reg2>) goto target;" + * and return: + * 1 - branch will be taken and "goto target" will be executed + * 0 - branch will not be taken and fall-through to next insn + * -1 - unknown. Example: "if (reg1 < 5)" is unknown when register value + * range [0,10] */ -static void reg_set_min_max(struct bpf_reg_state *true_reg, - struct bpf_reg_state *false_reg, - u64 val, u32 val32, - u8 opcode, bool is_jmp32) -{ - struct tnum false_32off = tnum_subreg(false_reg->var_off); - struct tnum false_64off = false_reg->var_off; - struct tnum true_32off = tnum_subreg(true_reg->var_off); - struct tnum true_64off = true_reg->var_off; - s64 sval = (s64)val; - s32 sval32 = (s32)val32; - - /* If the dst_reg is a pointer, we can't learn anything about its - * variable offset from the compare (unless src_reg were a pointer into - * the same object, but we don't bother with that. - * Since false_reg and true_reg have the same type by construction, we - * only need to check one of them for pointerness. - */ - if (__is_pointer_value(false, false_reg)) - return; +static int is_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2, + u8 opcode, bool is_jmp32) +{ + if (reg_is_pkt_pointer_any(reg1) && reg_is_pkt_pointer_any(reg2) && !is_jmp32) + return is_pkt_ptr_branch_taken(reg1, reg2, opcode); + + if (__is_pointer_value(false, reg1) || __is_pointer_value(false, reg2)) { + u64 val; + + /* arrange that reg2 is a scalar, and reg1 is a pointer */ + if (!is_reg_const(reg2, is_jmp32)) { + opcode = flip_opcode(opcode); + swap(reg1, reg2); + } + /* and ensure that reg2 is a constant */ + if (!is_reg_const(reg2, is_jmp32)) + return -1; + + if (!reg_not_null(reg1)) + return -1; + + /* If pointer is valid tests against zero will fail so we can + * use this to direct branch taken. 
+ */ + val = reg_const_value(reg2, is_jmp32); + if (val != 0) + return -1; + + switch (opcode) { + case BPF_JEQ: + return 0; + case BPF_JNE: + return 1; + default: + return -1; + } + } + /* now deal with two scalars, but not necessarily constants */ + return is_scalar_branch_taken(reg1, reg2, opcode, is_jmp32); +} + +/* Opcode that corresponds to a *false* branch condition. + * E.g., if r1 < r2, then reverse (false) condition is r1 >= r2 + */ +static u8 rev_opcode(u8 opcode) +{ switch (opcode) { - /* JEQ/JNE comparison doesn't change the register equivalence. - * - * r1 = r2; - * if (r1 == 42) goto label; - * ... - * label: // here both r1 and r2 are known to be 42. - * - * Hence when marking register as known preserve it's ID. + case BPF_JEQ: return BPF_JNE; + case BPF_JNE: return BPF_JEQ; + /* JSET doesn't have it's reverse opcode in BPF, so add + * BPF_X flag to denote the reverse of that operation */ + case BPF_JSET: return BPF_JSET | BPF_X; + case BPF_JSET | BPF_X: return BPF_JSET; + case BPF_JGE: return BPF_JLT; + case BPF_JGT: return BPF_JLE; + case BPF_JLE: return BPF_JGT; + case BPF_JLT: return BPF_JGE; + case BPF_JSGE: return BPF_JSLT; + case BPF_JSGT: return BPF_JSLE; + case BPF_JSLE: return BPF_JSGT; + case BPF_JSLT: return BPF_JSGE; + default: return 0; + } +} + +/* Refine range knowledge for <reg1> <op> <reg>2 conditional operation. */ +static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2, + u8 opcode, bool is_jmp32) +{ + struct tnum t; + u64 val; + +again: + switch (opcode) { case BPF_JEQ: if (is_jmp32) { - __mark_reg32_known(true_reg, val32); - true_32off = tnum_subreg(true_reg->var_off); + reg1->u32_min_value = max(reg1->u32_min_value, reg2->u32_min_value); + reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value); + reg1->s32_min_value = max(reg1->s32_min_value, reg2->s32_min_value); + reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value); + reg2->u32_min_value = reg1->u32_min_value; + reg2->u32_max_value = reg1->u32_max_value; + reg2->s32_min_value = reg1->s32_min_value; + reg2->s32_max_value = reg1->s32_max_value; + + t = tnum_intersect(tnum_subreg(reg1->var_off), tnum_subreg(reg2->var_off)); + reg1->var_off = tnum_with_subreg(reg1->var_off, t); + reg2->var_off = tnum_with_subreg(reg2->var_off, t); } else { - ___mark_reg_known(true_reg, val); - true_64off = true_reg->var_off; + reg1->umin_value = max(reg1->umin_value, reg2->umin_value); + reg1->umax_value = min(reg1->umax_value, reg2->umax_value); + reg1->smin_value = max(reg1->smin_value, reg2->smin_value); + reg1->smax_value = min(reg1->smax_value, reg2->smax_value); + reg2->umin_value = reg1->umin_value; + reg2->umax_value = reg1->umax_value; + reg2->smin_value = reg1->smin_value; + reg2->smax_value = reg1->smax_value; + + reg1->var_off = tnum_intersect(reg1->var_off, reg2->var_off); + reg2->var_off = reg1->var_off; } break; case BPF_JNE: - if (is_jmp32) { - __mark_reg32_known(false_reg, val32); - false_32off = tnum_subreg(false_reg->var_off); - } else { - ___mark_reg_known(false_reg, val); - false_64off = false_reg->var_off; - } + /* we don't derive any new information for inequality yet */ break; case BPF_JSET: + if (!is_reg_const(reg2, is_jmp32)) + swap(reg1, reg2); + if (!is_reg_const(reg2, is_jmp32)) + break; + val = reg_const_value(reg2, is_jmp32); + /* BPF_JSET (i.e., TRUE branch, *not* BPF_JSET | BPF_X) + * requires single bit to learn something useful. 
E.g., if we + * know that `r1 & 0x3` is true, then which bits (0, 1, or both) + * are actually set? We can learn something definite only if + * it's a single-bit value to begin with. + * + * BPF_JSET | BPF_X (i.e., negation of BPF_JSET) doesn't have + * this restriction. I.e., !(r1 & 0x3) means neither bit 0 nor + * bit 1 is set, which we can readily use in adjustments. + */ + if (!is_power_of_2(val)) + break; if (is_jmp32) { - false_32off = tnum_and(false_32off, tnum_const(~val32)); - if (is_power_of_2(val32)) - true_32off = tnum_or(true_32off, - tnum_const(val32)); + t = tnum_or(tnum_subreg(reg1->var_off), tnum_const(val)); + reg1->var_off = tnum_with_subreg(reg1->var_off, t); } else { - false_64off = tnum_and(false_64off, tnum_const(~val)); - if (is_power_of_2(val)) - true_64off = tnum_or(true_64off, - tnum_const(val)); + reg1->var_off = tnum_or(reg1->var_off, tnum_const(val)); } break; - case BPF_JGE: - case BPF_JGT: - { + case BPF_JSET | BPF_X: /* reverse of BPF_JSET, see rev_opcode() */ + if (!is_reg_const(reg2, is_jmp32)) + swap(reg1, reg2); + if (!is_reg_const(reg2, is_jmp32)) + break; + val = reg_const_value(reg2, is_jmp32); if (is_jmp32) { - u32 false_umax = opcode == BPF_JGT ? val32 : val32 - 1; - u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32; - - false_reg->u32_max_value = min(false_reg->u32_max_value, - false_umax); - true_reg->u32_min_value = max(true_reg->u32_min_value, - true_umin); + t = tnum_and(tnum_subreg(reg1->var_off), tnum_const(~val)); + reg1->var_off = tnum_with_subreg(reg1->var_off, t); } else { - u64 false_umax = opcode == BPF_JGT ? val : val - 1; - u64 true_umin = opcode == BPF_JGT ? val + 1 : val; - - false_reg->umax_value = min(false_reg->umax_value, false_umax); - true_reg->umin_value = max(true_reg->umin_value, true_umin); + reg1->var_off = tnum_and(reg1->var_off, tnum_const(~val)); } break; - } - case BPF_JSGE: - case BPF_JSGT: - { + case BPF_JLE: if (is_jmp32) { - s32 false_smax = opcode == BPF_JSGT ? sval32 : sval32 - 1; - s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32; - - false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax); - true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin); + reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value); + reg2->u32_min_value = max(reg1->u32_min_value, reg2->u32_min_value); } else { - s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1; - s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval; - - false_reg->smax_value = min(false_reg->smax_value, false_smax); - true_reg->smin_value = max(true_reg->smin_value, true_smin); + reg1->umax_value = min(reg1->umax_value, reg2->umax_value); + reg2->umin_value = max(reg1->umin_value, reg2->umin_value); } break; - } - case BPF_JLE: case BPF_JLT: - { if (is_jmp32) { - u32 false_umin = opcode == BPF_JLT ? val32 : val32 + 1; - u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32; - - false_reg->u32_min_value = max(false_reg->u32_min_value, - false_umin); - true_reg->u32_max_value = min(true_reg->u32_max_value, - true_umax); + reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value - 1); + reg2->u32_min_value = max(reg1->u32_min_value + 1, reg2->u32_min_value); } else { - u64 false_umin = opcode == BPF_JLT ? val : val + 1; - u64 true_umax = opcode == BPF_JLT ? 
val - 1 : val; - - false_reg->umin_value = max(false_reg->umin_value, false_umin); - true_reg->umax_value = min(true_reg->umax_value, true_umax); + reg1->umax_value = min(reg1->umax_value, reg2->umax_value - 1); + reg2->umin_value = max(reg1->umin_value + 1, reg2->umin_value); } break; - } case BPF_JSLE: + if (is_jmp32) { + reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value); + reg2->s32_min_value = max(reg1->s32_min_value, reg2->s32_min_value); + } else { + reg1->smax_value = min(reg1->smax_value, reg2->smax_value); + reg2->smin_value = max(reg1->smin_value, reg2->smin_value); + } + break; case BPF_JSLT: - { if (is_jmp32) { - s32 false_smin = opcode == BPF_JSLT ? sval32 : sval32 + 1; - s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32; - - false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin); - true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax); + reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value - 1); + reg2->s32_min_value = max(reg1->s32_min_value + 1, reg2->s32_min_value); } else { - s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1; - s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval; - - false_reg->smin_value = max(false_reg->smin_value, false_smin); - true_reg->smax_value = min(true_reg->smax_value, true_smax); + reg1->smax_value = min(reg1->smax_value, reg2->smax_value - 1); + reg2->smin_value = max(reg1->smin_value + 1, reg2->smin_value); } break; - } + case BPF_JGE: + case BPF_JGT: + case BPF_JSGE: + case BPF_JSGT: + /* just reuse LE/LT logic above */ + opcode = flip_opcode(opcode); + swap(reg1, reg2); + goto again; default: return; } - - if (is_jmp32) { - false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off), - tnum_subreg(false_32off)); - true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off), - tnum_subreg(true_32off)); - __reg_combine_32_into_64(false_reg); - __reg_combine_32_into_64(true_reg); - } else { - false_reg->var_off = false_64off; - true_reg->var_off = true_64off; - __reg_combine_64_into_32(false_reg); - __reg_combine_64_into_32(true_reg); - } } -/* Same as above, but for the case that dst_reg holds a constant and src_reg is - * the variable reg. +/* Adjusts the register min/max values in the case that the dst_reg and + * src_reg are both SCALAR_VALUE registers (or we are simply doing a BPF_K + * check, in which case we have a fake SCALAR_VALUE representing insn->imm). + * Technically we can do similar adjustments for pointers to the same object, + * but we don't support that right now. */ -static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, - struct bpf_reg_state *false_reg, - u64 val, u32 val32, - u8 opcode, bool is_jmp32) +static int reg_set_min_max(struct bpf_verifier_env *env, + struct bpf_reg_state *true_reg1, + struct bpf_reg_state *true_reg2, + struct bpf_reg_state *false_reg1, + struct bpf_reg_state *false_reg2, + u8 opcode, bool is_jmp32) { - opcode = flip_opcode(opcode); - /* This uses zero as "not present in table"; luckily the zero opcode, - * BPF_JA, can't get here. + int err; + + /* If either register is a pointer, we can't learn anything about its + * variable offset from the compare (unless they were a pointer into + * the same object, but we don't bother with that).
*/ - if (opcode) - reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32); -} - -/* Regs are known to be equal, so intersect their min/max/var_off */ -static void __reg_combine_min_max(struct bpf_reg_state *src_reg, - struct bpf_reg_state *dst_reg) -{ - src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value, - dst_reg->umin_value); - src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value, - dst_reg->umax_value); - src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value, - dst_reg->smin_value); - src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value, - dst_reg->smax_value); - src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off, - dst_reg->var_off); - reg_bounds_sync(src_reg); - reg_bounds_sync(dst_reg); -} + if (false_reg1->type != SCALAR_VALUE || false_reg2->type != SCALAR_VALUE) + return 0; -static void reg_combine_min_max(struct bpf_reg_state *true_src, - struct bpf_reg_state *true_dst, - struct bpf_reg_state *false_src, - struct bpf_reg_state *false_dst, - u8 opcode) -{ - switch (opcode) { - case BPF_JEQ: - __reg_combine_min_max(true_src, true_dst); - break; - case BPF_JNE: - __reg_combine_min_max(false_src, false_dst); - break; - } + /* fallthrough (FALSE) branch */ + regs_refine_cond_op(false_reg1, false_reg2, rev_opcode(opcode), is_jmp32); + reg_bounds_sync(false_reg1); + reg_bounds_sync(false_reg2); + + /* jump (TRUE) branch */ + regs_refine_cond_op(true_reg1, true_reg2, opcode, is_jmp32); + reg_bounds_sync(true_reg1); + reg_bounds_sync(true_reg2); + + err = reg_bounds_sanity_check(env, true_reg1, "true_reg1"); + err = err ?: reg_bounds_sanity_check(env, true_reg2, "true_reg2"); + err = err ?: reg_bounds_sanity_check(env, false_reg1, "false_reg1"); + err = err ?: reg_bounds_sanity_check(env, false_reg2, "false_reg2"); + return err; } static void mark_ptr_or_null_reg(struct bpf_func_state *state, @@ -14839,6 +14564,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs; struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL; struct bpf_reg_state *eq_branch_regs; + struct bpf_reg_state fake_reg = {}; u8 opcode = BPF_OP(insn->code); bool is_jmp32; int pred = -1; @@ -14879,42 +14605,13 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); return -EINVAL; } + src_reg = &fake_reg; + src_reg->type = SCALAR_VALUE; + __mark_reg_known(src_reg, insn->imm); } is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; - - if (BPF_SRC(insn->code) == BPF_K) { - pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32); - } else if (src_reg->type == SCALAR_VALUE && - is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) { - pred = is_branch_taken(dst_reg, - tnum_subreg(src_reg->var_off).value, - opcode, - is_jmp32); - } else if (src_reg->type == SCALAR_VALUE && - !is_jmp32 && tnum_is_const(src_reg->var_off)) { - pred = is_branch_taken(dst_reg, - src_reg->var_off.value, - opcode, - is_jmp32); - } else if (dst_reg->type == SCALAR_VALUE && - is_jmp32 && tnum_is_const(tnum_subreg(dst_reg->var_off))) { - pred = is_branch_taken(src_reg, - tnum_subreg(dst_reg->var_off).value, - flip_opcode(opcode), - is_jmp32); - } else if (dst_reg->type == SCALAR_VALUE && - !is_jmp32 && tnum_is_const(dst_reg->var_off)) { - pred = is_branch_taken(src_reg, - dst_reg->var_off.value, - flip_opcode(opcode), - is_jmp32); - } else if (reg_is_pkt_pointer_any(dst_reg) && - 
reg_is_pkt_pointer_any(src_reg) && - !is_jmp32) { - pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode); - } - + pred = is_branch_taken(dst_reg, src_reg, opcode, is_jmp32); if (pred >= 0) { /* If we get here with a dst_reg pointer type it is because * above is_branch_taken() special cased the 0 comparison. @@ -14962,53 +14659,27 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, return -EFAULT; other_branch_regs = other_branch->frame[other_branch->curframe]->regs; - /* detect if we are comparing against a constant value so we can adjust - * our min/max values for our dst register. - * this is only legit if both are scalars (or pointers to the same - * object, I suppose, see the PTR_MAYBE_NULL related if block below), - * because otherwise the different base pointers mean the offsets aren't - * comparable. - */ if (BPF_SRC(insn->code) == BPF_X) { - struct bpf_reg_state *src_reg = ®s[insn->src_reg]; - - if (dst_reg->type == SCALAR_VALUE && - src_reg->type == SCALAR_VALUE) { - if (tnum_is_const(src_reg->var_off) || - (is_jmp32 && - tnum_is_const(tnum_subreg(src_reg->var_off)))) - reg_set_min_max(&other_branch_regs[insn->dst_reg], - dst_reg, - src_reg->var_off.value, - tnum_subreg(src_reg->var_off).value, - opcode, is_jmp32); - else if (tnum_is_const(dst_reg->var_off) || - (is_jmp32 && - tnum_is_const(tnum_subreg(dst_reg->var_off)))) - reg_set_min_max_inv(&other_branch_regs[insn->src_reg], - src_reg, - dst_reg->var_off.value, - tnum_subreg(dst_reg->var_off).value, - opcode, is_jmp32); - else if (!is_jmp32 && - (opcode == BPF_JEQ || opcode == BPF_JNE)) - /* Comparing for equality, we can combine knowledge */ - reg_combine_min_max(&other_branch_regs[insn->src_reg], - &other_branch_regs[insn->dst_reg], - src_reg, dst_reg, opcode); - if (src_reg->id && - !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) { - find_equal_scalars(this_branch, src_reg); - find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]); - } - - } - } else if (dst_reg->type == SCALAR_VALUE) { - reg_set_min_max(&other_branch_regs[insn->dst_reg], - dst_reg, insn->imm, (u32)insn->imm, - opcode, is_jmp32); + err = reg_set_min_max(env, + &other_branch_regs[insn->dst_reg], + &other_branch_regs[insn->src_reg], + dst_reg, src_reg, opcode, is_jmp32); + } else /* BPF_SRC(insn->code) == BPF_K */ { + err = reg_set_min_max(env, + &other_branch_regs[insn->dst_reg], + src_reg /* fake one */, + dst_reg, src_reg /* same fake one */, + opcode, is_jmp32); } + if (err) + return err; + if (BPF_SRC(insn->code) == BPF_X && + src_reg->type == SCALAR_VALUE && src_reg->id && + !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) { + find_equal_scalars(this_branch, src_reg); + find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]); + } if (dst_reg->type == SCALAR_VALUE && dst_reg->id && !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) { find_equal_scalars(this_branch, dst_reg); @@ -17541,10 +17212,8 @@ static int do_check(struct bpf_verifier_env *env) insn->off, BPF_SIZE(insn->code), BPF_READ, insn->dst_reg, false, BPF_MODE(insn->code) == BPF_MEMSX); - if (err) - return err; - - err = save_aux_ptr_type(env, src_reg_type, true); + err = err ?: save_aux_ptr_type(env, src_reg_type, true); + err = err ?: reg_bounds_sanity_check(env, ®s[insn->dst_reg], "ldx"); if (err) return err; } else if (class == BPF_STX) { @@ -20831,6 +20500,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (is_priv) env->test_state_freq = attr->prog_flags & 
BPF_F_TEST_STATE_FREQ; + env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS; env->explored_states = kvcalloc(state_htab_size(env), sizeof(struct bpf_verifier_state_list *), diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index c56071f150f2..520b90dd97ec 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -164,13 +164,13 @@ struct cgroup_mgctx { #define DEFINE_CGROUP_MGCTX(name) \ struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name) -extern spinlock_t css_set_lock; extern struct cgroup_subsys *cgroup_subsys[]; extern struct list_head cgroup_roots; /* iterate across the hierarchies */ #define for_each_root(root) \ - list_for_each_entry((root), &cgroup_roots, root_list) + list_for_each_entry_rcu((root), &cgroup_roots, root_list, \ + lockdep_is_held(&cgroup_mutex)) /** * for_each_subsys - iterate all enabled cgroup subsystems diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 76db6c67e39a..04d11a7dd95f 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1262,6 +1262,40 @@ int cgroup1_get_tree(struct fs_context *fc) return ret; } +/** + * task_get_cgroup1 - Acquires the associated cgroup of a task within a + * specific cgroup1 hierarchy. The cgroup1 hierarchy is identified by its + * hierarchy ID. + * @tsk: The target task + * @hierarchy_id: The ID of a cgroup1 hierarchy + * + * On success, the cgroup is returned. On failure, ERR_PTR is returned. + * We limit it to cgroup1 only. + */ +struct cgroup *task_get_cgroup1(struct task_struct *tsk, int hierarchy_id) +{ + struct cgroup *cgrp = ERR_PTR(-ENOENT); + struct cgroup_root *root; + unsigned long flags; + + rcu_read_lock(); + for_each_root(root) { + /* cgroup1 only*/ + if (root == &cgrp_dfl_root) + continue; + if (root->hierarchy_id != hierarchy_id) + continue; + spin_lock_irqsave(&css_set_lock, flags); + cgrp = task_cgroup_from_root(tsk, root); + if (!cgrp || !cgroup_tryget(cgrp)) + cgrp = ERR_PTR(-ENOENT); + spin_unlock_irqrestore(&css_set_lock, flags); + break; + } + rcu_read_unlock(); + return cgrp; +} + static int __init cgroup1_wq_init(void) { /* diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 4b9ff41ca603..8f3cef1a4d8a 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1315,7 +1315,7 @@ static void cgroup_exit_root_id(struct cgroup_root *root) void cgroup_free_root(struct cgroup_root *root) { - kfree(root); + kfree_rcu(root, rcu); } static void cgroup_destroy_root(struct cgroup_root *root) @@ -1347,10 +1347,9 @@ static void cgroup_destroy_root(struct cgroup_root *root) spin_unlock_irq(&css_set_lock); - if (!list_empty(&root->root_list)) { - list_del(&root->root_list); - cgroup_root_count--; - } + WARN_ON_ONCE(list_empty(&root->root_list)); + list_del_rcu(&root->root_list); + cgroup_root_count--; if (!have_favordynmods) cgroup_favor_dynmods(root, false); @@ -1390,7 +1389,15 @@ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset, } } - BUG_ON(!res_cgroup); + /* + * If cgroup_mutex is not held, the cgrp_cset_link will be freed + * before we remove the cgroup root from the root_list. Consequently, + * when accessing a cgroup root, the cset_link may have already been + * freed, resulting in a NULL res_cgroup. However, by holding the + * cgroup_mutex, we ensure that res_cgroup can't be NULL. + * If we don't hold cgroup_mutex in the caller, we must do the NULL + * check. 
+ */ return res_cgroup; } @@ -1413,6 +1420,11 @@ current_cgns_cgroup_from_root(struct cgroup_root *root) rcu_read_unlock(); + /* + * The namespace_sem is held by current, so the root cgroup can't + * be umounted. Therefore, we can ensure that the res is non-NULL. + */ + WARN_ON_ONCE(!res); return res; } @@ -1449,7 +1461,6 @@ static struct cgroup *current_cgns_cgroup_dfl(void) static struct cgroup *cset_cgroup_from_root(struct css_set *cset, struct cgroup_root *root) { - lockdep_assert_held(&cgroup_mutex); lockdep_assert_held(&css_set_lock); return __cset_cgroup_from_root(cset, root); @@ -1457,7 +1468,9 @@ static struct cgroup *cset_cgroup_from_root(struct css_set *cset, /* * Return the cgroup for "task" from the given hierarchy. Must be - * called with cgroup_mutex and css_set_lock held. + * called with css_set_lock held to prevent task's groups from being modified. + * Must be called with either cgroup_mutex or rcu read lock to prevent the + * cgroup root from being destroyed. */ struct cgroup *task_cgroup_from_root(struct task_struct *task, struct cgroup_root *root) @@ -2032,7 +2045,7 @@ void init_cgroup_root(struct cgroup_fs_context *ctx) struct cgroup_root *root = ctx->root; struct cgroup *cgrp = &root->cgrp; - INIT_LIST_HEAD(&root->root_list); + INIT_LIST_HEAD_RCU(&root->root_list); atomic_set(&root->nr_cgrps, 1); cgrp->root = root; init_cgroup_housekeeping(cgrp); @@ -2115,7 +2128,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) * care of subsystems' refcounts, which are explicitly dropped in * the failure exit path. */ - list_add(&root->root_list, &cgroup_roots); + list_add_rcu(&root->root_list, &cgroup_roots); cgroup_root_count++; /* @@ -6265,7 +6278,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, if (!buf) goto out; - cgroup_lock(); + rcu_read_lock(); spin_lock_irq(&css_set_lock); for_each_root(root) { @@ -6276,6 +6289,11 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, if (root == &cgrp_dfl_root && !READ_ONCE(cgrp_dfl_visible)) continue; + cgrp = task_cgroup_from_root(tsk, root); + /* The root has already been unmounted. */ + if (!cgrp) + continue; + seq_printf(m, "%d:", root->hierarchy_id); if (root != &cgrp_dfl_root) for_each_subsys(ss, ssid) @@ -6286,9 +6304,6 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, seq_printf(m, "%sname=%s", count ? "," : "", root->name); seq_putc(m, ':'); - - cgrp = task_cgroup_from_root(tsk, root); - /* * On traditional hierarchies, all zombie tasks show up as * belonging to the root cgroup. 
On the default hierarchy, @@ -6320,7 +6335,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, retval = 0; out_unlock: spin_unlock_irq(&css_set_lock); - cgroup_unlock(); + rcu_read_unlock(); kfree(buf); out: return retval; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 84e8a0f6e4e0..f0b8b7c29126 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1376,6 +1376,8 @@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, struct bpf_dynptr_kern *sig_ptr, struct bpf_key *trusted_keyring) { + const void *data, *sig; + u32 data_len, sig_len; int ret; if (trusted_keyring->has_ref) { @@ -1392,10 +1394,12 @@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, return ret; } - return verify_pkcs7_signature(data_ptr->data, - __bpf_dynptr_size(data_ptr), - sig_ptr->data, - __bpf_dynptr_size(sig_ptr), + data_len = __bpf_dynptr_size(data_ptr); + data = __bpf_dynptr_data(data_ptr, data_len); + sig_len = __bpf_dynptr_size(sig_ptr); + sig = __bpf_dynptr_data(sig_ptr, sig_len); + + return verify_pkcs7_signature(data, data_len, sig, sig_len, trusted_keyring->key, VERIFYING_UNSPECIFIED_SIGNATURE, NULL, NULL); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 7916503e6a6a..c148f8d1e564 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -5144,22 +5144,6 @@ static struct bpf_test tests[] = { { }, { { 0, 0x1 } }, }, - { - "ALU_MOVSX | BPF_W", - .u.insns_int = { - BPF_LD_IMM64(R2, 0x00000000deadbeefLL), - BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL), - BPF_MOVSX32_REG(R1, R3, 32), - BPF_JMP_REG(BPF_JEQ, R2, R1, 2), - BPF_MOV32_IMM(R0, 2), - BPF_EXIT_INSN(), - BPF_MOV32_IMM(R0, 1), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 0x1 } }, - }, /* MOVSX64 REG */ { "ALU64_MOVSX | BPF_B", diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 2a7f1b15714a..407b2335f091 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -702,20 +702,7 @@ static int vlan_ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) { const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); - const struct ethtool_ops *ops = vlan->real_dev->ethtool_ops; - struct phy_device *phydev = vlan->real_dev->phydev; - - if (phy_has_tsinfo(phydev)) { - return phy_ts_info(phydev, info); - } else if (ops->get_ts_info) { - return ops->get_ts_info(vlan->real_dev, info); - } else { - info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; - info->phc_index = -1; - } - - return 0; + return ethtool_get_ts_info_by_layer(vlan->real_dev, info); } static void vlan_dev_get_stats64(struct net_device *dev, diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index 3bd0760c76a2..b51d8b071b56 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -20,6 +20,7 @@ batman-adv-y += hash.o batman-adv-$(CONFIG_BATMAN_ADV_DEBUG) += log.o batman-adv-y += main.o batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o +batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast_forw.o batman-adv-y += netlink.o batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o batman-adv-y += originator.o diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 37ce6cfb3520..5f46ca3d4bb8 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -20,7 +20,6 @@ #include <linux/if_vlan.h> #include <linux/jhash.h> #include <linux/jiffies.h> -#include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> #include 
<linux/lockdep.h> @@ -31,6 +30,7 @@ #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/sprintf.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/workqueue.h> diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index c120c7c6d25f..757c084ac2d1 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -25,7 +25,6 @@ #include "hard-interface.h" #include "originator.h" -#include "routing.h" #include "send.h" /** @@ -351,18 +350,14 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb, struct batadv_orig_node *orig_node_src) { struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); - struct batadv_orig_node *orig_node_dst; struct batadv_neigh_node *neigh_node = NULL; struct batadv_frag_packet *packet; u16 total_size; bool ret = false; packet = (struct batadv_frag_packet *)skb->data; - orig_node_dst = batadv_orig_hash_find(bat_priv, packet->dest); - if (!orig_node_dst) - goto out; - neigh_node = batadv_find_router(bat_priv, orig_node_dst, recv_if); + neigh_node = batadv_orig_to_router(bat_priv, packet->dest, recv_if); if (!neigh_node) goto out; @@ -381,7 +376,6 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb, } out: - batadv_orig_node_put(orig_node_dst); batadv_neigh_node_put(neigh_node); return ret; } diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index d26124bc27e1..0ddd8b4b3f4c 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -18,7 +18,6 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/ipv6.h> -#include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> @@ -29,6 +28,7 @@ #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/sprintf.h> #include <linux/stddef.h> #include <linux/udp.h> #include <net/sock.h> diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index e8a449915566..5fc754b0b3f7 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -6,6 +6,7 @@ #include "main.h" +#include <linux/array_size.h> #include <linux/atomic.h> #include <linux/build_bug.h> #include <linux/byteorder/generic.h> @@ -20,7 +21,6 @@ #include <linux/init.h> #include <linux/ip.h> #include <linux/ipv6.h> -#include <linux/kernel.h> #include <linux/kobject.h> #include <linux/kref.h> #include <linux/list.h> @@ -33,6 +33,7 @@ #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/sprintf.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/workqueue.h> @@ -532,6 +533,8 @@ static void batadv_recv_handler_init(void) /* broadcast packet */ batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet; + /* multicast packet */ + batadv_rx_handler[BATADV_MCAST] = batadv_recv_mcast_packet; /* unicast packets ... */ /* unicast with 4 addresses packet */ diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 10007c5894a1..870dcd7f1786 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -13,7 +13,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2023.3" +#define BATADV_SOURCE_VERSION "2024.0" #endif /* B.A.T.M.A.N. 
parameters */ diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 315394f12c55..d982daea8329 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -25,7 +25,6 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/jiffies.h> -#include <linux/kernel.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> @@ -36,6 +35,7 @@ #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/sprintf.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> @@ -236,6 +236,37 @@ static u8 batadv_mcast_mla_rtr_flags_get(struct batadv_priv *bat_priv, } /** + * batadv_mcast_mla_forw_flags_get() - get multicast forwarding flags + * @bat_priv: the bat priv with all the soft interface information + * + * Checks if all active hard interfaces have an MTU larger or equal to 1280 + * bytes (IPv6 minimum MTU). + * + * Return: BATADV_MCAST_HAVE_MC_PTYPE_CAPA if yes, BATADV_NO_FLAGS otherwise. + */ +static u8 batadv_mcast_mla_forw_flags_get(struct batadv_priv *bat_priv) +{ + const struct batadv_hard_iface *hard_iface; + + rcu_read_lock(); + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { + if (hard_iface->if_status != BATADV_IF_ACTIVE) + continue; + + if (hard_iface->soft_iface != bat_priv->soft_iface) + continue; + + if (hard_iface->net_dev->mtu < IPV6_MIN_MTU) { + rcu_read_unlock(); + return BATADV_NO_FLAGS; + } + } + rcu_read_unlock(); + + return BATADV_MCAST_HAVE_MC_PTYPE_CAPA; +} + +/** * batadv_mcast_mla_flags_get() - get the new multicast flags * @bat_priv: the bat priv with all the soft interface information * @@ -256,6 +287,7 @@ batadv_mcast_mla_flags_get(struct batadv_priv *bat_priv) mla_flags.enabled = 1; mla_flags.tvlv_flags |= batadv_mcast_mla_rtr_flags_get(bat_priv, bridge); + mla_flags.tvlv_flags |= batadv_mcast_mla_forw_flags_get(bat_priv); if (!bridge) return mla_flags; @@ -806,23 +838,25 @@ static void batadv_mcast_flags_log(struct batadv_priv *bat_priv, u8 flags) { bool old_enabled = bat_priv->mcast.mla_flags.enabled; u8 old_flags = bat_priv->mcast.mla_flags.tvlv_flags; - char str_old_flags[] = "[.... . ]"; + char str_old_flags[] = "[.... . .]"; - sprintf(str_old_flags, "[%c%c%c%s%s]", + sprintf(str_old_flags, "[%c%c%c%s%s%c]", (old_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.', (old_flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.', (old_flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.', !(old_flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ", - !(old_flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". "); + !(old_flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ", + !(old_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) ? 'P' : '.'); batadv_dbg(BATADV_DBG_MCAST, bat_priv, - "Changing multicast flags from '%s' to '[%c%c%c%s%s]'\n", + "Changing multicast flags from '%s' to '[%c%c%c%s%s%c]'\n", old_enabled ? str_old_flags : "<undefined>", (flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.', (flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.', (flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.', !(flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ", - !(flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". "); + !(flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ", + !(flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) ? 
'P' : '.'); } /** @@ -1136,16 +1170,61 @@ static int batadv_mcast_forw_rtr_count(struct batadv_priv *bat_priv, } /** + * batadv_mcast_forw_mode_by_count() - get forwarding mode by count + * @bat_priv: the bat priv with all the soft interface information + * @skb: the multicast packet to check + * @vid: the vlan identifier + * @is_routable: stores whether the destination is routable + * @count: the number of originators the multicast packet need to be sent to + * + * For a multicast packet with multiple destination originators, checks which + * mode to use. For BATADV_FORW_MCAST it also encapsulates the packet with a + * complete batman-adv multicast header. + * + * Return: + * BATADV_FORW_MCAST: If all nodes have multicast packet routing + * capabilities and an MTU >= 1280 on all hard interfaces (including us) + * and the encapsulated multicast packet with all destination addresses + * would still fit into an 1280 bytes batman-adv multicast packet + * (excluding the outer ethernet frame) and we could successfully push + * the full batman-adv multicast packet header. + * BATADV_FORW_UCASTS: If the packet cannot be sent in a batman-adv + * multicast packet and the amount of batman-adv unicast packets needed + * is smaller or equal to the configured multicast fanout. + * BATADV_FORW_BCAST: Otherwise. + */ +static enum batadv_forw_mode +batadv_mcast_forw_mode_by_count(struct batadv_priv *bat_priv, + struct sk_buff *skb, unsigned short vid, + int is_routable, int count) +{ + unsigned int mcast_hdrlen = batadv_mcast_forw_packet_hdrlen(count); + u8 own_tvlv_flags = bat_priv->mcast.mla_flags.tvlv_flags; + + if (!atomic_read(&bat_priv->mcast.num_no_mc_ptype_capa) && + own_tvlv_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA && + skb->len + mcast_hdrlen <= IPV6_MIN_MTU && + batadv_mcast_forw_push(bat_priv, skb, vid, is_routable, count)) + return BATADV_FORW_MCAST; + + if (count <= atomic_read(&bat_priv->multicast_fanout)) + return BATADV_FORW_UCASTS; + + return BATADV_FORW_BCAST; +} + +/** * batadv_mcast_forw_mode() - check on how to forward a multicast packet * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to check + * @vid: the vlan identifier * @is_routable: stores whether the destination is routable * * Return: The forwarding mode as enum batadv_forw_mode. */ enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, - int *is_routable) + unsigned short vid, int *is_routable) { int ret, tt_count, ip_count, unsnoop_count, total_count; bool is_unsnoopable = false; @@ -1175,10 +1254,8 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, else if (unsnoop_count) return BATADV_FORW_BCAST; - if (total_count <= atomic_read(&bat_priv->multicast_fanout)) - return BATADV_FORW_UCASTS; - - return BATADV_FORW_BCAST; + return batadv_mcast_forw_mode_by_count(bat_priv, skb, vid, *is_routable, + total_count); } /** @@ -1739,6 +1816,31 @@ static void batadv_mcast_want_rtr6_update(struct batadv_priv *bat_priv, } /** + * batadv_mcast_have_mc_ptype_update() - update multicast packet type counter + * @bat_priv: the bat priv with all the soft interface information + * @orig: the orig_node which multicast state might have changed of + * @mcast_flags: flags indicating the new multicast state + * + * If the BATADV_MCAST_HAVE_MC_PTYPE_CAPA flag of this originator, orig, has + * toggled then this method updates the counter accordingly. 
+ */ +static void batadv_mcast_have_mc_ptype_update(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig, + u8 mcast_flags) +{ + lockdep_assert_held(&orig->mcast_handler_lock); + + /* switched from flag set to unset */ + if (!(mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) && + orig->mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) + atomic_inc(&bat_priv->mcast.num_no_mc_ptype_capa); + /* switched from flag unset to set */ + else if (mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA && + !(orig->mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA)) + atomic_dec(&bat_priv->mcast.num_no_mc_ptype_capa); +} + +/** * batadv_mcast_tvlv_flags_get() - get multicast flags from an OGM TVLV * @enabled: whether the originator has multicast TVLV support enabled * @tvlv_value: tvlv buffer containing the multicast flags @@ -1806,6 +1908,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv, batadv_mcast_want_ipv6_update(bat_priv, orig, mcast_flags); batadv_mcast_want_rtr4_update(bat_priv, orig, mcast_flags); batadv_mcast_want_rtr6_update(bat_priv, orig, mcast_flags); + batadv_mcast_have_mc_ptype_update(bat_priv, orig, mcast_flags); orig->mcast_flags = mcast_flags; spin_unlock_bh(&orig->mcast_handler_lock); @@ -1820,6 +1923,10 @@ void batadv_mcast_init(struct batadv_priv *bat_priv) batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler, NULL, NULL, BATADV_TVLV_MCAST, 2, BATADV_TVLV_HANDLER_OGM_CIFNOTFND); + batadv_tvlv_handler_register(bat_priv, NULL, NULL, + batadv_mcast_forw_tracker_tvlv_handler, + BATADV_TVLV_MCAST_TRACKER, 1, + BATADV_TVLV_HANDLER_OGM_CIFNOTFND); INIT_DELAYED_WORK(&bat_priv->mcast.work, batadv_mcast_mla_update); batadv_mcast_start_timer(bat_priv); diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index a9770d8d6d36..d97ee51d26f2 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -11,6 +11,7 @@ #include <linux/netlink.h> #include <linux/skbuff.h> +#include <linux/types.h> /** * enum batadv_forw_mode - the way a packet should be forwarded as @@ -28,6 +29,12 @@ enum batadv_forw_mode { */ BATADV_FORW_UCASTS, + /** + * @BATADV_FORW_MCAST: forward the packet to some nodes via a + * batman-adv multicast packet + */ + BATADV_FORW_MCAST, + /** @BATADV_FORW_NONE: don't forward, drop it */ BATADV_FORW_NONE, }; @@ -36,7 +43,7 @@ enum batadv_forw_mode { enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, - int *is_routable); + unsigned short vid, int *is_routable); int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid, int is_routable); @@ -52,11 +59,23 @@ void batadv_mcast_free(struct batadv_priv *bat_priv); void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node); +/* multicast_forw.c */ + +int batadv_mcast_forw_tracker_tvlv_handler(struct batadv_priv *bat_priv, + struct sk_buff *skb); + +unsigned int batadv_mcast_forw_packet_hdrlen(unsigned int num_dests); + +bool batadv_mcast_forw_push(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, int is_routable, int count); + +int batadv_mcast_forw_mcsend(struct batadv_priv *bat_priv, struct sk_buff *skb); + #else static inline enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, - int *is_routable) + unsigned short vid, int *is_routable) { return BATADV_FORW_BCAST; } @@ -94,6 +113,13 @@ static inline void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node) { } +static inline int batadv_mcast_forw_mcsend(struct 
batadv_priv *bat_priv, + struct sk_buff *skb) +{ + kfree_skb(skb); + return NET_XMIT_DROP; +} + #endif /* CONFIG_BATMAN_ADV_MCAST */ #endif /* _NET_BATMAN_ADV_MULTICAST_H_ */ diff --git a/net/batman-adv/multicast_forw.c b/net/batman-adv/multicast_forw.c new file mode 100644 index 000000000000..fafd6ba8c056 --- /dev/null +++ b/net/batman-adv/multicast_forw.c @@ -0,0 +1,1178 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) B.A.T.M.A.N. contributors: + * + * Linus Lüssing + */ + +#include "multicast.h" +#include "main.h" + +#include <linux/bug.h> +#include <linux/build_bug.h> +#include <linux/byteorder/generic.h> +#include <linux/compiler.h> +#include <linux/errno.h> +#include <linux/etherdevice.h> +#include <linux/gfp.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/ipv6.h> +#include <linux/limits.h> +#include <linux/netdevice.h> +#include <linux/rculist.h> +#include <linux/rcupdate.h> +#include <linux/skbuff.h> +#include <linux/stddef.h> +#include <linux/string.h> +#include <linux/types.h> +#include <uapi/linux/batadv_packet.h> + +#include "bridge_loop_avoidance.h" +#include "originator.h" +#include "send.h" +#include "translation-table.h" + +#define batadv_mcast_forw_tracker_for_each_dest(dest, num_dests) \ + for (; num_dests; num_dests--, (dest) += ETH_ALEN) + +#define batadv_mcast_forw_tracker_for_each_dest2(dest1, dest2, num_dests) \ + for (; num_dests; num_dests--, (dest1) += ETH_ALEN, (dest2) += ETH_ALEN) + +/** + * batadv_mcast_forw_skb_push() - skb_push and memorize amount of pushed bytes + * @skb: the skb to push onto + * @size: the amount of bytes to push + * @len: stores the total amount of bytes pushed + * + * Performs an skb_push() onto the given skb and adds the amount of pushed bytes + * to the given len pointer. + * + * Return: the return value of the skb_push() call. + */ +static void *batadv_mcast_forw_skb_push(struct sk_buff *skb, size_t size, + unsigned short *len) +{ + *len += size; + return skb_push(skb, size); +} + +/** + * batadv_mcast_forw_push_padding() - push 2 padding bytes to skb's front + * @skb: the skb to push onto + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Pushes two padding bytes to the front of the given skb. + * + * Return: On success a pointer to the first byte of the two pushed padding + * bytes within the skb. NULL otherwise. + */ +static char * +batadv_mcast_forw_push_padding(struct sk_buff *skb, unsigned short *tvlv_len) +{ + const int pad_len = 2; + char *padding; + + if (skb_headroom(skb) < pad_len) + return NULL; + + padding = batadv_mcast_forw_skb_push(skb, pad_len, tvlv_len); + memset(padding, 0, pad_len); + + return padding; +} + +/** + * batadv_mcast_forw_push_est_padding() - push padding bytes if necessary + * @skb: the skb to potentially push the padding onto + * @count: the (estimated) number of originators the multicast packet needs to + * be sent to + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * If the number of destination entries is even then this adds two + * padding bytes to the end of the tracker TVLV. + * + * Return: true on success or if no padding is needed, false otherwise. 
+ */ +static bool +batadv_mcast_forw_push_est_padding(struct sk_buff *skb, int count, + unsigned short *tvlv_len) +{ + if (!(count % 2) && !batadv_mcast_forw_push_padding(skb, tvlv_len)) + return false; + + return true; +} + +/** + * batadv_mcast_forw_orig_entry() - get orig_node from an hlist node + * @node: the hlist node to get the orig_node from + * @entry_offset: the offset of the hlist node within the orig_node struct + * + * Return: The orig_node containing the hlist node on success, NULL on error. + */ +static struct batadv_orig_node * +batadv_mcast_forw_orig_entry(struct hlist_node *node, + size_t entry_offset) +{ + /* sanity check */ + switch (entry_offset) { + case offsetof(struct batadv_orig_node, mcast_want_all_ipv4_node): + case offsetof(struct batadv_orig_node, mcast_want_all_ipv6_node): + case offsetof(struct batadv_orig_node, mcast_want_all_rtr4_node): + case offsetof(struct batadv_orig_node, mcast_want_all_rtr6_node): + break; + default: + WARN_ON(1); + return NULL; + } + + return (struct batadv_orig_node *)((void *)node - entry_offset); +} + +/** + * batadv_mcast_forw_push_dest() - push an originator MAC address onto an skb + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb to push the destination address onto + * @vid: the vlan identifier + * @orig_node: the originator node to get the MAC address from + * @num_dests: a pointer to store the number of pushed addresses in + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * If the orig_node is a BLA backbone gateway, if there is not enough skb + * headroom available or if num_dests is already at its maximum (65535) then + * neither the skb nor num_dests is changed. Otherwise the originator's MAC + * address is pushed onto the given skb and num_dests incremented by one. + * + * Return: true if the orig_node is a backbone gateway or if an orig address + * was pushed successfully, false otherwise. + */ +static bool batadv_mcast_forw_push_dest(struct batadv_priv *bat_priv, + struct sk_buff *skb, unsigned short vid, + struct batadv_orig_node *orig_node, + unsigned short *num_dests, + unsigned short *tvlv_len) +{ + BUILD_BUG_ON(sizeof_field(struct batadv_tvlv_mcast_tracker, num_dests) + != sizeof(__be16)); + + /* Avoid sending to other BLA gateways - they already got the frame from + * the LAN side we share with them. + * TODO: Refactor to take BLA into account earlier in mode check. + */ + if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig, vid)) + return true; + + if (skb_headroom(skb) < ETH_ALEN || *num_dests == U16_MAX) + return false; + + batadv_mcast_forw_skb_push(skb, ETH_ALEN, tvlv_len); + ether_addr_copy(skb->data, orig_node->orig); + (*num_dests)++; + + return true; +} + +/** + * batadv_mcast_forw_push_dests_list() - push originators from list onto an skb + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb to push the destination addresses onto + * @vid: the vlan identifier + * @head: the list to gather originators from + * @entry_offset: offset of an hlist node in an orig_node structure + * @num_dests: a pointer to store the number of pushed addresses in + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Push the MAC addresses of all originators in the given list onto the given + * skb. + * + * Return: true on success, false otherwise. 
+ */ +static int batadv_mcast_forw_push_dests_list(struct batadv_priv *bat_priv, + struct sk_buff *skb, + unsigned short vid, + struct hlist_head *head, + size_t entry_offset, + unsigned short *num_dests, + unsigned short *tvlv_len) +{ + struct hlist_node *node; + struct batadv_orig_node *orig_node; + + rcu_read_lock(); + __hlist_for_each_rcu(node, head) { + orig_node = batadv_mcast_forw_orig_entry(node, entry_offset); + if (!orig_node || + !batadv_mcast_forw_push_dest(bat_priv, skb, vid, orig_node, + num_dests, tvlv_len)) { + rcu_read_unlock(); + return false; + } + } + rcu_read_unlock(); + + return true; +} + +/** + * batadv_mcast_forw_push_tt() - push originators with interest through TT + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb to push the destination addresses onto + * @vid: the vlan identifier + * @num_dests: a pointer to store the number of pushed addresses in + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Push the MAC addresses of all originators which have indicated interest in + * this multicast packet through the translation table onto the given skb. + * + * Return: true on success, false otherwise. + */ +static bool +batadv_mcast_forw_push_tt(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, unsigned short *num_dests, + unsigned short *tvlv_len) +{ + struct batadv_tt_orig_list_entry *orig_entry; + + struct batadv_tt_global_entry *tt_global; + const u8 *addr = eth_hdr(skb)->h_dest; + + /* ok */ + int ret = true; + + tt_global = batadv_tt_global_hash_find(bat_priv, addr, vid); + if (!tt_global) + goto out; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_entry, &tt_global->orig_list, list) { + if (!batadv_mcast_forw_push_dest(bat_priv, skb, vid, + orig_entry->orig_node, + num_dests, tvlv_len)) { + ret = false; + break; + } + } + rcu_read_unlock(); + + batadv_tt_global_entry_put(tt_global); + +out: + return ret; +} + +/** + * batadv_mcast_forw_push_want_all() - push originators with want-all flag + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb to push the destination addresses onto + * @vid: the vlan identifier + * @num_dests: a pointer to store the number of pushed addresses in + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Push the MAC addresses of all originators which have indicated interest in + * this multicast packet through the want-all flag onto the given skb. + * + * Return: true on success, false otherwise. 
+ */ +static bool batadv_mcast_forw_push_want_all(struct batadv_priv *bat_priv, + struct sk_buff *skb, + unsigned short vid, + unsigned short *num_dests, + unsigned short *tvlv_len) +{ + struct hlist_head *head = NULL; + size_t offset; + int ret; + + switch (eth_hdr(skb)->h_proto) { + case htons(ETH_P_IP): + head = &bat_priv->mcast.want_all_ipv4_list; + offset = offsetof(struct batadv_orig_node, + mcast_want_all_ipv4_node); + break; + case htons(ETH_P_IPV6): + head = &bat_priv->mcast.want_all_ipv6_list; + offset = offsetof(struct batadv_orig_node, + mcast_want_all_ipv6_node); + break; + default: + return false; + } + + ret = batadv_mcast_forw_push_dests_list(bat_priv, skb, vid, head, + offset, num_dests, tvlv_len); + if (!ret) + return false; + + return true; +} + +/** + * batadv_mcast_forw_push_want_rtr() - push originators with want-router flag + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb to push the destination addresses onto + * @vid: the vlan identifier + * @num_dests: a pointer to store the number of pushed addresses in + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Push the MAC addresses of all originators which have indicated interest in + * this multicast packet through the want-all-rtr flag onto the given skb. + * + * Return: true on success, false otherwise. + */ +static bool batadv_mcast_forw_push_want_rtr(struct batadv_priv *bat_priv, + struct sk_buff *skb, + unsigned short vid, + unsigned short *num_dests, + unsigned short *tvlv_len) +{ + struct hlist_head *head = NULL; + size_t offset; + int ret; + + switch (eth_hdr(skb)->h_proto) { + case htons(ETH_P_IP): + head = &bat_priv->mcast.want_all_rtr4_list; + offset = offsetof(struct batadv_orig_node, + mcast_want_all_rtr4_node); + break; + case htons(ETH_P_IPV6): + head = &bat_priv->mcast.want_all_rtr6_list; + offset = offsetof(struct batadv_orig_node, + mcast_want_all_rtr6_node); + break; + default: + return false; + } + + ret = batadv_mcast_forw_push_dests_list(bat_priv, skb, vid, head, + offset, num_dests, tvlv_len); + if (!ret) + return false; + + return true; +} + +/** + * batadv_mcast_forw_scrape() - remove bytes within skb data + * @skb: the skb to remove bytes from + * @offset: the offset from the skb data from which to scrape + * @len: the amount of bytes to scrape starting from the offset + * + * Scrapes/removes len bytes from the given skb at the given offset from the + * skb data. + * + * Caller needs to ensure that the region from the skb data's start up + * to/including the to be removed bytes are linearized. + */ +static void batadv_mcast_forw_scrape(struct sk_buff *skb, + unsigned short offset, + unsigned short len) +{ + char *to, *from; + + SKB_LINEAR_ASSERT(skb); + + to = skb_pull(skb, len); + from = to - len; + + memmove(to, from, offset); +} + +/** + * batadv_mcast_forw_push_scrape_padding() - remove TVLV padding + * @skb: the skb to potentially adjust the TVLV's padding on + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Remove two padding bytes from the end of the multicast tracker TVLV, + * from before the payload data. + * + * Caller needs to ensure that the TVLV bytes are linearized. 
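The scrape operation above removes bytes from the middle of a linear buffer by advancing the data start and moving the leading bytes up behind it. A standalone sketch of the same move over plain memory instead of an skb (illustrative)::

  #include <assert.h>
  #include <string.h>

  static char *scrape(char *data, size_t offset, size_t len)
  {
      char *to = data + len;     /* new start, like skb_pull(skb, len) */

      memmove(to, data, offset); /* keep the first offset bytes */
      return to;
  }

  int main(void)
  {
      char buf[] = "HEADERppPAYLOAD"; /* "pp" is 2 bytes of padding at offset 6 */
      char *new_start = scrape(buf, 6, 2);

      assert(strcmp(new_start, "HEADERPAYLOAD") == 0);
      return 0;
  }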
+ */ +static void batadv_mcast_forw_push_scrape_padding(struct sk_buff *skb, + unsigned short *tvlv_len) +{ + const int pad_len = 2; + + batadv_mcast_forw_scrape(skb, *tvlv_len - pad_len, pad_len); + *tvlv_len -= pad_len; +} + +/** + * batadv_mcast_forw_push_insert_padding() - insert TVLV padding + * @skb: the skb to potentially adjust the TVLV's padding on + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Inserts two padding bytes at the end of the multicast tracker TVLV, + * before the payload data in the given skb. + * + * Return: true on success, false otherwise. + */ +static bool batadv_mcast_forw_push_insert_padding(struct sk_buff *skb, + unsigned short *tvlv_len) +{ + unsigned short offset = *tvlv_len; + char *to, *from = skb->data; + + to = batadv_mcast_forw_push_padding(skb, tvlv_len); + if (!to) + return false; + + memmove(to, from, offset); + memset(to + offset, 0, *tvlv_len - offset); + return true; +} + +/** + * batadv_mcast_forw_push_adjust_padding() - adjust padding if necessary + * @skb: the skb to potentially adjust the TVLV's padding on + * @count: the estimated number of originators the multicast packet needs to + * be sent to + * @num_dests_pushed: the number of originators that were actually added to the + * multicast packet's tracker TVLV + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Adjusts the padding in the multicast packet's tracker TVLV depending on the + * initially estimated amount of destinations versus the amount of destinations + * that were actually added to the tracker TVLV. + * + * If the initial estimate was correct or at least the oddness was the same then + * no padding adjustment is performed. + * If the initially estimated number was even, so padding was initially added, + * but it turned out to be odd then padding is removed. + * If the initially estimated number was odd, so no padding was initially added, + * but it turned out to be even then padding is added. + * + * Return: true if no padding adjustment is needed or the adjustment was + * successful, false otherwise. + */ +static bool +batadv_mcast_forw_push_adjust_padding(struct sk_buff *skb, int *count, + unsigned short num_dests_pushed, + unsigned short *tvlv_len) +{ + int ret = true; + + if (likely((num_dests_pushed % 2) == (*count % 2))) + goto out; + + /** + * estimated even number of destinations, but turned out to be odd + * -> remove padding + */ + if (!(*count % 2) && (num_dests_pushed % 2)) + batadv_mcast_forw_push_scrape_padding(skb, tvlv_len); + /** + * estimated odd number of destinations, but turned out to be even + * -> add padding + */ + else if ((*count % 2) && (!(num_dests_pushed % 2))) + ret = batadv_mcast_forw_push_insert_padding(skb, tvlv_len); + +out: + *count = num_dests_pushed; + return ret; +} + +/** + * batadv_mcast_forw_push_dests() - push originator addresses onto an skb + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb to push the destination addresses onto + * @vid: the vlan identifier + * @is_routable: indicates whether the destination is routable + * @count: the number of originators the multicast packet needs to be sent to + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Push the MAC addresses of all originators which have indicated interest in + * this multicast packet onto the given skb. + * + * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on + * success 0. 
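The padding adjustment above only depends on whether the estimated and the actually pushed destination counts share the same parity. A standalone sketch of that decision (illustrative)::

  #include <assert.h>

  enum pad_action { PAD_KEEP, PAD_REMOVE, PAD_ADD };

  static enum pad_action pad_adjust(int estimated, int pushed)
  {
      if ((estimated % 2) == (pushed % 2))
          return PAD_KEEP;   /* estimate had the right parity */
      if (estimated % 2 == 0)
          return PAD_REMOVE; /* padding was added but is not needed */
      return PAD_ADD;        /* padding was skipped but is needed */
  }

  int main(void)
  {
      assert(pad_adjust(4, 4) == PAD_KEEP);
      assert(pad_adjust(4, 3) == PAD_REMOVE);
      assert(pad_adjust(3, 2) == PAD_ADD);
      assert(pad_adjust(3, 1) == PAD_KEEP);
      return 0;
  }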
+ */ +static int +batadv_mcast_forw_push_dests(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, int is_routable, int *count, + unsigned short *tvlv_len) +{ + unsigned short num_dests = 0; + + if (!batadv_mcast_forw_push_est_padding(skb, *count, tvlv_len)) + goto err; + + if (!batadv_mcast_forw_push_tt(bat_priv, skb, vid, &num_dests, + tvlv_len)) + goto err; + + if (!batadv_mcast_forw_push_want_all(bat_priv, skb, vid, &num_dests, + tvlv_len)) + goto err; + + if (is_routable && + !batadv_mcast_forw_push_want_rtr(bat_priv, skb, vid, &num_dests, + tvlv_len)) + goto err; + + if (!batadv_mcast_forw_push_adjust_padding(skb, count, num_dests, + tvlv_len)) + goto err; + + return 0; +err: + return -ENOMEM; +} + +/** + * batadv_mcast_forw_push_tracker() - push a multicast tracker TVLV header + * @skb: the skb to push the tracker TVLV onto + * @num_dests: the number of destination addresses to set in the header + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Pushes a multicast tracker TVLV header onto the given skb, including the + * generic TVLV header but excluding the destination MAC addresses. + * + * The provided num_dests value is taken into consideration to set the + * num_dests field in the tracker header and to set the appropriate TVLV length + * value fields. + * + * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on + * success 0. + */ +static int batadv_mcast_forw_push_tracker(struct sk_buff *skb, int num_dests, + unsigned short *tvlv_len) +{ + struct batadv_tvlv_mcast_tracker *mcast_tracker; + struct batadv_tvlv_hdr *tvlv_hdr; + unsigned int tvlv_value_len; + + if (skb_headroom(skb) < sizeof(*mcast_tracker) + sizeof(*tvlv_hdr)) + return -ENOMEM; + + tvlv_value_len = sizeof(*mcast_tracker) + *tvlv_len; + if (tvlv_value_len + sizeof(*tvlv_hdr) > U16_MAX) + return -ENOMEM; + + batadv_mcast_forw_skb_push(skb, sizeof(*mcast_tracker), tvlv_len); + mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb->data; + mcast_tracker->num_dests = htons(num_dests); + + skb_reset_network_header(skb); + + batadv_mcast_forw_skb_push(skb, sizeof(*tvlv_hdr), tvlv_len); + tvlv_hdr = (struct batadv_tvlv_hdr *)skb->data; + tvlv_hdr->type = BATADV_TVLV_MCAST_TRACKER; + tvlv_hdr->version = 1; + tvlv_hdr->len = htons(tvlv_value_len); + + return 0; +} + +/** + * batadv_mcast_forw_push_tvlvs() - push a multicast tracker TVLV onto an skb + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb to push the tracker TVLV onto + * @vid: the vlan identifier + * @is_routable: indicates whether the destination is routable + * @count: the number of originators the multicast packet needs to be sent to + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Pushes a multicast tracker TVLV onto the given skb, including the collected + * destination MAC addresses and the generic TVLV header. + * + * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on + * success 0. 
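batadv_mcast_forw_push_tracker() above leaves the packet with a generic TVLV header directly followed by the 16 bit destination count and then the MAC entries. A standalone sketch of serializing that layout into a flat buffer (illustrative; the field widths and the TVLV type value used here are assumptions, not copied from batadv_packet.h)::

  #include <arpa/inet.h>
  #include <assert.h>
  #include <stdint.h>
  #include <string.h>

  struct tvlv_hdr { uint8_t type; uint8_t version; uint16_t len; };

  static size_t push_tracker(uint8_t *buf, uint16_t num_dests, uint16_t dest_bytes)
  {
      struct tvlv_hdr hdr = {
          .type = 0x06,  /* hypothetical TVLV type value */
          .version = 1,
          .len = htons(sizeof(uint16_t) + dest_bytes),
      };
      uint16_t count = htons(num_dests);

      memcpy(buf, &hdr, sizeof(hdr));
      memcpy(buf + sizeof(hdr), &count, sizeof(count));
      return sizeof(hdr) + sizeof(count);
  }

  int main(void)
  {
      uint8_t buf[8];
      size_t used = push_tracker(buf, 3, 3 * 6);

      assert(used == 6);
      assert(buf[0] == 0x06 && buf[1] == 1);
      return 0;
  }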
+ */ +static int +batadv_mcast_forw_push_tvlvs(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, int is_routable, int count, + unsigned short *tvlv_len) +{ + int ret; + + ret = batadv_mcast_forw_push_dests(bat_priv, skb, vid, is_routable, + &count, tvlv_len); + if (ret < 0) + return ret; + + ret = batadv_mcast_forw_push_tracker(skb, count, tvlv_len); + if (ret < 0) + return ret; + + return 0; +} + +/** + * batadv_mcast_forw_push_hdr() - push a multicast packet header onto an skb + * @skb: the skb to push the header onto + * @tvlv_len: the total TVLV length value to set in the header + * + * Pushes a batman-adv multicast packet header onto the given skb and sets + * the provided total TVLV length value in it. + * + * Caller needs to ensure enough skb headroom is available. + * + * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on + * success 0. + */ +static int +batadv_mcast_forw_push_hdr(struct sk_buff *skb, unsigned short tvlv_len) +{ + struct batadv_mcast_packet *mcast_packet; + + if (skb_headroom(skb) < sizeof(*mcast_packet)) + return -ENOMEM; + + skb_push(skb, sizeof(*mcast_packet)); + + mcast_packet = (struct batadv_mcast_packet *)skb->data; + mcast_packet->version = BATADV_COMPAT_VERSION; + mcast_packet->ttl = BATADV_TTL; + mcast_packet->packet_type = BATADV_MCAST; + mcast_packet->reserved = 0; + mcast_packet->tvlv_len = htons(tvlv_len); + + return 0; +} + +/** + * batadv_mcast_forw_scrub_dests() - scrub destinations in a tracker TVLV + * @bat_priv: the bat priv with all the soft interface information + * @comp_neigh: next hop neighbor to scrub+collect destinations for + * @dest: start MAC entry in original skb's tracker TVLV + * @next_dest: start MAC entry in to be sent skb's tracker TVLV + * @num_dests: number of remaining destination MAC entries to iterate over + * + * This sorts destination entries into either the original batman-adv + * multicast packet or the skb (copy) that is going to be sent to comp_neigh + * next. + * + * In preparation for the next, to be (unicast) transmitted batman-adv multicast + * packet skb to be sent to the given neighbor node, tries to collect all + * originator MAC addresses that have the given neighbor node as their next hop + * in the to be transmitted skb (copy), which next_dest points into. That is we + * zero all destination entries in next_dest which do not have comp_neigh as + * their next hop. And zero all destination entries in the original skb that + * would have comp_neigh as their next hop (to avoid redundant transmissions and + * duplicated payload later). 
+ */ +static void +batadv_mcast_forw_scrub_dests(struct batadv_priv *bat_priv, + struct batadv_neigh_node *comp_neigh, u8 *dest, + u8 *next_dest, u16 num_dests) +{ + struct batadv_neigh_node *next_neigh; + + /* skip first entry, this is what we are comparing with */ + eth_zero_addr(dest); + dest += ETH_ALEN; + next_dest += ETH_ALEN; + num_dests--; + + batadv_mcast_forw_tracker_for_each_dest2(dest, next_dest, num_dests) { + if (is_zero_ether_addr(next_dest)) + continue; + + /* sanity check, we expect unicast destinations */ + if (is_multicast_ether_addr(next_dest)) { + eth_zero_addr(dest); + eth_zero_addr(next_dest); + continue; + } + + next_neigh = batadv_orig_to_router(bat_priv, next_dest, NULL); + if (!next_neigh) { + eth_zero_addr(next_dest); + continue; + } + + if (!batadv_compare_eth(next_neigh->addr, comp_neigh->addr)) { + eth_zero_addr(next_dest); + batadv_neigh_node_put(next_neigh); + continue; + } + + /* found an entry for our next packet to transmit, so remove it + * from the original packet + */ + eth_zero_addr(dest); + batadv_neigh_node_put(next_neigh); + } +} + +/** + * batadv_mcast_forw_shrink_fill() - swap slot with next non-zero destination + * @slot: the to be filled zero-MAC destination entry in a tracker TVLV + * @num_dests_slot: remaining entries in tracker TVLV from/including slot + * + * Searches for the next non-zero-MAC destination entry in a tracker TVLV after + * the given slot pointer. And if found, swaps it with the zero-MAC destination + * entry which the slot points to. + * + * Return: true if slot was swapped/filled successfully, false otherwise. + */ +static bool batadv_mcast_forw_shrink_fill(u8 *slot, u16 num_dests_slot) +{ + u16 num_dests_filler; + u8 *filler; + + /* sanity check, should not happen */ + if (!num_dests_slot) + return false; + + num_dests_filler = num_dests_slot - 1; + filler = slot + ETH_ALEN; + + /* find a candidate to fill the empty slot */ + batadv_mcast_forw_tracker_for_each_dest(filler, num_dests_filler) { + if (is_zero_ether_addr(filler)) + continue; + + ether_addr_copy(slot, filler); + eth_zero_addr(filler); + return true; + } + + return false; +} + +/** + * batadv_mcast_forw_shrink_pack_dests() - pack destinations of a tracker TVLV + * @skb: the batman-adv multicast packet to compact destinations in + * + * Compacts the originator destination MAC addresses in the multicast tracker + * TVLV of the given multicast packet. This is done by moving all non-zero + * MAC addresses in direction of the skb head and all zero MAC addresses in skb + * tail direction, within the multicast tracker TVLV. + * + * Return: The number of consecutive zero MAC address destinations which are + * now at the end of the multicast tracker TVLV. + */ +static int batadv_mcast_forw_shrink_pack_dests(struct sk_buff *skb) +{ + struct batadv_tvlv_mcast_tracker *mcast_tracker; + unsigned char *skb_net_hdr; + u16 num_dests_slot; + u8 *slot; + + skb_net_hdr = skb_network_header(skb); + mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr; + num_dests_slot = ntohs(mcast_tracker->num_dests); + + slot = (u8 *)mcast_tracker + sizeof(*mcast_tracker); + + batadv_mcast_forw_tracker_for_each_dest(slot, num_dests_slot) { + /* find an empty slot */ + if (!is_zero_ether_addr(slot)) + continue; + + if (!batadv_mcast_forw_shrink_fill(slot, num_dests_slot)) + /* could not find a filler, so we successfully packed + * and can stop - and must not reduce num_dests_slot! 
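The hole filling in batadv_mcast_forw_shrink_fill() above, driven by the packing loop it serves, compacts the destination list so that all zeroed entries end up at the tail. A standalone sketch of the same compaction over a flat array (illustrative)::

  #include <assert.h>
  #include <stdbool.h>
  #include <string.h>

  #define ETH_ALEN 6

  static bool mac_is_zero(const unsigned char *mac)
  {
      static const unsigned char zero[ETH_ALEN];

      return memcmp(mac, zero, ETH_ALEN) == 0;
  }

  /* returns the number of all-zero entries left at the tail after packing */
  static unsigned int pack_dests(unsigned char *dests, unsigned int num)
  {
      for (unsigned int slot = 0; slot < num; slot++) {
          unsigned char *cur = dests + slot * ETH_ALEN;
          bool filled = false;

          if (!mac_is_zero(cur))
              continue;

          /* look for a later non-zero entry to fill this hole */
          for (unsigned int i = slot + 1; i < num; i++) {
              unsigned char *cand = dests + i * ETH_ALEN;

              if (mac_is_zero(cand))
                  continue;
              memcpy(cur, cand, ETH_ALEN);
              memset(cand, 0, ETH_ALEN);
              filled = true;
              break;
          }
          if (!filled)
              return num - slot; /* everything from here on is zero */
      }
      return 0; /* no holes at all */
  }

  int main(void)
  {
      unsigned char dests[4 * ETH_ALEN] = {
          0, 0, 0, 0, 0, 0,       /* hole */
          1, 2, 3, 4, 5, 6,
          0, 0, 0, 0, 0, 0,       /* hole */
          7, 8, 9, 10, 11, 12,
      };

      assert(pack_dests(dests, 4) == 2); /* two zeroed entries at the end */
      assert(dests[0] == 1 && dests[6] == 7);
      return 0;
  }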
+ */ + break; + } + + /* num_dests_slot is now the amount of reduced, zeroed + * destinations at the end of the tracker TVLV + */ + return num_dests_slot; +} + +/** + * batadv_mcast_forw_shrink_align_offset() - get new alignment offset + * @num_dests_old: the old, to be updated amount of destination nodes + * @num_dests_reduce: the number of destinations that were removed + * + * Calculates the amount of potential extra alignment offset that is needed to + * adjust the TVLV padding after the change in destination nodes. + * + * Return: + * 0: If no change to padding is needed. + * 2: If padding needs to be removed. + * -2: If padding needs to be added. + */ +static short +batadv_mcast_forw_shrink_align_offset(unsigned int num_dests_old, + unsigned int num_dests_reduce) +{ + /* even amount of removed destinations -> no alignment change */ + if (!(num_dests_reduce % 2)) + return 0; + + /* even to odd amount of destinations -> remove padding */ + if (!(num_dests_old % 2)) + return 2; + + /* odd to even amount of destinations -> add padding */ + return -2; +} + +/** + * batadv_mcast_forw_shrink_update_headers() - update shrunk mc packet headers + * @skb: the batman-adv multicast packet to update headers of + * @num_dests_reduce: the number of destinations that were removed + * + * This updates any fields of a batman-adv multicast packet that are affected + * by the reduced number of destinations in the multicast tracket TVLV. In + * particular this updates: + * + * The num_dest field of the multicast tracker TVLV. + * The TVLV length field of the according generic TVLV header. + * The batman-adv multicast packet's total TVLV length field. + * + * Return: The offset in skb's tail direction at which the new batman-adv + * multicast packet header needs to start. + */ +static unsigned int +batadv_mcast_forw_shrink_update_headers(struct sk_buff *skb, + unsigned int num_dests_reduce) +{ + struct batadv_tvlv_mcast_tracker *mcast_tracker; + struct batadv_mcast_packet *mcast_packet; + struct batadv_tvlv_hdr *tvlv_hdr; + unsigned char *skb_net_hdr; + unsigned int offset; + short align_offset; + u16 num_dests; + + skb_net_hdr = skb_network_header(skb); + mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr; + num_dests = ntohs(mcast_tracker->num_dests); + + align_offset = batadv_mcast_forw_shrink_align_offset(num_dests, + num_dests_reduce); + offset = ETH_ALEN * num_dests_reduce + align_offset; + num_dests -= num_dests_reduce; + + /* update tracker header */ + mcast_tracker->num_dests = htons(num_dests); + + /* update tracker's tvlv header's length field */ + tvlv_hdr = (struct batadv_tvlv_hdr *)(skb_network_header(skb) - + sizeof(*tvlv_hdr)); + tvlv_hdr->len = htons(ntohs(tvlv_hdr->len) - offset); + + /* update multicast packet header's tvlv length field */ + mcast_packet = (struct batadv_mcast_packet *)skb->data; + mcast_packet->tvlv_len = htons(ntohs(mcast_packet->tvlv_len) - offset); + + return offset; +} + +/** + * batadv_mcast_forw_shrink_move_headers() - move multicast headers by offset + * @skb: the batman-adv multicast packet to move headers for + * @offset: a non-negative offset to move headers by, towards the skb tail + * + * Moves the batman-adv multicast packet header, its multicast tracker TVLV and + * any TVLVs in between by the given offset in direction towards the tail. 
+ */ +static void +batadv_mcast_forw_shrink_move_headers(struct sk_buff *skb, unsigned int offset) +{ + struct batadv_tvlv_mcast_tracker *mcast_tracker; + unsigned char *skb_net_hdr; + unsigned int len; + u16 num_dests; + + skb_net_hdr = skb_network_header(skb); + mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr; + num_dests = ntohs(mcast_tracker->num_dests); + len = skb_network_offset(skb) + sizeof(*mcast_tracker); + len += num_dests * ETH_ALEN; + + batadv_mcast_forw_scrape(skb, len, offset); +} + +/** + * batadv_mcast_forw_shrink_tracker() - remove zero addresses in a tracker tvlv + * @skb: the batman-adv multicast packet to (potentially) shrink + * + * Removes all destinations with a zero MAC addresses (00:00:00:00:00:00) from + * the given batman-adv multicast packet's tracker TVLV and updates headers + * accordingly to maintain a valid batman-adv multicast packet. + */ +static void batadv_mcast_forw_shrink_tracker(struct sk_buff *skb) +{ + unsigned int offset; + u16 dests_reduced; + + dests_reduced = batadv_mcast_forw_shrink_pack_dests(skb); + if (!dests_reduced) + return; + + offset = batadv_mcast_forw_shrink_update_headers(skb, dests_reduced); + batadv_mcast_forw_shrink_move_headers(skb, offset); +} + +/** + * batadv_mcast_forw_packet() - forward a batman-adv multicast packet + * @bat_priv: the bat priv with all the soft interface information + * @skb: the received or locally generated batman-adv multicast packet + * @local_xmit: indicates that the packet was locally generated and not received + * + * Parses the tracker TVLV of a batman-adv multicast packet and forwards the + * packet as indicated in this TVLV. + * + * Caller needs to set the skb network header to the start of the multicast + * tracker TVLV (excluding the generic TVLV header) and the skb transport header + * to the next byte after this multicast tracker TVLV. + * + * Caller needs to free the skb. + * + * Return: NET_RX_SUCCESS or NET_RX_DROP on success or a negative error + * code on failure. NET_RX_SUCCESS if the received packet is supposed to be + * decapsulated and forwarded to the own soft interface, NET_RX_DROP otherwise. 
+ */ +static int batadv_mcast_forw_packet(struct batadv_priv *bat_priv, + struct sk_buff *skb, bool local_xmit) +{ + struct batadv_tvlv_mcast_tracker *mcast_tracker; + struct batadv_neigh_node *neigh_node; + unsigned long offset, num_dests_off; + struct sk_buff *nexthop_skb; + unsigned char *skb_net_hdr; + bool local_recv = false; + unsigned int tvlv_len; + bool xmitted = false; + u8 *dest, *next_dest; + u16 num_dests; + int ret; + + /* (at least) TVLV part needs to be linearized */ + SKB_LINEAR_ASSERT(skb); + + /* check if num_dests is within skb length */ + num_dests_off = offsetof(struct batadv_tvlv_mcast_tracker, num_dests); + if (num_dests_off > skb_network_header_len(skb)) + return -EINVAL; + + skb_net_hdr = skb_network_header(skb); + mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr; + num_dests = ntohs(mcast_tracker->num_dests); + + dest = (u8 *)mcast_tracker + sizeof(*mcast_tracker); + + /* check if full tracker tvlv is within skb length */ + tvlv_len = sizeof(*mcast_tracker) + ETH_ALEN * num_dests; + if (tvlv_len > skb_network_header_len(skb)) + return -EINVAL; + + /* invalidate checksum: */ + skb->ip_summed = CHECKSUM_NONE; + + batadv_mcast_forw_tracker_for_each_dest(dest, num_dests) { + if (is_zero_ether_addr(dest)) + continue; + + /* only unicast originator addresses supported */ + if (is_multicast_ether_addr(dest)) { + eth_zero_addr(dest); + continue; + } + + if (batadv_is_my_mac(bat_priv, dest)) { + eth_zero_addr(dest); + local_recv = true; + continue; + } + + neigh_node = batadv_orig_to_router(bat_priv, dest, NULL); + if (!neigh_node) { + eth_zero_addr(dest); + continue; + } + + nexthop_skb = skb_copy(skb, GFP_ATOMIC); + if (!nexthop_skb) { + batadv_neigh_node_put(neigh_node); + return -ENOMEM; + } + + offset = dest - skb->data; + next_dest = nexthop_skb->data + offset; + + batadv_mcast_forw_scrub_dests(bat_priv, neigh_node, dest, + next_dest, num_dests); + batadv_mcast_forw_shrink_tracker(nexthop_skb); + + batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_TX); + batadv_add_counter(bat_priv, BATADV_CNT_MCAST_TX_BYTES, + nexthop_skb->len + ETH_HLEN); + xmitted = true; + ret = batadv_send_unicast_skb(nexthop_skb, neigh_node); + + batadv_neigh_node_put(neigh_node); + + if (ret < 0) + return ret; + } + + if (xmitted) { + if (local_xmit) { + batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_TX_LOCAL); + batadv_add_counter(bat_priv, + BATADV_CNT_MCAST_TX_LOCAL_BYTES, + skb->len - + skb_transport_offset(skb)); + } else { + batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_FWD); + batadv_add_counter(bat_priv, BATADV_CNT_MCAST_FWD_BYTES, + skb->len + ETH_HLEN); + } + } + + if (local_recv) + return NET_RX_SUCCESS; + else + return NET_RX_DROP; +} + +/** + * batadv_mcast_forw_tracker_tvlv_handler() - handle an mcast tracker tvlv + * @bat_priv: the bat priv with all the soft interface information + * @skb: the received batman-adv multicast packet + * + * Parses the tracker TVLV of an incoming batman-adv multicast packet and + * forwards the packet as indicated in this TVLV. + * + * Caller needs to set the skb network header to the start of the multicast + * tracker TVLV (excluding the generic TVLV header) and the skb transport header + * to the next byte after this multicast tracker TVLV. + * + * Caller needs to free the skb. + * + * Return: NET_RX_SUCCESS or NET_RX_DROP on success or a negative error + * code on failure. NET_RX_SUCCESS if the received packet is supposed to be + * decapsulated and forwarded to the own soft interface, NET_RX_DROP otherwise. 
+ */ +int batadv_mcast_forw_tracker_tvlv_handler(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + return batadv_mcast_forw_packet(bat_priv, skb, false); +} + +/** + * batadv_mcast_forw_packet_hdrlen() - multicast packet header length + * @num_dests: number of destination nodes + * + * Calculates the total batman-adv multicast packet header length for a given + * number of destination nodes (excluding the outer ethernet frame). + * + * Return: The calculated total batman-adv multicast packet header length. + */ +unsigned int batadv_mcast_forw_packet_hdrlen(unsigned int num_dests) +{ + /** + * If the number of destination entries is even then we need to add + * two byte padding to the tracker TVLV. + */ + int padding = (!(num_dests % 2)) ? 2 : 0; + + return padding + num_dests * ETH_ALEN + + sizeof(struct batadv_tvlv_mcast_tracker) + + sizeof(struct batadv_tvlv_hdr) + + sizeof(struct batadv_mcast_packet); +} + +/** + * batadv_mcast_forw_expand_head() - expand headroom for an mcast packet + * @bat_priv: the bat priv with all the soft interface information + * @skb: the multicast packet to send + * + * Tries to expand an skb's headroom so that its head to tail is 1298 + * bytes (minimum IPv6 MTU + vlan ethernet header size) large. + * + * Return: -EINVAL if the given skb's length is too large or -ENOMEM on memory + * allocation failure. Otherwise, on success, zero is returned. + */ +static int batadv_mcast_forw_expand_head(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + int hdr_size = VLAN_ETH_HLEN + IPV6_MIN_MTU - skb->len; + + /* TODO: Could be tightened to actual number of destination nodes? + * But it's tricky, number of destinations might have increased since + * we last checked. + */ + if (hdr_size < 0) { + /* batadv_mcast_forw_mode_check_count() should ensure we do not + * end up here + */ + WARN_ON(1); + return -EINVAL; + } + + if (skb_headroom(skb) < hdr_size && + pskb_expand_head(skb, hdr_size, 0, GFP_ATOMIC) < 0) + return -ENOMEM; + + return 0; +} + +/** + * batadv_mcast_forw_push() - encapsulate skb in a batman-adv multicast packet + * @bat_priv: the bat priv with all the soft interface information + * @skb: the multicast packet to encapsulate and send + * @vid: the vlan identifier + * @is_routable: indicates whether the destination is routable + * @count: the number of originators the multicast packet needs to be sent to + * + * Encapsulates the given multicast packet in a batman-adv multicast packet. + * A multicast tracker TVLV with destination originator addresses for any node + * that signaled interest in it, that is either via the translation table or the + * according want-all flags, is attached accordingly. + * + * Return: true on success, false otherwise. 
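The 1298 byte figure used by batadv_mcast_forw_expand_head() above is simply the minimum IPv6 MTU plus a VLAN tagged Ethernet header. A standalone check of the arithmetic (illustrative)::

  #include <assert.h>

  #define IPV6_MIN_MTU  1280 /* RFC 8200 minimum link MTU */
  #define VLAN_ETH_HLEN 18   /* 14 byte Ethernet header + 4 byte 802.1Q tag */

  int main(void)
  {
      assert(VLAN_ETH_HLEN + IPV6_MIN_MTU == 1298);

      /* e.g. a 100 byte frame would need 1198 more bytes of headroom */
      assert(VLAN_ETH_HLEN + IPV6_MIN_MTU - 100 == 1198);
      return 0;
  }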
+ */ +bool batadv_mcast_forw_push(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, int is_routable, int count) +{ + unsigned short tvlv_len = 0; + int ret; + + if (batadv_mcast_forw_expand_head(bat_priv, skb) < 0) + goto err; + + skb_reset_transport_header(skb); + + ret = batadv_mcast_forw_push_tvlvs(bat_priv, skb, vid, is_routable, + count, &tvlv_len); + if (ret < 0) + goto err; + + ret = batadv_mcast_forw_push_hdr(skb, tvlv_len); + if (ret < 0) + goto err; + + return true; + +err: + if (tvlv_len) + skb_pull(skb, tvlv_len); + + return false; +} + +/** + * batadv_mcast_forw_mcsend() - send a self prepared batman-adv multicast packet + * @bat_priv: the bat priv with all the soft interface information + * @skb: the multicast packet to encapsulate and send + * + * Transmits a batman-adv multicast packet that was locally prepared and + * consumes/frees it. + * + * Return: NET_XMIT_DROP on memory allocation failure. NET_XMIT_SUCCESS + * otherwise. + */ +int batadv_mcast_forw_mcsend(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + int ret = batadv_mcast_forw_packet(bat_priv, skb, true); + + if (ret < 0) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + + consume_skb(skb); + return NET_XMIT_SUCCESS; +} diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 0c64d81a7761..1f7ed9d4f6fd 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -7,6 +7,7 @@ #include "netlink.h" #include "main.h" +#include <linux/array_size.h> #include <linux/atomic.h> #include <linux/bitops.h> #include <linux/bug.h> @@ -20,7 +21,6 @@ #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <linux/init.h> -#include <linux/kernel.h> #include <linux/limits.h> #include <linux/list.h> #include <linux/minmax.h> diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 34903df4fe93..71c143d4b6d0 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -312,6 +312,33 @@ batadv_orig_router_get(struct batadv_orig_node *orig_node, } /** + * batadv_orig_to_router() - get next hop neighbor to an orig address + * @bat_priv: the bat priv with all the soft interface information + * @orig_addr: the originator MAC address to search the best next hop router for + * @if_outgoing: the interface where the payload packet has been received or + * the OGM should be sent to + * + * Return: A neighbor node which is the best router towards the given originator + * address. 
+ */ +struct batadv_neigh_node * +batadv_orig_to_router(struct batadv_priv *bat_priv, u8 *orig_addr, + struct batadv_hard_iface *if_outgoing) +{ + struct batadv_neigh_node *neigh_node; + struct batadv_orig_node *orig_node; + + orig_node = batadv_orig_hash_find(bat_priv, orig_addr); + if (!orig_node) + return NULL; + + neigh_node = batadv_find_router(bat_priv, orig_node, if_outgoing); + batadv_orig_node_put(orig_node); + + return neigh_node; +} + +/** * batadv_orig_ifinfo_get() - find the ifinfo from an orig_node * @orig_node: the orig node to be queried * @if_outgoing: the interface for which the ifinfo should be acquired @@ -942,6 +969,7 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, #ifdef CONFIG_BATMAN_ADV_MCAST orig_node->mcast_flags = BATADV_MCAST_WANT_NO_RTR4; orig_node->mcast_flags |= BATADV_MCAST_WANT_NO_RTR6; + orig_node->mcast_flags |= BATADV_MCAST_HAVE_MC_PTYPE_CAPA; INIT_HLIST_NODE(&orig_node->mcast_want_all_unsnoopables_node); INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv4_node); INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv6_node); diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index ea3d69e4e670..db0c55128170 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -36,6 +36,9 @@ void batadv_neigh_node_release(struct kref *ref); struct batadv_neigh_node * batadv_orig_router_get(struct batadv_orig_node *orig_node, const struct batadv_hard_iface *if_outgoing); +struct batadv_neigh_node * +batadv_orig_to_router(struct batadv_priv *bat_priv, u8 *orig_addr, + struct batadv_hard_iface *if_outgoing); struct batadv_neigh_ifinfo * batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh, struct batadv_hard_iface *if_outgoing); diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 163cd43c4821..f1061985149f 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -1270,3 +1270,73 @@ out: batadv_orig_node_put(orig_node); return ret; } + +#ifdef CONFIG_BATMAN_ADV_MCAST +/** + * batadv_recv_mcast_packet() - process received batman-adv multicast packet + * @skb: the received batman-adv multicast packet + * @recv_if: interface that the skb is received on + * + * Parses the given, received batman-adv multicast packet. Depending on the + * contents of its TVLV forwards it and/or decapsulates it to hand it to the + * soft interface. + * + * Return: NET_RX_DROP if the skb is not consumed, NET_RX_SUCCESS otherwise. + */ +int batadv_recv_mcast_packet(struct sk_buff *skb, + struct batadv_hard_iface *recv_if) +{ + struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_mcast_packet *mcast_packet; + int hdr_size = sizeof(*mcast_packet); + unsigned char *tvlv_buff; + int ret = NET_RX_DROP; + u16 tvlv_buff_len; + + if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) + goto free_skb; + + /* create a copy of the skb, if needed, to modify it. 
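batadv_recv_mcast_packet(), continued just below, relies on a post-decrementing TTL check: packets arriving with a TTL of 0 or 1 are dropped, anything else is processed with the TTL already reduced by one. A standalone sketch of that idiom (illustrative)::

  #include <assert.h>
  #include <stdbool.h>
  #include <stdint.h>

  static bool ttl_check_and_decrement(uint8_t *ttl)
  {
      if ((*ttl)-- < 2)
          return false; /* drop */
      return true;      /* forward, *ttl is now old value - 1 */
  }

  int main(void)
  {
      uint8_t ttl;

      ttl = 1;
      assert(!ttl_check_and_decrement(&ttl));

      ttl = 5;
      assert(ttl_check_and_decrement(&ttl) && ttl == 4);
      return 0;
  }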
*/ + if (skb_cow(skb, ETH_HLEN) < 0) + goto free_skb; + + /* packet needs to be linearized to access the tvlv content */ + if (skb_linearize(skb) < 0) + goto free_skb; + + mcast_packet = (struct batadv_mcast_packet *)skb->data; + if (mcast_packet->ttl-- < 2) + goto free_skb; + + tvlv_buff = (unsigned char *)(skb->data + hdr_size); + tvlv_buff_len = ntohs(mcast_packet->tvlv_len); + + if (tvlv_buff_len > skb->len - hdr_size) + goto free_skb; + + ret = batadv_tvlv_containers_process(bat_priv, BATADV_MCAST, NULL, skb, + tvlv_buff, tvlv_buff_len); + if (ret >= 0) { + batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_RX); + batadv_add_counter(bat_priv, BATADV_CNT_MCAST_RX_BYTES, + skb->len + ETH_HLEN); + } + + hdr_size += tvlv_buff_len; + + if (ret == NET_RX_SUCCESS && (skb->len - hdr_size >= ETH_HLEN)) { + batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_RX_LOCAL); + batadv_add_counter(bat_priv, BATADV_CNT_MCAST_RX_LOCAL_BYTES, + skb->len - hdr_size); + + batadv_interface_rx(bat_priv->soft_iface, skb, hdr_size, NULL); + /* skb was consumed */ + skb = NULL; + } + +free_skb: + kfree_skb(skb); + + return ret; +} +#endif /* CONFIG_BATMAN_ADV_MCAST */ diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index afd15b3879f1..e9849f032a24 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -27,6 +27,17 @@ int batadv_recv_frag_packet(struct sk_buff *skb, struct batadv_hard_iface *iface); int batadv_recv_bcast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); +#ifdef CONFIG_BATMAN_ADV_MCAST +int batadv_recv_mcast_packet(struct sk_buff *skb, + struct batadv_hard_iface *recv_if); +#else +static inline int batadv_recv_mcast_packet(struct sk_buff *skb, + struct batadv_hard_iface *recv_if) +{ + kfree_skb(skb); + return NET_RX_DROP; +} +#endif int batadv_recv_unicast_tvlv(struct sk_buff *skb, struct batadv_hard_iface *recv_if); int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb, diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 1bf1232a4f75..89c51b3cf430 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -301,12 +301,13 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, send: if (do_bcast && !is_broadcast_ether_addr(ethhdr->h_dest)) { - forw_mode = batadv_mcast_forw_mode(bat_priv, skb, + forw_mode = batadv_mcast_forw_mode(bat_priv, skb, vid, &mcast_is_routable); switch (forw_mode) { case BATADV_FORW_BCAST: break; case BATADV_FORW_UCASTS: + case BATADV_FORW_MCAST: do_bcast = false; break; case BATADV_FORW_NONE: @@ -365,6 +366,8 @@ send: } else if (forw_mode == BATADV_FORW_UCASTS) { ret = batadv_mcast_forw_send(bat_priv, skb, vid, mcast_is_routable); + } else if (forw_mode == BATADV_FORW_MCAST) { + ret = batadv_mcast_forw_mcsend(bat_priv, skb); } else { if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb)) @@ -762,6 +765,7 @@ static int batadv_softif_init_late(struct net_device *dev) atomic_set(&bat_priv->mcast.num_want_all_unsnoopables, 0); atomic_set(&bat_priv->mcast.num_want_all_ipv4, 0); atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0); + atomic_set(&bat_priv->mcast.num_no_mc_ptype_capa, 0); #endif atomic_set(&bat_priv->gw.mode, BATADV_GW_MODE_OFF); atomic_set(&bat_priv->gw.bandwidth_down, 100); @@ -925,6 +929,18 @@ static const struct { { "tt_response_rx" }, { "tt_roam_adv_tx" }, { "tt_roam_adv_rx" }, +#ifdef CONFIG_BATMAN_ADV_MCAST + { "mcast_tx" }, + { "mcast_tx_bytes" }, + { "mcast_tx_local" }, + { "mcast_tx_local_bytes" }, + { "mcast_rx" }, + { "mcast_rx_bytes" }, + { 
"mcast_rx_local" }, + { "mcast_rx_local_bytes" }, + { "mcast_fwd" }, + { "mcast_fwd_bytes" }, +#endif #ifdef CONFIG_BATMAN_ADV_DAT { "dat_get_tx" }, { "dat_get_rx" }, diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 17d5ea1d8e84..00840d5784fe 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -862,6 +862,70 @@ enum batadv_counters { */ BATADV_CNT_TT_ROAM_ADV_RX, +#ifdef CONFIG_BATMAN_ADV_MCAST + /** + * @BATADV_CNT_MCAST_TX: transmitted batman-adv multicast packets + * counter + */ + BATADV_CNT_MCAST_TX, + + /** + * @BATADV_CNT_MCAST_TX_BYTES: transmitted batman-adv multicast packets + * bytes counter + */ + BATADV_CNT_MCAST_TX_BYTES, + + /** + * @BATADV_CNT_MCAST_TX_LOCAL: counter for multicast packets which + * were locally encapsulated and transmitted as batman-adv multicast + * packets + */ + BATADV_CNT_MCAST_TX_LOCAL, + + /** + * @BATADV_CNT_MCAST_TX_LOCAL_BYTES: bytes counter for multicast packets + * which were locally encapsulated and transmitted as batman-adv + * multicast packets + */ + BATADV_CNT_MCAST_TX_LOCAL_BYTES, + + /** + * @BATADV_CNT_MCAST_RX: received batman-adv multicast packet counter + */ + BATADV_CNT_MCAST_RX, + + /** + * @BATADV_CNT_MCAST_RX_BYTES: received batman-adv multicast packet + * bytes counter + */ + BATADV_CNT_MCAST_RX_BYTES, + + /** + * @BATADV_CNT_MCAST_RX_LOCAL: counter for received batman-adv multicast + * packets which were forwarded to the local soft interface + */ + BATADV_CNT_MCAST_RX_LOCAL, + + /** + * @BATADV_CNT_MCAST_RX_LOCAL_BYTES: bytes counter for received + * batman-adv multicast packets which were forwarded to the local soft + * interface + */ + BATADV_CNT_MCAST_RX_LOCAL_BYTES, + + /** + * @BATADV_CNT_MCAST_FWD: counter for received batman-adv multicast + * packets which were forwarded to other, neighboring nodes + */ + BATADV_CNT_MCAST_FWD, + + /** + * @BATADV_CNT_MCAST_FWD_BYTES: bytes counter for received batman-adv + * multicast packets which were forwarded to other, neighboring nodes + */ + BATADV_CNT_MCAST_FWD_BYTES, +#endif + #ifdef CONFIG_BATMAN_ADV_DAT /** * @BATADV_CNT_DAT_GET_TX: transmitted dht GET traffic packet counter @@ -1279,6 +1343,12 @@ struct batadv_priv_mcast { atomic_t num_want_all_rtr6; /** + * @num_no_mc_ptype_capa: counter for number of nodes without the + * BATADV_MCAST_HAVE_MC_PTYPE_CAPA flag + */ + atomic_t num_no_mc_ptype_capa; + + /** * @want_lists_lock: lock for protecting modifications to mcasts * want_all_{unsnoopables,ipv4,ipv6}_list (traversals are rcu-locked) */ diff --git a/net/core/dev.c b/net/core/dev.c index c879246be48d..3950ced396b5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11236,17 +11236,19 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, dev_net_set(dev, net); dev->ifindex = new_ifindex; - /* Send a netdev-add uevent to the new namespace */ - kobject_uevent(&dev->dev.kobj, KOBJ_ADD); - netdev_adjacent_add_links(dev); - if (new_name[0]) /* Rename the netdev to prepared name */ strscpy(dev->name, new_name, IFNAMSIZ); /* Fixup kobjects */ + dev_set_uevent_suppress(&dev->dev, 1); err = device_rename(&dev->dev, dev->name); + dev_set_uevent_suppress(&dev->dev, 0); WARN_ON(err); + /* Send a netdev-add uevent to the new namespace */ + kobject_uevent(&dev->dev.kobj, KOBJ_ADD); + netdev_adjacent_add_links(dev); + /* Adapt owner in case owning user namespace of target network * namespace is different from the original one. 
*/ diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index feeddf95f450..9a66cf5015f2 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -322,9 +322,9 @@ static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr) * frames and not forward them), it must set IFF_SEE_ALL_HWTSTAMP_REQUESTS in * dev->priv_flags. */ -static int dev_set_hwtstamp_phylib(struct net_device *dev, - struct kernel_hwtstamp_config *cfg, - struct netlink_ext_ack *extack) +int dev_set_hwtstamp_phylib(struct net_device *dev, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; bool phy_ts = phy_has_hwtstamp(dev->phydev); @@ -363,6 +363,7 @@ static int dev_set_hwtstamp_phylib(struct net_device *dev, return 0; } +EXPORT_SYMBOL_GPL(dev_set_hwtstamp_phylib); static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr) { diff --git a/net/core/page_pool.c b/net/core/page_pool.c index dec544337236..df2a06d7da52 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -173,7 +173,8 @@ static int page_pool_init(struct page_pool *pool, { unsigned int ring_qsize = 1024; /* Default */ - memcpy(&pool->p, params, sizeof(pool->p)); + memcpy(&pool->p, ¶ms->fast, sizeof(pool->p)); + memcpy(&pool->slow, ¶ms->slow, sizeof(pool->slow)); /* Validate only known flags were used */ if (pool->p.flags & ~(PP_FLAG_ALL)) @@ -211,6 +212,8 @@ static int page_pool_init(struct page_pool *pool, */ } + pool->has_init_callback = !!pool->slow.init_callback; + #ifdef CONFIG_PAGE_POOL_STATS pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats); if (!pool->recycle_stats) @@ -388,8 +391,8 @@ static void page_pool_set_pp_info(struct page_pool *pool, * the overhead is negligible. */ page_pool_fragment_page(page, 1); - if (pool->p.init_callback) - pool->p.init_callback(page, pool->p.init_arg); + if (pool->has_init_callback) + pool->slow.init_callback(page, pool->slow.init_arg); } static void page_pool_clear_pp_info(struct page *page) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e8431c6c8490..592164c2a540 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3849,7 +3849,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; err = -ENOBUFS; - nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL); + nskb = nlmsg_new_large(if_nlmsg_size(dev, ext_filter_mask)); if (nskb == NULL) goto out; diff --git a/net/devlink/core.c b/net/devlink/core.c index 6984877e9f10..4275a2bc6d8e 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -503,14 +503,14 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) * all devlink instances from this namespace into init_net. 
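The page_pool change above splits the driver supplied parameters into a hot "fast" part copied into the frequently accessed head of the pool and a cold "slow" part, with a cached boolean so the allocation hot path never has to dereference the cold fields. A standalone sketch of that pattern (illustrative; the structs below are generic stand-ins, not the real page_pool definitions)::

  #include <assert.h>
  #include <stdbool.h>
  #include <stddef.h>

  struct params_fast { unsigned int order; unsigned int pool_size; };
  struct params_slow { void (*init_callback)(void *arg); void *init_arg; };
  struct params { struct params_fast fast; struct params_slow slow; };

  struct pool {
      struct params_fast p;    /* hot: read on every allocation */
      bool has_init_callback;  /* hot: cached from the slow part */
      struct params_slow slow; /* cold: only read when actually needed */
  };

  static void pool_init(struct pool *pool, const struct params *params)
  {
      pool->p = params->fast;
      pool->slow = params->slow;
      pool->has_init_callback = pool->slow.init_callback != NULL;
  }

  int main(void)
  {
      struct params params = { .fast = { .order = 0, .pool_size = 256 } };
      struct pool pool;

      pool_init(&pool, &params);
      assert(!pool.has_init_callback && pool.p.pool_size == 256);
      return 0;
  }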
*/ devlinks_xa_for_each_registered_get(net, index, devlink) { - devl_lock(devlink); + devl_dev_lock(devlink, true); err = 0; if (devl_is_registered(devlink)) err = devlink_reload(devlink, &init_net, DEVLINK_RELOAD_ACTION_DRIVER_REINIT, DEVLINK_RELOAD_LIMIT_UNSPEC, &actions_performed, NULL); - devl_unlock(devlink); + devl_dev_unlock(devlink, true); devlink_put(devlink); if (err && err != -EOPNOTSUPP) pr_warn("Failed to reload devlink instance into init_net\n"); diff --git a/net/devlink/dev.c b/net/devlink/dev.c index 4fc7adb32663..ea6a92f2e6a2 100644 --- a/net/devlink/dev.c +++ b/net/devlink/dev.c @@ -4,6 +4,7 @@ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> */ +#include <linux/device.h> #include <net/genetlink.h> #include <net/sock.h> #include "devl_internal.h" @@ -433,6 +434,13 @@ int devlink_reload(struct devlink *devlink, struct net *dest_net, struct net *curr_net; int err; + /* Make sure the reload operations are invoked with the device lock + * held to allow drivers to trigger functionality that expects it + * (e.g., PCI reset) and to close possible races between these + * operations and probe/remove. + */ + device_lock_assert(devlink->dev); + memcpy(remote_reload_stats, devlink->stats.remote_reload_stats, sizeof(remote_reload_stats)); diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index 183dbe3807ab..5ea2e2012e93 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -3,6 +3,7 @@ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> */ +#include <linux/device.h> #include <linux/etherdevice.h> #include <linux/mutex.h> #include <linux/netdevice.h> @@ -96,6 +97,20 @@ static inline bool devl_is_registered(struct devlink *devlink) return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); } +static inline void devl_dev_lock(struct devlink *devlink, bool dev_lock) +{ + if (dev_lock) + device_lock(devlink->dev); + devl_lock(devlink); +} + +static inline void devl_dev_unlock(struct devlink *devlink, bool dev_lock) +{ + devl_unlock(devlink); + if (dev_lock) + device_unlock(devlink->dev); +} + typedef void devlink_rel_notify_cb_t(struct devlink *devlink, u32 obj_index); typedef void devlink_rel_cleanup_cb_t(struct devlink *devlink, u32 obj_index, u32 rel_index); @@ -111,9 +126,6 @@ int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink, bool *msg_updated); /* Netlink */ -#define DEVLINK_NL_FLAG_NEED_PORT BIT(0) -#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1) - enum devlink_multicast_groups { DEVLINK_MCGRP_CONFIG, }; @@ -140,7 +152,8 @@ typedef int devlink_nl_dump_one_func_t(struct sk_buff *msg, int flags); struct devlink * -devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs); +devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs, + bool dev_lock); int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb, devlink_nl_dump_one_func_t *dump_one); diff --git a/net/devlink/health.c b/net/devlink/health.c index 695df61f8ac2..71ae121dc739 100644 --- a/net/devlink/health.c +++ b/net/devlink/health.c @@ -1151,7 +1151,8 @@ devlink_health_reporter_get_from_cb_lock(struct netlink_callback *cb) struct nlattr **attrs = info->attrs; struct devlink *devlink; - devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs); + devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs, + false); if (IS_ERR(devlink)) return NULL; diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c index d0b90ebc8b15..fa9afe3e6d9b 100644 --- a/net/devlink/netlink.c +++ 
b/net/devlink/netlink.c @@ -9,6 +9,10 @@ #include "devl_internal.h" +#define DEVLINK_NL_FLAG_NEED_PORT BIT(0) +#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1) +#define DEVLINK_NL_FLAG_NEED_DEV_LOCK BIT(2) + static const struct genl_multicast_group devlink_nl_mcgrps[] = { [DEVLINK_MCGRP_CONFIG] = { .name = DEVLINK_GENL_MCGRP_CONFIG_NAME }, }; @@ -61,7 +65,8 @@ int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info) } struct devlink * -devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs) +devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs, + bool dev_lock) { struct devlink *devlink; unsigned long index; @@ -75,12 +80,12 @@ devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs) devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]); devlinks_xa_for_each_registered_get(net, index, devlink) { - devl_lock(devlink); + devl_dev_lock(devlink, dev_lock); if (devl_is_registered(devlink) && strcmp(devlink->dev->bus->name, busname) == 0 && strcmp(dev_name(devlink->dev), devname) == 0) return devlink; - devl_unlock(devlink); + devl_dev_unlock(devlink, dev_lock); devlink_put(devlink); } @@ -90,11 +95,13 @@ devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs) static int __devlink_nl_pre_doit(struct sk_buff *skb, struct genl_info *info, u8 flags) { + bool dev_lock = flags & DEVLINK_NL_FLAG_NEED_DEV_LOCK; struct devlink_port *devlink_port; struct devlink *devlink; int err; - devlink = devlink_get_from_attrs_lock(genl_info_net(info), info->attrs); + devlink = devlink_get_from_attrs_lock(genl_info_net(info), info->attrs, + dev_lock); if (IS_ERR(devlink)) return PTR_ERR(devlink); @@ -114,7 +121,7 @@ static int __devlink_nl_pre_doit(struct sk_buff *skb, struct genl_info *info, return 0; unlock: - devl_unlock(devlink); + devl_dev_unlock(devlink, dev_lock); devlink_put(devlink); return err; } @@ -131,6 +138,12 @@ int devlink_nl_pre_doit_port(const struct genl_split_ops *ops, return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_PORT); } +int devlink_nl_pre_doit_dev_lock(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEV_LOCK); +} + int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info) @@ -138,16 +151,30 @@ int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops, return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT); } -void devlink_nl_post_doit(const struct genl_split_ops *ops, - struct sk_buff *skb, struct genl_info *info) +static void __devlink_nl_post_doit(struct sk_buff *skb, struct genl_info *info, + u8 flags) { + bool dev_lock = flags & DEVLINK_NL_FLAG_NEED_DEV_LOCK; struct devlink *devlink; devlink = info->user_ptr[0]; - devl_unlock(devlink); + devl_dev_unlock(devlink, dev_lock); devlink_put(devlink); } +void devlink_nl_post_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + __devlink_nl_post_doit(skb, info, 0); +} + +void +devlink_nl_post_doit_dev_lock(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + __devlink_nl_post_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEV_LOCK); +} + static int devlink_nl_inst_single_dumpit(struct sk_buff *msg, struct netlink_callback *cb, int flags, devlink_nl_dump_one_func_t *dump_one, @@ -156,7 +183,7 @@ static int devlink_nl_inst_single_dumpit(struct sk_buff *msg, struct devlink *devlink; int err; - devlink = 
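The devl_dev_lock()/devl_dev_unlock() helpers and the dev_lock aware pre/post doit handlers above implement a conditional two-level locking scheme: the outer device lock is only taken for operations that need it (such as reload), and the locks are always released in reverse order. A standalone pthread-based sketch of that scheme (illustrative analogy only, not the kernel locking primitives)::

  #include <pthread.h>
  #include <stdbool.h>

  struct instance {
      pthread_mutex_t dev_lock;  /* outer, e.g. the struct device lock */
      pthread_mutex_t inst_lock; /* inner, e.g. the devlink instance lock */
  };

  static void instance_lock(struct instance *inst, bool dev_lock)
  {
      if (dev_lock)
          pthread_mutex_lock(&inst->dev_lock);
      pthread_mutex_lock(&inst->inst_lock);
  }

  static void instance_unlock(struct instance *inst, bool dev_lock)
  {
      pthread_mutex_unlock(&inst->inst_lock);
      if (dev_lock)
          pthread_mutex_unlock(&inst->dev_lock);
  }

  int main(void)
  {
      struct instance inst = {
          .dev_lock = PTHREAD_MUTEX_INITIALIZER,
          .inst_lock = PTHREAD_MUTEX_INITIALIZER,
      };

      instance_lock(&inst, true);   /* e.g. a reload-style operation */
      instance_unlock(&inst, true);

      instance_lock(&inst, false);  /* e.g. a plain get/dump operation */
      instance_unlock(&inst, false);
      return 0;
  }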
devlink_get_from_attrs_lock(sock_net(msg->sk), attrs); + devlink = devlink_get_from_attrs_lock(sock_net(msg->sk), attrs, false); if (IS_ERR(devlink)) return PTR_ERR(devlink); err = dump_one(msg, devlink, cb, flags | NLM_F_DUMP_FILTERED); diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index 788dfdc498a9..95f9b4350ab7 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -846,9 +846,9 @@ const struct genl_split_ops devlink_nl_ops[73] = { { .cmd = DEVLINK_CMD_RELOAD, .validate = GENL_DONT_VALIDATE_STRICT, - .pre_doit = devlink_nl_pre_doit, + .pre_doit = devlink_nl_pre_doit_dev_lock, .doit = devlink_nl_reload_doit, - .post_doit = devlink_nl_post_doit, + .post_doit = devlink_nl_post_doit_dev_lock, .policy = devlink_reload_nl_policy, .maxattr = DEVLINK_ATTR_RELOAD_LIMITS, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h index 0e9e89c31c31..02f3c0bfae0e 100644 --- a/net/devlink/netlink_gen.h +++ b/net/devlink/netlink_gen.h @@ -22,12 +22,17 @@ int devlink_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); int devlink_nl_pre_doit_port(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); +int devlink_nl_pre_doit_dev_lock(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info); int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); void devlink_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); +void +devlink_nl_post_doit_dev_lock(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info); int devlink_nl_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); diff --git a/net/devlink/region.c b/net/devlink/region.c index 0aab7b82d678..e3bab458db94 100644 --- a/net/devlink/region.c +++ b/net/devlink/region.c @@ -883,7 +883,8 @@ int devlink_nl_region_read_dumpit(struct sk_buff *skb, start_offset = state->start_offset; - devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs); + devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs, + false); if (IS_ERR(devlink)) return PTR_ERR(devlink); diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c index 4da5bad1a7aa..a019226ec6d2 100644 --- a/net/dsa/tag_rtl4_a.c +++ b/net/dsa/tag_rtl4_a.c @@ -23,7 +23,6 @@ #define RTL4_A_NAME "rtl4a" #define RTL4_A_HDR_LEN 4 -#define RTL4_A_ETHERTYPE 0x8899 #define RTL4_A_PROTOCOL_SHIFT 12 /* * 0x1 = Realtek Remote Control protocol (RRCP) @@ -54,7 +53,7 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb, /* Set Ethertype */ p = (__be16 *)tag; - *p = htons(RTL4_A_ETHERTYPE); + *p = htons(ETH_P_REALTEK); out = (RTL4_A_PROTOCOL_RTL8366RB << RTL4_A_PROTOCOL_SHIFT); /* The lower bits indicate the port number */ @@ -82,7 +81,7 @@ static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb, tag = dsa_etype_header_pos_rx(skb); p = (__be16 *)tag; etype = ntohs(*p); - if (etype != RTL4_A_ETHERTYPE) { + if (etype != ETH_P_REALTEK) { /* Not custom, just pass through */ netdev_dbg(dev, "non-realtek ethertype 0x%04x\n", etype); return skb; diff --git a/net/ethtool/common.c b/net/ethtool/common.c index b4419fb6df6a..11d8797f63f6 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -661,6 +661,12 @@ int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index) } 
EXPORT_SYMBOL(ethtool_get_phc_vclocks); +int ethtool_get_ts_info_by_layer(struct net_device *dev, struct ethtool_ts_info *info) +{ + return __ethtool_get_ts_info(dev, info); +} +EXPORT_SYMBOL(ethtool_get_ts_info_by_layer); + const struct ethtool_phy_ops *ethtool_phy_ops; void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 306f942c3b28..7ceb9ac6e730 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -173,7 +173,24 @@ static int hsr_dev_open(struct net_device *dev) static int hsr_dev_close(struct net_device *dev) { - /* Nothing to do here. */ + struct hsr_port *port; + struct hsr_priv *hsr; + + hsr = netdev_priv(dev); + hsr_for_each_port(hsr, port) { + if (port->type == HSR_PT_MASTER) + continue; + switch (port->type) { + case HSR_PT_SLAVE_A: + case HSR_PT_SLAVE_B: + dev_uc_unsync(port->dev, dev); + dev_mc_unsync(port->dev, dev); + break; + default: + break; + } + } + return 0; } @@ -404,12 +421,60 @@ void hsr_del_ports(struct hsr_priv *hsr) hsr_del_port(port); } +static void hsr_set_rx_mode(struct net_device *dev) +{ + struct hsr_port *port; + struct hsr_priv *hsr; + + hsr = netdev_priv(dev); + + hsr_for_each_port(hsr, port) { + if (port->type == HSR_PT_MASTER) + continue; + switch (port->type) { + case HSR_PT_SLAVE_A: + case HSR_PT_SLAVE_B: + dev_mc_sync_multiple(port->dev, dev); + dev_uc_sync_multiple(port->dev, dev); + break; + default: + break; + } + } +} + +static void hsr_change_rx_flags(struct net_device *dev, int change) +{ + struct hsr_port *port; + struct hsr_priv *hsr; + + hsr = netdev_priv(dev); + + hsr_for_each_port(hsr, port) { + if (port->type == HSR_PT_MASTER) + continue; + switch (port->type) { + case HSR_PT_SLAVE_A: + case HSR_PT_SLAVE_B: + if (change & IFF_ALLMULTI) + dev_set_allmulti(port->dev, + dev->flags & + IFF_ALLMULTI ? 1 : -1); + break; + default: + break; + } + } +} + static const struct net_device_ops hsr_device_ops = { .ndo_change_mtu = hsr_dev_change_mtu, .ndo_open = hsr_dev_open, .ndo_stop = hsr_dev_close, .ndo_start_xmit = hsr_dev_xmit, + .ndo_change_rx_flags = hsr_change_rx_flags, .ndo_fix_features = hsr_fix_features, + .ndo_set_rx_mode = hsr_set_rx_mode, }; static struct device_type hsr_type = { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5f693bbd578d..86cc6d36f818 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -482,6 +482,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) const int code = icmp_hdr(skb)->code; struct sock *sk; struct request_sock *fastopen; + bool harderr = false; u32 seq, snd_una; int err; struct net *net = dev_net(skb->dev); @@ -555,6 +556,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) goto out; case ICMP_PARAMETERPROB: err = EPROTO; + harderr = true; break; case ICMP_DEST_UNREACH: if (code > NR_ICMP_UNREACH) @@ -579,6 +581,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) } err = icmp_err_convert[code].errno; + harderr = icmp_err_convert[code].fatal; /* check if this ICMP message allows revert of backoff. * (see RFC 6069) */ @@ -604,6 +607,9 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) ip_icmp_error(sk, skb, err, th->dest, info, (u8 *)th); + if (!harderr) + break; + if (!sock_owned_by_user(sk)) { WRITE_ONCE(sk->sk_err, err); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 1f9f6c1c196b..d1ad20ce1c8c 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -626,7 +626,6 @@ void tcp_retransmit_timer(struct sock *sk) * implemented ftp to mars will work nicely. 
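The tcp_v4_err() change above distinguishes hard ICMP errors, which are surfaced through sk_err, from soft ones, which are only recorded so the connection keeps retransmitting. A standalone sketch of that split (illustrative; the toy socket below is not the kernel's struct sock, and the error mappings are just plausible examples)::

  #include <assert.h>
  #include <errno.h>
  #include <stdbool.h>

  struct toy_sock { int sk_err; int sk_err_soft; };

  static void handle_icmp_error(struct toy_sock *sk, int err, bool harderr)
  {
      if (!harderr) {
          sk->sk_err_soft = err; /* remembered only, keep retransmitting */
          return;
      }
      sk->sk_err = err;          /* surfaced to the application */
  }

  int main(void)
  {
      struct toy_sock sk = { 0 };

      handle_icmp_error(&sk, EHOSTUNREACH, false); /* e.g. host unreachable */
      assert(sk.sk_err == 0 && sk.sk_err_soft == EHOSTUNREACH);

      handle_icmp_error(&sk, ECONNREFUSED, true);  /* e.g. port unreachable */
      assert(sk.sk_err == ECONNREFUSED);
      return 0;
  }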
We will have to fix * the 120 second clamps though! */ - icsk->icsk_backoff++; out_reset_timer: /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is @@ -647,11 +646,12 @@ out_reset_timer: tcp_rto_min(sk), TCP_RTO_MAX); } else if (sk->sk_state != TCP_SYN_SENT || - icsk->icsk_backoff > + tp->total_rto > READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) { /* Use normal (exponential) backoff unless linear timeouts are * activated. */ + icsk->icsk_backoff++; icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 937a02c2e534..43deda49cc52 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -381,7 +381,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct tcp_sock *tp; __u32 seq, snd_una; struct sock *sk; - bool fatal; + bool harderr; int err; sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, @@ -402,9 +402,9 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; } seq = ntohl(th->seq); - fatal = icmpv6_err_convert(type, code, &err); + harderr = icmpv6_err_convert(type, code, &err); if (sk->sk_state == TCP_NEW_SYN_RECV) { - tcp_req_err(sk, seq, fatal); + tcp_req_err(sk, seq, harderr); return 0; } @@ -489,6 +489,9 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th); + if (!harderr) + break; + if (!sock_owned_by_user(sk)) { WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 03757e76bb6b..374412ed780b 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -105,8 +105,11 @@ enum { struct ncsi_channel_version { - u32 version; /* Supported BCD encoded NCSI version */ - u32 alpha2; /* Supported BCD encoded NCSI version */ + u8 major; /* NCSI version major */ + u8 minor; /* NCSI version minor */ + u8 update; /* NCSI version update */ + char alpha1; /* NCSI version alpha1 */ + char alpha2; /* NCSI version alpha2 */ u8 fw_name[12]; /* Firmware name string */ u32 fw_version; /* Firmware version */ u16 pci_ids[4]; /* PCI identification */ diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c index fd2236ee9a79..b3ff37a181d7 100644 --- a/net/ncsi/ncsi-cmd.c +++ b/net/ncsi/ncsi-cmd.c @@ -270,7 +270,8 @@ static struct ncsi_cmd_handler { { NCSI_PKT_CMD_GPS, 0, ncsi_cmd_handler_default }, { NCSI_PKT_CMD_OEM, -1, ncsi_cmd_handler_oem }, { NCSI_PKT_CMD_PLDM, 0, NULL }, - { NCSI_PKT_CMD_GPUUID, 0, ncsi_cmd_handler_default } + { NCSI_PKT_CMD_GPUUID, 0, ncsi_cmd_handler_default }, + { NCSI_PKT_CMD_GMCMA, 0, ncsi_cmd_handler_default } }; static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca) diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index d9da942ad53d..745c788f1d1d 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -689,8 +689,6 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, return 0; } -#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY) - static int ncsi_oem_keep_phy_intel(struct ncsi_cmd_arg *nca) { unsigned char data[NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN]; @@ -716,10 +714,6 @@ static int ncsi_oem_keep_phy_intel(struct ncsi_cmd_arg *nca) return ret; } -#endif - -#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) - /* NCSI OEM Command APIs */ static int ncsi_oem_gma_handler_bcm(struct ncsi_cmd_arg *nca) { @@ -856,8 +850,6 @@ 
static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id) return nch->handler(nca); } -#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */ - /* Determine if a given channel from the channel_queue should be used for Tx */ static bool ncsi_channel_is_tx(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc) @@ -1039,20 +1031,23 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) goto error; } - nd->state = ncsi_dev_state_config_oem_gma; + nd->state = IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) + ? ncsi_dev_state_config_oem_gma + : ncsi_dev_state_config_clear_vids; break; case ncsi_dev_state_config_oem_gma: nd->state = ncsi_dev_state_config_clear_vids; - ret = -1; -#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) - nca.type = NCSI_PKT_CMD_OEM; nca.package = np->id; nca.channel = nc->id; ndp->pending_req_num = 1; - ret = ncsi_gma_handler(&nca, nc->version.mf_id); -#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */ - + if (nc->version.major >= 1 && nc->version.minor >= 2) { + nca.type = NCSI_PKT_CMD_GMCMA; + ret = ncsi_xmit_cmd(&nca); + } else { + nca.type = NCSI_PKT_CMD_OEM; + ret = ncsi_gma_handler(&nca, nc->version.mf_id); + } if (ret < 0) schedule_work(&ndp->work); @@ -1404,7 +1399,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) schedule_work(&ndp->work); break; -#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) case ncsi_dev_state_probe_mlx_gma: ndp->pending_req_num = 1; @@ -1429,7 +1423,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_probe_cis; break; -#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */ case ncsi_dev_state_probe_cis: ndp->pending_req_num = NCSI_RESERVED_CHANNEL; @@ -1447,7 +1440,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY)) nd->state = ncsi_dev_state_probe_keep_phy; break; -#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY) case ncsi_dev_state_probe_keep_phy: ndp->pending_req_num = 1; @@ -1460,7 +1452,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_probe_gvi; break; -#endif /* CONFIG_NCSI_OEM_CMD_KEEP_PHY */ case ncsi_dev_state_probe_gvi: case ncsi_dev_state_probe_gc: case ncsi_dev_state_probe_gls: diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index a3a6753a1db7..2f872d064396 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -71,8 +71,8 @@ static int ncsi_write_channel_info(struct sk_buff *skb, if (nc == nc->package->preferred_channel) nla_put_flag(skb, NCSI_CHANNEL_ATTR_FORCED); - nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.version); - nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.alpha2); + nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.major); + nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.minor); nla_put_string(skb, NCSI_CHANNEL_ATTR_VERSION_STR, nc->version.fw_name); vid_nest = nla_nest_start_noflag(skb, NCSI_CHANNEL_ATTR_VLAN_LIST); diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h index ba66c7dc3a21..f2f3b5c1b941 100644 --- a/net/ncsi/ncsi-pkt.h +++ b/net/ncsi/ncsi-pkt.h @@ -197,9 +197,12 @@ struct ncsi_rsp_gls_pkt { /* Get Version ID */ struct ncsi_rsp_gvi_pkt { struct ncsi_rsp_pkt_hdr rsp; /* Response header */ - __be32 ncsi_version; /* NCSI version */ + unsigned char major; /* NCSI version major */ + unsigned char minor; /* NCSI version minor */ + unsigned char update; /* NCSI version update */ + unsigned char alpha1; /* NCSI version alpha1 */ unsigned char reserved[3]; /* Reserved */ - unsigned char alpha2; /* NCSI version 
*/ + unsigned char alpha2; /* NCSI version alpha2 */ unsigned char fw_name[12]; /* f/w name string */ __be32 fw_version; /* f/w version */ __be16 pci_ids[4]; /* PCI IDs */ @@ -335,6 +338,14 @@ struct ncsi_rsp_gpuuid_pkt { __be32 checksum; }; +/* Get MC MAC Address */ +struct ncsi_rsp_gmcma_pkt { + struct ncsi_rsp_pkt_hdr rsp; + unsigned char address_count; + unsigned char reserved[3]; + unsigned char addresses[][ETH_ALEN]; +}; + /* AEN: Link State Change */ struct ncsi_aen_lsc_pkt { struct ncsi_aen_pkt_hdr aen; /* AEN header */ @@ -395,6 +406,7 @@ struct ncsi_aen_hncdsc_pkt { #define NCSI_PKT_CMD_GPUUID 0x52 /* Get package UUID */ #define NCSI_PKT_CMD_QPNPR 0x56 /* Query Pending NC PLDM request */ #define NCSI_PKT_CMD_SNPR 0x57 /* Send NC PLDM Reply */ +#define NCSI_PKT_CMD_GMCMA 0x58 /* Get MC MAC Address */ /* NCSI packet responses */ @@ -430,6 +442,7 @@ struct ncsi_aen_hncdsc_pkt { #define NCSI_PKT_RSP_GPUUID (NCSI_PKT_CMD_GPUUID + 0x80) #define NCSI_PKT_RSP_QPNPR (NCSI_PKT_CMD_QPNPR + 0x80) #define NCSI_PKT_RSP_SNPR (NCSI_PKT_CMD_SNPR + 0x80) +#define NCSI_PKT_RSP_GMCMA (NCSI_PKT_CMD_GMCMA + 0x80) /* NCSI response code/reason */ #define NCSI_PKT_RSP_C_COMPLETED 0x0000 /* Command Completed */ diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 069c2659074b..bee290d0f48b 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -19,6 +19,19 @@ #include "ncsi-pkt.h" #include "ncsi-netlink.h" +/* Nibbles within [0xA, 0xF] add zero "0" to the returned value. + * Optional fields (encoded as 0xFF) will default to zero. + */ +static u8 decode_bcd_u8(u8 x) +{ + int lo = x & 0xF; + int hi = x >> 4; + + lo = lo < 0xA ? lo : 0; + hi = hi < 0xA ? hi : 0; + return lo + hi * 10; +} + static int ncsi_validate_rsp_pkt(struct ncsi_request *nr, unsigned short payload) { @@ -755,9 +768,18 @@ static int ncsi_rsp_handler_gvi(struct ncsi_request *nr) if (!nc) return -ENODEV; - /* Update to channel's version info */ + /* Update channel's version info + * + * Major, minor, and update fields are supposed to be + * unsigned integers encoded as packed BCD. + * + * Alpha1 and alpha2 are ISO/IEC 8859-1 characters. 
+ */ ncv = &nc->version; - ncv->version = ntohl(rsp->ncsi_version); + ncv->major = decode_bcd_u8(rsp->major); + ncv->minor = decode_bcd_u8(rsp->minor); + ncv->update = decode_bcd_u8(rsp->update); + ncv->alpha1 = rsp->alpha1; ncv->alpha2 = rsp->alpha2; memcpy(ncv->fw_name, rsp->fw_name, 12); ncv->fw_version = ntohl(rsp->fw_version); @@ -1069,6 +1091,44 @@ static int ncsi_rsp_handler_netlink(struct ncsi_request *nr) return ret; } +static int ncsi_rsp_handler_gmcma(struct ncsi_request *nr) +{ + struct ncsi_dev_priv *ndp = nr->ndp; + struct net_device *ndev = ndp->ndev.dev; + struct ncsi_rsp_gmcma_pkt *rsp; + struct sockaddr saddr; + int ret = -1; + int i; + + rsp = (struct ncsi_rsp_gmcma_pkt *)skb_network_header(nr->rsp); + saddr.sa_family = ndev->type; + ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + + netdev_info(ndev, "NCSI: Received %d provisioned MAC addresses\n", + rsp->address_count); + for (i = 0; i < rsp->address_count; i++) { + netdev_info(ndev, "NCSI: MAC address %d: %02x:%02x:%02x:%02x:%02x:%02x\n", + i, rsp->addresses[i][0], rsp->addresses[i][1], + rsp->addresses[i][2], rsp->addresses[i][3], + rsp->addresses[i][4], rsp->addresses[i][5]); + } + + for (i = 0; i < rsp->address_count; i++) { + memcpy(saddr.sa_data, &rsp->addresses[i], ETH_ALEN); + ret = ndev->netdev_ops->ndo_set_mac_address(ndev, &saddr); + if (ret < 0) { + netdev_warn(ndev, "NCSI: Unable to assign %pM to device\n", + saddr.sa_data); + continue; + } + netdev_warn(ndev, "NCSI: Set MAC address to %pM\n", saddr.sa_data); + break; + } + + ndp->gma_flag = ret == 0; + return ret; +} + static struct ncsi_rsp_handler { unsigned char type; int payload; @@ -1105,7 +1165,8 @@ static struct ncsi_rsp_handler { { NCSI_PKT_RSP_PLDM, -1, ncsi_rsp_handler_pldm }, { NCSI_PKT_RSP_GPUUID, 20, ncsi_rsp_handler_gpuuid }, { NCSI_PKT_RSP_QPNPR, -1, ncsi_rsp_handler_pldm }, - { NCSI_PKT_RSP_SNPR, -1, ncsi_rsp_handler_pldm } + { NCSI_PKT_RSP_SNPR, -1, ncsi_rsp_handler_pldm }, + { NCSI_PKT_RSP_GMCMA, -1, ncsi_rsp_handler_gmcma }, }; int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index eb086b06d60d..177126fb0484 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1204,8 +1204,7 @@ struct sock *netlink_getsockbyfilp(struct file *filp) return sock; } -static struct sk_buff *netlink_alloc_large_skb(unsigned int size, - int broadcast) +struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast) { struct sk_buff *skb; void *data; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index d5bdfd4a7655..289e1755c26b 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -71,7 +71,7 @@ struct tc_u_hnode { struct tc_u_hnode __rcu *next; u32 handle; u32 prio; - int refcnt; + refcount_t refcnt; unsigned int divisor; struct idr handle_idr; bool is_root; @@ -86,7 +86,7 @@ struct tc_u_hnode { struct tc_u_common { struct tc_u_hnode __rcu *hlist; void *ptr; - int refcnt; + refcount_t refcnt; struct idr handle_idr; struct hlist_node hnode; long knodes; @@ -359,7 +359,7 @@ static int u32_init(struct tcf_proto *tp) if (root_ht == NULL) return -ENOBUFS; - root_ht->refcnt++; + refcount_set(&root_ht->refcnt, 1); root_ht->handle = tp_c ? 
gen_new_htid(tp_c, root_ht) : 0x80000000; root_ht->prio = tp->prio; root_ht->is_root = true; @@ -371,18 +371,20 @@ static int u32_init(struct tcf_proto *tp) kfree(root_ht); return -ENOBUFS; } + refcount_set(&tp_c->refcnt, 1); tp_c->ptr = key; INIT_HLIST_NODE(&tp_c->hnode); idr_init(&tp_c->handle_idr); hlist_add_head(&tp_c->hnode, tc_u_hash(key)); + } else { + refcount_inc(&tp_c->refcnt); } - tp_c->refcnt++; RCU_INIT_POINTER(root_ht->next, tp_c->hlist); rcu_assign_pointer(tp_c->hlist, root_ht); - root_ht->refcnt++; + /* root_ht must be destroyed when tcf_proto is destroyed */ rcu_assign_pointer(tp->root, root_ht); tp->data = tp_c; return 0; @@ -393,7 +395,7 @@ static void __u32_destroy_key(struct tc_u_knode *n) struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); tcf_exts_destroy(&n->exts); - if (ht && --ht->refcnt == 0) + if (ht && refcount_dec_and_test(&ht->refcnt)) kfree(ht); kfree(n); } @@ -601,8 +603,6 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, struct tc_u_hnode __rcu **hn; struct tc_u_hnode *phn; - WARN_ON(--ht->refcnt); - u32_clear_hnode(tp, ht, extack); hn = &tp_c->hlist; @@ -630,10 +630,10 @@ static void u32_destroy(struct tcf_proto *tp, bool rtnl_held, WARN_ON(root_ht == NULL); - if (root_ht && --root_ht->refcnt == 1) + if (root_ht && refcount_dec_and_test(&root_ht->refcnt)) u32_destroy_hnode(tp, root_ht, extack); - if (--tp_c->refcnt == 0) { + if (refcount_dec_and_test(&tp_c->refcnt)) { struct tc_u_hnode *ht; hlist_del(&tp_c->hnode); @@ -645,7 +645,7 @@ static void u32_destroy(struct tcf_proto *tp, bool rtnl_held, /* u32_destroy_key() will later free ht for us, if it's * still referenced by some knode */ - if (--ht->refcnt == 0) + if (refcount_dec_and_test(&ht->refcnt)) kfree_rcu(ht, rcu); } @@ -674,7 +674,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, return -EINVAL; } - if (ht->refcnt == 1) { + if (refcount_dec_if_one(&ht->refcnt)) { u32_destroy_hnode(tp, ht, extack); } else { NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter"); @@ -682,7 +682,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, } out: - *last = tp_c->refcnt == 1 && tp_c->knodes == 0; + *last = refcount_read(&tp_c->refcnt) == 1 && tp_c->knodes == 0; return ret; } @@ -766,14 +766,14 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, NL_SET_ERR_MSG_MOD(extack, "Not linking to root node"); return -EINVAL; } - ht_down->refcnt++; + refcount_inc(&ht_down->refcnt); } ht_old = rtnl_dereference(n->ht_down); rcu_assign_pointer(n->ht_down, ht_down); if (ht_old) - ht_old->refcnt--; + refcount_dec(&ht_old->refcnt); } if (ifindex >= 0) @@ -852,7 +852,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, /* bump reference count as long as we hold pointer to structure */ if (ht) - ht->refcnt++; + refcount_inc(&ht->refcnt); return new; } @@ -932,7 +932,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, ht_old = rtnl_dereference(n->ht_down); if (ht_old) - ht_old->refcnt++; + refcount_inc(&ht_old->refcnt); } __u32_destroy_key(new); return err; @@ -980,7 +980,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return err; } } - ht->refcnt = 1; + refcount_set(&ht->refcnt, 1); ht->divisor = divisor; ht->handle = handle; ht->prio = tp->prio; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 4195a4bc26ca..8dd0e5925342 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -522,8 +522,9 @@ static void dev_watchdog(struct timer_list *t) if 
(unlikely(timedout_ms)) { trace_net_dev_xmit_timeout(dev, i); - WARN_ONCE(1, "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out %u ms\n", - dev->name, netdev_drivername(dev), i, timedout_ms); + netdev_crit(dev, "NETDEV WATCHDOG: CPU: %d: transmit queue %u timed out %u ms\n", + raw_smp_processor_id(), + i, timedout_ms); netif_freeze_queues(dev); dev->netdev_ops->ndo_tx_timeout(dev, i); netif_unfreeze_queues(dev); diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index c763008a8adb..079aebb16ed8 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -168,7 +168,7 @@ static struct sk_buff *tipc_get_err_tlv(char *str) int str_len = strlen(str) + 1; struct sk_buff *buf; - buf = tipc_tlv_alloc(TLV_SPACE(str_len)); + buf = tipc_tlv_alloc(str_len); if (buf) tipc_add_tlv(buf, TIPC_TLV_ERROR_STRING, str, str_len); diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 6965c94dfdaf..09e4f2ff5658 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -20,7 +20,7 @@ SYNOPSIS **bpftool** **version** - *OBJECT* := { **map** | **program** | **link** | **cgroup** | **perf** | **net** | **feature** | + *OBJECT* := { **map** | **prog** | **link** | **cgroup** | **perf** | **net** | **feature** | **btf** | **gen** | **struct_ops** | **iter** } *OPTIONS* := { { **-V** | **--version** } | |COMMON_OPTIONS| } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0f6cdf52b1da..7a5498242eaa 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1200,6 +1200,9 @@ enum bpf_perf_event_type { */ #define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6) +/* The verifier internal test flag. Behavior is undefined */ +#define BPF_F_TEST_REG_INVARIANTS (1U << 7) + /* link_create.kprobe_multi.flags used in LINK_CREATE command for * BPF_TRACE_KPROBE_MULTI attach type to create return probe. */ @@ -4517,6 +4520,8 @@ union bpf_attr { * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. + * Note: the user stack will only be populated if the *task* is + * the current task; all other tasks will return -EOPNOTSUPP. * To achieve this, the helper needs *task*, which is a valid * pointer to **struct task_struct**. To store the stacktrace, the * bpf program provides *buf* with a nonnegative *size*. @@ -4528,6 +4533,7 @@ union bpf_attr { * * **BPF_F_USER_STACK** * Collect a user space stack instead of a kernel stack. + * The *task* must be the current task. * **BPF_F_USER_BUILD_ID** * Collect buildid+offset instead of ips for user stack, * only valid if **BPF_F_USER_STACK** is also specified. 
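For context on the bpf_get_task_stack() documentation change above, here is a minimal, illustrative sketch (not part of this series) of a task-iterator program that collects the current task's user stack; it loosely mirrors the get_task_user_stacks/num_user_stacks selftest touched later in this series, and the program name and entry count are made up:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

int num_user_stacks = 0;

SEC("iter/task")
int dump_current_user_stack(struct bpf_iter__task *ctx)
{
	struct task_struct *task = ctx->task;
	__u64 entries[16];	/* buffer size must be a multiple of 8 */
	long nr_bytes;

	if (!task)
		return 0;

	/* Per the note added above, this only succeeds when 'task' is the
	 * current task; for any other task the helper returns -EOPNOTSUPP.
	 */
	nr_bytes = bpf_get_task_stack(task, entries, sizeof(entries),
				      BPF_F_USER_STACK);
	if (nr_bytes > 0)
		num_user_stacks++;

	return 0;
}

On success the helper returns the number of bytes written to the buffer, so a positive return value here means a user stack was actually captured.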
@@ -7151,40 +7157,31 @@ struct bpf_spin_lock { }; struct bpf_timer { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_dynptr { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_head { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_node { - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[3]; } __attribute__((aligned(8))); struct bpf_rb_root { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_rb_node { - __u64 :64; - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[4]; } __attribute__((aligned(8))); struct bpf_refcount { - __u32 :32; + __u32 __opaque[1]; } __attribute__((aligned(4))); struct bpf_sysctl { diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h index b7060f254486..8fe248e14eb6 100644 --- a/tools/lib/bpf/libbpf_common.h +++ b/tools/lib/bpf/libbpf_common.h @@ -79,11 +79,14 @@ */ #define LIBBPF_OPTS_RESET(NAME, ...) \ do { \ - memset(&NAME, 0, sizeof(NAME)); \ - NAME = (typeof(NAME)) { \ - .sz = sizeof(NAME), \ - __VA_ARGS__ \ - }; \ + typeof(NAME) ___##NAME = ({ \ + memset(&___##NAME, 0, sizeof(NAME)); \ + (typeof(NAME)) { \ + .sz = sizeof(NAME), \ + __VA_ARGS__ \ + }; \ + }); \ + memcpy(&NAME, &___##NAME, sizeof(NAME)); \ } while (0) #endif /* __LIBBPF_LIBBPF_COMMON_H */ diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 5b1da2a32ea7..5aa133bf3688 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -45,9 +45,12 @@ #define format_parent_cgroup_path(buf, path) \ format_cgroup_path_pid(buf, path, getppid()) -#define format_classid_path(buf) \ - snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \ - CGROUP_WORK_DIR) +#define format_classid_path_pid(buf, pid) \ + snprintf(buf, sizeof(buf), "%s%s%d", NETCLS_MOUNT_PATH, \ + CGROUP_WORK_DIR, pid) + +#define format_classid_path(buf) \ + format_classid_path_pid(buf, getpid()) static __thread bool cgroup_workdir_mounted; @@ -419,26 +422,23 @@ int create_and_get_cgroup(const char *relative_path) } /** - * get_cgroup_id() - Get cgroup id for a particular cgroup path - * @relative_path: The cgroup path, relative to the workdir, to join + * get_cgroup_id_from_path - Get cgroup id for a particular cgroup path + * @cgroup_workdir: The absolute cgroup path * * On success, it returns the cgroup id. On failure it returns 0, * which is an invalid cgroup id. * If there is a failure, it prints the error to stderr. 
*/ -unsigned long long get_cgroup_id(const char *relative_path) +unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir) { int dirfd, err, flags, mount_id, fhsize; union { unsigned long long cgid; unsigned char raw_bytes[8]; } id; - char cgroup_workdir[PATH_MAX + 1]; struct file_handle *fhp, *fhp2; unsigned long long ret = 0; - format_cgroup_path(cgroup_workdir, relative_path); - dirfd = AT_FDCWD; flags = 0; fhsize = sizeof(*fhp); @@ -474,6 +474,14 @@ free_mem: return ret; } +unsigned long long get_cgroup_id(const char *relative_path) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_cgroup_path(cgroup_workdir, relative_path); + return get_cgroup_id_from_path(cgroup_workdir); +} + int cgroup_setup_and_join(const char *path) { int cg_fd; @@ -523,10 +531,20 @@ int setup_classid_environment(void) return 1; } - if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") && - errno != EBUSY) { - log_err("mount cgroup net_cls"); - return 1; + if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls")) { + if (errno != EBUSY) { + log_err("mount cgroup net_cls"); + return 1; + } + + if (rmdir(NETCLS_MOUNT_PATH)) { + log_err("rmdir cgroup net_cls"); + return 1; + } + if (umount(CGROUP_MOUNT_DFLT)) { + log_err("umount cgroup base"); + return 1; + } } cleanup_classid_environment(); @@ -541,15 +559,16 @@ int setup_classid_environment(void) /** * set_classid() - Set a cgroupv1 net_cls classid - * @id: the numeric classid * - * Writes the passed classid into the cgroup work dir's net_cls.classid + * Writes the classid into the cgroup work dir's net_cls.classid * file in order to later on trigger socket tagging. * + * We leverage the current pid as the classid, ensuring unique identification. + * * On success, it returns 0, otherwise on failure it returns 1. If there * is a failure, it prints the error to stderr. */ -int set_classid(unsigned int id) +int set_classid(void) { char cgroup_workdir[PATH_MAX - 42]; char cgroup_classid_path[PATH_MAX + 1]; @@ -565,7 +584,7 @@ int set_classid(unsigned int id) return 1; } - if (dprintf(fd, "%u\n", id) < 0) { + if (dprintf(fd, "%u\n", getpid()) < 0) { log_err("Setting cgroup classid"); rc = 1; } @@ -607,3 +626,66 @@ void cleanup_classid_environment(void) join_cgroup_from_top(NETCLS_MOUNT_PATH); nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); } + +/** + * get_classid_cgroup_id - Get the cgroup id of a net_cls cgroup + */ +unsigned long long get_classid_cgroup_id(void) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_classid_path(cgroup_workdir); + return get_cgroup_id_from_path(cgroup_workdir); +} + +/** + * get_cgroup1_hierarchy_id - Retrieves the ID of a cgroup1 hierarchy from the cgroup1 subsys name. + * @subsys_name: The cgroup1 subsys name, which can be retrieved from /proc/self/cgroup. It can be + * a named cgroup like "name=systemd", a controller name like "net_cls", or multi-controllers like + * "net_cls,net_prio".
+ */ +int get_cgroup1_hierarchy_id(const char *subsys_name) +{ + char *c, *c2, *c3, *c4; + bool found = false; + char line[1024]; + FILE *file; + int i, id; + + if (!subsys_name) + return -1; + + file = fopen("/proc/self/cgroup", "r"); + if (!file) { + log_err("fopen /proc/self/cgroup"); + return -1; + } + + while (fgets(line, 1024, file)) { + i = 0; + for (c = strtok_r(line, ":", &c2); c && i < 2; c = strtok_r(NULL, ":", &c2)) { + if (i == 0) { + id = strtol(c, NULL, 10); + } else if (i == 1) { + if (!strcmp(c, subsys_name)) { + found = true; + break; + } + + /* Multiple subsystems may share one single mount point */ + for (c3 = strtok_r(c, ",", &c4); c3; + c3 = strtok_r(NULL, ",", &c4)) { + if (!strcmp(c, subsys_name)) { + found = true; + break; + } + } + } + i++; + } + if (found) + break; + } + fclose(file); + return found ? id : -1; +} diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 5c2cb9c8b546..ee053641c026 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -20,6 +20,7 @@ int get_root_cgroup(void); int create_and_get_cgroup(const char *relative_path); void remove_cgroup(const char *relative_path); unsigned long long get_cgroup_id(const char *relative_path); +int get_cgroup1_hierarchy_id(const char *subsys_name); int join_cgroup(const char *relative_path); int join_root_cgroup(void); @@ -29,8 +30,9 @@ int setup_cgroup_environment(void); void cleanup_cgroup_environment(void); /* cgroupv1 related */ -int set_classid(unsigned int id); +int set_classid(void); int join_classid(void); +unsigned long long get_classid_cgroup_id(void); int setup_classid_environment(void); void cleanup_classid_environment(void); diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 index 253821494884..29c8635c5722 100644 --- a/tools/testing/selftests/bpf/config.aarch64 +++ b/tools/testing/selftests/bpf/config.aarch64 @@ -1,4 +1,3 @@ -CONFIG_9P_FS=y CONFIG_ARCH_VEXPRESS=y CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y CONFIG_ARM_SMMU_V3=y @@ -37,6 +36,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=y CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_INFO_REDUCED=n CONFIG_DEBUG_LIST=y CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_NOTIFIERS=y @@ -46,7 +46,6 @@ CONFIG_DEBUG_SG=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_DEVTMPFS=y -CONFIG_DRM_VIRTIO_GPU=y CONFIG_DRM=y CONFIG_DUMMY=y CONFIG_EXPERT=y @@ -67,7 +66,6 @@ CONFIG_HAVE_KRETPROBES=y CONFIG_HEADERS_INSTALL=y CONFIG_HIGH_RES_TIMERS=y CONFIG_HUGETLBFS=y -CONFIG_HW_RANDOM_VIRTIO=y CONFIG_HW_RANDOM=y CONFIG_HZ_100=y CONFIG_IDLE_PAGE_TRACKING=y @@ -99,8 +97,6 @@ CONFIG_MEMCG=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_NAMESPACES=y -CONFIG_NET_9P_VIRTIO=y -CONFIG_NET_9P=y CONFIG_NET_ACT_BPF=y CONFIG_NET_ACT_GACT=y CONFIG_NETDEVICES=y @@ -140,7 +136,6 @@ CONFIG_SCHED_TRACER=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SCAN_ASYNC=y -CONFIG_SCSI_VIRTIO=y CONFIG_SCSI=y CONFIG_SECURITY_NETWORK=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y @@ -167,16 +162,6 @@ CONFIG_UPROBES=y CONFIG_USELIB=y CONFIG_USER_NS=y CONFIG_VETH=y -CONFIG_VIRTIO_BALLOON=y -CONFIG_VIRTIO_BLK=y -CONFIG_VIRTIO_CONSOLE=y -CONFIG_VIRTIO_FS=y -CONFIG_VIRTIO_INPUT=y -CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y -CONFIG_VIRTIO_MMIO=y -CONFIG_VIRTIO_NET=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y diff --git 
a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x index 2ba92167be35..e93330382849 100644 --- a/tools/testing/selftests/bpf/config.s390x +++ b/tools/testing/selftests/bpf/config.s390x @@ -1,4 +1,3 @@ -CONFIG_9P_FS=y CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y CONFIG_AUDIT=y CONFIG_BLK_CGROUP=y @@ -84,8 +83,6 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_NAMESPACES=y CONFIG_NET=y -CONFIG_NET_9P=y -CONFIG_NET_9P_VIRTIO=y CONFIG_NET_ACT_BPF=y CONFIG_NET_ACT_GACT=y CONFIG_NET_KEY=y @@ -114,7 +111,6 @@ CONFIG_SAMPLE_SECCOMP=y CONFIG_SAMPLES=y CONFIG_SCHED_TRACER=y CONFIG_SCSI=y -CONFIG_SCSI_VIRTIO=y CONFIG_SECURITY_NETWORK=y CONFIG_STACK_TRACER=y CONFIG_STATIC_KEYS_SELFTEST=y @@ -136,11 +132,6 @@ CONFIG_UPROBES=y CONFIG_USELIB=y CONFIG_USER_NS=y CONFIG_VETH=y -CONFIG_VIRTIO_BALLOON=y -CONFIG_VIRTIO_BLK=y -CONFIG_VIRTIO_NET=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y diff --git a/tools/testing/selftests/bpf/config.vm b/tools/testing/selftests/bpf/config.vm new file mode 100644 index 000000000000..a9746ca78777 --- /dev/null +++ b/tools/testing/selftests/bpf/config.vm @@ -0,0 +1,12 @@ +CONFIG_9P_FS=y +CONFIG_9P_FS_POSIX_ACL=y +CONFIG_9P_FS_SECURITY=y +CONFIG_CRYPTO_DEV_VIRTIO=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_VSOCKETS_COMMON=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index 2e70a6048278..f7bfb2b09c82 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -1,6 +1,3 @@ -CONFIG_9P_FS=y -CONFIG_9P_FS_POSIX_ACL=y -CONFIG_9P_FS_SECURITY=y CONFIG_AGP=y CONFIG_AGP_AMD64=y CONFIG_AGP_INTEL=y @@ -45,7 +42,6 @@ CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPUSETS=y CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_BLAKE2B=y -CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y CONFIG_DCB=y @@ -145,8 +141,6 @@ CONFIG_MEMORY_FAILURE=y CONFIG_MINIX_SUBPARTITION=y CONFIG_NAMESPACES=y CONFIG_NET=y -CONFIG_NET_9P=y -CONFIG_NET_9P_VIRTIO=y CONFIG_NET_ACT_BPF=y CONFIG_NET_CLS_CGROUP=y CONFIG_NET_EMATCH=y @@ -228,12 +222,6 @@ CONFIG_USER_NS=y CONFIG_VALIDATE_FS_PARSER=y CONFIG_VETH=y CONFIG_VIRT_DRIVERS=y -CONFIG_VIRTIO_BALLOON=y -CONFIG_VIRTIO_BLK=y -CONFIG_VIRTIO_CONSOLE=y -CONFIG_VIRTIO_NET=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c index 8bf497a9843e..2ea36408816b 100644 --- a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c +++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c @@ -131,10 +131,17 @@ static bool is_lru(__u32 map_type) map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; } +static bool is_percpu(__u32 map_type) +{ + return map_type == BPF_MAP_TYPE_PERCPU_HASH || + map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; +} + struct upsert_opts { __u32 map_type; int map_fd; __u32 n; + bool retry_for_nomem; }; static int create_small_hash(void) @@ -148,19 +155,38 @@ static int create_small_hash(void) return map_fd; } +static bool retry_for_nomem_fn(int err) +{ + return err == ENOMEM; +} + static void *patch_map_thread(void *arg) { + /* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. 
*/ + static __u8 blob[8 << 10]; struct upsert_opts *opts = arg; + void *val_ptr; int val; int ret; int i; for (i = 0; i < opts->n; i++) { - if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) + if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { val = create_small_hash(); - else + val_ptr = &val; + } else if (is_percpu(opts->map_type)) { + val_ptr = blob; + } else { val = rand(); - ret = bpf_map_update_elem(opts->map_fd, &i, &val, 0); + val_ptr = &val; + } + + /* 2 seconds may be enough ? */ + if (opts->retry_for_nomem) + ret = map_update_retriable(opts->map_fd, &i, val_ptr, 0, + 40, retry_for_nomem_fn); + else + ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0); CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno)); if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) @@ -281,6 +307,13 @@ static void __test(int map_fd) else opts.n /= 2; + /* per-cpu bpf memory allocator may not be able to allocate per-cpu + * pointer successfully and it can not refill free llist timely, and + * bpf_map_update_elem() will return -ENOMEM. so just retry to mitigate + * the problem temporarily. + */ + opts.retry_for_nomem = is_percpu(opts.map_type) && (info.map_flags & BPF_F_NO_PREALLOC); + /* * Upsert keys [0, n) under some competition: with random values from * N_THREADS threads. Check values, then delete all elements and check diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index 465c1c3a3d3c..4ebd0da898f5 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -40,7 +40,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, "R10", "fp0"}, {0, "R3_w", "2"}, {1, "R3_w", "4"}, @@ -68,7 +68,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, "R10", "fp0"}, {0, "R3_w", "1"}, {1, "R3_w", "2"}, @@ -97,7 +97,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, "R10", "fp0"}, {0, "R3_w", "4"}, {1, "R3_w", "8"}, @@ -119,7 +119,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, "R10", "fp0"}, {0, "R3_w", "7"}, {1, "R3_w", "7"}, @@ -162,13 +162,13 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {6, "R0_w", "pkt(off=8,r=8,imm=0)"}, + {6, "R0_w", "pkt(off=8,r=8)"}, {6, "R3_w", "var_off=(0x0; 0xff)"}, {7, "R3_w", "var_off=(0x0; 0x1fe)"}, {8, "R3_w", "var_off=(0x0; 0x3fc)"}, {9, "R3_w", "var_off=(0x0; 0x7f8)"}, {10, "R3_w", "var_off=(0x0; 0xff0)"}, - {12, "R3_w", "pkt_end(off=0,imm=0)"}, + {12, "R3_w", "pkt_end()"}, {17, "R4_w", "var_off=(0x0; 0xff)"}, {18, "R4_w", "var_off=(0x0; 0x1fe0)"}, {19, "R4_w", "var_off=(0x0; 0xff0)"}, @@ -235,11 +235,11 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {2, "R5_w", "pkt(off=0,r=0,imm=0)"}, - {4, "R5_w", "pkt(off=14,r=0,imm=0)"}, - {5, "R4_w", "pkt(off=14,r=0,imm=0)"}, - {9, "R2", "pkt(off=0,r=18,imm=0)"}, - {10, "R5", "pkt(off=14,r=18,imm=0)"}, + {2, "R5_w", "pkt(r=0)"}, + {4, "R5_w", "pkt(off=14,r=0)"}, + {5, "R4_w", "pkt(off=14,r=0)"}, + {9, "R2", "pkt(r=18)"}, + {10, "R5", "pkt(off=14,r=18)"}, {10, "R4_w", 
"var_off=(0x0; 0xff)"}, {13, "R4_w", "var_off=(0x0; 0xffff)"}, {14, "R4_w", "var_off=(0x0; 0xffff)"}, @@ -299,7 +299,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {7, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Offset is added to packet pointer R5, resulting in * known fixed offset, and variable offset from R6. @@ -337,7 +337,7 @@ static struct bpf_align_test tests[] = { /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - {26, "R5_w", "pkt(off=14,r=8,"}, + {26, "R5_w", "pkt(off=14,r=8)"}, /* Variable offset is added to R5, resulting in a * variable offset of (4n). See comment for insn #18 * for R4 = R5 trick. @@ -397,7 +397,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {7, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Adding 14 makes R6 be (4n+2) */ {8, "R6_w", "var_off=(0x2; 0x7fc)"}, @@ -459,7 +459,7 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .matches = { - {3, "R5_w", "pkt_end(off=0,imm=0)"}, + {3, "R5_w", "pkt_end()"}, /* (ptr - ptr) << 2 == unknown, (4n) */ {5, "R5_w", "var_off=(0x0; 0xfffffffffffffffc)"}, /* (4n) + 14 == (4n+2). We blow our bounds, because @@ -513,7 +513,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {8, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Adding 14 makes R6 be (4n+2) */ {9, "R6_w", "var_off=(0x2; 0x7fc)"}, @@ -566,7 +566,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {9, "R6_w", "var_off=(0x0; 0x3c)"}, /* Adding 14 makes R6 be (4n+2) */ {10, "R6_w", "var_off=(0x2; 0x7c)"}, @@ -659,14 +659,14 @@ static int do_test_single(struct bpf_align_test *test) /* Check the next line as well in case the previous line * did not have a corresponding bpf insn. Example: * func#0 @0 - * 0: R1=ctx(off=0,imm=0) R10=fp0 + * 0: R1=ctx() R10=fp0 * 0: (b7) r3 = 2 ; R3_w=2 * * Sometimes it's actually two lines below, e.g. 
when * searching for "6: R3_w=scalar(umax=255,var_off=(0x0; 0xff))": - * from 4 to 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 - * 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 - * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(off=0,r=8,imm=0) R3_w=scalar(umax=255,var_off=(0x0; 0xff)) + * from 4 to 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0 + * 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0 + * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(r=8) R3_w=scalar(umax=255,var_off=(0x0; 0xff)) */ while (!(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) { cur_line = -1; diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c index a1766a298bb7..f7cd129cb82b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bind_perm.c +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c @@ -9,8 +9,6 @@ #include "cap_helpers.h" #include "bind_perm.skel.h" -static int duration; - static int create_netns(void) { if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) @@ -27,7 +25,7 @@ void try_bind(int family, int port, int expected_errno) int fd = -1; fd = socket(family, SOCK_STREAM, 0); - if (CHECK(fd < 0, "fd", "errno %d", errno)) + if (!ASSERT_GE(fd, 0, "socket")) goto close_socket; if (family == AF_INET) { @@ -60,7 +58,7 @@ void test_bind_perm(void) return; cgroup_fd = test__join_cgroup("/bind_perm"); - if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno)) + if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup")) return; skel = bind_perm__open_and_load(); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index e3498f607b49..618af9dfae9b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -34,8 +34,6 @@ #include "bpf_iter_ksym.skel.h" #include "bpf_iter_sockmap.skel.h" -static int duration; - static void test_btf_id_or_null(void) { struct bpf_iter_test_kern3 *skel; @@ -64,7 +62,7 @@ static void do_dummy_read_opts(struct bpf_program *prog, struct bpf_iter_attach_ /* not check contents, but ensure read() ends without error */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)); + ASSERT_GE(len, 0, "read"); close(iter_fd); @@ -334,6 +332,8 @@ static void test_task_stack(void) do_dummy_read(skel->progs.dump_task_stack); do_dummy_read(skel->progs.get_task_user_stacks); + ASSERT_EQ(skel->bss->num_user_stacks, 1, "num_user_stacks"); + bpf_iter_task_stack__destroy(skel); } @@ -413,7 +413,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel) goto free_link; } - if (CHECK(err < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(err, 0, "read")) goto free_link; ASSERT_HAS_SUBSTR(taskbuf, "(struct task_struct)", @@ -526,11 +526,11 @@ static int do_read_with_fd(int iter_fd, const char *expected, start = 0; while ((len = read(iter_fd, buf + start, read_buf_len)) > 0) { start += len; - if (CHECK(start >= 16, "read", "read len %d\n", len)) + if (!ASSERT_LT(start, 16, "read")) return -1; read_buf_len = read_one_char ? 
1 : 16 - start; } - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) return -1; if (!ASSERT_STREQ(buf, expected, "read")) @@ -571,8 +571,7 @@ static int do_read(const char *path, const char *expected) int err, iter_fd; iter_fd = open(path, O_RDONLY); - if (CHECK(iter_fd < 0, "open", "open %s failed: %s\n", - path, strerror(errno))) + if (!ASSERT_GE(iter_fd, 0, "open")) return -1; err = do_read_with_fd(iter_fd, expected, false); @@ -600,7 +599,7 @@ static void test_file_iter(void) unlink(path); err = bpf_link__pin(link, path); - if (CHECK(err, "pin_iter", "pin_iter to %s failed: %d\n", path, err)) + if (!ASSERT_OK(err, "pin_iter")) goto free_link; err = do_read(path, "abcd"); @@ -651,12 +650,10 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) * overflow and needs restart. */ map1_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL); - if (CHECK(map1_fd < 0, "bpf_map_create", - "map_creation failed: %s\n", strerror(errno))) + if (!ASSERT_GE(map1_fd, 0, "bpf_map_create")) goto out; map2_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL); - if (CHECK(map2_fd < 0, "bpf_map_create", - "map_creation failed: %s\n", strerror(errno))) + if (!ASSERT_GE(map2_fd, 0, "bpf_map_create")) goto free_map1; /* bpf_seq_printf kernel buffer is 8 pages, so one map @@ -685,14 +682,12 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) /* setup filtering map_id in bpf program */ map_info_len = sizeof(map_info); err = bpf_map_get_info_by_fd(map1_fd, &map_info, &map_info_len); - if (CHECK(err, "get_map_info", "get map info failed: %s\n", - strerror(errno))) + if (!ASSERT_OK(err, "get_map_info")) goto free_map2; skel->bss->map1_id = map_info.id; err = bpf_map_get_info_by_fd(map2_fd, &map_info, &map_info_len); - if (CHECK(err, "get_map_info", "get map info failed: %s\n", - strerror(errno))) + if (!ASSERT_OK(err, "get_map_info")) goto free_map2; skel->bss->map2_id = map_info.id; @@ -705,7 +700,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) goto free_link; buf = malloc(expected_read_len); - if (!buf) + if (!ASSERT_OK_PTR(buf, "malloc")) goto close_iter; /* do read */ @@ -714,16 +709,14 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) while ((len = read(iter_fd, buf, expected_read_len)) > 0) total_read_len += len; - CHECK(len != -1 || errno != E2BIG, "read", - "expected ret -1, errno E2BIG, but get ret %d, error %s\n", - len, strerror(errno)); + ASSERT_EQ(len, -1, "read"); + ASSERT_EQ(errno, E2BIG, "read"); goto free_buf; } else if (!ret1) { while ((len = read(iter_fd, buf, expected_read_len)) > 0) total_read_len += len; - if (CHECK(len < 0, "read", "read failed: %s\n", - strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto free_buf; } else { do { @@ -732,8 +725,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) total_read_len += len; } while (len > 0 || len == -EAGAIN); - if (CHECK(len < 0, "read", "read failed: %s\n", - strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto free_buf; } @@ -836,7 +828,7 @@ static void test_bpf_hash_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ @@ -878,6 +870,8 @@ static void test_bpf_percpu_hash_map(void) skel->rodata->num_cpus = bpf_num_possible_cpus(); val = malloc(8 * bpf_num_possible_cpus()); + if (!ASSERT_OK_PTR(val, "malloc")) + goto out; err = 
bpf_iter_bpf_percpu_hash_map__load(skel); if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_hash_map__load")) @@ -917,7 +911,7 @@ static void test_bpf_percpu_hash_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ @@ -983,17 +977,14 @@ static void test_bpf_array_map(void) start = 0; while ((len = read(iter_fd, buf + start, sizeof(buf) - start)) > 0) start += len; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ res_first_key = *(__u32 *)buf; res_first_val = *(__u64 *)(buf + sizeof(__u32)); - if (CHECK(res_first_key != 0 || res_first_val != first_val, - "bpf_seq_write", - "seq_write failure: first key %u vs expected 0, " - " first value %llu vs expected %llu\n", - res_first_key, res_first_val, first_val)) + if (!ASSERT_EQ(res_first_key, 0, "bpf_seq_write") || + !ASSERT_EQ(res_first_val, first_val, "bpf_seq_write")) goto close_iter; if (!ASSERT_EQ(skel->bss->key_sum, expected_key, "key_sum")) @@ -1057,6 +1048,8 @@ static void test_bpf_percpu_array_map(void) skel->rodata->num_cpus = bpf_num_possible_cpus(); val = malloc(8 * bpf_num_possible_cpus()); + if (!ASSERT_OK_PTR(val, "malloc")) + goto out; err = bpf_iter_bpf_percpu_array_map__load(skel); if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_array_map__load")) @@ -1092,7 +1085,7 @@ static void test_bpf_percpu_array_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ @@ -1131,6 +1124,7 @@ static void test_bpf_sk_storage_delete(void) sock_fd = socket(AF_INET6, SOCK_STREAM, 0); if (!ASSERT_GE(sock_fd, 0, "socket")) goto out; + err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST); if (!ASSERT_OK(err, "map_update")) goto out; @@ -1151,14 +1145,19 @@ static void test_bpf_sk_storage_delete(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); - if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", - "map value wasn't deleted (err=%d, errno=%d)\n", err, errno)) - goto close_iter; + + /* Note: The following assertions serve to ensure + * the value was deleted. It does so by asserting + * that bpf_map_lookup_elem has failed. This might + * seem counterintuitive at first. 
+ */ + ASSERT_ERR(err, "bpf_map_lookup_elem"); + ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem"); close_iter: close(iter_fd); @@ -1203,17 +1202,15 @@ static void test_bpf_sk_storage_get(void) do_dummy_read(skel->progs.fill_socket_owner); err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); - if (CHECK(err || val != getpid(), "bpf_map_lookup_elem", - "map value wasn't set correctly (expected %d, got %d, err=%d)\n", - getpid(), val, err)) + if (!ASSERT_OK(err, "bpf_map_lookup_elem") || + !ASSERT_EQ(val, getpid(), "bpf_map_lookup_elem")) goto close_socket; do_dummy_read(skel->progs.negate_socket_local_storage); err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); - CHECK(err || val != -getpid(), "bpf_map_lookup_elem", - "map value wasn't set correctly (expected %d, got %d, err=%d)\n", - -getpid(), val, err); + ASSERT_OK(err, "bpf_map_lookup_elem"); + ASSERT_EQ(val, -getpid(), "bpf_map_lookup_elem"); close_socket: close(sock_fd); @@ -1290,7 +1287,7 @@ static void test_bpf_sk_storage_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c index 675b90b15280..f09d6ac2ef09 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c @@ -25,7 +25,7 @@ void serial_test_bpf_obj_id(void) */ __u32 map_ids[nr_iters + 1]; char jited_insns[128], xlated_insns[128], zeros[128], tp_name[128]; - __u32 i, next_id, info_len, nr_id_found, duration = 0; + __u32 i, next_id, info_len, nr_id_found; struct timespec real_time_ts, boot_time_ts; int err = 0; __u64 array_value; @@ -33,16 +33,16 @@ void serial_test_bpf_obj_id(void) time_t now, load_time; err = bpf_prog_get_fd_by_id(0); - CHECK(err >= 0 || errno != ENOENT, - "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno); + ASSERT_LT(err, 0, "bpf_prog_get_fd_by_id"); + ASSERT_EQ(errno, ENOENT, "bpf_prog_get_fd_by_id"); err = bpf_map_get_fd_by_id(0); - CHECK(err >= 0 || errno != ENOENT, - "get-fd-by-notexist-map-id", "err %d errno %d\n", err, errno); + ASSERT_LT(err, 0, "bpf_map_get_fd_by_id"); + ASSERT_EQ(errno, ENOENT, "bpf_map_get_fd_by_id"); err = bpf_link_get_fd_by_id(0); - CHECK(err >= 0 || errno != ENOENT, - "get-fd-by-notexist-link-id", "err %d errno %d\n", err, errno); + ASSERT_LT(err, 0, "bpf_map_get_fd_by_id"); + ASSERT_EQ(errno, ENOENT, "bpf_map_get_fd_by_id"); /* Check bpf_map_get_info_by_fd() */ bzero(zeros, sizeof(zeros)); @@ -53,25 +53,26 @@ void serial_test_bpf_obj_id(void) /* test_obj_id.o is a dumb prog. It should never fail * to load. 
*/ - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "bpf_prog_test_load")) continue; /* Insert a magic value to the map */ map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id"); - if (CHECK_FAIL(map_fds[i] < 0)) + if (!ASSERT_GE(map_fds[i], 0, "bpf_find_map")) goto done; + err = bpf_map_update_elem(map_fds[i], &array_key, &array_magic_value, 0); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "bpf_map_update_elem")) goto done; - prog = bpf_object__find_program_by_name(objs[i], - "test_obj_id"); - if (CHECK_FAIL(!prog)) + prog = bpf_object__find_program_by_name(objs[i], "test_obj_id"); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) goto done; + links[i] = bpf_program__attach(prog); err = libbpf_get_error(links[i]); - if (CHECK(err, "prog_attach", "prog #%d, err %d\n", i, err)) { + if (!ASSERT_OK(err, "bpf_program__attach")) { links[i] = NULL; goto done; } @@ -81,24 +82,14 @@ void serial_test_bpf_obj_id(void) bzero(&map_infos[i], info_len); err = bpf_map_get_info_by_fd(map_fds[i], &map_infos[i], &info_len); - if (CHECK(err || - map_infos[i].type != BPF_MAP_TYPE_ARRAY || - map_infos[i].key_size != sizeof(__u32) || - map_infos[i].value_size != sizeof(__u64) || - map_infos[i].max_entries != 1 || - map_infos[i].map_flags != 0 || - info_len != sizeof(struct bpf_map_info) || - strcmp((char *)map_infos[i].name, expected_map_name), - "get-map-info(fd)", - "err %d errno %d type %d(%d) info_len %u(%zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n", - err, errno, - map_infos[i].type, BPF_MAP_TYPE_ARRAY, - info_len, sizeof(struct bpf_map_info), - map_infos[i].key_size, - map_infos[i].value_size, - map_infos[i].max_entries, - map_infos[i].map_flags, - map_infos[i].name, expected_map_name)) + if (!ASSERT_OK(err, "bpf_map_get_info_by_fd") || + !ASSERT_EQ(map_infos[i].type, BPF_MAP_TYPE_ARRAY, "map_type") || + !ASSERT_EQ(map_infos[i].key_size, sizeof(__u32), "key_size") || + !ASSERT_EQ(map_infos[i].value_size, sizeof(__u64), "value_size") || + !ASSERT_EQ(map_infos[i].max_entries, 1, "max_entries") || + !ASSERT_EQ(map_infos[i].map_flags, 0, "map_flags") || + !ASSERT_EQ(info_len, sizeof(struct bpf_map_info), "map_info_len") || + !ASSERT_STREQ((char *)map_infos[i].name, expected_map_name, "map_name")) goto done; /* Check getting prog info */ @@ -112,48 +103,34 @@ void serial_test_bpf_obj_id(void) prog_infos[i].xlated_prog_len = sizeof(xlated_insns); prog_infos[i].map_ids = ptr_to_u64(map_ids + i); prog_infos[i].nr_map_ids = 2; + err = clock_gettime(CLOCK_REALTIME, &real_time_ts); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "clock_gettime")) goto done; + err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "clock_gettime")) goto done; + err = bpf_prog_get_info_by_fd(prog_fds[i], &prog_infos[i], &info_len); load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec) + (prog_infos[i].load_time / nsec_per_sec); - if (CHECK(err || - prog_infos[i].type != BPF_PROG_TYPE_RAW_TRACEPOINT || - info_len != sizeof(struct bpf_prog_info) || - (env.jit_enabled && !prog_infos[i].jited_prog_len) || - (env.jit_enabled && - !memcmp(jited_insns, zeros, sizeof(zeros))) || - !prog_infos[i].xlated_prog_len || - !memcmp(xlated_insns, zeros, sizeof(zeros)) || - load_time < now - 60 || load_time > now + 60 || - prog_infos[i].created_by_uid != my_uid || - prog_infos[i].nr_map_ids != 1 || - *(int *)(long)prog_infos[i].map_ids != map_infos[i].id || - strcmp((char *)prog_infos[i].name, expected_prog_name), - "get-prog-info(fd)", - "err %d errno %d i %d type 
%d(%d) info_len %u(%zu) " - "jit_enabled %d jited_prog_len %u xlated_prog_len %u " - "jited_prog %d xlated_prog %d load_time %lu(%lu) " - "uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) " - "name %s(%s)\n", - err, errno, i, - prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER, - info_len, sizeof(struct bpf_prog_info), - env.jit_enabled, - prog_infos[i].jited_prog_len, - prog_infos[i].xlated_prog_len, - !!memcmp(jited_insns, zeros, sizeof(zeros)), - !!memcmp(xlated_insns, zeros, sizeof(zeros)), - load_time, now, - prog_infos[i].created_by_uid, my_uid, - prog_infos[i].nr_map_ids, 1, - *(int *)(long)prog_infos[i].map_ids, map_infos[i].id, - prog_infos[i].name, expected_prog_name)) + + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd") || + !ASSERT_EQ(prog_infos[i].type, BPF_PROG_TYPE_RAW_TRACEPOINT, "prog_type") || + !ASSERT_EQ(info_len, sizeof(struct bpf_prog_info), "prog_info_len") || + !ASSERT_FALSE((env.jit_enabled && !prog_infos[i].jited_prog_len), "jited_prog_len") || + !ASSERT_FALSE((env.jit_enabled && !memcmp(jited_insns, zeros, sizeof(zeros))), + "jited_insns") || + !ASSERT_NEQ(prog_infos[i].xlated_prog_len, 0, "xlated_prog_len") || + !ASSERT_NEQ(memcmp(xlated_insns, zeros, sizeof(zeros)), 0, "xlated_insns") || + !ASSERT_GE(load_time, (now - 60), "load_time") || + !ASSERT_LE(load_time, (now + 60), "load_time") || + !ASSERT_EQ(prog_infos[i].created_by_uid, my_uid, "created_by_uid") || + !ASSERT_EQ(prog_infos[i].nr_map_ids, 1, "nr_map_ids") || + !ASSERT_EQ(*(int *)(long)prog_infos[i].map_ids, map_infos[i].id, "map_ids") || + !ASSERT_STREQ((char *)prog_infos[i].name, expected_prog_name, "prog_name")) goto done; /* Check getting link info */ @@ -163,25 +140,12 @@ void serial_test_bpf_obj_id(void) link_infos[i].raw_tracepoint.tp_name_len = sizeof(tp_name); err = bpf_link_get_info_by_fd(bpf_link__fd(links[i]), &link_infos[i], &info_len); - if (CHECK(err || - link_infos[i].type != BPF_LINK_TYPE_RAW_TRACEPOINT || - link_infos[i].prog_id != prog_infos[i].id || - link_infos[i].raw_tracepoint.tp_name != ptr_to_u64(&tp_name) || - strcmp(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), - "sys_enter") || - info_len != sizeof(struct bpf_link_info), - "get-link-info(fd)", - "err %d errno %d info_len %u(%zu) type %d(%d) id %d " - "prog_id %d (%d) tp_name %s(%s)\n", - err, errno, - info_len, sizeof(struct bpf_link_info), - link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT, - link_infos[i].id, - link_infos[i].prog_id, prog_infos[i].id, - (const char *)u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), - "sys_enter")) + if (!ASSERT_OK(err, "bpf_link_get_info_by_fd") || + !ASSERT_EQ(link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT, "link_type") || + !ASSERT_EQ(link_infos[i].prog_id, prog_infos[i].id, "prog_id") || + !ASSERT_EQ(link_infos[i].raw_tracepoint.tp_name, ptr_to_u64(&tp_name), "&tp_name") || + !ASSERT_STREQ(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), "sys_enter", "tp_name")) goto done; - } /* Check bpf_prog_get_next_id() */ @@ -190,7 +154,7 @@ void serial_test_bpf_obj_id(void) while (!bpf_prog_get_next_id(next_id, &next_id)) { struct bpf_prog_info prog_info = {}; __u32 saved_map_id; - int prog_fd; + int prog_fd, cmp_res; info_len = sizeof(prog_info); @@ -198,9 +162,7 @@ void serial_test_bpf_obj_id(void) if (prog_fd < 0 && errno == ENOENT) /* The bpf_prog is in the dead row */ continue; - if (CHECK(prog_fd < 0, "get-prog-fd(next_id)", - "prog_fd %d next_id %d errno %d\n", - prog_fd, next_id, errno)) + if (!ASSERT_GE(prog_fd, 0, "bpf_prog_get_fd_by_id")) break; for (i = 0; i < nr_iters; i++) @@ -218,9 
+180,8 @@ void serial_test_bpf_obj_id(void) */ prog_info.nr_map_ids = 1; err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len); - if (CHECK(!err || errno != EFAULT, - "get-prog-fd-bad-nr-map-ids", "err %d errno %d(%d)", - err, errno, EFAULT)) + if (!ASSERT_ERR(err, "bpf_prog_get_info_by_fd") || + !ASSERT_EQ(errno, EFAULT, "bpf_prog_get_info_by_fd")) break; bzero(&prog_info, sizeof(prog_info)); info_len = sizeof(prog_info); @@ -231,27 +192,22 @@ void serial_test_bpf_obj_id(void) err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len); prog_infos[i].jited_prog_insns = 0; prog_infos[i].xlated_prog_insns = 0; - CHECK(err || info_len != sizeof(struct bpf_prog_info) || - memcmp(&prog_info, &prog_infos[i], info_len) || - *(int *)(long)prog_info.map_ids != saved_map_id, - "get-prog-info(next_id->fd)", - "err %d errno %d info_len %u(%zu) memcmp %d map_id %u(%u)\n", - err, errno, info_len, sizeof(struct bpf_prog_info), - memcmp(&prog_info, &prog_infos[i], info_len), - *(int *)(long)prog_info.map_ids, saved_map_id); + cmp_res = memcmp(&prog_info, &prog_infos[i], info_len); + + ASSERT_OK(err, "bpf_prog_get_info_by_fd"); + ASSERT_EQ(info_len, sizeof(struct bpf_prog_info), "prog_info_len"); + ASSERT_OK(cmp_res, "memcmp"); + ASSERT_EQ(*(int *)(long)prog_info.map_ids, saved_map_id, "map_id"); close(prog_fd); } - CHECK(nr_id_found != nr_iters, - "check total prog id found by get_next_id", - "nr_id_found %u(%u)\n", - nr_id_found, nr_iters); + ASSERT_EQ(nr_id_found, nr_iters, "prog_nr_id_found"); /* Check bpf_map_get_next_id() */ nr_id_found = 0; next_id = 0; while (!bpf_map_get_next_id(next_id, &next_id)) { struct bpf_map_info map_info = {}; - int map_fd; + int map_fd, cmp_res; info_len = sizeof(map_info); @@ -259,9 +215,7 @@ void serial_test_bpf_obj_id(void) if (map_fd < 0 && errno == ENOENT) /* The bpf_map is in the dead row */ continue; - if (CHECK(map_fd < 0, "get-map-fd(next_id)", - "map_fd %d next_id %u errno %d\n", - map_fd, next_id, errno)) + if (!ASSERT_GE(map_fd, 0, "bpf_map_get_fd_by_id")) break; for (i = 0; i < nr_iters; i++) @@ -274,25 +228,19 @@ void serial_test_bpf_obj_id(void) nr_id_found++; err = bpf_map_lookup_elem(map_fd, &array_key, &array_value); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) goto done; err = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len); - CHECK(err || info_len != sizeof(struct bpf_map_info) || - memcmp(&map_info, &map_infos[i], info_len) || - array_value != array_magic_value, - "check get-map-info(next_id->fd)", - "err %d errno %d info_len %u(%zu) memcmp %d array_value %llu(%llu)\n", - err, errno, info_len, sizeof(struct bpf_map_info), - memcmp(&map_info, &map_infos[i], info_len), - array_value, array_magic_value); + cmp_res = memcmp(&map_info, &map_infos[i], info_len); + ASSERT_OK(err, "bpf_map_get_info_by_fd"); + ASSERT_EQ(info_len, sizeof(struct bpf_map_info), "info_len"); + ASSERT_OK(cmp_res, "memcmp"); + ASSERT_EQ(array_value, array_magic_value, "array_value"); close(map_fd); } - CHECK(nr_id_found != nr_iters, - "check total map id found by get_next_id", - "nr_id_found %u(%u)\n", - nr_id_found, nr_iters); + ASSERT_EQ(nr_id_found, nr_iters, "map_nr_id_found"); /* Check bpf_link_get_next_id() */ nr_id_found = 0; @@ -308,9 +256,7 @@ void serial_test_bpf_obj_id(void) if (link_fd < 0 && errno == ENOENT) /* The bpf_link is in the dead row */ continue; - if (CHECK(link_fd < 0, "get-link-fd(next_id)", - "link_fd %d next_id %u errno %d\n", - link_fd, next_id, errno)) + if (!ASSERT_GE(link_fd, 0, "bpf_link_get_fd_by_id")) break; 
for (i = 0; i < nr_iters; i++) @@ -325,17 +271,13 @@ void serial_test_bpf_obj_id(void) err = bpf_link_get_info_by_fd(link_fd, &link_info, &info_len); cmp_res = memcmp(&link_info, &link_infos[i], offsetof(struct bpf_link_info, raw_tracepoint)); - CHECK(err || info_len != sizeof(link_info) || cmp_res, - "check get-link-info(next_id->fd)", - "err %d errno %d info_len %u(%zu) memcmp %d\n", - err, errno, info_len, sizeof(struct bpf_link_info), - cmp_res); + ASSERT_OK(err, "bpf_link_get_info_by_fd"); + ASSERT_EQ(info_len, sizeof(link_info), "info_len"); + ASSERT_OK(cmp_res, "memcmp"); close(link_fd); } - CHECK(nr_id_found != nr_iters, - "check total link id found by get_next_id", - "nr_id_found %u(%u)\n", nr_id_found, nr_iters); + ASSERT_EQ(nr_id_found, nr_iters, "link_nr_id_found"); done: for (i = 0; i < nr_iters; i++) { diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 4aabeaa525d4..a88e6e07e4f5 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -20,15 +20,14 @@ static const unsigned int total_bytes = 10 * 1024 * 1024; static int expected_stg = 0xeB9F; -static int stop, duration; +static int stop; static int settcpca(int fd, const char *tcp_ca) { int err; err = setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, tcp_ca, strlen(tcp_ca)); - if (CHECK(err == -1, "setsockopt(fd, TCP_CONGESTION)", "errno:%d\n", - errno)) + if (!ASSERT_NEQ(err, -1, "setsockopt")) return -1; return 0; @@ -65,8 +64,7 @@ static void *server(void *arg) bytes += nr_sent; } - CHECK(bytes != total_bytes, "send", "%zd != %u nr_sent:%zd errno:%d\n", - bytes, total_bytes, nr_sent, errno); + ASSERT_EQ(bytes, total_bytes, "send"); done: if (fd >= 0) @@ -92,10 +90,11 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) WRITE_ONCE(stop, 0); lfd = socket(AF_INET6, SOCK_STREAM, 0); - if (CHECK(lfd == -1, "socket", "errno:%d\n", errno)) + if (!ASSERT_NEQ(lfd, -1, "socket")) return; + fd = socket(AF_INET6, SOCK_STREAM, 0); - if (CHECK(fd == -1, "socket", "errno:%d\n", errno)) { + if (!ASSERT_NEQ(fd, -1, "socket")) { close(lfd); return; } @@ -108,26 +107,27 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) sa6.sin6_family = AF_INET6; sa6.sin6_addr = in6addr_loopback; err = bind(lfd, (struct sockaddr *)&sa6, addrlen); - if (CHECK(err == -1, "bind", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "bind")) goto done; + err = getsockname(lfd, (struct sockaddr *)&sa6, &addrlen); - if (CHECK(err == -1, "getsockname", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "getsockname")) goto done; + err = listen(lfd, 1); - if (CHECK(err == -1, "listen", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "listen")) goto done; if (sk_stg_map) { err = bpf_map_update_elem(bpf_map__fd(sk_stg_map), &fd, &expected_stg, BPF_NOEXIST); - if (CHECK(err, "bpf_map_update_elem(sk_stg_map)", - "err:%d errno:%d\n", err, errno)) + if (!ASSERT_OK(err, "bpf_map_update_elem(sk_stg_map)")) goto done; } /* connect to server */ err = connect(fd, (struct sockaddr *)&sa6, addrlen); - if (CHECK(err == -1, "connect", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "connect")) goto done; if (sk_stg_map) { @@ -135,14 +135,13 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) err = bpf_map_lookup_elem(bpf_map__fd(sk_stg_map), &fd, &tmp_stg); - if (CHECK(!err || errno != ENOENT, - "bpf_map_lookup_elem(sk_stg_map)", - "err:%d errno:%d\n", err, errno)) + if 
(!ASSERT_ERR(err, "bpf_map_lookup_elem(sk_stg_map)") || + !ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem(sk_stg_map)")) goto done; } err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd); - if (CHECK(err != 0, "pthread_create", "err:%d errno:%d\n", err, errno)) + if (!ASSERT_OK(err, "pthread_create")) goto done; /* recv total_bytes */ @@ -156,13 +155,12 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) bytes += nr_recv; } - CHECK(bytes != total_bytes, "recv", "%zd != %u nr_recv:%zd errno:%d\n", - bytes, total_bytes, nr_recv, errno); + ASSERT_EQ(bytes, total_bytes, "recv"); WRITE_ONCE(stop, 1); pthread_join(srv_thread, &thread_ret); - CHECK(IS_ERR(thread_ret), "pthread_join", "thread_ret:%ld", - PTR_ERR(thread_ret)); + ASSERT_OK(IS_ERR(thread_ret), "thread_ret"); + done: close(lfd); close(fd); @@ -174,7 +172,7 @@ static void test_cubic(void) struct bpf_link *link; cubic_skel = bpf_cubic__open_and_load(); - if (CHECK(!cubic_skel, "bpf_cubic__open_and_load", "failed\n")) + if (!ASSERT_OK_PTR(cubic_skel, "bpf_cubic__open_and_load")) return; link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic); @@ -197,7 +195,7 @@ static void test_dctcp(void) struct bpf_link *link; dctcp_skel = bpf_dctcp__open_and_load(); - if (CHECK(!dctcp_skel, "bpf_dctcp__open_and_load", "failed\n")) + if (!ASSERT_OK_PTR(dctcp_skel, "bpf_dctcp__open_and_load")) return; link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp); @@ -207,9 +205,7 @@ static void test_dctcp(void) } do_test("bpf_dctcp", dctcp_skel->maps.sk_stg_map); - CHECK(dctcp_skel->bss->stg_result != expected_stg, - "Unexpected stg_result", "stg_result (%x) != expected_stg (%x)\n", - dctcp_skel->bss->stg_result, expected_stg); + ASSERT_EQ(dctcp_skel->bss->stg_result, expected_stg, "stg_result"); bpf_link__destroy(link); bpf_dctcp__destroy(dctcp_skel); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index 731c343897d8..e770912fc1d2 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -35,7 +35,7 @@ static int check_load(const char *file, enum bpf_prog_type type) } bpf_program__set_type(prog, type); - bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); + bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS); bpf_program__set_log_level(prog, 4 | extra_prog_load_log_flags); err = bpf_object__load(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 92d51f377fe5..8fb4a04fbbc0 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -5265,6 +5265,7 @@ static size_t get_pprint_mapv_size(enum pprint_mapv_kind_t mapv_kind) #endif assert(0); + return 0; } static void set_pprint_mapv(enum pprint_mapv_kind_t mapv_kind, diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c new file mode 100644 index 000000000000..74d6d7546f40 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */ + +#include <sys/types.h> +#include <unistd.h> +#include <test_progs.h> +#include "cgroup_helpers.h" +#include "test_cgroup1_hierarchy.skel.h" + +static void bpf_cgroup1(struct test_cgroup1_hierarchy *skel) +{ + struct bpf_link *lsm_link, 
*fentry_link; + int err; + + /* Attach LSM prog first */ + lsm_link = bpf_program__attach_lsm(skel->progs.lsm_run); + if (!ASSERT_OK_PTR(lsm_link, "lsm_attach")) + return; + + /* LSM prog will be triggered when attaching fentry */ + fentry_link = bpf_program__attach_trace(skel->progs.fentry_run); + ASSERT_NULL(fentry_link, "fentry_attach_fail"); + + err = bpf_link__destroy(lsm_link); + ASSERT_OK(err, "destroy_lsm"); +} + +static void bpf_cgroup1_sleepable(struct test_cgroup1_hierarchy *skel) +{ + struct bpf_link *lsm_link, *fentry_link; + int err; + + /* Attach LSM prog first */ + lsm_link = bpf_program__attach_lsm(skel->progs.lsm_s_run); + if (!ASSERT_OK_PTR(lsm_link, "lsm_attach")) + return; + + /* LSM prog will be triggered when attaching fentry */ + fentry_link = bpf_program__attach_trace(skel->progs.fentry_run); + ASSERT_NULL(fentry_link, "fentry_attach_fail"); + + err = bpf_link__destroy(lsm_link); + ASSERT_OK(err, "destroy_lsm"); +} + +static void bpf_cgroup1_invalid_id(struct test_cgroup1_hierarchy *skel) +{ + struct bpf_link *lsm_link, *fentry_link; + int err; + + /* Attach LSM prog first */ + lsm_link = bpf_program__attach_lsm(skel->progs.lsm_run); + if (!ASSERT_OK_PTR(lsm_link, "lsm_attach")) + return; + + /* LSM prog will be triggered when attaching fentry */ + fentry_link = bpf_program__attach_trace(skel->progs.fentry_run); + if (!ASSERT_OK_PTR(fentry_link, "fentry_attach_success")) + goto cleanup; + + err = bpf_link__destroy(fentry_link); + ASSERT_OK(err, "destroy_lsm"); + +cleanup: + err = bpf_link__destroy(lsm_link); + ASSERT_OK(err, "destroy_fentry"); +} + +void test_cgroup1_hierarchy(void) +{ + struct test_cgroup1_hierarchy *skel; + __u64 current_cgid; + int hid, err; + + skel = test_cgroup1_hierarchy__open(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + skel->bss->target_pid = getpid(); + + err = bpf_program__set_attach_target(skel->progs.fentry_run, 0, "bpf_fentry_test1"); + if (!ASSERT_OK(err, "fentry_set_target")) + goto destroy; + + err = test_cgroup1_hierarchy__load(skel); + if (!ASSERT_OK(err, "load")) + goto destroy; + + /* Setup cgroup1 hierarchy */ + err = setup_classid_environment(); + if (!ASSERT_OK(err, "setup_classid_environment")) + goto destroy; + + err = join_classid(); + if (!ASSERT_OK(err, "join_cgroup1")) + goto cleanup; + + current_cgid = get_classid_cgroup_id(); + if (!ASSERT_GE(current_cgid, 0, "cgroup1 id")) + goto cleanup; + + hid = get_cgroup1_hierarchy_id("net_cls"); + if (!ASSERT_GE(hid, 0, "cgroup1 id")) + goto cleanup; + skel->bss->target_hid = hid; + + if (test__start_subtest("test_cgroup1_hierarchy")) { + skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1(skel); + } + + if (test__start_subtest("test_root_cgid")) { + skel->bss->target_ancestor_cgid = 1; + skel->bss->target_ancestor_level = 0; + bpf_cgroup1(skel); + } + + if (test__start_subtest("test_invalid_level")) { + skel->bss->target_ancestor_cgid = 1; + skel->bss->target_ancestor_level = 1; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_invalid_cgid")) { + skel->bss->target_ancestor_cgid = 0; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_invalid_hid")) { + skel->bss->target_ancestor_cgid = 1; + skel->bss->target_ancestor_level = 0; + skel->bss->target_hid = -1; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_invalid_cgrp_name")) { + skel->bss->target_hid = get_cgroup1_hierarchy_id("net_cl"); + skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1_invalid_id(skel); + } + + if 
(test__start_subtest("test_invalid_cgrp_name2")) { + skel->bss->target_hid = get_cgroup1_hierarchy_id("net_cls,"); + skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_sleepable_prog")) { + skel->bss->target_hid = hid; + skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1_sleepable(skel); + } + +cleanup: + cleanup_classid_environment(); +destroy: + test_cgroup1_hierarchy__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c index 9026b42914d3..addf720428f7 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c @@ -71,7 +71,7 @@ void test_cgroup_v1v2(void) } ASSERT_OK(run_test(cgroup_fd, server_fd, false), "cgroup-v2-only"); setup_classid_environment(); - set_classid(42); + set_classid(); ASSERT_OK(run_test(cgroup_fd, server_fd, true), "cgroup-v1v2"); cleanup_classid_environment(); close(server_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c index b25b870f87ba..e6e50a394472 100644 --- a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c @@ -73,6 +73,37 @@ static void test_local_kptr_stash_unstash(void) local_kptr_stash__destroy(skel); } +static void test_refcount_acquire_without_unstash(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct local_kptr_stash *skel; + int ret; + + skel = local_kptr_stash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash), + &opts); + ASSERT_OK(ret, "refcount_acquire_without_unstash run"); + ASSERT_EQ(opts.retval, 2, "refcount_acquire_without_unstash retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_refcounted_node), &opts); + ASSERT_OK(ret, "stash_refcounted_node run"); + ASSERT_OK(opts.retval, "stash_refcounted_node retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash), + &opts); + ASSERT_OK(ret, "refcount_acquire_without_unstash (2) run"); + ASSERT_EQ(opts.retval, 42, "refcount_acquire_without_unstash (2) retval"); + + local_kptr_stash__destroy(skel); +} + static void test_local_kptr_stash_fail(void) { RUN_TESTS(local_kptr_stash_fail); @@ -86,6 +117,8 @@ void test_local_kptr_stash(void) test_local_kptr_stash_plain(); if (test__start_subtest("local_kptr_stash_unstash")) test_local_kptr_stash_unstash(); + if (test__start_subtest("refcount_acquire_without_unstash")) + test_refcount_acquire_without_unstash(); if (test__start_subtest("local_kptr_stash_fail")) test_local_kptr_stash_fail(); } diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c index fe9a23e65ef4..0f7ea4d7d9f6 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_buf.c +++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c @@ -78,7 +78,7 @@ static void obj_load_log_buf(void) ASSERT_OK_PTR(strstr(libbpf_log_buf, "prog 'bad_prog': BPF program load failed"), "libbpf_log_not_empty"); ASSERT_OK_PTR(strstr(obj_log_buf, "DATASEC license"), "obj_log_not_empty"); - ASSERT_OK_PTR(strstr(good_log_buf, "0: R1=ctx(off=0,imm=0) R10=fp0"), + ASSERT_OK_PTR(strstr(good_log_buf, "0: 
R1=ctx() R10=fp0"), "good_log_verbose"); ASSERT_OK_PTR(strstr(bad_log_buf, "invalid access to map value, value_size=16 off=16000 size=4"), "bad_log_not_empty"); @@ -175,7 +175,7 @@ static void bpf_prog_load_log_buf(void) opts.log_level = 2; fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "good_prog", "GPL", good_prog_insns, good_prog_insn_cnt, &opts); - ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx(off=0,imm=0) R10=fp0"), "good_log_2"); + ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx() R10=fp0"), "good_log_2"); ASSERT_GE(fd, 0, "good_fd2"); if (fd >= 0) close(fd); diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c new file mode 100644 index 000000000000..0c9abd279e18 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -0,0 +1,2124 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#define _GNU_SOURCE +#include <limits.h> +#include <test_progs.h> +#include <linux/filter.h> +#include <linux/bpf.h> + +/* ================================= + * SHORT AND CONSISTENT NUMBER TYPES + * ================================= + */ +#define U64_MAX ((u64)UINT64_MAX) +#define U32_MAX ((u32)UINT_MAX) +#define U16_MAX ((u32)UINT_MAX) +#define S64_MIN ((s64)INT64_MIN) +#define S64_MAX ((s64)INT64_MAX) +#define S32_MIN ((s32)INT_MIN) +#define S32_MAX ((s32)INT_MAX) +#define S16_MIN ((s16)0x80000000) +#define S16_MAX ((s16)0x7fffffff) + +typedef unsigned long long ___u64; +typedef unsigned int ___u32; +typedef long long ___s64; +typedef int ___s32; + +/* avoid conflicts with already defined types in kernel headers */ +#define u64 ___u64 +#define u32 ___u32 +#define s64 ___s64 +#define s32 ___s32 + +/* ================================== + * STRING BUF ABSTRACTION AND HELPERS + * ================================== + */ +struct strbuf { + size_t buf_sz; + int pos; + char buf[0]; +}; + +#define DEFINE_STRBUF(name, N) \ + struct { struct strbuf buf; char data[(N)]; } ___##name; \ + struct strbuf *name = (___##name.buf.buf_sz = (N), ___##name.buf.pos = 0, &___##name.buf) + +__printf(2, 3) +static inline void snappendf(struct strbuf *s, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + s->pos += vsnprintf(s->buf + s->pos, + s->pos < s->buf_sz ? s->buf_sz - s->pos : 0, + fmt, args); + va_end(args); +} + +/* ================================== + * GENERIC NUMBER TYPE AND OPERATIONS + * ================================== + */ +enum num_t { U64, first_t = U64, U32, S64, S32, last_t = S32 }; + +static __always_inline u64 min_t(enum num_t t, u64 x, u64 y) +{ + switch (t) { + case U64: return (u64)x < (u64)y ? (u64)x : (u64)y; + case U32: return (u32)x < (u32)y ? (u32)x : (u32)y; + case S64: return (s64)x < (s64)y ? (s64)x : (s64)y; + case S32: return (s32)x < (s32)y ? (s32)x : (s32)y; + default: printf("min_t!\n"); exit(1); + } +} + +static __always_inline u64 max_t(enum num_t t, u64 x, u64 y) +{ + switch (t) { + case U64: return (u64)x > (u64)y ? (u64)x : (u64)y; + case U32: return (u32)x > (u32)y ? (u32)x : (u32)y; + case S64: return (s64)x > (s64)y ? (s64)x : (s64)y; + case S32: return (s32)x > (s32)y ? 
(u32)(s32)x : (u32)(s32)y; + default: printf("max_t!\n"); exit(1); + } +} + +static __always_inline u64 cast_t(enum num_t t, u64 x) +{ + switch (t) { + case U64: return (u64)x; + case U32: return (u32)x; + case S64: return (s64)x; + case S32: return (u32)(s32)x; + default: printf("cast_t!\n"); exit(1); + } +} + +static const char *t_str(enum num_t t) +{ + switch (t) { + case U64: return "u64"; + case U32: return "u32"; + case S64: return "s64"; + case S32: return "s32"; + default: printf("t_str!\n"); exit(1); + } +} + +static enum num_t t_is_32(enum num_t t) +{ + switch (t) { + case U64: return false; + case U32: return true; + case S64: return false; + case S32: return true; + default: printf("t_is_32!\n"); exit(1); + } +} + +static enum num_t t_signed(enum num_t t) +{ + switch (t) { + case U64: return S64; + case U32: return S32; + case S64: return S64; + case S32: return S32; + default: printf("t_signed!\n"); exit(1); + } +} + +static enum num_t t_unsigned(enum num_t t) +{ + switch (t) { + case U64: return U64; + case U32: return U32; + case S64: return U64; + case S32: return U32; + default: printf("t_unsigned!\n"); exit(1); + } +} + +#define UNUM_MAX_DECIMAL U16_MAX +#define SNUM_MAX_DECIMAL S16_MAX +#define SNUM_MIN_DECIMAL S16_MIN + +static bool num_is_small(enum num_t t, u64 x) +{ + switch (t) { + case U64: return (u64)x <= UNUM_MAX_DECIMAL; + case U32: return (u32)x <= UNUM_MAX_DECIMAL; + case S64: return (s64)x >= SNUM_MIN_DECIMAL && (s64)x <= SNUM_MAX_DECIMAL; + case S32: return (s32)x >= SNUM_MIN_DECIMAL && (s32)x <= SNUM_MAX_DECIMAL; + default: printf("num_is_small!\n"); exit(1); + } +} + +static void snprintf_num(enum num_t t, struct strbuf *sb, u64 x) +{ + bool is_small = num_is_small(t, x); + + if (is_small) { + switch (t) { + case U64: return snappendf(sb, "%llu", (u64)x); + case U32: return snappendf(sb, "%u", (u32)x); + case S64: return snappendf(sb, "%lld", (s64)x); + case S32: return snappendf(sb, "%d", (s32)x); + default: printf("snprintf_num!\n"); exit(1); + } + } else { + switch (t) { + case U64: + if (x == U64_MAX) + return snappendf(sb, "U64_MAX"); + else if (x >= U64_MAX - 256) + return snappendf(sb, "U64_MAX-%llu", U64_MAX - x); + else + return snappendf(sb, "%#llx", (u64)x); + case U32: + if ((u32)x == U32_MAX) + return snappendf(sb, "U32_MAX"); + else if ((u32)x >= U32_MAX - 256) + return snappendf(sb, "U32_MAX-%u", U32_MAX - (u32)x); + else + return snappendf(sb, "%#x", (u32)x); + case S64: + if ((s64)x == S64_MAX) + return snappendf(sb, "S64_MAX"); + else if ((s64)x >= S64_MAX - 256) + return snappendf(sb, "S64_MAX-%lld", S64_MAX - (s64)x); + else if ((s64)x == S64_MIN) + return snappendf(sb, "S64_MIN"); + else if ((s64)x <= S64_MIN + 256) + return snappendf(sb, "S64_MIN+%lld", (s64)x - S64_MIN); + else + return snappendf(sb, "%#llx", (s64)x); + case S32: + if ((s32)x == S32_MAX) + return snappendf(sb, "S32_MAX"); + else if ((s32)x >= S32_MAX - 256) + return snappendf(sb, "S32_MAX-%d", S32_MAX - (s32)x); + else if ((s32)x == S32_MIN) + return snappendf(sb, "S32_MIN"); + else if ((s32)x <= S32_MIN + 256) + return snappendf(sb, "S32_MIN+%d", (s32)x - S32_MIN); + else + return snappendf(sb, "%#x", (s32)x); + default: printf("snprintf_num!\n"); exit(1); + } + } +} + +/* =================================== + * GENERIC RANGE STRUCT AND OPERATIONS + * =================================== + */ +struct range { + u64 a, b; +}; + +static void snprintf_range(enum num_t t, struct strbuf *sb, struct range x) +{ + if (x.a == x.b) + return snprintf_num(t, sb, x.a); + + 
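	/*
	 * Illustrative usage sketch (not part of the original patch): a
	 * non-singleton range is rendered by the calls below as "[<lo>; <hi>]",
	 * each bound going through snprintf_num(). For example:
	 *
	 *	DEFINE_STRBUF(sb, 64);
	 *	snprintf_range(U64, sb, range(U64, 0, U64_MAX));
	 *	printf("%s\n", sb->buf);	// prints "[0; U64_MAX]"
	 *
	 * 0 is small enough to print as a plain decimal, while U64_MAX hits
	 * the named-constant branch of snprintf_num().
	 */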
snappendf(sb, "["); + snprintf_num(t, sb, x.a); + snappendf(sb, "; "); + snprintf_num(t, sb, x.b); + snappendf(sb, "]"); +} + +static void print_range(enum num_t t, struct range x, const char *sfx) +{ + DEFINE_STRBUF(sb, 128); + + snprintf_range(t, sb, x); + printf("%s%s", sb->buf, sfx); +} + +static const struct range unkn[] = { + [U64] = { 0, U64_MAX }, + [U32] = { 0, U32_MAX }, + [S64] = { (u64)S64_MIN, (u64)S64_MAX }, + [S32] = { (u64)(u32)S32_MIN, (u64)(u32)S32_MAX }, +}; + +static struct range unkn_subreg(enum num_t t) +{ + switch (t) { + case U64: return unkn[U32]; + case U32: return unkn[U32]; + case S64: return unkn[U32]; + case S32: return unkn[S32]; + default: printf("unkn_subreg!\n"); exit(1); + } +} + +static struct range range(enum num_t t, u64 a, u64 b) +{ + switch (t) { + case U64: return (struct range){ (u64)a, (u64)b }; + case U32: return (struct range){ (u32)a, (u32)b }; + case S64: return (struct range){ (s64)a, (s64)b }; + case S32: return (struct range){ (u32)(s32)a, (u32)(s32)b }; + default: printf("range!\n"); exit(1); + } +} + +static __always_inline u32 sign64(u64 x) { return (x >> 63) & 1; } +static __always_inline u32 sign32(u64 x) { return ((u32)x >> 31) & 1; } +static __always_inline u32 upper32(u64 x) { return (u32)(x >> 32); } +static __always_inline u64 swap_low32(u64 x, u32 y) { return (x & 0xffffffff00000000ULL) | y; } + +static bool range_eq(struct range x, struct range y) +{ + return x.a == y.a && x.b == y.b; +} + +static struct range range_cast_to_s32(struct range x) +{ + u64 a = x.a, b = x.b; + + /* if upper 32 bits are constant, lower 32 bits should form a proper + * s32 range to be correct + */ + if (upper32(a) == upper32(b) && (s32)a <= (s32)b) + return range(S32, a, b); + + /* Special case where upper bits form a small sequence of two + * sequential numbers (in 32-bit unsigned space, so 0xffffffff to + * 0x00000000 is also valid), while lower bits form a proper s32 range + * going from negative numbers to positive numbers. + * + * E.g.: [0xfffffff0ffffff00; 0xfffffff100000010]. Iterating + * over full 64-bit numbers range will form a proper [-16, 16] + * ([0xffffff00; 0x00000010]) range in its lower 32 bits. 
+ */ + if (upper32(a) + 1 == upper32(b) && (s32)a < 0 && (s32)b >= 0) + return range(S32, a, b); + + /* otherwise we can't derive much meaningful information */ + return unkn[S32]; +} + +static struct range range_cast_u64(enum num_t to_t, struct range x) +{ + u64 a = (u64)x.a, b = (u64)x.b; + + switch (to_t) { + case U64: + return x; + case U32: + if (upper32(a) != upper32(b)) + return unkn[U32]; + return range(U32, a, b); + case S64: + if (sign64(a) != sign64(b)) + return unkn[S64]; + return range(S64, a, b); + case S32: + return range_cast_to_s32(x); + default: printf("range_cast_u64!\n"); exit(1); + } +} + +static struct range range_cast_s64(enum num_t to_t, struct range x) +{ + s64 a = (s64)x.a, b = (s64)x.b; + + switch (to_t) { + case U64: + /* equivalent to (s64)a <= (s64)b check */ + if (sign64(a) != sign64(b)) + return unkn[U64]; + return range(U64, a, b); + case U32: + if (upper32(a) != upper32(b) || sign32(a) != sign32(b)) + return unkn[U32]; + return range(U32, a, b); + case S64: + return x; + case S32: + return range_cast_to_s32(x); + default: printf("range_cast_s64!\n"); exit(1); + } +} + +static struct range range_cast_u32(enum num_t to_t, struct range x) +{ + u32 a = (u32)x.a, b = (u32)x.b; + + switch (to_t) { + case U64: + case S64: + /* u32 is always a valid zero-extended u64/s64 */ + return range(to_t, a, b); + case U32: + return x; + case S32: + return range_cast_to_s32(range(U32, a, b)); + default: printf("range_cast_u32!\n"); exit(1); + } +} + +static struct range range_cast_s32(enum num_t to_t, struct range x) +{ + s32 a = (s32)x.a, b = (s32)x.b; + + switch (to_t) { + case U64: + case U32: + case S64: + if (sign32(a) != sign32(b)) + return unkn[to_t]; + return range(to_t, a, b); + case S32: + return x; + default: printf("range_cast_s32!\n"); exit(1); + } +} + +/* Reinterpret range in *from_t* domain as a range in *to_t* domain preserving + * all possible information. 
Worst case, it will be unknown range within + * *to_t* domain, if nothing more specific can be guaranteed during the + * conversion + */ +static struct range range_cast(enum num_t from_t, enum num_t to_t, struct range from) +{ + switch (from_t) { + case U64: return range_cast_u64(to_t, from); + case U32: return range_cast_u32(to_t, from); + case S64: return range_cast_s64(to_t, from); + case S32: return range_cast_s32(to_t, from); + default: printf("range_cast!\n"); exit(1); + } +} + +static bool is_valid_num(enum num_t t, u64 x) +{ + switch (t) { + case U64: return true; + case U32: return upper32(x) == 0; + case S64: return true; + case S32: return upper32(x) == 0; + default: printf("is_valid_num!\n"); exit(1); + } +} + +static bool is_valid_range(enum num_t t, struct range x) +{ + if (!is_valid_num(t, x.a) || !is_valid_num(t, x.b)) + return false; + + switch (t) { + case U64: return (u64)x.a <= (u64)x.b; + case U32: return (u32)x.a <= (u32)x.b; + case S64: return (s64)x.a <= (s64)x.b; + case S32: return (s32)x.a <= (s32)x.b; + default: printf("is_valid_range!\n"); exit(1); + } +} + +static struct range range_improve(enum num_t t, struct range old, struct range new) +{ + return range(t, max_t(t, old.a, new.a), min_t(t, old.b, new.b)); +} + +static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, struct range y) +{ + struct range y_cast; + + y_cast = range_cast(y_t, x_t, y); + + /* the case when new range knowledge, *y*, is a 32-bit subregister + * range, while previous range knowledge, *x*, is a full register + * 64-bit range, needs special treatment to take into account upper 32 + * bits of full register range + */ + if (t_is_32(y_t) && !t_is_32(x_t)) { + struct range x_swap; + + /* some combinations of upper 32 bits and sign bit can lead to + * invalid ranges, in such cases it's easier to detect them + * after cast/swap than try to enumerate all the conditions + * under which transformation and knowledge transfer is valid + */ + x_swap = range(x_t, swap_low32(x.a, y_cast.a), swap_low32(x.b, y_cast.b)); + if (!is_valid_range(x_t, x_swap)) + return x; + return range_improve(x_t, x, x_swap); + } + + /* otherwise, plain range cast and intersection works */ + return range_improve(x_t, x, y_cast); +} + +/* ======================= + * GENERIC CONDITIONAL OPS + * ======================= + */ +enum op { OP_LT, OP_LE, OP_GT, OP_GE, OP_EQ, OP_NE, first_op = OP_LT, last_op = OP_NE }; + +static enum op complement_op(enum op op) +{ + switch (op) { + case OP_LT: return OP_GE; + case OP_LE: return OP_GT; + case OP_GT: return OP_LE; + case OP_GE: return OP_LT; + case OP_EQ: return OP_NE; + case OP_NE: return OP_EQ; + default: printf("complement_op!\n"); exit(1); + } +} + +static const char *op_str(enum op op) +{ + switch (op) { + case OP_LT: return "<"; + case OP_LE: return "<="; + case OP_GT: return ">"; + case OP_GE: return ">="; + case OP_EQ: return "=="; + case OP_NE: return "!="; + default: printf("op_str!\n"); exit(1); + } +} + +/* Can register with range [x.a, x.b] *EVER* satisfy + * OP (<, <=, >, >=, ==, !=) relation to + * a regsiter with range [y.a, y.b] + * _in *num_t* domain_ + */ +static bool range_canbe_op(enum num_t t, struct range x, struct range y, enum op op) +{ +#define range_canbe(T) do { \ + switch (op) { \ + case OP_LT: return (T)x.a < (T)y.b; \ + case OP_LE: return (T)x.a <= (T)y.b; \ + case OP_GT: return (T)x.b > (T)y.a; \ + case OP_GE: return (T)x.b >= (T)y.a; \ + case OP_EQ: return (T)max_t(t, x.a, y.a) <= (T)min_t(t, x.b, y.b); \ + case OP_NE: return 
!((T)x.a == (T)x.b && (T)y.a == (T)y.b && (T)x.a == (T)y.a); \ + default: printf("range_canbe op %d\n", op); exit(1); \ + } \ +} while (0) + + switch (t) { + case U64: { range_canbe(u64); } + case U32: { range_canbe(u32); } + case S64: { range_canbe(s64); } + case S32: { range_canbe(s32); } + default: printf("range_canbe!\n"); exit(1); + } +#undef range_canbe +} + +/* Does register with range [x.a, x.b] *ALWAYS* satisfy + * OP (<, <=, >, >=, ==, !=) relation to + * a regsiter with range [y.a, y.b] + * _in *num_t* domain_ + */ +static bool range_always_op(enum num_t t, struct range x, struct range y, enum op op) +{ + /* always op <=> ! canbe complement(op) */ + return !range_canbe_op(t, x, y, complement_op(op)); +} + +/* Does register with range [x.a, x.b] *NEVER* satisfy + * OP (<, <=, >, >=, ==, !=) relation to + * a regsiter with range [y.a, y.b] + * _in *num_t* domain_ + */ +static bool range_never_op(enum num_t t, struct range x, struct range y, enum op op) +{ + return !range_canbe_op(t, x, y, op); +} + +/* similar to verifier's is_branch_taken(): + * 1 - always taken; + * 0 - never taken, + * -1 - unsure. + */ +static int range_branch_taken_op(enum num_t t, struct range x, struct range y, enum op op) +{ + if (range_always_op(t, x, y, op)) + return 1; + if (range_never_op(t, x, y, op)) + return 0; + return -1; +} + +/* What would be the new estimates for register x and y ranges assuming truthful + * OP comparison between them. I.e., (x OP y == true) => x <- newx, y <- newy. + * + * We assume "interesting" cases where ranges overlap. Cases where it's + * obvious that (x OP y) is either always true or false should be filtered with + * range_never and range_always checks. + */ +static void range_cond(enum num_t t, struct range x, struct range y, + enum op op, struct range *newx, struct range *newy) +{ + if (!range_canbe_op(t, x, y, op)) { + /* nothing to adjust, can't happen, return original values */ + *newx = x; + *newy = y; + return; + } + switch (op) { + case OP_LT: + *newx = range(t, x.a, min_t(t, x.b, y.b - 1)); + *newy = range(t, max_t(t, x.a + 1, y.a), y.b); + break; + case OP_LE: + *newx = range(t, x.a, min_t(t, x.b, y.b)); + *newy = range(t, max_t(t, x.a, y.a), y.b); + break; + case OP_GT: + *newx = range(t, max_t(t, x.a, y.a + 1), x.b); + *newy = range(t, y.a, min_t(t, x.b - 1, y.b)); + break; + case OP_GE: + *newx = range(t, max_t(t, x.a, y.a), x.b); + *newy = range(t, y.a, min_t(t, x.b, y.b)); + break; + case OP_EQ: + *newx = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b)); + *newy = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b)); + break; + case OP_NE: + /* generic case, can't derive more information */ + *newx = range(t, x.a, x.b); + *newy = range(t, y.a, y.b); + break; + + /* below extended logic is not supported by verifier just yet */ + if (x.a == x.b && x.a == y.a) { + /* X is a constant matching left side of Y */ + *newx = range(t, x.a, x.b); + *newy = range(t, y.a + 1, y.b); + } else if (x.a == x.b && x.b == y.b) { + /* X is a constant matching rigth side of Y */ + *newx = range(t, x.a, x.b); + *newy = range(t, y.a, y.b - 1); + } else if (y.a == y.b && x.a == y.a) { + /* Y is a constant matching left side of X */ + *newx = range(t, x.a + 1, x.b); + *newy = range(t, y.a, y.b); + } else if (y.a == y.b && x.b == y.b) { + /* Y is a constant matching rigth side of X */ + *newx = range(t, x.a, x.b - 1); + *newy = range(t, y.a, y.b); + } else { + /* generic case, can't derive more information */ + *newx = range(t, x.a, x.b); + *newy = range(t, y.a, y.b); + } + + break; + 
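	/*
	 * Worked example (illustrative, not part of the original patch): for
	 * t == U64, x = [0; 10], y = [5; 7] and op == OP_LT, the OP_LT arm
	 * above yields
	 *	*newx = [x.a; min(x.b, y.b - 1)] = [0; 6]
	 *	*newy = [max(x.a + 1, y.a); y.b] = [5; 7]
	 * i.e. x is capped one below y's upper bound and y must stay strictly
	 * above x's lower bound.
	 */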
default: + break; + } +} + +/* ======================= + * REGISTER STATE HANDLING + * ======================= + */ +struct reg_state { + struct range r[4]; /* indexed by enum num_t: U64, U32, S64, S32 */ + bool valid; +}; + +static void print_reg_state(struct reg_state *r, const char *sfx) +{ + DEFINE_STRBUF(sb, 512); + enum num_t t; + int cnt = 0; + + if (!r->valid) { + printf("<not found>%s", sfx); + return; + } + + snappendf(sb, "scalar("); + for (t = first_t; t <= last_t; t++) { + snappendf(sb, "%s%s=", cnt++ ? "," : "", t_str(t)); + snprintf_range(t, sb, r->r[t]); + } + snappendf(sb, ")"); + + printf("%s%s", sb->buf, sfx); +} + +static void print_refinement(enum num_t s_t, struct range src, + enum num_t d_t, struct range old, struct range new, + const char *ctx) +{ + printf("REFINING (%s) (%s)SRC=", ctx, t_str(s_t)); + print_range(s_t, src, ""); + printf(" (%s)DST_OLD=", t_str(d_t)); + print_range(d_t, old, ""); + printf(" (%s)DST_NEW=", t_str(d_t)); + print_range(d_t, new, "\n"); +} + +static void reg_state_refine(struct reg_state *r, enum num_t t, struct range x, const char *ctx) +{ + enum num_t d_t, s_t; + struct range old; + bool keep_going = false; + +again: + /* try to derive new knowledge from just learned range x of type t */ + for (d_t = first_t; d_t <= last_t; d_t++) { + old = r->r[d_t]; + r->r[d_t] = range_refine(d_t, r->r[d_t], t, x); + if (!range_eq(r->r[d_t], old)) { + keep_going = true; + if (env.verbosity >= VERBOSE_VERY) + print_refinement(t, x, d_t, old, r->r[d_t], ctx); + } + } + + /* now see if we can derive anything new from updated reg_state's ranges */ + for (s_t = first_t; s_t <= last_t; s_t++) { + for (d_t = first_t; d_t <= last_t; d_t++) { + old = r->r[d_t]; + r->r[d_t] = range_refine(d_t, r->r[d_t], s_t, r->r[s_t]); + if (!range_eq(r->r[d_t], old)) { + keep_going = true; + if (env.verbosity >= VERBOSE_VERY) + print_refinement(s_t, r->r[s_t], d_t, old, r->r[d_t], ctx); + } + } + } + + /* keep refining until we converge */ + if (keep_going) { + keep_going = false; + goto again; + } +} + +static void reg_state_set_const(struct reg_state *rs, enum num_t t, u64 val) +{ + enum num_t tt; + + rs->valid = true; + for (tt = first_t; tt <= last_t; tt++) + rs->r[tt] = tt == t ? 
range(t, val, val) : unkn[tt]; + + reg_state_refine(rs, t, rs->r[t], "CONST"); +} + +static void reg_state_cond(enum num_t t, struct reg_state *x, struct reg_state *y, enum op op, + struct reg_state *newx, struct reg_state *newy, const char *ctx) +{ + char buf[32]; + enum num_t ts[2]; + struct reg_state xx = *x, yy = *y; + int i, t_cnt; + struct range z1, z2; + + if (op == OP_EQ || op == OP_NE) { + /* OP_EQ and OP_NE are sign-agnostic, so we need to process + * both signed and unsigned domains at the same time + */ + ts[0] = t_unsigned(t); + ts[1] = t_signed(t); + t_cnt = 2; + } else { + ts[0] = t; + t_cnt = 1; + } + + for (i = 0; i < t_cnt; i++) { + t = ts[i]; + z1 = x->r[t]; + z2 = y->r[t]; + + range_cond(t, z1, z2, op, &z1, &z2); + + if (newx) { + snprintf(buf, sizeof(buf), "%s R1", ctx); + reg_state_refine(&xx, t, z1, buf); + } + if (newy) { + snprintf(buf, sizeof(buf), "%s R2", ctx); + reg_state_refine(&yy, t, z2, buf); + } + } + + if (newx) + *newx = xx; + if (newy) + *newy = yy; +} + +static int reg_state_branch_taken_op(enum num_t t, struct reg_state *x, struct reg_state *y, + enum op op) +{ + if (op == OP_EQ || op == OP_NE) { + /* OP_EQ and OP_NE are sign-agnostic */ + enum num_t tu = t_unsigned(t); + enum num_t ts = t_signed(t); + int br_u, br_s, br; + + br_u = range_branch_taken_op(tu, x->r[tu], y->r[tu], op); + br_s = range_branch_taken_op(ts, x->r[ts], y->r[ts], op); + + if (br_u >= 0 && br_s >= 0 && br_u != br_s) + ASSERT_FALSE(true, "branch taken inconsistency!\n"); + + /* if 64-bit ranges are indecisive, use 32-bit subranges to + * eliminate always/never taken branches, if possible + */ + if (br_u == -1 && (t == U64 || t == S64)) { + br = range_branch_taken_op(U32, x->r[U32], y->r[U32], op); + /* we can only reject for OP_EQ, never take branch + * based on lower 32 bits + */ + if (op == OP_EQ && br == 0) + return 0; + /* for OP_NEQ we can be conclusive only if lower 32 bits + * differ and thus inequality branch is always taken + */ + if (op == OP_NE && br == 1) + return 1; + + br = range_branch_taken_op(S32, x->r[S32], y->r[S32], op); + if (op == OP_EQ && br == 0) + return 0; + if (op == OP_NE && br == 1) + return 1; + } + + return br_u >= 0 ? br_u : br_s; + } + return range_branch_taken_op(t, x->r[t], y->r[t], op); +} + +/* ===================================== + * BPF PROGS GENERATION AND VERIFICATION + * ===================================== + */ +struct case_spec { + /* whether to init full register (r1) or sub-register (w1) */ + bool init_subregs; + /* whether to establish initial value range on full register (r1) or + * sub-register (w1) + */ + bool setup_subregs; + /* whether to establish initial value range using signed or unsigned + * comparisons (i.e., initialize umin/umax or smin/smax directly) + */ + bool setup_signed; + /* whether to perform comparison on full registers or sub-registers */ + bool compare_subregs; + /* whether to perform comparison using signed or unsigned operations */ + bool compare_signed; +}; + +/* Generate test BPF program based on provided test ranges, operation, and + * specifications about register bitness and signedness. 
+ */ +static int load_range_cmp_prog(struct range x, struct range y, enum op op, + int branch_taken, struct case_spec spec, + char *log_buf, size_t log_sz, + int *false_pos, int *true_pos) +{ +#define emit(insn) ({ \ + struct bpf_insn __insns[] = { insn }; \ + int __i; \ + for (__i = 0; __i < ARRAY_SIZE(__insns); __i++) \ + insns[cur_pos + __i] = __insns[__i]; \ + cur_pos += __i; \ +}) +#define JMP_TO(target) (target - cur_pos - 1) + int cur_pos = 0, exit_pos, fd, op_code; + struct bpf_insn insns[64]; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .log_level = 2, + .log_buf = log_buf, + .log_size = log_sz, + .prog_flags = BPF_F_TEST_REG_INVARIANTS, + ); + + /* ; skip exit block below + * goto +2; + */ + emit(BPF_JMP_A(2)); + exit_pos = cur_pos; + /* ; exit block for all the preparatory conditionals + * out: + * r0 = 0; + * exit; + */ + emit(BPF_MOV64_IMM(BPF_REG_0, 0)); + emit(BPF_EXIT_INSN()); + /* + * ; assign r6/w6 and r7/w7 unpredictable u64/u32 value + * call bpf_get_current_pid_tgid; + * r6 = r0; | w6 = w0; + * call bpf_get_current_pid_tgid; + * r7 = r0; | w7 = w0; + */ + emit(BPF_EMIT_CALL(BPF_FUNC_get_current_pid_tgid)); + if (spec.init_subregs) + emit(BPF_MOV32_REG(BPF_REG_6, BPF_REG_0)); + else + emit(BPF_MOV64_REG(BPF_REG_6, BPF_REG_0)); + emit(BPF_EMIT_CALL(BPF_FUNC_get_current_pid_tgid)); + if (spec.init_subregs) + emit(BPF_MOV32_REG(BPF_REG_7, BPF_REG_0)); + else + emit(BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); + /* ; setup initial r6/w6 possible value range ([x.a, x.b]) + * r1 = %[x.a] ll; | w1 = %[x.a]; + * r2 = %[x.b] ll; | w2 = %[x.b]; + * if r6 < r1 goto out; | if w6 < w1 goto out; + * if r6 > r2 goto out; | if w6 > w2 goto out; + */ + if (spec.setup_subregs) { + emit(BPF_MOV32_IMM(BPF_REG_1, (s32)x.a)); + emit(BPF_MOV32_IMM(BPF_REG_2, (s32)x.b)); + emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_6, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT, + BPF_REG_6, BPF_REG_2, JMP_TO(exit_pos))); + } else { + emit(BPF_LD_IMM64(BPF_REG_1, x.a)); + emit(BPF_LD_IMM64(BPF_REG_2, x.b)); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_6, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT, + BPF_REG_6, BPF_REG_2, JMP_TO(exit_pos))); + } + /* ; setup initial r7/w7 possible value range ([y.a, y.b]) + * r1 = %[y.a] ll; | w1 = %[y.a]; + * r2 = %[y.b] ll; | w2 = %[y.b]; + * if r7 < r1 goto out; | if w7 < w1 goto out; + * if r7 > r2 goto out; | if w7 > w2 goto out; + */ + if (spec.setup_subregs) { + emit(BPF_MOV32_IMM(BPF_REG_1, (s32)y.a)); + emit(BPF_MOV32_IMM(BPF_REG_2, (s32)y.b)); + emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_7, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT, + BPF_REG_7, BPF_REG_2, JMP_TO(exit_pos))); + } else { + emit(BPF_LD_IMM64(BPF_REG_1, y.a)); + emit(BPF_LD_IMM64(BPF_REG_2, y.b)); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_7, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT, + BPF_REG_7, BPF_REG_2, JMP_TO(exit_pos))); + } + /* ; range test instruction + * if r6 <op> r7 goto +3; | if w6 <op> w7 goto +3; + */ + switch (op) { + case OP_LT: op_code = spec.compare_signed ? BPF_JSLT : BPF_JLT; break; + case OP_LE: op_code = spec.compare_signed ? BPF_JSLE : BPF_JLE; break; + case OP_GT: op_code = spec.compare_signed ? BPF_JSGT : BPF_JGT; break; + case OP_GE: op_code = spec.compare_signed ? 
BPF_JSGE : BPF_JGE; break; + case OP_EQ: op_code = BPF_JEQ; break; + case OP_NE: op_code = BPF_JNE; break; + default: + printf("unrecognized op %d\n", op); + return -ENOTSUP; + } + /* ; BEFORE conditional, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably + * ; this is used for debugging, as verifier doesn't always print + * ; registers states as of condition jump instruction (e.g., when + * ; precision marking happens) + * r0 = r6; | w0 = w6; + * r0 = r7; | w0 = w7; + */ + if (spec.compare_subregs) { + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7)); + } else { + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); + } + if (spec.compare_subregs) + emit(BPF_JMP32_REG(op_code, BPF_REG_6, BPF_REG_7, 3)); + else + emit(BPF_JMP_REG(op_code, BPF_REG_6, BPF_REG_7, 3)); + /* ; FALSE branch, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably + * r0 = r6; | w0 = w6; + * r0 = r7; | w0 = w7; + * exit; + */ + *false_pos = cur_pos; + if (spec.compare_subregs) { + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7)); + } else { + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); + } + if (branch_taken == 1) /* false branch is never taken */ + emit(BPF_EMIT_CALL(0xDEAD)); /* poison this branch */ + else + emit(BPF_EXIT_INSN()); + /* ; TRUE branch, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably + * r0 = r6; | w0 = w6; + * r0 = r7; | w0 = w7; + * exit; + */ + *true_pos = cur_pos; + if (spec.compare_subregs) { + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7)); + } else { + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); + } + if (branch_taken == 0) /* true branch is never taken */ + emit(BPF_EMIT_CALL(0xDEAD)); /* poison this branch */ + emit(BPF_EXIT_INSN()); /* last instruction has to be exit */ + + fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "reg_bounds_test", + "GPL", insns, cur_pos, &opts); + if (fd < 0) + return fd; + + close(fd); + return 0; +#undef emit +#undef JMP_TO +} + +#define str_has_pfx(str, pfx) (strncmp(str, pfx, strlen(pfx)) == 0) + +/* Parse register state from verifier log. + * `s` should point to the start of "Rx = ..." substring in the verifier log. + */ +static int parse_reg_state(const char *s, struct reg_state *reg) +{ + /* There are two generic forms for SCALAR register: + * - known constant: R6_rwD=P%lld + * - range: R6_rwD=scalar(id=1,...), where "..." is a comma-separated + * list of optional range specifiers: + * - umin=%llu, if missing, assumed 0; + * - umax=%llu, if missing, assumed U64_MAX; + * - smin=%lld, if missing, assumed S64_MIN; + * - smax=%lld, if missing, assummed S64_MAX; + * - umin32=%d, if missing, assumed 0; + * - umax32=%d, if missing, assumed U32_MAX; + * - smin32=%d, if missing, assumed S32_MIN; + * - smax32=%d, if missing, assummed S32_MAX; + * - var_off=(%#llx; %#llx), tnum part, we don't care about it. + * + * If some of the values are equal, they will be grouped (but min/max + * are not mixed together, and similarly negative values are not + * grouped with non-negative ones). E.g.: + * + * R6_w=Pscalar(smin=smin32=0, smax=umax=umax32=1000) + * + * _rwD part is optional (and any of the letters can be missing). + * P (precision mark) is optional as well. + * + * Anything inside scalar() is optional, including id, of course. 
+ */ + struct { + const char *pfx; + u64 *dst, def; + bool is_32, is_set; + } *f, fields[8] = { + {"smin=", ®->r[S64].a, S64_MIN}, + {"smax=", ®->r[S64].b, S64_MAX}, + {"umin=", ®->r[U64].a, 0}, + {"umax=", ®->r[U64].b, U64_MAX}, + {"smin32=", ®->r[S32].a, (u32)S32_MIN, true}, + {"smax32=", ®->r[S32].b, (u32)S32_MAX, true}, + {"umin32=", ®->r[U32].a, 0, true}, + {"umax32=", ®->r[U32].b, U32_MAX, true}, + }; + const char *p; + int i; + + p = strchr(s, '='); + if (!p) + return -EINVAL; + p++; + if (*p == 'P') + p++; + + if (!str_has_pfx(p, "scalar(")) { + long long sval; + enum num_t t; + + if (p[0] == '0' && p[1] == 'x') { + if (sscanf(p, "%llx", &sval) != 1) + return -EINVAL; + } else { + if (sscanf(p, "%lld", &sval) != 1) + return -EINVAL; + } + + reg->valid = true; + for (t = first_t; t <= last_t; t++) { + reg->r[t] = range(t, sval, sval); + } + return 0; + } + + p += sizeof("scalar"); + while (p) { + int midxs[ARRAY_SIZE(fields)], mcnt = 0; + u64 val; + + for (i = 0; i < ARRAY_SIZE(fields); i++) { + f = &fields[i]; + if (!str_has_pfx(p, f->pfx)) + continue; + midxs[mcnt++] = i; + p += strlen(f->pfx); + } + + if (mcnt) { + /* populate all matched fields */ + if (p[0] == '0' && p[1] == 'x') { + if (sscanf(p, "%llx", &val) != 1) + return -EINVAL; + } else { + if (sscanf(p, "%lld", &val) != 1) + return -EINVAL; + } + + for (i = 0; i < mcnt; i++) { + f = &fields[midxs[i]]; + f->is_set = true; + *f->dst = f->is_32 ? (u64)(u32)val : val; + } + } else if (str_has_pfx(p, "var_off")) { + /* skip "var_off=(0x0; 0x3f)" part completely */ + p = strchr(p, ')'); + if (!p) + return -EINVAL; + p++; + } + + p = strpbrk(p, ",)"); + if (*p == ')') + break; + if (p) + p++; + } + + reg->valid = true; + + for (i = 0; i < ARRAY_SIZE(fields); i++) { + f = &fields[i]; + if (!f->is_set) + *f->dst = f->def; + } + + return 0; +} + + +/* Parse all register states (TRUE/FALSE branches and DST/SRC registers) + * out of the verifier log for a corresponding test case BPF program. + */ +static int parse_range_cmp_log(const char *log_buf, struct case_spec spec, + int false_pos, int true_pos, + struct reg_state *false1_reg, struct reg_state *false2_reg, + struct reg_state *true1_reg, struct reg_state *true2_reg) +{ + struct { + int insn_idx; + int reg_idx; + const char *reg_upper; + struct reg_state *state; + } specs[] = { + {false_pos, 6, "R6=", false1_reg}, + {false_pos + 1, 7, "R7=", false2_reg}, + {true_pos, 6, "R6=", true1_reg}, + {true_pos + 1, 7, "R7=", true2_reg}, + }; + char buf[32]; + const char *p = log_buf, *q; + int i, err; + + for (i = 0; i < 4; i++) { + sprintf(buf, "%d: (%s) %s = %s%d", specs[i].insn_idx, + spec.compare_subregs ? "bc" : "bf", + spec.compare_subregs ? "w0" : "r0", + spec.compare_subregs ? 
"w" : "r", specs[i].reg_idx); + + q = strstr(p, buf); + if (!q) { + *specs[i].state = (struct reg_state){.valid = false}; + continue; + } + p = strstr(q, specs[i].reg_upper); + if (!p) + return -EINVAL; + err = parse_reg_state(p, specs[i].state); + if (err) + return -EINVAL; + } + return 0; +} + +/* Validate ranges match, and print details if they don't */ +static bool assert_range_eq(enum num_t t, struct range x, struct range y, + const char *ctx1, const char *ctx2) +{ + DEFINE_STRBUF(sb, 512); + + if (range_eq(x, y)) + return true; + + snappendf(sb, "MISMATCH %s.%s: ", ctx1, ctx2); + snprintf_range(t, sb, x); + snappendf(sb, " != "); + snprintf_range(t, sb, y); + + printf("%s\n", sb->buf); + + return false; +} + +/* Validate that register states match, and print details if they don't */ +static bool assert_reg_state_eq(struct reg_state *r, struct reg_state *e, const char *ctx) +{ + bool ok = true; + enum num_t t; + + if (r->valid != e->valid) { + printf("MISMATCH %s: actual %s != expected %s\n", ctx, + r->valid ? "<valid>" : "<invalid>", + e->valid ? "<valid>" : "<invalid>"); + return false; + } + + if (!r->valid) + return true; + + for (t = first_t; t <= last_t; t++) { + if (!assert_range_eq(t, r->r[t], e->r[t], ctx, t_str(t))) + ok = false; + } + + return ok; +} + +/* Printf verifier log, filtering out irrelevant noise */ +static void print_verifier_log(const char *buf) +{ + const char *p; + + while (buf[0]) { + p = strchrnul(buf, '\n'); + + /* filter out irrelevant precision backtracking logs */ + if (str_has_pfx(buf, "mark_precise: ")) + goto skip_line; + + printf("%.*s\n", (int)(p - buf), buf); + +skip_line: + buf = *p == '\0' ? p : p + 1; + } +} + +/* Simulate provided test case purely with our own range-based logic. + * This is done to set up expectations for verifier's branch_taken logic and + * verifier's register states in the verifier log. + */ +static void sim_case(enum num_t init_t, enum num_t cond_t, + struct range x, struct range y, enum op op, + struct reg_state *fr1, struct reg_state *fr2, + struct reg_state *tr1, struct reg_state *tr2, + int *branch_taken) +{ + const u64 A = x.a; + const u64 B = x.b; + const u64 C = y.a; + const u64 D = y.b; + struct reg_state rc; + enum op rev_op = complement_op(op); + enum num_t t; + + fr1->valid = fr2->valid = true; + tr1->valid = tr2->valid = true; + for (t = first_t; t <= last_t; t++) { + /* if we are initializing using 32-bit subregisters, + * full registers get upper 32 bits zeroed automatically + */ + struct range z = t_is_32(init_t) ? 
unkn_subreg(t) : unkn[t]; + + fr1->r[t] = fr2->r[t] = tr1->r[t] = tr2->r[t] = z; + } + + /* step 1: r1 >= A, r2 >= C */ + reg_state_set_const(&rc, init_t, A); + reg_state_cond(init_t, fr1, &rc, OP_GE, fr1, NULL, "r1>=A"); + reg_state_set_const(&rc, init_t, C); + reg_state_cond(init_t, fr2, &rc, OP_GE, fr2, NULL, "r2>=C"); + *tr1 = *fr1; + *tr2 = *fr2; + if (env.verbosity >= VERBOSE_VERY) { + printf("STEP1 (%s) R1: ", t_str(init_t)); print_reg_state(fr1, "\n"); + printf("STEP1 (%s) R2: ", t_str(init_t)); print_reg_state(fr2, "\n"); + } + + /* step 2: r1 <= B, r2 <= D */ + reg_state_set_const(&rc, init_t, B); + reg_state_cond(init_t, fr1, &rc, OP_LE, fr1, NULL, "r1<=B"); + reg_state_set_const(&rc, init_t, D); + reg_state_cond(init_t, fr2, &rc, OP_LE, fr2, NULL, "r2<=D"); + *tr1 = *fr1; + *tr2 = *fr2; + if (env.verbosity >= VERBOSE_VERY) { + printf("STEP2 (%s) R1: ", t_str(init_t)); print_reg_state(fr1, "\n"); + printf("STEP2 (%s) R2: ", t_str(init_t)); print_reg_state(fr2, "\n"); + } + + /* step 3: r1 <op> r2 */ + *branch_taken = reg_state_branch_taken_op(cond_t, fr1, fr2, op); + fr1->valid = fr2->valid = false; + tr1->valid = tr2->valid = false; + if (*branch_taken != 1) { /* FALSE is possible */ + fr1->valid = fr2->valid = true; + reg_state_cond(cond_t, fr1, fr2, rev_op, fr1, fr2, "FALSE"); + } + if (*branch_taken != 0) { /* TRUE is possible */ + tr1->valid = tr2->valid = true; + reg_state_cond(cond_t, tr1, tr2, op, tr1, tr2, "TRUE"); + } + if (env.verbosity >= VERBOSE_VERY) { + printf("STEP3 (%s) FALSE R1:", t_str(cond_t)); print_reg_state(fr1, "\n"); + printf("STEP3 (%s) FALSE R2:", t_str(cond_t)); print_reg_state(fr2, "\n"); + printf("STEP3 (%s) TRUE R1:", t_str(cond_t)); print_reg_state(tr1, "\n"); + printf("STEP3 (%s) TRUE R2:", t_str(cond_t)); print_reg_state(tr2, "\n"); + } +} + +/* =============================== + * HIGH-LEVEL TEST CASE VALIDATION + * =============================== + */ +static u32 upper_seeds[] = { + 0, + 1, + U32_MAX, + U32_MAX - 1, + S32_MAX, + (u32)S32_MIN, +}; + +static u32 lower_seeds[] = { + 0, + 1, + 2, (u32)-2, + 255, (u32)-255, + UINT_MAX, + UINT_MAX - 1, + INT_MAX, + (u32)INT_MIN, +}; + +struct ctx { + int val_cnt, subval_cnt, range_cnt, subrange_cnt; + u64 uvals[ARRAY_SIZE(upper_seeds) * ARRAY_SIZE(lower_seeds)]; + s64 svals[ARRAY_SIZE(upper_seeds) * ARRAY_SIZE(lower_seeds)]; + u32 usubvals[ARRAY_SIZE(lower_seeds)]; + s32 ssubvals[ARRAY_SIZE(lower_seeds)]; + struct range *uranges, *sranges; + struct range *usubranges, *ssubranges; + int max_failure_cnt, cur_failure_cnt; + int total_case_cnt, case_cnt; + int rand_case_cnt; + unsigned rand_seed; + __u64 start_ns; + char progress_ctx[64]; +}; + +static void cleanup_ctx(struct ctx *ctx) +{ + free(ctx->uranges); + free(ctx->sranges); + free(ctx->usubranges); + free(ctx->ssubranges); +} + +struct subtest_case { + enum num_t init_t; + enum num_t cond_t; + struct range x; + struct range y; + enum op op; +}; + +static void subtest_case_str(struct strbuf *sb, struct subtest_case *t, bool use_op) +{ + snappendf(sb, "(%s)", t_str(t->init_t)); + snprintf_range(t->init_t, sb, t->x); + snappendf(sb, " (%s)%s ", t_str(t->cond_t), use_op ? 
op_str(t->op) : "<op>"); + snprintf_range(t->init_t, sb, t->y); +} + +/* Generate and validate test case based on specific combination of setup + * register ranges (including their expected num_t domain), and conditional + * operation to perform (including num_t domain in which it has to be + * performed) + */ +static int verify_case_op(enum num_t init_t, enum num_t cond_t, + struct range x, struct range y, enum op op) +{ + char log_buf[256 * 1024]; + size_t log_sz = sizeof(log_buf); + int err, false_pos = 0, true_pos = 0, branch_taken; + struct reg_state fr1, fr2, tr1, tr2; + struct reg_state fe1, fe2, te1, te2; + bool failed = false; + struct case_spec spec = { + .init_subregs = (init_t == U32 || init_t == S32), + .setup_subregs = (init_t == U32 || init_t == S32), + .setup_signed = (init_t == S64 || init_t == S32), + .compare_subregs = (cond_t == U32 || cond_t == S32), + .compare_signed = (cond_t == S64 || cond_t == S32), + }; + + log_buf[0] = '\0'; + + sim_case(init_t, cond_t, x, y, op, &fe1, &fe2, &te1, &te2, &branch_taken); + + err = load_range_cmp_prog(x, y, op, branch_taken, spec, + log_buf, log_sz, &false_pos, &true_pos); + if (err) { + ASSERT_OK(err, "load_range_cmp_prog"); + failed = true; + } + + err = parse_range_cmp_log(log_buf, spec, false_pos, true_pos, + &fr1, &fr2, &tr1, &tr2); + if (err) { + ASSERT_OK(err, "parse_range_cmp_log"); + failed = true; + } + + if (!assert_reg_state_eq(&fr1, &fe1, "false_reg1") || + !assert_reg_state_eq(&fr2, &fe2, "false_reg2") || + !assert_reg_state_eq(&tr1, &te1, "true_reg1") || + !assert_reg_state_eq(&tr2, &te2, "true_reg2")) { + failed = true; + } + + if (failed || env.verbosity >= VERBOSE_NORMAL) { + if (failed || env.verbosity >= VERBOSE_VERY) { + printf("VERIFIER LOG:\n========================\n"); + print_verifier_log(log_buf); + printf("=====================\n"); + } + printf("ACTUAL FALSE1: "); print_reg_state(&fr1, "\n"); + printf("EXPECTED FALSE1: "); print_reg_state(&fe1, "\n"); + printf("ACTUAL FALSE2: "); print_reg_state(&fr2, "\n"); + printf("EXPECTED FALSE2: "); print_reg_state(&fe2, "\n"); + printf("ACTUAL TRUE1: "); print_reg_state(&tr1, "\n"); + printf("EXPECTED TRUE1: "); print_reg_state(&te1, "\n"); + printf("ACTUAL TRUE2: "); print_reg_state(&tr2, "\n"); + printf("EXPECTED TRUE2: "); print_reg_state(&te2, "\n"); + + return failed ? 
-EINVAL : 0; + } + + return 0; +} + +/* Given setup ranges and number types, go over all supported operations, + * generating individual subtest for each allowed combination + */ +static int verify_case_opt(struct ctx *ctx, enum num_t init_t, enum num_t cond_t, + struct range x, struct range y, bool is_subtest) +{ + DEFINE_STRBUF(sb, 256); + int err; + struct subtest_case sub = { + .init_t = init_t, + .cond_t = cond_t, + .x = x, + .y = y, + }; + + sb->pos = 0; /* reset position in strbuf */ + subtest_case_str(sb, &sub, false /* ignore op */); + if (is_subtest && !test__start_subtest(sb->buf)) + return 0; + + for (sub.op = first_op; sub.op <= last_op; sub.op++) { + sb->pos = 0; /* reset position in strbuf */ + subtest_case_str(sb, &sub, true /* print op */); + + if (env.verbosity >= VERBOSE_NORMAL) /* this speeds up debugging */ + printf("TEST CASE: %s\n", sb->buf); + + err = verify_case_op(init_t, cond_t, x, y, sub.op); + if (err || env.verbosity >= VERBOSE_NORMAL) + ASSERT_OK(err, sb->buf); + if (err) { + ctx->cur_failure_cnt++; + if (ctx->cur_failure_cnt > ctx->max_failure_cnt) + return err; + return 0; /* keep testing other cases */ + } + ctx->case_cnt++; + if ((ctx->case_cnt % 10000) == 0) { + double progress = (ctx->case_cnt + 0.0) / ctx->total_case_cnt; + u64 elapsed_ns = get_time_ns() - ctx->start_ns; + double remain_ns = elapsed_ns / progress * (1 - progress); + + fprintf(env.stderr, "PROGRESS (%s): %d/%d (%.2lf%%), " + "elapsed %llu mins (%.2lf hrs), " + "ETA %.0lf mins (%.2lf hrs)\n", + ctx->progress_ctx, + ctx->case_cnt, ctx->total_case_cnt, 100.0 * progress, + elapsed_ns / 1000000000 / 60, + elapsed_ns / 1000000000.0 / 3600, + remain_ns / 1000000000.0 / 60, + remain_ns / 1000000000.0 / 3600); + } + } + + return 0; +} + +static int verify_case(struct ctx *ctx, enum num_t init_t, enum num_t cond_t, + struct range x, struct range y) +{ + return verify_case_opt(ctx, init_t, cond_t, x, y, true /* is_subtest */); +} + +/* ================================ + * GENERATED CASES FROM SEED VALUES + * ================================ + */ +static int u64_cmp(const void *p1, const void *p2) +{ + u64 x1 = *(const u64 *)p1, x2 = *(const u64 *)p2; + + return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0; +} + +static int u32_cmp(const void *p1, const void *p2) +{ + u32 x1 = *(const u32 *)p1, x2 = *(const u32 *)p2; + + return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0; +} + +static int s64_cmp(const void *p1, const void *p2) +{ + s64 x1 = *(const s64 *)p1, x2 = *(const s64 *)p2; + + return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0; +} + +static int s32_cmp(const void *p1, const void *p2) +{ + s32 x1 = *(const s32 *)p1, x2 = *(const s32 *)p2; + + return x1 != x2 ? (x1 < x2 ? 
-1 : 1) : 0; +} + +/* Generate valid unique constants from seeds, both signed and unsigned */ +static void gen_vals(struct ctx *ctx) +{ + int i, j, cnt = 0; + + for (i = 0; i < ARRAY_SIZE(upper_seeds); i++) { + for (j = 0; j < ARRAY_SIZE(lower_seeds); j++) { + ctx->uvals[cnt++] = (((u64)upper_seeds[i]) << 32) | lower_seeds[j]; + } + } + + /* sort and compact uvals (i.e., it's `sort | uniq`) */ + qsort(ctx->uvals, cnt, sizeof(*ctx->uvals), u64_cmp); + for (i = 1, j = 0; i < cnt; i++) { + if (ctx->uvals[j] == ctx->uvals[i]) + continue; + j++; + ctx->uvals[j] = ctx->uvals[i]; + } + ctx->val_cnt = j + 1; + + /* we have exactly the same number of s64 values, they are just in + * a different order than u64s, so just sort them differently + */ + for (i = 0; i < ctx->val_cnt; i++) + ctx->svals[i] = ctx->uvals[i]; + qsort(ctx->svals, ctx->val_cnt, sizeof(*ctx->svals), s64_cmp); + + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + for (i = 0; i < ctx->val_cnt; i++) { + sb1->pos = sb2->pos = 0; + snprintf_num(U64, sb1, ctx->uvals[i]); + snprintf_num(S64, sb2, ctx->svals[i]); + printf("SEED #%d: u64=%-20s s64=%-20s\n", i, sb1->buf, sb2->buf); + } + } + + /* 32-bit values are generated separately */ + cnt = 0; + for (i = 0; i < ARRAY_SIZE(lower_seeds); i++) { + ctx->usubvals[cnt++] = lower_seeds[i]; + } + + /* sort and compact usubvals (i.e., it's `sort | uniq`) */ + qsort(ctx->usubvals, cnt, sizeof(*ctx->usubvals), u32_cmp); + for (i = 1, j = 0; i < cnt; i++) { + if (ctx->usubvals[j] == ctx->usubvals[i]) + continue; + j++; + ctx->usubvals[j] = ctx->usubvals[i]; + } + ctx->subval_cnt = j + 1; + + for (i = 0; i < ctx->subval_cnt; i++) + ctx->ssubvals[i] = ctx->usubvals[i]; + qsort(ctx->ssubvals, ctx->subval_cnt, sizeof(*ctx->ssubvals), s32_cmp); + + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + for (i = 0; i < ctx->subval_cnt; i++) { + sb1->pos = sb2->pos = 0; + snprintf_num(U32, sb1, ctx->usubvals[i]); + snprintf_num(S32, sb2, ctx->ssubvals[i]); + printf("SUBSEED #%d: u32=%-10s s32=%-10s\n", i, sb1->buf, sb2->buf); + } + } +} + +/* Generate valid ranges from upper/lower seeds */ +static int gen_ranges(struct ctx *ctx) +{ + int i, j, cnt = 0; + + for (i = 0; i < ctx->val_cnt; i++) { + for (j = i; j < ctx->val_cnt; j++) { + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + sb1->pos = sb2->pos = 0; + snprintf_range(U64, sb1, range(U64, ctx->uvals[i], ctx->uvals[j])); + snprintf_range(S64, sb2, range(S64, ctx->svals[i], ctx->svals[j])); + printf("RANGE #%d: u64=%-40s s64=%-40s\n", cnt, sb1->buf, sb2->buf); + } + cnt++; + } + } + ctx->range_cnt = cnt; + + ctx->uranges = calloc(ctx->range_cnt, sizeof(*ctx->uranges)); + if (!ASSERT_OK_PTR(ctx->uranges, "uranges_calloc")) + return -EINVAL; + ctx->sranges = calloc(ctx->range_cnt, sizeof(*ctx->sranges)); + if (!ASSERT_OK_PTR(ctx->sranges, "sranges_calloc")) + return -EINVAL; + + cnt = 0; + for (i = 0; i < ctx->val_cnt; i++) { + for (j = i; j < ctx->val_cnt; j++) { + ctx->uranges[cnt] = range(U64, ctx->uvals[i], ctx->uvals[j]); + ctx->sranges[cnt] = range(S64, ctx->svals[i], ctx->svals[j]); + cnt++; + } + } + + cnt = 0; + for (i = 0; i < ctx->subval_cnt; i++) { + for (j = i; j < ctx->subval_cnt; j++) { + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + sb1->pos = sb2->pos = 0; + snprintf_range(U32, sb1, range(U32, ctx->usubvals[i], ctx->usubvals[j])); + snprintf_range(S32, 
sb2, range(S32, ctx->ssubvals[i], ctx->ssubvals[j])); + printf("SUBRANGE #%d: u32=%-20s s32=%-20s\n", cnt, sb1->buf, sb2->buf); + } + cnt++; + } + } + ctx->subrange_cnt = cnt; + + ctx->usubranges = calloc(ctx->subrange_cnt, sizeof(*ctx->usubranges)); + if (!ASSERT_OK_PTR(ctx->usubranges, "usubranges_calloc")) + return -EINVAL; + ctx->ssubranges = calloc(ctx->subrange_cnt, sizeof(*ctx->ssubranges)); + if (!ASSERT_OK_PTR(ctx->ssubranges, "ssubranges_calloc")) + return -EINVAL; + + cnt = 0; + for (i = 0; i < ctx->subval_cnt; i++) { + for (j = i; j < ctx->subval_cnt; j++) { + ctx->usubranges[cnt] = range(U32, ctx->usubvals[i], ctx->usubvals[j]); + ctx->ssubranges[cnt] = range(S32, ctx->ssubvals[i], ctx->ssubvals[j]); + cnt++; + } + } + + return 0; +} + +static int parse_env_vars(struct ctx *ctx) +{ + const char *s; + + if ((s = getenv("REG_BOUNDS_MAX_FAILURE_CNT"))) { + errno = 0; + ctx->max_failure_cnt = strtol(s, NULL, 10); + if (errno || ctx->max_failure_cnt < 0) { + ASSERT_OK(-errno, "REG_BOUNDS_MAX_FAILURE_CNT"); + return -EINVAL; + } + } + + if ((s = getenv("REG_BOUNDS_RAND_CASE_CNT"))) { + errno = 0; + ctx->rand_case_cnt = strtol(s, NULL, 10); + if (errno || ctx->rand_case_cnt < 0) { + ASSERT_OK(-errno, "REG_BOUNDS_RAND_CASE_CNT"); + return -EINVAL; + } + } + + if ((s = getenv("REG_BOUNDS_RAND_SEED"))) { + errno = 0; + ctx->rand_seed = strtoul(s, NULL, 10); + if (errno) { + ASSERT_OK(-errno, "REG_BOUNDS_RAND_SEED"); + return -EINVAL; + } + } + + return 0; +} + +static int prepare_gen_tests(struct ctx *ctx) +{ + const char *s; + int err; + + if (!(s = getenv("SLOW_TESTS")) || strcmp(s, "1") != 0) { + test__skip(); + return -ENOTSUP; + } + + err = parse_env_vars(ctx); + if (err) + return err; + + gen_vals(ctx); + err = gen_ranges(ctx); + if (err) { + ASSERT_OK(err, "gen_ranges"); + return err; + } + + return 0; +} + +/* Go over generated constants and ranges and validate various supported + * combinations of them + */ +static void validate_gen_range_vs_const_64(enum num_t init_t, enum num_t cond_t) +{ + struct ctx ctx; + struct range rconst; + const struct range *ranges; + const u64 *vals; + int i, j; + + memset(&ctx, 0, sizeof(ctx)); + + if (prepare_gen_tests(&ctx)) + goto cleanup; + + ranges = init_t == U64 ? ctx.uranges : ctx.sranges; + vals = init_t == U64 ? ctx.uvals : (const u64 *)ctx.svals; + + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.range_cnt * ctx.val_cnt); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "RANGE x CONST, %s -> %s", + t_str(init_t), t_str(cond_t)); + + for (i = 0; i < ctx.val_cnt; i++) { + for (j = 0; j < ctx.range_cnt; j++) { + rconst = range(init_t, vals[i], vals[i]); + + /* (u64|s64)(<range> x <const>) */ + if (verify_case(&ctx, init_t, cond_t, ranges[j], rconst)) + goto cleanup; + /* (u64|s64)(<const> x <range>) */ + if (verify_case(&ctx, init_t, cond_t, rconst, ranges[j])) + goto cleanup; + } + } + +cleanup: + cleanup_ctx(&ctx); +} + +static void validate_gen_range_vs_const_32(enum num_t init_t, enum num_t cond_t) +{ + struct ctx ctx; + struct range rconst; + const struct range *ranges; + const u32 *vals; + int i, j; + + memset(&ctx, 0, sizeof(ctx)); + + if (prepare_gen_tests(&ctx)) + goto cleanup; + + ranges = init_t == U32 ? ctx.usubranges : ctx.ssubranges; + vals = init_t == U32 ? 
ctx.usubvals : (const u32 *)ctx.ssubvals; + + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.subrange_cnt * ctx.subval_cnt); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "RANGE x CONST, %s -> %s", + t_str(init_t), t_str(cond_t)); + + for (i = 0; i < ctx.subval_cnt; i++) { + for (j = 0; j < ctx.subrange_cnt; j++) { + rconst = range(init_t, vals[i], vals[i]); + + /* (u32|s32)(<range> x <const>) */ + if (verify_case(&ctx, init_t, cond_t, ranges[j], rconst)) + goto cleanup; + /* (u32|s32)(<const> x <range>) */ + if (verify_case(&ctx, init_t, cond_t, rconst, ranges[j])) + goto cleanup; + } + } + +cleanup: + cleanup_ctx(&ctx); +} + +static void validate_gen_range_vs_range(enum num_t init_t, enum num_t cond_t) +{ + struct ctx ctx; + const struct range *ranges; + int i, j, rcnt; + + memset(&ctx, 0, sizeof(ctx)); + + if (prepare_gen_tests(&ctx)) + goto cleanup; + + switch (init_t) + { + case U64: + ranges = ctx.uranges; + rcnt = ctx.range_cnt; + break; + case U32: + ranges = ctx.usubranges; + rcnt = ctx.subrange_cnt; + break; + case S64: + ranges = ctx.sranges; + rcnt = ctx.range_cnt; + break; + case S32: + ranges = ctx.ssubranges; + rcnt = ctx.subrange_cnt; + break; + default: + printf("validate_gen_range_vs_range!\n"); + exit(1); + } + + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * rcnt * (rcnt + 1) / 2); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "RANGE x RANGE, %s -> %s", + t_str(init_t), t_str(cond_t)); + + for (i = 0; i < rcnt; i++) { + for (j = i; j < rcnt; j++) { + /* (<range> x <range>) */ + if (verify_case(&ctx, init_t, cond_t, ranges[i], ranges[j])) + goto cleanup; + if (verify_case(&ctx, init_t, cond_t, ranges[j], ranges[i])) + goto cleanup; + } + } + +cleanup: + cleanup_ctx(&ctx); +} + +/* Go over thousands of test cases generated from initial seed values. + * Given this takes a long time, guard this behind the SLOW_TESTS=1 envvar. If + * the envvar is not set, this test is skipped during test_progs testing. + * + * We split this up into smaller subsets based on initialization and + * conditional numeric domains to get easy parallelization with test_progs' + * -j argument.
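 + * + * For example, one way to run only these generated subtests in parallel + * (a sketch, assuming test_progs' -t substring selector) is: + * + *   SLOW_TESTS=1 ./test_progs -t reg_bounds_gen -j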
+ */ + +/* RANGE x CONST, U64 initial range */ +void test_reg_bounds_gen_consts_u64_u64(void) { validate_gen_range_vs_const_64(U64, U64); } +void test_reg_bounds_gen_consts_u64_s64(void) { validate_gen_range_vs_const_64(U64, S64); } +void test_reg_bounds_gen_consts_u64_u32(void) { validate_gen_range_vs_const_64(U64, U32); } +void test_reg_bounds_gen_consts_u64_s32(void) { validate_gen_range_vs_const_64(U64, S32); } +/* RANGE x CONST, S64 initial range */ +void test_reg_bounds_gen_consts_s64_u64(void) { validate_gen_range_vs_const_64(S64, U64); } +void test_reg_bounds_gen_consts_s64_s64(void) { validate_gen_range_vs_const_64(S64, S64); } +void test_reg_bounds_gen_consts_s64_u32(void) { validate_gen_range_vs_const_64(S64, U32); } +void test_reg_bounds_gen_consts_s64_s32(void) { validate_gen_range_vs_const_64(S64, S32); } +/* RANGE x CONST, U32 initial range */ +void test_reg_bounds_gen_consts_u32_u64(void) { validate_gen_range_vs_const_32(U32, U64); } +void test_reg_bounds_gen_consts_u32_s64(void) { validate_gen_range_vs_const_32(U32, S64); } +void test_reg_bounds_gen_consts_u32_u32(void) { validate_gen_range_vs_const_32(U32, U32); } +void test_reg_bounds_gen_consts_u32_s32(void) { validate_gen_range_vs_const_32(U32, S32); } +/* RANGE x CONST, S32 initial range */ +void test_reg_bounds_gen_consts_s32_u64(void) { validate_gen_range_vs_const_32(S32, U64); } +void test_reg_bounds_gen_consts_s32_s64(void) { validate_gen_range_vs_const_32(S32, S64); } +void test_reg_bounds_gen_consts_s32_u32(void) { validate_gen_range_vs_const_32(S32, U32); } +void test_reg_bounds_gen_consts_s32_s32(void) { validate_gen_range_vs_const_32(S32, S32); } + +/* RANGE x RANGE, U64 initial range */ +void test_reg_bounds_gen_ranges_u64_u64(void) { validate_gen_range_vs_range(U64, U64); } +void test_reg_bounds_gen_ranges_u64_s64(void) { validate_gen_range_vs_range(U64, S64); } +void test_reg_bounds_gen_ranges_u64_u32(void) { validate_gen_range_vs_range(U64, U32); } +void test_reg_bounds_gen_ranges_u64_s32(void) { validate_gen_range_vs_range(U64, S32); } +/* RANGE x RANGE, S64 initial range */ +void test_reg_bounds_gen_ranges_s64_u64(void) { validate_gen_range_vs_range(S64, U64); } +void test_reg_bounds_gen_ranges_s64_s64(void) { validate_gen_range_vs_range(S64, S64); } +void test_reg_bounds_gen_ranges_s64_u32(void) { validate_gen_range_vs_range(S64, U32); } +void test_reg_bounds_gen_ranges_s64_s32(void) { validate_gen_range_vs_range(S64, S32); } +/* RANGE x RANGE, U32 initial range */ +void test_reg_bounds_gen_ranges_u32_u64(void) { validate_gen_range_vs_range(U32, U64); } +void test_reg_bounds_gen_ranges_u32_s64(void) { validate_gen_range_vs_range(U32, S64); } +void test_reg_bounds_gen_ranges_u32_u32(void) { validate_gen_range_vs_range(U32, U32); } +void test_reg_bounds_gen_ranges_u32_s32(void) { validate_gen_range_vs_range(U32, S32); } +/* RANGE x RANGE, S32 initial range */ +void test_reg_bounds_gen_ranges_s32_u64(void) { validate_gen_range_vs_range(S32, U64); } +void test_reg_bounds_gen_ranges_s32_s64(void) { validate_gen_range_vs_range(S32, S64); } +void test_reg_bounds_gen_ranges_s32_u32(void) { validate_gen_range_vs_range(S32, U32); } +void test_reg_bounds_gen_ranges_s32_s32(void) { validate_gen_range_vs_range(S32, S32); } + +#define DEFAULT_RAND_CASE_CNT 100 + +#define RAND_21BIT_MASK ((1 << 22) - 1) + +static u64 rand_u64() +{ + /* RAND_MAX is guaranteed to be at least 1<<15, but in practice it + * seems to be 1<<31, so we need to call it thrice to get full u64; + * we'll use a roughly equal split: 22 + 21 + 21 bits
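 + * (intended layout: the first call supplies the top 22 bits via the << 42 + * shift, the second the middle 21 bits, and the third the low 21 bits)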
+ */ + return ((u64)random() << 42) | + (((u64)random() & RAND_21BIT_MASK) << 21) | + (random() & RAND_21BIT_MASK); +} + +static u64 rand_const(enum num_t t) +{ + return cast_t(t, rand_u64()); +} + +static struct range rand_range(enum num_t t) +{ + u64 x = rand_const(t), y = rand_const(t); + + return range(t, min_t(t, x, y), max_t(t, x, y)); +} + +static void validate_rand_ranges(enum num_t init_t, enum num_t cond_t, bool const_range) +{ + struct ctx ctx; + struct range range1, range2; + int err, i; + u64 t; + + memset(&ctx, 0, sizeof(ctx)); + + err = parse_env_vars(&ctx); + if (err) { + ASSERT_OK(err, "parse_env_vars"); + return; + } + + if (ctx.rand_case_cnt == 0) + ctx.rand_case_cnt = DEFAULT_RAND_CASE_CNT; + if (ctx.rand_seed == 0) + ctx.rand_seed = (unsigned)get_time_ns(); + + srandom(ctx.rand_seed); + + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.rand_case_cnt); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "[RANDOM SEED %u] RANGE x %s, %s -> %s", + ctx.rand_seed, const_range ? "CONST" : "RANGE", + t_str(init_t), t_str(cond_t)); + + for (i = 0; i < ctx.rand_case_cnt; i++) { + range1 = rand_range(init_t); + if (const_range) { + t = rand_const(init_t); + range2 = range(init_t, t, t); + } else { + range2 = rand_range(init_t); + } + + /* <range1> x <range2> */ + if (verify_case_opt(&ctx, init_t, cond_t, range1, range2, false /* !is_subtest */)) + goto cleanup; + /* <range2> x <range1> */ + if (verify_case_opt(&ctx, init_t, cond_t, range2, range1, false /* !is_subtest */)) + goto cleanup; + } + +cleanup: + /* make sure we report random seed for reproducing */ + ASSERT_TRUE(true, ctx.progress_ctx); + cleanup_ctx(&ctx); +} + +/* [RANDOM] RANGE x CONST, U64 initial range */ +void test_reg_bounds_rand_consts_u64_u64(void) { validate_rand_ranges(U64, U64, true /* const */); } +void test_reg_bounds_rand_consts_u64_s64(void) { validate_rand_ranges(U64, S64, true /* const */); } +void test_reg_bounds_rand_consts_u64_u32(void) { validate_rand_ranges(U64, U32, true /* const */); } +void test_reg_bounds_rand_consts_u64_s32(void) { validate_rand_ranges(U64, S32, true /* const */); } +/* [RANDOM] RANGE x CONST, S64 initial range */ +void test_reg_bounds_rand_consts_s64_u64(void) { validate_rand_ranges(S64, U64, true /* const */); } +void test_reg_bounds_rand_consts_s64_s64(void) { validate_rand_ranges(S64, S64, true /* const */); } +void test_reg_bounds_rand_consts_s64_u32(void) { validate_rand_ranges(S64, U32, true /* const */); } +void test_reg_bounds_rand_consts_s64_s32(void) { validate_rand_ranges(S64, S32, true /* const */); } +/* [RANDOM] RANGE x CONST, U32 initial range */ +void test_reg_bounds_rand_consts_u32_u64(void) { validate_rand_ranges(U32, U64, true /* const */); } +void test_reg_bounds_rand_consts_u32_s64(void) { validate_rand_ranges(U32, S64, true /* const */); } +void test_reg_bounds_rand_consts_u32_u32(void) { validate_rand_ranges(U32, U32, true /* const */); } +void test_reg_bounds_rand_consts_u32_s32(void) { validate_rand_ranges(U32, S32, true /* const */); } +/* [RANDOM] RANGE x CONST, S32 initial range */ +void test_reg_bounds_rand_consts_s32_u64(void) { validate_rand_ranges(S32, U64, true /* const */); } +void test_reg_bounds_rand_consts_s32_s64(void) { validate_rand_ranges(S32, S64, true /* const */); } +void test_reg_bounds_rand_consts_s32_u32(void) { validate_rand_ranges(S32, U32, true /* const */); } +void test_reg_bounds_rand_consts_s32_s32(void) { validate_rand_ranges(S32, S32, true /* const */); } + +/* [RANDOM] 
RANGE x RANGE, U64 initial range */ +void test_reg_bounds_rand_ranges_u64_u64(void) { validate_rand_ranges(U64, U64, false /* range */); } +void test_reg_bounds_rand_ranges_u64_s64(void) { validate_rand_ranges(U64, S64, false /* range */); } +void test_reg_bounds_rand_ranges_u64_u32(void) { validate_rand_ranges(U64, U32, false /* range */); } +void test_reg_bounds_rand_ranges_u64_s32(void) { validate_rand_ranges(U64, S32, false /* range */); } +/* [RANDOM] RANGE x RANGE, S64 initial range */ +void test_reg_bounds_rand_ranges_s64_u64(void) { validate_rand_ranges(S64, U64, false /* range */); } +void test_reg_bounds_rand_ranges_s64_s64(void) { validate_rand_ranges(S64, S64, false /* range */); } +void test_reg_bounds_rand_ranges_s64_u32(void) { validate_rand_ranges(S64, U32, false /* range */); } +void test_reg_bounds_rand_ranges_s64_s32(void) { validate_rand_ranges(S64, S32, false /* range */); } +/* [RANDOM] RANGE x RANGE, U32 initial range */ +void test_reg_bounds_rand_ranges_u32_u64(void) { validate_rand_ranges(U32, U64, false /* range */); } +void test_reg_bounds_rand_ranges_u32_s64(void) { validate_rand_ranges(U32, S64, false /* range */); } +void test_reg_bounds_rand_ranges_u32_u32(void) { validate_rand_ranges(U32, U32, false /* range */); } +void test_reg_bounds_rand_ranges_u32_s32(void) { validate_rand_ranges(U32, S32, false /* range */); } +/* [RANDOM] RANGE x RANGE, S32 initial range */ +void test_reg_bounds_rand_ranges_s32_u64(void) { validate_rand_ranges(S32, U64, false /* range */); } +void test_reg_bounds_rand_ranges_s32_s64(void) { validate_rand_ranges(S32, S64, false /* range */); } +void test_reg_bounds_rand_ranges_s32_u32(void) { validate_rand_ranges(S32, U32, false /* range */); } +void test_reg_bounds_rand_ranges_s32_s32(void) { validate_rand_ranges(S32, S32, false /* range */); } + +/* A set of hard-coded "interesting" cases to validate as part of normal + * test_progs test runs + */ +static struct subtest_case crafted_cases[] = { + {U64, U64, {0, 0xffffffff}, {0, 0}}, + {U64, U64, {0, 0x80000000}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x100000100ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x180000000ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x1ffffff00ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x1ffffff01ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x1fffffffeULL}, {0, 0}}, + {U64, U64, {0x100000001ULL, 0x1000000ffULL}, {0, 0}}, + + /* single point overlap, interesting BPF_EQ and BPF_NE interactions */ + {U64, U64, {0, 1}, {1, 0x80000000}}, + {U64, S64, {0, 1}, {1, 0x80000000}}, + {U64, U32, {0, 1}, {1, 0x80000000}}, + {U64, S32, {0, 1}, {1, 0x80000000}}, + + {U64, S64, {0, 0xffffffff00000000ULL}, {0, 0}}, + {U64, S64, {0x7fffffffffffffffULL, 0xffffffff00000000ULL}, {0, 0}}, + {U64, S64, {0x7fffffff00000001ULL, 0xffffffff00000000ULL}, {0, 0}}, + {U64, S64, {0, 0xffffffffULL}, {1, 1}}, + {U64, S64, {0, 0xffffffffULL}, {0x7fffffff, 0x7fffffff}}, + + {U64, U32, {0, 0x100000000}, {0, 0}}, + {U64, U32, {0xfffffffe, 0x100000000}, {0x80000000, 0x80000000}}, + + {U64, S32, {0, 0xffffffff00000000ULL}, {0, 0}}, + /* these are tricky cases where lower 32 bits allow to tighten 64 + * bit boundaries based on tightened lower 32 bit boundaries + */ + {U64, S32, {0, 0x0ffffffffULL}, {0, 0}}, + {U64, S32, {0, 0x100000000ULL}, {0, 0}}, + {U64, S32, {0, 0x100000001ULL}, {0, 0}}, + {U64, S32, {0, 0x180000000ULL}, {0, 0}}, + {U64, S32, {0, 0x17fffffffULL}, {0, 0}}, + {U64, S32, {0, 0x180000001ULL}, {0, 0}}, + + /* verifier knows about [-1, 0] range for s32 for this case already */ + 
{S64, S64, {0xffffffffffffffffULL, 0}, {0xffffffff00000000ULL, 0xffffffff00000000ULL}}, + /* but didn't know about these cases initially */ + {U64, U64, {0xffffffff, 0x100000000ULL}, {0, 0}}, /* s32: [-1, 0] */ + {U64, U64, {0xffffffff, 0x100000001ULL}, {0, 0}}, /* s32: [-1, 1] */ + + /* longer convergence case: learning from u64 -> s64 -> u64 -> u32, + * arriving at u32: [1, U32_MAX] (instead of more pessimistic [0, U32_MAX]) + */ + {S64, U64, {0xffffffff00000001ULL, 0}, {0xffffffff00000000ULL, 0xffffffff00000000ULL}}, + + {U32, U32, {1, U32_MAX}, {0, 0}}, + + {U32, S32, {0, U32_MAX}, {U32_MAX, U32_MAX}}, + + {S32, U64, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}, {(u32)(s32)-255, 0}}, + {S32, S64, {(u32)(s32)S32_MIN, (u32)(s32)-255}, {(u32)(s32)-2, 0}}, + {S32, S64, {0, 1}, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}}, + {S32, U32, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}}, +}; + +/* Go over crafted hard-coded cases. This is fast, so we do it as part of + * normal test_progs run. + */ +void test_reg_bounds_crafted(void) +{ + struct ctx ctx; + int i; + + memset(&ctx, 0, sizeof(ctx)); + + for (i = 0; i < ARRAY_SIZE(crafted_cases); i++) { + struct subtest_case *c = &crafted_cases[i]; + + verify_case(&ctx, c->init_t, c->cond_t, c->x, c->y); + verify_case(&ctx, c->init_t, c->cond_t, c->y, c->x); + } + + cleanup_ctx(&ctx); +} diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c index f29c08d93beb..18d451be57c8 100644 --- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c @@ -13,22 +13,22 @@ static struct { const char *err_msg; } spin_lock_fail_tests[] = { { "lock_id_kptr_preserve", - "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) " - "R1_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n" + "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2) " + "R1_w=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n" "R1 type=ptr_ expected=percpu_ptr_" }, { "lock_id_global_zero", - "; R1_w=map_value(off=0,ks=4,vs=4,imm=0)\n2: (85) call bpf_this_cpu_ptr#154\n" + "; R1_w=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mapval_preserve", "[0-9]\\+: (bf) r1 = r0 ;" - " R0_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)" - " R1_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)\n" + " R0_w=map_value(id=1,map=array_map,ks=4,vs=8)" + " R1_w=map_value(id=1,map=array_map,ks=4,vs=8)\n" "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_innermapval_preserve", "[0-9]\\+: (bf) r1 = r0 ;" - " R0=map_value(id=2,off=0,ks=4,vs=8,imm=0)" - " R1_w=map_value(id=2,off=0,ks=4,vs=8,imm=0)\n" + " R0=map_value(id=2,ks=4,vs=8)" + " R1_w=map_value(id=2,ks=4,vs=8)\n" "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" }, diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c index 51883ccb8020..196abf223465 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c @@ -2387,12 +2387,9 @@ static int generate_dummy_prog(void) const size_t prog_insn_cnt = sizeof(prog_insns) / sizeof(struct bpf_insn); LIBBPF_OPTS(bpf_prog_load_opts, opts); const size_t log_buf_sz = 256; - char *log_buf; + char 
log_buf[log_buf_sz]; int fd = -1; - log_buf = malloc(log_buf_sz); - if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc")) - return fd; opts.log_buf = log_buf; opts.log_size = log_buf_sz; @@ -2402,7 +2399,6 @@ static int generate_dummy_prog(void) prog_insns, prog_insn_cnt, &opts); ASSERT_STREQ(log_buf, "", "log_0"); ASSERT_GE(fd, 0, "prog_fd"); - free(log_buf); return fd; } diff --git a/tools/testing/selftests/bpf/prog_tests/vmlinux.c b/tools/testing/selftests/bpf/prog_tests/vmlinux.c index 72310cfc6474..6fb2217d940b 100644 --- a/tools/testing/selftests/bpf/prog_tests/vmlinux.c +++ b/tools/testing/selftests/bpf/prog_tests/vmlinux.c @@ -16,27 +16,27 @@ static void nsleep() void test_vmlinux(void) { - int duration = 0, err; + int err; struct test_vmlinux* skel; struct test_vmlinux__bss *bss; skel = test_vmlinux__open_and_load(); - if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + if (!ASSERT_OK_PTR(skel, "test_vmlinux__open_and_load")) return; bss = skel->bss; err = test_vmlinux__attach(skel); - if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + if (!ASSERT_OK(err, "test_vmlinux__attach")) goto cleanup; /* trigger everything */ nsleep(); - CHECK(!bss->tp_called, "tp", "not called\n"); - CHECK(!bss->raw_tp_called, "raw_tp", "not called\n"); - CHECK(!bss->tp_btf_called, "tp_btf", "not called\n"); - CHECK(!bss->kprobe_called, "kprobe", "not called\n"); - CHECK(!bss->fentry_called, "fentry", "not called\n"); + ASSERT_TRUE(bss->tp_called, "tp"); + ASSERT_TRUE(bss->raw_tp_called, "raw_tp"); + ASSERT_TRUE(bss->tp_btf_called, "tp_btf"); + ASSERT_TRUE(bss->kprobe_called, "kprobe"); + ASSERT_TRUE(bss->fentry_called, "fentry"); cleanup: test_vmlinux__destroy(skel); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c index f2b8167b72a8..442f4ca39fd7 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c @@ -35,6 +35,8 @@ int dump_task_stack(struct bpf_iter__task *ctx) return 0; } +int num_user_stacks = 0; + SEC("iter/task") int get_task_user_stacks(struct bpf_iter__task *ctx) { @@ -51,6 +53,9 @@ int get_task_user_stacks(struct bpf_iter__task *ctx) if (res <= 0) return 0; + /* Only one task, the current one, should succeed */ + ++num_user_stacks; + buf_sz += res; /* If the verifier doesn't refine bpf_get_task_stack res, and instead diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c index e1e5c54a6a11..49efaed143fc 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_assert.c +++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c @@ -18,48 +18,48 @@ return *(u64 *)num; \ } -__msg(": R0_w=-2147483648 R10=fp0") +__msg(": R0_w=0xffffffff80000000 R10=fp0") check_assert(s64, eq, int_min, INT_MIN); -__msg(": R0_w=2147483647 R10=fp0") +__msg(": R0_w=0x7fffffff R10=fp0") check_assert(s64, eq, int_max, INT_MAX); __msg(": R0_w=0 R10=fp0") check_assert(s64, eq, zero, 0); -__msg(": R0_w=-9223372036854775808 R1_w=-9223372036854775808 R10=fp0") +__msg(": R0_w=0x8000000000000000 R1_w=0x8000000000000000 R10=fp0") check_assert(s64, eq, llong_min, LLONG_MIN); -__msg(": R0_w=9223372036854775807 R1_w=9223372036854775807 R10=fp0") +__msg(": R0_w=0x7fffffffffffffff R1_w=0x7fffffffffffffff R10=fp0") check_assert(s64, eq, llong_max, LLONG_MAX); -__msg(": R0_w=scalar(smax=2147483646) R10=fp0") +__msg(": R0_w=scalar(smax=0x7ffffffe) R10=fp0") check_assert(s64, lt, pos, INT_MAX); 
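+/* The updated expectations spell wide bounds in hex; as a sanity check, + * INT_MIN (-2147483648) as a u64 bit pattern is 0xffffffff80000000 and + * LLONG_MIN is 0x8000000000000000, matching the decimal values they replace. + */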
-__msg(": R0_w=scalar(smax=-1,umin=9223372036854775808,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, lt, zero, 0); -__msg(": R0_w=scalar(smax=-2147483649,umin=9223372036854775808,umax=18446744071562067967,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smax=0xffffffff7fffffff,umin=0x8000000000000000,umax=0xffffffff7fffffff,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, lt, neg, INT_MIN); -__msg(": R0_w=scalar(smax=2147483647) R10=fp0") +__msg(": R0_w=scalar(smax=0x7fffffff) R10=fp0") check_assert(s64, le, pos, INT_MAX); __msg(": R0_w=scalar(smax=0) R10=fp0") check_assert(s64, le, zero, 0); -__msg(": R0_w=scalar(smax=-2147483648,umin=9223372036854775808,umax=18446744071562067968,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smax=0xffffffff80000000,umin=0x8000000000000000,umax=0xffffffff80000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, le, neg, INT_MIN); -__msg(": R0_w=scalar(smin=umin=2147483648,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, gt, pos, INT_MAX); -__msg(": R0_w=scalar(smin=umin=1,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, gt, zero, 0); -__msg(": R0_w=scalar(smin=-2147483647) R10=fp0") +__msg(": R0_w=scalar(smin=0xffffffff80000001) R10=fp0") check_assert(s64, gt, neg, INT_MIN); -__msg(": R0_w=scalar(smin=umin=2147483647,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, ge, pos, INT_MAX); -__msg(": R0_w=scalar(smin=0,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0") +__msg(": R0_w=scalar(smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0") check_assert(s64, ge, zero, 0); -__msg(": R0_w=scalar(smin=-2147483648) R10=fp0") +__msg(": R0_w=scalar(smin=0xffffffff80000000) R10=fp0") check_assert(s64, ge, neg, INT_MIN); SEC("?tc") __log_level(2) __failure -__msg(": R0=0 R1=ctx(off=0,imm=0) R2=scalar(smin=smin32=-2147483646,smax=smax32=2147483645) R10=fp0") +__msg(": R0=0 R1=ctx() R2=scalar(smin=0xffffffff80000002,smax=smax32=0x7ffffffd,smin32=0x80000002) R10=fp0") int check_assert_range_s64(struct __sk_buff *ctx) { struct bpf_sock *sk = ctx->sk; @@ -75,7 +75,7 @@ int check_assert_range_s64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R1=ctx(off=0,imm=0) R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))") +__msg(": R1=ctx() R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))") int check_assert_range_u64(struct __sk_buff *ctx) { u64 num = ctx->len; @@ -86,7 +86,7 @@ int check_assert_range_u64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R0=0 R1=ctx(off=0,imm=0) R2=4096 R10=fp0") +__msg(": R0=0 R1=ctx() R2=4096 R10=fp0") int check_assert_single_range_s64(struct __sk_buff *ctx) { struct bpf_sock *sk = ctx->sk; @@ -103,7 +103,7 @@ int check_assert_single_range_s64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R1=ctx(off=0,imm=0) R2=4096 R10=fp0") +__msg(": R1=ctx() R2=4096 
R10=fp0") int check_assert_single_range_u64(struct __sk_buff *ctx) { u64 num = ctx->len; @@ -114,7 +114,7 @@ int check_assert_single_range_u64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R1=pkt(off=64,r=64,imm=0) R2=pkt_end(off=0,imm=0) R6=pkt(off=0,r=64,imm=0) R10=fp0") +__msg(": R1=pkt(off=64,r=64) R2=pkt_end() R6=pkt(r=64) R10=fp0") int check_assert_generic(struct __sk_buff *ctx) { u8 *data_end = (void *)(long)ctx->data_end; diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index c20c4e38b71c..b2181f850d3e 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -1411,4 +1411,26 @@ __naked int checkpoint_states_deletion(void) ); } +struct { + int data[32]; + int n; +} loop_data; + +SEC("raw_tp") +__success +int iter_arr_with_actual_elem_count(const void *ctx) +{ + int i, n = loop_data.n, sum = 0; + + if (n > ARRAY_SIZE(loop_data.data)) + return 0; + + bpf_for(i, 0, n) { + /* no rechecking of i against ARRAY_SIZE(loop_data.n) */ + sum += loop_data.data[i]; + } + + return sum; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c index b567a666d2b8..1769fdff6aea 100644 --- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c @@ -14,6 +14,24 @@ struct node_data { struct bpf_rb_node node; }; +struct refcounted_node { + long data; + struct bpf_rb_node rb_node; + struct bpf_refcount refcount; +}; + +struct stash { + struct bpf_spin_lock l; + struct refcounted_node __kptr *stashed; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct stash); + __uint(max_entries, 10); +} refcounted_node_stash SEC(".maps"); + struct plain_local { long key; long data; @@ -38,6 +56,7 @@ struct map_value { * Had to do the same w/ bpf_kfunc_call_test_release below */ struct node_data *just_here_because_btf_bug; +struct refcounted_node *just_here_because_btf_bug2; struct { __uint(type, BPF_MAP_TYPE_ARRAY); @@ -132,4 +151,56 @@ long stash_test_ref_kfunc(void *ctx) return 0; } +SEC("tc") +long refcount_acquire_without_unstash(void *ctx) +{ + struct refcounted_node *p; + struct stash *s; + int ret = 0; + + s = bpf_map_lookup_elem(&refcounted_node_stash, &ret); + if (!s) + return 1; + + if (!s->stashed) + /* refcount_acquire failure is expected when no refcounted_node + * has been stashed before this program executes + */ + return 2; + + p = bpf_refcount_acquire(s->stashed); + if (!p) + return 3; + + ret = s->stashed ? 
s->stashed->data : -1; + bpf_obj_drop(p); + return ret; +} + +/* Helper for refcount_acquire_without_unstash test */ +SEC("tc") +long stash_refcounted_node(void *ctx) +{ + struct refcounted_node *p; + struct stash *s; + int key = 0; + + s = bpf_map_lookup_elem(&refcounted_node_stash, &key); + if (!s) + return 1; + + p = bpf_obj_new(typeof(*p)); + if (!p) + return 2; + p->data = 42; + + p = bpf_kptr_xchg(&s->stashed, p); + if (p) { + bpf_obj_drop(p); + return 3; + } + + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/pyperf180.c b/tools/testing/selftests/bpf/progs/pyperf180.c index c39f559d3100..42c4a8b62e36 100644 --- a/tools/testing/selftests/bpf/progs/pyperf180.c +++ b/tools/testing/selftests/bpf/progs/pyperf180.c @@ -1,4 +1,26 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook #define STACK_MAX_LEN 180 + +/* llvm upstream commit at clang18 + * https://github.com/llvm/llvm-project/commit/1a2e77cf9e11dbf56b5720c607313a566eebb16e + * changed inlining behavior and caused compilation failure as some branch + * target distance exceeded 16bit representation which is the maximum for + * cpu v1/v2/v3. Macro __BPF_CPU_VERSION__ is later implemented in clang18 + * to specify which cpu version is used for compilation. So a smaller + * unroll_count can be set if __BPF_CPU_VERSION__ is less than 4, which + * reduced some branch target distances and resolved the compilation failure. + * + * To capture the case where a developer/ci uses clang18 but the corresponding + * repo checkpoint does not have __BPF_CPU_VERSION__, a smaller unroll_count + * will be set as well to prevent potential compilation failures. + */ +#ifdef __BPF_CPU_VERSION__ +#if __BPF_CPU_VERSION__ < 4 +#define UNROLL_COUNT 90 +#endif +#elif __clang_major__ == 18 +#define UNROLL_COUNT 90 +#endif + #include "pyperf.h" diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c index 1ef07f6ee580..1553b9c16aa7 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c @@ -54,6 +54,25 @@ long rbtree_refcounted_node_ref_escapes(void *ctx) } SEC("?tc") +__failure __msg("Possibly NULL pointer passed to trusted arg0") +long refcount_acquire_maybe_null(void *ctx) +{ + struct node_acquire *n, *m; + + n = bpf_obj_new(typeof(*n)); + /* Intentionally not testing !n + * it's MAYBE_NULL for refcount_acquire + */ + m = bpf_refcount_acquire(n); + if (m) + bpf_obj_drop(m); + if (n) + bpf_obj_drop(n); + + return 0; +} + +SEC("?tc") __failure __msg("Unreleased reference id=3 alloc_insn=9") long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx) { diff --git a/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c new file mode 100644 index 000000000000..44628865fe1d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +__u32 target_ancestor_level; +__u64 target_ancestor_cgid; +int target_pid, target_hid; + +struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym; +struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym; +void bpf_cgroup_release(struct cgroup *cgrp) 
__ksym; + +static int bpf_link_create_verify(int cmd) +{ + struct cgroup *cgrp, *ancestor; + struct task_struct *task; + int ret = 0; + + if (cmd != BPF_LINK_CREATE) + return 0; + + task = bpf_get_current_task_btf(); + + /* Then it can run in parallel with others */ + if (task->pid != target_pid) + return 0; + + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + /* Refuse it if its cgid or its ancestor's cgid is the target cgid */ + if (cgrp->kn->id == target_ancestor_cgid) + ret = -1; + + ancestor = bpf_cgroup_ancestor(cgrp, target_ancestor_level); + if (!ancestor) + goto out; + + if (ancestor->kn->id == target_ancestor_cgid) + ret = -1; + bpf_cgroup_release(ancestor); + +out: + bpf_cgroup_release(cgrp); + return ret; +} + +SEC("lsm/bpf") +int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size) +{ + return bpf_link_create_verify(cmd); +} + +SEC("lsm.s/bpf") +int BPF_PROG(lsm_s_run, int cmd, union bpf_attr *attr, unsigned int size) +{ + return bpf_link_create_verify(cmd); +} + +SEC("fentry") +int BPF_PROG(fentry_run) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index c5588a14fe2e..ec430b71730b 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -965,6 +965,7 @@ l0_%=: r0 = 0; \ SEC("xdp") __description("bound check with JMP_JSLT for crossing 64-bit signed boundary") __success __retval(0) +__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */ __naked void crossing_64_bit_signed_boundary_2(void) { asm volatile (" \ @@ -1046,6 +1047,7 @@ l0_%=: r0 = 0; \ SEC("xdp") __description("bound check with JMP32_JSLT for crossing 32-bit signed boundary") __success __retval(0) +__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */ __naked void crossing_32_bit_signed_boundary_2(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 37ffa57f28a1..a350ecdfba4a 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -153,6 +153,14 @@ static int parse_retval(const char *str, int *val, const char *name) return parse_int(str, val, name); } +static void update_flags(int *flags, int flag, bool clear) +{ + if (clear) + *flags &= ~flag; + else + *flags |= flag; +} + /* Uses btf_decl_tag attributes to describe the expected test * behavior, see bpf_misc.h for detailed description of each attribute * and attribute combinations. 
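 * For example, with the '!' prefix handled below, a test can clear one of the * default flags, e.g. __flag(!BPF_F_TEST_REG_INVARIANTS) as used in * verifier_bounds.c above.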
@@ -171,6 +179,7 @@ static int parse_test_spec(struct test_loader *tester, memset(spec, 0, sizeof(*spec)); spec->prog_name = bpf_program__name(prog); + spec->prog_flags = BPF_F_TEST_REG_INVARIANTS; /* by default be strict */ btf = bpf_object__btf(obj); if (!btf) { @@ -187,7 +196,8 @@ static int parse_test_spec(struct test_loader *tester, for (i = 1; i < btf__type_cnt(btf); i++) { const char *s, *val, *msg; const struct btf_type *t; - int tmp; + bool clear; + int flags; t = btf__type_by_id(btf, i); if (!btf_is_decl_tag(t)) @@ -253,23 +263,30 @@ static int parse_test_spec(struct test_loader *tester, goto cleanup; } else if (str_has_pfx(s, TEST_TAG_PROG_FLAGS_PFX)) { val = s + sizeof(TEST_TAG_PROG_FLAGS_PFX) - 1; + + clear = val[0] == '!'; + if (clear) + val++; + if (strcmp(val, "BPF_F_STRICT_ALIGNMENT") == 0) { - spec->prog_flags |= BPF_F_STRICT_ALIGNMENT; + update_flags(&spec->prog_flags, BPF_F_STRICT_ALIGNMENT, clear); } else if (strcmp(val, "BPF_F_ANY_ALIGNMENT") == 0) { - spec->prog_flags |= BPF_F_ANY_ALIGNMENT; + update_flags(&spec->prog_flags, BPF_F_ANY_ALIGNMENT, clear); } else if (strcmp(val, "BPF_F_TEST_RND_HI32") == 0) { - spec->prog_flags |= BPF_F_TEST_RND_HI32; + update_flags(&spec->prog_flags, BPF_F_TEST_RND_HI32, clear); } else if (strcmp(val, "BPF_F_TEST_STATE_FREQ") == 0) { - spec->prog_flags |= BPF_F_TEST_STATE_FREQ; + update_flags(&spec->prog_flags, BPF_F_TEST_STATE_FREQ, clear); } else if (strcmp(val, "BPF_F_SLEEPABLE") == 0) { - spec->prog_flags |= BPF_F_SLEEPABLE; + update_flags(&spec->prog_flags, BPF_F_SLEEPABLE, clear); } else if (strcmp(val, "BPF_F_XDP_HAS_FRAGS") == 0) { - spec->prog_flags |= BPF_F_XDP_HAS_FRAGS; + update_flags(&spec->prog_flags, BPF_F_XDP_HAS_FRAGS, clear); + } else if (strcmp(val, "BPF_F_TEST_REG_INVARIANTS") == 0) { + update_flags(&spec->prog_flags, BPF_F_TEST_REG_INVARIANTS, clear); } else /* assume numeric value */ { - err = parse_int(val, &tmp, "test prog flags"); + err = parse_int(val, &flags, "test prog flags"); if (err) goto cleanup; - spec->prog_flags |= tmp; + update_flags(&spec->prog_flags, flags, clear); } } } diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 7fc00e423e4d..767e0693df10 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1396,13 +1396,18 @@ static void test_map_stress(void) #define MAX_DELAY_US 50000 #define MIN_DELAY_RANGE_US 5000 -static int map_update_retriable(int map_fd, const void *key, const void *value, - int flags, int attempts) +static bool retry_for_again_or_busy(int err) +{ + return (err == EAGAIN || err == EBUSY); +} + +int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts, + retry_for_error_fn need_retry) { int delay = rand() % MIN_DELAY_RANGE_US; while (bpf_map_update_elem(map_fd, key, value, flags)) { - if (!attempts || (errno != EAGAIN && errno != EBUSY)) + if (!attempts || !need_retry(errno)) return -errno; if (delay <= MAX_DELAY_US / 2) @@ -1445,11 +1450,13 @@ static void test_update_delete(unsigned int fn, void *data) key = value = i; if (do_update) { - err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES); + err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES, + retry_for_again_or_busy); if (err) printf("error %d %d\n", err, errno); assert(err == 0); - err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES); + err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES, + retry_for_again_or_busy); if 
(err) printf("error %d %d\n", err, errno); assert(err == 0); diff --git a/tools/testing/selftests/bpf/test_maps.h b/tools/testing/selftests/bpf/test_maps.h index f6fbca761732..e4ac704a536c 100644 --- a/tools/testing/selftests/bpf/test_maps.h +++ b/tools/testing/selftests/bpf/test_maps.h @@ -4,6 +4,7 @@ #include <stdio.h> #include <stdlib.h> +#include <stdbool.h> #define CHECK(condition, tag, format...) ({ \ int __ret = !!(condition); \ @@ -16,4 +17,8 @@ extern int skips; +typedef bool (*retry_for_error_fn)(int err); +int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts, + retry_for_error_fn need_retry); + #endif diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index 2c89674fc62c..b0068a9d2cfe 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -679,7 +679,7 @@ static int load_path(const struct sock_addr_test *test, const char *path) bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR); bpf_program__set_expected_attach_type(prog, test->expected_attach_type); - bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); + bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS); err = bpf_object__load(obj); if (err) { diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 98107e0452d3..f36e41435be7 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1588,7 +1588,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (fixup_skips != skips) return; - pflags = BPF_F_TEST_RND_HI32; + pflags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS; if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT) pflags |= BPF_F_STRICT_ALIGNMENT; if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 8d994884c7b4..d2458c1b1671 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -276,7 +276,7 @@ int bpf_prog_test_load(const char *file, enum bpf_prog_type type, if (type != BPF_PROG_TYPE_UNSPEC && bpf_program__type(prog) != type) bpf_program__set_type(prog, type); - flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32; + flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS; bpf_program__set_flags(prog, flags); err = bpf_object__load(obj); @@ -299,7 +299,7 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, { LIBBPF_OPTS(bpf_prog_load_opts, opts, .kern_version = kern_version, - .prog_flags = BPF_F_TEST_RND_HI32, + .prog_flags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS, .log_level = extra_prog_load_log_flags, .log_buf = log_buf, .log_size = log_buf_sz, diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 655095810d4a..1d418d66e375 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -18,6 +18,7 @@ #include <libelf.h> #include <gelf.h> #include <float.h> +#include <math.h> #ifndef ARRAY_SIZE #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) @@ -99,6 +100,7 @@ struct stat_specs { enum stat_id ids[ALL_STATS_CNT]; enum stat_variant variants[ALL_STATS_CNT]; bool asc[ALL_STATS_CNT]; + bool abs[ALL_STATS_CNT]; int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */ }; @@ -133,6 +135,7 @@ 
struct filter { int stat_id; enum stat_variant stat_var; long value; + bool abs; }; static struct env { @@ -142,10 +145,12 @@ static struct env { bool debug; bool quiet; bool force_checkpoints; + bool force_reg_invariants; enum resfmt out_fmt; bool show_version; bool comparison_mode; bool replay_mode; + int top_n; int log_level; int log_size; @@ -210,8 +215,7 @@ static const struct argp_option opts[] = { { "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" }, { "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" }, { "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" }, - { "test-states", 't', NULL, 0, - "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" }, + { "top-n", 'n', "N", 0, "Emit only up to first N results." }, { "quiet", 'q', NULL, 0, "Quiet mode" }, { "emit", 'e', "SPEC", 0, "Specify stats to be emitted" }, { "sort", 's', "SPEC", 0, "Specify sort order" }, @@ -219,6 +223,10 @@ static const struct argp_option opts[] = { { "compare", 'C', NULL, 0, "Comparison mode" }, { "replay", 'R', NULL, 0, "Replay mode" }, { "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." }, + { "test-states", 't', NULL, 0, + "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" }, + { "test-reg-invariants", 'r', NULL, 0, + "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" }, {}, }; @@ -290,6 +298,16 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 't': env.force_checkpoints = true; break; + case 'r': + env.force_reg_invariants = true; + break; + case 'n': + errno = 0; + env.top_n = strtol(arg, NULL, 10); + if (errno) { + fprintf(stderr, "invalid top N specifier: %s\n", arg); + argp_usage(state); + } case 'C': env.comparison_mode = true; break; @@ -455,7 +473,8 @@ static struct { { OP_EQ, "=" }, }; -static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var); +static bool parse_stat_id_var(const char *name, size_t len, int *id, + enum stat_variant *var, bool *is_abs); static int append_filter(struct filter **filters, int *cnt, const char *str) { @@ -488,13 +507,14 @@ static int append_filter(struct filter **filters, int *cnt, const char *str) long val; const char *end = str; const char *op_str; + bool is_abs; op_str = operators[i].op_str; p = strstr(str, op_str); if (!p) continue; - if (!parse_stat_id_var(str, p - str, &id, &var)) { + if (!parse_stat_id_var(str, p - str, &id, &var, &is_abs)) { fprintf(stderr, "Unrecognized stat name in '%s'!\n", str); return -EINVAL; } @@ -533,6 +553,7 @@ static int append_filter(struct filter **filters, int *cnt, const char *str) f->stat_id = id; f->stat_var = var; f->op = operators[i].op_kind; + f->abs = true; f->value = val; *cnt += 1; @@ -657,7 +678,8 @@ static struct stat_def { [MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, }, }; -static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var) +static bool parse_stat_id_var(const char *name, size_t len, int *id, + enum stat_variant *var, bool *is_abs) { static const char *var_sfxs[] = { [VARIANT_A] = "_a", @@ -667,6 +689,14 @@ static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_v }; int i, j, k; + /* |<stat>| means we take absolute value of given stat */ + *is_abs = false; + if (len > 2 && name[0] == 
'|' && name[len - 1] == '|') { + *is_abs = true; + name += 1; + len -= 2; + } + for (i = 0; i < ARRAY_SIZE(stat_defs); i++) { struct stat_def *def = &stat_defs[i]; size_t alias_len, sfx_len; @@ -722,7 +752,7 @@ static bool is_desc_sym(char c) static int parse_stat(const char *stat_name, struct stat_specs *specs) { int id; - bool has_order = false, is_asc = false; + bool has_order = false, is_asc = false, is_abs = false; size_t len = strlen(stat_name); enum stat_variant var; @@ -737,7 +767,7 @@ static int parse_stat(const char *stat_name, struct stat_specs *specs) len -= 1; } - if (!parse_stat_id_var(stat_name, len, &id, &var)) { + if (!parse_stat_id_var(stat_name, len, &id, &var, &is_abs)) { fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name); return -ESRCH; } @@ -745,6 +775,7 @@ static int parse_stat(const char *stat_name, struct stat_specs *specs) specs->ids[specs->spec_cnt] = id; specs->variants[specs->spec_cnt] = var; specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default; + specs->abs[specs->spec_cnt] = is_abs; specs->spec_cnt++; return 0; @@ -997,6 +1028,8 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf if (env.force_checkpoints) bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ); + if (env.force_reg_invariants) + bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS); err = bpf_object__load(obj); env.progs_processed++; @@ -1103,7 +1136,7 @@ cleanup: } static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, - enum stat_id id, bool asc) + enum stat_id id, bool asc, bool abs) { int cmp = 0; @@ -1124,6 +1157,11 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, long v1 = s1->stats[id]; long v2 = s2->stats[id]; + if (abs) { + v1 = v1 < 0 ? -v1 : v1; + v2 = v2 < 0 ? -v2 : v2; + } + if (v1 != v2) cmp = v1 < v2 ? 
		-1 : 1;
		break;
@@ -1142,7 +1180,8 @@ static int cmp_prog_stats(const void *v1, const void *v2)
	int i, cmp;

	for (i = 0; i < env.sort_spec.spec_cnt; i++) {
-		cmp = cmp_stat(s1, s2, env.sort_spec.ids[i], env.sort_spec.asc[i]);
+		cmp = cmp_stat(s1, s2, env.sort_spec.ids[i],
+			       env.sort_spec.asc[i], env.sort_spec.abs[i]);
		if (cmp != 0)
			return cmp;
	}
@@ -1211,7 +1250,8 @@ static void fetch_join_stat_value(const struct verif_stats_join *s,

 static int cmp_join_stat(const struct verif_stats_join *s1,
			 const struct verif_stats_join *s2,
-			 enum stat_id id, enum stat_variant var, bool asc)
+			 enum stat_id id, enum stat_variant var,
+			 bool asc, bool abs)
 {
	const char *str1 = NULL, *str2 = NULL;
	double v1, v2;
@@ -1220,6 +1260,11 @@ static int cmp_join_stat(const struct verif_stats_join *s1,
	fetch_join_stat_value(s1, id, var, &str1, &v1);
	fetch_join_stat_value(s2, id, var, &str2, &v2);

+	if (abs) {
+		v1 = fabs(v1);
+		v2 = fabs(v2);
+	}
+
	if (str1)
		cmp = strcmp(str1, str2);
	else if (v1 != v2)
@@ -1237,7 +1282,8 @@ static int cmp_join_stats(const void *v1, const void *v2)
		cmp = cmp_join_stat(s1, s2,
				    env.sort_spec.ids[i],
				    env.sort_spec.variants[i],
-				    env.sort_spec.asc[i]);
+				    env.sort_spec.asc[i],
+				    env.sort_spec.abs[i]);
		if (cmp != 0)
			return cmp;
	}
@@ -1720,6 +1766,9 @@ static bool is_join_stat_filter_matched(struct filter *f, const struct verif_sta
	fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value);

+	if (f->abs)
+		value = fabs(value);
+
	switch (f->op) {
	case OP_EQ: return value > f->value - eps && value < f->value + eps;
	case OP_NEQ: return value < f->value - eps || value > f->value + eps;
@@ -1766,7 +1815,7 @@ static int handle_comparison_mode(void)
	struct stat_specs base_specs = {}, comp_specs = {};
	struct stat_specs tmp_sort_spec;
	enum resfmt cur_fmt;
-	int err, i, j, last_idx;
+	int err, i, j, last_idx, cnt;

	if (env.filename_cnt != 2) {
		fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n");
@@ -1879,7 +1928,7 @@ static int handle_comparison_mode(void)
		env.join_stat_cnt += 1;
	}

-	/* now sort joined results accorsing to sort spec */
+	/* now sort joined results according to sort spec */
	qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats);

	/* for human-readable table output we need to do extra pass to
@@ -1896,16 +1945,22 @@ one_more_time:
	output_comp_headers(cur_fmt);

	last_idx = -1;
+	cnt = 0;
	for (i = 0; i < env.join_stat_cnt; i++) {
		const struct verif_stats_join *join = &env.join_stats[i];

		if (!should_output_join_stats(join))
			continue;

+		if (env.top_n && cnt >= env.top_n)
+			break;
+
		if (cur_fmt == RESFMT_TABLE_CALCLEN)
			last_idx = i;

		output_comp_stats(join, cur_fmt, i == last_idx);
+
+		cnt++;
	}

	if (cur_fmt == RESFMT_TABLE_CALCLEN) {
@@ -1920,6 +1975,9 @@ static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *s
 {
	long value = stats->stats[f->stat_id];

+	if (f->abs)
+		value = value < 0 ? -value : value;
+
	switch (f->op) {
	case OP_EQ: return value == f->value;
	case OP_NEQ: return value != f->value;
@@ -1964,7 +2022,7 @@ static bool should_output_stats(const struct verif_stats *stats)
 static void output_prog_stats(void)
 {
	const struct verif_stats *stats;
-	int i, last_stat_idx = 0;
+	int i, last_stat_idx = 0, cnt = 0;

	if (env.out_fmt == RESFMT_TABLE) {
		/* calculate column widths */
@@ -1984,7 +2042,10 @@ static void output_prog_stats(void)
		stats = &env.prog_stats[i];
		if (!should_output_stats(stats))
			continue;
+		if (env.top_n && cnt >= env.top_n)
+			break;
		output_stats(stats, env.out_fmt, i == last_stat_idx);
+		cnt++;
	}
 }
diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index 685034528018..65d14f3bbe30 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -36,7 +36,9 @@ DEFAULT_COMMAND="./test_progs"
 MOUNT_DIR="mnt"
 ROOTFS_IMAGE="root.img"
 OUTPUT_DIR="$HOME/.bpf_selftests"
-KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config" "tools/testing/selftests/bpf/config.${ARCH}")
+KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config"
+	"tools/testing/selftests/bpf/config.vm"
+	"tools/testing/selftests/bpf/config.${ARCH}")
 INDEX_URL="https://raw.githubusercontent.com/libbpf/ci/master/INDEX"
 NUM_COMPILE_JOBS="$(nproc)"
 LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh
new file mode 100755
index 000000000000..fe0343b95e6c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that PCI reset works correctly by verifying that only the expected reset
+# methods are supported and that after issuing the reset the ifindex of the
+# port changes.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	pci_reset_test
+"
+NUM_NETIFS=1
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+pci_reset_test()
+{
+	RET=0
+
+	local bus=$(echo $DEVLINK_DEV | cut -d '/' -f 1)
+	local bdf=$(echo $DEVLINK_DEV | cut -d '/' -f 2)
+
+	if [ $bus != "pci" ]; then
+		check_err 1 "devlink device is not a PCI device"
+		log_test "pci reset"
+		return
+	fi
+
+	if [ ! -f /sys/bus/pci/devices/$bdf/reset_method ]; then
+		check_err 1 "reset is not supported"
+		log_test "pci reset"
+		return
+	fi
+
+	[[ $(cat /sys/bus/pci/devices/$bdf/reset_method) == "bus" ]]
+	check_err $? "only \"bus\" reset method should be supported"
+
+	local ifindex_pre=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]')
+
+	echo 1 > /sys/bus/pci/devices/$bdf/reset
+	check_err $? "reset failed"
+
+	# Wait for udev to rename newly created netdev.
+	udevadm settle
+
+	local ifindex_post=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]')
+
+	[[ $ifindex_pre != $ifindex_post ]]
+	check_err $? "reset not performed"
+
+	log_test "pci reset"
+}
+
+swp1=${NETIFS[p1]}
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 5b2aca4c5f10..9274edfb76ff 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -91,6 +91,7 @@ TEST_PROGS += test_bridge_neigh_suppress.sh
 TEST_PROGS += test_vxlan_nolocalbypass.sh
 TEST_PROGS += test_bridge_backup_port.sh
 TEST_PROGS += fdb_flush.sh
+TEST_PROGS += fq_band_pktlimit.sh
 TEST_FILES := settings
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index 24b21b15ed3f..8d7575389f58 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -45,11 +45,13 @@ struct options {
	const char *host;
	const char *service;
	unsigned int size;
+	unsigned int num_pkt;
	struct {
		unsigned int mark;
		unsigned int dontfrag;
		unsigned int tclass;
		unsigned int hlimit;
+		unsigned int priority;
	} sockopt;
	struct {
		unsigned int family;
@@ -72,6 +74,7 @@ struct options {
	} v6;
 } opt = {
	.size = 13,
+	.num_pkt = 1,
	.sock = {
		.family = AF_UNSPEC,
		.type = SOCK_DGRAM,
@@ -112,7 +115,7 @@ static void cs_parse_args(int argc, char *argv[])
 {
	int o;

-	while ((o = getopt(argc, argv, "46sS:p:m:M:d:tf:F:c:C:l:L:H:")) != -1) {
+	while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) {
		switch (o) {
		case 's':
			opt.silent_send = true;
@@ -138,7 +141,9 @@ static void cs_parse_args(int argc, char *argv[])
				cs_usage(argv[0]);
			}
			break;
-
+		case 'P':
+			opt.sockopt.priority = atoi(optarg);
+			break;
		case 'm':
			opt.mark.ena = true;
			opt.mark.val = atoi(optarg);
@@ -146,6 +151,9 @@ static void cs_parse_args(int argc, char *argv[])
		case 'M':
			opt.sockopt.mark = atoi(optarg);
			break;
+		case 'n':
+			opt.num_pkt = atoi(optarg);
+			break;
		case 'd':
			opt.txtime.ena = true;
			opt.txtime.delay = atoi(optarg);
@@ -410,6 +418,10 @@ static void ca_set_sockopts(int fd)
	    setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS,
		       &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit)))
		error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT");
+	if (opt.sockopt.priority &&
+	    setsockopt(fd, SOL_SOCKET, SO_PRIORITY,
+		       &opt.sockopt.priority, sizeof(opt.sockopt.priority)))
+		error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY");
 }

 int main(int argc, char *argv[])
@@ -421,6 +433,7 @@ int main(int argc, char *argv[])
	char *buf;
	int err;
	int fd;
+	int i;

	cs_parse_args(argc, argv);
@@ -480,24 +493,27 @@ int main(int argc, char *argv[])
	cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf));

-	err = sendmsg(fd, &msg, 0);
-	if (err < 0) {
-		if (!opt.silent_send)
-			fprintf(stderr, "send failed: %s\n", strerror(errno));
-		err = ERN_SEND;
-		goto err_out;
-	} else if (err != (int)opt.size) {
-		fprintf(stderr, "short send\n");
-		err = ERN_SEND_SHORT;
-		goto err_out;
-	} else {
-		err = ERN_SUCCESS;
+	for (i = 0; i < opt.num_pkt; i++) {
+		err = sendmsg(fd, &msg, 0);
+		if (err < 0) {
+			if (!opt.silent_send)
+				fprintf(stderr, "send failed: %s\n", strerror(errno));
+			err = ERN_SEND;
+			goto err_out;
+		} else if (err != (int)opt.size) {
+			fprintf(stderr, "short send\n");
+			err = ERN_SEND_SHORT;
+			goto err_out;
+		}
	}
+	err = ERN_SUCCESS;

-	/* Make sure all timestamps have time to loop back */
-	usleep(opt.txtime.delay);
+	if (opt.ts.ena) {
+		/* Make sure all timestamps have time to loop back */
+		usleep(opt.txtime.delay);

-	cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf));
+		cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf));
+	}

 err_out:
	close(fd);
diff --git a/tools/testing/selftests/net/fq_band_pktlimit.sh b/tools/testing/selftests/net/fq_band_pktlimit.sh
new file mode 100755
index 000000000000..24b77bdf41ff
--- /dev/null
+++ b/tools/testing/selftests/net/fq_band_pktlimit.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Verify that FQ has a packet limit per band:
+#
+# 1. set the limit to 10 per band
+# 2. send 20 pkts on band A: verify that 10 are queued, 10 dropped
+# 3. send 20 pkts on band A: verify that 0 are queued, 20 dropped
+# 4. send 20 pkts on band B: verify that 10 are queued, 10 dropped
+#
+# Send packets with a 100ms delay to ensure that previously sent
+# packets are still queued when later ones are sent.
+# Use SO_TXTIME for this.
+
+die() {
+	echo "$1"
+	exit 1
+}
+
+# run inside private netns
+if [[ $# -eq 0 ]]; then
+	./in_netns.sh "$0" __subprocess
+	exit
+fi
+
+ip link add type dummy
+ip link set dev dummy0 up
+ip -6 addr add fdaa::1/128 dev dummy0
+ip -6 route add fdaa::/64 dev dummy0
+tc qdisc replace dev dummy0 root handle 1: fq quantum 1514 initial_quantum 1514 limit 10
+
+./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000
+OUT1="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000
+OUT2="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+./cmsg_sender -6 -p u -d 100000 -n 20 -P 7 fdaa::2 8000
+OUT3="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+# Initial stats will report zero sent, as all packets are still
+# queued in FQ. Sleep for the delay period (100ms) and see that
+# twenty are now sent.
+sleep 0.1
+OUT4="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+# Log the output after the test
+echo "${OUT1}"
+echo "${OUT2}"
+echo "${OUT3}"
+echo "${OUT4}"
+
+# Test the output for expected values
+echo "${OUT1}" | grep -q '0\ pkt\ (dropped\ 10' || die "unexpected drop count at 1"
+echo "${OUT2}" | grep -q '0\ pkt\ (dropped\ 30' || die "unexpected drop count at 2"
+echo "${OUT3}" | grep -q '0\ pkt\ (dropped\ 40' || die "unexpected drop count at 3"
+echo "${OUT4}" | grep -q '20\ pkt\ (dropped\ 40' || die "unexpected accept count at 4"
diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh
new file mode 100755
index 000000000000..4fe0befa13fb
--- /dev/null
+++ b/tools/testing/selftests/net/net_helper.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Helper functions
+
+wait_local_port_listen()
+{
+	local listener_ns="${1}"
+	local port="${2}"
+	local protocol="${3}"
+	local port_hex
+	local i
+
+	port_hex="$(printf "%04X" "${port}")"
+	for i in $(seq 10); do
+		if ip netns exec "${listener_ns}" cat /proc/net/"${protocol}"* | \
+		   grep -q "${port_hex}"; then
+			break
+		fi
+		sleep 0.1
+	done
+}
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index 0c743752669a..af5dc57c8ce9 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -3,6 +3,8 @@
 #
 # Run a series of udpgro functional tests.

+source net_helper.sh
+
 readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
 BPF_FILE="../bpf/xdp_dummy.bpf.o"
@@ -51,8 +53,7 @@ run_one() {
		echo "ok" || \
		echo "failed" &

-	# Hack: let bg programs complete the startup
-	sleep 0.2
+	wait_local_port_listen ${PEER_NS} 8000 udp
	./udpgso_bench_tx ${tx_args}
	ret=$?
	wait $(jobs -p)
@@ -97,7 +98,7 @@ run_one_nat() {
		echo "ok" || \
		echo "failed"&

-	sleep 0.1
+	wait_local_port_listen "${PEER_NS}" 8000 udp
	./udpgso_bench_tx ${tx_args}
	ret=$?
	kill -INT $pid
@@ -118,11 +119,9 @@ run_one_2sock() {
		echo "ok" || \
		echo "failed" &

-	# Hack: let bg programs complete the startup
-	sleep 0.2
+	wait_local_port_listen "${PEER_NS}" 12345 udp
	./udpgso_bench_tx ${tx_args} -p 12345
-	sleep 0.1
-	# first UDP GSO socket should be closed at this point
+	wait_local_port_listen "${PEER_NS}" 8000 udp
	./udpgso_bench_tx ${tx_args}
	ret=$?
	wait $(jobs -p)
diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh
index 894972877e8b..cb664679b434 100755
--- a/tools/testing/selftests/net/udpgro_bench.sh
+++ b/tools/testing/selftests/net/udpgro_bench.sh
@@ -3,6 +3,8 @@
 #
 # Run a series of udpgro benchmarks

+source net_helper.sh
+
 readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
 BPF_FILE="../bpf/xdp_dummy.bpf.o"
@@ -40,8 +42,7 @@ run_one() {
	ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
	ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r &

-	# Hack: let bg programs complete the startup
-	sleep 0.2
+	wait_local_port_listen "${PEER_NS}" 8000 udp
	./udpgso_bench_tx ${tx_args}
 }
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
index 0a6359bed0b9..dd47fa96f6b3 100755
--- a/tools/testing/selftests/net/udpgro_frglist.sh
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -3,6 +3,8 @@
 #
 # Run a series of udpgro benchmarks

+source net_helper.sh
+
 readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
 BPF_FILE="../bpf/xdp_dummy.bpf.o"
@@ -45,8 +47,7 @@ run_one() {
	echo ${rx_args}
	ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &

-	# Hack: let bg programs complete the startup
-	sleep 0.2
+	wait_local_port_listen "${PEER_NS}" 8000 udp
	./udpgso_bench_tx ${tx_args}
 }
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
index b62429b0fcdb..65c8f3f983b9 100644
--- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
+++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
@@ -9,43 +9,13 @@ from TdcPlugin import TdcPlugin
 from tdc_config import *

-def prepare_suite(obj, test):
-    original = obj.args.NAMES
-
-    if 'skip' in test and test['skip'] == 'yes':
-        return
-
-    if 'nsPlugin' not in test['plugins']:
-        return
-
-    shadow = {}
-    shadow['IP'] = original['IP']
-    shadow['TC'] = original['TC']
-    shadow['NS'] = '{}-{}'.format(original['NS'], test['random'])
-    shadow['DEV0'] = '{}id{}'.format(original['DEV0'], test['id'])
-    shadow['DEV1'] = '{}id{}'.format(original['DEV1'], test['id'])
-    shadow['DUMMY'] = '{}id{}'.format(original['DUMMY'], test['id'])
-    shadow['DEV2'] = original['DEV2']
-    obj.args.NAMES = shadow
-
-    if obj.args.namespace:
-        obj._ns_create()
-    else:
-        obj._ports_create()
-
-    # Make sure the netns is visible in the fs
-    while True:
-        obj._proc_check()
-        try:
-            ns = obj.args.NAMES['NS']
-            f = open('/run/netns/{}'.format(ns))
-            f.close()
-            break
-        except:
-            time.sleep(0.1)
-            continue
-
-    obj.args.NAMES = original
+try:
+    from pyroute2 import netns
+    from pyroute2 import IPRoute
+    netlink = True
+except ImportError:
+    netlink = False
+    print("!!! Consider installing pyroute2 !!!")

 class SubPlugin(TdcPlugin):
     def __init__(self):
@@ -57,60 +27,69 @@ class SubPlugin(TdcPlugin):
         super().pre_suite(testcount, testlist)

-        print("Setting up namespaces and devices...")
+    def prepare_test(self, test):
+        if 'skip' in test and test['skip'] == 'yes':
+            return

-        with Pool(self.args.mp) as p:
-            it = zip(cycle([self]), testlist)
-            p.starmap(prepare_suite, it)
+        if 'nsPlugin' not in test['plugins']:
+            return

-    def pre_case(self, caseinfo, test_skip):
+        if netlink == True:
+            self._nl_ns_create()
+        else:
+            self._ns_create()
+
+        # Make sure the netns is visible in the fs
+        ticks = 20
+        while True:
+            if ticks == 0:
+                raise TimeoutError
+            self._proc_check()
+            try:
+                ns = self.args.NAMES['NS']
+                f = open('/run/netns/{}'.format(ns))
+                f.close()
+                break
+            except:
+                time.sleep(0.1)
+                ticks -= 1
+                continue
+
+    def pre_case(self, test, test_skip):
         if self.args.verbose:
             print('{}.pre_case'.format(self.sub_class))

         if test_skip:
             return
+        self.prepare_test(test)

     def post_case(self):
         if self.args.verbose:
             print('{}.post_case'.format(self.sub_class))

-        if self.args.namespace:
-            self._ns_destroy()
+        if netlink == True:
+            self._nl_ns_destroy()
         else:
-            self._ports_destroy()
+            self._ns_destroy()

     def post_suite(self, index):
         if self.args.verbose:
             print('{}.post_suite'.format(self.sub_class))

         # Make sure we don't leak resources
-        for f in os.listdir('/run/netns/'):
-            cmd = self._replace_keywords("$IP netns del {}".format(f))
-
-            if self.args.verbose > 3:
-                print('_exec_cmd: command "{}"'.format(cmd))
+        cmd = "$IP -a netns del"

-            subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+        if self.args.verbose > 3:
+            print('_exec_cmd: command "{}"'.format(cmd))

-    def add_args(self, parser):
-        super().add_args(parser)
-        self.argparser_group = self.argparser.add_argument_group(
-            'netns',
-            'options for nsPlugin(run commands in net namespace)')
-        self.argparser_group.add_argument(
-            '-N', '--no-namespace', action='store_false', default=True,
-            dest='namespace', help='Don\'t run commands in namespace')
-        return self.argparser
+        subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

     def adjust_command(self, stage, command):
         super().adjust_command(stage, command)
         cmdform = 'list'
         cmdlist = list()

-        if not self.args.namespace:
-            return command
-
         if self.args.verbose:
             print('{}.adjust_command'.format(self.sub_class))
@@ -138,41 +117,67 @@ class SubPlugin(TdcPlugin):
             print('adjust_command: return command [{}]'.format(command))
         return command

-    def _ports_create_cmds(self):
-        cmds = []
-
-        cmds.append(self._replace_keywords('link add $DEV0 type veth peer name $DEV1'))
-        cmds.append(self._replace_keywords('link set $DEV0 up'))
-        cmds.append(self._replace_keywords('link add $DUMMY type dummy'))
-        if not self.args.namespace:
-            cmds.append(self._replace_keywords('link set $DEV1 up'))
-
-        return cmds
+    def _nl_ns_create(self):
+        ns = self.args.NAMES["NS"];
+        dev0 = self.args.NAMES["DEV0"];
+        dev1 = self.args.NAMES["DEV1"];
+        dummy = self.args.NAMES["DUMMY"];

-    def _ports_create(self):
-        self._exec_cmd_batched('pre', self._ports_create_cmds())
-
-    def _ports_destroy_cmd(self):
-        return self._replace_keywords('link del $DEV0')
-
-    def _ports_destroy(self):
-        self._exec_cmd('post', self._ports_destroy_cmd())
+        if self.args.verbose:
+            print('{}._nl_ns_create'.format(self.sub_class))
+
+        netns.create(ns)
+        netns.pushns(newns=ns)
+        with IPRoute() as ip:
+            ip.link('add', ifname=dev1, kind='veth', peer={'ifname': dev0, 'net_ns_fd':'/proc/1/ns/net'})
+            ip.link('add', ifname=dummy, kind='dummy')
+            ticks = 20
+            while True:
+                if ticks == 0:
+                    raise TimeoutError
+                try:
+                    dev1_idx = ip.link_lookup(ifname=dev1)[0]
+                    dummy_idx = ip.link_lookup(ifname=dummy)[0]
+                    ip.link('set', index=dev1_idx, state='up')
+                    ip.link('set', index=dummy_idx, state='up')
+                    break
+                except:
+                    time.sleep(0.1)
+                    ticks -= 1
+                    continue
+        netns.popns()
+
+        with IPRoute() as ip:
+            ticks = 20
+            while True:
+                if ticks == 0:
+                    raise TimeoutError
+                try:
+                    dev0_idx = ip.link_lookup(ifname=dev0)[0]
+                    ip.link('set', index=dev0_idx, state='up')
+                    break
+                except:
+                    time.sleep(0.1)
+                    ticks -= 1
+                    continue

     def _ns_create_cmds(self):
         cmds = []

-        if self.args.namespace:
-            ns = self.args.NAMES['NS']
+        ns = self.args.NAMES['NS']

-            cmds.append(self._replace_keywords('netns add {}'.format(ns)))
-            cmds.append(self._replace_keywords('link set $DEV1 netns {}'.format(ns)))
-            cmds.append(self._replace_keywords('link set $DUMMY netns {}'.format(ns)))
-            cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV1 up'.format(ns)))
-            cmds.append(self._replace_keywords('netns exec {} $IP link set $DUMMY up'.format(ns)))
+        cmds.append(self._replace_keywords('netns add {}'.format(ns)))
+        cmds.append(self._replace_keywords('link add $DEV1 type veth peer name $DEV0'))
+        cmds.append(self._replace_keywords('link set $DEV1 netns {}'.format(ns)))
+        cmds.append(self._replace_keywords('link add $DUMMY type dummy'.format(ns)))
+        cmds.append(self._replace_keywords('link set $DUMMY netns {}'.format(ns)))
+        cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV1 up'.format(ns)))
+        cmds.append(self._replace_keywords('netns exec {} $IP link set $DUMMY up'.format(ns)))
+        cmds.append(self._replace_keywords('link set $DEV0 up'.format(ns)))

-            if self.args.device:
-                cmds.append(self._replace_keywords('link set $DEV2 netns {}'.format(ns)))
-                cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV2 up'.format(ns)))
+        if self.args.device:
+            cmds.append(self._replace_keywords('link set $DEV2 netns {}'.format(ns)))
+            cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV2 up'.format(ns)))

         return cmds
@@ -181,9 +186,12 @@ class SubPlugin(TdcPlugin):
         Create the network namespace in which the tests will be run and set up
         the required network devices for it.
         '''
-        self._ports_create()
         self._exec_cmd_batched('pre', self._ns_create_cmds())

+    def _nl_ns_destroy(self):
+        ns = self.args.NAMES['NS']
+        netns.remove(ns)
+
     def _ns_destroy_cmd(self):
         return self._replace_keywords('netns delete {}'.format(self.args.NAMES['NS']))
@@ -192,9 +200,7 @@ class SubPlugin(TdcPlugin):
         Destroy the network namespace for testing (and any associated network
         devices as well)
         '''
-        if self.args.namespace:
-            self._exec_cmd('post', self._ns_destroy_cmd())
-        self._ports_destroy()
+        self._exec_cmd('post', self._ns_destroy_cmd())

     @cached_property
     def _proc(self):
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
index ddc7c355be0a..24bd0c2a3014 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
@@ -272,5 +272,62 @@
         "teardown": [
             "$TC qdisc del dev $DEV1 parent root drr"
         ]
+    },
+    {
+        "id": "bd32",
+        "name": "Try to delete hashtable referenced by another u32 filter",
+        "category": [
+            "filter",
+            "u32"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 1: u32 divisor 1",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 1:"
+        ],
+        "cmdUnderTest": "$TC filter delete dev $DEV1 parent 10: prio 2 handle 1: u32",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter show dev $DEV1",
+        "matchPattern": "protocol ip pref 2 u32 chain 0 fh 1:",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 parent root drr"
+        ]
+    },
+    {
+        "id": "4585",
+        "name": "Delete small tree of u32 hashtables and filters",
+        "category": [
+            "filter",
+            "u32"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 1: u32 divisor 1",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 2: u32 divisor 1",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 3: u32 divisor 2",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 4: u32 divisor 1",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 1: match ip src any action drop",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 2: match ip src any action drop",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 3: match ip src any link 2:",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 3: match ip src any link 1:",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 4: match ip src any action drop",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 3:",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 4:"
+        ],
+        "cmdUnderTest": "$TC filter delete dev $DEV1 parent 10:",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1",
+        "matchPattern": "protocol ip pref 2 u32",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 parent root drr"
+        ]
+    }
 ]
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index a6718192aff3..669ec89ebfe1 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -616,7 +616,7 @@ def test_runner_mp(pm, args, alltests):
         batches.insert(0, serial)

     print("Executing {} tests in parallel and {} in serial".format(len(parallel), len(serial)))
-    print("Using {} batches".format(len(batches)))
+    print("Using {} batches and {} workers".format(len(batches), args.mp))

     # We can't pickle these objects so workaround them
     global mp_pm
@@ -1017,6 +1017,7 @@ def main():
     parser = pm.call_add_args(parser)
     (args, remaining) = parser.parse_known_args()
     args.NAMES = NAMES
+    args.mp = min(args.mp, 4)
     pm.set_args(args)
     check_default_settings(args, remaining, pm)
     if args.verbose > 2:
diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh
index eb357bd7923c..4dbe50bde5a0 100755
--- a/tools/testing/selftests/tc-testing/tdc.sh
+++ b/tools/testing/selftests/tc-testing/tdc.sh
@@ -1,7 +1,68 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0

-modprobe netdevsim
-modprobe sch_teql
-./tdc.py -c actions --nobuildebpf
-./tdc.py -c qdisc
+# If a module is required and was not compiled
+# the test that requires it will fail anyways
+try_modprobe() {
+	modprobe -q -R "$1"
+	if [ $? -ne 0 ]; then
+		echo "Module $1 not found... skipping."
+	else
+		modprobe "$1"
+	fi
+}
+
+try_modprobe netdevsim
+try_modprobe act_bpf
+try_modprobe act_connmark
+try_modprobe act_csum
+try_modprobe act_ct
+try_modprobe act_ctinfo
+try_modprobe act_gact
+try_modprobe act_gate
+try_modprobe act_ipt
+try_modprobe act_mirred
+try_modprobe act_mpls
+try_modprobe act_nat
+try_modprobe act_pedit
+try_modprobe act_police
+try_modprobe act_sample
+try_modprobe act_simple
+try_modprobe act_skbedit
+try_modprobe act_skbmod
+try_modprobe act_tunnel_key
+try_modprobe act_vlan
+try_modprobe cls_basic
+try_modprobe cls_bpf
+try_modprobe cls_cgroup
+try_modprobe cls_flow
+try_modprobe cls_flower
+try_modprobe cls_fw
+try_modprobe cls_matchall
+try_modprobe cls_route
+try_modprobe cls_u32
+try_modprobe em_canid
+try_modprobe em_cmp
+try_modprobe em_ipset
+try_modprobe em_ipt
+try_modprobe em_meta
+try_modprobe em_nbyte
+try_modprobe em_text
+try_modprobe em_u32
+try_modprobe sch_cake
+try_modprobe sch_cbs
+try_modprobe sch_choke
+try_modprobe sch_codel
+try_modprobe sch_drr
+try_modprobe sch_etf
+try_modprobe sch_ets
+try_modprobe sch_fq
+try_modprobe sch_fq_codel
+try_modprobe sch_fq_pie
+try_modprobe sch_gred
+try_modprobe sch_hfsc
+try_modprobe sch_hhf
+try_modprobe sch_htb
+try_modprobe sch_teql
+./tdc.py -J`nproc` -c actions --nobuildebpf
+./tdc.py -J`nproc` -c qdisc |
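Several of the selftest scripts in this series replace fixed sleeps with the new wait_local_port_listen helper from net_helper.sh, which polls /proc/net until the listener's port appears. A minimal usage sketch follows; receiver_cmd, sender_cmd and the PEER_NS value are illustrative placeholders, not names taken from the tree:

    source net_helper.sh

    ip netns exec "${PEER_NS}" ./receiver_cmd &     # placeholder: background receiver bound to UDP port 8000
    wait_local_port_listen "${PEER_NS}" 8000 udp    # polls /proc/net/udp* for 1F40 (8000 in hex), roughly 1s at most
    ./sender_cmd                                    # placeholder: start transmitting only once the port is visible
    wait $(jobs -p)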