271 files changed, 13589 insertions, 4434 deletions
diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index 723408e399ab..7985c6615f3c 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -135,6 +135,30 @@ Either way, the returned buffer is either NULL, or of size buffer_szk. Without t annotation, the verifier will reject the program if a null pointer is passed in with a nonzero size. +2.2.5 __str Annotation +---------------------------- +This annotation is used to indicate that the argument is a constant string. + +An example is given below:: + + __bpf_kfunc bpf_get_file_xattr(..., const char *name__str, ...) + { + ... + } + +In this case, ``bpf_get_file_xattr()`` can be called as:: + + bpf_get_file_xattr(..., "xattr_name", ...); + +Or:: + + const char name[] = "xattr_name"; /* This need to be global */ + int BPF_PROG(...) + { + ... + bpf_get_file_xattr(..., name, ...); + ... + } .. _BPF_kfunc_nodef: diff --git a/Documentation/devicetree/bindings/net/marvell,aquantia.yaml b/Documentation/devicetree/bindings/net/marvell,aquantia.yaml new file mode 100644 index 000000000000..9854fab4c4db --- /dev/null +++ b/Documentation/devicetree/bindings/net/marvell,aquantia.yaml @@ -0,0 +1,116 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/marvell,aquantia.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Marvell Aquantia Ethernet PHY + +maintainers: + - Christian Marangi <ansuelsmth@gmail.com> + +description: | + Marvell Aquantia Ethernet PHY require a firmware to be loaded to actually + work. + + This can be done and is implemented by OEM in 3 different way: + - Attached SPI flash directly to the PHY with the firmware. The PHY + will self load the firmware in the presence of this configuration. + - Read from a dedicated partition on system NAND declared in an + NVMEM cell, and loaded to the PHY using its mailbox interface. + - Manually provided firmware loaded from a file in the filesystem. + +allOf: + - $ref: ethernet-phy.yaml# + +select: + properties: + compatible: + contains: + enum: + - ethernet-phy-id03a1.b445 + - ethernet-phy-id03a1.b460 + - ethernet-phy-id03a1.b4a2 + - ethernet-phy-id03a1.b4d0 + - ethernet-phy-id03a1.b4e0 + - ethernet-phy-id03a1.b5c2 + - ethernet-phy-id03a1.b4b0 + - ethernet-phy-id03a1.b662 + - ethernet-phy-id03a1.b712 + - ethernet-phy-id31c3.1c12 + required: + - compatible + +properties: + reg: + maxItems: 1 + + firmware-name: + description: specify the name of PHY firmware to load + + nvmem-cells: + description: phandle to the firmware nvmem cell + maxItems: 1 + + nvmem-cell-names: + const: firmware + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethernet-phy@0 { + compatible = "ethernet-phy-id31c3.1c12", + "ethernet-phy-ieee802.3-c45"; + + reg = <0>; + firmware-name = "AQR-G4_v5.4.C-AQR_CIG_WF-1945_0x8_ID44776_VER1630.cld"; + }; + + ethernet-phy@1 { + compatible = "ethernet-phy-id31c3.1c12", + "ethernet-phy-ieee802.3-c45"; + + reg = <1>; + nvmem-cells = <&aqr_fw>; + nvmem-cell-names = "firmware"; + }; + }; + + flash { + compatible = "jedec,spi-nor"; + #address-cells = <1>; + #size-cells = <1>; + + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + /* ... 
*/ + + partition@650000 { + compatible = "nvmem-cells"; + label = "0:ethphyfw"; + reg = <0x650000 0x80000>; + read-only; + #address-cells = <1>; + #size-cells = <1>; + + aqr_fw: aqr_fw@0 { + reg = <0x0 0x5f42a>; + }; + }; + + /* ... */ + + }; + }; diff --git a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml index 5d074f27d462..d3306b186000 100644 --- a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml +++ b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml @@ -55,7 +55,7 @@ properties: - items: - enum: - - renesas,r9a07g043-gbeth # RZ/G2UL + - renesas,r9a07g043-gbeth # RZ/G2UL and RZ/Five - renesas,r9a07g044-gbeth # RZ/G2{L,LC} - renesas,r9a07g054-gbeth # RZ/V2L - const: renesas,rzg2l-gbeth # RZ/{G2L,G2UL,V2L} family diff --git a/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml b/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml new file mode 100644 index 000000000000..475aff7714d6 --- /dev/null +++ b/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml @@ -0,0 +1,133 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/renesas,ethertsn.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Renesas Ethernet TSN End-station + +maintainers: + - Niklas Söderlund <niklas.soderlund@ragnatech.se> + +description: + The RTSN device provides Ethernet network using a 10 Mbps, 100 Mbps, or 1 + Gbps full-duplex link via MII/GMII/RMII/RGMII. Depending on the connected PHY. + +allOf: + - $ref: ethernet-controller.yaml# + +properties: + compatible: + items: + - enum: + - renesas,r8a779g0-ethertsn # R-Car V4H + - const: renesas,rcar-gen4-ethertsn + + reg: + items: + - description: TSN End Station target + - description: generalized Precision Time Protocol target + + reg-names: + items: + - const: tsnes + - const: gptp + + interrupts: + items: + - description: TX data interrupt + - description: RX data interrupt + + interrupt-names: + items: + - const: tx + - const: rx + + clocks: + maxItems: 1 + + power-domains: + maxItems: 1 + + resets: + maxItems: 1 + + phy-mode: + contains: + enum: + - mii + - rgmii + + phy-handle: + $ref: /schemas/types.yaml#/definitions/phandle + description: + Specifies a reference to a node representing a PHY device. 
+ + rx-internal-delay-ps: + enum: [0, 1800] + + tx-internal-delay-ps: + enum: [0, 2000] + + '#address-cells': + const: 1 + + '#size-cells': + const: 0 + +patternProperties: + "^ethernet-phy@[0-9a-f]$": + type: object + $ref: ethernet-phy.yaml# + unevaluatedProperties: false + +required: + - compatible + - reg + - reg-names + - interrupts + - interrupt-names + - clocks + - power-domains + - resets + - phy-mode + - phy-handle + - '#address-cells' + - '#size-cells' + +additionalProperties: false + +examples: + - | + #include <dt-bindings/clock/r8a779g0-cpg-mssr.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> + #include <dt-bindings/power/r8a779g0-sysc.h> + #include <dt-bindings/gpio/gpio.h> + + tsn0: ethernet@e6460000 { + compatible = "renesas,r8a779g0-ethertsn", "renesas,rcar-gen4-ethertsn"; + reg = <0xe6460000 0x7000>, + <0xe6449000 0x500>; + reg-names = "tsnes", "gptp"; + interrupts = <GIC_SPI 429 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 430 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "tx", "rx"; + clocks = <&cpg CPG_MOD 2723>; + power-domains = <&sysc R8A779G0_PD_ALWAYS_ON>; + resets = <&cpg 2723>; + + phy-mode = "rgmii"; + tx-internal-delay-ps = <2000>; + phy-handle = <&phy3>; + + #address-cells = <1>; + #size-cells = <0>; + + phy3: ethernet-phy@3 { + compatible = "ethernet-phy-ieee802.3-c45"; + reg = <0>; + interrupt-parent = <&gpio4>; + interrupts = <3 IRQ_TYPE_LEVEL_LOW>; + reset-gpios = <&gpio1 23 GPIO_ACTIVE_LOW>; + }; + }; diff --git a/Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml b/Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml index 1d33d80af11c..bbe89ea9590c 100644 --- a/Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml +++ b/Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml @@ -122,6 +122,20 @@ properties: and "phy-handle" should point to an external PHY if exists. maxItems: 1 + dmas: + minItems: 2 + maxItems: 32 + description: TX and RX DMA channel phandle + + dma-names: + items: + pattern: "^[tr]x_chan([0-9]|1[0-5])$" + description: + Should be "tx_chan0", "tx_chan1" ... "tx_chan15" for DMA Tx channel + Should be "rx_chan0", "rx_chan1" ... "rx_chan15" for DMA Rx channel + minItems: 2 + maxItems: 32 + required: - compatible - interrupts @@ -143,6 +157,8 @@ examples: clocks = <&axi_clk>, <&axi_clk>, <&pl_enet_ref_clk>, <&mgt_clk>; phy-mode = "mii"; reg = <0x40c00000 0x40000>,<0x50c00000 0x40000>; + dmas = <&xilinx_dma 0>, <&xilinx_dma 1>; + dma-names = "tx_chan0", "rx_chan0"; xlnx,rxcsum = <0x2>; xlnx,rxmem = <0x800>; xlnx,txcsum = <0x2>; diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index 572d83a414d0..43067e1f63aa 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -1484,8 +1484,8 @@ operations: dont-validate: [ strict ] flags: [ admin-perm ] do: - pre: devlink-nl-pre-doit - post: devlink-nl-post-doit + pre: devlink-nl-pre-doit-dev-lock + post: devlink-nl-post-doit-dev-lock request: attributes: - bus-name diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst index cad96c8d1f97..613a818d5db6 100644 --- a/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst +++ b/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst @@ -24,6 +24,10 @@ Supported Devices Currently, this driver support following devices: * Network controller: Cavium, Inc. Device b200 * Network controller: Cavium, Inc. 
Device b400 + * Network controller: Cavium, Inc. Device b900 + * Network controller: Cavium, Inc. Device ba00 + * Network controller: Cavium, Inc. Device bc00 + * Network controller: Cavium, Inc. Device bd00 Interface Control ================= diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index dc2c7b979656..7edf0fd58c34 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -985,7 +985,8 @@ static int alb_upper_dev_walk(struct net_device *upper, if (netif_is_macvlan(upper) && !strict_match) { tags = bond_verify_device_path(bond->dev, upper, 0); if (IS_ERR_OR_NULL(tags)) - BUG(); + return -ENOMEM; + alb_send_lp_vid(slave, upper->dev_addr, tags[0].vlan_proto, tags[0].vlan_id); kfree(tags); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 8e6cc0e133b7..d987432cee3b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2973,8 +2973,11 @@ struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev, if (start_dev == end_dev) { tags = kcalloc(level + 1, sizeof(*tags), GFP_ATOMIC); - if (!tags) + if (!tags) { + net_err_ratelimited("%s: %s: Failed to allocate tags\n", + __func__, start_dev->name); return ERR_PTR(-ENOMEM); + } tags[level].vlan_proto = BOND_VLAN_PROTO_NONE; return tags; } @@ -5755,10 +5758,8 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev, { struct bonding *bond = netdev_priv(bond_dev); struct ethtool_ts_info ts_info; - const struct ethtool_ops *ops; struct net_device *real_dev; bool sw_tx_support = false; - struct phy_device *phydev; struct list_head *iter; struct slave *slave; int ret = 0; @@ -5769,29 +5770,12 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev, rcu_read_unlock(); if (real_dev) { - ops = real_dev->ethtool_ops; - phydev = real_dev->phydev; - - if (phy_has_tsinfo(phydev)) { - ret = phy_ts_info(phydev, info); - goto out; - } else if (ops->get_ts_info) { - ret = ops->get_ts_info(real_dev, info); - goto out; - } + ret = ethtool_get_ts_info_by_layer(real_dev, info); } else { /* Check if all slaves support software tx timestamping */ rcu_read_lock(); bond_for_each_slave_rcu(bond, slave, iter) { - ret = -1; - ops = slave->dev->ethtool_ops; - phydev = slave->dev->phydev; - - if (phy_has_tsinfo(phydev)) - ret = phy_ts_info(phydev, &ts_info); - else if (ops->get_ts_info) - ret = ops->get_ts_info(slave->dev, &ts_info); - + ret = ethtool_get_ts_info_by_layer(slave->dev, &ts_info); if (!ret && (ts_info.so_timestamping & SOF_TIMESTAMPING_TX_SOFTWARE)) { sw_tx_support = true; continue; @@ -5803,15 +5787,9 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev, rcu_read_unlock(); } - ret = 0; - info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; if (sw_tx_support) info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE; - info->phc_index = -1; - -out: dev_put(real_dev); return ret; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d0359b569afe..d8cf7b30bd03 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -254,22 +254,22 @@ static bool bnxt_vf_pciid(enum board_idx idx) writel(DB_CP_IRQ_DIS_FLAGS, db) #define BNXT_DB_CQ(db, idx) \ - writel(DB_CP_FLAGS | RING_CMP(idx), (db)->doorbell) + writel(DB_CP_FLAGS | DB_RING_IDX(db, idx), (db)->doorbell) #define BNXT_DB_NQ_P5(db, idx) \ - bnxt_writeq(bp, (db)->db_key64 | DBR_TYPE_NQ | RING_CMP(idx), \ + 
bnxt_writeq(bp, (db)->db_key64 | DBR_TYPE_NQ | DB_RING_IDX(db, idx),\ (db)->doorbell) #define BNXT_DB_CQ_ARM(db, idx) \ - writel(DB_CP_REARM_FLAGS | RING_CMP(idx), (db)->doorbell) + writel(DB_CP_REARM_FLAGS | DB_RING_IDX(db, idx), (db)->doorbell) #define BNXT_DB_NQ_ARM_P5(db, idx) \ - bnxt_writeq(bp, (db)->db_key64 | DBR_TYPE_NQ_ARM | RING_CMP(idx),\ - (db)->doorbell) + bnxt_writeq(bp, (db)->db_key64 | DBR_TYPE_NQ_ARM | \ + DB_RING_IDX(db, idx), (db)->doorbell) static void bnxt_db_nq(struct bnxt *bp, struct bnxt_db_info *db, u32 idx) { - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) BNXT_DB_NQ_P5(db, idx); else BNXT_DB_CQ(db, idx); @@ -277,7 +277,7 @@ static void bnxt_db_nq(struct bnxt *bp, struct bnxt_db_info *db, u32 idx) static void bnxt_db_nq_arm(struct bnxt *bp, struct bnxt_db_info *db, u32 idx) { - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) BNXT_DB_NQ_ARM_P5(db, idx); else BNXT_DB_CQ_ARM(db, idx); @@ -285,9 +285,9 @@ static void bnxt_db_nq_arm(struct bnxt *bp, struct bnxt_db_info *db, u32 idx) static void bnxt_db_cq(struct bnxt *bp, struct bnxt_db_info *db, u32 idx) { - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) bnxt_writeq(bp, db->db_key64 | DBR_TYPE_CQ_ARMALL | - RING_CMP(idx), db->doorbell); + DB_RING_IDX(db, idx), db->doorbell); else BNXT_DB_CQ(db, idx); } @@ -321,7 +321,7 @@ static void bnxt_sched_reset_rxr(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) { if (!rxr->bnapi->in_reset) { rxr->bnapi->in_reset = true; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) set_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event); else set_bit(BNXT_RST_RING_SP_EVENT, &bp->sp_event); @@ -331,16 +331,16 @@ static void bnxt_sched_reset_rxr(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) } void bnxt_sched_reset_txr(struct bnxt *bp, struct bnxt_tx_ring_info *txr, - int idx) + u16 curr) { struct bnxt_napi *bnapi = txr->bnapi; if (bnapi->tx_fault) return; - netdev_err(bp->dev, "Invalid Tx completion (ring:%d tx_pkts:%d cons:%u prod:%u i:%d)", - txr->txq_index, bnapi->tx_pkts, - txr->tx_cons, txr->tx_prod, idx); + netdev_err(bp->dev, "Invalid Tx completion (ring:%d tx_hw_cons:%u cons:%u prod:%u curr:%u)", + txr->txq_index, txr->tx_hw_cons, + txr->tx_cons, txr->tx_prod, curr); WARN_ON_ONCE(1); bnapi->tx_fault = 1; bnxt_queue_sp_work(bp, BNXT_RESET_TASK_SP_EVENT); @@ -381,6 +381,8 @@ static u16 bnxt_xmit_get_cfa_action(struct sk_buff *skb) static void bnxt_txr_db_kick(struct bnxt *bp, struct bnxt_tx_ring_info *txr, u16 prod) { + /* Sync BD data before updating doorbell */ + wmb(); bnxt_db_write(bp, &txr->tx_db, prod); txr->kick_pending = 0; } @@ -388,7 +390,7 @@ static void bnxt_txr_db_kick(struct bnxt *bp, struct bnxt_tx_ring_info *txr, static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct bnxt *bp = netdev_priv(dev); - struct tx_bd *txbd; + struct tx_bd *txbd, *txbd0; struct tx_bd_ext *txbd1; struct netdev_queue *txq; int i; @@ -430,11 +432,9 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) len = skb_headlen(skb); last_frag = skb_shinfo(skb)->nr_frags; - txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; + txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)]; - txbd->tx_bd_opaque = prod; - - tx_buf = &txr->tx_buf_ring[prod]; + tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)]; tx_buf->skb = skb; tx_buf->nr_frags = last_frag; @@ -519,12 +519,15 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, 
struct net_device *dev) txbd->tx_bd_len_flags_type = tx_push->tx_bd_len_flags_type; txbd->tx_bd_haddr = txr->data_mapping; + txbd->tx_bd_opaque = SET_TX_OPAQUE(bp, txr, prod, 2); prod = NEXT_TX(prod); - txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; + tx_push->tx_bd_opaque = txbd->tx_bd_opaque; + txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)]; memcpy(txbd, tx_push1, sizeof(*txbd)); prod = NEXT_TX(prod); tx_push->doorbell = - cpu_to_le32(DB_KEY_TX_PUSH | DB_LONG_TX_PUSH | prod); + cpu_to_le32(DB_KEY_TX_PUSH | DB_LONG_TX_PUSH | + DB_RING_IDX(&txr->tx_db, prod)); WRITE_ONCE(txr->tx_prod, prod); tx_buf->is_push = 1; @@ -562,10 +565,11 @@ normal_tx: ((last_frag + 2) << TX_BD_FLAGS_BD_CNT_SHIFT); txbd->tx_bd_haddr = cpu_to_le64(mapping); + txbd->tx_bd_opaque = SET_TX_OPAQUE(bp, txr, prod, 2 + last_frag); prod = NEXT_TX(prod); txbd1 = (struct tx_bd_ext *) - &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; + &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)]; txbd1->tx_bd_hsize_lflags = lflags; if (skb_is_gso(skb)) { @@ -601,11 +605,12 @@ normal_tx: txbd1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags); txbd1->tx_bd_cfa_action = cpu_to_le32(cfa_action << TX_BD_CFA_ACTION_SHIFT); + txbd0 = txbd; for (i = 0; i < last_frag; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; prod = NEXT_TX(prod); - txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; + txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)]; len = skb_frag_size(frag); mapping = skb_frag_dma_map(&pdev->dev, frag, 0, len, @@ -614,7 +619,7 @@ normal_tx: if (unlikely(dma_mapping_error(&pdev->dev, mapping))) goto tx_dma_error; - tx_buf = &txr->tx_buf_ring[prod]; + tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)]; dma_unmap_addr_set(tx_buf, mapping, mapping); txbd->tx_bd_haddr = cpu_to_le64(mapping); @@ -632,16 +637,17 @@ normal_tx: skb_tx_timestamp(skb); - /* Sync BD data before updating doorbell */ - wmb(); - prod = NEXT_TX(prod); WRITE_ONCE(txr->tx_prod, prod); - if (!netdev_xmit_more() || netif_xmit_stopped(txq)) + if (!netdev_xmit_more() || netif_xmit_stopped(txq)) { bnxt_txr_db_kick(bp, txr, prod); - else + } else { + if (free_size >= bp->tx_wake_thresh) + txbd0->tx_bd_len_flags_type |= + cpu_to_le32(TX_BD_FLAGS_NO_CMPL); txr->kick_pending = 1; + } tx_done: @@ -662,7 +668,7 @@ tx_dma_error: /* start back at beginning and unmap skb */ prod = txr->tx_prod; - tx_buf = &txr->tx_buf_ring[prod]; + tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)]; dma_unmap_single(&pdev->dev, dma_unmap_addr(tx_buf, mapping), skb_headlen(skb), DMA_TO_DEVICE); prod = NEXT_TX(prod); @@ -670,7 +676,7 @@ tx_dma_error: /* unmap remaining mapped pages */ for (i = 0; i < last_frag; i++) { prod = NEXT_TX(prod); - tx_buf = &txr->tx_buf_ring[prod]; + tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)]; dma_unmap_page(&pdev->dev, dma_unmap_addr(tx_buf, mapping), skb_frag_size(&skb_shinfo(skb)->frags[i]), DMA_TO_DEVICE); @@ -686,31 +692,32 @@ tx_kick_pending: return NETDEV_TX_OK; } -static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) +static void __bnxt_tx_int(struct bnxt *bp, struct bnxt_tx_ring_info *txr, + int budget) { - struct bnxt_tx_ring_info *txr = bnapi->tx_ring; struct netdev_queue *txq = netdev_get_tx_queue(bp->dev, txr->txq_index); - u16 cons = txr->tx_cons; struct pci_dev *pdev = bp->pdev; - int nr_pkts = bnapi->tx_pkts; - int i; + u16 hw_cons = txr->tx_hw_cons; unsigned int tx_bytes = 0; + u16 cons = txr->tx_cons; + int tx_pkts = 0; - for (i = 0; i < nr_pkts; i++) { + while (RING_TX(bp, cons) != hw_cons) { struct 
bnxt_sw_tx_bd *tx_buf; struct sk_buff *skb; int j, last; - tx_buf = &txr->tx_buf_ring[cons]; + tx_buf = &txr->tx_buf_ring[RING_TX(bp, cons)]; cons = NEXT_TX(cons); skb = tx_buf->skb; tx_buf->skb = NULL; if (unlikely(!skb)) { - bnxt_sched_reset_txr(bp, txr, i); + bnxt_sched_reset_txr(bp, txr, cons); return; } + tx_pkts++; tx_bytes += skb->len; if (tx_buf->is_push) { @@ -724,7 +731,7 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) for (j = 0; j < last; j++) { cons = NEXT_TX(cons); - tx_buf = &txr->tx_buf_ring[cons]; + tx_buf = &txr->tx_buf_ring[RING_TX(bp, cons)]; dma_unmap_page( &pdev->dev, dma_unmap_addr(tx_buf, mapping), @@ -732,7 +739,7 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) DMA_TO_DEVICE); } if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (BNXT_CHIP_P5(bp)) { /* PTP worker takes ownership of the skb */ if (!bnxt_get_tx_ts_p5(bp, skb)) skb = NULL; @@ -747,14 +754,25 @@ next_tx_int: dev_consume_skb_any(skb); } - bnapi->tx_pkts = 0; WRITE_ONCE(txr->tx_cons, cons); - __netif_txq_completed_wake(txq, nr_pkts, tx_bytes, + __netif_txq_completed_wake(txq, tx_pkts, tx_bytes, bnxt_tx_avail(bp, txr), bp->tx_wake_thresh, READ_ONCE(txr->dev_state) == BNXT_DEV_STATE_CLOSING); } +static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) +{ + struct bnxt_tx_ring_info *txr; + int i; + + bnxt_for_each_napi_tx(i, bnapi, txr) { + if (txr->tx_hw_cons != txr->tx_cons) + __bnxt_tx_int(bp, txr, budget); + } + bnapi->events &= ~BNXT_TX_CMP_EVENT; +} + static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping, struct bnxt_rx_ring_info *rxr, unsigned int *offset, @@ -803,8 +821,8 @@ static inline u8 *__bnxt_alloc_rx_frag(struct bnxt *bp, dma_addr_t *mapping, int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 prod, gfp_t gfp) { - struct rx_bd *rxbd = &rxr->rx_desc_ring[RX_RING(prod)][RX_IDX(prod)]; - struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[prod]; + struct rx_bd *rxbd = &rxr->rx_desc_ring[RX_RING(bp, prod)][RX_IDX(prod)]; + struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[RING_RX(bp, prod)]; dma_addr_t mapping; if (BNXT_RX_PAGE_MODE(bp)) { @@ -837,9 +855,10 @@ void bnxt_reuse_rx_data(struct bnxt_rx_ring_info *rxr, u16 cons, void *data) { u16 prod = rxr->rx_prod; struct bnxt_sw_rx_bd *cons_rx_buf, *prod_rx_buf; + struct bnxt *bp = rxr->bnapi->bp; struct rx_bd *cons_bd, *prod_bd; - prod_rx_buf = &rxr->rx_buf_ring[prod]; + prod_rx_buf = &rxr->rx_buf_ring[RING_RX(bp, prod)]; cons_rx_buf = &rxr->rx_buf_ring[cons]; prod_rx_buf->data = data; @@ -847,8 +866,8 @@ void bnxt_reuse_rx_data(struct bnxt_rx_ring_info *rxr, u16 cons, void *data) prod_rx_buf->mapping = cons_rx_buf->mapping; - prod_bd = &rxr->rx_desc_ring[RX_RING(prod)][RX_IDX(prod)]; - cons_bd = &rxr->rx_desc_ring[RX_RING(cons)][RX_IDX(cons)]; + prod_bd = &rxr->rx_desc_ring[RX_RING(bp, prod)][RX_IDX(prod)]; + cons_bd = &rxr->rx_desc_ring[RX_RING(bp, cons)][RX_IDX(cons)]; prod_bd->rx_bd_haddr = cons_bd->rx_bd_haddr; } @@ -868,7 +887,7 @@ static inline int bnxt_alloc_rx_page(struct bnxt *bp, u16 prod, gfp_t gfp) { struct rx_bd *rxbd = - &rxr->rx_agg_desc_ring[RX_RING(prod)][RX_IDX(prod)]; + &rxr->rx_agg_desc_ring[RX_AGG_RING(bp, prod)][RX_IDX(prod)]; struct bnxt_sw_rx_agg_bd *rx_agg_buf; struct page *page; dma_addr_t mapping; @@ -885,7 +904,7 @@ static inline int bnxt_alloc_rx_page(struct bnxt *bp, __set_bit(sw_prod, rxr->rx_agg_bmap); rx_agg_buf = 
&rxr->rx_agg_ring[sw_prod]; - rxr->rx_sw_agg_prod = NEXT_RX_AGG(sw_prod); + rxr->rx_sw_agg_prod = RING_RX_AGG(bp, NEXT_RX_AGG(sw_prod)); rx_agg_buf->page = page; rx_agg_buf->offset = offset; @@ -927,7 +946,7 @@ static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 idx, bool p5_tpa = false; u32 i; - if ((bp->flags & BNXT_FLAG_CHIP_P5) && tpa) + if ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && tpa) p5_tpa = true; for (i = 0; i < agg_bufs; i++) { @@ -961,13 +980,13 @@ static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 idx, prod_rx_buf->mapping = cons_rx_buf->mapping; - prod_bd = &rxr->rx_agg_desc_ring[RX_RING(prod)][RX_IDX(prod)]; + prod_bd = &rxr->rx_agg_desc_ring[RX_AGG_RING(bp, prod)][RX_IDX(prod)]; prod_bd->rx_bd_haddr = cpu_to_le64(cons_rx_buf->mapping); prod_bd->rx_bd_opaque = sw_prod; prod = NEXT_RX_AGG(prod); - sw_prod = NEXT_RX_AGG(sw_prod); + sw_prod = RING_RX_AGG(bp, NEXT_RX_AGG(sw_prod)); } rxr->rx_agg_prod = prod; rxr->rx_sw_agg_prod = sw_prod; @@ -1094,7 +1113,7 @@ static u32 __bnxt_rx_agg_pages(struct bnxt *bp, u32 i, total_frag_len = 0; bool p5_tpa = false; - if ((bp->flags & BNXT_FLAG_CHIP_P5) && tpa) + if ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && tpa) p5_tpa = true; for (i = 0; i < agg_bufs; i++) { @@ -1249,7 +1268,7 @@ static int bnxt_discard_rx(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) { struct rx_tpa_end_cmp *tpa_end = cmp; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) return 0; agg_bufs = TPA_END_AGG_BUFS(tpa_end); @@ -1300,7 +1319,7 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, struct rx_bd *prod_bd; dma_addr_t mapping; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { agg_id = TPA_START_AGG_ID_P5(tpa_start); agg_id = bnxt_alloc_agg_idx(rxr, agg_id); } else { @@ -1309,7 +1328,7 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, cons = tpa_start->rx_tpa_start_cmp_opaque; prod = rxr->rx_prod; cons_rx_buf = &rxr->rx_buf_ring[cons]; - prod_rx_buf = &rxr->rx_buf_ring[prod]; + prod_rx_buf = &rxr->rx_buf_ring[RING_RX(bp, prod)]; tpa_info = &rxr->rx_tpa[agg_id]; if (unlikely(cons != rxr->rx_next_cons || @@ -1330,7 +1349,7 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, mapping = tpa_info->mapping; prod_rx_buf->mapping = mapping; - prod_bd = &rxr->rx_desc_ring[RX_RING(prod)][RX_IDX(prod)]; + prod_bd = &rxr->rx_desc_ring[RX_RING(bp, prod)][RX_IDX(prod)]; prod_bd->rx_bd_haddr = cpu_to_le64(mapping); @@ -1363,8 +1382,8 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, tpa_info->agg_count = 0; rxr->rx_prod = NEXT_RX(prod); - cons = NEXT_RX(cons); - rxr->rx_next_cons = NEXT_RX(cons); + cons = RING_RX(bp, NEXT_RX(cons)); + rxr->rx_next_cons = RING_RX(bp, NEXT_RX(cons)); cons_rx_buf = &rxr->rx_buf_ring[cons]; bnxt_reuse_rx_data(rxr, cons, cons_rx_buf->data); @@ -1563,7 +1582,7 @@ static inline struct sk_buff *bnxt_gro_skb(struct bnxt *bp, skb_shinfo(skb)->gso_size = le32_to_cpu(tpa_end1->rx_tpa_end_cmp_seg_len); skb_shinfo(skb)->gso_type = tpa_info->gso_type; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) payload_off = TPA_END_PAYLOAD_OFF_P5(tpa_end1); else payload_off = TPA_END_PAYLOAD_OFF(tpa_end); @@ -1611,7 +1630,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, return NULL; } - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { agg_id = 
TPA_END_AGG_ID_P5(tpa_end); agg_id = bnxt_lookup_agg_idx(rxr, agg_id); agg_bufs = TPA_END_AGG_BUFS_P5(tpa_end1); @@ -1876,7 +1895,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, rc = -EIO; if (rx_err & RX_CMPL_ERRORS_BUFFER_ERROR_MASK) { bnapi->cp_ring.sw_stats.rx.rx_buf_errors++; - if (!(bp->flags & BNXT_FLAG_CHIP_P5) && + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && !(bp->fw_cap & BNXT_FW_CAP_RING_MONITOR)) { netdev_warn_once(bp->dev, "RX buffer error %x\n", rx_err); @@ -2007,7 +2026,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, if (unlikely((flags & RX_CMP_FLAGS_ITYPES_MASK) == RX_CMP_FLAGS_ITYPE_PTP_W_TS) || bp->ptp_all_rx_tstamp) { - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { u32 cmpl_ts = le32_to_cpu(rxcmp1->rx_cmp_timestamp); u64 ns, ts; @@ -2032,7 +2051,7 @@ next_rx: next_rx_no_len: rxr->rx_prod = NEXT_RX(prod); - rxr->rx_next_cons = NEXT_RX(cons); + rxr->rx_next_cons = RING_RX(bp, NEXT_RX(cons)); next_rx_no_prod_no_len: *raw_cons = tmp_raw_cons; @@ -2415,7 +2434,7 @@ static int bnxt_async_event_process(struct bnxt *bp, struct bnxt_rx_ring_info *rxr; u16 grp_idx; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) goto async_event_process_exit; netdev_warn(bp->dev, "Ring monitor event, ring type %lu id 0x%x\n", @@ -2588,7 +2607,6 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, struct bnxt_napi *bnapi = cpr->bnapi; u32 raw_cons = cpr->cp_raw_cons; u32 cons; - int tx_pkts = 0; int rx_pkts = 0; u8 event = 0; struct tx_cmp *txcmp; @@ -2609,9 +2627,17 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, */ dma_rmb(); if (TX_CMP_TYPE(txcmp) == CMP_TYPE_TX_L2_CMP) { - tx_pkts++; + u32 opaque = txcmp->tx_cmp_opaque; + struct bnxt_tx_ring_info *txr; + u16 tx_freed; + + txr = bnapi->tx_ring[TX_OPAQUE_RING(opaque)]; + event |= BNXT_TX_CMP_EVENT; + txr->tx_hw_cons = TX_OPAQUE_PROD(bp, opaque); + tx_freed = (txr->tx_hw_cons - txr->tx_cons) & + bp->tx_ring_mask; /* return full budget so NAPI will complete. 
*/ - if (unlikely(tx_pkts >= bp->tx_wake_thresh)) { + if (unlikely(tx_freed >= bp->tx_wake_thresh)) { rx_pkts = budget; raw_cons = NEXT_RAW_CMP(raw_cons); if (budget) @@ -2655,7 +2681,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, xdp_do_flush(); if (event & BNXT_TX_EVENT) { - struct bnxt_tx_ring_info *txr = bnapi->tx_ring; + struct bnxt_tx_ring_info *txr = bnapi->tx_ring[0]; u16 prod = txr->tx_prod; /* Sync BD data before updating doorbell */ @@ -2665,7 +2691,6 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } cpr->cp_raw_cons = raw_cons; - bnapi->tx_pkts += tx_pkts; bnapi->events |= event; return rx_pkts; } @@ -2673,7 +2698,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) { - if (bnapi->tx_pkts && !bnapi->tx_fault) + if ((bnapi->events & BNXT_TX_CMP_EVENT) && !bnapi->tx_fault) bnapi->tx_int(bp, bnapi, budget); if ((bnapi->events & BNXT_RX_EVENT) && !(bnapi->in_reset)) { @@ -2686,7 +2711,7 @@ static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi, bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod); } - bnapi->events = 0; + bnapi->events &= BNXT_TX_CMP_EVENT; } static int bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, @@ -2826,10 +2851,10 @@ static int __bnxt_poll_cqs(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; int i, work_done = 0; - for (i = 0; i < 2; i++) { - struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[i]; + for (i = 0; i < cpr->cp_ring_count; i++) { + struct bnxt_cp_ring_info *cpr2 = &cpr->cp_ring_arr[i]; - if (cpr2) { + if (cpr2->had_nqe_notify) { work_done += __bnxt_poll_work(bp, cpr2, budget - work_done); cpr->has_more_work |= cpr2->has_more_work; @@ -2844,15 +2869,18 @@ static void __bnxt_poll_cqs_done(struct bnxt *bp, struct bnxt_napi *bnapi, struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; int i; - for (i = 0; i < 2; i++) { - struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[i]; + for (i = 0; i < cpr->cp_ring_count; i++) { + struct bnxt_cp_ring_info *cpr2 = &cpr->cp_ring_arr[i]; struct bnxt_db_info *db; - if (cpr2 && cpr2->had_work_done) { + if (cpr2->had_work_done) { db = &cpr2->cp_db; bnxt_writeq(bp, db->db_key64 | dbr_type | - RING_CMP(cpr2->cp_raw_cons), db->doorbell); + DB_RING_IDX(db, cpr2->cp_raw_cons), + db->doorbell); cpr2->had_work_done = 0; + if (dbr_type == DBR_TYPE_CQ_ARMALL) + cpr2->had_nqe_notify = 0; } } __bnxt_poll_work_done(bp, bnapi, budget); @@ -2901,13 +2929,17 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget) if (nqcmp->type == cpu_to_le16(NQ_CN_TYPE_CQ_NOTIFICATION)) { u32 idx = le32_to_cpu(nqcmp->cq_handle_low); + u32 cq_type = BNXT_NQ_HDL_TYPE(idx); struct bnxt_cp_ring_info *cpr2; /* No more budget for RX work */ - if (budget && work_done >= budget && idx == BNXT_RX_HDL) + if (budget && work_done >= budget && + cq_type == BNXT_NQ_HDL_TYPE_RX) break; - cpr2 = cpr->cp_ring_arr[idx]; + idx = BNXT_NQ_HDL_IDX(idx); + cpr2 = &cpr->cp_ring_arr[idx]; + cpr2->had_nqe_notify = 1; work_done += __bnxt_poll_work(bp, cpr2, budget - work_done); cpr->has_more_work |= cpr2->has_more_work; @@ -2922,8 +2954,9 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget) BNXT_DB_NQ_P5(&cpr->cp_db, raw_cons); } poll_done: - cpr_rx = cpr->cp_ring_arr[BNXT_RX_HDL]; - if (cpr_rx && (bp->flags & BNXT_FLAG_DIM)) { + cpr_rx = &cpr->cp_ring_arr[0]; + if (cpr_rx->cp_ring_type == 
BNXT_NQ_HDL_TYPE_RX && + (bp->flags & BNXT_FLAG_DIM)) { struct dim_sample dim_sample = {}; dim_update_sample(cpr->event_ctr, @@ -3097,20 +3130,20 @@ static void bnxt_free_skbs(struct bnxt *bp) bnxt_free_rx_skbs(bp); } -static void bnxt_init_ctx_mem(struct bnxt_mem_init *mem_init, void *p, int len) +static void bnxt_init_ctx_mem(struct bnxt_ctx_mem_type *ctxm, void *p, int len) { - u8 init_val = mem_init->init_val; - u16 offset = mem_init->offset; + u8 init_val = ctxm->init_value; + u16 offset = ctxm->init_offset; u8 *p2 = p; int i; if (!init_val) return; - if (offset == BNXT_MEM_INVALID_OFFSET) { + if (offset == BNXT_CTX_INIT_INVALID_OFFSET) { memset(p, init_val, len); return; } - for (i = 0; i < len; i += mem_init->size) + for (i = 0; i < len; i += ctxm->entry_size) *(p2 + i + offset) = init_val; } @@ -3177,8 +3210,8 @@ static int bnxt_alloc_ring(struct bnxt *bp, struct bnxt_ring_mem_info *rmem) if (!rmem->pg_arr[i]) return -ENOMEM; - if (rmem->mem_init) - bnxt_init_ctx_mem(rmem->mem_init, rmem->pg_arr[i], + if (rmem->ctx_mem) + bnxt_init_ctx_mem(rmem->ctx_mem, rmem->pg_arr[i], rmem->page_size); if (rmem->nr_pages > 1 || rmem->depth > 0) { if (i == rmem->nr_pages - 2 && @@ -3225,7 +3258,7 @@ static int bnxt_alloc_tpa_info(struct bnxt *bp) int i, j; bp->max_tpa = MAX_TPA; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { if (!bp->max_tpa_v2) return 0; bp->max_tpa = max_t(u16, bp->max_tpa_v2, MAX_TPA_P5); @@ -3240,7 +3273,7 @@ static int bnxt_alloc_tpa_info(struct bnxt *bp) if (!rxr->rx_tpa) return -ENOMEM; - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) continue; for (j = 0; j < bp->max_tpa; j++) { agg = kcalloc(MAX_SKB_FRAGS, sizeof(*agg), GFP_KERNEL); @@ -3395,6 +3428,15 @@ static void bnxt_free_tx_rings(struct bnxt *bp) } } +#define BNXT_TC_TO_RING_BASE(bp, tc) \ + ((tc) * (bp)->tx_nr_rings_per_tc) + +#define BNXT_RING_TO_TC_OFF(bp, tx) \ + ((tx) % (bp)->tx_nr_rings_per_tc) + +#define BNXT_RING_TO_TC(bp, tx) \ + ((tx) / (bp)->tx_nr_rings_per_tc) + static int bnxt_alloc_tx_rings(struct bnxt *bp) { int i, j, rc; @@ -3450,7 +3492,7 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp) spin_lock_init(&txr->xdp_tx_lock); if (i < bp->tx_nr_rings_xdp) continue; - if (i % bp->tx_nr_rings_per_tc == (bp->tx_nr_rings_per_tc - 1)) + if (BNXT_RING_TO_TC_OFF(bp, i) == (bp->tx_nr_rings_per_tc - 1)) j++; } return 0; @@ -3533,36 +3575,33 @@ static void bnxt_free_cp_rings(struct bnxt *bp) bnxt_free_ring(bp, &ring->ring_mem); - for (j = 0; j < 2; j++) { - struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[j]; + if (!cpr->cp_ring_arr) + continue; - if (cpr2) { - ring = &cpr2->cp_ring_struct; - bnxt_free_ring(bp, &ring->ring_mem); - bnxt_free_cp_arrays(cpr2); - kfree(cpr2); - cpr->cp_ring_arr[j] = NULL; - } + for (j = 0; j < cpr->cp_ring_count; j++) { + struct bnxt_cp_ring_info *cpr2 = &cpr->cp_ring_arr[j]; + + ring = &cpr2->cp_ring_struct; + bnxt_free_ring(bp, &ring->ring_mem); + bnxt_free_cp_arrays(cpr2); } + kfree(cpr->cp_ring_arr); + cpr->cp_ring_arr = NULL; + cpr->cp_ring_count = 0; } } -static struct bnxt_cp_ring_info *bnxt_alloc_cp_sub_ring(struct bnxt *bp) +static int bnxt_alloc_cp_sub_ring(struct bnxt *bp, + struct bnxt_cp_ring_info *cpr) { struct bnxt_ring_mem_info *rmem; struct bnxt_ring_struct *ring; - struct bnxt_cp_ring_info *cpr; int rc; - cpr = kzalloc(sizeof(*cpr), GFP_KERNEL); - if (!cpr) - return NULL; - rc = bnxt_alloc_cp_arrays(cpr, bp->cp_nr_pages); if (rc) { bnxt_free_cp_arrays(cpr); - kfree(cpr); - return NULL; + 
return -ENOMEM; } ring = &cpr->cp_ring_struct; rmem = &ring->ring_mem; @@ -3575,23 +3614,26 @@ static struct bnxt_cp_ring_info *bnxt_alloc_cp_sub_ring(struct bnxt *bp) if (rc) { bnxt_free_ring(bp, rmem); bnxt_free_cp_arrays(cpr); - kfree(cpr); - cpr = NULL; } - return cpr; + return rc; } static int bnxt_alloc_cp_rings(struct bnxt *bp) { bool sh = !!(bp->flags & BNXT_FLAG_SHARED_RINGS); - int i, rc, ulp_base_vec, ulp_msix; + int i, j, rc, ulp_base_vec, ulp_msix; + int tcs = netdev_get_num_tc(bp->dev); + if (!tcs) + tcs = 1; ulp_msix = bnxt_get_ulp_msix_num(bp); ulp_base_vec = bnxt_get_ulp_msix_base(bp); - for (i = 0; i < bp->cp_nr_rings; i++) { + for (i = 0, j = 0; i < bp->cp_nr_rings; i++) { struct bnxt_napi *bnapi = bp->bnapi[i]; - struct bnxt_cp_ring_info *cpr; + struct bnxt_cp_ring_info *cpr, *cpr2; struct bnxt_ring_struct *ring; + int cp_count = 0, k; + int rx = 0, tx = 0; if (!bnapi) continue; @@ -3609,35 +3651,55 @@ static int bnxt_alloc_cp_rings(struct bnxt *bp) else ring->map_idx = i; - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) continue; if (i < bp->rx_nr_rings) { - struct bnxt_cp_ring_info *cpr2 = - bnxt_alloc_cp_sub_ring(bp); - - cpr->cp_ring_arr[BNXT_RX_HDL] = cpr2; - if (!cpr2) - return -ENOMEM; - cpr2->bnapi = bnapi; + cp_count++; + rx = 1; + } + if (i < bp->tx_nr_rings_xdp) { + cp_count++; + tx = 1; + } else if ((sh && i < bp->tx_nr_rings) || + (!sh && i >= bp->rx_nr_rings)) { + cp_count += tcs; + tx = 1; } - if ((sh && i < bp->tx_nr_rings) || - (!sh && i >= bp->rx_nr_rings)) { - struct bnxt_cp_ring_info *cpr2 = - bnxt_alloc_cp_sub_ring(bp); - cpr->cp_ring_arr[BNXT_TX_HDL] = cpr2; - if (!cpr2) - return -ENOMEM; + cpr->cp_ring_arr = kcalloc(cp_count, sizeof(*cpr), + GFP_KERNEL); + if (!cpr->cp_ring_arr) + return -ENOMEM; + cpr->cp_ring_count = cp_count; + + for (k = 0; k < cp_count; k++) { + cpr2 = &cpr->cp_ring_arr[k]; + rc = bnxt_alloc_cp_sub_ring(bp, cpr2); + if (rc) + return rc; cpr2->bnapi = bnapi; + cpr2->cp_idx = k; + if (!k && rx) { + bp->rx_ring[i].rx_cpr = cpr2; + cpr2->cp_ring_type = BNXT_NQ_HDL_TYPE_RX; + } else { + int n, tc = k - rx; + + n = BNXT_TC_TO_RING_BASE(bp, tc) + j; + bp->tx_ring[n].tx_cpr = cpr2; + cpr2->cp_ring_type = BNXT_NQ_HDL_TYPE_TX; + } } + if (tx) + j++; } return 0; } static void bnxt_init_ring_struct(struct bnxt *bp) { - int i; + int i, j; for (i = 0; i < bp->cp_nr_rings; i++) { struct bnxt_napi *bnapi = bp->bnapi[i]; @@ -3682,18 +3744,16 @@ static void bnxt_init_ring_struct(struct bnxt *bp) rmem->vmem = (void **)&rxr->rx_agg_ring; skip_rx: - txr = bnapi->tx_ring; - if (!txr) - continue; - - ring = &txr->tx_ring_struct; - rmem = &ring->ring_mem; - rmem->nr_pages = bp->tx_nr_pages; - rmem->page_size = HW_RXBD_RING_SIZE; - rmem->pg_arr = (void **)txr->tx_desc_ring; - rmem->dma_arr = txr->tx_desc_mapping; - rmem->vmem_size = SW_TXBD_RING_SIZE * bp->tx_nr_pages; - rmem->vmem = (void **)&txr->tx_buf_ring; + bnxt_for_each_napi_tx(j, bnapi, txr) { + ring = &txr->tx_ring_struct; + rmem = &ring->ring_mem; + rmem->nr_pages = bp->tx_nr_pages; + rmem->page_size = HW_TXBD_RING_SIZE; + rmem->pg_arr = (void **)txr->tx_desc_ring; + rmem->dma_arr = txr->tx_desc_mapping; + rmem->vmem_size = SW_TXBD_RING_SIZE * bp->tx_nr_pages; + rmem->vmem = (void **)&txr->tx_buf_ring; + } } } @@ -3814,11 +3874,10 @@ static void bnxt_init_cp_rings(struct bnxt *bp) ring->fw_ring_id = INVALID_HW_RING_ID; cpr->rx_ring_coal.coal_ticks = bp->rx_coal.coal_ticks; cpr->rx_ring_coal.coal_bufs = bp->rx_coal.coal_bufs; - for (j = 0; j < 2; j++) { - 
struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[j]; - - if (!cpr2) - continue; + if (!cpr->cp_ring_arr) + continue; + for (j = 0; j < cpr->cp_ring_count; j++) { + struct bnxt_cp_ring_info *cpr2 = &cpr->cp_ring_arr[j]; ring = &cpr2->cp_ring_struct; ring->fw_ring_id = INVALID_HW_RING_ID; @@ -3906,7 +3965,7 @@ static int bnxt_alloc_vnics(struct bnxt *bp) int num_vnics = 1; #ifdef CONFIG_RFS_ACCEL - if ((bp->flags & (BNXT_FLAG_RFS | BNXT_FLAG_CHIP_P5)) == BNXT_FLAG_RFS) + if ((bp->flags & (BNXT_FLAG_RFS | BNXT_FLAG_CHIP_P5_PLUS)) == BNXT_FLAG_RFS) num_vnics += bp->rx_nr_rings; #endif @@ -4179,7 +4238,7 @@ static int bnxt_alloc_vnic_attributes(struct bnxt *bp) } } - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) goto vnic_skip_grps; if (vnic->flags & BNXT_VNIC_RSS_FLAG) @@ -4199,7 +4258,7 @@ vnic_skip_grps: /* Allocate rss table and hash key */ size = L1_CACHE_ALIGN(HW_HASH_INDEX_SIZE * sizeof(u16)); - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) size = L1_CACHE_ALIGN(BNXT_MAX_RSS_TABLE_SIZE_P5); vnic->rss_table_size = size + HW_HASH_KEY_SIZE; @@ -4309,7 +4368,7 @@ static int bnxt_hwrm_func_qstat_ext(struct bnxt *bp, int rc; if (!(bp->fw_cap & BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED) || - !(bp->flags & BNXT_FLAG_CHIP_P5)) + !(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) return -EOPNOTSUPP; rc = hwrm_req_init(bp, req, HWRM_FUNC_QSTATS_EXT); @@ -4347,7 +4406,7 @@ static void bnxt_init_stats(struct bnxt *bp) stats = &cpr->stats; rc = bnxt_hwrm_func_qstat_ext(bp, stats); if (rc) { - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) mask = (1ULL << 48) - 1; else mask = -1ULL; @@ -4493,7 +4552,7 @@ alloc_tx_ext_stats: static void bnxt_clear_ring_indices(struct bnxt *bp) { - int i; + int i, j; if (!bp->bnapi) return; @@ -4510,10 +4569,10 @@ static void bnxt_clear_ring_indices(struct bnxt *bp) cpr = &bnapi->cp_ring; cpr->cp_raw_cons = 0; - txr = bnapi->tx_ring; - if (txr) { + bnxt_for_each_napi_tx(j, bnapi, txr) { txr->tx_prod = 0; txr->tx_cons = 0; + txr->tx_hw_cons = 0; } rxr = bnapi->rx_ring; @@ -4523,6 +4582,7 @@ static void bnxt_clear_ring_indices(struct bnxt *bp) rxr->rx_sw_agg_prod = 0; rxr->rx_next_cons = 0; } + bnapi->events = 0; } } @@ -4626,7 +4686,7 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool irq_re_init) bp->bnapi[i] = bnapi; bp->bnapi[i]->index = i; bp->bnapi[i]->bp = bp; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring; @@ -4644,11 +4704,13 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool irq_re_init) for (i = 0; i < bp->rx_nr_rings; i++) { struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { rxr->rx_ring_struct.ring_mem.flags = BNXT_RMEM_RING_PTE_FLAG; rxr->rx_agg_ring_struct.ring_mem.flags = BNXT_RMEM_RING_PTE_FLAG; + } else { + rxr->rx_cpr = &bp->bnapi[i]->cp_ring; } rxr->bnapi = bp->bnapi[i]; bp->bnapi[i]->rx_ring = &bp->rx_ring[i]; @@ -4671,22 +4733,33 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool irq_re_init) else j = bp->rx_nr_rings; - for (i = 0; i < bp->tx_nr_rings; i++, j++) { + for (i = 0; i < bp->tx_nr_rings; i++) { struct bnxt_tx_ring_info *txr = &bp->tx_ring[i]; + struct bnxt_napi *bnapi2; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) txr->tx_ring_struct.ring_mem.flags = BNXT_RMEM_RING_PTE_FLAG; - txr->bnapi = bp->bnapi[j]; - bp->bnapi[j]->tx_ring = txr; bp->tx_ring_map[i] = 
bp->tx_nr_rings_xdp + i; if (i >= bp->tx_nr_rings_xdp) { + int k = j + BNXT_RING_TO_TC_OFF(bp, i); + + bnapi2 = bp->bnapi[k]; txr->txq_index = i - bp->tx_nr_rings_xdp; - bp->bnapi[j]->tx_int = bnxt_tx_int; + txr->tx_napi_idx = + BNXT_RING_TO_TC(bp, txr->txq_index); + bnapi2->tx_ring[txr->tx_napi_idx] = txr; + bnapi2->tx_int = bnxt_tx_int; } else { - bp->bnapi[j]->flags |= BNXT_NAPI_FLAG_XDP; - bp->bnapi[j]->tx_int = bnxt_tx_int_xdp; + bnapi2 = bp->bnapi[j]; + bnapi2->flags |= BNXT_NAPI_FLAG_XDP; + bnapi2->tx_ring[0] = txr; + bnapi2->tx_int = bnxt_tx_int_xdp; + j++; } + txr->bnapi = bnapi2; + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) + txr->tx_cpr = &bnapi2->cp_ring; } rc = bnxt_alloc_stats(bp); @@ -5211,7 +5284,7 @@ static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags) nsegs = (MAX_SKB_FRAGS - n) / n; } - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { segs = MAX_TPA_SEGS_P5; max_aggs = bp->max_tpa; } else { @@ -5237,35 +5310,25 @@ static u16 bnxt_cp_ring_from_grp(struct bnxt *bp, struct bnxt_ring_struct *ring) static u16 bnxt_cp_ring_for_rx(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) { - if (bp->flags & BNXT_FLAG_CHIP_P5) { - struct bnxt_napi *bnapi = rxr->bnapi; - struct bnxt_cp_ring_info *cpr; - - cpr = bnapi->cp_ring.cp_ring_arr[BNXT_RX_HDL]; - return cpr->cp_ring_struct.fw_ring_id; - } else { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) + return rxr->rx_cpr->cp_ring_struct.fw_ring_id; + else return bnxt_cp_ring_from_grp(bp, &rxr->rx_ring_struct); - } } static u16 bnxt_cp_ring_for_tx(struct bnxt *bp, struct bnxt_tx_ring_info *txr) { - if (bp->flags & BNXT_FLAG_CHIP_P5) { - struct bnxt_napi *bnapi = txr->bnapi; - struct bnxt_cp_ring_info *cpr; - - cpr = bnapi->cp_ring.cp_ring_arr[BNXT_TX_HDL]; - return cpr->cp_ring_struct.fw_ring_id; - } else { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) + return txr->tx_cpr->cp_ring_struct.fw_ring_id; + else return bnxt_cp_ring_from_grp(bp, &txr->tx_ring_struct); - } } static int bnxt_alloc_rss_indir_tbl(struct bnxt *bp) { int entries; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) entries = BNXT_MAX_RSS_TABLE_ENTRIES_P5; else entries = HW_HASH_INDEX_SIZE; @@ -5315,7 +5378,7 @@ static u16 bnxt_get_max_rss_ring(struct bnxt *bp) int bnxt_get_nr_rss_ctxs(struct bnxt *bp, int rx_rings) { - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) return DIV_ROUND_UP(rx_rings, BNXT_RSS_TABLE_ENTRIES_P5); if (BNXT_CHIP_TYPE_NITRO_A0(bp)) return 2; @@ -5361,7 +5424,7 @@ static void __bnxt_hwrm_vnic_set_rss(struct bnxt *bp, struct hwrm_vnic_rss_cfg_input *req, struct bnxt_vnic_info *vnic) { - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) bnxt_fill_hw_rss_tbl_p5(bp, vnic); else bnxt_fill_hw_rss_tbl(bp, vnic); @@ -5386,7 +5449,7 @@ static int bnxt_hwrm_vnic_set_rss(struct bnxt *bp, u16 vnic_id, bool set_rss) struct hwrm_vnic_rss_cfg_input *req; int rc; - if ((bp->flags & BNXT_FLAG_CHIP_P5) || + if ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) || vnic->fw_rss_cos_lb_ctx[0] == INVALID_HW_RING_ID) return 0; @@ -5551,7 +5614,7 @@ int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id) if (rc) return rc; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { struct bnxt_rx_ring_info *rxr = &bp->rx_ring[0]; req->default_rx_ring_id = @@ -5651,7 +5714,7 @@ static int bnxt_hwrm_vnic_alloc(struct bnxt *bp, u16 vnic_id, if (rc) return rc; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) 
goto vnic_no_ring_grps; /* map ring groups to this vnic */ @@ -5699,7 +5762,7 @@ static int bnxt_hwrm_vnic_qcaps(struct bnxt *bp) if (!rc) { u32 flags = le32_to_cpu(resp->flags); - if (!(bp->flags & BNXT_FLAG_CHIP_P5) && + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && (flags & VNIC_QCAPS_RESP_FLAGS_RSS_DFLT_CR_CAP)) bp->flags |= BNXT_FLAG_NEW_RSS_CAP; if (flags & @@ -5710,14 +5773,14 @@ static int bnxt_hwrm_vnic_qcaps(struct bnxt *bp) * VLAN_STRIP_CAP properly. */ if ((flags & VNIC_QCAPS_RESP_FLAGS_VLAN_STRIP_CAP) || - (BNXT_CHIP_P5_THOR(bp) && + (BNXT_CHIP_P5(bp) && !(bp->fw_cap & BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED))) bp->fw_cap |= BNXT_FW_CAP_VLAN_RX_STRIP; if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_HASH_TYPE_DELTA_CAP) bp->fw_cap |= BNXT_FW_CAP_RSS_HASH_TYPE_DELTA; bp->max_tpa_v2 = le16_to_cpu(resp->max_aggs_supported); if (bp->max_tpa_v2) { - if (BNXT_CHIP_P5_THOR(bp)) + if (BNXT_CHIP_P5(bp)) bp->hw_ring_stats_size = BNXT_RING_STATS_SIZE_P5; else bp->hw_ring_stats_size = BNXT_RING_STATS_SIZE_P5_SR2; @@ -5734,7 +5797,7 @@ static int bnxt_hwrm_ring_grp_alloc(struct bnxt *bp) int rc; u16 i; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) return 0; rc = hwrm_req_init(bp, req, HWRM_RING_GRP_ALLOC); @@ -5767,7 +5830,7 @@ static void bnxt_hwrm_ring_grp_free(struct bnxt *bp) struct hwrm_ring_grp_free_input *req; u16 i; - if (!bp->grp_info || (bp->flags & BNXT_FLAG_CHIP_P5)) + if (!bp->grp_info || (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) return; if (hwrm_req_init(bp, req, HWRM_RING_GRP_FREE)) @@ -5832,7 +5895,7 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp, case HWRM_RING_ALLOC_RX: req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX; req->length = cpu_to_le32(bp->rx_ring_mask + 1); - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { u16 flags = 0; /* Association of rx ring with stats context */ @@ -5847,7 +5910,7 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp, } break; case HWRM_RING_ALLOC_AGG: - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX_AGG; /* Association of agg ring with rx ring */ grp_info = &bp->grp_info[ring->grp_idx]; @@ -5865,7 +5928,7 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp, case HWRM_RING_ALLOC_CMPL: req->ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL; req->length = cpu_to_le32(bp->cp_ring_mask + 1); - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { /* Association of cp ring with nq */ grp_info = &bp->grp_info[map_index]; req->nq_ring_id = cpu_to_le16(grp_info->cp_fw_ring_id); @@ -5933,10 +5996,30 @@ static int bnxt_hwrm_set_async_event_cr(struct bnxt *bp, int idx) } } +static void bnxt_set_db_mask(struct bnxt *bp, struct bnxt_db_info *db, + u32 ring_type) +{ + switch (ring_type) { + case HWRM_RING_ALLOC_TX: + db->db_ring_mask = bp->tx_ring_mask; + break; + case HWRM_RING_ALLOC_RX: + db->db_ring_mask = bp->rx_ring_mask; + break; + case HWRM_RING_ALLOC_AGG: + db->db_ring_mask = bp->rx_agg_ring_mask; + break; + case HWRM_RING_ALLOC_CMPL: + case HWRM_RING_ALLOC_NQ: + db->db_ring_mask = bp->cp_ring_mask; + break; + } +} + static void bnxt_set_db(struct bnxt *bp, struct bnxt_db_info *db, u32 ring_type, u32 map_idx, u32 xid) { - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { if (BNXT_PF(bp)) db->doorbell = bp->bar1 + DB_PF_OFFSET_P5; else @@ -5972,6 +6055,7 @@ static void bnxt_set_db(struct bnxt *bp, struct bnxt_db_info *db, u32 ring_type, break; } } + 
bnxt_set_db_mask(bp, db, ring_type); } static int bnxt_hwrm_ring_alloc(struct bnxt *bp) @@ -5980,7 +6064,7 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp) int i, rc = 0; u32 type; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) type = HWRM_RING_ALLOC_NQ; else type = HWRM_RING_ALLOC_CMPL; @@ -6016,15 +6100,13 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp) struct bnxt_ring_struct *ring; u32 map_idx; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { + struct bnxt_cp_ring_info *cpr2 = txr->tx_cpr; struct bnxt_napi *bnapi = txr->bnapi; - struct bnxt_cp_ring_info *cpr, *cpr2; u32 type2 = HWRM_RING_ALLOC_CMPL; - cpr = &bnapi->cp_ring; - cpr2 = cpr->cp_ring_arr[BNXT_TX_HDL]; ring = &cpr2->cp_ring_struct; - ring->handle = BNXT_TX_HDL; + ring->handle = BNXT_SET_NQ_HDL(cpr2); map_idx = bnapi->index; rc = hwrm_ring_alloc_send_msg(bp, ring, type2, map_idx); if (rc) @@ -6056,14 +6138,12 @@ static int bnxt_hwrm_ring_alloc(struct bnxt *bp) if (!agg_rings) bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); bp->grp_info[map_idx].rx_fw_ring_id = ring->fw_ring_id; - if (bp->flags & BNXT_FLAG_CHIP_P5) { - struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { + struct bnxt_cp_ring_info *cpr2 = rxr->rx_cpr; u32 type2 = HWRM_RING_ALLOC_CMPL; - struct bnxt_cp_ring_info *cpr2; - cpr2 = cpr->cp_ring_arr[BNXT_RX_HDL]; ring = &cpr2->cp_ring_struct; - ring->handle = BNXT_RX_HDL; + ring->handle = BNXT_SET_NQ_HDL(cpr2); rc = hwrm_ring_alloc_send_msg(bp, ring, type2, map_idx); if (rc) goto err_out; @@ -6171,7 +6251,7 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path) } } - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) type = RING_FREE_REQ_RING_TYPE_RX_AGG; else type = RING_FREE_REQ_RING_TYPE_RX; @@ -6198,7 +6278,7 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path) */ bnxt_disable_int_sync(bp); - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) type = RING_FREE_REQ_RING_TYPE_NQ; else type = RING_FREE_REQ_RING_TYPE_L2_CMPL; @@ -6208,18 +6288,16 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path) struct bnxt_ring_struct *ring; int j; - for (j = 0; j < 2; j++) { - struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[j]; - - if (cpr2) { - ring = &cpr2->cp_ring_struct; - if (ring->fw_ring_id == INVALID_HW_RING_ID) - continue; - hwrm_ring_free_send_msg(bp, ring, - RING_FREE_REQ_RING_TYPE_L2_CMPL, - INVALID_HW_RING_ID); - ring->fw_ring_id = INVALID_HW_RING_ID; - } + for (j = 0; j < cpr->cp_ring_count && cpr->cp_ring_arr; j++) { + struct bnxt_cp_ring_info *cpr2 = &cpr->cp_ring_arr[j]; + + ring = &cpr2->cp_ring_struct; + if (ring->fw_ring_id == INVALID_HW_RING_ID) + continue; + hwrm_ring_free_send_msg(bp, ring, + RING_FREE_REQ_RING_TYPE_L2_CMPL, + INVALID_HW_RING_ID); + ring->fw_ring_id = INVALID_HW_RING_ID; } ring = &cpr->cp_ring_struct; if (ring->fw_ring_id != INVALID_HW_RING_ID) { @@ -6267,14 +6345,15 @@ static int bnxt_hwrm_get_rings(struct bnxt *bp) cp = le16_to_cpu(resp->alloc_cmpl_rings); stats = le16_to_cpu(resp->alloc_stat_ctx); hw_resc->resv_irqs = cp; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { int rx = hw_resc->resv_rx_rings; int tx = hw_resc->resv_tx_rings; if (bp->flags & BNXT_FLAG_AGG_RINGS) rx >>= 1; if (cp < (rx + tx)) { - bnxt_trim_rings(bp, &rx, &tx, cp, false); + rx = cp / 2; + tx = rx; if (bp->flags & BNXT_FLAG_AGG_RINGS) rx <<= 1; hw_resc->resv_rx_rings = rx; 
@@ -6331,7 +6410,7 @@ __bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings, if (BNXT_NEW_RM(bp)) { enables |= rx_rings ? FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS : 0; enables |= stats ? FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { enables |= cp_rings ? FUNC_CFG_REQ_ENABLES_NUM_MSIX : 0; enables |= tx_rings + ring_grps ? FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0; @@ -6347,7 +6426,7 @@ __bnxt_hwrm_reserve_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings, enables |= vnics ? FUNC_CFG_REQ_ENABLES_NUM_VNICS : 0; req->num_rx_rings = cpu_to_le16(rx_rings); - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { req->num_cmpl_rings = cpu_to_le16(tx_rings + ring_grps); req->num_msix = cpu_to_le16(cp_rings); req->num_rsscos_ctxs = @@ -6382,7 +6461,7 @@ __bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings, enables |= rx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS | FUNC_VF_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0; enables |= stats ? FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { enables |= tx_rings + ring_grps ? FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0; } else { @@ -6397,7 +6476,7 @@ __bnxt_hwrm_reserve_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings, req->num_l2_ctxs = cpu_to_le16(BNXT_VF_MAX_L2_CTX); req->num_tx_rings = cpu_to_le16(tx_rings); req->num_rx_rings = cpu_to_le16(rx_rings); - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { req->num_cmpl_rings = cpu_to_le16(tx_rings + ring_grps); req->num_rsscos_ctxs = cpu_to_le16(DIV_ROUND_UP(ring_grps, 64)); } else { @@ -6493,7 +6572,7 @@ static int bnxt_cp_rings_in_use(struct bnxt *bp) { int cp; - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) return bnxt_nq_rings_in_use(bp); cp = bp->tx_nr_rings + bp->rx_nr_rings; @@ -6550,7 +6629,8 @@ static bool bnxt_need_reserve_rings(struct bnxt *bp) bnxt_check_rss_tbl_no_rmgr(bp); return false; } - if ((bp->flags & BNXT_FLAG_RFS) && !(bp->flags & BNXT_FLAG_CHIP_P5)) + if ((bp->flags & BNXT_FLAG_RFS) && + !(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) vnic = rx + 1; if (bp->flags & BNXT_FLAG_AGG_RINGS) rx <<= 1; @@ -6558,9 +6638,9 @@ static bool bnxt_need_reserve_rings(struct bnxt *bp) if (hw_resc->resv_rx_rings != rx || hw_resc->resv_cp_rings != cp || hw_resc->resv_vnics != vnic || hw_resc->resv_stat_ctxs != stat || (hw_resc->resv_hw_ring_grps != grp && - !(bp->flags & BNXT_FLAG_CHIP_P5))) + !(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))) return true; - if ((bp->flags & BNXT_FLAG_CHIP_P5) && BNXT_PF(bp) && + if ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && BNXT_PF(bp) && hw_resc->resv_irqs != nq) return true; return false; @@ -6575,13 +6655,15 @@ static int __bnxt_reserve_rings(struct bnxt *bp) int grp, rx_rings, rc; int vnic = 1, stat; bool sh = false; + int tx_cp; if (!bnxt_need_reserve_rings(bp)) return 0; if (bp->flags & BNXT_FLAG_SHARED_RINGS) sh = true; - if ((bp->flags & BNXT_FLAG_RFS) && !(bp->flags & BNXT_FLAG_CHIP_P5)) + if ((bp->flags & BNXT_FLAG_RFS) && + !(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) vnic = rx + 1; if (bp->flags & BNXT_FLAG_AGG_RINGS) rx <<= 1; @@ -6624,7 +6706,8 @@ static int __bnxt_reserve_rings(struct bnxt *bp) rc = bnxt_trim_rings(bp, &rx_rings, &tx, cp, sh); if (bp->flags & BNXT_FLAG_AGG_RINGS) rx = rx_rings << 1; - cp = sh ? max_t(int, tx, rx_rings) : tx + rx_rings; + tx_cp = bnxt_num_tx_to_cp(bp, tx); + cp = sh ? 
max_t(int, tx_cp, rx_rings) : tx_cp + rx_rings; bp->tx_nr_rings = tx; /* If we cannot reserve all the RX rings, reset the RSS map only @@ -6671,7 +6754,7 @@ static int bnxt_hwrm_check_vf_rings(struct bnxt *bp, int tx_rings, int rx_rings, FUNC_VF_CFG_REQ_FLAGS_STAT_CTX_ASSETS_TEST | FUNC_VF_CFG_REQ_FLAGS_VNIC_ASSETS_TEST | FUNC_VF_CFG_REQ_FLAGS_RSSCOS_CTX_ASSETS_TEST; - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) flags |= FUNC_VF_CFG_REQ_FLAGS_RING_GRP_ASSETS_TEST; req->flags = cpu_to_le32(flags); @@ -6693,7 +6776,7 @@ static int bnxt_hwrm_check_pf_rings(struct bnxt *bp, int tx_rings, int rx_rings, FUNC_CFG_REQ_FLAGS_CMPL_ASSETS_TEST | FUNC_CFG_REQ_FLAGS_STAT_CTX_ASSETS_TEST | FUNC_CFG_REQ_FLAGS_VNIC_ASSETS_TEST; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) flags |= FUNC_CFG_REQ_FLAGS_RSSCOS_CTX_ASSETS_TEST | FUNC_CFG_REQ_FLAGS_NQ_ASSETS_TEST; else @@ -6887,10 +6970,40 @@ int bnxt_hwrm_set_ring_coal(struct bnxt *bp, struct bnxt_napi *bnapi) return hwrm_req_send(bp, req_rx); } +static int +bnxt_hwrm_set_rx_coal(struct bnxt *bp, struct bnxt_napi *bnapi, + struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req) +{ + u16 ring_id = bnxt_cp_ring_for_rx(bp, bnapi->rx_ring); + + req->ring_id = cpu_to_le16(ring_id); + return hwrm_req_send(bp, req); +} + +static int +bnxt_hwrm_set_tx_coal(struct bnxt *bp, struct bnxt_napi *bnapi, + struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req) +{ + struct bnxt_tx_ring_info *txr; + int i, rc; + + bnxt_for_each_napi_tx(i, bnapi, txr) { + u16 ring_id; + + ring_id = bnxt_cp_ring_for_tx(bp, txr); + req->ring_id = cpu_to_le16(ring_id); + rc = hwrm_req_send(bp, req); + if (rc) + return rc; + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) + return 0; + } + return 0; +} + int bnxt_hwrm_set_coal(struct bnxt *bp) { - struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req_rx, *req_tx, - *req; + struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req_rx, *req_tx; int i, rc; rc = hwrm_req_init(bp, req_rx, HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS); @@ -6911,29 +7024,19 @@ int bnxt_hwrm_set_coal(struct bnxt *bp) for (i = 0; i < bp->cp_nr_rings; i++) { struct bnxt_napi *bnapi = bp->bnapi[i]; struct bnxt_coal *hw_coal; - u16 ring_id; - - req = req_rx; - if (!bnapi->rx_ring) { - ring_id = bnxt_cp_ring_for_tx(bp, bnapi->tx_ring); - req = req_tx; - } else { - ring_id = bnxt_cp_ring_for_rx(bp, bnapi->rx_ring); - } - req->ring_id = cpu_to_le16(ring_id); - rc = hwrm_req_send(bp, req); + if (!bnapi->rx_ring) + rc = bnxt_hwrm_set_tx_coal(bp, bnapi, req_tx); + else + rc = bnxt_hwrm_set_rx_coal(bp, bnapi, req_rx); if (rc) break; - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) continue; - if (bnapi->rx_ring && bnapi->tx_ring) { - req = req_tx; - ring_id = bnxt_cp_ring_for_tx(bp, bnapi->tx_ring); - req->ring_id = cpu_to_le16(ring_id); - rc = hwrm_req_send(bp, req); + if (bnapi->rx_ring && bnapi->tx_ring[0]) { + rc = bnxt_hwrm_set_tx_coal(bp, bnapi, req_tx); if (rc) break; } @@ -7087,7 +7190,7 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp) if (bp->db_size) goto func_qcfg_exit; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { if (BNXT_PF(bp)) min_db_offset = DB_PF_OFFSET_P5; else @@ -7104,37 +7207,103 @@ func_qcfg_exit: return rc; } -static void bnxt_init_ctx_initializer(struct bnxt_ctx_mem_info *ctx, - struct hwrm_func_backing_store_qcaps_output *resp) +static void bnxt_init_ctx_initializer(struct bnxt_ctx_mem_type *ctxm, + u8 init_val, u8 
init_offset, + bool init_mask_set) { - struct bnxt_mem_init *mem_init; - u16 init_mask; - u8 init_val; - u8 *offset; - int i; + ctxm->init_value = init_val; + ctxm->init_offset = BNXT_CTX_INIT_INVALID_OFFSET; + if (init_mask_set) + ctxm->init_offset = init_offset * 4; + else + ctxm->init_value = 0; +} + +static int bnxt_alloc_all_ctx_pg_info(struct bnxt *bp, int ctx_max) +{ + struct bnxt_ctx_mem_info *ctx = bp->ctx; + u16 type; + + for (type = 0; type < ctx_max; type++) { + struct bnxt_ctx_mem_type *ctxm = &ctx->ctx_arr[type]; + int n = 1; - init_val = resp->ctx_kind_initializer; - init_mask = le16_to_cpu(resp->ctx_init_mask); - offset = &resp->qp_init_offset; - mem_init = &ctx->mem_init[BNXT_CTX_MEM_INIT_QP]; - for (i = 0; i < BNXT_CTX_MEM_INIT_MAX; i++, mem_init++, offset++) { - mem_init->init_val = init_val; - mem_init->offset = BNXT_MEM_INVALID_OFFSET; - if (!init_mask) + if (!ctxm->max_entries) continue; - if (i == BNXT_CTX_MEM_INIT_STAT) - offset = &resp->stat_init_offset; - if (init_mask & (1 << i)) - mem_init->offset = *offset * 4; - else - mem_init->init_val = 0; + + if (ctxm->instance_bmap) + n = hweight32(ctxm->instance_bmap); + ctxm->pg_info = kcalloc(n, sizeof(*ctxm->pg_info), GFP_KERNEL); + if (!ctxm->pg_info) + return -ENOMEM; } - ctx->mem_init[BNXT_CTX_MEM_INIT_QP].size = ctx->qp_entry_size; - ctx->mem_init[BNXT_CTX_MEM_INIT_SRQ].size = ctx->srq_entry_size; - ctx->mem_init[BNXT_CTX_MEM_INIT_CQ].size = ctx->cq_entry_size; - ctx->mem_init[BNXT_CTX_MEM_INIT_VNIC].size = ctx->vnic_entry_size; - ctx->mem_init[BNXT_CTX_MEM_INIT_STAT].size = ctx->stat_entry_size; - ctx->mem_init[BNXT_CTX_MEM_INIT_MRAV].size = ctx->mrav_entry_size; + return 0; +} + +#define BNXT_CTX_INIT_VALID(flags) \ + (!!((flags) & \ + FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_ENABLE_CTX_KIND_INIT)) + +static int bnxt_hwrm_func_backing_store_qcaps_v2(struct bnxt *bp) +{ + struct hwrm_func_backing_store_qcaps_v2_output *resp; + struct hwrm_func_backing_store_qcaps_v2_input *req; + u16 last_valid_type = BNXT_CTX_INV; + struct bnxt_ctx_mem_info *ctx; + u16 type; + int rc; + + rc = hwrm_req_init(bp, req, HWRM_FUNC_BACKING_STORE_QCAPS_V2); + if (rc) + return rc; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + bp->ctx = ctx; + + resp = hwrm_req_hold(bp, req); + + for (type = 0; type < BNXT_CTX_V2_MAX; ) { + struct bnxt_ctx_mem_type *ctxm = &ctx->ctx_arr[type]; + u8 init_val, init_off, i; + __le32 *p; + u32 flags; + + req->type = cpu_to_le16(type); + rc = hwrm_req_send(bp, req); + if (rc) + goto ctx_done; + flags = le32_to_cpu(resp->flags); + type = le16_to_cpu(resp->next_valid_type); + if (!(flags & FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_TYPE_VALID)) + continue; + + ctxm->type = le16_to_cpu(resp->type); + last_valid_type = ctxm->type; + ctxm->entry_size = le16_to_cpu(resp->entry_size); + ctxm->flags = flags; + ctxm->instance_bmap = le32_to_cpu(resp->instance_bit_map); + ctxm->entry_multiple = resp->entry_multiple; + ctxm->max_entries = le32_to_cpu(resp->max_num_entries); + ctxm->min_entries = le32_to_cpu(resp->min_num_entries); + init_val = resp->ctx_init_value; + init_off = resp->ctx_init_offset; + bnxt_init_ctx_initializer(ctxm, init_val, init_off, + BNXT_CTX_INIT_VALID(flags)); + ctxm->split_entry_cnt = min_t(u8, resp->subtype_valid_cnt, + BNXT_MAX_SPLIT_ENTRY); + for (i = 0, p = &resp->split_entry_0; i < ctxm->split_entry_cnt; + i++, p++) + ctxm->split[i] = le32_to_cpu(*p); + } + if (last_valid_type < BNXT_CTX_V2_MAX) + ctx->ctx_arr[last_valid_type].last = true; + rc = 
bnxt_alloc_all_ctx_pg_info(bp, BNXT_CTX_V2_MAX); + +ctx_done: + hwrm_req_drop(bp, req); + return rc; } static int bnxt_hwrm_func_backing_store_qcaps(struct bnxt *bp) @@ -7146,6 +7315,9 @@ static int bnxt_hwrm_func_backing_store_qcaps(struct bnxt *bp) if (bp->hwrm_spec_code < 0x10902 || BNXT_VF(bp) || bp->ctx) return 0; + if (bp->fw_cap & BNXT_FW_CAP_BACKING_STORE_V2) + return bnxt_hwrm_func_backing_store_qcaps_v2(bp); + rc = hwrm_req_init(bp, req, HWRM_FUNC_BACKING_STORE_QCAPS); if (rc) return rc; @@ -7153,48 +7325,83 @@ static int bnxt_hwrm_func_backing_store_qcaps(struct bnxt *bp) resp = hwrm_req_hold(bp, req); rc = hwrm_req_send_silent(bp, req); if (!rc) { - struct bnxt_ctx_pg_info *ctx_pg; + struct bnxt_ctx_mem_type *ctxm; struct bnxt_ctx_mem_info *ctx; - int i, tqm_rings; + u8 init_val, init_idx = 0; + u16 init_mask; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + ctx = bp->ctx; if (!ctx) { - rc = -ENOMEM; - goto ctx_err; - } - ctx->qp_max_entries = le32_to_cpu(resp->qp_max_entries); - ctx->qp_min_qp1_entries = le16_to_cpu(resp->qp_min_qp1_entries); - ctx->qp_max_l2_entries = le16_to_cpu(resp->qp_max_l2_entries); - ctx->qp_entry_size = le16_to_cpu(resp->qp_entry_size); - ctx->srq_max_l2_entries = le16_to_cpu(resp->srq_max_l2_entries); - ctx->srq_max_entries = le32_to_cpu(resp->srq_max_entries); - ctx->srq_entry_size = le16_to_cpu(resp->srq_entry_size); - ctx->cq_max_l2_entries = le16_to_cpu(resp->cq_max_l2_entries); - ctx->cq_max_entries = le32_to_cpu(resp->cq_max_entries); - ctx->cq_entry_size = le16_to_cpu(resp->cq_entry_size); - ctx->vnic_max_vnic_entries = - le16_to_cpu(resp->vnic_max_vnic_entries); - ctx->vnic_max_ring_table_entries = + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) { + rc = -ENOMEM; + goto ctx_err; + } + bp->ctx = ctx; + } + init_val = resp->ctx_kind_initializer; + init_mask = le16_to_cpu(resp->ctx_init_mask); + + ctxm = &ctx->ctx_arr[BNXT_CTX_QP]; + ctxm->max_entries = le32_to_cpu(resp->qp_max_entries); + ctxm->qp_qp1_entries = le16_to_cpu(resp->qp_min_qp1_entries); + ctxm->qp_l2_entries = le16_to_cpu(resp->qp_max_l2_entries); + ctxm->entry_size = le16_to_cpu(resp->qp_entry_size); + bnxt_init_ctx_initializer(ctxm, init_val, resp->qp_init_offset, + (init_mask & (1 << init_idx++)) != 0); + + ctxm = &ctx->ctx_arr[BNXT_CTX_SRQ]; + ctxm->srq_l2_entries = le16_to_cpu(resp->srq_max_l2_entries); + ctxm->max_entries = le32_to_cpu(resp->srq_max_entries); + ctxm->entry_size = le16_to_cpu(resp->srq_entry_size); + bnxt_init_ctx_initializer(ctxm, init_val, resp->srq_init_offset, + (init_mask & (1 << init_idx++)) != 0); + + ctxm = &ctx->ctx_arr[BNXT_CTX_CQ]; + ctxm->cq_l2_entries = le16_to_cpu(resp->cq_max_l2_entries); + ctxm->max_entries = le32_to_cpu(resp->cq_max_entries); + ctxm->entry_size = le16_to_cpu(resp->cq_entry_size); + bnxt_init_ctx_initializer(ctxm, init_val, resp->cq_init_offset, + (init_mask & (1 << init_idx++)) != 0); + + ctxm = &ctx->ctx_arr[BNXT_CTX_VNIC]; + ctxm->vnic_entries = le16_to_cpu(resp->vnic_max_vnic_entries); + ctxm->max_entries = ctxm->vnic_entries + le16_to_cpu(resp->vnic_max_ring_table_entries); - ctx->vnic_entry_size = le16_to_cpu(resp->vnic_entry_size); - ctx->stat_max_entries = le32_to_cpu(resp->stat_max_entries); - ctx->stat_entry_size = le16_to_cpu(resp->stat_entry_size); - ctx->tqm_entry_size = le16_to_cpu(resp->tqm_entry_size); - ctx->tqm_min_entries_per_ring = - le32_to_cpu(resp->tqm_min_entries_per_ring); - ctx->tqm_max_entries_per_ring = - le32_to_cpu(resp->tqm_max_entries_per_ring); - ctx->tqm_entries_multiple = 
resp->tqm_entries_multiple; - if (!ctx->tqm_entries_multiple) - ctx->tqm_entries_multiple = 1; - ctx->mrav_max_entries = le32_to_cpu(resp->mrav_max_entries); - ctx->mrav_entry_size = le16_to_cpu(resp->mrav_entry_size); - ctx->mrav_num_entries_units = + ctxm->entry_size = le16_to_cpu(resp->vnic_entry_size); + bnxt_init_ctx_initializer(ctxm, init_val, + resp->vnic_init_offset, + (init_mask & (1 << init_idx++)) != 0); + + ctxm = &ctx->ctx_arr[BNXT_CTX_STAT]; + ctxm->max_entries = le32_to_cpu(resp->stat_max_entries); + ctxm->entry_size = le16_to_cpu(resp->stat_entry_size); + bnxt_init_ctx_initializer(ctxm, init_val, + resp->stat_init_offset, + (init_mask & (1 << init_idx++)) != 0); + + ctxm = &ctx->ctx_arr[BNXT_CTX_STQM]; + ctxm->entry_size = le16_to_cpu(resp->tqm_entry_size); + ctxm->min_entries = le32_to_cpu(resp->tqm_min_entries_per_ring); + ctxm->max_entries = le32_to_cpu(resp->tqm_max_entries_per_ring); + ctxm->entry_multiple = resp->tqm_entries_multiple; + if (!ctxm->entry_multiple) + ctxm->entry_multiple = 1; + + memcpy(&ctx->ctx_arr[BNXT_CTX_FTQM], ctxm, sizeof(*ctxm)); + + ctxm = &ctx->ctx_arr[BNXT_CTX_MRAV]; + ctxm->max_entries = le32_to_cpu(resp->mrav_max_entries); + ctxm->entry_size = le16_to_cpu(resp->mrav_entry_size); + ctxm->mrav_num_entries_units = le16_to_cpu(resp->mrav_num_entries_units); - ctx->tim_entry_size = le16_to_cpu(resp->tim_entry_size); - ctx->tim_max_entries = le32_to_cpu(resp->tim_max_entries); + bnxt_init_ctx_initializer(ctxm, init_val, + resp->mrav_init_offset, + (init_mask & (1 << init_idx++)) != 0); - bnxt_init_ctx_initializer(ctx, resp); + ctxm = &ctx->ctx_arr[BNXT_CTX_TIM]; + ctxm->entry_size = le16_to_cpu(resp->tim_entry_size); + ctxm->max_entries = le32_to_cpu(resp->tim_max_entries); ctx->tqm_fp_rings_count = resp->tqm_fp_rings_count; if (!ctx->tqm_fp_rings_count) @@ -7202,16 +7409,11 @@ static int bnxt_hwrm_func_backing_store_qcaps(struct bnxt *bp) else if (ctx->tqm_fp_rings_count > BNXT_MAX_TQM_FP_RINGS) ctx->tqm_fp_rings_count = BNXT_MAX_TQM_FP_RINGS; - tqm_rings = ctx->tqm_fp_rings_count + BNXT_MAX_TQM_SP_RINGS; - ctx_pg = kcalloc(tqm_rings, sizeof(*ctx_pg), GFP_KERNEL); - if (!ctx_pg) { - kfree(ctx); - rc = -ENOMEM; - goto ctx_err; - } - for (i = 0; i < tqm_rings; i++, ctx_pg++) - ctx->tqm_mem[i] = ctx_pg; - bp->ctx = ctx; + ctxm = &ctx->ctx_arr[BNXT_CTX_FTQM]; + memcpy(ctxm, &ctx->ctx_arr[BNXT_CTX_STQM], sizeof(*ctxm)); + ctxm->instance_bmap = (1 << ctx->tqm_fp_rings_count) - 1; + + rc = bnxt_alloc_all_ctx_pg_info(bp, BNXT_CTX_MAX); } else { rc = 0; } @@ -7250,6 +7452,7 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables) struct hwrm_func_backing_store_cfg_input *req; struct bnxt_ctx_mem_info *ctx = bp->ctx; struct bnxt_ctx_pg_info *ctx_pg; + struct bnxt_ctx_mem_type *ctxm; void **__req = (void **)&req; u32 req_len = sizeof(*req); __le32 *num_entries; @@ -7271,82 +7474,99 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables) req->enables = cpu_to_le32(enables); if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_QP) { - ctx_pg = &ctx->qp_mem; + ctxm = &ctx->ctx_arr[BNXT_CTX_QP]; + ctx_pg = ctxm->pg_info; req->qp_num_entries = cpu_to_le32(ctx_pg->entries); - req->qp_num_qp1_entries = cpu_to_le16(ctx->qp_min_qp1_entries); - req->qp_num_l2_entries = cpu_to_le16(ctx->qp_max_l2_entries); - req->qp_entry_size = cpu_to_le16(ctx->qp_entry_size); + req->qp_num_qp1_entries = cpu_to_le16(ctxm->qp_qp1_entries); + req->qp_num_l2_entries = cpu_to_le16(ctxm->qp_l2_entries); + req->qp_entry_size = 
cpu_to_le16(ctxm->entry_size); bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, &req->qpc_pg_size_qpc_lvl, &req->qpc_page_dir); } if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_SRQ) { - ctx_pg = &ctx->srq_mem; + ctxm = &ctx->ctx_arr[BNXT_CTX_SRQ]; + ctx_pg = ctxm->pg_info; req->srq_num_entries = cpu_to_le32(ctx_pg->entries); - req->srq_num_l2_entries = cpu_to_le16(ctx->srq_max_l2_entries); - req->srq_entry_size = cpu_to_le16(ctx->srq_entry_size); + req->srq_num_l2_entries = cpu_to_le16(ctxm->srq_l2_entries); + req->srq_entry_size = cpu_to_le16(ctxm->entry_size); bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, &req->srq_pg_size_srq_lvl, &req->srq_page_dir); } if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_CQ) { - ctx_pg = &ctx->cq_mem; + ctxm = &ctx->ctx_arr[BNXT_CTX_CQ]; + ctx_pg = ctxm->pg_info; req->cq_num_entries = cpu_to_le32(ctx_pg->entries); - req->cq_num_l2_entries = cpu_to_le16(ctx->cq_max_l2_entries); - req->cq_entry_size = cpu_to_le16(ctx->cq_entry_size); + req->cq_num_l2_entries = cpu_to_le16(ctxm->cq_l2_entries); + req->cq_entry_size = cpu_to_le16(ctxm->entry_size); bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, &req->cq_pg_size_cq_lvl, &req->cq_page_dir); } if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_VNIC) { - ctx_pg = &ctx->vnic_mem; - req->vnic_num_vnic_entries = - cpu_to_le16(ctx->vnic_max_vnic_entries); + ctxm = &ctx->ctx_arr[BNXT_CTX_VNIC]; + ctx_pg = ctxm->pg_info; + req->vnic_num_vnic_entries = cpu_to_le16(ctxm->vnic_entries); req->vnic_num_ring_table_entries = - cpu_to_le16(ctx->vnic_max_ring_table_entries); - req->vnic_entry_size = cpu_to_le16(ctx->vnic_entry_size); + cpu_to_le16(ctxm->max_entries - ctxm->vnic_entries); + req->vnic_entry_size = cpu_to_le16(ctxm->entry_size); bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, &req->vnic_pg_size_vnic_lvl, &req->vnic_page_dir); } if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_STAT) { - ctx_pg = &ctx->stat_mem; - req->stat_num_entries = cpu_to_le32(ctx->stat_max_entries); - req->stat_entry_size = cpu_to_le16(ctx->stat_entry_size); + ctxm = &ctx->ctx_arr[BNXT_CTX_STAT]; + ctx_pg = ctxm->pg_info; + req->stat_num_entries = cpu_to_le32(ctxm->max_entries); + req->stat_entry_size = cpu_to_le16(ctxm->entry_size); bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, &req->stat_pg_size_stat_lvl, &req->stat_page_dir); } if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV) { - ctx_pg = &ctx->mrav_mem; + u32 units; + + ctxm = &ctx->ctx_arr[BNXT_CTX_MRAV]; + ctx_pg = ctxm->pg_info; req->mrav_num_entries = cpu_to_le32(ctx_pg->entries); - if (ctx->mrav_num_entries_units) - flags |= - FUNC_BACKING_STORE_CFG_REQ_FLAGS_MRAV_RESERVATION_SPLIT; - req->mrav_entry_size = cpu_to_le16(ctx->mrav_entry_size); + units = ctxm->mrav_num_entries_units; + if (units) { + u32 num_mr, num_ah = ctxm->mrav_av_entries; + u32 entries; + + num_mr = ctx_pg->entries - num_ah; + entries = ((num_mr / units) << 16) | (num_ah / units); + req->mrav_num_entries = cpu_to_le32(entries); + flags |= FUNC_BACKING_STORE_CFG_REQ_FLAGS_MRAV_RESERVATION_SPLIT; + } + req->mrav_entry_size = cpu_to_le16(ctxm->entry_size); bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, &req->mrav_pg_size_mrav_lvl, &req->mrav_page_dir); } if (enables & FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM) { - ctx_pg = &ctx->tim_mem; + ctxm = &ctx->ctx_arr[BNXT_CTX_TIM]; + ctx_pg = ctxm->pg_info; req->tim_num_entries = cpu_to_le32(ctx_pg->entries); - req->tim_entry_size = cpu_to_le16(ctx->tim_entry_size); + req->tim_entry_size = cpu_to_le16(ctxm->entry_size); bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, &req->tim_pg_size_tim_lvl, &req->tim_page_dir); } 
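
In the MRAV block above, the reservation split is now computed in bnxt_hwrm_func_backing_store_cfg() itself: when firmware reports an entries unit, the MR count goes into the upper 16 bits and the AH count into the lower 16 bits of mrav_num_entries, both scaled by that unit. A small worked example of the packing; the counts and unit size below are made up, only the formula comes from the hunk:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Assumed reservation: 256K MR entries, 128K AH entries,
	 * firmware reporting 64 entries per unit.
	 */
	uint32_t num_mr = 1024 * 256;
	uint32_t num_ah = 1024 * 128;
	uint32_t units = 64;

	/* Same packing as the driver: MR units in bits 31:16,
	 * AH units in bits 15:0.
	 */
	uint32_t entries = ((num_mr / units) << 16) | (num_ah / units);

	printf("mrav_num_entries=0x%08x (mr=%u, ah=%u units)\n",
	       entries, entries >> 16, entries & 0xffff);
	return 0;
}

This is the same encoding the old bnxt_alloc_ctx_mem() applied to ctx_pg->entries (removed further down in this patch); it simply moves next to the request that consumes it.
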
+ ctxm = &ctx->ctx_arr[BNXT_CTX_STQM]; for (i = 0, num_entries = &req->tqm_sp_num_entries, pg_attr = &req->tqm_sp_pg_size_tqm_sp_lvl, pg_dir = &req->tqm_sp_page_dir, - ena = FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP; + ena = FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP, + ctx_pg = ctxm->pg_info; i < BNXT_MAX_TQM_RINGS; + ctx_pg = &ctx->ctx_arr[BNXT_CTX_FTQM].pg_info[i], i++, num_entries++, pg_attr++, pg_dir++, ena <<= 1) { if (!(enables & ena)) continue; - req->tqm_entry_size = cpu_to_le16(ctx->tqm_entry_size); - ctx_pg = ctx->tqm_mem[i]; + req->tqm_entry_size = cpu_to_le16(ctxm->entry_size); *num_entries = cpu_to_le32(ctx_pg->entries); bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, pg_attr, pg_dir); } @@ -7370,7 +7590,7 @@ static int bnxt_alloc_ctx_mem_blk(struct bnxt *bp, static int bnxt_alloc_ctx_pg_tbls(struct bnxt *bp, struct bnxt_ctx_pg_info *ctx_pg, u32 mem_size, - u8 depth, struct bnxt_mem_init *mem_init) + u8 depth, struct bnxt_ctx_mem_type *ctxm) { struct bnxt_ring_mem_info *rmem = &ctx_pg->ring_mem; int rc; @@ -7408,7 +7628,7 @@ static int bnxt_alloc_ctx_pg_tbls(struct bnxt *bp, rmem->pg_tbl_map = ctx_pg->ctx_dma_arr[i]; rmem->depth = 1; rmem->nr_pages = MAX_CTX_PAGES; - rmem->mem_init = mem_init; + rmem->ctx_mem = ctxm; if (i == (nr_tbls - 1)) { int rem = ctx_pg->nr_pages % MAX_CTX_PAGES; @@ -7423,7 +7643,7 @@ static int bnxt_alloc_ctx_pg_tbls(struct bnxt *bp, rmem->nr_pages = DIV_ROUND_UP(mem_size, BNXT_PAGE_SIZE); if (rmem->nr_pages > 1 || depth) rmem->depth = 1; - rmem->mem_init = mem_init; + rmem->ctx_mem = ctxm; rc = bnxt_alloc_ctx_mem_blk(bp, ctx_pg); } return rc; @@ -7458,38 +7678,131 @@ static void bnxt_free_ctx_pg_tbls(struct bnxt *bp, ctx_pg->nr_pages = 0; } +static int bnxt_setup_ctxm_pg_tbls(struct bnxt *bp, + struct bnxt_ctx_mem_type *ctxm, u32 entries, + u8 pg_lvl) +{ + struct bnxt_ctx_pg_info *ctx_pg = ctxm->pg_info; + int i, rc = 0, n = 1; + u32 mem_size; + + if (!ctxm->entry_size || !ctx_pg) + return -EINVAL; + if (ctxm->instance_bmap) + n = hweight32(ctxm->instance_bmap); + if (ctxm->entry_multiple) + entries = roundup(entries, ctxm->entry_multiple); + entries = clamp_t(u32, entries, ctxm->min_entries, ctxm->max_entries); + mem_size = entries * ctxm->entry_size; + for (i = 0; i < n && !rc; i++) { + ctx_pg[i].entries = entries; + rc = bnxt_alloc_ctx_pg_tbls(bp, &ctx_pg[i], mem_size, pg_lvl, + ctxm->init_value ? 
ctxm : NULL); + } + return rc; +} + +static int bnxt_hwrm_func_backing_store_cfg_v2(struct bnxt *bp, + struct bnxt_ctx_mem_type *ctxm, + bool last) +{ + struct hwrm_func_backing_store_cfg_v2_input *req; + u32 instance_bmap = ctxm->instance_bmap; + int i, j, rc = 0, n = 1; + __le32 *p; + + if (!(ctxm->flags & BNXT_CTX_MEM_TYPE_VALID) || !ctxm->pg_info) + return 0; + + if (instance_bmap) + n = hweight32(ctxm->instance_bmap); + else + instance_bmap = 1; + + rc = hwrm_req_init(bp, req, HWRM_FUNC_BACKING_STORE_CFG_V2); + if (rc) + return rc; + hwrm_req_hold(bp, req); + req->type = cpu_to_le16(ctxm->type); + req->entry_size = cpu_to_le16(ctxm->entry_size); + req->subtype_valid_cnt = ctxm->split_entry_cnt; + for (i = 0, p = &req->split_entry_0; i < ctxm->split_entry_cnt; i++) + p[i] = cpu_to_le32(ctxm->split[i]); + for (i = 0, j = 0; j < n && !rc; i++) { + struct bnxt_ctx_pg_info *ctx_pg; + + if (!(instance_bmap & (1 << i))) + continue; + req->instance = cpu_to_le16(i); + ctx_pg = &ctxm->pg_info[j++]; + if (!ctx_pg->entries) + continue; + req->num_entries = cpu_to_le32(ctx_pg->entries); + bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, + &req->page_size_pbl_level, + &req->page_dir); + if (last && j == n) + req->flags = + cpu_to_le32(FUNC_BACKING_STORE_CFG_V2_REQ_FLAGS_BS_CFG_ALL_DONE); + rc = hwrm_req_send(bp, req); + } + hwrm_req_drop(bp, req); + return rc; +} + +static int bnxt_backing_store_cfg_v2(struct bnxt *bp) +{ + struct bnxt_ctx_mem_info *ctx = bp->ctx; + struct bnxt_ctx_mem_type *ctxm; + int rc = 0; + u16 type; + + for (type = 0 ; type < BNXT_CTX_V2_MAX; type++) { + ctxm = &ctx->ctx_arr[type]; + + rc = bnxt_hwrm_func_backing_store_cfg_v2(bp, ctxm, ctxm->last); + if (rc) + return rc; + } + return 0; +} + void bnxt_free_ctx_mem(struct bnxt *bp) { struct bnxt_ctx_mem_info *ctx = bp->ctx; - int i; + u16 type; if (!ctx) return; - if (ctx->tqm_mem[0]) { - for (i = 0; i < ctx->tqm_fp_rings_count + 1; i++) - bnxt_free_ctx_pg_tbls(bp, ctx->tqm_mem[i]); - kfree(ctx->tqm_mem[0]); - ctx->tqm_mem[0] = NULL; + for (type = 0; type < BNXT_CTX_V2_MAX; type++) { + struct bnxt_ctx_mem_type *ctxm = &ctx->ctx_arr[type]; + struct bnxt_ctx_pg_info *ctx_pg = ctxm->pg_info; + int i, n = 1; + + if (!ctx_pg) + continue; + if (ctxm->instance_bmap) + n = hweight32(ctxm->instance_bmap); + for (i = 0; i < n; i++) + bnxt_free_ctx_pg_tbls(bp, &ctx_pg[i]); + + kfree(ctx_pg); + ctxm->pg_info = NULL; } - bnxt_free_ctx_pg_tbls(bp, &ctx->tim_mem); - bnxt_free_ctx_pg_tbls(bp, &ctx->mrav_mem); - bnxt_free_ctx_pg_tbls(bp, &ctx->stat_mem); - bnxt_free_ctx_pg_tbls(bp, &ctx->vnic_mem); - bnxt_free_ctx_pg_tbls(bp, &ctx->cq_mem); - bnxt_free_ctx_pg_tbls(bp, &ctx->srq_mem); - bnxt_free_ctx_pg_tbls(bp, &ctx->qp_mem); ctx->flags &= ~BNXT_CTX_FLAG_INITED; + kfree(ctx); + bp->ctx = NULL; } static int bnxt_alloc_ctx_mem(struct bnxt *bp) { - struct bnxt_ctx_pg_info *ctx_pg; + struct bnxt_ctx_mem_type *ctxm; struct bnxt_ctx_mem_info *ctx; - struct bnxt_mem_init *init; - u32 mem_size, ena, entries; - u32 entries_sp, min; + u32 l2_qps, qp1_qps, max_qps; + u32 ena, entries_sp, entries; + u32 srqs, max_srqs, min; u32 num_mr, num_ah; u32 extra_srqs = 0; u32 extra_qps = 0; @@ -7506,120 +7819,93 @@ static int bnxt_alloc_ctx_mem(struct bnxt *bp) if (!ctx || (ctx->flags & BNXT_CTX_FLAG_INITED)) return 0; + ctxm = &ctx->ctx_arr[BNXT_CTX_QP]; + l2_qps = ctxm->qp_l2_entries; + qp1_qps = ctxm->qp_qp1_entries; + max_qps = ctxm->max_entries; + ctxm = &ctx->ctx_arr[BNXT_CTX_SRQ]; + srqs = ctxm->srq_l2_entries; + max_srqs = ctxm->max_entries; if ((bp->flags & 
BNXT_FLAG_ROCE_CAP) && !is_kdump_kernel()) { pg_lvl = 2; - extra_qps = 65536; - extra_srqs = 8192; + extra_qps = min_t(u32, 65536, max_qps - l2_qps - qp1_qps); + extra_srqs = min_t(u32, 8192, max_srqs - srqs); } - ctx_pg = &ctx->qp_mem; - ctx_pg->entries = ctx->qp_min_qp1_entries + ctx->qp_max_l2_entries + - extra_qps; - if (ctx->qp_entry_size) { - mem_size = ctx->qp_entry_size * ctx_pg->entries; - init = &ctx->mem_init[BNXT_CTX_MEM_INIT_QP]; - rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, pg_lvl, init); - if (rc) - return rc; - } + ctxm = &ctx->ctx_arr[BNXT_CTX_QP]; + rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, l2_qps + qp1_qps + extra_qps, + pg_lvl); + if (rc) + return rc; - ctx_pg = &ctx->srq_mem; - ctx_pg->entries = ctx->srq_max_l2_entries + extra_srqs; - if (ctx->srq_entry_size) { - mem_size = ctx->srq_entry_size * ctx_pg->entries; - init = &ctx->mem_init[BNXT_CTX_MEM_INIT_SRQ]; - rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, pg_lvl, init); - if (rc) - return rc; - } + ctxm = &ctx->ctx_arr[BNXT_CTX_SRQ]; + rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, srqs + extra_srqs, pg_lvl); + if (rc) + return rc; - ctx_pg = &ctx->cq_mem; - ctx_pg->entries = ctx->cq_max_l2_entries + extra_qps * 2; - if (ctx->cq_entry_size) { - mem_size = ctx->cq_entry_size * ctx_pg->entries; - init = &ctx->mem_init[BNXT_CTX_MEM_INIT_CQ]; - rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, pg_lvl, init); - if (rc) - return rc; - } + ctxm = &ctx->ctx_arr[BNXT_CTX_CQ]; + rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, ctxm->cq_l2_entries + + extra_qps * 2, pg_lvl); + if (rc) + return rc; - ctx_pg = &ctx->vnic_mem; - ctx_pg->entries = ctx->vnic_max_vnic_entries + - ctx->vnic_max_ring_table_entries; - if (ctx->vnic_entry_size) { - mem_size = ctx->vnic_entry_size * ctx_pg->entries; - init = &ctx->mem_init[BNXT_CTX_MEM_INIT_VNIC]; - rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 1, init); - if (rc) - return rc; - } + ctxm = &ctx->ctx_arr[BNXT_CTX_VNIC]; + rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, ctxm->max_entries, 1); + if (rc) + return rc; - ctx_pg = &ctx->stat_mem; - ctx_pg->entries = ctx->stat_max_entries; - if (ctx->stat_entry_size) { - mem_size = ctx->stat_entry_size * ctx_pg->entries; - init = &ctx->mem_init[BNXT_CTX_MEM_INIT_STAT]; - rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 1, init); - if (rc) - return rc; - } + ctxm = &ctx->ctx_arr[BNXT_CTX_STAT]; + rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, ctxm->max_entries, 1); + if (rc) + return rc; ena = 0; if (!(bp->flags & BNXT_FLAG_ROCE_CAP)) goto skip_rdma; - ctx_pg = &ctx->mrav_mem; + ctxm = &ctx->ctx_arr[BNXT_CTX_MRAV]; /* 128K extra is needed to accommodate static AH context * allocation by f/w. 
*/ - num_mr = 1024 * 256; - num_ah = 1024 * 128; - ctx_pg->entries = num_mr + num_ah; - if (ctx->mrav_entry_size) { - mem_size = ctx->mrav_entry_size * ctx_pg->entries; - init = &ctx->mem_init[BNXT_CTX_MEM_INIT_MRAV]; - rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 2, init); - if (rc) - return rc; - } + num_mr = min_t(u32, ctxm->max_entries / 2, 1024 * 256); + num_ah = min_t(u32, num_mr, 1024 * 128); + ctxm->split_entry_cnt = BNXT_CTX_MRAV_AV_SPLIT_ENTRY + 1; + if (!ctxm->mrav_av_entries || ctxm->mrav_av_entries > num_ah) + ctxm->mrav_av_entries = num_ah; + + rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, num_mr + num_ah, 2); + if (rc) + return rc; ena = FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV; - if (ctx->mrav_num_entries_units) - ctx_pg->entries = - ((num_mr / ctx->mrav_num_entries_units) << 16) | - (num_ah / ctx->mrav_num_entries_units); - - ctx_pg = &ctx->tim_mem; - ctx_pg->entries = ctx->qp_mem.entries; - if (ctx->tim_entry_size) { - mem_size = ctx->tim_entry_size * ctx_pg->entries; - rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 1, NULL); - if (rc) - return rc; - } + + ctxm = &ctx->ctx_arr[BNXT_CTX_TIM]; + rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, l2_qps + qp1_qps + extra_qps, 1); + if (rc) + return rc; ena |= FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM; skip_rdma: - min = ctx->tqm_min_entries_per_ring; - entries_sp = ctx->vnic_max_vnic_entries + ctx->qp_max_l2_entries + - 2 * (extra_qps + ctx->qp_min_qp1_entries) + min; - entries_sp = roundup(entries_sp, ctx->tqm_entries_multiple); - entries = ctx->qp_max_l2_entries + 2 * (extra_qps + ctx->qp_min_qp1_entries); - entries = roundup(entries, ctx->tqm_entries_multiple); - entries = clamp_t(u32, entries, min, ctx->tqm_max_entries_per_ring); - for (i = 0; i < ctx->tqm_fp_rings_count + 1; i++) { - ctx_pg = ctx->tqm_mem[i]; - ctx_pg->entries = i ? 
entries : entries_sp; - if (ctx->tqm_entry_size) { - mem_size = ctx->tqm_entry_size * ctx_pg->entries; - rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 1, - NULL); - if (rc) - return rc; - } + ctxm = &ctx->ctx_arr[BNXT_CTX_STQM]; + min = ctxm->min_entries; + entries_sp = ctx->ctx_arr[BNXT_CTX_VNIC].vnic_entries + l2_qps + + 2 * (extra_qps + qp1_qps) + min; + rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, entries_sp, 2); + if (rc) + return rc; + + ctxm = &ctx->ctx_arr[BNXT_CTX_FTQM]; + entries = l2_qps + 2 * (extra_qps + qp1_qps); + rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, entries, 2); + if (rc) + return rc; + for (i = 0; i < ctx->tqm_fp_rings_count + 1; i++) ena |= FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP << i; - } ena |= FUNC_BACKING_STORE_CFG_REQ_DFLT_ENABLES; - rc = bnxt_hwrm_func_backing_store_cfg(bp, ena); + + if (bp->fw_cap & BNXT_FW_CAP_BACKING_STORE_V2) + rc = bnxt_backing_store_cfg_v2(bp); + else + rc = bnxt_hwrm_func_backing_store_cfg(bp, ena); if (rc) { netdev_err(bp->dev, "Failed configuring context mem, rc = %d.\n", rc); @@ -7667,7 +7953,7 @@ int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all) hw_resc->min_stat_ctxs = le16_to_cpu(resp->min_stat_ctx); hw_resc->max_stat_ctxs = le16_to_cpu(resp->max_stat_ctx); - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { u16 max_msix = le16_to_cpu(resp->max_msix); hw_resc->max_nqs = max_msix; @@ -7696,7 +7982,7 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp) u8 flags; int rc; - if (bp->hwrm_spec_code < 0x10801 || !BNXT_CHIP_P5_THOR(bp)) { + if (bp->hwrm_spec_code < 0x10801 || !BNXT_CHIP_P5(bp)) { rc = -ENODEV; goto no_ptp; } @@ -7728,7 +8014,7 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp) if (flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_PARTIAL_DIRECT_ACCESS_REF_CLOCK) { ptp->refclk_regs[0] = le32_to_cpu(resp->ts_ref_clock_reg_lower); ptp->refclk_regs[1] = le32_to_cpu(resp->ts_ref_clock_reg_upper); - } else if (bp->flags & BNXT_FLAG_CHIP_P5) { + } else if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { ptp->refclk_regs[0] = BNXT_TS_REG_TIMESYNC_TS0_LOWER; ptp->refclk_regs[1] = BNXT_TS_REG_TIMESYNC_TS0_UPPER; } else { @@ -7800,6 +8086,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) bp->fw_cap |= BNXT_FW_CAP_HOT_RESET_IF; if (BNXT_PF(bp) && (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_FW_LIVEPATCH_SUPPORTED)) bp->fw_cap |= BNXT_FW_CAP_LIVEPATCH; + if (flags_ext & FUNC_QCAPS_RESP_FLAGS_EXT_BS_V2_SUPPORTED) + bp->fw_cap |= BNXT_FW_CAP_BACKING_STORE_V2; flags_ext2 = le32_to_cpu(resp->flags_ext2); if (flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_RX_ALL_PKTS_TIMESTAMPS_SUPPORTED) @@ -8435,7 +8723,7 @@ static void bnxt_accumulate_all_stats(struct bnxt *bp) int i; /* Chip bug. Counter intermittently becomes 0. 
*/ - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) ignore_zero = true; for (i = 0; i < bp->cp_nr_rings; i++) { @@ -8629,7 +8917,7 @@ static void bnxt_clear_vnic(struct bnxt *bp) return; bnxt_hwrm_clear_vnic_filter(bp); - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) { + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) { /* clear all RSS setting before free vnic ctx */ bnxt_hwrm_clear_vnic_rss(bp); bnxt_hwrm_vnic_ctx_free(bp); @@ -8638,7 +8926,7 @@ static void bnxt_clear_vnic(struct bnxt *bp) if (bp->flags & BNXT_FLAG_TPA) bnxt_set_tpa(bp, false); bnxt_hwrm_vnic_free(bp); - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) bnxt_hwrm_vnic_ctx_free(bp); } @@ -8795,7 +9083,7 @@ static int __bnxt_setup_vnic_p5(struct bnxt *bp, u16 vnic_id) static int bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id) { - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) return __bnxt_setup_vnic_p5(bp, vnic_id); else return __bnxt_setup_vnic(bp, vnic_id); @@ -8806,7 +9094,7 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp) #ifdef CONFIG_RFS_ACCEL int i, rc = 0; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) return 0; for (i = 0; i < bp->rx_nr_rings; i++) { @@ -9031,8 +9319,8 @@ static int bnxt_set_real_num_queues(struct bnxt *bp) return rc; } -static int bnxt_trim_rings(struct bnxt *bp, int *rx, int *tx, int max, - bool shared) +static int __bnxt_trim_rings(struct bnxt *bp, int *rx, int *tx, int max, + bool shared) { int _rx = *rx, _tx = *tx; @@ -9055,6 +9343,46 @@ static int bnxt_trim_rings(struct bnxt *bp, int *rx, int *tx, int max, return 0; } +static int __bnxt_num_tx_to_cp(struct bnxt *bp, int tx, int tx_sets, int tx_xdp) +{ + return (tx - tx_xdp) / tx_sets + tx_xdp; +} + +int bnxt_num_tx_to_cp(struct bnxt *bp, int tx) +{ + int tcs = netdev_get_num_tc(bp->dev); + + if (!tcs) + tcs = 1; + return __bnxt_num_tx_to_cp(bp, tx, tcs, bp->tx_nr_rings_xdp); +} + +static int bnxt_num_cp_to_tx(struct bnxt *bp, int tx_cp) +{ + int tcs = netdev_get_num_tc(bp->dev); + + return (tx_cp - bp->tx_nr_rings_xdp) * tcs + + bp->tx_nr_rings_xdp; +} + +static int bnxt_trim_rings(struct bnxt *bp, int *rx, int *tx, int max, + bool sh) +{ + int tx_cp = bnxt_num_tx_to_cp(bp, *tx); + + if (tx_cp != *tx) { + int tx_saved = tx_cp, rc; + + rc = __bnxt_trim_rings(bp, rx, &tx_cp, max, sh); + if (rc) + return rc; + if (tx_cp != tx_saved) + *tx = bnxt_num_cp_to_tx(bp, tx_cp); + return 0; + } + return __bnxt_trim_rings(bp, rx, tx, max, sh); +} + static void bnxt_setup_msix(struct bnxt *bp) { const int len = sizeof(bp->irq_tbl[0].name); @@ -9067,7 +9395,7 @@ static void bnxt_setup_msix(struct bnxt *bp) for (i = 0; i < tcs; i++) { count = bp->tx_nr_rings_per_tc; - off = i * count; + off = BNXT_TC_TO_RING_BASE(bp, i); netdev_set_tc_queue(dev, i, count, off); } } @@ -9148,7 +9476,7 @@ static unsigned int bnxt_get_max_func_cp_rings_for_en(struct bnxt *bp) { unsigned int cp = bp->hw_resc.max_cp_rings; - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) cp -= bnxt_get_ulp_msix_num(bp); return cp; @@ -9158,7 +9486,7 @@ static unsigned int bnxt_get_max_func_irqs(struct bnxt *bp) { struct bnxt_hw_resc *hw_resc = &bp->hw_resc; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) return min_t(unsigned int, hw_resc->max_irqs, hw_resc->max_nqs); return min_t(unsigned int, hw_resc->max_irqs, hw_resc->max_cp_rings); @@ -9174,7 +9502,7 @@ unsigned int bnxt_get_avail_cp_rings_for_en(struct bnxt *bp) 
unsigned int cp; cp = bnxt_get_max_func_cp_rings_for_en(bp); - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) return cp - bp->rx_nr_rings - bp->tx_nr_rings; else return cp - bp->cp_nr_rings; @@ -9193,7 +9521,7 @@ int bnxt_get_avail_msix(struct bnxt *bp, int num) int max_idx, avail_msix; max_idx = bp->total_irqs; - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) max_idx = min_t(int, bp->total_irqs, max_cp); avail_msix = max_idx - bp->cp_nr_rings; if (!BNXT_NEW_RM(bp) || avail_msix >= num) @@ -9217,7 +9545,7 @@ static int bnxt_get_num_msix(struct bnxt *bp) static int bnxt_init_msix(struct bnxt *bp) { - int i, total_vecs, max, rc = 0, min = 1, ulp_msix; + int i, total_vecs, max, rc = 0, min = 1, ulp_msix, tx_cp; struct msix_entry *msix_ent; total_vecs = bnxt_get_num_msix(bp); @@ -9259,9 +9587,10 @@ static int bnxt_init_msix(struct bnxt *bp) if (rc) goto msix_setup_exit; + tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings); bp->cp_nr_rings = (min == 1) ? - max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) : - bp->tx_nr_rings + bp->rx_nr_rings; + max_t(int, tx_cp, bp->rx_nr_rings) : + tx_cp + bp->rx_nr_rings; } else { rc = -ENOMEM; @@ -9471,7 +9800,7 @@ static void bnxt_init_napi(struct bnxt *bp) if (bp->flags & BNXT_FLAG_USING_MSIX) { int (*poll_fn)(struct napi_struct *, int) = bnxt_poll; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) poll_fn = bnxt_poll_p5; else if (BNXT_CHIP_TYPE_NITRO_A0(bp)) cp_nr_rings--; @@ -9527,8 +9856,6 @@ static void bnxt_enable_napi(struct bnxt *bp) cpr = &bnapi->cp_ring; bnapi->in_reset = false; - bnapi->tx_pkts = 0; - if (bnapi->rx_ring) { INIT_WORK(&cpr->dim.work, bnxt_dim_work); cpr->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; @@ -10211,8 +10538,6 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) bnxt_ulp_stop(bp); bnxt_free_ctx_mem(bp); - kfree(bp->ctx); - bp->ctx = NULL; bnxt_dcb_free(bp); rc = bnxt_fw_init_one(bp); if (rc) { @@ -11194,7 +11519,7 @@ static bool bnxt_can_reserve_rings(struct bnxt *bp) /* If the chip and firmware supports RFS */ static bool bnxt_rfs_supported(struct bnxt *bp) { - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { if (bp->fw_cap & BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V2) return true; return false; @@ -11215,7 +11540,7 @@ static bool bnxt_rfs_capable(struct bnxt *bp) #ifdef CONFIG_RFS_ACCEL int vnics, max_vnics, max_rss_ctxs; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) return bnxt_rfs_supported(bp); if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp) || !bp->rx_nr_rings) return false; @@ -11317,7 +11642,7 @@ static int bnxt_set_features(struct net_device *dev, netdev_features_t features) update_tpa = true; if ((bp->flags & BNXT_FLAG_TPA) == 0 || (flags & BNXT_FLAG_TPA) == 0 || - (bp->flags & BNXT_FLAG_CHIP_P5)) + (bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) re_init = true; } @@ -11559,15 +11884,13 @@ static int bnxt_dbg_hwrm_ring_info_get(struct bnxt *bp, u8 ring_type, static void bnxt_dump_tx_sw_state(struct bnxt_napi *bnapi) { - struct bnxt_tx_ring_info *txr = bnapi->tx_ring; - int i = bnapi->index; - - if (!txr) - return; + struct bnxt_tx_ring_info *txr; + int i = bnapi->index, j; - netdev_info(bnapi->bp->dev, "[%d]: tx{fw_ring: %d prod: %x cons: %x}\n", - i, txr->tx_ring_struct.fw_ring_id, txr->tx_prod, - txr->tx_cons); + bnxt_for_each_napi_tx(j, bnapi, txr) + netdev_info(bnapi->bp->dev, "[%d.%d]: tx{fw_ring: 
%d prod: %x cons: %x}\n", + i, j, txr->tx_ring_struct.fw_ring_id, txr->tx_prod, + txr->tx_cons); } static void bnxt_dump_rx_sw_state(struct bnxt_napi *bnapi) @@ -11730,8 +12053,7 @@ static void bnxt_timer(struct timer_list *t) if (test_bit(BNXT_STATE_L2_FILTER_RETRY, &bp->state)) bnxt_queue_sp_work(bp, BNXT_RX_MASK_SP_EVENT); - if ((bp->flags & BNXT_FLAG_CHIP_P5) && !bp->chip_rev && - netif_carrier_ok(dev)) + if ((BNXT_CHIP_P5(bp)) && !bp->chip_rev && netif_carrier_ok(dev)) bnxt_queue_sp_work(bp, BNXT_RING_COAL_NOW_SP_EVENT); bnxt_restart_timer: @@ -11840,8 +12162,6 @@ static void bnxt_fw_reset_close(struct bnxt *bp) if (pci_is_enabled(bp->pdev)) pci_disable_device(bp->pdev); bnxt_free_ctx_mem(bp); - kfree(bp->ctx); - bp->ctx = NULL; } static bool is_bnxt_fw_ok(struct bnxt *bp) @@ -11984,7 +12304,7 @@ static void bnxt_chk_missed_irq(struct bnxt *bp) { int i; - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) return; for (i = 0; i < bp->cp_nr_rings; i++) { @@ -11997,12 +12317,11 @@ static void bnxt_chk_missed_irq(struct bnxt *bp) continue; cpr = &bnapi->cp_ring; - for (j = 0; j < 2; j++) { - struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[j]; + for (j = 0; j < cpr->cp_ring_count; j++) { + struct bnxt_cp_ring_info *cpr2 = &cpr->cp_ring_arr[j]; u32 val[2]; - if (!cpr2 || cpr2->has_more_work || - !bnxt_has_work(bp, cpr2)) + if (cpr2->has_more_work || !bnxt_has_work(bp, cpr2)) continue; if (cpr2->cp_raw_cons != cpr2->last_cp_raw_cons) { @@ -12161,23 +12480,27 @@ static void bnxt_sp_task(struct work_struct *work) clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state); } +static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx, + int *max_cp); + /* Under rtnl_lock */ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, int tx_xdp) { - int max_rx, max_tx, tx_sets = 1; + int max_rx, max_tx, max_cp, tx_sets = 1, tx_cp; int tx_rings_needed, stats; int rx_rings = rx; - int cp, vnics, rc; + int cp, vnics; if (tcs) tx_sets = tcs; - rc = bnxt_get_max_rings(bp, &max_rx, &max_tx, sh); - if (rc) - return rc; + if (bp->flags & BNXT_FLAG_AGG_RINGS) + rx_rings <<= 1; + + _bnxt_get_max_rings(bp, &max_rx, &max_tx, &max_cp); - if (max_rx < rx) + if (max_rx < rx_rings) return -ENOMEM; tx_rings_needed = tx * tx_sets + tx_xdp; @@ -12185,12 +12508,14 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, return -ENOMEM; vnics = 1; - if ((bp->flags & (BNXT_FLAG_RFS | BNXT_FLAG_CHIP_P5)) == BNXT_FLAG_RFS) - vnics += rx_rings; + if ((bp->flags & (BNXT_FLAG_RFS | BNXT_FLAG_CHIP_P5_PLUS)) == + BNXT_FLAG_RFS) + vnics += rx; - if (bp->flags & BNXT_FLAG_AGG_RINGS) - rx_rings <<= 1; - cp = sh ? max_t(int, tx_rings_needed, rx) : tx_rings_needed + rx; + tx_cp = __bnxt_num_tx_to_cp(bp, tx_rings_needed, tx_sets, tx_xdp); + cp = sh ? 
max_t(int, tx_cp, rx) : tx_cp + rx; + if (max_cp < cp) + return -ENOMEM; stats = cp; if (BNXT_NEW_RM(bp)) { cp += bnxt_get_ulp_msix_num(bp); @@ -12265,10 +12590,10 @@ static bool bnxt_fw_pre_resv_vnics(struct bnxt *bp) { u16 fw_maj = BNXT_FW_MAJ(bp), fw_bld = BNXT_FW_BLD(bp); - if (!(bp->flags & BNXT_FLAG_CHIP_P5) && + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && (fw_maj > 218 || (fw_maj == 218 && fw_bld >= 18))) return true; - if ((bp->flags & BNXT_FLAG_CHIP_P5) && + if ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && (fw_maj > 216 || (fw_maj == 216 && fw_bld >= 172))) return true; return false; @@ -12824,7 +13149,7 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc) { struct bnxt *bp = netdev_priv(dev); bool sh = false; - int rc; + int rc, tx_cp; if (tc > bp->max_tc) { netdev_err(dev, "Too many traffic classes requested: %d. Max supported is %d.\n", @@ -12855,8 +13180,9 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc) netdev_reset_tc(dev); } bp->tx_nr_rings += bp->tx_nr_rings_xdp; - bp->cp_nr_rings = sh ? max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) : - bp->tx_nr_rings + bp->rx_nr_rings; + tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings); + bp->cp_nr_rings = sh ? max_t(int, tx_cp, bp->rx_nr_rings) : + tx_cp + bp->rx_nr_rings; if (netif_running(bp->dev)) return bnxt_open_nic(bp, true, false); @@ -13255,8 +13581,6 @@ static void bnxt_remove_one(struct pci_dev *pdev) bp->fw_health = NULL; bnxt_cleanup_pci(bp); bnxt_free_ctx_mem(bp); - kfree(bp->ctx); - bp->ctx = NULL; kfree(bp->rss_indir_tbl); bp->rss_indir_tbl = NULL; bnxt_free_port_stats(bp); @@ -13325,7 +13649,7 @@ static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx, max_irq = min_t(int, bnxt_get_max_func_irqs(bp) - bnxt_get_ulp_msix_num(bp), hw_resc->max_stat_ctxs - bnxt_get_ulp_stat_ctxs(bp)); - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) *max_cp = min_t(int, *max_cp, max_irq); max_ring_grps = hw_resc->max_hw_ring_grps; if (BNXT_CHIP_TYPE_NITRO_A0(bp) && BNXT_PF(bp)) { @@ -13334,8 +13658,11 @@ static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx, } if (bp->flags & BNXT_FLAG_AGG_RINGS) *max_rx >>= 1; - if (bp->flags & BNXT_FLAG_CHIP_P5) { - bnxt_trim_rings(bp, max_rx, max_tx, *max_cp, false); + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { + if (*max_cp < (*max_rx + *max_tx)) { + *max_rx = *max_cp / 2; + *max_tx = *max_rx; + } /* On P5 chips, max_cp output param should be available NQs */ *max_cp = max_irq; } @@ -13636,7 +13963,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } max_irqs = bnxt_get_max_irq(pdev); - dev = alloc_etherdev_mq(sizeof(*bp), max_irqs); + dev = alloc_etherdev_mqs(sizeof(*bp), max_irqs * BNXT_MAX_QUEUE, + max_irqs); if (!dev) return -ENOMEM; @@ -13678,8 +14006,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (BNXT_PF(bp)) bnxt_vpd_read_info(bp); - if (BNXT_CHIP_P5(bp)) { - bp->flags |= BNXT_FLAG_CHIP_P5; + if (BNXT_CHIP_P5_PLUS(bp)) { + bp->flags |= BNXT_FLAG_CHIP_P5_PLUS; if (BNXT_CHIP_SR2(bp)) bp->flags |= BNXT_FLAG_CHIP_SR2; } @@ -13744,7 +14072,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) bp->gro_func = bnxt_gro_func_5730x; if (BNXT_CHIP_P4(bp)) bp->gro_func = bnxt_gro_func_5731x; - else if (BNXT_CHIP_P5(bp)) + else if (BNXT_CHIP_P5_PLUS(bp)) bp->gro_func = bnxt_gro_func_5750x; } if (!BNXT_CHIP_P4_PLUS(bp)) @@ -13852,8 +14180,6 @@ init_err_pci_clean: bp->fw_health = NULL; bnxt_cleanup_pci(bp); bnxt_free_ctx_mem(bp); - 
kfree(bp->ctx); - bp->ctx = NULL; kfree(bp->rss_indir_tbl); bp->rss_indir_tbl = NULL; @@ -13906,8 +14232,6 @@ static int bnxt_suspend(struct device *device) bnxt_hwrm_func_drv_unrgtr(bp); pci_disable_device(bp->pdev); bnxt_free_ctx_mem(bp); - kfree(bp->ctx); - bp->ctx = NULL; rtnl_unlock(); return rc; } @@ -14004,8 +14328,6 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, if (pci_is_enabled(pdev)) pci_disable_device(pdev); bnxt_free_ctx_mem(bp); - kfree(bp->ctx); - bp->ctx = NULL; rtnl_unlock(); /* Request a slot slot reset. */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index e702dbc3e6b1..94b3627406c4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -61,6 +61,24 @@ struct tx_bd { __le64 tx_bd_haddr; } __packed; +#define TX_OPAQUE_IDX_MASK 0x0000ffff +#define TX_OPAQUE_BDS_MASK 0x00ff0000 +#define TX_OPAQUE_BDS_SHIFT 16 +#define TX_OPAQUE_RING_MASK 0xff000000 +#define TX_OPAQUE_RING_SHIFT 24 + +#define SET_TX_OPAQUE(bp, txr, idx, bds) \ + (((txr)->tx_napi_idx << TX_OPAQUE_RING_SHIFT) | \ + ((bds) << TX_OPAQUE_BDS_SHIFT) | ((idx) & (bp)->tx_ring_mask)) + +#define TX_OPAQUE_IDX(opq) ((opq) & TX_OPAQUE_IDX_MASK) +#define TX_OPAQUE_RING(opq) (((opq) & TX_OPAQUE_RING_MASK) >> \ + TX_OPAQUE_RING_SHIFT) +#define TX_OPAQUE_BDS(opq) (((opq) & TX_OPAQUE_BDS_MASK) >> \ + TX_OPAQUE_BDS_SHIFT) +#define TX_OPAQUE_PROD(bp, opq) ((TX_OPAQUE_IDX(opq) + TX_OPAQUE_BDS(opq)) &\ + (bp)->tx_ring_mask) + struct tx_bd_ext { __le32 tx_bd_hsize_lflags; #define TX_BD_FLAGS_TCP_UDP_CHKSUM (1 << 0) @@ -530,6 +548,19 @@ struct nqe_cn { __le32 cq_handle_high; }; +#define BNXT_NQ_HDL_IDX_MASK 0x00ffffff +#define BNXT_NQ_HDL_TYPE_MASK 0xff000000 +#define BNXT_NQ_HDL_TYPE_SHIFT 24 +#define BNXT_NQ_HDL_TYPE_RX 0x00 +#define BNXT_NQ_HDL_TYPE_TX 0x01 + +#define BNXT_NQ_HDL_IDX(hdl) ((hdl) & BNXT_NQ_HDL_IDX_MASK) +#define BNXT_NQ_HDL_TYPE(hdl) (((hdl) & BNXT_NQ_HDL_TYPE_MASK) >> \ + BNXT_NQ_HDL_TYPE_SHIFT) + +#define BNXT_SET_NQ_HDL(cpr) \ + (((cpr)->cp_ring_type << BNXT_NQ_HDL_TYPE_SHIFT) | (cpr)->cp_idx) + #define DB_IDX_MASK 0xffffff #define DB_IDX_VALID (0x1 << 26) #define DB_IRQ_DIS (0x1 << 27) @@ -655,10 +686,12 @@ struct nqe_cn { */ #define BNXT_MIN_TX_DESC_CNT (MAX_SKB_FRAGS + 2) -#define RX_RING(x) (((x) & ~(RX_DESC_CNT - 1)) >> (BNXT_PAGE_SHIFT - 4)) +#define RX_RING(bp, x) (((x) & (bp)->rx_ring_mask) >> (BNXT_PAGE_SHIFT - 4)) +#define RX_AGG_RING(bp, x) (((x) & (bp)->rx_agg_ring_mask) >> \ + (BNXT_PAGE_SHIFT - 4)) #define RX_IDX(x) ((x) & (RX_DESC_CNT - 1)) -#define TX_RING(x) (((x) & ~(TX_DESC_CNT - 1)) >> (BNXT_PAGE_SHIFT - 4)) +#define TX_RING(bp, x) (((x) & (bp)->tx_ring_mask) >> (BNXT_PAGE_SHIFT - 4)) #define TX_IDX(x) ((x) & (TX_DESC_CNT - 1)) #define CP_RING(x) (((x) & ~(CP_DESC_CNT - 1)) >> (BNXT_PAGE_SHIFT - 4)) @@ -685,11 +718,14 @@ struct nqe_cn { #define RX_CMP_TYPE(rxcmp) \ (le32_to_cpu((rxcmp)->rx_cmp_len_flags_type) & RX_CMP_CMP_TYPE) -#define NEXT_RX(idx) (((idx) + 1) & bp->rx_ring_mask) +#define RING_RX(bp, idx) ((idx) & (bp)->rx_ring_mask) +#define NEXT_RX(idx) ((idx) + 1) -#define NEXT_RX_AGG(idx) (((idx) + 1) & bp->rx_agg_ring_mask) +#define RING_RX_AGG(bp, idx) ((idx) & (bp)->rx_agg_ring_mask) +#define NEXT_RX_AGG(idx) ((idx) + 1) -#define NEXT_TX(idx) (((idx) + 1) & bp->tx_ring_mask) +#define RING_TX(bp, idx) ((idx) & (bp)->tx_ring_mask) +#define NEXT_TX(idx) ((idx) + 1) #define ADV_RAW_CMP(idx, n) ((idx) + (n)) #define NEXT_RAW_CMP(idx) ADV_RAW_CMP(idx, 1) @@ 
-702,6 +738,7 @@ struct nqe_cn { #define BNXT_AGG_EVENT 2 #define BNXT_TX_EVENT 4 #define BNXT_REDIRECT_EVENT 8 +#define BNXT_TX_CMP_EVENT 0x10 struct bnxt_sw_tx_bd { union { @@ -730,13 +767,6 @@ struct bnxt_sw_rx_agg_bd { dma_addr_t mapping; }; -struct bnxt_mem_init { - u8 init_val; - u16 offset; -#define BNXT_MEM_INVALID_OFFSET 0xffff - u16 size; -}; - struct bnxt_ring_mem_info { int nr_pages; int page_size; @@ -746,7 +776,7 @@ struct bnxt_ring_mem_info { #define BNXT_RMEM_USE_FULL_PAGE_FLAG 4 u16 depth; - struct bnxt_mem_init *mem_init; + struct bnxt_ctx_mem_type *ctx_mem; void **pg_arr; dma_addr_t *dma_arr; @@ -788,13 +818,19 @@ struct bnxt_db_info { u64 db_key64; u32 db_key32; }; + u32 db_ring_mask; }; +#define DB_RING_IDX(db, idx) ((idx) & (db)->db_ring_mask) + struct bnxt_tx_ring_info { struct bnxt_napi *bnapi; + struct bnxt_cp_ring_info *tx_cpr; u16 tx_prod; u16 tx_cons; + u16 tx_hw_cons; u16 txq_index; + u8 tx_napi_idx; u8 kick_pending; struct bnxt_db_info tx_db; @@ -901,6 +937,7 @@ struct bnxt_tpa_idx_map { struct bnxt_rx_ring_info { struct bnxt_napi *bnapi; + struct bnxt_cp_ring_info *rx_cpr; u16 rx_prod; u16 rx_agg_prod; u16 rx_sw_agg_prod; @@ -980,6 +1017,10 @@ struct bnxt_cp_ring_info { u8 had_work_done:1; u8 has_more_work:1; + u8 had_nqe_notify:1; + + u8 cp_ring_type; + u8 cp_idx; u32 last_cp_raw_cons; @@ -1004,11 +1045,18 @@ struct bnxt_cp_ring_info { struct bnxt_ring_struct cp_ring_struct; - struct bnxt_cp_ring_info *cp_ring_arr[2]; -#define BNXT_RX_HDL 0 -#define BNXT_TX_HDL 1 + int cp_ring_count; + struct bnxt_cp_ring_info *cp_ring_arr; }; +#define BNXT_MAX_QUEUE 8 +#define BNXT_MAX_TXR_PER_NAPI BNXT_MAX_QUEUE + +#define bnxt_for_each_napi_tx(iter, bnapi, txr) \ + for (iter = 0, txr = (bnapi)->tx_ring[0]; txr; \ + txr = (iter < BNXT_MAX_TXR_PER_NAPI - 1) ? \ + (bnapi)->tx_ring[++iter] : NULL) + struct bnxt_napi { struct napi_struct napi; struct bnxt *bp; @@ -1016,11 +1064,10 @@ struct bnxt_napi { int index; struct bnxt_cp_ring_info cp_ring; struct bnxt_rx_ring_info *rx_ring; - struct bnxt_tx_ring_info *tx_ring; + struct bnxt_tx_ring_info *tx_ring[BNXT_MAX_TXR_PER_NAPI]; void (*tx_int)(struct bnxt *, struct bnxt_napi *, int budget); - int tx_pkts; u8 events; u8 tx_fault:1; @@ -1355,8 +1402,6 @@ struct bnxt_link_info { (PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE74_DISABLE | \ BNXT_FEC_RS_OFF(link_info)) -#define BNXT_MAX_QUEUE 8 - struct bnxt_queue_info { u8 queue_id; u8 queue_profile; @@ -1385,7 +1430,7 @@ struct bnxt_test_info { }; #define CHIMP_REG_VIEW_ADDR \ - ((bp->flags & BNXT_FLAG_CHIP_P5) ? 0x80000000 : 0xb1000000) + ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) ? 
0x80000000 : 0xb1000000) #define BNXT_GRCPF_REG_CHIMP_COMM 0x0 #define BNXT_GRCPF_REG_CHIMP_COMM_TRIGGER 0x100 @@ -1509,53 +1554,72 @@ do { \ attr = FUNC_BACKING_STORE_CFG_REQ_QPC_PG_SIZE_PG_4K; \ } while (0) +struct bnxt_ctx_mem_type { + u16 type; + u16 entry_size; + u32 flags; +#define BNXT_CTX_MEM_TYPE_VALID FUNC_BACKING_STORE_QCAPS_V2_RESP_FLAGS_TYPE_VALID + u32 instance_bmap; + u8 init_value; + u8 entry_multiple; + u16 init_offset; +#define BNXT_CTX_INIT_INVALID_OFFSET 0xffff + u32 max_entries; + u32 min_entries; + u8 last:1; + u8 split_entry_cnt; +#define BNXT_MAX_SPLIT_ENTRY 4 + union { + struct { + u32 qp_l2_entries; + u32 qp_qp1_entries; + u32 qp_fast_qpmd_entries; + }; + u32 srq_l2_entries; + u32 cq_l2_entries; + u32 vnic_entries; + struct { + u32 mrav_av_entries; + u32 mrav_num_entries_units; + }; + u32 split[BNXT_MAX_SPLIT_ENTRY]; + }; + struct bnxt_ctx_pg_info *pg_info; +}; + +#define BNXT_CTX_MRAV_AV_SPLIT_ENTRY 0 + +#define BNXT_CTX_QP FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QP +#define BNXT_CTX_SRQ FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ +#define BNXT_CTX_CQ FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ +#define BNXT_CTX_VNIC FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_VNIC +#define BNXT_CTX_STAT FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_STAT +#define BNXT_CTX_STQM FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SP_TQM_RING +#define BNXT_CTX_FTQM FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_FP_TQM_RING +#define BNXT_CTX_MRAV FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MRAV +#define BNXT_CTX_TIM FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TIM +#define BNXT_CTX_TKC FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TKC +#define BNXT_CTX_RKC FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RKC +#define BNXT_CTX_MTQM FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_MP_TQM_RING +#define BNXT_CTX_SQDBS FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SQ_DB_SHADOW +#define BNXT_CTX_RQDBS FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RQ_DB_SHADOW +#define BNXT_CTX_SRQDBS FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_SRQ_DB_SHADOW +#define BNXT_CTX_CQDBS FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CQ_DB_SHADOW +#define BNXT_CTX_QTKC FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QUIC_TKC +#define BNXT_CTX_QRKC FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_QUIC_RKC +#define BNXT_CTX_TBLSC FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_TBL_SCOPE +#define BNXT_CTX_XPAR FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_XID_PARTITION + +#define BNXT_CTX_MAX (BNXT_CTX_TIM + 1) +#define BNXT_CTX_V2_MAX (BNXT_CTX_XPAR + 1) +#define BNXT_CTX_INV ((u16)-1) + struct bnxt_ctx_mem_info { - u32 qp_max_entries; - u16 qp_min_qp1_entries; - u16 qp_max_l2_entries; - u16 qp_entry_size; - u16 srq_max_l2_entries; - u32 srq_max_entries; - u16 srq_entry_size; - u16 cq_max_l2_entries; - u32 cq_max_entries; - u16 cq_entry_size; - u16 vnic_max_vnic_entries; - u16 vnic_max_ring_table_entries; - u16 vnic_entry_size; - u32 stat_max_entries; - u16 stat_entry_size; - u16 tqm_entry_size; - u32 tqm_min_entries_per_ring; - u32 tqm_max_entries_per_ring; - u32 mrav_max_entries; - u16 mrav_entry_size; - u16 tim_entry_size; - u32 tim_max_entries; - u16 mrav_num_entries_units; - u8 tqm_entries_multiple; u8 tqm_fp_rings_count; u32 flags; #define BNXT_CTX_FLAG_INITED 0x01 - - struct bnxt_ctx_pg_info qp_mem; - struct bnxt_ctx_pg_info srq_mem; - struct bnxt_ctx_pg_info cq_mem; - struct bnxt_ctx_pg_info vnic_mem; - struct bnxt_ctx_pg_info stat_mem; - struct bnxt_ctx_pg_info mrav_mem; - struct bnxt_ctx_pg_info tim_mem; - struct bnxt_ctx_pg_info *tqm_mem[BNXT_MAX_TQM_RINGS]; - -#define BNXT_CTX_MEM_INIT_QP 0 -#define BNXT_CTX_MEM_INIT_SRQ 1 -#define BNXT_CTX_MEM_INIT_CQ 2 -#define 
BNXT_CTX_MEM_INIT_VNIC 3 -#define BNXT_CTX_MEM_INIT_STAT 4 -#define BNXT_CTX_MEM_INIT_MRAV 5 -#define BNXT_CTX_MEM_INIT_MAX 6 - struct bnxt_mem_init mem_init[BNXT_CTX_MEM_INIT_MAX]; + struct bnxt_ctx_mem_type ctx_arr[BNXT_CTX_V2_MAX]; }; enum bnxt_health_severity { @@ -1795,7 +1859,7 @@ struct bnxt { atomic_t intr_sem; u32 flags; - #define BNXT_FLAG_CHIP_P5 0x1 + #define BNXT_FLAG_CHIP_P5_PLUS 0x1 #define BNXT_FLAG_VF 0x2 #define BNXT_FLAG_LRO 0x4 #ifdef CONFIG_INET @@ -1849,21 +1913,21 @@ struct bnxt { #define BNXT_CHIP_TYPE_NITRO_A0(bp) ((bp)->flags & BNXT_FLAG_CHIP_NITRO_A0) #define BNXT_RX_PAGE_MODE(bp) ((bp)->flags & BNXT_FLAG_RX_PAGE_MODE) #define BNXT_SUPPORTS_TPA(bp) (!BNXT_CHIP_TYPE_NITRO_A0(bp) && \ - (!((bp)->flags & BNXT_FLAG_CHIP_P5) || \ + (!((bp)->flags & BNXT_FLAG_CHIP_P5_PLUS) ||\ (bp)->max_tpa_v2) && !is_kdump_kernel()) #define BNXT_RX_JUMBO_MODE(bp) ((bp)->flags & BNXT_FLAG_JUMBO) #define BNXT_CHIP_SR2(bp) \ ((bp)->chip_num == CHIP_NUM_58818) -#define BNXT_CHIP_P5_THOR(bp) \ +#define BNXT_CHIP_P5(bp) \ ((bp)->chip_num == CHIP_NUM_57508 || \ (bp)->chip_num == CHIP_NUM_57504 || \ (bp)->chip_num == CHIP_NUM_57502) /* Chip class phase 5 */ -#define BNXT_CHIP_P5(bp) \ - (BNXT_CHIP_P5_THOR(bp) || BNXT_CHIP_SR2(bp)) +#define BNXT_CHIP_P5_PLUS(bp) \ + (BNXT_CHIP_P5(bp) || BNXT_CHIP_SR2(bp)) /* Chip class phase 4.x */ #define BNXT_CHIP_P4(bp) \ @@ -1874,7 +1938,7 @@ struct bnxt { !BNXT_CHIP_TYPE_NITRO_A0(bp))) #define BNXT_CHIP_P4_PLUS(bp) \ - (BNXT_CHIP_P4(bp) || BNXT_CHIP_P5(bp)) + (BNXT_CHIP_P4(bp) || BNXT_CHIP_P5_PLUS(bp)) struct bnxt_aux_priv *aux_priv; struct bnxt_en_dev *edev; @@ -2017,6 +2081,7 @@ struct bnxt { #define BNXT_FW_CAP_THRESHOLD_TEMP_SUPPORTED BIT_ULL(33) #define BNXT_FW_CAP_DFLT_VLAN_TPID_PCP BIT_ULL(34) #define BNXT_FW_CAP_PRE_RESV_VNICS BIT_ULL(35) + #define BNXT_FW_CAP_BACKING_STORE_V2 BIT_ULL(36) u32 fw_dbg_cap; @@ -2297,10 +2362,11 @@ static inline void bnxt_writeq_relaxed(struct bnxt *bp, u64 val, static inline void bnxt_db_write_relaxed(struct bnxt *bp, struct bnxt_db_info *db, u32 idx) { - if (bp->flags & BNXT_FLAG_CHIP_P5) { - bnxt_writeq_relaxed(bp, db->db_key64 | idx, db->doorbell); + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { + bnxt_writeq_relaxed(bp, db->db_key64 | DB_RING_IDX(db, idx), + db->doorbell); } else { - u32 db_val = db->db_key32 | idx; + u32 db_val = db->db_key32 | DB_RING_IDX(db, idx); writel_relaxed(db_val, db->doorbell); if (bp->flags & BNXT_FLAG_DOUBLE_DB) @@ -2312,10 +2378,11 @@ static inline void bnxt_db_write_relaxed(struct bnxt *bp, static inline void bnxt_db_write(struct bnxt *bp, struct bnxt_db_info *db, u32 idx) { - if (bp->flags & BNXT_FLAG_CHIP_P5) { - bnxt_writeq(bp, db->db_key64 | idx, db->doorbell); + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { + bnxt_writeq(bp, db->db_key64 | DB_RING_IDX(db, idx), + db->doorbell); } else { - u32 db_val = db->db_key32 | idx; + u32 db_val = db->db_key32 | DB_RING_IDX(db, idx); writel(db_val, db->doorbell); if (bp->flags & BNXT_FLAG_DOUBLE_DB) @@ -2351,6 +2418,7 @@ int __bnxt_hwrm_get_tx_rings(struct bnxt *bp, u16 fid, int *tx_rings); int bnxt_nq_rings_in_use(struct bnxt *bp); int bnxt_hwrm_set_coal(struct bnxt *); void bnxt_free_ctx_mem(struct bnxt *bp); +int bnxt_num_tx_to_cp(struct bnxt *bp, int tx); unsigned int bnxt_get_max_func_stat_ctxs(struct bnxt *bp); unsigned int bnxt_get_avail_stat_ctxs_for_en(struct bnxt *bp); unsigned int bnxt_get_max_func_cp_rings(struct bnxt *bp); @@ -2360,7 +2428,7 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init); void bnxt_tx_disable(struct bnxt 
*bp); void bnxt_tx_enable(struct bnxt *bp); void bnxt_sched_reset_txr(struct bnxt *bp, struct bnxt_tx_ring_info *txr, - int idx); + u16 curr); void bnxt_report_link(struct bnxt *bp); int bnxt_update_link(struct bnxt *bp, bool chng_link_state); int bnxt_hwrm_set_pause(struct bnxt *); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index f302dac56599..ae1bdda56d22 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -469,8 +469,6 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change, } bnxt_cancel_reservations(bp, false); bnxt_free_ctx_mem(bp); - kfree(bp->ctx); - bp->ctx = NULL; break; } case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: { @@ -741,7 +739,7 @@ static int bnxt_hwrm_get_nvm_cfg_ver(struct bnxt *bp, u32 *nvm_cfg_ver) } /* earlier devices present as an array of raw bytes */ - if (!BNXT_CHIP_P5(bp)) { + if (!BNXT_CHIP_P5_PLUS(bp)) { dim = 0; i = 0; bits *= 3; /* array of 3 version components */ @@ -761,7 +759,7 @@ static int bnxt_hwrm_get_nvm_cfg_ver(struct bnxt *bp, u32 *nvm_cfg_ver) goto exit; bnxt_copy_from_nvm_data(&ver, data, bits, bytes); - if (BNXT_CHIP_P5(bp)) { + if (BNXT_CHIP_P5_PLUS(bp)) { *nvm_cfg_ver <<= 8; *nvm_cfg_ver |= ver.vu8; } else { @@ -781,7 +779,7 @@ static int bnxt_dl_info_put(struct bnxt *bp, struct devlink_info_req *req, if (!strlen(buf)) return 0; - if ((bp->flags & BNXT_FLAG_CHIP_P5) && + if ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && (!strcmp(key, DEVLINK_INFO_VERSION_GENERIC_FW_NCSI) || !strcmp(key, DEVLINK_INFO_VERSION_GENERIC_FW_ROCE))) return 0; @@ -1007,7 +1005,7 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, if (rc) return rc; - if (BNXT_CHIP_P5(bp)) { + if (BNXT_CHIP_P5_PLUS(bp)) { rc = bnxt_dl_livepatch_info_put(bp, req, BNXT_FW_SRT_PATCH); if (rc) return rc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index f3f384773ac0..b0cea5b600cc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -511,7 +511,7 @@ static int bnxt_get_num_tpa_ring_stats(struct bnxt *bp) { if (BNXT_SUPPORTS_TPA(bp)) { if (bp->max_tpa_v2) { - if (BNXT_CHIP_P5_THOR(bp)) + if (BNXT_CHIP_P5(bp)) return BNXT_NUM_TPA_RING_STATS_P5; return BNXT_NUM_TPA_RING_STATS_P5_SR2; } @@ -528,7 +528,8 @@ static int bnxt_get_num_ring_stats(struct bnxt *bp) bnxt_get_num_tpa_ring_stats(bp); tx = NUM_RING_TX_HW_STATS; cmn = NUM_RING_CMN_SW_STATS; - return rx * bp->rx_nr_rings + tx * bp->tx_nr_rings + + return rx * bp->rx_nr_rings + + tx * (bp->tx_nr_rings_xdp + bp->tx_nr_rings_per_tc) + cmn * bp->cp_nr_rings; } @@ -923,6 +924,7 @@ static int bnxt_set_channels(struct net_device *dev, bool sh = false; int tx_xdp = 0; int rc = 0; + int tx_cp; if (channel->other_count) return -EINVAL; @@ -994,8 +996,9 @@ static int bnxt_set_channels(struct net_device *dev, if (tcs > 1) bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tcs + tx_xdp; - bp->cp_nr_rings = sh ? max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) : - bp->tx_nr_rings + bp->rx_nr_rings; + tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings); + bp->cp_nr_rings = sh ? max_t(int, tx_cp, bp->rx_nr_rings) : + tx_cp + bp->rx_nr_rings; /* After changing number of rx channels, update NTUPLE feature. 
*/ netdev_update_features(dev); @@ -1319,7 +1322,7 @@ u32 bnxt_get_rxfh_indir_size(struct net_device *dev) { struct bnxt *bp = netdev_priv(dev); - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) return ALIGN(bp->rx_nr_rings, BNXT_RSS_TABLE_ENTRIES_P5); return HW_HASH_INDEX_SIZE; } @@ -3940,8 +3943,8 @@ static int bnxt_run_loopback(struct bnxt *bp) int rc; cpr = &rxr->bnapi->cp_ring; - if (bp->flags & BNXT_FLAG_CHIP_P5) - cpr = cpr->cp_ring_arr[BNXT_RX_HDL]; + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) + cpr = rxr->rx_cpr; pkt_size = min(bp->dev->mtu + ETH_HLEN, bp->rx_copy_thresh); skb = netdev_alloc_skb(bp->dev, pkt_size); if (!skb) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index f3886710e778..a1ec39b46518 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -650,7 +650,7 @@ static int bnxt_map_ptp_regs(struct bnxt *bp) int rc, i; reg_arr = ptp->refclk_regs; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (BNXT_CHIP_P5(bp)) { rc = bnxt_map_regs(bp, reg_arr, 2, BNXT_PTP_GRC_WIN); if (rc) return rc; @@ -967,7 +967,7 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg) rc = err; goto out; } - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (BNXT_CHIP_P5(bp)) { spin_lock_bh(&ptp->ptp_lock); bnxt_refclk_read(bp, NULL, &ptp->current_time); WRITE_ONCE(ptp->old_time, ptp->current_time); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index c722b3b41730..175192ebaa77 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -536,7 +536,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset) if (rc) return rc; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { vf_msix = hw_resc->max_nqs - bnxt_nq_rings_in_use(bp); vf_ring_grps = 0; } else { @@ -565,7 +565,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset) req->min_l2_ctxs = cpu_to_le16(min); req->min_vnics = cpu_to_le16(min); req->min_stat_ctx = cpu_to_le16(min); - if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) req->min_hw_ring_grps = cpu_to_le16(min); } else { vf_cp_rings /= num_vfs; @@ -602,7 +602,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset) req->max_stat_ctx = cpu_to_le16(vf_stat_ctx); req->max_hw_ring_grps = cpu_to_le16(vf_ring_grps); req->max_rsscos_ctx = cpu_to_le16(vf_rss); - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) req->max_msix = cpu_to_le16(vf_msix / num_vfs); hwrm_req_hold(bp, req); @@ -630,7 +630,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset) le16_to_cpu(req->min_rsscos_ctx) * n; hw_resc->max_stat_ctxs -= le16_to_cpu(req->min_stat_ctx) * n; hw_resc->max_vnics -= le16_to_cpu(req->min_vnics) * n; - if (bp->flags & BNXT_FLAG_CHIP_P5) + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) hw_resc->max_nqs -= vf_msix; rc = pf->active_vfs; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index 6ba2b9398633..e89731492f5e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -42,7 +42,7 @@ static void bnxt_fill_msix_vecs(struct bnxt *bp, struct bnxt_msix_entry *ent) for (i = 0; i < num_msix; i++) { ent[i].vector = bp->irq_tbl[idx + i].vector; 
ent[i].ring_idx = idx + i; - if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { ent[i].db_offset = DB_PF_OFFSET_P5; if (BNXT_VF(bp)) ent[i].db_offset = DB_VF_OFFSET_P5; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 96f5ca778c67..4791f6a14e55 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -42,17 +42,17 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, /* fill up the first buffer */ prod = txr->tx_prod; - tx_buf = &txr->tx_buf_ring[prod]; + tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)]; tx_buf->nr_frags = num_frags; if (xdp) tx_buf->page = virt_to_head_page(xdp->data); - txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; + txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)]; flags = (len << TX_BD_LEN_SHIFT) | ((num_frags + 1) << TX_BD_FLAGS_BD_CNT_SHIFT) | bnxt_lhint_arr[len >> 9]; txbd->tx_bd_len_flags_type = cpu_to_le32(flags); - txbd->tx_bd_opaque = prod; + txbd->tx_bd_opaque = SET_TX_OPAQUE(bp, txr, prod, 1 + num_frags); txbd->tx_bd_haddr = cpu_to_le64(mapping); /* now let us fill up the frags into the next buffers */ @@ -67,10 +67,10 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, WRITE_ONCE(txr->tx_prod, prod); /* first fill up the first buffer */ - frag_tx_buf = &txr->tx_buf_ring[prod]; + frag_tx_buf = &txr->tx_buf_ring[RING_TX(bp, prod)]; frag_tx_buf->page = skb_frag_page(frag); - txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; + txbd = &txr->tx_desc_ring[TX_RING(bp, prod)][TX_IDX(prod)]; frag_len = skb_frag_size(frag); frag_mapping = skb_frag_dma_map(&pdev->dev, frag, 0, @@ -127,20 +127,20 @@ static void __bnxt_xmit_xdp_redirect(struct bnxt *bp, void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) { - struct bnxt_tx_ring_info *txr = bnapi->tx_ring; + struct bnxt_tx_ring_info *txr = bnapi->tx_ring[0]; struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; + u16 tx_hw_cons = txr->tx_hw_cons; bool rx_doorbell_needed = false; - int nr_pkts = bnapi->tx_pkts; struct bnxt_sw_tx_bd *tx_buf; u16 tx_cons = txr->tx_cons; u16 last_tx_cons = tx_cons; - int i, j, frags; + int j, frags; if (!budget) return; - for (i = 0; i < nr_pkts; i++) { - tx_buf = &txr->tx_buf_ring[tx_cons]; + while (RING_TX(bp, tx_cons) != tx_hw_cons) { + tx_buf = &txr->tx_buf_ring[RING_TX(bp, tx_cons)]; if (tx_buf->action == XDP_REDIRECT) { struct pci_dev *pdev = bp->pdev; @@ -160,17 +160,17 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) frags = tx_buf->nr_frags; for (j = 0; j < frags; j++) { tx_cons = NEXT_TX(tx_cons); - tx_buf = &txr->tx_buf_ring[tx_cons]; + tx_buf = &txr->tx_buf_ring[RING_TX(bp, tx_cons)]; page_pool_recycle_direct(rxr->page_pool, tx_buf->page); } } else { - bnxt_sched_reset_txr(bp, txr, i); + bnxt_sched_reset_txr(bp, txr, tx_cons); return; } tx_cons = NEXT_TX(tx_cons); } - bnapi->tx_pkts = 0; + bnapi->events &= ~BNXT_TX_CMP_EVENT; WRITE_ONCE(txr->tx_cons, tx_cons); if (rx_doorbell_needed) { tx_buf = &txr->tx_buf_ring[last_tx_cons]; @@ -249,7 +249,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, pdev = bp->pdev; offset = bp->rx_offset; - txr = rxr->bnapi->tx_ring; + txr = rxr->bnapi->tx_ring[0]; /* BNXT_RX_PAGE_MODE(bp) when XDP enabled */ orig_data = xdp.data; @@ -275,7 +275,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, case XDP_TX: rx_buf = &rxr->rx_buf_ring[cons]; mapping = rx_buf->mapping - 
bp->rx_dma_offset; - *event = 0; + *event &= BNXT_TX_CMP_EVENT; if (unlikely(xdp_buff_has_frags(&xdp))) { struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(&xdp); @@ -398,7 +398,7 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames, static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) { struct net_device *dev = bp->dev; - int tx_xdp = 0, rc, tc; + int tx_xdp = 0, tx_cp, rc, tc; struct bpf_prog *old; if (prog && !prog->aux->xdp_has_frags && @@ -446,7 +446,8 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) } bp->tx_nr_rings_xdp = tx_xdp; bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tc + tx_xdp; - bp->cp_nr_rings = max_t(int, bp->tx_nr_rings, bp->rx_nr_rings); + tx_cp = bnxt_num_tx_to_cp(bp, bp->tx_nr_rings); + bp->cp_nr_rings = max_t(int, tx_cp, bp->rx_nr_rings); bnxt_set_tpa_flags(bp); bnxt_set_ring_params(bp); diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 78c972bb1d96..aa5700ac9c00 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -1165,9 +1165,10 @@ struct macb_ptp_info { int (*get_ts_info)(struct net_device *dev, struct ethtool_ts_info *info); int (*get_hwtst)(struct net_device *netdev, - struct ifreq *ifr); + struct kernel_hwtstamp_config *tstamp_config); int (*set_hwtst)(struct net_device *netdev, - struct ifreq *ifr, int cmd); + struct kernel_hwtstamp_config *tstamp_config, + struct netlink_ext_ack *extack); }; struct macb_pm_data { @@ -1314,7 +1315,7 @@ struct macb { struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_clock_info; struct tsu_incr tsu_incr; - struct hwtstamp_config tstamp_config; + struct kernel_hwtstamp_config tstamp_config; /* RX queue filer rule set*/ struct ethtool_rx_fs_list rx_fs_list; @@ -1363,8 +1364,12 @@ static inline void gem_ptp_do_rxstamp(struct macb *bp, struct sk_buff *skb, stru gem_ptp_rxstamp(bp, skb, desc); } -int gem_get_hwtst(struct net_device *dev, struct ifreq *rq); -int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd); + +int gem_get_hwtst(struct net_device *dev, + struct kernel_hwtstamp_config *tstamp_config); +int gem_set_hwtst(struct net_device *dev, + struct kernel_hwtstamp_config *tstamp_config, + struct netlink_ext_ack *extack); #else static inline void gem_ptp_init(struct net_device *ndev) { } static inline void gem_ptp_remove(struct net_device *ndev) { } diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index cebae0f418f2..898debfd4db3 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3773,18 +3773,38 @@ static int macb_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (!netif_running(dev)) return -EINVAL; - if (bp->ptp_info) { - switch (cmd) { - case SIOCSHWTSTAMP: - return bp->ptp_info->set_hwtst(dev, rq, cmd); - case SIOCGHWTSTAMP: - return bp->ptp_info->get_hwtst(dev, rq); - } - } - return phylink_mii_ioctl(bp->phylink, rq, cmd); } +static int macb_hwtstamp_get(struct net_device *dev, + struct kernel_hwtstamp_config *cfg) +{ + struct macb *bp = netdev_priv(dev); + + if (!netif_running(dev)) + return -EINVAL; + + if (!bp->ptp_info) + return -EOPNOTSUPP; + + return bp->ptp_info->get_hwtst(dev, cfg); +} + +static int macb_hwtstamp_set(struct net_device *dev, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) +{ + struct macb *bp = netdev_priv(dev); + + if (!netif_running(dev)) + return -EINVAL; + + if (!bp->ptp_info) + return -EOPNOTSUPP; + + return 
bp->ptp_info->set_hwtst(dev, cfg, extack); +} + static inline void macb_set_txcsum_feature(struct macb *bp, netdev_features_t features) { @@ -3884,6 +3904,8 @@ static const struct net_device_ops macb_netdev_ops = { #endif .ndo_set_features = macb_set_features, .ndo_features_check = macb_features_check, + .ndo_hwtstamp_set = macb_hwtstamp_set, + .ndo_hwtstamp_get = macb_hwtstamp_get, }; /* Configure peripheral capabilities according to device tree @@ -4539,6 +4561,8 @@ static const struct net_device_ops at91ether_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = at91ether_poll_controller, #endif + .ndo_hwtstamp_set = macb_hwtstamp_set, + .ndo_hwtstamp_get = macb_hwtstamp_get, }; static int at91ether_clk_init(struct platform_device *pdev, struct clk **pclk, diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c index 51d26fa190d7..a63bf29c4fa8 100644 --- a/drivers/net/ethernet/cadence/macb_ptp.c +++ b/drivers/net/ethernet/cadence/macb_ptp.c @@ -374,19 +374,16 @@ static int gem_ptp_set_ts_mode(struct macb *bp, return 0; } -int gem_get_hwtst(struct net_device *dev, struct ifreq *rq) +int gem_get_hwtst(struct net_device *dev, + struct kernel_hwtstamp_config *tstamp_config) { - struct hwtstamp_config *tstamp_config; struct macb *bp = netdev_priv(dev); - tstamp_config = &bp->tstamp_config; + *tstamp_config = bp->tstamp_config; if ((bp->hw_dma_cap & HW_DMA_CAP_PTP) == 0) return -EOPNOTSUPP; - if (copy_to_user(rq->ifr_data, tstamp_config, sizeof(*tstamp_config))) - return -EFAULT; - else - return 0; + return 0; } static void gem_ptp_set_one_step_sync(struct macb *bp, u8 enable) @@ -401,22 +398,18 @@ static void gem_ptp_set_one_step_sync(struct macb *bp, u8 enable) macb_writel(bp, NCR, reg_val & ~MACB_BIT(OSSMODE)); } -int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd) +int gem_set_hwtst(struct net_device *dev, + struct kernel_hwtstamp_config *tstamp_config, + struct netlink_ext_ack *extack) { enum macb_bd_control tx_bd_control = TSTAMP_DISABLED; enum macb_bd_control rx_bd_control = TSTAMP_DISABLED; - struct hwtstamp_config *tstamp_config; struct macb *bp = netdev_priv(dev); u32 regval; - tstamp_config = &bp->tstamp_config; if ((bp->hw_dma_cap & HW_DMA_CAP_PTP) == 0) return -EOPNOTSUPP; - if (copy_from_user(tstamp_config, ifr->ifr_data, - sizeof(*tstamp_config))) - return -EFAULT; - switch (tstamp_config->tx_type) { case HWTSTAMP_TX_OFF: break; @@ -463,12 +456,11 @@ int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd) return -ERANGE; } + bp->tstamp_config = *tstamp_config; + if (gem_ptp_set_ts_mode(bp, tx_bd_control, rx_bd_control) != 0) return -ERANGE; - if (copy_to_user(ifr->ifr_data, tstamp_config, sizeof(*tstamp_config))) - return -EFAULT; - else - return 0; + return 0; } diff --git a/drivers/net/ethernet/google/gve/gve_dqo.h b/drivers/net/ethernet/google/gve/gve_dqo.h index 1eb4d5fd8561..c36b93f0de15 100644 --- a/drivers/net/ethernet/google/gve/gve_dqo.h +++ b/drivers/net/ethernet/google/gve/gve_dqo.h @@ -33,6 +33,9 @@ #define GVE_DEALLOCATE_COMPL_TIMEOUT 60 netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev); +netdev_features_t gve_features_check_dqo(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features); bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean); int gve_rx_poll_dqo(struct gve_notify_block *block, int budget); int gve_tx_alloc_rings_dqo(struct gve_priv *priv); diff --git a/drivers/net/ethernet/google/gve/gve_main.c 
b/drivers/net/ethernet/google/gve/gve_main.c index 2d42e733837b..cc169748ffa2 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -79,6 +79,18 @@ static int gve_verify_driver_compatibility(struct gve_priv *priv) return err; } +static netdev_features_t gve_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + struct gve_priv *priv = netdev_priv(dev); + + if (!gve_is_gqi(priv)) + return gve_features_check_dqo(skb, dev, features); + + return features; +} + static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct gve_priv *priv = netdev_priv(dev); @@ -1879,6 +1891,7 @@ err: static const struct net_device_ops gve_netdev_ops = { .ndo_start_xmit = gve_start_xmit, + .ndo_features_check = gve_features_check, .ndo_open = gve_open, .ndo_stop = gve_close, .ndo_get_stats64 = gve_get_stats, diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index 1e19b834a613..f59c4710f118 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -843,6 +843,16 @@ static bool gve_can_send_tso(const struct sk_buff *skb) return true; } +netdev_features_t gve_features_check_dqo(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + if (skb_is_gso(skb) && !gve_can_send_tso(skb)) + return features & ~NETIF_F_GSO_MASK; + + return features; +} + /* Attempt to transmit specified SKB. * * Returns 0 if the SKB was transmitted or dropped. @@ -854,11 +864,10 @@ static int gve_try_tx_skb(struct gve_priv *priv, struct gve_tx_ring *tx, int num_buffer_descs; int total_num_descs; - if (tx->dqo.qpl) { - if (skb_is_gso(skb)) - if (unlikely(ipv6_hopopt_jumbo_remove(skb))) - goto drop; + if (skb_is_gso(skb) && unlikely(ipv6_hopopt_jumbo_remove(skb))) + goto drop; + if (tx->dqo.qpl) { /* We do not need to verify the number of buffers used per * packet or per segment in case of TSO as with 2K size buffers * none of the TX packet rules would be violated. @@ -868,24 +877,8 @@ static int gve_try_tx_skb(struct gve_priv *priv, struct gve_tx_ring *tx, */ num_buffer_descs = DIV_ROUND_UP(skb->len, GVE_TX_BUF_SIZE_DQO); } else { - if (skb_is_gso(skb)) { - /* If TSO doesn't meet HW requirements, attempt to linearize the - * packet. 
- */ - if (unlikely(!gve_can_send_tso(skb) && - skb_linearize(skb) < 0)) { - net_err_ratelimited("%s: Failed to transmit TSO packet\n", - priv->dev->name); - goto drop; - } - - if (unlikely(ipv6_hopopt_jumbo_remove(skb))) - goto drop; - - num_buffer_descs = gve_num_buffer_descs_needed(skb); - } else { - num_buffer_descs = gve_num_buffer_descs_needed(skb); - + num_buffer_descs = gve_num_buffer_descs_needed(skb); + if (!skb_is_gso(skb)) { if (unlikely(num_buffer_descs > GVE_TX_MAX_DATA_DESCS)) { if (unlikely(skb_linearize(skb) < 0)) goto drop; diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 1bf424ac3145..6022944d04f8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -24,6 +24,7 @@ #define I40E_MAX_VEB 16 #define I40E_MAX_NUM_DESCRIPTORS 4096 +#define I40E_MAX_NUM_DESCRIPTORS_XL710 8160 #define I40E_MAX_CSR_SPACE (4 * 1024 * 1024 - 64 * 1024) #define I40E_DEFAULT_NUM_DESCRIPTORS 512 #define I40E_REQ_DESCRIPTOR_MULTIPLE 32 @@ -33,11 +34,11 @@ #define I40E_MIN_VSI_ALLOC 83 /* LAN, ATR, FCOE, 64 VF */ /* max 16 qps */ #define i40e_default_queues_per_vmdq(pf) \ - (((pf)->hw_features & I40E_HW_RSS_AQ_CAPABLE) ? 4 : 1) + (test_bit(I40E_HW_CAP_RSS_AQ, (pf)->hw.caps) ? 4 : 1) #define I40E_DEFAULT_QUEUES_PER_VF 4 #define I40E_MAX_VF_QUEUES 16 #define i40e_pf_get_max_q_per_tc(pf) \ - (((pf)->hw_features & I40E_HW_128_QP_RSS_CAPABLE) ? 128 : 64) + (test_bit(I40E_HW_CAP_128_QP_RSS, (pf)->hw.caps) ? 128 : 64) #define I40E_FDIR_RING_COUNT 32 #define I40E_MAX_AQ_BUF_SIZE 4096 #define I40E_AQ_LEN 256 @@ -78,7 +79,7 @@ #define I40E_MAX_BW_INACTIVE_ACCUM 4 /* accumulate 4 credits max */ /* driver state flags */ -enum i40e_state_t { +enum i40e_state { __I40E_TESTING, __I40E_CONFIG_BUSY, __I40E_CONFIG_DONE, @@ -126,7 +127,7 @@ enum i40e_state_t { BIT_ULL(__I40E_PF_RESET_AND_REBUILD_REQUESTED) /* VSI state flags */ -enum i40e_vsi_state_t { +enum i40e_vsi_state { __I40E_VSI_DOWN, __I40E_VSI_NEEDS_RESTART, __I40E_VSI_SYNCING_FILTERS, @@ -138,6 +139,60 @@ enum i40e_vsi_state_t { __I40E_VSI_STATE_SIZE__, }; +enum i40e_pf_flags { + I40E_FLAG_MSI_ENA, + I40E_FLAG_MSIX_ENA, + I40E_FLAG_RSS_ENA, + I40E_FLAG_VMDQ_ENA, + I40E_FLAG_SRIOV_ENA, + I40E_FLAG_DCB_CAPABLE, + I40E_FLAG_DCB_ENA, + I40E_FLAG_FD_SB_ENA, + I40E_FLAG_FD_ATR_ENA, + I40E_FLAG_MFP_ENA, + I40E_FLAG_HW_ATR_EVICT_ENA, + I40E_FLAG_VEB_MODE_ENA, + I40E_FLAG_VEB_STATS_ENA, + I40E_FLAG_LINK_POLLING_ENA, + I40E_FLAG_TRUE_PROMISC_ENA, + I40E_FLAG_LEGACY_RX_ENA, + I40E_FLAG_PTP_ENA, + I40E_FLAG_IWARP_ENA, + I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, + I40E_FLAG_SOURCE_PRUNING_DIS, + I40E_FLAG_TC_MQPRIO_ENA, + I40E_FLAG_FD_SB_INACTIVE, + I40E_FLAG_FD_SB_TO_CLOUD_FILTER, + I40E_FLAG_FW_LLDP_DIS, + I40E_FLAG_RS_FEC, + I40E_FLAG_BASE_R_FEC, + /* TOTAL_PORT_SHUTDOWN_ENA + * Allows to physically disable the link on the NIC's port. + * If enabled, (after link down request from the OS) + * no link, traffic or led activity is possible on that port. + * + * If I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA is set, the + * I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA must be explicitly forced + * to true and cannot be disabled by system admin at that time. 
+ * The functionalities are exclusive in terms of configuration, but + * they also have similar behavior (allowing to disable physical + * link of the port), with following differences: + * - LINK_DOWN_ON_CLOSE_ENA is configurable at host OS run-time and + * is supported by whole family of 7xx Intel Ethernet Controllers + * - TOTAL_PORT_SHUTDOWN_ENA may be enabled only before OS loads + * (in BIOS) only if motherboard's BIOS and NIC's FW has support of it + * - when LINK_DOWN_ON_CLOSE_ENABLED is used, the link is being brought + * down by sending phy_type=0 to NIC's FW + * - when TOTAL_PORT_SHUTDOWN_ENA is used, phy_type is not altered, + * instead the link is being brought down by clearing + * bit (I40E_AQ_PHY_ENABLE_LINK) in abilities field of + * i40e_aq_set_phy_config structure + */ + I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, + I40E_FLAG_VF_VLAN_PRUNING_ENA, + I40E_PF_FLAGS_NBITS, /* must be last */ +}; + enum i40e_interrupt_policy { I40E_INTERRUPT_BEST_CASE, I40E_INTERRUPT_MEDIUM, @@ -480,78 +535,7 @@ struct i40e_pf { struct timer_list service_timer; struct work_struct service_task; - u32 hw_features; -#define I40E_HW_RSS_AQ_CAPABLE BIT(0) -#define I40E_HW_128_QP_RSS_CAPABLE BIT(1) -#define I40E_HW_ATR_EVICT_CAPABLE BIT(2) -#define I40E_HW_WB_ON_ITR_CAPABLE BIT(3) -#define I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE BIT(4) -#define I40E_HW_NO_PCI_LINK_CHECK BIT(5) -#define I40E_HW_100M_SGMII_CAPABLE BIT(6) -#define I40E_HW_NO_DCB_SUPPORT BIT(7) -#define I40E_HW_USE_SET_LLDP_MIB BIT(8) -#define I40E_HW_GENEVE_OFFLOAD_CAPABLE BIT(9) -#define I40E_HW_PTP_L4_CAPABLE BIT(10) -#define I40E_HW_WOL_MC_MAGIC_PKT_WAKE BIT(11) -#define I40E_HW_HAVE_CRT_RETIMER BIT(13) -#define I40E_HW_OUTER_UDP_CSUM_CAPABLE BIT(14) -#define I40E_HW_PHY_CONTROLS_LEDS BIT(15) -#define I40E_HW_STOP_FW_LLDP BIT(16) -#define I40E_HW_PORT_ID_VALID BIT(17) -#define I40E_HW_RESTART_AUTONEG BIT(18) - - u32 flags; -#define I40E_FLAG_RX_CSUM_ENABLED BIT(0) -#define I40E_FLAG_MSI_ENABLED BIT(1) -#define I40E_FLAG_MSIX_ENABLED BIT(2) -#define I40E_FLAG_RSS_ENABLED BIT(3) -#define I40E_FLAG_VMDQ_ENABLED BIT(4) -#define I40E_FLAG_SRIOV_ENABLED BIT(5) -#define I40E_FLAG_DCB_CAPABLE BIT(6) -#define I40E_FLAG_DCB_ENABLED BIT(7) -#define I40E_FLAG_FD_SB_ENABLED BIT(8) -#define I40E_FLAG_FD_ATR_ENABLED BIT(9) -#define I40E_FLAG_MFP_ENABLED BIT(10) -#define I40E_FLAG_HW_ATR_EVICT_ENABLED BIT(11) -#define I40E_FLAG_VEB_MODE_ENABLED BIT(12) -#define I40E_FLAG_VEB_STATS_ENABLED BIT(13) -#define I40E_FLAG_LINK_POLLING_ENABLED BIT(14) -#define I40E_FLAG_TRUE_PROMISC_SUPPORT BIT(15) -#define I40E_FLAG_LEGACY_RX BIT(16) -#define I40E_FLAG_PTP BIT(17) -#define I40E_FLAG_IWARP_ENABLED BIT(18) -#define I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED BIT(19) -#define I40E_FLAG_SOURCE_PRUNING_DISABLED BIT(20) -#define I40E_FLAG_TC_MQPRIO BIT(21) -#define I40E_FLAG_FD_SB_INACTIVE BIT(22) -#define I40E_FLAG_FD_SB_TO_CLOUD_FILTER BIT(23) -#define I40E_FLAG_DISABLE_FW_LLDP BIT(24) -#define I40E_FLAG_RS_FEC BIT(25) -#define I40E_FLAG_BASE_R_FEC BIT(26) -/* TOTAL_PORT_SHUTDOWN - * Allows to physically disable the link on the NIC's port. - * If enabled, (after link down request from the OS) - * no link, traffic or led activity is possible on that port. - * - * If I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED is set, the - * I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED must be explicitly forced to true - * and cannot be disabled by system admin at that time. 
- * The functionalities are exclusive in terms of configuration, but they also - * have similar behavior (allowing to disable physical link of the port), - * with following differences: - * - LINK_DOWN_ON_CLOSE_ENABLED is configurable at host OS run-time and is - * supported by whole family of 7xx Intel Ethernet Controllers - * - TOTAL_PORT_SHUTDOWN may be enabled only before OS loads (in BIOS) - * only if motherboard's BIOS and NIC's FW has support of it - * - when LINK_DOWN_ON_CLOSE_ENABLED is used, the link is being brought down - * by sending phy_type=0 to NIC's FW - * - when TOTAL_PORT_SHUTDOWN is used, phy_type is not altered, instead - * the link is being brought down by clearing bit (I40E_AQ_PHY_ENABLE_LINK) - * in abilities field of i40e_aq_set_phy_config structure - */ -#define I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED BIT(27) -#define I40E_FLAG_VF_VLAN_PRUNING BIT(28) - + DECLARE_BITMAP(flags, I40E_PF_FLAGS_NBITS); struct i40e_client_instance *cinst; bool stat_offsets_loaded; struct i40e_hw_port_stats stats; @@ -1267,7 +1251,7 @@ struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr); void i40e_vlan_stripping_enable(struct i40e_vsi *vsi); static inline bool i40e_is_sw_dcb(struct i40e_pf *pf) { - return !!(pf->flags & I40E_FLAG_DISABLE_FW_LLDP); + return test_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags); } #ifdef CONFIG_I40E_DCB @@ -1301,7 +1285,7 @@ int i40e_set_partition_bw_setting(struct i40e_pf *pf); int i40e_commit_partition_bw_setting(struct i40e_pf *pf); void i40e_print_link_message(struct i40e_vsi *vsi, bool isup); -void i40e_set_fec_in_flags(u8 fec_cfg, u32 *flags); +void i40e_set_fec_in_flags(u8 fec_cfg, unsigned long *flags); static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi) { @@ -1321,13 +1305,13 @@ int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, * i40e_is_tc_mqprio_enabled - check if TC MQPRIO is enabled on PF * @pf: pointer to a pf. * - * Check and return value of flag I40E_FLAG_TC_MQPRIO. + * Check and return state of flag I40E_FLAG_TC_MQPRIO. * - * Return: I40E_FLAG_TC_MQPRIO set state. 
+ * Return: true/false if I40E_FLAG_TC_MQPRIO is set or not **/ -static inline u32 i40e_is_tc_mqprio_enabled(struct i40e_pf *pf) +static inline bool i40e_is_tc_mqprio_enabled(struct i40e_pf *pf) { - return pf->flags & I40E_FLAG_TC_MQPRIO; + return test_bit(I40E_FLAG_TC_MQPRIO_ENA, pf->flags); } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 9ce6e633cc2f..896c43905309 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -17,29 +17,16 @@ static void i40e_resume_aq(struct i40e_hw *hw); static void i40e_adminq_init_regs(struct i40e_hw *hw) { /* set head and tail registers in our local struct */ - if (i40e_is_vf(hw)) { - hw->aq.asq.tail = I40E_VF_ATQT1; - hw->aq.asq.head = I40E_VF_ATQH1; - hw->aq.asq.len = I40E_VF_ATQLEN1; - hw->aq.asq.bal = I40E_VF_ATQBAL1; - hw->aq.asq.bah = I40E_VF_ATQBAH1; - hw->aq.arq.tail = I40E_VF_ARQT1; - hw->aq.arq.head = I40E_VF_ARQH1; - hw->aq.arq.len = I40E_VF_ARQLEN1; - hw->aq.arq.bal = I40E_VF_ARQBAL1; - hw->aq.arq.bah = I40E_VF_ARQBAH1; - } else { - hw->aq.asq.tail = I40E_PF_ATQT; - hw->aq.asq.head = I40E_PF_ATQH; - hw->aq.asq.len = I40E_PF_ATQLEN; - hw->aq.asq.bal = I40E_PF_ATQBAL; - hw->aq.asq.bah = I40E_PF_ATQBAH; - hw->aq.arq.tail = I40E_PF_ARQT; - hw->aq.arq.head = I40E_PF_ARQH; - hw->aq.arq.len = I40E_PF_ARQLEN; - hw->aq.arq.bal = I40E_PF_ARQBAL; - hw->aq.arq.bah = I40E_PF_ARQBAH; - } + hw->aq.asq.tail = I40E_PF_ATQT; + hw->aq.asq.head = I40E_PF_ATQH; + hw->aq.asq.len = I40E_PF_ATQLEN; + hw->aq.asq.bal = I40E_PF_ATQBAL; + hw->aq.asq.bah = I40E_PF_ATQBAH; + hw->aq.arq.tail = I40E_PF_ARQT; + hw->aq.arq.head = I40E_PF_ARQH; + hw->aq.arq.len = I40E_PF_ARQLEN; + hw->aq.arq.bal = I40E_PF_ARQBAL; + hw->aq.arq.bah = I40E_PF_ARQBAH; } /** @@ -503,44 +490,76 @@ shutdown_arq_out: } /** - * i40e_set_hw_flags - set HW flags + * i40e_set_hw_caps - set HW flags * @hw: pointer to the hardware structure **/ -static void i40e_set_hw_flags(struct i40e_hw *hw) +static void i40e_set_hw_caps(struct i40e_hw *hw) { - struct i40e_adminq_info *aq = &hw->aq; - - hw->flags = 0; + bitmap_zero(hw->caps, I40E_HW_CAPS_NBITS); switch (hw->mac.type) { case I40E_MAC_XL710: - if (aq->api_maj_ver > 1 || - (aq->api_maj_ver == 1 && - aq->api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710)) { - hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE; - hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE; + if (i40e_is_aq_api_ver_ge(hw, 1, + I40E_MINOR_VER_GET_LINK_INFO_XL710)) { + set_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps); + set_bit(I40E_HW_CAP_FW_LLDP_STOPPABLE, hw->caps); /* The ability to RX (not drop) 802.1ad frames */ - hw->flags |= I40E_HW_FLAG_802_1AD_CAPABLE; + set_bit(I40E_HW_CAP_802_1AD, hw->caps); + } + if (i40e_is_aq_api_ver_ge(hw, 1, 5)) { + /* Supported in FW API version higher than 1.4 */ + set_bit(I40E_HW_CAP_GENEVE_OFFLOAD, hw->caps); + } + if (i40e_is_fw_ver_lt(hw, 4, 33)) { + set_bit(I40E_HW_CAP_RESTART_AUTONEG, hw->caps); + /* No DCB support for FW < v4.33 */ + set_bit(I40E_HW_CAP_NO_DCB_SUPPORT, hw->caps); + } + if (i40e_is_fw_ver_lt(hw, 4, 3)) { + /* Disable FW LLDP if FW < v4.3 */ + set_bit(I40E_HW_CAP_STOP_FW_LLDP, hw->caps); + } + if (i40e_is_fw_ver_ge(hw, 4, 40)) { + /* Use the FW Set LLDP MIB API if FW >= v4.40 */ + set_bit(I40E_HW_CAP_USE_SET_LLDP_MIB, hw->caps); + } + if (i40e_is_fw_ver_ge(hw, 6, 0)) { + /* Enable PTP L4 if FW > v6.0 */ + set_bit(I40E_HW_CAP_PTP_L4, hw->caps); } break; case I40E_MAC_X722: - hw->flags |= 
I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE | - I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK; + set_bit(I40E_HW_CAP_AQ_SRCTL_ACCESS_ENABLE, hw->caps); + set_bit(I40E_HW_CAP_NVM_READ_REQUIRES_LOCK, hw->caps); + set_bit(I40E_HW_CAP_RSS_AQ, hw->caps); + set_bit(I40E_HW_CAP_128_QP_RSS, hw->caps); + set_bit(I40E_HW_CAP_ATR_EVICT, hw->caps); + set_bit(I40E_HW_CAP_WB_ON_ITR, hw->caps); + set_bit(I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE, hw->caps); + set_bit(I40E_HW_CAP_NO_PCI_LINK_CHECK, hw->caps); + set_bit(I40E_HW_CAP_USE_SET_LLDP_MIB, hw->caps); + set_bit(I40E_HW_CAP_GENEVE_OFFLOAD, hw->caps); + set_bit(I40E_HW_CAP_PTP_L4, hw->caps); + set_bit(I40E_HW_CAP_WOL_MC_MAGIC_PKT_WAKE, hw->caps); + set_bit(I40E_HW_CAP_OUTER_UDP_CSUM, hw->caps); + + if (rd32(hw, I40E_GLQF_FDEVICTENA(1)) != + I40E_FDEVICT_PCTYPE_DEFAULT) { + hw_warn(hw, "FD EVICT PCTYPES are not right, disable FD HW EVICT\n"); + clear_bit(I40E_HW_CAP_ATR_EVICT, hw->caps); + } - if (aq->api_maj_ver > 1 || - (aq->api_maj_ver == 1 && - aq->api_min_ver >= I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722)) - hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE; + if (i40e_is_aq_api_ver_ge(hw, 1, + I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722)) + set_bit(I40E_HW_CAP_FW_LLDP_STOPPABLE, hw->caps); - if (aq->api_maj_ver > 1 || - (aq->api_maj_ver == 1 && - aq->api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_X722)) - hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE; + if (i40e_is_aq_api_ver_ge(hw, 1, + I40E_MINOR_VER_GET_LINK_INFO_X722)) + set_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps); - if (aq->api_maj_ver > 1 || - (aq->api_maj_ver == 1 && - aq->api_min_ver >= I40E_MINOR_VER_FW_REQUEST_FEC_X722)) - hw->flags |= I40E_HW_FLAG_X722_FEC_REQUEST_CAPABLE; + if (i40e_is_aq_api_ver_ge(hw, 1, + I40E_MINOR_VER_FW_REQUEST_FEC_X722)) + set_bit(I40E_HW_CAP_X722_FEC_REQUEST, hw->caps); fallthrough; default: @@ -548,22 +567,18 @@ static void i40e_set_hw_flags(struct i40e_hw *hw) } /* Newer versions of firmware require lock when reading the NVM */ - if (aq->api_maj_ver > 1 || - (aq->api_maj_ver == 1 && - aq->api_min_ver >= 5)) - hw->flags |= I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK; - - if (aq->api_maj_ver > 1 || - (aq->api_maj_ver == 1 && - aq->api_min_ver >= 8)) { - hw->flags |= I40E_HW_FLAG_FW_LLDP_PERSISTENT; - hw->flags |= I40E_HW_FLAG_DROP_MODE; - } + if (i40e_is_aq_api_ver_ge(hw, 1, 5)) + set_bit(I40E_HW_CAP_NVM_READ_REQUIRES_LOCK, hw->caps); + + /* The ability to RX (not drop) 802.1ad frames was added in API 1.7 */ + if (i40e_is_aq_api_ver_ge(hw, 1, 7)) + set_bit(I40E_HW_CAP_802_1AD, hw->caps); - if (aq->api_maj_ver > 1 || - (aq->api_maj_ver == 1 && - aq->api_min_ver >= 9)) - hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_EXTENDED; + if (i40e_is_aq_api_ver_ge(hw, 1, 8)) + set_bit(I40E_HW_CAP_FW_LLDP_PERSISTENT, hw->caps); + + if (i40e_is_aq_api_ver_ge(hw, 1, 9)) + set_bit(I40E_HW_CAP_AQ_PHY_ACCESS_EXTENDED, hw->caps); } /** @@ -633,7 +648,7 @@ int i40e_init_adminq(struct i40e_hw *hw) /* Some features were introduced in different FW API version * for different MAC type. 
*/ - i40e_set_hw_flags(hw); + i40e_set_hw_caps(hw); /* get the NVM version info */ i40e_read_nvm_word(hw, I40E_SR_NVM_DEV_STARTER_VERSION, @@ -648,25 +663,7 @@ int i40e_init_adminq(struct i40e_hw *hw) &oem_lo); hw->nvm.oem_ver = ((u32)oem_hi << 16) | oem_lo; - if (hw->mac.type == I40E_MAC_XL710 && - hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && - hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) { - hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE; - hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE; - } - if (hw->mac.type == I40E_MAC_X722 && - hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && - hw->aq.api_min_ver >= I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722) { - hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE; - } - - /* The ability to RX (not drop) 802.1ad frames was added in API 1.7 */ - if (hw->aq.api_maj_ver > 1 || - (hw->aq.api_maj_ver == 1 && - hw->aq.api_min_ver >= 7)) - hw->flags |= I40E_HW_FLAG_802_1AD_CAPABLE; - - if (hw->aq.api_maj_ver > I40E_FW_API_VERSION_MAJOR) { + if (i40e_is_aq_api_ver_ge(hw, I40E_FW_API_VERSION_MAJOR + 1, 0)) { ret_code = -EIO; goto init_adminq_free_arq; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index d7e24d661724..e171f4814e21 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1374,8 +1374,8 @@ i40e_aq_get_phy_capabilities(struct i40e_hw *hw, if (report_init) { if (hw->mac.type == I40E_MAC_XL710 && - hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && - hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) { + i40e_is_aq_api_ver_ge(hw, I40E_FW_API_VERSION_MAJOR, + I40E_MINOR_VER_GET_LINK_INFO_XL710)) { status = i40e_aq_get_link_info(hw, true, NULL, NULL); } else { hw->phy.phy_types = le32_to_cpu(abilities->phy_type); @@ -1645,12 +1645,11 @@ int i40e_aq_get_link_info(struct i40e_hw *hw, else hw_link_info->lse_enable = false; - if ((hw->mac.type == I40E_MAC_XL710) && - (hw->aq.fw_maj_ver < 4 || (hw->aq.fw_maj_ver == 4 && - hw->aq.fw_min_ver < 40)) && hw_link_info->phy_type == 0xE) + if (hw->mac.type == I40E_MAC_XL710 && i40e_is_fw_ver_lt(hw, 4, 40) && + hw_link_info->phy_type == 0xE) hw_link_info->phy_type = I40E_PHY_TYPE_10GBASE_SFPP_CU; - if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE && + if (test_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps) && hw->mac.type != I40E_MAC_X722) { __le32 tmp; @@ -1750,21 +1749,6 @@ int i40e_aq_set_phy_debug(struct i40e_hw *hw, u8 cmd_flags, } /** - * i40e_is_aq_api_ver_ge - * @aq: pointer to AdminQ info containing HW API version to compare - * @maj: API major value - * @min: API minor value - * - * Assert whether current HW API version is greater/equal than provided. 
- **/ -static bool i40e_is_aq_api_ver_ge(struct i40e_adminq_info *aq, u16 maj, - u16 min) -{ - return (aq->api_maj_ver > maj || - (aq->api_maj_ver == maj && aq->api_min_ver >= min)); -} - -/** * i40e_aq_add_vsi * @hw: pointer to the hw struct * @vsi_ctx: pointer to a vsi context struct @@ -1890,14 +1874,14 @@ int i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw, if (set) { flags |= I40E_AQC_SET_VSI_PROMISC_UNICAST; - if (rx_only_promisc && i40e_is_aq_api_ver_ge(&hw->aq, 1, 5)) + if (rx_only_promisc && i40e_is_aq_api_ver_ge(hw, 1, 5)) flags |= I40E_AQC_SET_VSI_PROMISC_RX_ONLY; } cmd->promiscuous_flags = cpu_to_le16(flags); cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_UNICAST); - if (i40e_is_aq_api_ver_ge(&hw->aq, 1, 5)) + if (i40e_is_aq_api_ver_ge(hw, 1, 5)) cmd->valid_flags |= cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_RX_ONLY); @@ -2000,13 +1984,13 @@ int i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw, if (enable) { flags |= I40E_AQC_SET_VSI_PROMISC_UNICAST; - if (i40e_is_aq_api_ver_ge(&hw->aq, 1, 5)) + if (i40e_is_aq_api_ver_ge(hw, 1, 5)) flags |= I40E_AQC_SET_VSI_PROMISC_RX_ONLY; } cmd->promiscuous_flags = cpu_to_le16(flags); cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_UNICAST); - if (i40e_is_aq_api_ver_ge(&hw->aq, 1, 5)) + if (i40e_is_aq_api_ver_ge(hw, 1, 5)) cmd->valid_flags |= cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_RX_ONLY); cmd->seid = cpu_to_le16(seid); @@ -2253,7 +2237,7 @@ int i40e_aq_set_switch_config(struct i40e_hw *hw, scfg->flags = cpu_to_le16(flags); scfg->valid_flags = cpu_to_le16(valid_flags); scfg->mode = mode; - if (hw->flags & I40E_HW_FLAG_802_1AD_CAPABLE) { + if (test_bit(I40E_HW_CAP_802_1AD, hw->caps)) { scfg->switch_tag = cpu_to_le16(hw->switch_tag); scfg->first_tag = cpu_to_le16(hw->first_tag); scfg->second_tag = cpu_to_le16(hw->second_tag); @@ -3637,7 +3621,7 @@ i40e_aq_restore_lldp(struct i40e_hw *hw, u8 *setting, bool restore, (struct i40e_aqc_lldp_restore *)&desc.params.raw; int status; - if (!(hw->flags & I40E_HW_FLAG_FW_LLDP_PERSISTENT)) { + if (!test_bit(I40E_HW_CAP_FW_LLDP_PERSISTENT, hw->caps)) { i40e_debug(hw, I40E_DEBUG_ALL, "Restore LLDP not supported by current FW version.\n"); return -ENODEV; @@ -3680,7 +3664,7 @@ int i40e_aq_stop_lldp(struct i40e_hw *hw, bool shutdown_agent, cmd->command |= I40E_AQ_LLDP_AGENT_SHUTDOWN; if (persist) { - if (hw->flags & I40E_HW_FLAG_FW_LLDP_PERSISTENT) + if (test_bit(I40E_HW_CAP_FW_LLDP_PERSISTENT, hw->caps)) cmd->command |= I40E_AQ_LLDP_AGENT_STOP_PERSIST; else i40e_debug(hw, I40E_DEBUG_ALL, @@ -3713,7 +3697,7 @@ int i40e_aq_start_lldp(struct i40e_hw *hw, bool persist, cmd->command = I40E_AQ_LLDP_AGENT_START; if (persist) { - if (hw->flags & I40E_HW_FLAG_FW_LLDP_PERSISTENT) + if (test_bit(I40E_HW_CAP_FW_LLDP_PERSISTENT, hw->caps)) cmd->command |= I40E_AQ_LLDP_AGENT_START_PERSIST; else i40e_debug(hw, I40E_DEBUG_ALL, @@ -3741,7 +3725,7 @@ i40e_aq_set_dcb_parameters(struct i40e_hw *hw, bool dcb_enable, (struct i40e_aqc_set_dcb_parameters *)&desc.params.raw; int status; - if (!(hw->flags & I40E_HW_FLAG_FW_LLDP_STOPPABLE)) + if (!test_bit(I40E_HW_CAP_FW_LLDP_STOPPABLE, hw->caps)) return -ENODEV; i40e_fill_default_direct_cmd_desc(&desc, @@ -5043,7 +5027,7 @@ static int i40e_led_get_reg(struct i40e_hw *hw, u16 led_addr, u32 i; *reg_val = 0; - if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) { + if (test_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps)) { status = i40e_aq_get_phy_register(hw, I40E_AQ_PHY_REG_ACCESS_EXTERNAL, @@ -5076,7 +5060,7 @@ static int i40e_led_set_reg(struct i40e_hw *hw, u16 
led_addr, int status; u32 i; - if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) { + if (test_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps)) { status = i40e_aq_set_phy_register(hw, I40E_AQ_PHY_REG_ACCESS_EXTERNAL, @@ -5115,7 +5099,7 @@ int i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr, u8 port_num; u32 i; - if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) { + if (test_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps)) { status = i40e_aq_get_phy_register(hw, I40E_AQ_PHY_REG_ACCESS_EXTERNAL, @@ -5238,14 +5222,14 @@ int i40e_aq_rx_ctl_read_register(struct i40e_hw *hw, **/ u32 i40e_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr) { - bool use_register; + bool use_register = false; int status = 0; int retry = 5; u32 val = 0; - use_register = (((hw->aq.api_maj_ver == 1) && - (hw->aq.api_min_ver < 5)) || - (hw->mac.type == I40E_MAC_X722)); + if (i40e_is_aq_api_ver_lt(hw, 1, 5) || hw->mac.type == I40E_MAC_X722) + use_register = true; + if (!use_register) { do_retry: status = i40e_aq_rx_ctl_read_register(hw, reg_addr, &val, NULL); @@ -5300,13 +5284,13 @@ int i40e_aq_rx_ctl_write_register(struct i40e_hw *hw, **/ void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val) { - bool use_register; + bool use_register = false; int status = 0; int retry = 5; - use_register = (((hw->aq.api_maj_ver == 1) && - (hw->aq.api_min_ver < 5)) || - (hw->mac.type == I40E_MAC_X722)); + if (i40e_is_aq_api_ver_lt(hw, 1, 5) || hw->mac.type == I40E_MAC_X722) + use_register = true; + if (!use_register) { do_retry: status = i40e_aq_rx_ctl_write_register(hw, reg_addr, @@ -5335,7 +5319,7 @@ static void i40e_mdio_if_number_selection(struct i40e_hw *hw, bool set_mdio, struct i40e_aqc_phy_register_access *cmd) { if (set_mdio && cmd->phy_interface == I40E_AQ_PHY_REG_ACCESS_EXTERNAL) { - if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_EXTENDED) + if (test_bit(I40E_HW_CAP_AQ_PHY_ACCESS_EXTENDED, hw->caps)) cmd->cmd_flags |= I40E_AQ_PHY_REG_ACCESS_SET_MDIO_IF_NUMBER | ((mdio_num << diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c index 68602fc375f6..498728e16a37 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c @@ -804,14 +804,11 @@ int i40e_get_dcb_config(struct i40e_hw *hw) int ret = 0; /* If Firmware version < v4.33 on X710/XL710, IEEE only */ - if ((hw->mac.type == I40E_MAC_XL710) && - (((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver < 33)) || - (hw->aq.fw_maj_ver < 4))) + if (hw->mac.type == I40E_MAC_XL710 && i40e_is_fw_ver_lt(hw, 4, 33)) return i40e_get_ieee_dcb_config(hw); /* If Firmware version == v4.33 on X710/XL710, use old CEE struct */ - if ((hw->mac.type == I40E_MAC_XL710) && - ((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver == 33))) { + if (hw->mac.type == I40E_MAC_XL710 && i40e_is_fw_ver_eq(hw, 4, 33)) { ret = i40e_aq_get_cee_dcb_config(hw, &cee_v1_cfg, sizeof(cee_v1_cfg), NULL); if (!ret) { @@ -877,7 +874,7 @@ int i40e_init_dcb(struct i40e_hw *hw, bool enable_mib_change) return -EOPNOTSUPP; /* Read LLDP NVM area */ - if (hw->flags & I40E_HW_FLAG_FW_LLDP_PERSISTENT) { + if (test_bit(I40E_HW_CAP_FW_LLDP_PERSISTENT, hw->caps)) { u8 offset = 0; if (hw->mac.type == I40E_MAC_XL710) diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c index 077a95dad32c..4721845fda6e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c @@ -310,8 +310,8 @@ static u8 i40e_dcbnl_getstate(struct net_device *netdev) struct i40e_pf 
*pf = i40e_netdev_to_pf(netdev); dev_dbg(&pf->pdev->dev, "DCB state=%d\n", - !!(pf->flags & I40E_FLAG_DCB_ENABLED)); - return !!(pf->flags & I40E_FLAG_DCB_ENABLED); + test_bit(I40E_FLAG_DCB_ENA, pf->flags) ? 1 : 0); + return test_bit(I40E_FLAG_DCB_ENA, pf->flags) ? 1 : 0; } /** @@ -331,19 +331,19 @@ static u8 i40e_dcbnl_setstate(struct net_device *netdev, u8 state) return ret; dev_dbg(&pf->pdev->dev, "new state=%d current state=%d\n", - state, (pf->flags & I40E_FLAG_DCB_ENABLED) ? 1 : 0); + state, test_bit(I40E_FLAG_DCB_ENA, pf->flags) ? 1 : 0); /* Nothing to do */ - if (!state == !(pf->flags & I40E_FLAG_DCB_ENABLED)) + if (!state == !test_bit(I40E_FLAG_DCB_ENA, pf->flags)) return ret; if (i40e_is_sw_dcb(pf)) { if (state) { - pf->flags |= I40E_FLAG_DCB_ENABLED; + set_bit(I40E_FLAG_DCB_ENA, pf->flags); memcpy(&pf->hw.desired_dcbx_config, &pf->hw.local_dcbx_config, sizeof(struct i40e_dcbx_config)); } else { - pf->flags &= ~I40E_FLAG_DCB_ENABLED; + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); } } else { /* Cannot directly manipulate FW LLDP Agent */ @@ -653,7 +653,7 @@ static u8 i40e_dcbnl_get_cap(struct net_device *netdev, int capid, u8 *cap) { struct i40e_pf *pf = i40e_netdev_to_pf(netdev); - if (!(pf->flags & I40E_FLAG_DCB_CAPABLE)) + if (!test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) return I40E_DCBNL_STATUS_ERROR; switch (capid) { @@ -693,7 +693,7 @@ static int i40e_dcbnl_getnumtcs(struct net_device *netdev, int tcid, u8 *num) { struct i40e_pf *pf = i40e_netdev_to_pf(netdev); - if (!(pf->flags & I40E_FLAG_DCB_CAPABLE)) + if (!test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) return -EINVAL; *num = I40E_MAX_TRAFFIC_CLASS; @@ -827,15 +827,12 @@ static void i40e_dcbnl_get_perm_hw_addr(struct net_device *dev, u8 *perm_addr) { struct i40e_pf *pf = i40e_netdev_to_pf(dev); - int i, j; + int i; memset(perm_addr, 0xff, MAX_ADDR_LEN); for (i = 0; i < dev->addr_len; i++) perm_addr[i] = pf->hw.mac.perm_addr[i]; - - for (j = 0; j < dev->addr_len; j++, i++) - perm_addr[i] = pf->hw.mac.san_addr[j]; } static const struct dcbnl_rtnl_ops dcbnl_ops = { @@ -891,11 +888,11 @@ void i40e_dcbnl_set_all(struct i40e_vsi *vsi) return; /* DCB not enabled */ - if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) + if (!test_bit(I40E_FLAG_DCB_ENA, pf->flags)) return; /* MFP mode but not an iSCSI PF so return */ - if ((pf->flags & I40E_FLAG_MFP_ENABLED) && !(hw->func_caps.iscsi)) + if (test_bit(I40E_FLAG_MFP_ENA, pf->flags) && !(hw->func_caps.iscsi)) return; dcbxcfg = &hw->local_dcbx_config; @@ -1002,7 +999,7 @@ void i40e_dcbnl_flush_apps(struct i40e_pf *pf, int i; /* MFP mode but not an iSCSI PF so return */ - if ((pf->flags & I40E_FLAG_MFP_ENABLED) && !(pf->hw.func_caps.iscsi)) + if (test_bit(I40E_FLAG_MFP_ENA, pf->flags) && !(pf->hw.func_caps.iscsi)) return; for (i = 0; i < old_cfg->numapps; i++) { @@ -1025,7 +1022,7 @@ void i40e_dcbnl_setup(struct i40e_vsi *vsi) struct i40e_pf *pf = i40e_netdev_to_pf(dev); /* Not DCB capable */ - if (!(pf->flags & I40E_FLAG_DCB_CAPABLE)) + if (!test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) return; dev->dcbnl_ops = &dcbnl_ops; diff --git a/drivers/net/ethernet/intel/i40e/i40e_debug.h b/drivers/net/ethernet/intel/i40e/i40e_debug.h index 27ebc72d8bfe..e9871dfb32bd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debug.h +++ b/drivers/net/ethernet/intel/i40e/i40e_debug.h @@ -37,6 +37,7 @@ struct i40e_hw; struct device *i40e_hw_to_dev(struct i40e_hw *hw); #define hw_dbg(hw, S, A...) dev_dbg(i40e_hw_to_dev(hw), S, ##A) +#define hw_warn(hw, S, A...) 
dev_warn(i40e_hw_to_dev(hw), S, ##A) #define i40e_debug(h, m, s, ...) \ do { \ diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 999c9708def5..88240571721a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -147,9 +147,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) " state[%d] = %08lx\n", i, vsi->state[i]); if (vsi == pf->vsi[pf->lan_vsi]) - dev_info(&pf->pdev->dev, " MAC address: %pM SAN MAC: %pM Port MAC: %pM\n", + dev_info(&pf->pdev->dev, " MAC address: %pM Port MAC: %pM\n", pf->hw.mac.addr, - pf->hw.mac.san_addr, pf->hw.mac.port_addr); hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) { dev_info(&pf->pdev->dev, @@ -820,8 +819,8 @@ static ssize_t i40e_dbg_command_write(struct file *filp, /* By default we are in VEPA mode, if this is the first VF/VMDq * VSI to be added switch to VEB mode. */ - if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { - pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; + if (!test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) { + set_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index fd7163128c4d..eb9a7b32af73 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -430,35 +430,35 @@ static const char i40e_gstrings_test[][ETH_GSTRING_LEN] = { struct i40e_priv_flags { char flag_string[ETH_GSTRING_LEN]; - u64 flag; + u8 bitno; bool read_only; }; -#define I40E_PRIV_FLAG(_name, _flag, _read_only) { \ +#define I40E_PRIV_FLAG(_name, _bitno, _read_only) { \ .flag_string = _name, \ - .flag = _flag, \ + .bitno = _bitno, \ .read_only = _read_only, \ } static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = { /* NOTE: MFP setting cannot be changed */ - I40E_PRIV_FLAG("MFP", I40E_FLAG_MFP_ENABLED, 1), + I40E_PRIV_FLAG("MFP", I40E_FLAG_MFP_ENA, 1), I40E_PRIV_FLAG("total-port-shutdown", - I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED, 1), - I40E_PRIV_FLAG("LinkPolling", I40E_FLAG_LINK_POLLING_ENABLED, 0), - I40E_PRIV_FLAG("flow-director-atr", I40E_FLAG_FD_ATR_ENABLED, 0), - I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0), - I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0), + I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, 1), + I40E_PRIV_FLAG("LinkPolling", I40E_FLAG_LINK_POLLING_ENA, 0), + I40E_PRIV_FLAG("flow-director-atr", I40E_FLAG_FD_ATR_ENA, 0), + I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENA, 0), + I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENA, 0), I40E_PRIV_FLAG("link-down-on-close", - I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED, 0), - I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0), + I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, 0), + I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX_ENA, 0), I40E_PRIV_FLAG("disable-source-pruning", - I40E_FLAG_SOURCE_PRUNING_DISABLED, 0), - I40E_PRIV_FLAG("disable-fw-lldp", I40E_FLAG_DISABLE_FW_LLDP, 0), + I40E_FLAG_SOURCE_PRUNING_DIS, 0), + I40E_PRIV_FLAG("disable-fw-lldp", I40E_FLAG_FW_LLDP_DIS, 0), I40E_PRIV_FLAG("rs-fec", I40E_FLAG_RS_FEC, 0), I40E_PRIV_FLAG("base-r-fec", I40E_FLAG_BASE_R_FEC, 0), I40E_PRIV_FLAG("vf-vlan-pruning", - I40E_FLAG_VF_VLAN_PRUNING, 0), + I40E_FLAG_VF_VLAN_PRUNING_ENA, 0), }; #define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags) @@ -466,7 +466,7 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = { /* 
Private flags with a global effect, restricted to PF 0 */ static const struct i40e_priv_flags i40e_gl_gstrings_priv_flags[] = { I40E_PRIV_FLAG("vf-true-promisc-support", - I40E_FLAG_TRUE_PROMISC_SUPPORT, 0), + I40E_FLAG_TRUE_PROMISC_ENA, 0), }; #define I40E_GL_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gl_gstrings_priv_flags) @@ -502,7 +502,7 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 1000baseT_Full); - if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) { + if (test_bit(I40E_HW_CAP_100M_SGMII, pf->hw.caps)) { ethtool_link_ksettings_add_link_mode(ks, supported, 100baseT_Full); ethtool_link_ksettings_add_link_mode(ks, advertising, @@ -601,7 +601,7 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, 10000baseKX4_Full); } if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR && - !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) { + !test_bit(I40E_HW_CAP_CRT_RETIMER, pf->hw.caps)) { ethtool_link_ksettings_add_link_mode(ks, supported, 10000baseKR_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) @@ -609,7 +609,7 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, 10000baseKR_Full); } if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX && - !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) { + !test_bit(I40E_HW_CAP_CRT_RETIMER, pf->hw.caps)) { ethtool_link_ksettings_add_link_mode(ks, supported, 1000baseKX_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) @@ -917,7 +917,7 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 1000baseT_Full); - if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) { + if (test_bit(I40E_HW_CAP_100M_SGMII, pf->hw.caps)) { ethtool_link_ksettings_add_link_mode(ks, supported, 100baseT_Full); if (hw_link_info->requested_speeds & @@ -1488,12 +1488,8 @@ static int i40e_set_fec_cfg(struct net_device *netdev, u8 fec_cfg) struct i40e_pf *pf = np->vsi->back; struct i40e_hw *hw = &pf->hw; int status = 0; - u32 flags = 0; int err = 0; - flags = READ_ONCE(pf->flags); - i40e_set_fec_in_flags(fec_cfg, &flags); - /* Get the current phy config */ memset(&abilities, 0, sizeof(abilities)); status = i40e_aq_get_phy_capabilities(hw, false, false, &abilities, @@ -1525,7 +1521,7 @@ static int i40e_set_fec_cfg(struct net_device *netdev, u8 fec_cfg) err = -EAGAIN; goto done; } - pf->flags = flags; + i40e_set_fec_in_flags(fec_cfg, pf->flags); status = i40e_update_link_info(hw); if (status) /* debug level message only due to relation to the link @@ -1599,7 +1595,7 @@ static int i40e_set_fec_param(struct net_device *netdev, return -EPERM; if (hw->mac.type == I40E_MAC_X722 && - !(hw->flags & I40E_HW_FLAG_X722_FEC_REQUEST_CAPABLE)) { + !test_bit(I40E_HW_CAP_X722_FEC_REQUEST, hw->caps)) { netdev_err(netdev, "Setting FEC encoding not supported by firmware. 
Please update the NVM image.\n"); return -EOPNOTSUPP; } @@ -2015,6 +2011,18 @@ static void i40e_get_drvinfo(struct net_device *netdev, drvinfo->n_priv_flags += I40E_GL_PRIV_FLAGS_STR_LEN; } +static u32 i40e_get_max_num_descriptors(struct i40e_pf *pf) +{ + struct i40e_hw *hw = &pf->hw; + + switch (hw->mac.type) { + case I40E_MAC_XL710: + return I40E_MAX_NUM_DESCRIPTORS_XL710; + default: + return I40E_MAX_NUM_DESCRIPTORS; + } +} + static void i40e_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, struct kernel_ethtool_ringparam *kernel_ring, @@ -2024,8 +2032,8 @@ static void i40e_get_ringparam(struct net_device *netdev, struct i40e_pf *pf = np->vsi->back; struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; - ring->rx_max_pending = I40E_MAX_NUM_DESCRIPTORS; - ring->tx_max_pending = I40E_MAX_NUM_DESCRIPTORS; + ring->rx_max_pending = i40e_get_max_num_descriptors(pf); + ring->tx_max_pending = i40e_get_max_num_descriptors(pf); ring->rx_mini_max_pending = 0; ring->rx_jumbo_max_pending = 0; ring->rx_pending = vsi->rx_rings[0]->count; @@ -2050,12 +2058,12 @@ static int i40e_set_ringparam(struct net_device *netdev, struct kernel_ethtool_ringparam *kernel_ring, struct netlink_ext_ack *extack) { + u32 new_rx_count, new_tx_count, max_num_descriptors; struct i40e_ring *tx_rings = NULL, *rx_rings = NULL; struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_hw *hw = &np->vsi->back->hw; struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; - u32 new_rx_count, new_tx_count; u16 tx_alloc_queue_pairs; int timeout = 50; int i, err = 0; @@ -2063,14 +2071,15 @@ static int i40e_set_ringparam(struct net_device *netdev, if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) return -EINVAL; - if (ring->tx_pending > I40E_MAX_NUM_DESCRIPTORS || + max_num_descriptors = i40e_get_max_num_descriptors(pf); + if (ring->tx_pending > max_num_descriptors || ring->tx_pending < I40E_MIN_NUM_DESCRIPTORS || - ring->rx_pending > I40E_MAX_NUM_DESCRIPTORS || + ring->rx_pending > max_num_descriptors || ring->rx_pending < I40E_MIN_NUM_DESCRIPTORS) { netdev_info(netdev, "Descriptors requested (Tx: %d / Rx: %d) out of range [%d-%d]\n", ring->tx_pending, ring->rx_pending, - I40E_MIN_NUM_DESCRIPTORS, I40E_MAX_NUM_DESCRIPTORS); + I40E_MIN_NUM_DESCRIPTORS, max_num_descriptors); return -EINVAL; } @@ -2419,7 +2428,7 @@ static void i40e_get_ethtool_stats(struct net_device *netdev, veb_stats = ((pf->lan_veb != I40E_NO_VEB) && (pf->lan_veb < I40E_MAX_VEB) && - (pf->flags & I40E_FLAG_VEB_STATS_ENABLED)); + test_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags)); if (veb_stats) { veb = pf->veb[pf->lan_veb]; @@ -2548,7 +2557,7 @@ static int i40e_get_ts_info(struct net_device *dev, struct i40e_pf *pf = i40e_netdev_to_pf(dev); /* only report HW timestamping if PTP is enabled */ - if (!(pf->flags & I40E_FLAG_PTP)) + if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags)) return ethtool_op_get_ts_info(dev, info); info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | @@ -2570,7 +2579,7 @@ static int i40e_get_ts_info(struct net_device *dev, BIT(HWTSTAMP_FILTER_PTP_V2_L2_SYNC) | BIT(HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ); - if (pf->hw_features & I40E_HW_PTP_L4_CAPABLE) + if (test_bit(I40E_HW_CAP_PTP_L4, pf->hw.caps)) info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V1_L4_SYNC) | BIT(HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) | BIT(HWTSTAMP_FILTER_PTP_V2_EVENT) | @@ -2819,10 +2828,10 @@ static int i40e_set_phys_id(struct net_device *netdev, switch (state) { case ETHTOOL_ID_ACTIVE: - if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS)) { + if 
(!test_bit(I40E_HW_CAP_PHY_CONTROLS_LEDS, pf->hw.caps)) { pf->led_status = i40e_led_get(hw); } else { - if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE)) + if (!test_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps)) i40e_aq_set_phy_debug(hw, I40E_PHY_DEBUG_ALL, NULL); ret = i40e_led_get_phy(hw, &temp_status, @@ -2831,25 +2840,25 @@ static int i40e_set_phys_id(struct net_device *netdev, } return blink_freq; case ETHTOOL_ID_ON: - if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS)) + if (!test_bit(I40E_HW_CAP_PHY_CONTROLS_LEDS, pf->hw.caps)) i40e_led_set(hw, 0xf, false); else ret = i40e_led_set_phy(hw, true, pf->led_status, 0); break; case ETHTOOL_ID_OFF: - if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS)) + if (!test_bit(I40E_HW_CAP_PHY_CONTROLS_LEDS, pf->hw.caps)) i40e_led_set(hw, 0x0, false); else ret = i40e_led_set_phy(hw, false, pf->led_status, 0); break; case ETHTOOL_ID_INACTIVE: - if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS)) { + if (!test_bit(I40E_HW_CAP_PHY_CONTROLS_LEDS, pf->hw.caps)) { i40e_led_set(hw, pf->led_status, false); } else { ret = i40e_led_set_phy(hw, false, pf->led_status, (pf->phy_led_val | I40E_PHY_LED_MODE_ORIG)); - if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE)) + if (!test_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps)) i40e_aq_set_phy_debug(hw, 0, NULL); } break; @@ -3628,7 +3637,7 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) bitmap_zero(flow_pctypes, FLOW_PCTYPES_SIZE); - if (pf->flags & I40E_FLAG_MFP_ENABLED) { + if (test_bit(I40E_FLAG_MFP_ENA, pf->flags)) { dev_err(&pf->pdev->dev, "Change of RSS hash input set is not supported when MFP mode is enabled\n"); return -EOPNOTSUPP; @@ -3644,19 +3653,22 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) switch (nfc->flow_type) { case TCP_V4_FLOW: set_bit(I40E_FILTER_PCTYPE_NONF_IPV4_TCP, flow_pctypes); - if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) + if (test_bit(I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE, + pf->hw.caps)) set_bit(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK, flow_pctypes); break; case TCP_V6_FLOW: set_bit(I40E_FILTER_PCTYPE_NONF_IPV6_TCP, flow_pctypes); - if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) + if (test_bit(I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE, + pf->hw.caps)) set_bit(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK, flow_pctypes); break; case UDP_V4_FLOW: set_bit(I40E_FILTER_PCTYPE_NONF_IPV4_UDP, flow_pctypes); - if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) { + if (test_bit(I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE, + pf->hw.caps)) { set_bit(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP, flow_pctypes); set_bit(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP, @@ -3666,7 +3678,8 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) break; case UDP_V6_FLOW: set_bit(I40E_FILTER_PCTYPE_NONF_IPV6_UDP, flow_pctypes); - if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) { + if (test_bit(I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE, + pf->hw.caps)) { set_bit(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP, flow_pctypes); set_bit(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP, @@ -4644,7 +4657,7 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi, * main port cannot change them when in MFP mode as this would impact * any filters on the other ports. 
*/ - if (pf->flags & I40E_FLAG_MFP_ENABLED) { + if (test_bit(I40E_FLAG_MFP_ENA, pf->flags)) { netif_err(pf, drv, vsi->netdev, "Cannot change Flow Director input sets while MFP is enabled\n"); return -EOPNOTSUPP; } @@ -4804,7 +4817,7 @@ static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi, return -EINVAL; pf = vsi->back; - if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED)) + if (!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) return -EOPNOTSUPP; if (test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state)) @@ -5001,7 +5014,7 @@ static void i40e_get_channels(struct net_device *dev, ch->max_combined = i40e_max_channels(vsi); /* report info for other vector */ - ch->other_count = (pf->flags & I40E_FLAG_FD_SB_ENABLED) ? 1 : 0; + ch->other_count = test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) ? 1 : 0; ch->max_other = ch->other_count; /* Note: This code assumes DCB is disabled for now. */ @@ -5044,7 +5057,7 @@ static int i40e_set_channels(struct net_device *dev, return -EINVAL; /* verify other_count has not changed */ - if (ch->other_count != ((pf->flags & I40E_FLAG_FD_SB_ENABLED) ? 1 : 0)) + if (ch->other_count != (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) ? 1 : 0)) return -EINVAL; /* verify the number of channels does not exceed hardware limits */ @@ -5215,11 +5228,11 @@ static u32 i40e_get_priv_flags(struct net_device *dev) u32 i, j, ret_flags = 0; for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) { - const struct i40e_priv_flags *priv_flags; + const struct i40e_priv_flags *priv_flag; - priv_flags = &i40e_gstrings_priv_flags[i]; + priv_flag = &i40e_gstrings_priv_flags[i]; - if (priv_flags->flag & pf->flags) + if (test_bit(priv_flag->bitno, pf->flags)) ret_flags |= BIT(i); } @@ -5227,11 +5240,11 @@ static u32 i40e_get_priv_flags(struct net_device *dev) return ret_flags; for (j = 0; j < I40E_GL_PRIV_FLAGS_STR_LEN; j++) { - const struct i40e_priv_flags *priv_flags; + const struct i40e_priv_flags *priv_flag; - priv_flags = &i40e_gl_gstrings_priv_flags[j]; + priv_flag = &i40e_gl_gstrings_priv_flags[j]; - if (priv_flags->flag & pf->flags) + if (test_bit(priv_flag->bitno, pf->flags)) ret_flags |= BIT(i + j); } @@ -5245,8 +5258,10 @@ static u32 i40e_get_priv_flags(struct net_device *dev) **/ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) { + DECLARE_BITMAP(changed_flags, I40E_PF_FLAGS_NBITS); + DECLARE_BITMAP(orig_flags, I40E_PF_FLAGS_NBITS); + DECLARE_BITMAP(new_flags, I40E_PF_FLAGS_NBITS); struct i40e_netdev_priv *np = netdev_priv(dev); - u64 orig_flags, new_flags, changed_flags; enum i40e_admin_queue_err adq_err; struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; @@ -5254,51 +5269,57 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) int status; u32 i, j; - orig_flags = READ_ONCE(pf->flags); - new_flags = orig_flags; + bitmap_copy(orig_flags, pf->flags, I40E_PF_FLAGS_NBITS); + bitmap_copy(new_flags, pf->flags, I40E_PF_FLAGS_NBITS); for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) { - const struct i40e_priv_flags *priv_flags; - - priv_flags = &i40e_gstrings_priv_flags[i]; + const struct i40e_priv_flags *priv_flag; + bool new_val; - if (flags & BIT(i)) - new_flags |= priv_flags->flag; - else - new_flags &= ~(priv_flags->flag); + priv_flag = &i40e_gstrings_priv_flags[i]; + new_val = (flags & BIT(i)) ? 
true : false; /* If this is a read-only flag, it can't be changed */ - if (priv_flags->read_only && - ((orig_flags ^ new_flags) & ~BIT(i))) + if (priv_flag->read_only && + test_bit(priv_flag->bitno, orig_flags) != new_val) return -EOPNOTSUPP; + + if (new_val) + set_bit(priv_flag->bitno, new_flags); + else + clear_bit(priv_flag->bitno, new_flags); } if (pf->hw.pf_id != 0) goto flags_complete; for (j = 0; j < I40E_GL_PRIV_FLAGS_STR_LEN; j++) { - const struct i40e_priv_flags *priv_flags; + const struct i40e_priv_flags *priv_flag; + bool new_val; - priv_flags = &i40e_gl_gstrings_priv_flags[j]; - - if (flags & BIT(i + j)) - new_flags |= priv_flags->flag; - else - new_flags &= ~(priv_flags->flag); + priv_flag = &i40e_gl_gstrings_priv_flags[j]; + new_val = (flags & BIT(i + j)) ? true : false; /* If this is a read-only flag, it can't be changed */ - if (priv_flags->read_only && - ((orig_flags ^ new_flags) & ~BIT(i))) + if (priv_flag->read_only && + test_bit(priv_flag->bitno, orig_flags) != new_val) return -EOPNOTSUPP; + + if (new_val) + set_bit(priv_flag->bitno, new_flags); + else + clear_bit(priv_flag->bitno, new_flags); } flags_complete: - changed_flags = orig_flags ^ new_flags; + bitmap_xor(changed_flags, pf->flags, orig_flags, I40E_PF_FLAGS_NBITS); - if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) + if (test_bit(I40E_FLAG_FW_LLDP_DIS, changed_flags)) reset_needed = I40E_PF_RESET_AND_REBUILD_FLAG; - if (changed_flags & (I40E_FLAG_VEB_STATS_ENABLED | - I40E_FLAG_LEGACY_RX | I40E_FLAG_SOURCE_PRUNING_DISABLED)) + + if (test_bit(I40E_FLAG_VEB_STATS_ENA, changed_flags) || + test_bit(I40E_FLAG_LEGACY_RX_ENA, changed_flags) || + test_bit(I40E_FLAG_SOURCE_PRUNING_DIS, changed_flags)) reset_needed = BIT(__I40E_PF_RESET_REQUESTED); /* Before we finalize any flag changes, we need to perform some @@ -5306,8 +5327,8 @@ flags_complete: */ /* ATR eviction is not supported on all devices */ - if ((new_flags & I40E_FLAG_HW_ATR_EVICT_ENABLED) && - !(pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE)) + if (test_bit(I40E_FLAG_HW_ATR_EVICT_ENA, new_flags) && + !test_bit(I40E_HW_CAP_ATR_EVICT, pf->hw.caps)) return -EOPNOTSUPP; /* If the driver detected FW LLDP was disabled on init, this flag could @@ -5318,15 +5339,14 @@ flags_complete: * disable LLDP, however we _must_ not allow the user to enable/disable * LLDP with this flag on unsupported FW versions. 
*/ - if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) { - if (!(pf->hw.flags & I40E_HW_FLAG_FW_LLDP_STOPPABLE)) { - dev_warn(&pf->pdev->dev, - "Device does not support changing FW LLDP\n"); - return -EOPNOTSUPP; - } + if (test_bit(I40E_FLAG_FW_LLDP_DIS, changed_flags) && + !test_bit(I40E_HW_CAP_FW_LLDP_STOPPABLE, pf->hw.caps)) { + dev_warn(&pf->pdev->dev, + "Device does not support changing FW LLDP\n"); + return -EOPNOTSUPP; } - if (changed_flags & I40E_FLAG_RS_FEC && + if (test_bit(I40E_FLAG_RS_FEC, changed_flags) && pf->hw.device_id != I40E_DEV_ID_25G_SFP28 && pf->hw.device_id != I40E_DEV_ID_25G_B) { dev_warn(&pf->pdev->dev, @@ -5334,7 +5354,7 @@ flags_complete: return -EOPNOTSUPP; } - if (changed_flags & I40E_FLAG_BASE_R_FEC && + if (test_bit(I40E_FLAG_BASE_R_FEC, changed_flags) && pf->hw.device_id != I40E_DEV_ID_25G_SFP28 && pf->hw.device_id != I40E_DEV_ID_25G_B && pf->hw.device_id != I40E_DEV_ID_KX_X722) { @@ -5349,17 +5369,17 @@ flags_complete: */ /* Flush current ATR settings if ATR was disabled */ - if ((changed_flags & I40E_FLAG_FD_ATR_ENABLED) && - !(new_flags & I40E_FLAG_FD_ATR_ENABLED)) { + if (test_bit(I40E_FLAG_FD_ATR_ENA, changed_flags) && + !test_bit(I40E_FLAG_FD_ATR_ENA, new_flags)) { set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state); set_bit(__I40E_FD_FLUSH_REQUESTED, pf->state); } - if (changed_flags & I40E_FLAG_TRUE_PROMISC_SUPPORT) { + if (test_bit(I40E_FLAG_TRUE_PROMISC_ENA, changed_flags)) { u16 sw_flags = 0, valid_flags = 0; int ret; - if (!(new_flags & I40E_FLAG_TRUE_PROMISC_SUPPORT)) + if (!test_bit(I40E_FLAG_TRUE_PROMISC_ENA, new_flags)) sw_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC; valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC; ret = i40e_aq_set_switch_config(&pf->hw, sw_flags, valid_flags, @@ -5374,17 +5394,17 @@ flags_complete: } } - if ((changed_flags & I40E_FLAG_RS_FEC) || - (changed_flags & I40E_FLAG_BASE_R_FEC)) { + if (test_bit(I40E_FLAG_RS_FEC, changed_flags) || + test_bit(I40E_FLAG_BASE_R_FEC, changed_flags)) { u8 fec_cfg = 0; - if (new_flags & I40E_FLAG_RS_FEC && - new_flags & I40E_FLAG_BASE_R_FEC) { + if (test_bit(I40E_FLAG_RS_FEC, new_flags) && + test_bit(I40E_FLAG_BASE_R_FEC, new_flags)) { fec_cfg = I40E_AQ_SET_FEC_AUTO; - } else if (new_flags & I40E_FLAG_RS_FEC) { + } else if (test_bit(I40E_FLAG_RS_FEC, new_flags)) { fec_cfg = (I40E_AQ_SET_FEC_REQUEST_RS | I40E_AQ_SET_FEC_ABILITY_RS); - } else if (new_flags & I40E_FLAG_BASE_R_FEC) { + } else if (test_bit(I40E_FLAG_BASE_R_FEC, new_flags)) { fec_cfg = (I40E_AQ_SET_FEC_REQUEST_KR | I40E_AQ_SET_FEC_ABILITY_KR); } @@ -5392,35 +5412,35 @@ flags_complete: dev_warn(&pf->pdev->dev, "Cannot change FEC config\n"); } - if ((changed_flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) && - (orig_flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED)) { + if (test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, changed_flags) && + test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, orig_flags)) { dev_err(&pf->pdev->dev, "Setting link-down-on-close not supported on this port (because total-port-shutdown is enabled)\n"); return -EOPNOTSUPP; } - if ((changed_flags & I40E_FLAG_VF_VLAN_PRUNING) && + if (test_bit(I40E_FLAG_VF_VLAN_PRUNING_ENA, changed_flags) && pf->num_alloc_vfs) { dev_warn(&pf->pdev->dev, "Changing vf-vlan-pruning flag while VF(s) are active is not supported\n"); return -EOPNOTSUPP; } - if ((changed_flags & I40E_FLAG_LEGACY_RX) && + if (test_bit(I40E_FLAG_LEGACY_RX_ENA, changed_flags) && I40E_2K_TOO_SMALL_WITH_PADDING) { dev_warn(&pf->pdev->dev, "2k Rx buffer is too small to fit standard MTU and skb_shared_info\n"); return -EOPNOTSUPP; } - 
if ((changed_flags & new_flags & - I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) && - (new_flags & I40E_FLAG_MFP_ENABLED)) + if (test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, changed_flags) && + test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, new_flags) && + test_bit(I40E_FLAG_MFP_ENA, new_flags)) dev_warn(&pf->pdev->dev, "Turning on link-down-on-close flag may affect other partitions\n"); - if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) { - if (new_flags & I40E_FLAG_DISABLE_FW_LLDP) { + if (test_bit(I40E_FLAG_FW_LLDP_DIS, changed_flags)) { + if (test_bit(I40E_FLAG_FW_LLDP_DIS, new_flags)) { #ifdef CONFIG_I40E_DCB i40e_dcb_sw_default_config(pf); #endif /* CONFIG_I40E_DCB */ @@ -5461,7 +5481,7 @@ flags_complete: * initialization or (b) while holding the RTNL lock, we don't need * anything fancy here. */ - pf->flags = new_flags; + bitmap_copy(pf->flags, new_flags, I40E_PF_FLAGS_NBITS); /* Issue reset to cause things to take effect, as additional bits * are added we will need to create a mask of bits requiring reset @@ -5491,7 +5511,7 @@ static int i40e_get_module_info(struct net_device *netdev, int status; /* Check if firmware supports reading module EEPROM. */ - if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE)) { + if (!test_bit(I40E_HW_CAP_AQ_PHY_ACCESS, hw->caps)) { netdev_err(vsi->netdev, "Module EEPROM memory read not supported. Please update the NVM image.\n"); return -EINVAL; } @@ -5571,8 +5591,8 @@ static int i40e_get_module_info(struct net_device *netdev, modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN; break; default: - netdev_err(vsi->netdev, "Module type unrecognized\n"); - return -EINVAL; + netdev_dbg(vsi->netdev, "SFP module type unrecognized or no SFP connector used.\n"); + return -EOPNOTSUPP; } return 0; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f7a332e51524..7ded598a68e6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -371,7 +371,7 @@ static void i40e_tx_timeout(struct net_device *netdev, unsigned int txqueue) if (tx_ring) { head = i40e_get_head(tx_ring); /* Read interrupt register */ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) val = rd32(&pf->hw, I40E_PFINT_DYN_CTLN(tx_ring->q_vector->v_idx + tx_ring->vsi->base_vector - 1)); @@ -1209,13 +1209,13 @@ static void i40e_update_pf_stats(struct i40e_pf *pf) pf->stat_offsets_loaded, &osd->rx_lpi_count, &nsd->rx_lpi_count); - if (pf->flags & I40E_FLAG_FD_SB_ENABLED && + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) && !test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state)) nsd->fd_sb_status = true; else nsd->fd_sb_status = false; - if (pf->flags & I40E_FLAG_FD_ATR_ENABLED && + if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) && !test_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state)) nsd->fd_atr_status = true; else @@ -1485,7 +1485,7 @@ static s16 i40e_get_vf_new_vlan(struct i40e_vsi *vsi, return pvid; is_any = (trusted || - !(pf->flags & I40E_FLAG_VF_VLAN_PRUNING)); + !test_bit(I40E_FLAG_VF_VLAN_PRUNING_ENA, pf->flags)); if ((vlan_filters && f->vlan == I40E_VLAN_ANY) || (!is_any && !vlan_filters && f->vlan == I40E_VLAN_ANY) || @@ -1890,7 +1890,7 @@ static int i40e_vsi_config_rss(struct i40e_vsi *vsi) u8 *lut; int ret; - if (!(pf->hw_features & I40E_HW_RSS_AQ_CAPABLE)) + if (!test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps)) return 0; if (!vsi->rss_size) vsi->rss_size = min_t(int, pf->alloc_rss_size, @@ -2045,7 +2045,7 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, */ if 
(vsi->req_queue_pairs > 0) vsi->num_queue_pairs = vsi->req_queue_pairs; - else if (pf->flags & I40E_FLAG_MSIX_ENABLED) + else if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) vsi->num_queue_pairs = pf->num_lan_msix; else vsi->num_queue_pairs = 1; @@ -2058,7 +2058,7 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, else num_tc_qps = vsi->alloc_queue_pairs; - if (enabled_tc && (vsi->back->flags & I40E_FLAG_DCB_ENABLED)) { + if (enabled_tc && test_bit(I40E_FLAG_DCB_ENA, vsi->back->flags)) { /* Find numtc from enabled TC bitmap */ for (i = 0, numtc = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { if (enabled_tc & BIT(i)) /* TC is enabled */ @@ -2077,7 +2077,7 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1; /* Do not allow use more TC queue pairs than MSI-X vectors exist */ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) num_tc_qps = min_t(int, num_tc_qps, pf->num_lan_msix); /* Setup queue offset/count for all TCs for given VSI */ @@ -2089,8 +2089,10 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, switch (vsi->type) { case I40E_VSI_MAIN: - if (!(pf->flags & (I40E_FLAG_FD_SB_ENABLED | - I40E_FLAG_FD_ATR_ENABLED)) || + if ((!test_bit(I40E_FLAG_FD_SB_ENA, + pf->flags) && + !test_bit(I40E_FLAG_FD_ATR_ENA, + pf->flags)) || vsi->tc_config.enabled_tc != 1) { qcount = min_t(int, pf->alloc_rss_size, num_tc_qps); @@ -2476,7 +2478,7 @@ static int i40e_set_promiscuous(struct i40e_pf *pf, bool promisc) if (vsi->type == I40E_VSI_MAIN && pf->lan_veb != I40E_NO_VEB && - !(pf->flags & I40E_FLAG_MFP_ENABLED)) { + !test_bit(I40E_FLAG_MFP_ENA, pf->flags)) { /* set defport ON for Main VSI instead of true promisc * this way we will get all unicast/multicast and VLAN * promisc behavior but will not get VF or VMDq traffic @@ -2907,7 +2909,7 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf) */ static u16 i40e_calculate_vsi_rx_buf_len(struct i40e_vsi *vsi) { - if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) + if (!vsi->netdev || test_bit(I40E_FLAG_LEGACY_RX_ENA, vsi->back->flags)) return SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048); return PAGE_SIZE < 8192 ? 
I40E_RXBUFFER_3072 : I40E_RXBUFFER_2048; @@ -3462,7 +3464,7 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring) ring->xsk_pool = i40e_xsk_pool(ring); /* some ATR related tx ring init */ - if (vsi->back->flags & I40E_FLAG_FD_ATR_ENABLED) { + if (test_bit(I40E_FLAG_FD_ATR_ENA, vsi->back->flags)) { ring->atr_sample_rate = vsi->back->atr_sample_rate; ring->atr_count = 0; } else { @@ -3478,9 +3480,11 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring) tx_ctx.new_context = 1; tx_ctx.base = (ring->dma / 128); tx_ctx.qlen = ring->count; - tx_ctx.fd_ena = !!(vsi->back->flags & (I40E_FLAG_FD_SB_ENABLED | - I40E_FLAG_FD_ATR_ENABLED)); - tx_ctx.timesync_ena = !!(vsi->back->flags & I40E_FLAG_PTP); + if (test_bit(I40E_FLAG_FD_SB_ENA, vsi->back->flags) || + test_bit(I40E_FLAG_FD_ATR_ENA, vsi->back->flags)) + tx_ctx.fd_ena = 1; + if (test_bit(I40E_FLAG_PTP_ENA, vsi->back->flags)) + tx_ctx.timesync_ena = 1; /* FDIR VSI tx ring can still use RS bit and writebacks */ if (vsi->type != I40E_VSI_FDIR) tx_ctx.head_wb_ena = 1; @@ -3663,7 +3667,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) } /* configure Rx buffer alignment */ - if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) { + if (!vsi->netdev || test_bit(I40E_FLAG_LEGACY_RX_ENA, vsi->back->flags)) { if (I40E_2K_TOO_SMALL_WITH_PADDING) { dev_info(&vsi->back->pdev->dev, "2k Rx buffer is too small to fit standard MTU and skb_shared_info\n"); @@ -3761,7 +3765,7 @@ static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi) u16 qoffset, qcount; int i, n; - if (!(vsi->back->flags & I40E_FLAG_DCB_ENABLED)) { + if (!test_bit(I40E_FLAG_DCB_ENA, vsi->back->flags)) { /* Reset the TC information */ for (i = 0; i < vsi->num_queue_pairs; i++) { rx_ring = vsi->rx_rings[i]; @@ -3828,7 +3832,7 @@ static void i40e_fdir_filter_restore(struct i40e_vsi *vsi) struct i40e_pf *pf = vsi->back; struct hlist_node *node; - if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED)) + if (!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) return; /* Reset FDir counters as we're replaying all existing filters */ @@ -3966,10 +3970,10 @@ static void i40e_enable_misc_int_causes(struct i40e_pf *pf) I40E_PFINT_ICR0_ENA_VFLR_MASK | I40E_PFINT_ICR0_ENA_ADMINQ_MASK; - if (pf->flags & I40E_FLAG_IWARP_ENABLED) + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) val |= I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK; - if (pf->flags & I40E_FLAG_PTP) + if (test_bit(I40E_FLAG_PTP_ENA, pf->flags)) val |= I40E_PFINT_ICR0_ENA_TIMESYNC_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, val); @@ -4205,7 +4209,7 @@ static void i40e_vsi_disable_irq(struct i40e_vsi *vsi) } /* disable each interrupt */ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { for (i = vsi->base_vector; i < (vsi->num_q_vectors + vsi->base_vector); i++) wr32(hw, I40E_PFINT_DYN_CTLN(i - 1), 0); @@ -4231,7 +4235,7 @@ static int i40e_vsi_enable_irq(struct i40e_vsi *vsi) struct i40e_pf *pf = vsi->back; int i; - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { for (i = 0; i < vsi->num_q_vectors; i++) i40e_irq_dynamic_enable(vsi, i); } else { @@ -4252,7 +4256,7 @@ static void i40e_free_misc_vector(struct i40e_pf *pf) wr32(&pf->hw, I40E_PFINT_ICR0_ENA, 0); i40e_flush(&pf->hw); - if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags) && pf->msix_entries) { free_irq(pf->msix_entries[0].vector, pf); clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state); } @@ -4287,7 +4291,7 @@ static irqreturn_t i40e_intr(int irq, void 
*data) (icr0 & I40E_PFINT_ICR0_SWINT_MASK)) pf->sw_int_count++; - if ((pf->flags & I40E_FLAG_IWARP_ENABLED) && + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags) && (icr0 & I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK)) { ena_mask &= ~I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK; dev_dbg(&pf->pdev->dev, "cleared PE_CRITERR\n"); @@ -4480,7 +4484,7 @@ static bool i40e_clean_fdir_tx_irq(struct i40e_ring *tx_ring, int budget) i += tx_ring->count; tx_ring->next_to_clean = i; - if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) + if (test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags)) i40e_irq_dynamic_enable(vsi, tx_ring->q_vector->v_idx); return budget > 0; @@ -4593,9 +4597,9 @@ static int i40e_vsi_request_irq(struct i40e_vsi *vsi, char *basename) struct i40e_pf *pf = vsi->back; int err; - if (pf->flags & I40E_FLAG_MSIX_ENABLED) + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) err = i40e_vsi_request_irq_msix(vsi, basename); - else if (pf->flags & I40E_FLAG_MSI_ENABLED) + else if (test_bit(I40E_FLAG_MSI_ENA, pf->flags)) err = request_irq(pf->pdev->irq, i40e_intr, 0, pf->int_name, pf); else @@ -4627,7 +4631,7 @@ static void i40e_netpoll(struct net_device *netdev) if (test_bit(__I40E_VSI_DOWN, vsi->state)) return; - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { for (i = 0; i < vsi->num_q_vectors; i++) i40e_msix_clean_rings(0, vsi->q_vectors[i]); } else { @@ -4967,7 +4971,7 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi) u32 val, qp; int i; - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { if (!vsi->q_vectors) return; @@ -5129,16 +5133,17 @@ static void i40e_vsi_free_q_vectors(struct i40e_vsi *vsi) static void i40e_reset_interrupt_capability(struct i40e_pf *pf) { /* If we're in Legacy mode, the interrupt was cleaned in vsi_close */ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { pci_disable_msix(pf->pdev); kfree(pf->msix_entries); pf->msix_entries = NULL; kfree(pf->irq_pile); pf->irq_pile = NULL; - } else if (pf->flags & I40E_FLAG_MSI_ENABLED) { + } else if (test_bit(I40E_FLAG_MSI_ENA, pf->flags)) { pci_disable_msi(pf->pdev); } - pf->flags &= ~(I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED); + clear_bit(I40E_FLAG_MSI_ENA, pf->flags); + clear_bit(I40E_FLAG_MSIX_ENA, pf->flags); } /** @@ -5478,11 +5483,11 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) return pf->vsi[pf->lan_vsi]->mqprio_qopt.qopt.num_tc; /* If neither MQPRIO nor DCB is enabled, then always use single TC */ - if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) + if (!test_bit(I40E_FLAG_DCB_ENA, pf->flags)) return 1; /* SFP mode will be enabled for all TCs on port */ - if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) + if (!test_bit(I40E_FLAG_MFP_ENA, pf->flags)) return i40e_dcb_get_num_tc(dcbcfg); /* MFP mode return count of enabled TCs for this PF */ @@ -5512,11 +5517,11 @@ static u8 i40e_pf_get_tc_map(struct i40e_pf *pf) /* If neither MQPRIO nor DCB is enabled for this PF then just return * default TC */ - if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) + if (!test_bit(I40E_FLAG_DCB_ENA, pf->flags)) return I40E_DEFAULT_TRAFFIC_CLASS; /* SFP mode we want PF to be enabled for all TCs */ - if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) + if (!test_bit(I40E_FLAG_MFP_ENA, pf->flags)) return i40e_dcb_get_enabled_tc(&pf->hw.local_dcbx_config); /* MFP enabled and iSCSI PF type */ @@ -5605,7 +5610,7 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, /* There is no need to reset BW when mqprio mode is on. 
*/ if (i40e_is_tc_mqprio_enabled(pf)) return 0; - if (!vsi->mqprio_qopt.qopt.hw && !(pf->flags & I40E_FLAG_DCB_ENABLED)) { + if (!vsi->mqprio_qopt.qopt.hw && !test_bit(I40E_FLAG_DCB_ENA, pf->flags)) { ret = i40e_set_bw_limit(vsi, vsi->seid, 0); if (ret) dev_info(&pf->pdev->dev, @@ -5858,7 +5863,7 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc) } vsi->reconfig_rss = false; } - if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) { + if (test_bit(I40E_FLAG_IWARP_ENA, vsi->back->flags)) { ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID); ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA; @@ -6271,7 +6276,7 @@ static int i40e_add_channel(struct i40e_pf *pf, u16 uplink_seid, if (ch->type == I40E_VSI_VMDQ2) ctxt.flags = I40E_AQ_VSI_TYPE_VMDQ2; - if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) { + if (test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) { ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); ctxt.info.switch_id = @@ -6576,8 +6581,8 @@ int i40e_create_queue_channel(struct i40e_vsi *vsi, * VSI to be added switch to VEB mode. */ - if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { - pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; + if (!test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) { + set_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); if (vsi->type == I40E_VSI_MAIN) { if (i40e_is_tc_mqprio_enabled(pf)) @@ -6988,9 +6993,9 @@ int i40e_hw_dcb_config(struct i40e_pf *pf, struct i40e_dcbx_config *new_cfg) if (need_reconfig) { /* Enable DCB tagging only when more than one TC */ if (new_numtc > 1) - pf->flags |= I40E_FLAG_DCB_ENABLED; + set_bit(I40E_FLAG_DCB_ENA, pf->flags); else - pf->flags &= ~I40E_FLAG_DCB_ENABLED; + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); set_bit(__I40E_PORT_SUSPENDED, pf->state); /* Reconfiguration needed quiesce all VSIs */ @@ -7080,7 +7085,7 @@ out: set_bit(__I40E_CLIENT_L2_CHANGE, pf->state); } /* registers are set, lets apply */ - if (pf->hw_features & I40E_HW_USE_SET_LLDP_MIB) + if (test_bit(I40E_HW_CAP_USE_SET_LLDP_MIB, pf->hw.caps)) ret = i40e_hw_set_dcb_config(pf, new_cfg); } @@ -7101,7 +7106,7 @@ int i40e_dcb_sw_default_config(struct i40e_pf *pf) struct i40e_hw *hw = &pf->hw; int err; - if (pf->hw_features & I40E_HW_USE_SET_LLDP_MIB) { + if (test_bit(I40E_HW_CAP_USE_SET_LLDP_MIB, pf->hw.caps)) { /* Update the local cached instance with TC0 ETS */ memset(&pf->tmp_cfg, 0, sizeof(struct i40e_dcbx_config)); pf->tmp_cfg.etscfg.willing = I40E_IEEE_DEFAULT_ETS_WILLING; @@ -7162,12 +7167,12 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf) /* Do not enable DCB for SW1 and SW2 images even if the FW is capable * Also do not enable DCBx if FW LLDP agent is disabled */ - if (pf->hw_features & I40E_HW_NO_DCB_SUPPORT) { + if (test_bit(I40E_HW_CAP_NO_DCB_SUPPORT, pf->hw.caps)) { dev_info(&pf->pdev->dev, "DCB is not supported.\n"); err = -EOPNOTSUPP; goto out; } - if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) { + if (test_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags)) { dev_info(&pf->pdev->dev, "FW LLDP is disabled, attempting SW DCB\n"); err = i40e_dcb_sw_default_config(pf); if (err) { @@ -7178,8 +7183,8 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf) pf->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE; /* at init capable but disabled */ - pf->flags |= I40E_FLAG_DCB_CAPABLE; - pf->flags &= ~I40E_FLAG_DCB_ENABLED; + set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); goto out; } err = i40e_init_dcb(hw, true); @@ -7194,20 +7199,20 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf) pf->dcbx_cap = 
DCB_CAP_DCBX_LLD_MANAGED | DCB_CAP_DCBX_VER_IEEE; - pf->flags |= I40E_FLAG_DCB_CAPABLE; + set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); /* Enable DCB tagging only when more than one TC * or explicitly disable if only one TC */ if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1) - pf->flags |= I40E_FLAG_DCB_ENABLED; + set_bit(I40E_FLAG_DCB_ENA, pf->flags); else - pf->flags &= ~I40E_FLAG_DCB_ENABLED; + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); dev_dbg(&pf->pdev->dev, "DCBX offload is supported for this PF.\n"); } } else if (pf->hw.aq.asq_last_status == I40E_AQ_RC_EPERM) { dev_info(&pf->pdev->dev, "FW LLDP disabled for this PF.\n"); - pf->flags |= I40E_FLAG_DISABLE_FW_LLDP; + set_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags); } else { dev_info(&pf->pdev->dev, "Query for DCB configuration failed, err %pe aq_err %s\n", @@ -7367,7 +7372,7 @@ static int i40e_up_complete(struct i40e_vsi *vsi) struct i40e_pf *pf = vsi->back; int err; - if (pf->flags & I40E_FLAG_MSIX_ENABLED) + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) i40e_vsi_configure_msix(vsi); else i40e_configure_msi_and_legacy(vsi); @@ -7471,7 +7476,7 @@ static int i40e_force_link_state(struct i40e_pf *pf, bool is_up) * and its speed values are OK, no need for a flap * if non_zero_phy_type was set, still need to force up */ - if (pf->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED) + if (test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags)) non_zero_phy_type = true; else if (is_up && abilities.phy_type != 0 && abilities.link_speed != 0) return 0; @@ -7487,7 +7492,7 @@ static int i40e_force_link_state(struct i40e_pf *pf, bool is_up) non_zero_phy_type ? (u8)((mask >> 32) & 0xff) : 0; /* Copy the old settings, except of phy_type */ config.abilities = abilities.abilities; - if (pf->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED) { + if (test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags)) { if (is_up) config.abilities |= I40E_AQ_PHY_ENABLE_LINK; else @@ -7537,8 +7542,8 @@ int i40e_up(struct i40e_vsi *vsi) int err; if (vsi->type == I40E_VSI_MAIN && - (vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED || - vsi->back->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED)) + (test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags) || + test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, vsi->back->flags))) i40e_force_link_state(vsi->back, true); err = i40e_vsi_configure(vsi); @@ -7566,8 +7571,8 @@ void i40e_down(struct i40e_vsi *vsi) i40e_vsi_disable_irq(vsi); i40e_vsi_stop_rings(vsi); if (vsi->type == I40E_VSI_MAIN && - (vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED || - vsi->back->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED)) + (test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags) || + test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, vsi->back->flags))) i40e_force_link_state(vsi->back, false); i40e_napi_disable_all(vsi); @@ -7973,7 +7978,7 @@ static void *i40e_fwd_add(struct net_device *netdev, struct net_device *vdev) struct i40e_fwd_adapter *fwd; int avail_macvlan, ret; - if ((pf->flags & I40E_FLAG_DCB_ENABLED)) { + if (test_bit(I40E_FLAG_DCB_ENA, pf->flags)) { netdev_info(netdev, "Macvlans are not supported when DCB is enabled\n"); return ERR_PTR(-EINVAL); } @@ -8168,23 +8173,23 @@ static int i40e_setup_tc(struct net_device *netdev, void *type_data) hw = mqprio_qopt->qopt.hw; mode = mqprio_qopt->mode; if (!hw) { - pf->flags &= ~I40E_FLAG_TC_MQPRIO; + clear_bit(I40E_FLAG_TC_MQPRIO_ENA, pf->flags); memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt)); goto config_tc; } /* Check if MFP enabled */ - if (pf->flags & I40E_FLAG_MFP_ENABLED) 
{ + if (test_bit(I40E_FLAG_MFP_ENA, pf->flags)) { netdev_info(netdev, "Configuring TC not supported in MFP mode\n"); return ret; } switch (mode) { case TC_MQPRIO_MODE_DCB: - pf->flags &= ~I40E_FLAG_TC_MQPRIO; + clear_bit(I40E_FLAG_TC_MQPRIO_ENA, pf->flags); /* Check if DCB enabled to continue */ - if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) { + if (!test_bit(I40E_FLAG_DCB_ENA, pf->flags)) { netdev_info(netdev, "DCB is not enabled for adapter\n"); return ret; @@ -8198,20 +8203,20 @@ static int i40e_setup_tc(struct net_device *netdev, void *type_data) } break; case TC_MQPRIO_MODE_CHANNEL: - if (pf->flags & I40E_FLAG_DCB_ENABLED) { + if (test_bit(I40E_FLAG_DCB_ENA, pf->flags)) { netdev_info(netdev, "Full offload of TC Mqprio options is not supported when DCB is enabled\n"); return ret; } - if (!(pf->flags & I40E_FLAG_MSIX_ENABLED)) + if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) return ret; ret = i40e_validate_mqprio_qopt(vsi, mqprio_qopt); if (ret) return ret; memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt)); - pf->flags |= I40E_FLAG_TC_MQPRIO; - pf->flags &= ~I40E_FLAG_DCB_ENABLED; + set_bit(I40E_FLAG_TC_MQPRIO_ENA, pf->flags); + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); break; default: return -EINVAL; @@ -8795,11 +8800,11 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi, return -EINVAL; } - if (vsi->back->flags & I40E_FLAG_FD_SB_ENABLED) { + if (test_bit(I40E_FLAG_FD_SB_ENA, vsi->back->flags)) { dev_err(&vsi->back->pdev->dev, "Disable Flow Director Sideband, configuring Cloud filters via tc-flower\n"); - vsi->back->flags &= ~I40E_FLAG_FD_SB_ENABLED; - vsi->back->flags |= I40E_FLAG_FD_SB_TO_CLOUD_FILTER; + clear_bit(I40E_FLAG_FD_SB_ENA, vsi->back->flags); + clear_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, vsi->back->flags); } filter = kzalloc(sizeof(*filter), GFP_KERNEL); @@ -8895,11 +8900,11 @@ static int i40e_delete_clsflower(struct i40e_vsi *vsi, pf->num_cloud_filters--; if (!pf->num_cloud_filters) - if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) && - !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) { - pf->flags |= I40E_FLAG_FD_SB_ENABLED; - pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER; - pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE; + if (test_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, pf->flags) && + !test_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags)) { + set_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + clear_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, pf->flags); + clear_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); } return 0; } @@ -9200,11 +9205,11 @@ static void i40e_cloud_filter_exit(struct i40e_pf *pf) } pf->num_cloud_filters = 0; - if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) && - !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) { - pf->flags |= I40E_FLAG_FD_SB_ENABLED; - pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER; - pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE; + if (test_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, pf->flags) && + !test_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags)) { + set_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + clear_bit(I40E_FLAG_FD_SB_TO_CLOUD_FILTER, pf->flags); + clear_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); } } @@ -9292,7 +9297,7 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired) i40e_prep_for_reset(pf); i40e_reset_and_rebuild(pf, true, lock_acquired); dev_info(&pf->pdev->dev, - pf->flags & I40E_FLAG_DISABLE_FW_LLDP ? + test_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags) ? 
"FW LLDP is disabled\n" : "FW LLDP is enabled\n"); @@ -9407,12 +9412,12 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, if (I40E_IS_X710TL_DEVICE(hw->device_id) && (hw->phy.link_info.link_speed & ~(I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB)) && - !(pf->flags & I40E_FLAG_DCB_CAPABLE)) + !test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) /* let firmware decide if the DCB should be disabled */ - pf->flags |= I40E_FLAG_DCB_CAPABLE; + set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); /* Not DCB capable or capability disabled */ - if (!(pf->flags & I40E_FLAG_DCB_CAPABLE)) + if (!test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) return ret; /* Ignore if event is not for Nearest Bridge */ @@ -9448,7 +9453,7 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, (I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB))) { dev_warn(&pf->pdev->dev, "DCB is not supported for X710-T*L 2.5/5G speeds\n"); - pf->flags &= ~I40E_FLAG_DCB_CAPABLE; + clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); } else { dev_info(&pf->pdev->dev, "Failed querying DCB configuration data from firmware, err %pe aq_err %s\n", @@ -9476,9 +9481,9 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, /* Enable DCB tagging only when more than one TC */ if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1) - pf->flags |= I40E_FLAG_DCB_ENABLED; + set_bit(I40E_FLAG_DCB_ENA, pf->flags); else - pf->flags &= ~I40E_FLAG_DCB_ENABLED; + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); set_bit(__I40E_PORT_SUSPENDED, pf->state); /* Reconfiguration needed quiesce all VSIs */ @@ -9610,7 +9615,7 @@ u32 i40e_get_global_fd_count(struct i40e_pf *pf) static void i40e_reenable_fdir_sb(struct i40e_pf *pf) { if (test_and_clear_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state)) - if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) && + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) && (I40E_DEBUG_FD & pf->hw.debug_mask)) dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n"); } @@ -9631,7 +9636,7 @@ static void i40e_reenable_fdir_atr(struct i40e_pf *pf) I40E_L3_SRC_MASK | I40E_L3_DST_MASK | I40E_L4_SRC_MASK | I40E_L4_DST_MASK); - if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && + if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) && (I40E_DEBUG_FD & pf->hw.debug_mask)) dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n"); } @@ -9946,7 +9951,7 @@ static void i40e_link_event(struct i40e_pf *pf) if (pf->vf) i40e_vc_notify_link_state(pf); - if (pf->flags & I40E_FLAG_PTP) + if (test_bit(I40E_FLAG_PTP_ENA, pf->flags)) i40e_ptp_set_increment(pf); #ifdef CONFIG_I40E_DCB if (new_link == old_link) @@ -9963,13 +9968,13 @@ static void i40e_link_event(struct i40e_pf *pf) memset(&pf->tmp_cfg, 0, sizeof(pf->tmp_cfg)); err = i40e_dcb_sw_default_config(pf); if (err) { - pf->flags &= ~(I40E_FLAG_DCB_CAPABLE | - I40E_FLAG_DCB_ENABLED); + clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); } else { pf->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE; - pf->flags |= I40E_FLAG_DCB_CAPABLE; - pf->flags &= ~I40E_FLAG_DCB_ENABLED; + set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); } } #endif /* CONFIG_I40E_DCB */ @@ -9994,7 +9999,7 @@ static void i40e_watchdog_subtask(struct i40e_pf *pf) return; pf->service_timer_previous = jiffies; - if ((pf->flags & I40E_FLAG_LINK_POLLING_ENABLED) || + if (test_bit(I40E_FLAG_LINK_POLLING_ENA, pf->flags) || test_bit(__I40E_TEMP_LINK_POLLING, pf->state)) i40e_link_event(pf); @@ -10005,7 +10010,7 @@ 
static void i40e_watchdog_subtask(struct i40e_pf *pf) if (pf->vsi[i] && pf->vsi[i]->netdev) i40e_update_stats(pf->vsi[i]); - if (pf->flags & I40E_FLAG_VEB_STATS_ENABLED) { + if (test_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags)) { /* Update the stats for the active switching components */ for (i = 0; i < I40E_MAX_VEB; i++) if (pf->veb[i]) @@ -10094,7 +10099,7 @@ static void i40e_handle_link_event(struct i40e_pf *pf, if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) && (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) && (!(status->link_info & I40E_AQ_LINK_UP)) && - (!(pf->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED))) { + (!test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags))) { dev_err(&pf->pdev->dev, "Rx/Tx is disabled on this device because an unsupported SFP module type was detected.\n"); dev_err(&pf->pdev->dev, @@ -10400,7 +10405,7 @@ static int i40e_reconstitute_veb(struct i40e_veb *veb) if (ret) goto end_reconstitute; - if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) + if (test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) veb->bridge_mode = BRIDGE_MODE_VEB; else veb->bridge_mode = BRIDGE_MODE_VEPA; @@ -10545,7 +10550,7 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf) wr32(&pf->hw, I40E_GLQF_HKEY(i), hkey[i]); } - if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED)) + if (!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) return; /* find existing VSI and see if it needs configuring */ @@ -10557,8 +10562,8 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf) pf->vsi[pf->lan_vsi]->seid, 0); if (!vsi) { dev_info(&pf->pdev->dev, "Couldn't create FDir VSI\n"); - pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; - pf->flags |= I40E_FLAG_FD_SB_INACTIVE; + clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); return; } } @@ -10936,14 +10941,14 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) i40e_aq_set_dcb_parameters(hw, false, NULL); dev_warn(&pf->pdev->dev, "DCB is not supported for X710-T*L 2.5/5G speeds\n"); - pf->flags &= ~I40E_FLAG_DCB_CAPABLE; + clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); } else { i40e_aq_set_dcb_parameters(hw, true, NULL); ret = i40e_init_pf_dcb(pf); if (ret) { dev_info(&pf->pdev->dev, "DCB init failed %d, disabled\n", ret); - pf->flags &= ~I40E_FLAG_DCB_CAPABLE; + clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); /* Continue without DCB enabled */ } } @@ -11064,7 +11069,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) wr32(hw, I40E_REG_MSS, val); } - if (pf->hw_features & I40E_HW_RESTART_AUTONEG) { + if (test_bit(I40E_HW_CAP_RESTART_AUTONEG, pf->hw.caps)) { msleep(75); ret = i40e_aq_set_link_restart_an(&pf->hw, true, NULL); if (ret) @@ -11074,7 +11079,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) pf->hw.aq.asq_last_status)); } /* reinit the misc interrupt */ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { ret = i40e_setup_misc_vector(pf); if (ret) goto end_unlock; @@ -11349,7 +11354,7 @@ static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi) if (!vsi->num_rx_desc) vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, I40E_REQ_DESCRIPTOR_MULTIPLE); - if (pf->flags & I40E_FLAG_MSIX_ENABLED) + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) vsi->num_q_vectors = pf->num_lan_msix; else vsi->num_q_vectors = 1; @@ -11667,7 +11672,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) ring->count = vsi->num_tx_desc; ring->size = 0; ring->dcb_tc = 0; - if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) + if 
(test_bit(I40E_HW_CAP_WB_ON_ITR, vsi->back->hw.caps)) ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; ring->itr_setting = pf->tx_itr_default; WRITE_ONCE(vsi->tx_rings[i], ring++); @@ -11684,7 +11689,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) ring->count = vsi->num_tx_desc; ring->size = 0; ring->dcb_tc = 0; - if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) + if (test_bit(I40E_HW_CAP_WB_ON_ITR, vsi->back->hw.caps)) ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; set_ring_xdp(ring); ring->itr_setting = pf->tx_itr_default; @@ -11748,7 +11753,7 @@ static int i40e_init_msix(struct i40e_pf *pf) int v_actual; int iwarp_requested = 0; - if (!(pf->flags & I40E_FLAG_MSIX_ENABLED)) + if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) return -ENODEV; /* The number of vectors we'll request will be comprised of: @@ -11787,7 +11792,7 @@ static int i40e_init_msix(struct i40e_pf *pf) vectors_left -= pf->num_lan_msix; /* reserve one vector for sideband flow director */ - if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) { if (vectors_left) { pf->num_fdsb_msix = 1; v_budget++; @@ -11798,7 +11803,7 @@ static int i40e_init_msix(struct i40e_pf *pf) } /* can we reserve enough for iWARP? */ - if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) { iwarp_requested = pf->num_iwarp_msix; if (!vectors_left) @@ -11810,7 +11815,7 @@ static int i40e_init_msix(struct i40e_pf *pf) } /* any vectors left over go for VMDq support */ - if (pf->flags & I40E_FLAG_VMDQ_ENABLED) { + if (test_bit(I40E_FLAG_VMDQ_ENA, pf->flags)) { if (!vectors_left) { pf->num_vmdq_msix = 0; pf->num_vmdq_qps = 0; @@ -11867,7 +11872,7 @@ static int i40e_init_msix(struct i40e_pf *pf) v_actual = i40e_reserve_msix_vectors(pf, v_budget); if (v_actual < I40E_MIN_MSIX) { - pf->flags &= ~I40E_FLAG_MSIX_ENABLED; + clear_bit(I40E_FLAG_MSIX_ENA, pf->flags); kfree(pf->msix_entries); pf->msix_entries = NULL; pci_disable_msix(pf->pdev); @@ -11905,7 +11910,7 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->num_lan_msix = 1; break; case 3: - if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) { pf->num_lan_msix = 1; pf->num_iwarp_msix = 1; } else { @@ -11913,7 +11918,7 @@ static int i40e_init_msix(struct i40e_pf *pf) } break; default: - if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) { pf->num_iwarp_msix = min_t(int, (vec / 3), iwarp_requested); pf->num_vmdq_vsis = min_t(int, (vec / 3), @@ -11922,7 +11927,7 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->num_vmdq_vsis = min_t(int, (vec / 2), I40E_DEFAULT_NUM_VMDQ_VSI); } - if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) { pf->num_fdsb_msix = 1; vec--; } @@ -11934,22 +11939,20 @@ static int i40e_init_msix(struct i40e_pf *pf) } } - if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) && - (pf->num_fdsb_msix == 0)) { + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) && pf->num_fdsb_msix == 0) { dev_info(&pf->pdev->dev, "Sideband Flowdir disabled, not enough MSI-X vectors\n"); - pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; - pf->flags |= I40E_FLAG_FD_SB_INACTIVE; + clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); } - if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) && - (pf->num_vmdq_msix == 0)) { + if (test_bit(I40E_FLAG_VMDQ_ENA, pf->flags) && pf->num_vmdq_msix == 0) { dev_info(&pf->pdev->dev, "VMDq disabled, not enough MSI-X vectors\n"); - pf->flags &= ~I40E_FLAG_VMDQ_ENABLED; + 
clear_bit(I40E_FLAG_VMDQ_ENA, pf->flags); } - if ((pf->flags & I40E_FLAG_IWARP_ENABLED) && - (pf->num_iwarp_msix == 0)) { + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags) && + pf->num_iwarp_msix == 0) { dev_info(&pf->pdev->dev, "IWARP disabled, not enough MSI-X vectors\n"); - pf->flags &= ~I40E_FLAG_IWARP_ENABLED; + clear_bit(I40E_FLAG_IWARP_ENA, pf->flags); } i40e_debug(&pf->hw, I40E_DEBUG_INIT, "MSI-X vector distribution: PF %d, VMDq %d, FDSB %d, iWARP %d\n", @@ -12003,7 +12006,7 @@ static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi) int err, v_idx, num_q_vectors; /* if not MSIX, give the one vector only to the LAN VSI */ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) num_q_vectors = vsi->num_q_vectors; else if (vsi == pf->vsi[pf->lan_vsi]) num_q_vectors = 1; @@ -12034,38 +12037,39 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf) int vectors = 0; ssize_t size; - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { vectors = i40e_init_msix(pf); if (vectors < 0) { - pf->flags &= ~(I40E_FLAG_MSIX_ENABLED | - I40E_FLAG_IWARP_ENABLED | - I40E_FLAG_RSS_ENABLED | - I40E_FLAG_DCB_CAPABLE | - I40E_FLAG_DCB_ENABLED | - I40E_FLAG_SRIOV_ENABLED | - I40E_FLAG_FD_SB_ENABLED | - I40E_FLAG_FD_ATR_ENABLED | - I40E_FLAG_VMDQ_ENABLED); - pf->flags |= I40E_FLAG_FD_SB_INACTIVE; + clear_bit(I40E_FLAG_MSIX_ENA, pf->flags); + clear_bit(I40E_FLAG_IWARP_ENA, pf->flags); + clear_bit(I40E_FLAG_RSS_ENA, pf->flags); + clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); + clear_bit(I40E_FLAG_SRIOV_ENA, pf->flags); + clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + clear_bit(I40E_FLAG_FD_ATR_ENA, pf->flags); + clear_bit(I40E_FLAG_VMDQ_ENA, pf->flags); + set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); /* rework the queue expectations without MSIX */ i40e_determine_queue_usage(pf); } } - if (!(pf->flags & I40E_FLAG_MSIX_ENABLED) && - (pf->flags & I40E_FLAG_MSI_ENABLED)) { + if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags) && + test_bit(I40E_FLAG_MSI_ENA, pf->flags)) { dev_info(&pf->pdev->dev, "MSI-X not available, trying MSI\n"); vectors = pci_enable_msi(pf->pdev); if (vectors < 0) { dev_info(&pf->pdev->dev, "MSI init failed - %d\n", vectors); - pf->flags &= ~I40E_FLAG_MSI_ENABLED; + clear_bit(I40E_FLAG_MSI_ENA, pf->flags); } vectors = 1; /* one MSI or Legacy vector */ } - if (!(pf->flags & (I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED))) + if (!test_bit(I40E_FLAG_MSI_ENA, pf->flags) && + !test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) dev_info(&pf->pdev->dev, "MSI-X and MSI not available, falling back to Legacy IRQ\n"); /* set up vector assignment tracking */ @@ -12098,7 +12102,8 @@ static int i40e_restore_interrupt_scheme(struct i40e_pf *pf) * scheme. 
We need to re-enabled them here in order to attempt to * re-acquire the MSI or MSI-X vectors */ - pf->flags |= (I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED); + set_bit(I40E_FLAG_MSI_ENA, pf->flags); + set_bit(I40E_FLAG_MSIX_ENA, pf->flags); err = i40e_init_interrupt_scheme(pf); if (err) @@ -12120,7 +12125,7 @@ static int i40e_restore_interrupt_scheme(struct i40e_pf *pf) if (err) goto err_unwind; - if (pf->flags & I40E_FLAG_IWARP_ENABLED) + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) i40e_client_update_msix_info(pf); return 0; @@ -12148,7 +12153,7 @@ static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf) { int err; - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { err = i40e_setup_misc_vector(pf); if (err) { @@ -12158,7 +12163,7 @@ static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf) return err; } } else { - u32 flags = pf->flags & I40E_FLAG_MSI_ENABLED ? 0 : IRQF_SHARED; + u32 flags = test_bit(I40E_FLAG_MSI_ENA, pf->flags) ? 0 : IRQF_SHARED; err = request_irq(pf->pdev->irq, i40e_intr, flags, pf->int_name, pf); @@ -12362,7 +12367,7 @@ int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) { struct i40e_pf *pf = vsi->back; - if (pf->hw_features & I40E_HW_RSS_AQ_CAPABLE) + if (test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps)) return i40e_config_rss_aq(vsi, seed, lut, lut_size); else return i40e_config_rss_reg(vsi, seed, lut, lut_size); @@ -12381,7 +12386,7 @@ int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) { struct i40e_pf *pf = vsi->back; - if (pf->hw_features & I40E_HW_RSS_AQ_CAPABLE) + if (test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps)) return i40e_get_rss_aq(vsi, seed, lut, lut_size); else return i40e_get_rss_reg(vsi, seed, lut, lut_size); @@ -12484,7 +12489,7 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count) struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; int new_rss_size; - if (!(pf->flags & I40E_FLAG_RSS_ENABLED)) + if (!test_bit(I40E_FLAG_RSS_ENA, pf->flags)) return 0; queue_count = min_t(int, queue_count, num_online_cpus()); @@ -12717,9 +12722,9 @@ static int i40e_sw_init(struct i40e_pf *pf) u16 pow; /* Set default capability flags */ - pf->flags = I40E_FLAG_RX_CSUM_ENABLED | - I40E_FLAG_MSI_ENABLED | - I40E_FLAG_MSIX_ENABLED; + bitmap_zero(pf->flags, I40E_PF_FLAGS_NBITS); + set_bit(I40E_FLAG_MSI_ENA, pf->flags); + set_bit(I40E_FLAG_MSIX_ENA, pf->flags); /* Set default ITR */ pf->rx_itr_default = I40E_ITR_RX_DEF; @@ -12739,14 +12744,14 @@ static int i40e_sw_init(struct i40e_pf *pf) pf->rss_size_max = min_t(int, pf->rss_size_max, pow); if (pf->hw.func_caps.rss) { - pf->flags |= I40E_FLAG_RSS_ENABLED; + set_bit(I40E_FLAG_RSS_ENA, pf->flags); pf->alloc_rss_size = min_t(int, pf->rss_size_max, num_online_cpus()); } /* MFP mode enabled */ if (pf->hw.func_caps.npar_enable || pf->hw.func_caps.flex10_enable) { - pf->flags |= I40E_FLAG_MFP_ENABLED; + set_bit(I40E_FLAG_MFP_ENA, pf->flags); dev_info(&pf->pdev->dev, "MFP mode Enabled\n"); if (i40e_get_partition_bw_setting(pf)) { dev_warn(&pf->pdev->dev, @@ -12763,84 +12768,32 @@ static int i40e_sw_init(struct i40e_pf *pf) if ((pf->hw.func_caps.fd_filters_guaranteed > 0) || (pf->hw.func_caps.fd_filters_best_effort > 0)) { - pf->flags |= I40E_FLAG_FD_ATR_ENABLED; + set_bit(I40E_FLAG_FD_ATR_ENA, pf->flags); pf->atr_sample_rate = I40E_DEFAULT_ATR_SAMPLE_RATE; - if (pf->flags & I40E_FLAG_MFP_ENABLED && + if (test_bit(I40E_FLAG_MFP_ENA, pf->flags) && pf->hw.num_partitions > 1) dev_info(&pf->pdev->dev, "Flow Director 
Sideband mode Disabled in MFP mode\n"); else - pf->flags |= I40E_FLAG_FD_SB_ENABLED; + set_bit(I40E_FLAG_FD_SB_ENA, pf->flags); pf->fdir_pf_filter_count = pf->hw.func_caps.fd_filters_guaranteed; pf->hw.fdir_shared_filter_count = pf->hw.func_caps.fd_filters_best_effort; } - if (pf->hw.mac.type == I40E_MAC_X722) { - pf->hw_features |= (I40E_HW_RSS_AQ_CAPABLE | - I40E_HW_128_QP_RSS_CAPABLE | - I40E_HW_ATR_EVICT_CAPABLE | - I40E_HW_WB_ON_ITR_CAPABLE | - I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE | - I40E_HW_NO_PCI_LINK_CHECK | - I40E_HW_USE_SET_LLDP_MIB | - I40E_HW_GENEVE_OFFLOAD_CAPABLE | - I40E_HW_PTP_L4_CAPABLE | - I40E_HW_WOL_MC_MAGIC_PKT_WAKE | - I40E_HW_OUTER_UDP_CSUM_CAPABLE); - -#define I40E_FDEVICT_PCTYPE_DEFAULT 0xc03 - if (rd32(&pf->hw, I40E_GLQF_FDEVICTENA(1)) != - I40E_FDEVICT_PCTYPE_DEFAULT) { - dev_warn(&pf->pdev->dev, - "FD EVICT PCTYPES are not right, disable FD HW EVICT\n"); - pf->hw_features &= ~I40E_HW_ATR_EVICT_CAPABLE; - } - } else if ((pf->hw.aq.api_maj_ver > 1) || - ((pf->hw.aq.api_maj_ver == 1) && - (pf->hw.aq.api_min_ver > 4))) { - /* Supported in FW API version higher than 1.4 */ - pf->hw_features |= I40E_HW_GENEVE_OFFLOAD_CAPABLE; - } - /* Enable HW ATR eviction if possible */ - if (pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE) - pf->flags |= I40E_FLAG_HW_ATR_EVICT_ENABLED; - - if ((pf->hw.mac.type == I40E_MAC_XL710) && - (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) || - (pf->hw.aq.fw_maj_ver < 4))) { - pf->hw_features |= I40E_HW_RESTART_AUTONEG; - /* No DCB support for FW < v4.33 */ - pf->hw_features |= I40E_HW_NO_DCB_SUPPORT; - } - - /* Disable FW LLDP if FW < v4.3 */ - if ((pf->hw.mac.type == I40E_MAC_XL710) && - (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) || - (pf->hw.aq.fw_maj_ver < 4))) - pf->hw_features |= I40E_HW_STOP_FW_LLDP; - - /* Use the FW Set LLDP MIB API if FW > v4.40 */ - if ((pf->hw.mac.type == I40E_MAC_XL710) && - (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver >= 40)) || - (pf->hw.aq.fw_maj_ver >= 5))) - pf->hw_features |= I40E_HW_USE_SET_LLDP_MIB; - - /* Enable PTP L4 if FW > v6.0 */ - if (pf->hw.mac.type == I40E_MAC_XL710 && - pf->hw.aq.fw_maj_ver >= 6) - pf->hw_features |= I40E_HW_PTP_L4_CAPABLE; + if (test_bit(I40E_HW_CAP_ATR_EVICT, pf->hw.caps)) + set_bit(I40E_FLAG_HW_ATR_EVICT_ENA, pf->flags); if (pf->hw.func_caps.vmdq && num_online_cpus() != 1) { pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI; - pf->flags |= I40E_FLAG_VMDQ_ENABLED; + set_bit(I40E_FLAG_VMDQ_ENA, pf->flags); pf->num_vmdq_qps = i40e_default_queues_per_vmdq(pf); } if (pf->hw.func_caps.iwarp && num_online_cpus() != 1) { - pf->flags |= I40E_FLAG_IWARP_ENABLED; + set_bit(I40E_FLAG_IWARP_ENA, pf->flags); /* IWARP needs one extra vector for CQP just like MISC.*/ pf->num_iwarp_msix = (int)num_online_cpus() + 1; } @@ -12850,14 +12803,13 @@ static int i40e_sw_init(struct i40e_pf *pf) * if NPAR is functioning so unset this hw flag in this case. 
*/ if (pf->hw.mac.type == I40E_MAC_XL710 && - pf->hw.func_caps.npar_enable && - (pf->hw.flags & I40E_HW_FLAG_FW_LLDP_STOPPABLE)) - pf->hw.flags &= ~I40E_HW_FLAG_FW_LLDP_STOPPABLE; + pf->hw.func_caps.npar_enable) + clear_bit(I40E_HW_CAP_FW_LLDP_STOPPABLE, pf->hw.caps); #ifdef CONFIG_PCI_IOV if (pf->hw.func_caps.num_vfs && pf->hw.partition_id == 1) { pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF; - pf->flags |= I40E_FLAG_SRIOV_ENABLED; + set_bit(I40E_FLAG_SRIOV_ENA, pf->flags); pf->num_req_vfs = min_t(int, pf->hw.func_caps.num_vfs, I40E_MAX_VF_COUNT); @@ -12868,7 +12820,7 @@ static int i40e_sw_init(struct i40e_pf *pf) pf->lan_vsi = I40E_NO_VSI; /* By default FW has this off for performance reasons */ - pf->flags &= ~I40E_FLAG_VEB_STATS_ENABLED; + clear_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags); /* set up queue assignment tracking */ size = sizeof(struct i40e_lump_tracking) @@ -12887,8 +12839,8 @@ static int i40e_sw_init(struct i40e_pf *pf) /* Link down on close must be on when total port shutdown * is enabled for a given port */ - pf->flags |= (I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED | - I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED); + set_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags); + set_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags); dev_info(&pf->pdev->dev, "total-port-shutdown was enabled, link-down-on-close is forced on\n"); } @@ -12914,31 +12866,31 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features) */ if (features & NETIF_F_NTUPLE) { /* Enable filters and mark for reset */ - if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED)) + if (!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) need_reset = true; /* enable FD_SB only if there is MSI-X vector and no cloud * filters exist */ if (pf->num_fdsb_msix > 0 && !pf->num_cloud_filters) { - pf->flags |= I40E_FLAG_FD_SB_ENABLED; - pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE; + set_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + clear_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); } } else { /* turn off filters, mark for reset and clear SW filter list */ - if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) { need_reset = true; i40e_fdir_filter_exit(pf); } - pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; + clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); clear_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state); - pf->flags |= I40E_FLAG_FD_SB_INACTIVE; + set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); /* reset fd counters */ pf->fd_add_err = 0; pf->fd_atr_cnt = 0; /* if ATR was auto disabled it can be re-enabled. 
*/ if (test_and_clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state)) - if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && + if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) && (I40E_DEBUG_FD & pf->hw.debug_mask)) dev_info(&pf->pdev->dev, "ATR re-enabled.\n"); } @@ -13087,7 +13039,7 @@ static int i40e_get_phys_port_id(struct net_device *netdev, struct i40e_pf *pf = np->vsi->back; struct i40e_hw *hw = &pf->hw; - if (!(pf->hw_features & I40E_HW_PORT_ID_VALID)) + if (!test_bit(I40E_HW_CAP_PORT_ID_VALID, pf->hw.caps)) return -EOPNOTSUPP; ppid->id_len = min_t(int, sizeof(hw->mac.port_addr), sizeof(ppid->id)); @@ -13116,7 +13068,7 @@ static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct i40e_pf *pf = np->vsi->back; int err = 0; - if (!(pf->flags & I40E_FLAG_SRIOV_ENABLED)) + if (!test_bit(I40E_FLAG_SRIOV_ENA, pf->flags)) return -EOPNOTSUPP; if (vid) { @@ -13216,9 +13168,9 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev, veb->bridge_mode = mode; /* TODO: If no VFs or VMDq VSIs, disallow VEB mode */ if (mode == BRIDGE_MODE_VEB) - pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; + set_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); else - pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED; + clear_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); i40e_do_reset(pf, I40E_PF_RESET_FLAG, true); break; } @@ -13552,7 +13504,7 @@ static void i40e_queue_pair_enable_irq(struct i40e_vsi *vsi, int queue_pair) struct i40e_hw *hw = &pf->hw; /* All rings in a qp belong to the same qvector. */ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) i40e_irq_dynamic_enable(vsi, rxr->q_vector->v_idx); else i40e_irq_dynamic_enable_icr0(pf); @@ -13577,7 +13529,7 @@ static void i40e_queue_pair_disable_irq(struct i40e_vsi *vsi, int queue_pair) * * All rings in a qp belong to the same qvector. */ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { u32 intpf = vsi->base_vector + rxr->q_vector->v_idx; wr32(hw, I40E_PFINT_DYN_CTLN(intpf - 1), 0); @@ -13762,7 +13714,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) NETIF_F_RXCSUM | 0; - if (!(pf->hw_features & I40E_HW_OUTER_UDP_CSUM_CAPABLE)) + if (!test_bit(I40E_HW_CAP_OUTER_UDP_CSUM, pf->hw.caps)) netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; netdev->udp_tunnel_nic_info = &pf->udp_tunnel_nic; @@ -13798,7 +13750,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; - if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) + if (!test_bit(I40E_FLAG_MFP_ENA, pf->flags)) hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC; netdev->hw_features |= hw_features | NETIF_F_LOOPBACK; @@ -13989,7 +13941,7 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) * negative logic - if it's set, we need to fiddle with * the VSI to disable source pruning. 
*/ - if (pf->flags & I40E_FLAG_SOURCE_PRUNING_DISABLED) { + if (test_bit(I40E_FLAG_SOURCE_PRUNING_DIS, pf->flags)) { memset(&ctxt, 0, sizeof(ctxt)); ctxt.seid = pf->main_vsi_seid; ctxt.pf_num = pf->hw.pf_id; @@ -14011,7 +13963,7 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) } /* MFP mode setup queue map and update VSI */ - if ((pf->flags & I40E_FLAG_MFP_ENABLED) && + if (test_bit(I40E_FLAG_MFP_ENA, pf->flags) && !(pf->hw.func_caps.iscsi)) { /* NIC type PF */ memset(&ctxt, 0, sizeof(ctxt)); ctxt.seid = pf->main_vsi_seid; @@ -14059,7 +14011,7 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) ctxt.uplink_seid = vsi->uplink_seid; ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL; ctxt.flags = I40E_AQ_VSI_TYPE_PF; - if ((pf->flags & I40E_FLAG_VEB_MODE_ENABLED) && + if (test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags) && (i40e_is_vsi_uplink_mode_veb(vsi))) { ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); @@ -14107,7 +14059,7 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB); } - if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) { + if (test_bit(I40E_FLAG_IWARP_ENA, vsi->back->flags)) { ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID); ctxt.info.queueing_opt_flags |= @@ -14323,7 +14275,7 @@ static int i40e_vsi_setup_vectors(struct i40e_vsi *vsi) /* In Legacy mode, we do not have to get any other vector since we * piggyback on the misc/ICR0 for queue interrupts. */ - if (!(pf->flags & I40E_FLAG_MSIX_ENABLED)) + if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) return ret; if (vsi->num_q_vectors) vsi->base_vector = i40e_get_lump(pf, pf->irq_pile, @@ -14490,9 +14442,9 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, * already enabled, in which case we can't force VEPA * mode. 
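	 * Editorial illustration of the matching pf->flags conversion (a
	 * sketch, not code from this series): pf->flags is now an unsigned
	 * long bitmap (it is passed straight to
	 * i40e_set_fec_in_flags(..., pf->flags) later in this patch), so the
	 * check and clear below take the form
	 *	if (!test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags))
	 *		clear_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags);
	 * in place of the old
	 *	if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED))
	 *		pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;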
*/ - if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { + if (!test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) { veb->bridge_mode = BRIDGE_MODE_VEPA; - pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED; + clear_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); } i40e_config_bridge_mode(veb); } @@ -14588,12 +14540,16 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, break; } - if ((pf->hw_features & I40E_HW_RSS_AQ_CAPABLE) && - (vsi->type == I40E_VSI_VMDQ2)) { + if (test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps) && + vsi->type == I40E_VSI_VMDQ2) { ret = i40e_vsi_config_rss(vsi); + if (ret) + goto err_config; } return vsi; +err_config: + i40e_vsi_clear_rings(vsi); err_rings: i40e_vsi_free_q_vectors(vsi); err_msix: @@ -14831,7 +14787,7 @@ void i40e_veb_release(struct i40e_veb *veb) static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi) { struct i40e_pf *pf = veb->pf; - bool enable_stats = !!(pf->flags & I40E_FLAG_VEB_STATS_ENABLED); + bool enable_stats = !!test_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags); int ret; ret = i40e_aq_add_veb(&pf->hw, veb->uplink_seid, vsi->seid, @@ -15131,7 +15087,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acqui */ if ((pf->hw.pf_id == 0) && - !(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT)) { + !test_bit(I40E_FLAG_TRUE_PROMISC_ENA, pf->flags)) { flags = I40E_AQ_SET_SWITCH_CFG_PROMISC; pf->last_sw_conf_flags = flags; } @@ -15198,7 +15154,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acqui /* enable RSS in the HW, even for only one queue, as the stack can use * the hash */ - if ((pf->flags & I40E_FLAG_RSS_ENABLED)) + if (test_bit(I40E_FLAG_RSS_ENA, pf->flags)) i40e_pf_config_rss(pf); /* fill in link information and enable LSE reporting */ @@ -15240,42 +15196,42 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) queues_left = pf->hw.func_caps.num_tx_qp; if ((queues_left == 1) || - !(pf->flags & I40E_FLAG_MSIX_ENABLED)) { + !test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { /* one qp for PF, no queues for anything else */ queues_left = 0; pf->alloc_rss_size = pf->num_lan_qps = 1; /* make sure all the fancies are disabled */ - pf->flags &= ~(I40E_FLAG_RSS_ENABLED | - I40E_FLAG_IWARP_ENABLED | - I40E_FLAG_FD_SB_ENABLED | - I40E_FLAG_FD_ATR_ENABLED | - I40E_FLAG_DCB_CAPABLE | - I40E_FLAG_DCB_ENABLED | - I40E_FLAG_SRIOV_ENABLED | - I40E_FLAG_VMDQ_ENABLED); - pf->flags |= I40E_FLAG_FD_SB_INACTIVE; - } else if (!(pf->flags & (I40E_FLAG_RSS_ENABLED | - I40E_FLAG_FD_SB_ENABLED | - I40E_FLAG_FD_ATR_ENABLED | - I40E_FLAG_DCB_CAPABLE))) { + clear_bit(I40E_FLAG_RSS_ENA, pf->flags); + clear_bit(I40E_FLAG_IWARP_ENA, pf->flags); + clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + clear_bit(I40E_FLAG_FD_ATR_ENA, pf->flags); + clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); + clear_bit(I40E_FLAG_SRIOV_ENA, pf->flags); + clear_bit(I40E_FLAG_VMDQ_ENA, pf->flags); + set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); + } else if (!test_bit(I40E_FLAG_RSS_ENA, pf->flags) && + !test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) && + !test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) && + !test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) { /* one qp for PF */ pf->alloc_rss_size = pf->num_lan_qps = 1; queues_left -= pf->num_lan_qps; - pf->flags &= ~(I40E_FLAG_RSS_ENABLED | - I40E_FLAG_IWARP_ENABLED | - I40E_FLAG_FD_SB_ENABLED | - I40E_FLAG_FD_ATR_ENABLED | - I40E_FLAG_DCB_ENABLED | - I40E_FLAG_VMDQ_ENABLED); - pf->flags |= I40E_FLAG_FD_SB_INACTIVE; + clear_bit(I40E_FLAG_RSS_ENA, pf->flags); + 
clear_bit(I40E_FLAG_IWARP_ENA, pf->flags); + clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + clear_bit(I40E_FLAG_FD_ATR_ENA, pf->flags); + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); + clear_bit(I40E_FLAG_VMDQ_ENA, pf->flags); + set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); } else { /* Not enough queues for all TCs */ - if ((pf->flags & I40E_FLAG_DCB_CAPABLE) && - (queues_left < I40E_MAX_TRAFFIC_CLASS)) { - pf->flags &= ~(I40E_FLAG_DCB_CAPABLE | - I40E_FLAG_DCB_ENABLED); + if (test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags) && + queues_left < I40E_MAX_TRAFFIC_CLASS) { + clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); dev_info(&pf->pdev->dev, "not enough queues for DCB. DCB is disabled.\n"); } @@ -15288,24 +15244,24 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) queues_left -= pf->num_lan_qps; } - if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) { if (queues_left > 1) { queues_left -= 1; /* save 1 queue for FD */ } else { - pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; - pf->flags |= I40E_FLAG_FD_SB_INACTIVE; + clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags); + set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags); dev_info(&pf->pdev->dev, "not enough queues for Flow Director. Flow Director feature is disabled\n"); } } - if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) && + if (test_bit(I40E_FLAG_SRIOV_ENA, pf->flags) && pf->num_vf_qps && pf->num_req_vfs && queues_left) { pf->num_req_vfs = min_t(int, pf->num_req_vfs, (queues_left / pf->num_vf_qps)); queues_left -= (pf->num_req_vfs * pf->num_vf_qps); } - if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) && + if (test_bit(I40E_FLAG_VMDQ_ENA, pf->flags) && pf->num_vmdq_vsis && pf->num_vmdq_qps && queues_left) { pf->num_vmdq_vsis = min_t(int, pf->num_vmdq_vsis, (queues_left / pf->num_vmdq_qps)); @@ -15316,7 +15272,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) dev_dbg(&pf->pdev->dev, "qs_avail=%d FD SB=%d lan_qs=%d lan_tc0=%d vf=%d*%d vmdq=%d*%d, remaining=%d\n", pf->hw.func_caps.num_tx_qp, - !!(pf->flags & I40E_FLAG_FD_SB_ENABLED), + !!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags), pf->num_lan_qps, pf->alloc_rss_size, pf->num_req_vfs, pf->num_vf_qps, pf->num_vmdq_vsis, pf->num_vmdq_qps, queues_left); @@ -15340,7 +15296,8 @@ static int i40e_setup_pf_filter_control(struct i40e_pf *pf) settings->hash_lut_size = I40E_HASH_LUT_SIZE_128; /* Flow Director is enabled */ - if (pf->flags & (I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED)) + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) || + test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags)) settings->enable_fdir = true; /* Ethtype and MACVLAN filters enabled for PF */ @@ -15372,21 +15329,21 @@ static void i40e_print_features(struct i40e_pf *pf) i += scnprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d", pf->hw.func_caps.num_vsis, pf->vsi[pf->lan_vsi]->num_queue_pairs); - if (pf->flags & I40E_FLAG_RSS_ENABLED) + if (test_bit(I40E_FLAG_RSS_ENA, pf->flags)) i += scnprintf(&buf[i], REMAIN(i), " RSS"); - if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) + if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags)) i += scnprintf(&buf[i], REMAIN(i), " FD_ATR"); - if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) { i += scnprintf(&buf[i], REMAIN(i), " FD_SB"); i += scnprintf(&buf[i], REMAIN(i), " NTUPLE"); } - if (pf->flags & I40E_FLAG_DCB_CAPABLE) + if (test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) i += scnprintf(&buf[i], REMAIN(i), " DCB"); i += scnprintf(&buf[i], REMAIN(i), " VxLAN"); i += scnprintf(&buf[i], REMAIN(i), " Geneve"); - if 
(pf->flags & I40E_FLAG_PTP) + if (test_bit(I40E_FLAG_PTP_ENA, pf->flags)) i += scnprintf(&buf[i], REMAIN(i), " PTP"); - if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) + if (test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) i += scnprintf(&buf[i], REMAIN(i), " VEB"); else i += scnprintf(&buf[i], REMAIN(i), " VEPA"); @@ -15417,22 +15374,26 @@ static void i40e_get_platform_mac_addr(struct pci_dev *pdev, struct i40e_pf *pf) * @fec_cfg: FEC option to set in flags * @flags: ptr to flags in which we set FEC option **/ -void i40e_set_fec_in_flags(u8 fec_cfg, u32 *flags) +void i40e_set_fec_in_flags(u8 fec_cfg, unsigned long *flags) { - if (fec_cfg & I40E_AQ_SET_FEC_AUTO) - *flags |= I40E_FLAG_RS_FEC | I40E_FLAG_BASE_R_FEC; + if (fec_cfg & I40E_AQ_SET_FEC_AUTO) { + set_bit(I40E_FLAG_RS_FEC, flags); + set_bit(I40E_FLAG_BASE_R_FEC, flags); + } if ((fec_cfg & I40E_AQ_SET_FEC_REQUEST_RS) || (fec_cfg & I40E_AQ_SET_FEC_ABILITY_RS)) { - *flags |= I40E_FLAG_RS_FEC; - *flags &= ~I40E_FLAG_BASE_R_FEC; + set_bit(I40E_FLAG_RS_FEC, flags); + clear_bit(I40E_FLAG_BASE_R_FEC, flags); } if ((fec_cfg & I40E_AQ_SET_FEC_REQUEST_KR) || (fec_cfg & I40E_AQ_SET_FEC_ABILITY_KR)) { - *flags |= I40E_FLAG_BASE_R_FEC; - *flags &= ~I40E_FLAG_RS_FEC; + set_bit(I40E_FLAG_BASE_R_FEC, flags); + clear_bit(I40E_FLAG_RS_FEC, flags); + } + if (fec_cfg == 0) { + clear_bit(I40E_FLAG_RS_FEC, flags); + clear_bit(I40E_FLAG_BASE_R_FEC, flags); } - if (fec_cfg == 0) - *flags &= ~(I40E_FLAG_RS_FEC | I40E_FLAG_BASE_R_FEC); } /** @@ -15858,15 +15819,15 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->vendor_id, hw->device_id, hw->subsystem_vendor_id, hw->subsystem_device_id); - if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && - hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw)) + if (i40e_is_aq_api_ver_ge(hw, I40E_FW_API_VERSION_MAJOR, + I40E_FW_MINOR_VERSION(hw) + 1)) dev_dbg(&pdev->dev, "The driver for the device detected a newer version of the NVM image v%u.%u than v%u.%u.\n", hw->aq.api_maj_ver, hw->aq.api_min_ver, I40E_FW_API_VERSION_MAJOR, I40E_FW_MINOR_VERSION(hw)); - else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4) + else if (i40e_is_aq_api_ver_lt(hw, 1, 4)) dev_info(&pdev->dev, "The driver for the device detected an older version of the NVM image v%u.%u than expected v%u.%u. Please update the NVM image.\n", hw->aq.api_maj_ver, @@ -15913,7 +15874,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * Ignore error return codes because if it was already disabled via * hardware settings this will fail */ - if (pf->hw_features & I40E_HW_STOP_FW_LLDP) { + if (test_bit(I40E_HW_CAP_STOP_FW_LLDP, pf->hw.caps)) { dev_info(&pdev->dev, "Stopping firmware LLDP agent.\n"); i40e_aq_stop_lldp(hw, true, false, NULL); } @@ -15930,7 +15891,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ether_addr_copy(hw->mac.perm_addr, hw->mac.addr); i40e_get_port_mac_addr(hw, hw->mac.port_addr); if (is_valid_ether_addr(hw->mac.port_addr)) - pf->hw_features |= I40E_HW_PORT_ID_VALID; + set_bit(I40E_HW_CAP_PORT_ID_VALID, pf->hw.caps); i40e_ptp_alloc_pins(pf); pci_set_drvdata(pdev, pf); @@ -15940,10 +15901,10 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) status = i40e_get_fw_lldp_status(&pf->hw, &lldp_status); (!status && lldp_status == I40E_GET_FW_LLDP_STATUS_ENABLED) ? 
- (pf->flags &= ~I40E_FLAG_DISABLE_FW_LLDP) : - (pf->flags |= I40E_FLAG_DISABLE_FW_LLDP); + (clear_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags)) : + (set_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags)); dev_info(&pdev->dev, - (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) ? + test_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags) ? "FW LLDP is disabled\n" : "FW LLDP is enabled\n"); @@ -15953,7 +15914,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = i40e_init_pf_dcb(pf); if (err) { dev_info(&pdev->dev, "DCB init failed %d, disabled\n", err); - pf->flags &= ~(I40E_FLAG_DCB_CAPABLE | I40E_FLAG_DCB_ENABLED); + clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); + clear_bit(I40E_FLAG_DCB_ENA, pf->flags); /* Continue without DCB enabled */ } #endif /* CONFIG_I40E_DCB */ @@ -16021,11 +15983,11 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) #ifdef CONFIG_PCI_IOV /* prep for VF support */ - if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) && - (pf->flags & I40E_FLAG_MSIX_ENABLED) && + if (test_bit(I40E_FLAG_SRIOV_ENA, pf->flags) && + test_bit(I40E_FLAG_MSIX_ENA, pf->flags) && !test_bit(__I40E_BAD_EEPROM, pf->state)) { if (pci_num_vf(pdev)) - pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; + set_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); } #endif err = i40e_setup_pf_switch(pf, false, false); @@ -16066,7 +16028,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) wr32(hw, I40E_REG_MSS, val); } - if (pf->hw_features & I40E_HW_RESTART_AUTONEG) { + if (test_bit(I40E_HW_CAP_RESTART_AUTONEG, pf->hw.caps)) { msleep(75); err = i40e_aq_set_link_restart_an(&pf->hw, true, NULL); if (err) @@ -16086,7 +16048,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * the misc functionality and queue processing is combined in * the same vector and that gets setup at open. 
*/ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { err = i40e_setup_misc_vector(pf); if (err) { dev_info(&pdev->dev, @@ -16099,8 +16061,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) #ifdef CONFIG_PCI_IOV /* prep for VF support */ - if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) && - (pf->flags & I40E_FLAG_MSIX_ENABLED) && + if (test_bit(I40E_FLAG_SRIOV_ENA, pf->flags) && + test_bit(I40E_FLAG_MSIX_ENA, pf->flags) && !test_bit(__I40E_BAD_EEPROM, pf->state)) { /* disable link interrupts for VFs */ val = rd32(hw, I40E_PFGEN_PORTMDIO_NUM); @@ -16120,7 +16082,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } #endif /* CONFIG_PCI_IOV */ - if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) { pf->iwarp_base_vector = i40e_get_lump(pf, pf->irq_pile, pf->num_iwarp_msix, I40E_IWARP_IRQ_PILE_ID); @@ -16128,7 +16090,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_info(&pdev->dev, "failed to get tracking for %d vectors for IWARP err=%d\n", pf->num_iwarp_msix, pf->iwarp_base_vector); - pf->flags &= ~I40E_FLAG_IWARP_ENABLED; + clear_bit(I40E_FLAG_IWARP_ENA, pf->flags); } } @@ -16142,7 +16104,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) round_jiffies(jiffies + pf->service_timer_period)); /* add this PF to client device list and launch a client service task */ - if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) { err = i40e_lan_add_device(pf); if (err) dev_info(&pdev->dev, "Failed to add PF to client API service list: %d\n", @@ -16155,7 +16117,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * and will report PCI Gen 1 x 1 by default so don't bother * checking them. 
*/ - if (!(pf->hw_features & I40E_HW_NO_PCI_LINK_CHECK)) { + if (!test_bit(I40E_HW_CAP_NO_PCI_LINK_CHECK, pf->hw.caps)) { char speed[PCI_SPEED_SIZE] = "Unknown"; char width[PCI_WIDTH_SIZE] = "Unknown"; @@ -16209,7 +16171,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pf->hw.phy.link_info.requested_speeds = abilities.link_speed; /* set the FEC config due to the board capabilities */ - i40e_set_fec_in_flags(abilities.fec_cfg_curr_mod_ext_info, &pf->flags); + i40e_set_fec_in_flags(abilities.fec_cfg_curr_mod_ext_info, pf->flags); /* get the supported phy types from the fw */ err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities, NULL); @@ -16236,10 +16198,10 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pf->main_vsi_seid); if ((pf->hw.device_id == I40E_DEV_ID_10G_BASE_T) || - (pf->hw.device_id == I40E_DEV_ID_10G_BASE_T4)) - pf->hw_features |= I40E_HW_PHY_CONTROLS_LEDS; + (pf->hw.device_id == I40E_DEV_ID_10G_BASE_T4)) + set_bit(I40E_HW_CAP_PHY_CONTROLS_LEDS, pf->hw.caps); if (pf->hw.device_id == I40E_DEV_ID_SFP_I_X722) - pf->hw_features |= I40E_HW_HAVE_CRT_RETIMER; + set_bit(I40E_HW_CAP_CRT_RETIMER, pf->hw.caps); /* print a string summarizing features */ i40e_print_features(pf); @@ -16308,10 +16270,10 @@ static void i40e_remove(struct pci_dev *pdev) usleep_range(1000, 2000); set_bit(__I40E_IN_REMOVE, pf->state); - if (pf->flags & I40E_FLAG_SRIOV_ENABLED) { + if (test_bit(I40E_FLAG_SRIOV_ENA, pf->flags)) { set_bit(__I40E_VF_RESETS_DISABLED, pf->state); i40e_free_vfs(pf); - pf->flags &= ~I40E_FLAG_SRIOV_ENABLED; + clear_bit(I40E_FLAG_SRIOV_ENA, pf->flags); } /* no more scheduling of any task */ set_bit(__I40E_SUSPENDED, pf->state); @@ -16366,7 +16328,7 @@ static void i40e_remove(struct pci_dev *pdev) i40e_cloud_filter_exit(pf); /* remove attached clients */ - if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) { ret_code = i40e_lan_del_device(pf); if (ret_code) dev_warn(&pdev->dev, "Failed to delete client device: %d\n", @@ -16385,7 +16347,7 @@ static void i40e_remove(struct pci_dev *pdev) unmap: /* Free MSI/legacy interrupt 0 when in recovery mode. */ if (test_bit(__I40E_RECOVERY_MODE, pf->state) && - !(pf->flags & I40E_FLAG_MSIX_ENABLED)) + !test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) free_irq(pf->pdev->irq, pf); /* shutdown the adminq */ @@ -16601,7 +16563,8 @@ static void i40e_shutdown(struct pci_dev *pdev) */ i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false); - if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE)) + if (test_bit(I40E_HW_CAP_WOL_MC_MAGIC_PKT_WAKE, pf->hw.caps) && + pf->wol_en) i40e_enable_mc_magic_wake(pf); i40e_prep_for_reset(pf); @@ -16613,7 +16576,7 @@ static void i40e_shutdown(struct pci_dev *pdev) /* Free MSI/legacy interrupt 0 when in recovery mode. 
*/ if (test_bit(__I40E_RECOVERY_MODE, pf->state) && - !(pf->flags & I40E_FLAG_MSIX_ENABLED)) + !test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) free_irq(pf->pdev->irq, pf); /* Since we're going to destroy queues during the @@ -16654,7 +16617,8 @@ static int __maybe_unused i40e_suspend(struct device *dev) */ i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false); - if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE)) + if (test_bit(I40E_HW_CAP_WOL_MC_MAGIC_PKT_WAKE, pf->hw.caps) && + pf->wol_en) i40e_enable_mc_magic_wake(pf); /* Since we're going to destroy queues during the diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index 77cdbfc19d47..62eb34871094 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -291,7 +291,7 @@ static int i40e_read_nvm_word_aq(struct i40e_hw *hw, u16 offset, static int __i40e_read_nvm_word(struct i40e_hw *hw, u16 offset, u16 *data) { - if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) + if (test_bit(I40E_HW_CAP_AQ_SRCTL_ACCESS_ENABLE, hw->caps)) return i40e_read_nvm_word_aq(hw, offset, data); return i40e_read_nvm_word_srctl(hw, offset, data); @@ -310,14 +310,14 @@ int i40e_read_nvm_word(struct i40e_hw *hw, u16 offset, { int ret_code = 0; - if (hw->flags & I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK) + if (test_bit(I40E_HW_CAP_NVM_READ_REQUIRES_LOCK, hw->caps)) ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ); if (ret_code) return ret_code; ret_code = __i40e_read_nvm_word(hw, offset, data); - if (hw->flags & I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK) + if (test_bit(I40E_HW_CAP_NVM_READ_REQUIRES_LOCK, hw->caps)) i40e_release_nvm(hw); return ret_code; @@ -499,7 +499,7 @@ static int __i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset, u16 *words, u16 *data) { - if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) + if (test_bit(I40E_HW_CAP_AQ_SRCTL_ACCESS_ENABLE, hw->caps)) return i40e_read_nvm_buffer_aq(hw, offset, words, data); return i40e_read_nvm_buffer_srctl(hw, offset, words, data); @@ -521,7 +521,7 @@ int i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset, { int ret_code = 0; - if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) { + if (test_bit(I40E_HW_CAP_AQ_SRCTL_ACCESS_ENABLE, hw->caps)) { ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ); if (!ret_code) { ret_code = i40e_read_nvm_buffer_aq(hw, offset, words, diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index 001162042050..af4269330581 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -501,4 +501,74 @@ i40e_add_pinfo_to_list(struct i40e_hw *hw, /* i40e_ddp */ int i40e_ddp_flash(struct net_device *netdev, struct ethtool_flash *flash); +/* Firmware and AdminQ version check helpers */ + +/** + * i40e_is_aq_api_ver_ge + * @hw: pointer to i40e_hw structure + * @maj: API major value to compare + * @min: API minor value to compare + * + * Assert whether current HW API version is greater/equal than provided. + **/ +static inline bool i40e_is_aq_api_ver_ge(struct i40e_hw *hw, u16 maj, u16 min) +{ + return (hw->aq.api_maj_ver > maj || + (hw->aq.api_maj_ver == maj && hw->aq.api_min_ver >= min)); +} + +/** + * i40e_is_aq_api_ver_lt + * @hw: pointer to i40e_hw structure + * @maj: API major value to compare + * @min: API minor value to compare + * + * Assert whether current HW API version is less than provided. 
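 * Illustrative usage (editorial sketch, mirroring the i40e_probe() change
 * earlier in this patch):
 *	old: else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4)
 *	new: else if (i40e_is_aq_api_ver_lt(hw, 1, 4))
 *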
+ **/ +static inline bool i40e_is_aq_api_ver_lt(struct i40e_hw *hw, u16 maj, u16 min) +{ + return !i40e_is_aq_api_ver_ge(hw, maj, min); +} + +/** + * i40e_is_fw_ver_ge + * @hw: pointer to i40e_hw structure + * @maj: API major value to compare + * @min: API minor value to compare + * + * Assert whether current firmware version is greater/equal than provided. + **/ +static inline bool i40e_is_fw_ver_ge(struct i40e_hw *hw, u16 maj, u16 min) +{ + return (hw->aq.fw_maj_ver > maj || + (hw->aq.fw_maj_ver == maj && hw->aq.fw_min_ver >= min)); +} + +/** + * i40e_is_fw_ver_lt + * @hw: pointer to i40e_hw structure + * @maj: API major value to compare + * @min: API minor value to compare + * + * Assert whether current firmware version is less than provided. + **/ +static inline bool i40e_is_fw_ver_lt(struct i40e_hw *hw, u16 maj, u16 min) +{ + return !i40e_is_fw_ver_ge(hw, maj, min); +} + +/** + * i40e_is_fw_ver_eq + * @hw: pointer to i40e_hw structure + * @maj: API major value to compare + * @min: API minor value to compare + * + * Assert whether current firmware version is equal to provided. + **/ +static inline bool i40e_is_fw_ver_eq(struct i40e_hw *hw, u16 maj, u16 min) +{ + return (hw->aq.fw_maj_ver > maj || + (hw->aq.fw_maj_ver == maj && hw->aq.fw_min_ver == min)); +} + #endif /* _I40E_PROTOTYPE_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 20b77398f060..1cf993a79438 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -35,7 +35,7 @@ enum i40e_ptp_pin { GPIO_4 }; -enum i40e_can_set_pins_t { +enum i40e_can_set_pins { CANT_DO_PINS = -1, CAN_SET_PINS, CAN_DO_PINS @@ -193,7 +193,7 @@ static bool i40e_is_ptp_pin_dev(struct i40e_hw *hw) * return CAN_DO_PINS if pins can be manipulated within a NIC or * return CANT_DO_PINS otherwise. **/ -static enum i40e_can_set_pins_t i40e_can_set_pins(struct i40e_pf *pf) +static enum i40e_can_set_pins i40e_can_set_pins(struct i40e_pf *pf) { if (!i40e_is_ptp_pin_dev(&pf->hw)) { dev_warn(&pf->pdev->dev, @@ -680,7 +680,7 @@ void i40e_ptp_rx_hang(struct i40e_pf *pf) * configured. We don't want to spuriously warn about Rx timestamp * hangs if we don't care about the timestamps. */ - if (!(pf->flags & I40E_FLAG_PTP) || !pf->ptp_rx) + if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags) || !pf->ptp_rx) return; spin_lock_bh(&pf->ptp_rx_lock); @@ -733,7 +733,7 @@ void i40e_ptp_tx_hang(struct i40e_pf *pf) { struct sk_buff *skb; - if (!(pf->flags & I40E_FLAG_PTP) || !pf->ptp_tx) + if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags) || !pf->ptp_tx) return; /* Nothing to do if we're not already waiting for a timestamp */ @@ -771,7 +771,7 @@ void i40e_ptp_tx_hwtstamp(struct i40e_pf *pf) u32 hi, lo; u64 ns; - if (!(pf->flags & I40E_FLAG_PTP) || !pf->ptp_tx) + if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags) || !pf->ptp_tx) return; /* don't attempt to timestamp if we don't have an skb */ @@ -818,7 +818,7 @@ void i40e_ptp_rx_hwtstamp(struct i40e_pf *pf, struct sk_buff *skb, u8 index) /* Since we cannot turn off the Rx timestamp logic if the device is * doing Tx timestamping, check if Rx timestamping is configured. 
*/ - if (!(pf->flags & I40E_FLAG_PTP) || !pf->ptp_rx) + if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags) || !pf->ptp_rx) return; hw = &pf->hw; @@ -924,7 +924,7 @@ int i40e_ptp_get_ts_config(struct i40e_pf *pf, struct ifreq *ifr) { struct hwtstamp_config *config = &pf->tstamp_config; - if (!(pf->flags & I40E_FLAG_PTP)) + if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags)) return -EOPNOTSUPP; return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ? @@ -1071,7 +1071,7 @@ static void i40e_ptp_set_pins_hw(struct i40e_pf *pf) static int i40e_ptp_set_pins(struct i40e_pf *pf, struct i40e_ptp_pins_settings *pins) { - enum i40e_can_set_pins_t pin_caps = i40e_can_set_pins(pf); + enum i40e_can_set_pins pin_caps = i40e_can_set_pins(pf); int i = 0; if (pin_caps == CANT_DO_PINS) @@ -1211,7 +1211,7 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf, case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - if (!(pf->hw_features & I40E_HW_PTP_L4_CAPABLE)) + if (!test_bit(I40E_HW_CAP_PTP_L4, pf->hw.caps)) return -ERANGE; pf->ptp_rx = true; tsyntype = I40E_PRTTSYN_CTL1_V1MESSTYPE0_MASK | @@ -1225,7 +1225,7 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf, case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: - if (!(pf->hw_features & I40E_HW_PTP_L4_CAPABLE)) + if (!test_bit(I40E_HW_CAP_PTP_L4, pf->hw.caps)) return -ERANGE; fallthrough; case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: @@ -1234,7 +1234,7 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf, pf->ptp_rx = true; tsyntype = I40E_PRTTSYN_CTL1_V2MESSTYPE0_MASK | I40E_PRTTSYN_CTL1_TSYNTYPE_V2; - if (pf->hw_features & I40E_HW_PTP_L4_CAPABLE) { + if (test_bit(I40E_HW_CAP_PTP_L4, pf->hw.caps)) { tsyntype |= I40E_PRTTSYN_CTL1_UDP_ENA_MASK; config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; } else { @@ -1308,7 +1308,7 @@ int i40e_ptp_set_ts_config(struct i40e_pf *pf, struct ifreq *ifr) struct hwtstamp_config config; int err; - if (!(pf->flags & I40E_FLAG_PTP)) + if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags)) return -EOPNOTSUPP; if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) @@ -1426,7 +1426,7 @@ static long i40e_ptp_create_clock(struct i40e_pf *pf) void i40e_ptp_save_hw_time(struct i40e_pf *pf) { /* don't try to access the PTP clock if it's not enabled */ - if (!(pf->flags & I40E_FLAG_PTP)) + if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags)) return; i40e_ptp_gettimex(&pf->ptp_caps, &pf->ptp_prev_hw_time, NULL); @@ -1483,7 +1483,7 @@ void i40e_ptp_init(struct i40e_pf *pf) pf_id = (rd32(hw, I40E_PRTTSYN_CTL0) & I40E_PRTTSYN_CTL0_PF_ID_MASK) >> I40E_PRTTSYN_CTL0_PF_ID_SHIFT; if (hw->pf_id != pf_id) { - pf->flags &= ~I40E_FLAG_PTP; + clear_bit(I40E_FLAG_PTP_ENA, pf->flags); dev_info(&pf->pdev->dev, "%s: PTP not supported on %s\n", __func__, netdev->name); @@ -1504,7 +1504,7 @@ void i40e_ptp_init(struct i40e_pf *pf) if (pf->hw.debug_mask & I40E_DEBUG_LAN) dev_info(&pf->pdev->dev, "PHC enabled\n"); - pf->flags |= I40E_FLAG_PTP; + set_bit(I40E_FLAG_PTP_ENA, pf->flags); /* Ensure the clocks are running. 
*/ regval = rd32(hw, I40E_PRTTSYN_CTL0); @@ -1539,7 +1539,7 @@ void i40e_ptp_stop(struct i40e_pf *pf) struct i40e_hw *hw = &pf->hw; u32 regval; - pf->flags &= ~I40E_FLAG_PTP; + clear_bit(I40E_FLAG_PTP_ENA, pf->flags); pf->ptp_tx = false; pf->ptp_rx = false; diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index f408fcf23ce8..d561687303ea 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -899,6 +899,7 @@ #define I40E_GLQF_ORT_FLX_PAYLOAD_SHIFT 7 #define I40E_GLQF_ORT_FLX_PAYLOAD_MASK I40E_MASK(0x1, I40E_GLQF_ORT_FLX_PAYLOAD_SHIFT) #define I40E_GLQF_FDEVICTENA(_i) (0x00270384 + ((_i) * 4)) /* _i=0...1 */ /* Reset: CORER */ +#define I40E_FDEVICT_PCTYPE_DEFAULT 0xc03 /* Redefined for X722 family */ #define I40E_GLGEN_STAT_CLEAR 0x00390004 /* Reset: CORER */ #endif /* _I40E_REGISTER_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index dd410b15000f..b82df5bdfac0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -464,7 +464,7 @@ static int i40e_add_del_fdir_tcp(struct i40e_vsi *vsi, &pf->fd_tcp6_filter_cnt); if (add) { - if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && + if (test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) && I40E_DEBUG_FD & pf->hw.debug_mask) dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n"); set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state); @@ -734,7 +734,7 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u64 qword0_raw, * FD ATR/SB and then re-enable it when there is room. */ if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) { - if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) && + if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) && !test_and_set_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state)) if (I40E_DEBUG_FD & pf->hw.debug_mask) @@ -1071,7 +1071,7 @@ static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi, if (q_vector->arm_wb_state) return; - if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags)) { val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK | I40E_PFINT_DYN_CTLN_ITR_INDX_MASK; /* set noitr */ @@ -1095,7 +1095,7 @@ static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi, **/ void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) { - if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { + if (test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags)) { u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */ I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK | @@ -2699,7 +2699,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, u32 intval; /* If we don't have MSIX, then we only need to re-enable icr0 */ - if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) { + if (!test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags)) { i40e_irq_dynamic_enable_icr0(vsi->back); return; } @@ -2888,7 +2888,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, u16 i; /* make sure ATR is enabled */ - if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED)) + if (!test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags)) return; if (test_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state)) @@ -2933,7 +2933,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, /* Due to lack of space, no more new filters can be programmed */ if (th->syn && test_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state)) return; - if (pf->flags & 
I40E_FLAG_HW_ATR_EVICT_ENABLED) { + if (test_bit(I40E_FLAG_HW_ATR_EVICT_ENA, pf->flags)) { /* HW ATR eviction will take care of removing filters on FIN * and RST packets. */ @@ -2995,7 +2995,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & I40E_TXD_FLTR_QW1_CNTINDEX_MASK; - if (pf->flags & I40E_FLAG_HW_ATR_EVICT_ENABLED) + if (test_bit(I40E_FLAG_HW_ATR_EVICT_ENA, pf->flags)) dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK; fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype); @@ -3053,7 +3053,7 @@ static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, tx_flags |= I40E_TX_FLAGS_SW_VLAN; } - if (!(tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED)) + if (!test_bit(I40E_FLAG_DCB_ENA, tx_ring->vsi->back->flags)) goto out; /* Insert 802.1p priority into VLAN header */ @@ -3229,7 +3229,7 @@ static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb, * we are not already transmitting a packet to be timestamped */ pf = i40e_netdev_to_pf(tx_ring->netdev); - if (!(pf->flags & I40E_FLAG_PTP)) + if (!test_bit(I40E_FLAG_PTP_ENA, pf->flags)) return 0; if (pf->ptp_tx && diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 421fe5675584..abf15067eb5d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -58,7 +58,7 @@ static inline u16 i40e_intrl_usec_to_reg(int intrl) * mentioning ITR_INDX, ITR_NONE cannot be used as an index 'n' into any * register but instead is a special value meaning "don't update" ITR0/1/2. */ -enum i40e_dyn_idx_t { +enum i40e_dyn_idx { I40E_IDX_ITR0 = 0, I40E_IDX_ITR1 = 1, I40E_IDX_ITR2 = 2, @@ -92,8 +92,8 @@ enum i40e_dyn_idx_t { BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP)) #define i40e_pf_get_default_rss_hena(pf) \ - (((pf)->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) ? \ - I40E_DEFAULT_RSS_HENA_EXPANDED : I40E_DEFAULT_RSS_HENA) + (test_bit(I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE, (pf)->hw.caps) ? 
\ + I40E_DEFAULT_RSS_HENA_EXPANDED : I40E_DEFAULT_RSS_HENA) /* Supported Rx Buffer Sizes (a multiple of 128) */ #define I40E_RXBUFFER_256 256 @@ -306,7 +306,7 @@ struct i40e_rx_queue_stats { u64 page_busy_count; }; -enum i40e_ring_state_t { +enum i40e_ring_state { __I40E_TX_FDIR_INIT_DONE, __I40E_TX_XPS_INIT_DONE, __I40E_RING_STATE_NBITS /* must be last */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index aff6dc6afbe2..de69c2e22448 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -64,9 +64,7 @@ typedef void (*I40E_ADMINQ_CALLBACK)(struct i40e_hw *, struct i40e_aq_desc *); enum i40e_mac_type { I40E_MAC_UNKNOWN = 0, I40E_MAC_XL710, - I40E_MAC_VF, I40E_MAC_X722, - I40E_MAC_X722_VF, I40E_MAC_GENERIC, }; @@ -272,9 +270,7 @@ struct i40e_mac_info { enum i40e_mac_type type; u8 addr[ETH_ALEN]; u8 perm_addr[ETH_ALEN]; - u8 san_addr[ETH_ALEN]; u8 port_addr[ETH_ALEN]; - u16 max_fcoeq; }; enum i40e_aq_resources_ids { @@ -482,6 +478,36 @@ struct i40e_dcbx_config { struct i40e_dcb_app_priority_table app[I40E_DCBX_MAX_APPS]; }; +enum i40e_hw_flags { + I40E_HW_CAP_AQ_SRCTL_ACCESS_ENABLE, + I40E_HW_CAP_802_1AD, + I40E_HW_CAP_AQ_PHY_ACCESS, + I40E_HW_CAP_NVM_READ_REQUIRES_LOCK, + I40E_HW_CAP_FW_LLDP_STOPPABLE, + I40E_HW_CAP_FW_LLDP_PERSISTENT, + I40E_HW_CAP_AQ_PHY_ACCESS_EXTENDED, + I40E_HW_CAP_X722_FEC_REQUEST, + I40E_HW_CAP_RSS_AQ, + I40E_HW_CAP_128_QP_RSS, + I40E_HW_CAP_ATR_EVICT, + I40E_HW_CAP_WB_ON_ITR, + I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE, + I40E_HW_CAP_NO_PCI_LINK_CHECK, + I40E_HW_CAP_100M_SGMII, + I40E_HW_CAP_NO_DCB_SUPPORT, + I40E_HW_CAP_USE_SET_LLDP_MIB, + I40E_HW_CAP_GENEVE_OFFLOAD, + I40E_HW_CAP_PTP_L4, + I40E_HW_CAP_WOL_MC_MAGIC_PKT_WAKE, + I40E_HW_CAP_CRT_RETIMER, + I40E_HW_CAP_OUTER_UDP_CSUM, + I40E_HW_CAP_PHY_CONTROLS_LEDS, + I40E_HW_CAP_STOP_FW_LLDP, + I40E_HW_CAP_PORT_ID_VALID, + I40E_HW_CAP_RESTART_AUTONEG, + I40E_HW_CAPS_NBITS, +}; + /* Port hardware description */ struct i40e_hw { u8 __iomem *hw_addr; @@ -546,16 +572,7 @@ struct i40e_hw { struct i40e_dcbx_config remote_dcbx_config; /* Peer Cfg */ struct i40e_dcbx_config desired_dcbx_config; /* CEE Desired Cfg */ -#define I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE BIT_ULL(0) -#define I40E_HW_FLAG_802_1AD_CAPABLE BIT_ULL(1) -#define I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE BIT_ULL(2) -#define I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK BIT_ULL(3) -#define I40E_HW_FLAG_FW_LLDP_STOPPABLE BIT_ULL(4) -#define I40E_HW_FLAG_FW_LLDP_PERSISTENT BIT_ULL(5) -#define I40E_HW_FLAG_AQ_PHY_ACCESS_EXTENDED BIT_ULL(6) -#define I40E_HW_FLAG_DROP_MODE BIT_ULL(7) -#define I40E_HW_FLAG_X722_FEC_REQUEST_CAPABLE BIT_ULL(8) - u64 flags; + DECLARE_BITMAP(caps, I40E_HW_CAPS_NBITS); /* Used in set switch config AQ command */ u16 switch_tag; @@ -567,12 +584,6 @@ struct i40e_hw { char err_str[16]; }; -static inline bool i40e_is_vf(struct i40e_hw *hw) -{ - return (hw->mac.type == I40E_MAC_VF || - hw->mac.type == I40E_MAC_X722_VF); -} - struct i40e_driver_version { u8 major_version; u8 minor_version; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 3f99eb198245..37cca484abb8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1808,7 +1808,7 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs) if (pci_num_vf(pf->pdev) != num_alloc_vfs) { ret = pci_enable_sriov(pf->pdev, num_alloc_vfs); if (ret) { - pf->flags &= 
~I40E_FLAG_VEB_MODE_ENABLED; + clear_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); pf->num_alloc_vfs = 0; goto err_iov; } @@ -1919,8 +1919,8 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) } if (num_vfs) { - if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { - pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; + if (!test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) { + set_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); i40e_do_reset_safe(pf, I40E_PF_RESET_AND_REBUILD_FLAG); } ret = i40e_pci_sriov_enable(pdev, num_vfs); @@ -1929,7 +1929,7 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!pci_vfs_assigned(pf->pdev)) { i40e_free_vfs(pf); - pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED; + clear_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags); i40e_do_reset_safe(pf, I40E_PF_RESET_AND_REBUILD_FLAG); } else { dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n"); @@ -2137,14 +2137,14 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) { vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF; } else { - if ((pf->hw_features & I40E_HW_RSS_AQ_CAPABLE) && + if (test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps) && (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_AQ)) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_AQ; else vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG; } - if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) { + if (test_bit(I40E_HW_CAP_MULTI_TCP_UDP_RSS_PCTYPE, pf->hw.caps)) { if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2; @@ -2153,12 +2153,12 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ENCAP) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ENCAP; - if ((pf->hw_features & I40E_HW_OUTER_UDP_CSUM_CAPABLE) && + if (test_bit(I40E_HW_CAP_OUTER_UDP_CSUM, pf->hw.caps) && (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM)) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM; if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_POLLING) { - if (pf->flags & I40E_FLAG_MFP_ENABLED) { + if (test_bit(I40E_FLAG_MFP_ENA, pf->flags)) { dev_err(&pf->pdev->dev, "VF %d requested polling mode: this feature is supported only when the device is running in single function per port (SFP) mode\n", vf->vf_id); @@ -2168,7 +2168,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RX_POLLING; } - if (pf->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) { + if (test_bit(I40E_HW_CAP_WB_ON_ITR, pf->hw.caps)) { if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_WB_ON_ITR; @@ -4843,7 +4843,7 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting) goto out; } - if (pf->flags & I40E_FLAG_MFP_ENABLED) { + if (test_bit(I40E_FLAG_MFP_ENA, pf->flags)) { dev_err(&pf->pdev->dev, "Trusted VF not supported in MFP mode.\n"); ret = -EINVAL; goto out; diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 351e0d36df44..cd7dcd0fa7f2 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -522,11 +522,18 @@ enum ice_misc_thread_tasks { ICE_MISC_THREAD_NBITS /* must be last */ }; -struct ice_switchdev_info { +struct ice_eswitch { struct ice_vsi *control_vsi; struct ice_vsi *uplink_vsi; struct ice_esw_br_offloads *br_offloads; + struct xarray reprs; bool is_running; + /* struct to allow cp queues management optimization */ + 
struct { + int to_reach; + int value; + bool is_reaching; + } qs; }; struct ice_agg_node { @@ -637,7 +644,7 @@ struct ice_pf { struct ice_link_default_override_tlv link_dflt_override; struct ice_lag *lag; /* Link Aggregation information */ - struct ice_switchdev_info switchdev; + struct ice_eswitch eswitch; struct ice_esw_br_port *br_port; #define ICE_INVALID_AGG_NODE_ID 0 @@ -846,7 +853,7 @@ static inline struct ice_vsi *ice_find_vsi(struct ice_pf *pf, u16 vsi_num) */ static inline bool ice_is_switchdev_running(struct ice_pf *pf) { - return pf->switchdev.is_running; + return pf->eswitch.is_running; } #define ICE_FD_STAT_CTR_BLOCK_COUNT 256 diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 80dc5445b50d..f4e24d11ebd0 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -810,6 +810,10 @@ static void ice_traverse_tx_tree(struct devlink *devlink, struct ice_sched_node struct ice_vf *vf; int i; + if (node->rate_node) + /* already added, skip to the next */ + goto traverse_children; + if (node->parent == tc_node) { /* create root node */ rate_node = devl_rate_node_create(devlink, node, node->name, NULL); @@ -831,6 +835,7 @@ static void ice_traverse_tx_tree(struct devlink *devlink, struct ice_sched_node if (rate_node && !IS_ERR(rate_node)) node->rate_node = rate_node; +traverse_children: for (i = 0; i < node->num_children; i++) ice_traverse_tx_tree(devlink, node->children[i], tc_node, pf); } @@ -861,6 +866,30 @@ int ice_devlink_rate_init_tx_topology(struct devlink *devlink, struct ice_vsi *v return 0; } +static void ice_clear_rate_nodes(struct ice_sched_node *node) +{ + node->rate_node = NULL; + + for (int i = 0; i < node->num_children; i++) + ice_clear_rate_nodes(node->children[i]); +} + +/** + * ice_devlink_rate_clear_tx_topology - clear node->rate_node + * @vsi: main vsi struct + * + * Clear rate_node to cleanup creation of Tx topology. + * + */ +void ice_devlink_rate_clear_tx_topology(struct ice_vsi *vsi) +{ + struct ice_port_info *pi = vsi->port_info; + + mutex_lock(&pi->sched_lock); + ice_clear_rate_nodes(pi->root->children[0]); + mutex_unlock(&pi->sched_lock); +} + /** * ice_set_object_tx_share - sets node scheduling parameter * @pi: devlink struct instance diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.h b/drivers/net/ethernet/intel/ice/ice_devlink.h index 6ec96779f52e..d291c0e2e17b 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.h +++ b/drivers/net/ethernet/intel/ice/ice_devlink.h @@ -20,5 +20,6 @@ void ice_devlink_destroy_regions(struct ice_pf *pf); int ice_devlink_rate_init_tx_topology(struct devlink *devlink, struct ice_vsi *vsi); void ice_tear_down_devlink_rate_tree(struct ice_pf *pf); +void ice_devlink_rate_clear_tx_topology(struct ice_vsi *vsi); #endif /* _ICE_DEVLINK_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index a655d499abfa..3f80e2081e5d 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -11,17 +11,34 @@ #include "ice_tc_lib.h" /** - * ice_eswitch_add_vf_sp_rule - add adv rule with VF's VSI index + * ice_eswitch_del_sp_rules - delete adv rules added on PRs + * @pf: pointer to the PF struct + * + * Delete all advanced rules that were used to forward packets with the + * device's VSI index to the corresponding eswitch ctrl VSI queue. 
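 * Editorial sketch of the new iteration idiom (not code added by this
 * patch): port representors now live in the pf->eswitch.reprs xarray,
 * initialized with xa_init_flags(&pf->eswitch.reprs, XA_FLAGS_ALLOC) when
 * switchdev mode is entered, so walks like the one below use
 *	struct ice_repr *repr;
 *	unsigned long id;
 *
 *	xa_for_each(&pf->eswitch.reprs, id, repr)
 *		ice_repr_stop_tx_queues(repr);
 * instead of the ice_for_each_vf()/vf->repr walks they replace.
 *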
+ */ +static void ice_eswitch_del_sp_rules(struct ice_pf *pf) +{ + struct ice_repr *repr; + unsigned long id; + + xa_for_each(&pf->eswitch.reprs, id, repr) { + if (repr->sp_rule.rid) + ice_rem_adv_rule_by_id(&pf->hw, &repr->sp_rule); + } +} + +/** + * ice_eswitch_add_sp_rule - add adv rule with device's VSI index * @pf: pointer to PF struct - * @vf: pointer to VF struct + * @repr: pointer to the repr struct * * This function adds advanced rule that forwards packets with - * VF's VSI index to the corresponding switchdev ctrl VSI queue. + * device's VSI index to the corresponding eswitch ctrl VSI queue. */ -static int -ice_eswitch_add_vf_sp_rule(struct ice_pf *pf, struct ice_vf *vf) +static int ice_eswitch_add_sp_rule(struct ice_pf *pf, struct ice_repr *repr) { - struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi; + struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi; struct ice_adv_rule_info rule_info = { 0 }; struct ice_adv_lkup_elem *list; struct ice_hw *hw = &pf->hw; @@ -38,39 +55,42 @@ ice_eswitch_add_vf_sp_rule(struct ice_pf *pf, struct ice_vf *vf) rule_info.sw_act.vsi_handle = ctrl_vsi->idx; rule_info.sw_act.fltr_act = ICE_FWD_TO_Q; rule_info.sw_act.fwd_id.q_id = hw->func_caps.common_cap.rxq_first_id + - ctrl_vsi->rxq_map[vf->vf_id]; + ctrl_vsi->rxq_map[repr->q_id]; rule_info.flags_info.act |= ICE_SINGLE_ACT_LB_ENABLE; rule_info.flags_info.act_valid = true; rule_info.tun_type = ICE_SW_TUN_AND_NON_TUN; - rule_info.src_vsi = vf->lan_vsi_idx; + rule_info.src_vsi = repr->src_vsi->idx; err = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, - &vf->repr->sp_rule); + &repr->sp_rule); if (err) - dev_err(ice_pf_to_dev(pf), "Unable to add VF slow-path rule in switchdev mode for VF %d", - vf->vf_id); + dev_err(ice_pf_to_dev(pf), "Unable to add slow-path rule for eswitch for PR %d", + repr->id); kfree(list); return err; } -/** - * ice_eswitch_del_vf_sp_rule - delete adv rule with VF's VSI index - * @vf: pointer to the VF struct - * - * Delete the advanced rule that was used to forward packets with the VF's VSI - * index to the corresponding switchdev ctrl VSI queue. 
- */ -static void ice_eswitch_del_vf_sp_rule(struct ice_vf *vf) +static int +ice_eswitch_add_sp_rules(struct ice_pf *pf) { - if (!vf->repr) - return; + struct ice_repr *repr; + unsigned long id; + int err; - ice_rem_adv_rule_by_id(&vf->pf->hw, &vf->repr->sp_rule); + xa_for_each(&pf->eswitch.reprs, id, repr) { + err = ice_eswitch_add_sp_rule(pf, repr); + if (err) { + ice_eswitch_del_sp_rules(pf); + return err; + } + } + + return 0; } /** - * ice_eswitch_setup_env - configure switchdev HW filters + * ice_eswitch_setup_env - configure eswitch HW filters * @pf: pointer to PF struct * * This function adds HW filters configuration specific for switchdev @@ -78,18 +98,18 @@ static void ice_eswitch_del_vf_sp_rule(struct ice_vf *vf) */ static int ice_eswitch_setup_env(struct ice_pf *pf) { - struct ice_vsi *uplink_vsi = pf->switchdev.uplink_vsi; - struct net_device *uplink_netdev = uplink_vsi->netdev; - struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi; + struct ice_vsi *uplink_vsi = pf->eswitch.uplink_vsi; + struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi; + struct net_device *netdev = uplink_vsi->netdev; struct ice_vsi_vlan_ops *vlan_ops; bool rule_added = false; ice_remove_vsi_fltr(&pf->hw, uplink_vsi->idx); - netif_addr_lock_bh(uplink_netdev); - __dev_uc_unsync(uplink_netdev, NULL); - __dev_mc_unsync(uplink_netdev, NULL); - netif_addr_unlock_bh(uplink_netdev); + netif_addr_lock_bh(netdev); + __dev_uc_unsync(netdev, NULL); + __dev_mc_unsync(netdev, NULL); + netif_addr_unlock_bh(netdev); if (ice_vsi_add_vlan_zero(uplink_vsi)) goto err_def_rx; @@ -132,19 +152,20 @@ err_def_rx: } /** - * ice_eswitch_remap_rings_to_vectors - reconfigure rings of switchdev ctrl VSI - * @pf: pointer to PF struct + * ice_eswitch_remap_rings_to_vectors - reconfigure rings of eswitch ctrl VSI + * @eswitch: pointer to eswitch struct * - * In switchdev number of allocated Tx/Rx rings is equal. + * In eswitch number of allocated Tx/Rx rings is equal. * * This function fills q_vectors structures associated with representor and * move each ring pairs to port representor netdevs. Each port representor * will have dedicated 1 Tx/Rx ring pair, so number of rings pair is equal to * number of VFs. 
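 * Editorial note (an illustrative sketch, not from this patch): the loop
 * below now locates each port representor through the xarray cursor API
 * rather than by VF id, roughly
 *	unsigned long repr_id = 0;
 *	struct ice_repr *repr;
 *
 *	repr = xa_find(&eswitch->reprs, &repr_id, U32_MAX, XA_PRESENT);
 *	repr_id += 1;		/* advance the cursor past this entry */
 *	repr->q_id = q_id;	/* this representor owns ring pair q_id */
 * so one Tx/Rx ring pair is handed to each representor present in
 * pf->eswitch.reprs.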
*/ -static void ice_eswitch_remap_rings_to_vectors(struct ice_pf *pf) +static void ice_eswitch_remap_rings_to_vectors(struct ice_eswitch *eswitch) { - struct ice_vsi *vsi = pf->switchdev.control_vsi; + struct ice_vsi *vsi = eswitch->control_vsi; + unsigned long repr_id = 0; int q_id; ice_for_each_txq(vsi, q_id) { @@ -152,13 +173,14 @@ static void ice_eswitch_remap_rings_to_vectors(struct ice_pf *pf) struct ice_tx_ring *tx_ring; struct ice_rx_ring *rx_ring; struct ice_repr *repr; - struct ice_vf *vf; - vf = ice_get_vf_by_id(pf, q_id); - if (WARN_ON(!vf)) - continue; + repr = xa_find(&eswitch->reprs, &repr_id, U32_MAX, + XA_PRESENT); + if (!repr) + break; - repr = vf->repr; + repr_id += 1; + repr->q_id = q_id; q_vector = repr->q_vector; tx_ring = vsi->tx_rings[q_id]; rx_ring = vsi->rx_rings[q_id]; @@ -181,136 +203,96 @@ static void ice_eswitch_remap_rings_to_vectors(struct ice_pf *pf) rx_ring->q_vector = q_vector; rx_ring->next = NULL; rx_ring->netdev = repr->netdev; - - ice_put_vf(vf); } } /** - * ice_eswitch_release_reprs - clear PR VSIs configuration + * ice_eswitch_release_repr - clear PR VSI configuration * @pf: poiner to PF struct - * @ctrl_vsi: pointer to switchdev control VSI + * @repr: pointer to PR */ static void -ice_eswitch_release_reprs(struct ice_pf *pf, struct ice_vsi *ctrl_vsi) +ice_eswitch_release_repr(struct ice_pf *pf, struct ice_repr *repr) { - struct ice_vf *vf; - unsigned int bkt; + struct ice_vsi *vsi = repr->src_vsi; - lockdep_assert_held(&pf->vfs.table_lock); - - ice_for_each_vf(pf, bkt, vf) { - struct ice_vsi *vsi = vf->repr->src_vsi; - - /* Skip VFs that aren't configured */ - if (!vf->repr->dst) - continue; + /* Skip representors that aren't configured */ + if (!repr->dst) + return; - ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); - metadata_dst_free(vf->repr->dst); - vf->repr->dst = NULL; - ice_eswitch_del_vf_sp_rule(vf); - ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, - ICE_FWD_TO_VSI); + ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); + metadata_dst_free(repr->dst); + repr->dst = NULL; + ice_fltr_add_mac_and_broadcast(vsi, repr->parent_mac, + ICE_FWD_TO_VSI); - netif_napi_del(&vf->repr->q_vector->napi); - } + netif_napi_del(&repr->q_vector->napi); } /** - * ice_eswitch_setup_reprs - configure port reprs to run in switchdev mode + * ice_eswitch_setup_repr - configure PR to run in switchdev mode * @pf: pointer to PF struct + * @repr: pointer to PR struct */ -static int ice_eswitch_setup_reprs(struct ice_pf *pf) +static int ice_eswitch_setup_repr(struct ice_pf *pf, struct ice_repr *repr) { - struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi; - int max_vsi_num = 0; - struct ice_vf *vf; - unsigned int bkt; - - lockdep_assert_held(&pf->vfs.table_lock); - - ice_for_each_vf(pf, bkt, vf) { - struct ice_vsi *vsi = vf->repr->src_vsi; - - ice_remove_vsi_fltr(&pf->hw, vsi->idx); - vf->repr->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, - GFP_KERNEL); - if (!vf->repr->dst) { - ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, - ICE_FWD_TO_VSI); - goto err; - } - - if (ice_eswitch_add_vf_sp_rule(pf, vf)) { - ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, - ICE_FWD_TO_VSI); - goto err; - } - - if (ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof)) { - ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, - ICE_FWD_TO_VSI); - ice_eswitch_del_vf_sp_rule(vf); - metadata_dst_free(vf->repr->dst); - vf->repr->dst = NULL; - goto err; - } + struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi; + struct ice_vsi *vsi = repr->src_vsi; + 
struct metadata_dst *dst; - if (ice_vsi_add_vlan_zero(vsi)) { - ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, - ICE_FWD_TO_VSI); - ice_eswitch_del_vf_sp_rule(vf); - metadata_dst_free(vf->repr->dst); - vf->repr->dst = NULL; - ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); - goto err; - } + ice_remove_vsi_fltr(&pf->hw, vsi->idx); + repr->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, + GFP_KERNEL); + if (!repr->dst) + goto err_add_mac_fltr; - if (max_vsi_num < vsi->vsi_num) - max_vsi_num = vsi->vsi_num; + if (ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof)) + goto err_dst_free; - netif_napi_add(vf->repr->netdev, &vf->repr->q_vector->napi, - ice_napi_poll); + if (ice_vsi_add_vlan_zero(vsi)) + goto err_update_security; - netif_keep_dst(vf->repr->netdev); - } + netif_napi_add(repr->netdev, &repr->q_vector->napi, + ice_napi_poll); - ice_for_each_vf(pf, bkt, vf) { - struct ice_repr *repr = vf->repr; - struct ice_vsi *vsi = repr->src_vsi; - struct metadata_dst *dst; + netif_keep_dst(repr->netdev); - dst = repr->dst; - dst->u.port_info.port_id = vsi->vsi_num; - dst->u.port_info.lower_dev = repr->netdev; - ice_repr_set_traffic_vsi(repr, ctrl_vsi); - } + dst = repr->dst; + dst->u.port_info.port_id = vsi->vsi_num; + dst->u.port_info.lower_dev = repr->netdev; + ice_repr_set_traffic_vsi(repr, ctrl_vsi); return 0; -err: - ice_eswitch_release_reprs(pf, ctrl_vsi); +err_update_security: + ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); +err_dst_free: + metadata_dst_free(repr->dst); + repr->dst = NULL; +err_add_mac_fltr: + ice_fltr_add_mac_and_broadcast(vsi, repr->parent_mac, ICE_FWD_TO_VSI); return -ENODEV; } /** - * ice_eswitch_update_repr - reconfigure VF port representor - * @vsi: VF VSI for which port representor is configured + * ice_eswitch_update_repr - reconfigure port representor + * @repr_id: representor ID + * @vsi: VSI for which port representor is configured */ -void ice_eswitch_update_repr(struct ice_vsi *vsi) +void ice_eswitch_update_repr(unsigned long repr_id, struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; struct ice_repr *repr; - struct ice_vf *vf; int ret; if (!ice_is_switchdev_running(pf)) return; - vf = vsi->vf; - repr = vf->repr; + repr = xa_load(&pf->eswitch.reprs, repr_id); + if (!repr) + return; + repr->src_vsi = vsi; repr->dst->u.port_info.port_id = vsi->vsi_num; @@ -319,9 +301,10 @@ void ice_eswitch_update_repr(struct ice_vsi *vsi) ret = ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof); if (ret) { - ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, ICE_FWD_TO_VSI); - dev_err(ice_pf_to_dev(pf), "Failed to update VF %d port representor", - vsi->vf->vf_id); + ice_fltr_add_mac_and_broadcast(vsi, repr->parent_mac, + ICE_FWD_TO_VSI); + dev_err(ice_pf_to_dev(pf), "Failed to update VSI of port representor %d", + repr->id); } } @@ -353,13 +336,13 @@ ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev) skb_dst_drop(skb); dst_hold((struct dst_entry *)repr->dst); skb_dst_set(skb, (struct dst_entry *)repr->dst); - skb->queue_mapping = repr->vf->vf_id; + skb->queue_mapping = repr->q_id; return ice_start_xmit(skb, netdev); } /** - * ice_eswitch_set_target_vsi - set switchdev context in Tx context descriptor + * ice_eswitch_set_target_vsi - set eswitch context in Tx context descriptor * @skb: pointer to send buffer * @off: pointer to offload struct */ @@ -382,7 +365,7 @@ ice_eswitch_set_target_vsi(struct sk_buff *skb, } /** - * ice_eswitch_release_env - clear switchdev HW filters + * ice_eswitch_release_env - clear 
eswitch HW filters * @pf: pointer to PF struct * * This function removes HW filters configuration specific for switchdev @@ -390,8 +373,8 @@ ice_eswitch_set_target_vsi(struct sk_buff *skb, */ static void ice_eswitch_release_env(struct ice_pf *pf) { - struct ice_vsi *uplink_vsi = pf->switchdev.uplink_vsi; - struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi; + struct ice_vsi *uplink_vsi = pf->eswitch.uplink_vsi; + struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi; struct ice_vsi_vlan_ops *vlan_ops; vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi); @@ -407,7 +390,7 @@ static void ice_eswitch_release_env(struct ice_pf *pf) } /** - * ice_eswitch_vsi_setup - configure switchdev control VSI + * ice_eswitch_vsi_setup - configure eswitch control VSI * @pf: pointer to PF structure * @pi: pointer to port_info structure */ @@ -424,48 +407,29 @@ ice_eswitch_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) } /** - * ice_eswitch_napi_del - remove NAPI handle for all port representors - * @pf: pointer to PF structure - */ -static void ice_eswitch_napi_del(struct ice_pf *pf) -{ - struct ice_vf *vf; - unsigned int bkt; - - lockdep_assert_held(&pf->vfs.table_lock); - - ice_for_each_vf(pf, bkt, vf) - netif_napi_del(&vf->repr->q_vector->napi); -} - -/** * ice_eswitch_napi_enable - enable NAPI for all port representors - * @pf: pointer to PF structure + * @reprs: xarray of reprs */ -static void ice_eswitch_napi_enable(struct ice_pf *pf) +static void ice_eswitch_napi_enable(struct xarray *reprs) { - struct ice_vf *vf; - unsigned int bkt; - - lockdep_assert_held(&pf->vfs.table_lock); + struct ice_repr *repr; + unsigned long id; - ice_for_each_vf(pf, bkt, vf) - napi_enable(&vf->repr->q_vector->napi); + xa_for_each(reprs, id, repr) + napi_enable(&repr->q_vector->napi); } /** * ice_eswitch_napi_disable - disable NAPI for all port representors - * @pf: pointer to PF structure + * @reprs: xarray of reprs */ -static void ice_eswitch_napi_disable(struct ice_pf *pf) +static void ice_eswitch_napi_disable(struct xarray *reprs) { - struct ice_vf *vf; - unsigned int bkt; - - lockdep_assert_held(&pf->vfs.table_lock); + struct ice_repr *repr; + unsigned long id; - ice_for_each_vf(pf, bkt, vf) - napi_disable(&vf->repr->q_vector->napi); + xa_for_each(reprs, id, repr) + napi_disable(&repr->q_vector->napi); } /** @@ -486,39 +450,26 @@ static int ice_eswitch_enable_switchdev(struct ice_pf *pf) return -EINVAL; } - pf->switchdev.control_vsi = ice_eswitch_vsi_setup(pf, pf->hw.port_info); - if (!pf->switchdev.control_vsi) + pf->eswitch.control_vsi = ice_eswitch_vsi_setup(pf, pf->hw.port_info); + if (!pf->eswitch.control_vsi) return -ENODEV; - ctrl_vsi = pf->switchdev.control_vsi; - pf->switchdev.uplink_vsi = uplink_vsi; + ctrl_vsi = pf->eswitch.control_vsi; + /* cp VSI is createad with 1 queue as default */ + pf->eswitch.qs.value = 1; + pf->eswitch.uplink_vsi = uplink_vsi; if (ice_eswitch_setup_env(pf)) goto err_vsi; - if (ice_repr_add_for_all_vfs(pf)) - goto err_repr_add; - - if (ice_eswitch_setup_reprs(pf)) - goto err_setup_reprs; - - ice_eswitch_remap_rings_to_vectors(pf); - - if (ice_vsi_open(ctrl_vsi)) - goto err_setup_reprs; - if (ice_eswitch_br_offloads_init(pf)) goto err_br_offloads; - ice_eswitch_napi_enable(pf); + pf->eswitch.is_running = true; return 0; err_br_offloads: - ice_vsi_close(ctrl_vsi); -err_setup_reprs: - ice_repr_rem_from_all_vfs(pf); -err_repr_add: ice_eswitch_release_env(pf); err_vsi: ice_vsi_release(ctrl_vsi); @@ -526,19 +477,19 @@ err_vsi: } /** - * ice_eswitch_disable_switchdev - disable 
switchdev resources + * ice_eswitch_disable_switchdev - disable eswitch resources * @pf: pointer to PF structure */ static void ice_eswitch_disable_switchdev(struct ice_pf *pf) { - struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi; + struct ice_vsi *ctrl_vsi = pf->eswitch.control_vsi; - ice_eswitch_napi_disable(pf); ice_eswitch_br_offloads_deinit(pf); ice_eswitch_release_env(pf); - ice_eswitch_release_reprs(pf, ctrl_vsi); ice_vsi_release(ctrl_vsi); - ice_repr_rem_from_all_vfs(pf); + + pf->eswitch.is_running = false; + pf->eswitch.qs.is_reaching = false; } /** @@ -566,6 +517,7 @@ ice_eswitch_mode_set(struct devlink *devlink, u16 mode, case DEVLINK_ESWITCH_MODE_LEGACY: dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to legacy", pf->hw.pf_id); + xa_destroy(&pf->eswitch.reprs); NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to legacy"); break; case DEVLINK_ESWITCH_MODE_SWITCHDEV: @@ -578,6 +530,7 @@ ice_eswitch_mode_set(struct devlink *devlink, u16 mode, dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to switchdev", pf->hw.pf_id); + xa_init_flags(&pf->eswitch.reprs, XA_FLAGS_ALLOC); NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to switchdev"); break; } @@ -616,74 +569,166 @@ bool ice_is_eswitch_mode_switchdev(struct ice_pf *pf) } /** - * ice_eswitch_release - cleanup eswitch + * ice_eswitch_start_all_tx_queues - start Tx queues of all port representors * @pf: pointer to PF structure */ -void ice_eswitch_release(struct ice_pf *pf) +static void ice_eswitch_start_all_tx_queues(struct ice_pf *pf) { - if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY) + struct ice_repr *repr; + unsigned long id; + + if (test_bit(ICE_DOWN, pf->state)) return; - ice_eswitch_disable_switchdev(pf); - pf->switchdev.is_running = false; + xa_for_each(&pf->eswitch.reprs, id, repr) + ice_repr_start_tx_queues(repr); } /** - * ice_eswitch_configure - configure eswitch + * ice_eswitch_stop_all_tx_queues - stop Tx queues of all port representors * @pf: pointer to PF structure */ -int ice_eswitch_configure(struct ice_pf *pf) +void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf) { - int status; - - if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY || pf->switchdev.is_running) - return 0; + struct ice_repr *repr; + unsigned long id; - status = ice_eswitch_enable_switchdev(pf); - if (status) - return status; + if (test_bit(ICE_DOWN, pf->state)) + return; - pf->switchdev.is_running = true; - return 0; + xa_for_each(&pf->eswitch.reprs, id, repr) + ice_repr_stop_tx_queues(repr); } -/** - * ice_eswitch_start_all_tx_queues - start Tx queues of all port representors - * @pf: pointer to PF structure - */ -static void ice_eswitch_start_all_tx_queues(struct ice_pf *pf) +static void ice_eswitch_stop_reprs(struct ice_pf *pf) { - struct ice_vf *vf; - unsigned int bkt; + ice_eswitch_del_sp_rules(pf); + ice_eswitch_stop_all_tx_queues(pf); + ice_eswitch_napi_disable(&pf->eswitch.reprs); +} - lockdep_assert_held(&pf->vfs.table_lock); +static void ice_eswitch_start_reprs(struct ice_pf *pf) +{ + ice_eswitch_napi_enable(&pf->eswitch.reprs); + ice_eswitch_start_all_tx_queues(pf); + ice_eswitch_add_sp_rules(pf); +} - if (test_bit(ICE_DOWN, pf->state)) - return; +static void +ice_eswitch_cp_change_queues(struct ice_eswitch *eswitch, int change) +{ + struct ice_vsi *cp = eswitch->control_vsi; + int queues = 0; + + if (eswitch->qs.is_reaching) { + if (eswitch->qs.to_reach >= eswitch->qs.value + change) { + queues = eswitch->qs.to_reach; + eswitch->qs.is_reaching = false; + } else { + queues = 0; + } + } else if ((change > 0 
&& cp->alloc_txq <= eswitch->qs.value) || + change < 0) { + queues = cp->alloc_txq + change; + } - ice_for_each_vf(pf, bkt, vf) { - if (vf->repr) - ice_repr_start_tx_queues(vf->repr); + if (queues) { + cp->req_txq = queues; + cp->req_rxq = queues; + ice_vsi_close(cp); + ice_vsi_rebuild(cp, ICE_VSI_FLAG_NO_INIT); + ice_vsi_open(cp); + } else if (!change) { + /* change == 0 means that VSI wasn't open, open it here */ + ice_vsi_open(cp); } + + eswitch->qs.value += change; + ice_eswitch_remap_rings_to_vectors(eswitch); } -/** - * ice_eswitch_stop_all_tx_queues - stop Tx queues of all port representors - * @pf: pointer to PF structure - */ -void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf) +int +ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf) { - struct ice_vf *vf; - unsigned int bkt; + struct ice_repr *repr; + int change = 1; + int err; - lockdep_assert_held(&pf->vfs.table_lock); + if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY) + return 0; - if (test_bit(ICE_DOWN, pf->state)) + if (xa_empty(&pf->eswitch.reprs)) { + err = ice_eswitch_enable_switchdev(pf); + if (err) + return err; + /* Control plane VSI is created with 1 queue as default */ + pf->eswitch.qs.to_reach -= 1; + change = 0; + } + + ice_eswitch_stop_reprs(pf); + + repr = ice_repr_add_vf(vf); + if (IS_ERR(repr)) + goto err_create_repr; + + err = ice_eswitch_setup_repr(pf, repr); + if (err) + goto err_setup_repr; + + err = xa_alloc(&pf->eswitch.reprs, &repr->id, repr, + XA_LIMIT(1, INT_MAX), GFP_KERNEL); + if (err) + goto err_xa_alloc; + + vf->repr_id = repr->id; + + ice_eswitch_cp_change_queues(&pf->eswitch, change); + ice_eswitch_start_reprs(pf); + + return 0; + +err_xa_alloc: + ice_eswitch_release_repr(pf, repr); +err_setup_repr: + ice_repr_rem_vf(repr); +err_create_repr: + if (xa_empty(&pf->eswitch.reprs)) + ice_eswitch_disable_switchdev(pf); + ice_eswitch_start_reprs(pf); + + return err; +} + +void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf) +{ + struct ice_repr *repr = xa_load(&pf->eswitch.reprs, vf->repr_id); + struct devlink *devlink = priv_to_devlink(pf); + + if (!repr) return; - ice_for_each_vf(pf, bkt, vf) { - if (vf->repr) - ice_repr_stop_tx_queues(vf->repr); + ice_eswitch_stop_reprs(pf); + xa_erase(&pf->eswitch.reprs, repr->id); + + if (xa_empty(&pf->eswitch.reprs)) + ice_eswitch_disable_switchdev(pf); + else + ice_eswitch_cp_change_queues(&pf->eswitch, -1); + + ice_eswitch_release_repr(pf, repr); + ice_repr_rem_vf(repr); + + if (xa_empty(&pf->eswitch.reprs)) { + /* since all port representors are destroyed, there is + * no point in keeping the nodes + */ + ice_devlink_rate_clear_tx_topology(ice_get_main_vsi(pf)); + devl_lock(devlink); + devl_rate_nodes_destroy(devlink); + devl_unlock(devlink); + } else { + ice_eswitch_start_reprs(pf); } } @@ -693,30 +738,35 @@ void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf) */ int ice_eswitch_rebuild(struct ice_pf *pf) { - struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi; - int status; - - ice_eswitch_napi_disable(pf); - ice_eswitch_napi_del(pf); - - status = ice_eswitch_setup_env(pf); - if (status) - return status; + struct ice_repr *repr; + unsigned long id; + int err; - status = ice_eswitch_setup_reprs(pf); - if (status) - return status; + if (!ice_is_switchdev_running(pf)) + return 0; - ice_eswitch_remap_rings_to_vectors(pf); + err = ice_vsi_rebuild(pf->eswitch.control_vsi, ICE_VSI_FLAG_INIT); + if (err) + return err; - ice_replay_tc_fltrs(pf); + xa_for_each(&pf->eswitch.reprs, id, repr) + ice_eswitch_detach(pf, repr->vf); - status = 
ice_vsi_open(ctrl_vsi); - if (status) - return status; + return 0; +} - ice_eswitch_napi_enable(pf); - ice_eswitch_start_all_tx_queues(pf); +/** + * ice_eswitch_reserve_cp_queues - reserve control plane VSI queues + * @pf: pointer to PF structure + * @change: how many more (or less) queues is needed + * + * Remember to call ice_eswitch_attach/detach() the "change" times. + */ +void ice_eswitch_reserve_cp_queues(struct ice_pf *pf, int change) +{ + if (pf->eswitch.qs.value + change < 0) + return; - return 0; + pf->eswitch.qs.to_reach = pf->eswitch.qs.value + change; + pf->eswitch.qs.is_reaching = true; } diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.h b/drivers/net/ethernet/intel/ice/ice_eswitch.h index b18bf83a2f5b..1a288a03a79a 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.h +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.h @@ -7,8 +7,9 @@ #include <net/devlink.h> #ifdef CONFIG_ICE_SWITCHDEV -void ice_eswitch_release(struct ice_pf *pf); -int ice_eswitch_configure(struct ice_pf *pf); +void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf); +int +ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf); int ice_eswitch_rebuild(struct ice_pf *pf); int ice_eswitch_mode_get(struct devlink *devlink, u16 *mode); @@ -17,7 +18,7 @@ ice_eswitch_mode_set(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack); bool ice_is_eswitch_mode_switchdev(struct ice_pf *pf); -void ice_eswitch_update_repr(struct ice_vsi *vsi); +void ice_eswitch_update_repr(unsigned long repr_id, struct ice_vsi *vsi); void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf); @@ -25,8 +26,15 @@ void ice_eswitch_set_target_vsi(struct sk_buff *skb, struct ice_tx_offload_params *off); netdev_tx_t ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev); +void ice_eswitch_reserve_cp_queues(struct ice_pf *pf, int change); #else /* CONFIG_ICE_SWITCHDEV */ -static inline void ice_eswitch_release(struct ice_pf *pf) { } +static inline void ice_eswitch_detach(struct ice_pf *pf, struct ice_vf *vf) { } + +static inline int +ice_eswitch_attach(struct ice_pf *pf, struct ice_vf *vf) +{ + return -EOPNOTSUPP; +} static inline void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf) { } @@ -34,7 +42,8 @@ static inline void ice_eswitch_set_target_vsi(struct sk_buff *skb, struct ice_tx_offload_params *off) { } -static inline void ice_eswitch_update_repr(struct ice_vsi *vsi) { } +static inline void +ice_eswitch_update_repr(unsigned long repr_id, struct ice_vsi *vsi) { } static inline int ice_eswitch_configure(struct ice_pf *pf) { @@ -68,5 +77,8 @@ ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev) { return NETDEV_TX_BUSY; } + +static inline void +ice_eswitch_reserve_cp_queues(struct ice_pf *pf, int change) { } #endif /* CONFIG_ICE_SWITCHDEV */ #endif /* _ICE_ESWITCH_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch_br.c b/drivers/net/ethernet/intel/ice/ice_eswitch_br.c index 6ae0269bdf73..ac5beecd028b 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch_br.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch_br.c @@ -893,10 +893,14 @@ ice_eswitch_br_port_deinit(struct ice_esw_br *bridge, ice_eswitch_br_fdb_entry_delete(bridge, fdb_entry); } - if (br_port->type == ICE_ESWITCH_BR_UPLINK_PORT && vsi->back) + if (br_port->type == ICE_ESWITCH_BR_UPLINK_PORT && vsi->back) { vsi->back->br_port = NULL; - else if (vsi->vf && vsi->vf->repr) - vsi->vf->repr->br_port = NULL; + } else { + struct ice_repr *repr = ice_repr_get_by_vsi(vsi); + + if (repr) + repr->br_port = 
NULL; + } xa_erase(&bridge->ports, br_port->vsi_idx); ice_eswitch_br_port_vlans_flush(br_port); @@ -947,7 +951,7 @@ ice_eswitch_br_vf_repr_port_init(struct ice_esw_br *bridge, static int ice_eswitch_br_uplink_port_init(struct ice_esw_br *bridge, struct ice_pf *pf) { - struct ice_vsi *vsi = pf->switchdev.uplink_vsi; + struct ice_vsi *vsi = pf->eswitch.uplink_vsi; struct ice_esw_br_port *br_port; int err; @@ -1185,7 +1189,7 @@ ice_eswitch_br_port_event(struct notifier_block *nb, static void ice_eswitch_br_offloads_dealloc(struct ice_pf *pf) { - struct ice_esw_br_offloads *br_offloads = pf->switchdev.br_offloads; + struct ice_esw_br_offloads *br_offloads = pf->eswitch.br_offloads; ASSERT_RTNL(); @@ -1194,7 +1198,7 @@ ice_eswitch_br_offloads_dealloc(struct ice_pf *pf) ice_eswitch_br_deinit(br_offloads, br_offloads->bridge); - pf->switchdev.br_offloads = NULL; + pf->eswitch.br_offloads = NULL; kfree(br_offloads); } @@ -1205,14 +1209,14 @@ ice_eswitch_br_offloads_alloc(struct ice_pf *pf) ASSERT_RTNL(); - if (pf->switchdev.br_offloads) + if (pf->eswitch.br_offloads) return ERR_PTR(-EEXIST); br_offloads = kzalloc(sizeof(*br_offloads), GFP_KERNEL); if (!br_offloads) return ERR_PTR(-ENOMEM); - pf->switchdev.br_offloads = br_offloads; + pf->eswitch.br_offloads = br_offloads; br_offloads->pf = pf; return br_offloads; @@ -1223,7 +1227,7 @@ ice_eswitch_br_offloads_deinit(struct ice_pf *pf) { struct ice_esw_br_offloads *br_offloads; - br_offloads = pf->switchdev.br_offloads; + br_offloads = pf->eswitch.br_offloads; if (!br_offloads) return; diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 4b1e56396293..d826b5afa143 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -212,11 +212,18 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi) vsi->alloc_txq)); break; case ICE_VSI_SWITCHDEV_CTRL: - /* The number of queues for ctrl VSI is equal to number of VFs. + /* The number of queues for ctrl VSI is equal to number of PRs * Each ring is associated to the corresponding VF_PR netdev. 
+ * Tx and Rx rings are always equal */ - vsi->alloc_txq = ice_get_num_vfs(pf); - vsi->alloc_rxq = vsi->alloc_txq; + if (vsi->req_txq && vsi->req_rxq) { + vsi->alloc_txq = vsi->req_txq; + vsi->alloc_rxq = vsi->req_rxq; + } else { + vsi->alloc_txq = 1; + vsi->alloc_rxq = 1; + } + vsi->num_q_vectors = 1; break; case ICE_VSI_VF: @@ -519,16 +526,14 @@ static irqreturn_t ice_eswitch_msix_clean_rings(int __always_unused irq, void *d { struct ice_q_vector *q_vector = (struct ice_q_vector *)data; struct ice_pf *pf = q_vector->vsi->back; - struct ice_vf *vf; - unsigned int bkt; + struct ice_repr *repr; + unsigned long id; if (!q_vector->tx.tx_ring && !q_vector->rx.rx_ring) return IRQ_HANDLED; - rcu_read_lock(); - ice_for_each_vf_rcu(pf, bkt, vf) - napi_schedule(&vf->repr->q_vector->napi); - rcu_read_unlock(); + xa_for_each(&pf->eswitch.reprs, id, repr) + napi_schedule(&repr->q_vector->napi); return IRQ_HANDLED; } @@ -3071,27 +3076,26 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, } /** - * ice_vsi_realloc_stat_arrays - Frees unused stat structures + * ice_vsi_realloc_stat_arrays - Frees unused stat structures or alloc new ones * @vsi: VSI pointer - * @prev_txq: Number of Tx rings before ring reallocation - * @prev_rxq: Number of Rx rings before ring reallocation */ -static void -ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi, int prev_txq, int prev_rxq) +static int +ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi) { + u16 req_txq = vsi->req_txq ? vsi->req_txq : vsi->alloc_txq; + u16 req_rxq = vsi->req_rxq ? vsi->req_rxq : vsi->alloc_rxq; + struct ice_ring_stats **tx_ring_stats; + struct ice_ring_stats **rx_ring_stats; struct ice_vsi_stats *vsi_stat; struct ice_pf *pf = vsi->back; + u16 prev_txq = vsi->alloc_txq; + u16 prev_rxq = vsi->alloc_rxq; int i; - if (!prev_txq || !prev_rxq) - return; - if (vsi->type == ICE_VSI_CHNL) - return; - vsi_stat = pf->vsi_stats[vsi->idx]; - if (vsi->num_txq < prev_txq) { - for (i = vsi->num_txq; i < prev_txq; i++) { + if (req_txq < prev_txq) { + for (i = req_txq; i < prev_txq; i++) { if (vsi_stat->tx_ring_stats[i]) { kfree_rcu(vsi_stat->tx_ring_stats[i], rcu); WRITE_ONCE(vsi_stat->tx_ring_stats[i], NULL); @@ -3099,14 +3103,36 @@ ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi, int prev_txq, int prev_rxq) } } - if (vsi->num_rxq < prev_rxq) { - for (i = vsi->num_rxq; i < prev_rxq; i++) { + tx_ring_stats = vsi_stat->rx_ring_stats; + vsi_stat->tx_ring_stats = + krealloc_array(vsi_stat->tx_ring_stats, req_txq, + sizeof(*vsi_stat->tx_ring_stats), + GFP_KERNEL | __GFP_ZERO); + if (!vsi_stat->tx_ring_stats) { + vsi_stat->tx_ring_stats = tx_ring_stats; + return -ENOMEM; + } + + if (req_rxq < prev_rxq) { + for (i = req_rxq; i < prev_rxq; i++) { if (vsi_stat->rx_ring_stats[i]) { kfree_rcu(vsi_stat->rx_ring_stats[i], rcu); WRITE_ONCE(vsi_stat->rx_ring_stats[i], NULL); } } } + + rx_ring_stats = vsi_stat->rx_ring_stats; + vsi_stat->rx_ring_stats = + krealloc_array(vsi_stat->rx_ring_stats, req_rxq, + sizeof(*vsi_stat->rx_ring_stats), + GFP_KERNEL | __GFP_ZERO); + if (!vsi_stat->rx_ring_stats) { + vsi_stat->rx_ring_stats = rx_ring_stats; + return -ENOMEM; + } + + return 0; } /** @@ -3123,9 +3149,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) { struct ice_vsi_cfg_params params = {}; struct ice_coalesce_stored *coalesce; - int ret, prev_txq, prev_rxq; int prev_num_q_vectors = 0; struct ice_pf *pf; + int ret; if (!vsi) return -EINVAL; @@ -3144,8 +3170,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, 
coalesce); - prev_txq = vsi->num_txq; - prev_rxq = vsi->num_rxq; + ret = ice_vsi_realloc_stat_arrays(vsi); + if (ret) + goto err_vsi_cfg; ice_vsi_decfg(vsi); ret = ice_vsi_cfg_def(vsi, ¶ms); @@ -3163,8 +3190,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) return ice_schedule_reset(pf, ICE_RESET_PFR); } - ice_vsi_realloc_stat_arrays(vsi, prev_txq, prev_rxq); - ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors); kfree(coalesce); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index fb9c93f37e84..43ba3e55b8c1 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4702,6 +4702,8 @@ static void ice_deinit_features(struct ice_pf *pf) ice_ptp_release(pf); if (test_bit(ICE_FLAG_DPLL, pf->flags)) ice_dpll_deinit(pf); + if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) + xa_destroy(&pf->eswitch.reprs); } static void ice_init_wakeup(struct ice_pf *pf) @@ -7401,9 +7403,9 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) goto err_vsi_rebuild; } - err = ice_vsi_rebuild_by_type(pf, ICE_VSI_SWITCHDEV_CTRL); + err = ice_eswitch_rebuild(pf); if (err) { - dev_err(dev, "Switchdev CTRL VSI rebuild failed: %d\n", err); + dev_err(dev, "Switchdev rebuild failed: %d\n", err); goto err_vsi_rebuild; } diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index c686ac0935eb..5f30fb131f74 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -14,7 +14,7 @@ */ static int ice_repr_get_sw_port_id(struct ice_repr *repr) { - return repr->vf->pf->hw.port_info->lport; + return repr->src_vsi->back->hw.port_info->lport; } /** @@ -35,7 +35,7 @@ ice_repr_get_phys_port_name(struct net_device *netdev, char *buf, size_t len) return -EOPNOTSUPP; res = snprintf(buf, len, "pf%dvfr%d", ice_repr_get_sw_port_id(repr), - repr->vf->vf_id); + repr->id); if (res <= 0) return -EOPNOTSUPP; return 0; @@ -278,25 +278,67 @@ ice_repr_reg_netdev(struct net_device *netdev) return register_netdev(netdev); } +static void ice_repr_remove_node(struct devlink_port *devlink_port) +{ + devl_lock(devlink_port->devlink); + devl_rate_leaf_destroy(devlink_port); + devl_unlock(devlink_port->devlink); +} + /** - * ice_repr_add - add representor for VF - * @vf: pointer to VF structure + * ice_repr_rem - remove representor from VF + * @repr: pointer to representor structure */ -static int ice_repr_add(struct ice_vf *vf) +static void ice_repr_rem(struct ice_repr *repr) +{ + kfree(repr->q_vector); + free_netdev(repr->netdev); + kfree(repr); +} + +/** + * ice_repr_rem_vf - remove representor from VF + * @repr: pointer to representor structure + */ +void ice_repr_rem_vf(struct ice_repr *repr) +{ + ice_repr_remove_node(&repr->vf->devlink_port); + unregister_netdev(repr->netdev); + ice_devlink_destroy_vf_port(repr->vf); + ice_virtchnl_set_dflt_ops(repr->vf); + ice_repr_rem(repr); +} + +static void ice_repr_set_tx_topology(struct ice_pf *pf) +{ + struct devlink *devlink; + + /* only export if ADQ and DCB disabled and eswitch enabled*/ + if (ice_is_adq_active(pf) || ice_is_dcb_active(pf) || + !ice_is_switchdev_running(pf)) + return; + + devlink = priv_to_devlink(pf); + ice_devlink_rate_init_tx_topology(devlink, ice_get_main_vsi(pf)); +} + +/** + * ice_repr_add - add representor for generic VSI + * @pf: pointer to PF structure + * @src_vsi: pointer to VSI structure of device to represent + * @parent_mac: device MAC 
address + */ +static struct ice_repr * +ice_repr_add(struct ice_pf *pf, struct ice_vsi *src_vsi, const u8 *parent_mac) { struct ice_q_vector *q_vector; struct ice_netdev_priv *np; struct ice_repr *repr; - struct ice_vsi *vsi; int err; - vsi = ice_get_vf_vsi(vf); - if (!vsi) - return -EINVAL; - repr = kzalloc(sizeof(*repr), GFP_KERNEL); if (!repr) - return -ENOMEM; + return ERR_PTR(-ENOMEM); repr->netdev = alloc_etherdev(sizeof(struct ice_netdev_priv)); if (!repr->netdev) { @@ -304,9 +346,7 @@ static int ice_repr_add(struct ice_vf *vf) goto err_alloc; } - repr->src_vsi = vsi; - repr->vf = vf; - vf->repr = repr; + repr->src_vsi = src_vsi; np = netdev_priv(repr->netdev); np->repr = repr; @@ -316,10 +356,40 @@ static int ice_repr_add(struct ice_vf *vf) goto err_alloc_q_vector; } repr->q_vector = q_vector; + repr->q_id = repr->id; + + ether_addr_copy(repr->parent_mac, parent_mac); + + return repr; + +err_alloc_q_vector: + free_netdev(repr->netdev); +err_alloc: + kfree(repr); + return ERR_PTR(err); +} + +struct ice_repr *ice_repr_add_vf(struct ice_vf *vf) +{ + struct ice_repr *repr; + struct ice_vsi *vsi; + int err; + + vsi = ice_get_vf_vsi(vf); + if (!vsi) + return ERR_PTR(-ENOENT); err = ice_devlink_create_vf_port(vf); if (err) - goto err_devlink; + return ERR_PTR(err); + + repr = ice_repr_add(vf->pf, vsi, vf->hw_lan_addr); + if (IS_ERR(repr)) { + err = PTR_ERR(repr); + goto err_repr_add; + } + + repr->vf = vf; repr->netdev->min_mtu = ETH_MIN_MTU; repr->netdev->max_mtu = ICE_MAX_MTU; @@ -331,100 +401,23 @@ static int ice_repr_add(struct ice_vf *vf) goto err_netdev; ice_virtchnl_set_repr_ops(vf); + ice_repr_set_tx_topology(vf->pf); - return 0; + return repr; err_netdev: + ice_repr_rem(repr); +err_repr_add: ice_devlink_destroy_vf_port(vf); -err_devlink: - kfree(repr->q_vector); - vf->repr->q_vector = NULL; -err_alloc_q_vector: - free_netdev(repr->netdev); - repr->netdev = NULL; -err_alloc: - kfree(repr); - vf->repr = NULL; - return err; -} - -/** - * ice_repr_rem - remove representor from VF - * @vf: pointer to VF structure - */ -static void ice_repr_rem(struct ice_vf *vf) -{ - if (!vf->repr) - return; - - kfree(vf->repr->q_vector); - vf->repr->q_vector = NULL; - unregister_netdev(vf->repr->netdev); - ice_devlink_destroy_vf_port(vf); - free_netdev(vf->repr->netdev); - vf->repr->netdev = NULL; - kfree(vf->repr); - vf->repr = NULL; - - ice_virtchnl_set_dflt_ops(vf); -} - -/** - * ice_repr_rem_from_all_vfs - remove port representor for all VFs - * @pf: pointer to PF structure - */ -void ice_repr_rem_from_all_vfs(struct ice_pf *pf) -{ - struct devlink *devlink; - struct ice_vf *vf; - unsigned int bkt; - - lockdep_assert_held(&pf->vfs.table_lock); - - ice_for_each_vf(pf, bkt, vf) - ice_repr_rem(vf); - - /* since all port representors are destroyed, there is - * no point in keeping the nodes - */ - devlink = priv_to_devlink(pf); - devl_lock(devlink); - devl_rate_nodes_destroy(devlink); - devl_unlock(devlink); + return ERR_PTR(err); } -/** - * ice_repr_add_for_all_vfs - add port representor for all VFs - * @pf: pointer to PF structure - */ -int ice_repr_add_for_all_vfs(struct ice_pf *pf) +struct ice_repr *ice_repr_get_by_vsi(struct ice_vsi *vsi) { - struct devlink *devlink; - struct ice_vf *vf; - unsigned int bkt; - int err; - - lockdep_assert_held(&pf->vfs.table_lock); - - ice_for_each_vf(pf, bkt, vf) { - err = ice_repr_add(vf); - if (err) - goto err; - } - - /* only export if ADQ and DCB disabled */ - if (ice_is_adq_active(pf) || ice_is_dcb_active(pf)) - return 0; - - devlink = priv_to_devlink(pf); - 
ice_devlink_rate_init_tx_topology(devlink, ice_get_main_vsi(pf)); - - return 0; - -err: - ice_repr_rem_from_all_vfs(pf); + if (!vsi->vf) + return NULL; - return err; + return xa_load(&vsi->back->eswitch.reprs, vsi->vf->repr_id); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_repr.h b/drivers/net/ethernet/intel/ice/ice_repr.h index e1ee2d2c1d2d..f9aede315716 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.h +++ b/drivers/net/ethernet/intel/ice/ice_repr.h @@ -13,14 +13,17 @@ struct ice_repr { struct net_device *netdev; struct metadata_dst *dst; struct ice_esw_br_port *br_port; + int q_id; + u32 id; + u8 parent_mac[ETH_ALEN]; #ifdef CONFIG_ICE_SWITCHDEV /* info about slow path rule */ struct ice_rule_query_data sp_rule; #endif }; -int ice_repr_add_for_all_vfs(struct ice_pf *pf); -void ice_repr_rem_from_all_vfs(struct ice_pf *pf); +struct ice_repr *ice_repr_add_vf(struct ice_vf *vf); +void ice_repr_rem_vf(struct ice_repr *repr); void ice_repr_start_tx_queues(struct ice_repr *repr); void ice_repr_stop_tx_queues(struct ice_repr *repr); @@ -29,4 +32,6 @@ void ice_repr_set_traffic_vsi(struct ice_repr *repr, struct ice_vsi *vsi); struct ice_repr *ice_netdev_to_repr(struct net_device *netdev); bool ice_is_port_repr_netdev(const struct net_device *netdev); + +struct ice_repr *ice_repr_get_by_vsi(struct ice_vsi *vsi); #endif diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 2a5e6616cc0a..5a45bd5ce6ad 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -172,13 +172,14 @@ void ice_free_vfs(struct ice_pf *pf) else dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n"); - mutex_lock(&vfs->table_lock); + ice_eswitch_reserve_cp_queues(pf, -ice_get_num_vfs(pf)); - ice_eswitch_release(pf); + mutex_lock(&vfs->table_lock); ice_for_each_vf(pf, bkt, vf) { mutex_lock(&vf->cfg_lock); + ice_eswitch_detach(pf, vf); ice_dis_vf_qs(vf); if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { @@ -614,6 +615,14 @@ static int ice_start_vfs(struct ice_pf *pf) goto teardown; } + retval = ice_eswitch_attach(pf, vf); + if (retval) { + dev_err(ice_pf_to_dev(pf), "Failed to attach VF %d to eswitch, error %d", + vf->vf_id, retval); + ice_vf_vsi_release(vf); + goto teardown; + } + set_bit(ICE_VF_STATE_INIT, vf->vf_states); ice_ena_vf_mappings(vf); wr32(hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_VFACTIVE); @@ -923,6 +932,7 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) goto err_unroll_sriov; } + ice_eswitch_reserve_cp_queues(pf, num_vfs); ret = ice_start_vfs(pf); if (ret) { dev_err(dev, "Failed to start %d VFs, err %d\n", num_vfs, ret); @@ -932,12 +942,6 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) clear_bit(ICE_VF_DIS, pf->state); - ret = ice_eswitch_configure(pf); - if (ret) { - dev_err(dev, "Failed to configure eswitch, err %d\n", ret); - goto err_unroll_sriov; - } - /* rearm global interrupts */ if (test_and_clear_bit(ICE_OICR_INTR_DIS, pf->state)) ice_irq_dynamic_ena(hw, NULL, NULL); diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index dd03cb69ad26..08d3bbf4b44c 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -653,7 +653,7 @@ static int ice_tc_setup_redirect_action(struct net_device *filter_dev, ice_tc_is_dev_uplink(target_dev)) { repr = ice_netdev_to_repr(filter_dev); - fltr->dest_vsi = repr->src_vsi->back->switchdev.uplink_vsi; + fltr->dest_vsi = 
repr->src_vsi->back->eswitch.uplink_vsi; fltr->direction = ICE_ESWITCH_FLTR_EGRESS; } else if (ice_tc_is_dev_uplink(filter_dev) && ice_is_port_repr_netdev(target_dev)) { @@ -765,7 +765,7 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) rule_info.sw_act.src = hw->pf_id; rule_info.flags_info.act = ICE_SINGLE_ACT_LB_ENABLE; } else if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS && - fltr->dest_vsi == vsi->back->switchdev.uplink_vsi) { + fltr->dest_vsi == vsi->back->eswitch.uplink_vsi) { /* VF to Uplink */ rule_info.sw_act.flag |= ICE_FLTR_TX; rule_info.sw_act.src = vsi->idx; diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index aca1f2ea5034..d2a99a20c4ad 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -760,6 +760,7 @@ void ice_reset_all_vfs(struct ice_pf *pf) ice_for_each_vf(pf, bkt, vf) { mutex_lock(&vf->cfg_lock); + ice_eswitch_detach(pf, vf); vf->driver_caps = 0; ice_vc_set_default_allowlist(vf); @@ -775,13 +776,11 @@ void ice_reset_all_vfs(struct ice_pf *pf) ice_vf_rebuild_vsi(vf); ice_vf_post_vsi_rebuild(vf); + ice_eswitch_attach(pf, vf); + mutex_unlock(&vf->cfg_lock); } - if (ice_is_eswitch_mode_switchdev(pf)) - if (ice_eswitch_rebuild(pf)) - dev_warn(dev, "eswitch rebuild failed\n"); - ice_flush(hw); clear_bit(ICE_VF_DIS, pf->state); @@ -928,7 +927,7 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) goto out_unlock; } - ice_eswitch_update_repr(vsi); + ice_eswitch_update_repr(vf->repr_id, vsi); /* if the VF has been reset allow it to come up again */ ice_mbx_clear_malvf(&vf->mbx_info); diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h index 93c774f2f437..35866553f288 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h @@ -130,7 +130,7 @@ struct ice_vf { struct ice_mdd_vf_events mdd_tx_events; DECLARE_BITMAP(opcodes_allowlist, VIRTCHNL_OP_MAX); - struct ice_repr *repr; + unsigned long repr_id; const struct ice_virtchnl_ops *virtchnl_ops; const struct ice_vf_ops *vf_ops; diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index f48f82d5e274..ac7c861e83a0 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -81,6 +81,21 @@ struct igc_tx_timestamp_request { u32 flags; /* flags that should be added to the tx_buffer */ }; +struct igc_inline_rx_tstamps { + /* Timestamps are saved in little endian at the beginning of the packet + * buffer following the layout: + * + * DWORD: | 0 | 1 | 2 | 3 | + * Field: | Timer1 SYSTIML | Timer1 SYSTIMH | Timer0 SYSTIML | Timer0 SYSTIMH | + * + * SYSTIML holds the nanoseconds part while SYSTIMH holds the seconds + * part of the timestamp. 
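 * (Per the igc_ptp changes later in this patch, Timer0 is the
 * adjustable PTP clock and Timer1 the free-running clock;
 * igc_get_tstamp() picks between the two based on its "cycles"
 * argument.)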
+ * + */ + __le32 timer1[2]; + __le32 timer0[2]; +}; + struct igc_ring_container { struct igc_ring *ring; /* pointer to linked list of rings */ unsigned int total_bytes; /* total bytes processed this int */ @@ -261,6 +276,8 @@ struct igc_adapter { unsigned int ptp_flags; /* System time value lock */ spinlock_t tmreg_lock; + /* Free-running timer lock */ + spinlock_t free_timer_lock; struct cyclecounter cc; struct timecounter tc; struct timespec64 prev_ptp_time; /* Pre-reset PTP clock */ @@ -469,6 +486,8 @@ enum igc_tx_flags { IGC_TX_FLAGS_TSTAMP_1 = 0x100, IGC_TX_FLAGS_TSTAMP_2 = 0x200, IGC_TX_FLAGS_TSTAMP_3 = 0x400, + + IGC_TX_FLAGS_TSTAMP_TIMER_1 = 0x800, }; enum igc_boards { @@ -531,7 +550,7 @@ struct igc_rx_buffer { struct igc_xdp_buff { struct xdp_buff xdp; union igc_adv_rx_desc *rx_desc; - ktime_t rx_ts; /* data indication bit IGC_RXDADV_STAT_TSIP */ + struct igc_inline_rx_tstamps *rx_ts; /* data indication bit IGC_RXDADV_STAT_TSIP */ }; struct igc_q_vector { diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h index f7d6491d4c60..bf8cdfbba9ff 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.h +++ b/drivers/net/ethernet/intel/igc/igc_base.h @@ -37,6 +37,10 @@ struct igc_adv_tx_context_desc { #define IGC_ADVTXD_TSTAMP_REG_1 0x00010000 /* Select register 1 for timestamp */ #define IGC_ADVTXD_TSTAMP_REG_2 0x00020000 /* Select register 2 for timestamp */ #define IGC_ADVTXD_TSTAMP_REG_3 0x00030000 /* Select register 3 for timestamp */ +#define IGC_ADVTXD_TSTAMP_TIMER_1 0x00010000 /* Select timer 1 for timestamp */ +#define IGC_ADVTXD_TSTAMP_TIMER_2 0x00020000 /* Select timer 2 for timestamp */ +#define IGC_ADVTXD_TSTAMP_TIMER_3 0x00030000 /* Select timer 3 for timestamp */ + #define IGC_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */ #define IGC_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */ #define IGC_ADVTXD_DCMD_EOP 0x01000000 /* End of Packet */ diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index b3037016f31d..5f92b3c7c3d4 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -317,6 +317,8 @@ #define IGC_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */ #define IGC_TXD_EXTCMD_TSTAMP 0x00000010 /* IEEE1588 Timestamp packet */ +#define IGC_TXD_PTP2_TIMER_1 0x00000020 + /* IPSec Encrypt Enable */ #define IGC_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ #define IGC_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index e9bb403bbacf..61db1d3bfa0b 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1299,14 +1299,16 @@ static void igc_tx_olinfo_status(struct igc_ring *tx_ring, u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT; /* insert L4 checksum */ - olinfo_status |= (tx_flags & IGC_TX_FLAGS_CSUM) * - ((IGC_TXD_POPTS_TXSM << 8) / - IGC_TX_FLAGS_CSUM); + olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_CSUM, + (IGC_TXD_POPTS_TXSM << 8)); /* insert IPv4 checksum */ - olinfo_status |= (tx_flags & IGC_TX_FLAGS_IPV4) * - (((IGC_TXD_POPTS_IXSM << 8)) / - IGC_TX_FLAGS_IPV4); + olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_IPV4, + (IGC_TXD_POPTS_IXSM << 8)); + + /* Use the second timer (free running, in general) for the timestamp */ + olinfo_status |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_TIMER_1, + IGC_TXD_PTP2_TIMER_1); 
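	/* The IGC_SET_FLAG() calls above map a tx_flags bit onto the
	 * corresponding olinfo_status bit, i.e. they contribute the third
	 * argument only when the named flag is set, replacing the older
	 * multiply/divide construction removed in this hunk.
	 */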
tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); } @@ -1651,6 +1653,8 @@ done: if (igc_request_tx_tstamp(adapter, skb, &tstamp_flags)) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= IGC_TX_FLAGS_TSTAMP | tstamp_flags; + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_USE_CYCLES) + tx_flags |= IGC_TX_FLAGS_TSTAMP_TIMER_1; } else { adapter->tx_hwtstamp_skipped++; } @@ -1963,9 +1967,9 @@ static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring, static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, struct igc_rx_buffer *rx_buffer, - struct xdp_buff *xdp, - ktime_t timestamp) + struct igc_xdp_buff *ctx) { + struct xdp_buff *xdp = &ctx->xdp; unsigned int metasize = xdp->data - xdp->data_meta; unsigned int size = xdp->data_end - xdp->data; unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size); @@ -1982,8 +1986,10 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, if (unlikely(!skb)) return NULL; - if (timestamp) - skb_hwtstamps(skb)->hwtstamp = timestamp; + if (ctx->rx_ts) { + skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP_NETDEV; + skb_hwtstamps(skb)->netdev_data = ctx->rx_ts; + } /* Determine available headroom for copy */ headlen = size; @@ -2583,11 +2589,10 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) int xdp_status = 0, rx_buffer_pgcnt; while (likely(total_packets < budget)) { - union igc_adv_rx_desc *rx_desc; + struct igc_xdp_buff ctx = { .rx_ts = NULL }; struct igc_rx_buffer *rx_buffer; + union igc_adv_rx_desc *rx_desc; unsigned int size, truesize; - struct igc_xdp_buff ctx; - ktime_t timestamp = 0; int pkt_offset = 0; void *pktbuf; @@ -2614,9 +2619,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset; if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) { - timestamp = igc_ptp_rx_pktstamp(q_vector->adapter, - pktbuf); - ctx.rx_ts = timestamp; + ctx.rx_ts = pktbuf; pkt_offset = IGC_TS_HDR_LEN; size -= IGC_TS_HDR_LEN; } @@ -2653,8 +2656,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) else if (ring_uses_build_skb(rx_ring)) skb = igc_build_skb(rx_ring, rx_buffer, &ctx.xdp); else - skb = igc_construct_skb(rx_ring, rx_buffer, &ctx.xdp, - timestamp); + skb = igc_construct_skb(rx_ring, rx_buffer, &ctx); /* exit if we failed to retrieve a buffer */ if (!skb) { @@ -2803,9 +2805,7 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) ctx->rx_desc = desc; if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) { - timestamp = igc_ptp_rx_pktstamp(q_vector->adapter, - bi->xdp->data); - ctx->rx_ts = timestamp; + ctx->rx_ts = bi->xdp->data; bi->xdp->data += IGC_TS_HDR_LEN; @@ -6562,6 +6562,24 @@ int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) return 0; } +static ktime_t igc_get_tstamp(struct net_device *dev, + const struct skb_shared_hwtstamps *hwtstamps, + bool cycles) +{ + struct igc_adapter *adapter = netdev_priv(dev); + struct igc_inline_rx_tstamps *tstamp; + ktime_t timestamp; + + tstamp = hwtstamps->netdev_data; + + if (cycles) + timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer1); + else + timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer0); + + return timestamp; +} + static const struct net_device_ops igc_netdev_ops = { .ndo_open = igc_open, .ndo_stop = igc_close, @@ -6579,6 +6597,7 @@ static const struct net_device_ops igc_netdev_ops = { .ndo_bpf = igc_bpf, .ndo_xdp_xmit = igc_xdp_xmit, .ndo_xsk_wakeup = 
igc_xsk_wakeup, + .ndo_get_tstamp = igc_get_tstamp, }; /* PCIe configuration access */ @@ -6682,9 +6701,11 @@ static int igc_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, static int igc_xdp_rx_timestamp(const struct xdp_md *_ctx, u64 *timestamp) { const struct igc_xdp_buff *ctx = (void *)_ctx; + struct igc_adapter *adapter = netdev_priv(ctx->xdp.rxq->dev); + struct igc_inline_rx_tstamps *tstamp = ctx->rx_ts; if (igc_test_staterr(ctx->rx_desc, IGC_RXDADV_STAT_TSIP)) { - *timestamp = ctx->rx_ts; + *timestamp = igc_ptp_rx_pktstamp(adapter, tstamp->timer0); return 0; } diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 928f38792203..885faaa7b9de 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -459,12 +459,10 @@ static int igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter, /** * igc_ptp_rx_pktstamp - Retrieve timestamp from Rx packet buffer * @adapter: Pointer to adapter the packet buffer belongs to - * @buf: Pointer to packet buffer + * @buf: Pointer to start of timestamp in HW format (2 32-bit words) * - * This function retrieves the timestamp saved in the beginning of packet - * buffer. While two timestamps are available, one in timer0 reference and the - * other in timer1 reference, this function considers only the timestamp in - * timer0 reference. + * This function retrieves and converts the timestamp stored at @buf + * to ktime_t, adjusting for hardware latencies. * * Returns timestamp value. */ @@ -474,17 +472,8 @@ ktime_t igc_ptp_rx_pktstamp(struct igc_adapter *adapter, __le32 *buf) u32 secs, nsecs; int adjust; - /* Timestamps are saved in little endian at the beginning of the packet - * buffer following the layout: - * - * DWORD: | 0 | 1 | 2 | 3 | - * Field: | Timer1 SYSTIML | Timer1 SYSTIMH | Timer0 SYSTIML | Timer0 SYSTIMH | - * - * SYSTIML holds the nanoseconds part while SYSTIMH holds the seconds - * part of the timestamp. - */ - nsecs = le32_to_cpu(buf[2]); - secs = le32_to_cpu(buf[3]); + nsecs = le32_to_cpu(buf[0]); + secs = le32_to_cpu(buf[1]); timestamp = ktime_set(secs, nsecs); @@ -542,10 +531,11 @@ static void igc_ptp_enable_rx_timestamp(struct igc_adapter *adapter) for (i = 0; i < adapter->num_rx_queues; i++) { val = rd32(IGC_SRRCTL(i)); - /* FIXME: For now, only support retrieving RX timestamps from - * timer 0. + /* Enable retrieving timestamps from timer 0, the + * "adjustable clock" and timer 1 the "free running + * clock". 
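 * (TIMER1SEL is switched from 0 to 1 below, so the inline Rx
 * timestamp block carries both time references.)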
*/ - val |= IGC_SRRCTL_TIMER1SEL(0) | IGC_SRRCTL_TIMER0SEL(0) | + val |= IGC_SRRCTL_TIMER1SEL(1) | IGC_SRRCTL_TIMER0SEL(0) | IGC_SRRCTL_TIMESTAMP; wr32(IGC_SRRCTL(i), val); } @@ -1035,6 +1025,26 @@ static int igc_ptp_getcrosststamp(struct ptp_clock_info *ptp, adapter, &adapter->snapshot, cts); } +static int igc_ptp_getcyclesx64(struct ptp_clock_info *ptp, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + struct igc_adapter *igc = container_of(ptp, struct igc_adapter, ptp_caps); + struct igc_hw *hw = &igc->hw; + unsigned long flags; + + spin_lock_irqsave(&igc->free_timer_lock, flags); + + ptp_read_system_prets(sts); + ts->tv_nsec = rd32(IGC_SYSTIML_1); + ts->tv_sec = rd32(IGC_SYSTIMH_1); + ptp_read_system_postts(sts); + + spin_unlock_irqrestore(&igc->free_timer_lock, flags); + + return 0; +} + /** * igc_ptp_init - Initialize PTP functionality * @adapter: Board private structure @@ -1088,6 +1098,7 @@ void igc_ptp_init(struct igc_adapter *adapter) adapter->ptp_caps.adjfine = igc_ptp_adjfine_i225; adapter->ptp_caps.adjtime = igc_ptp_adjtime_i225; adapter->ptp_caps.gettimex64 = igc_ptp_gettimex64_i225; + adapter->ptp_caps.getcyclesx64 = igc_ptp_getcyclesx64; adapter->ptp_caps.settime64 = igc_ptp_settime_i225; adapter->ptp_caps.enable = igc_ptp_feature_enable_i225; adapter->ptp_caps.pps = 1; @@ -1108,6 +1119,7 @@ void igc_ptp_init(struct igc_adapter *adapter) } spin_lock_init(&adapter->ptp_tx_lock); + spin_lock_init(&adapter->free_timer_lock); spin_lock_init(&adapter->tmreg_lock); adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index 20e17f5fbce3..d38c87d7e5e8 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -243,6 +243,11 @@ #define IGC_SYSTIMR 0x0B6F8 /* System time register Residue */ #define IGC_TIMINCA 0x0B608 /* Increment attributes register - RW */ +#define IGC_SYSTIML_1 0x0B688 /* System time register Low - RO (timer 1) */ +#define IGC_SYSTIMH_1 0x0B68C /* System time register High - RO (timer 1) */ +#define IGC_SYSTIMR_1 0x0B684 /* System time register Residue (timer 1) */ +#define IGC_TIMINCA_1 0x0B690 /* Increment attributes register - RW (timer 1) */ + /* TX Timestamp Low */ #define IGC_TXSTMPL_0 0x0B618 #define IGC_TXSTMPL_1 0x0B698 diff --git a/drivers/net/ethernet/marvell/octeon_ep/Makefile b/drivers/net/ethernet/marvell/octeon_ep/Makefile index 2026c8118158..02a4a21bc298 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/Makefile +++ b/drivers/net/ethernet/marvell/octeon_ep/Makefile @@ -6,4 +6,5 @@ obj-$(CONFIG_OCTEON_EP) += octeon_ep.o octeon_ep-y := octep_main.o octep_cn9k_pf.o octep_tx.o octep_rx.o \ - octep_ethtool.o octep_ctrl_mbox.o octep_ctrl_net.o + octep_ethtool.o octep_ctrl_mbox.o octep_ctrl_net.o \ + octep_cnxk_pf.o diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c b/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c new file mode 100644 index 000000000000..abb03e9119e7 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c @@ -0,0 +1,886 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell Octeon EP (EndPoint) Ethernet Driver + * + * Copyright (C) 2020 Marvell. 
+ * + */ + +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> + +#include "octep_config.h" +#include "octep_main.h" +#include "octep_regs_cnxk_pf.h" + +/* We will support 128 pf's in control mbox */ +#define CTRL_MBOX_MAX_PF 128 +#define CTRL_MBOX_SZ ((size_t)(0x400000 / CTRL_MBOX_MAX_PF)) + +/* Names of Hardware non-queue generic interrupts */ +static char *cnxk_non_ioq_msix_names[] = { + "epf_ire_rint", + "epf_ore_rint", + "epf_vfire_rint", + "epf_rsvd0", + "epf_vfore_rint", + "epf_rsvd1", + "epf_mbox_rint", + "epf_rsvd2_0", + "epf_rsvd2_1", + "epf_dma_rint", + "epf_dma_vf_rint", + "epf_rsvd3", + "epf_pp_vf_rint", + "epf_rsvd3", + "epf_misc_rint", + "epf_rsvd5", + /* Next 16 are for OEI_RINT */ + "epf_oei_rint0", + "epf_oei_rint1", + "epf_oei_rint2", + "epf_oei_rint3", + "epf_oei_rint4", + "epf_oei_rint5", + "epf_oei_rint6", + "epf_oei_rint7", + "epf_oei_rint8", + "epf_oei_rint9", + "epf_oei_rint10", + "epf_oei_rint11", + "epf_oei_rint12", + "epf_oei_rint13", + "epf_oei_rint14", + "epf_oei_rint15", + /* IOQ interrupt */ + "octeon_ep" +}; + +/* Dump useful hardware CSRs for debug purpose */ +static void cnxk_dump_regs(struct octep_device *oct, int qno) +{ + struct device *dev = &oct->pdev->dev; + + dev_info(dev, "IQ-%d register dump\n", qno); + dev_info(dev, "R[%d]_IN_INSTR_DBELL[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_INSTR_DBELL(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_INSTR_DBELL(qno))); + dev_info(dev, "R[%d]_IN_CONTROL[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_CONTROL(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_CONTROL(qno))); + dev_info(dev, "R[%d]_IN_ENABLE[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_ENABLE(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_ENABLE(qno))); + dev_info(dev, "R[%d]_IN_INSTR_BADDR[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_INSTR_BADDR(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_INSTR_BADDR(qno))); + dev_info(dev, "R[%d]_IN_INSTR_RSIZE[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_INSTR_RSIZE(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_INSTR_RSIZE(qno))); + dev_info(dev, "R[%d]_IN_CNTS[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_CNTS(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_CNTS(qno))); + dev_info(dev, "R[%d]_IN_INT_LEVELS[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_INT_LEVELS(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_INT_LEVELS(qno))); + dev_info(dev, "R[%d]_IN_PKT_CNT[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_PKT_CNT(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_PKT_CNT(qno))); + dev_info(dev, "R[%d]_IN_BYTE_CNT[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_BYTE_CNT(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_BYTE_CNT(qno))); + + dev_info(dev, "OQ-%d register dump\n", qno); + dev_info(dev, "R[%d]_OUT_SLIST_DBELL[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_SLIST_DBELL(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_SLIST_DBELL(qno))); + dev_info(dev, "R[%d]_OUT_CONTROL[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_CONTROL(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_CONTROL(qno))); + dev_info(dev, "R[%d]_OUT_ENABLE[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_ENABLE(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_ENABLE(qno))); + dev_info(dev, "R[%d]_OUT_SLIST_BADDR[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_SLIST_BADDR(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_SLIST_BADDR(qno))); + dev_info(dev, "R[%d]_OUT_SLIST_RSIZE[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_SLIST_RSIZE(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_SLIST_RSIZE(qno))); + dev_info(dev, "R[%d]_OUT_CNTS[0x%llx]: 0x%016llx\n", + qno, 
CNXK_SDP_R_OUT_CNTS(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_CNTS(qno))); + dev_info(dev, "R[%d]_OUT_INT_LEVELS[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_INT_LEVELS(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_INT_LEVELS(qno))); + dev_info(dev, "R[%d]_OUT_PKT_CNT[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_PKT_CNT(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_PKT_CNT(qno))); + dev_info(dev, "R[%d]_OUT_BYTE_CNT[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_BYTE_CNT(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_BYTE_CNT(qno))); + dev_info(dev, "R[%d]_ERR_TYPE[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_ERR_TYPE(qno), + octep_read_csr64(oct, CNXK_SDP_R_ERR_TYPE(qno))); +} + +/* Reset Hardware Tx queue */ +static int cnxk_reset_iq(struct octep_device *oct, int q_no) +{ + struct octep_config *conf = oct->conf; + u64 val = 0ULL; + + dev_dbg(&oct->pdev->dev, "Reset PF IQ-%d\n", q_no); + + /* Get absolute queue number */ + q_no += conf->pf_ring_cfg.srn; + + /* Disable the Tx/Instruction Ring */ + octep_write_csr64(oct, CNXK_SDP_R_IN_ENABLE(q_no), val); + + /* clear the Instruction Ring packet/byte counts and doorbell CSRs */ + octep_write_csr64(oct, CNXK_SDP_R_IN_CNTS(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_IN_INT_LEVELS(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_IN_PKT_CNT(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_IN_BYTE_CNT(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_BADDR(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_RSIZE(q_no), val); + + val = 0xFFFFFFFF; + octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_DBELL(q_no), val); + + return 0; +} + +/* Reset Hardware Rx queue */ +static void cnxk_reset_oq(struct octep_device *oct, int q_no) +{ + u64 val = 0ULL; + + q_no += CFG_GET_PORTS_PF_SRN(oct->conf); + + /* Disable Output (Rx) Ring */ + octep_write_csr64(oct, CNXK_SDP_R_OUT_ENABLE(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_BADDR(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_RSIZE(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_OUT_INT_LEVELS(q_no), val); + + /* Clear count CSRs */ + val = octep_read_csr(oct, CNXK_SDP_R_OUT_CNTS(q_no)); + octep_write_csr(oct, CNXK_SDP_R_OUT_CNTS(q_no), val); + + octep_write_csr64(oct, CNXK_SDP_R_OUT_PKT_CNT(q_no), 0xFFFFFFFFFULL); + octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_DBELL(q_no), 0xFFFFFFFF); +} + +/* Reset all hardware Tx/Rx queues */ +static void octep_reset_io_queues_cnxk_pf(struct octep_device *oct) +{ + struct pci_dev *pdev = oct->pdev; + int q; + + dev_dbg(&pdev->dev, "Reset OCTEP_CNXK PF IO Queues\n"); + + for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) { + cnxk_reset_iq(oct, q); + cnxk_reset_oq(oct, q); + } +} + +/* Initialize windowed addresses to access some hardware registers */ +static void octep_setup_pci_window_regs_cnxk_pf(struct octep_device *oct) +{ + u8 __iomem *bar0_pciaddr = oct->mmio[0].hw_addr; + + oct->pci_win_regs.pci_win_wr_addr = (u8 __iomem *)(bar0_pciaddr + CNXK_SDP_WIN_WR_ADDR64); + oct->pci_win_regs.pci_win_rd_addr = (u8 __iomem *)(bar0_pciaddr + CNXK_SDP_WIN_RD_ADDR64); + oct->pci_win_regs.pci_win_wr_data = (u8 __iomem *)(bar0_pciaddr + CNXK_SDP_WIN_WR_DATA64); + oct->pci_win_regs.pci_win_rd_data = (u8 __iomem *)(bar0_pciaddr + CNXK_SDP_WIN_RD_DATA64); +} + +/* Configure Hardware mapping: inform hardware which rings belong to PF. 
*/ +static void octep_configure_ring_mapping_cnxk_pf(struct octep_device *oct) +{ + struct octep_config *conf = oct->conf; + struct pci_dev *pdev = oct->pdev; + u64 pf_srn = CFG_GET_PORTS_PF_SRN(oct->conf); + int q; + + for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(conf); q++) { + u64 regval = 0; + + if (oct->pcie_port) + regval = 8 << CNXK_SDP_FUNC_SEL_EPF_BIT_POS; + + octep_write_csr64(oct, CNXK_SDP_EPVF_RING(pf_srn + q), regval); + + regval = octep_read_csr64(oct, CNXK_SDP_EPVF_RING(pf_srn + q)); + dev_dbg(&pdev->dev, "Write SDP_EPVF_RING[0x%llx] = 0x%llx\n", + CNXK_SDP_EPVF_RING(pf_srn + q), regval); + } +} + +/* Initialize configuration limits and initial active config */ +static void octep_init_config_cnxk_pf(struct octep_device *oct) +{ + struct octep_config *conf = oct->conf; + struct pci_dev *pdev = oct->pdev; + u8 link = 0; + u64 val; + int pos; + + /* Read ring configuration: + * PF ring count, number of VFs and rings per VF supported + */ + val = octep_read_csr64(oct, CNXK_SDP_EPF_RINFO); + dev_info(&pdev->dev, "SDP_EPF_RINFO[0x%x]:0x%llx\n", CNXK_SDP_EPF_RINFO, val); + conf->sriov_cfg.max_rings_per_vf = CNXK_SDP_EPF_RINFO_RPVF(val); + conf->sriov_cfg.active_rings_per_vf = conf->sriov_cfg.max_rings_per_vf; + conf->sriov_cfg.max_vfs = CNXK_SDP_EPF_RINFO_NVFS(val); + conf->sriov_cfg.active_vfs = conf->sriov_cfg.max_vfs; + conf->sriov_cfg.vf_srn = CNXK_SDP_EPF_RINFO_SRN(val); + + val = octep_read_csr64(oct, CNXK_SDP_MAC_PF_RING_CTL(oct->pcie_port)); + dev_info(&pdev->dev, "SDP_MAC_PF_RING_CTL[%d]:0x%llx\n", oct->pcie_port, val); + conf->pf_ring_cfg.srn = CNXK_SDP_MAC_PF_RING_CTL_SRN(val); + conf->pf_ring_cfg.max_io_rings = CNXK_SDP_MAC_PF_RING_CTL_RPPF(val); + conf->pf_ring_cfg.active_io_rings = conf->pf_ring_cfg.max_io_rings; + dev_info(&pdev->dev, "pf_srn=%u rpvf=%u nvfs=%u rppf=%u\n", + conf->pf_ring_cfg.srn, conf->sriov_cfg.active_rings_per_vf, + conf->sriov_cfg.active_vfs, conf->pf_ring_cfg.active_io_rings); + + conf->iq.num_descs = OCTEP_IQ_MAX_DESCRIPTORS; + conf->iq.instr_type = OCTEP_64BYTE_INSTR; + conf->iq.db_min = OCTEP_DB_MIN; + conf->iq.intr_threshold = OCTEP_IQ_INTR_THRESHOLD; + + conf->oq.num_descs = OCTEP_OQ_MAX_DESCRIPTORS; + conf->oq.buf_size = OCTEP_OQ_BUF_SIZE; + conf->oq.refill_threshold = OCTEP_OQ_REFILL_THRESHOLD; + conf->oq.oq_intr_pkt = OCTEP_OQ_INTR_PKT_THRESHOLD; + conf->oq.oq_intr_time = OCTEP_OQ_INTR_TIME_THRESHOLD; + + conf->msix_cfg.non_ioq_msix = CNXK_NUM_NON_IOQ_INTR; + conf->msix_cfg.ioq_msix = conf->pf_ring_cfg.active_io_rings; + conf->msix_cfg.non_ioq_msix_names = cnxk_non_ioq_msix_names; + + pos = pci_find_ext_capability(oct->pdev, PCI_EXT_CAP_ID_SRIOV); + if (pos) { + pci_read_config_byte(oct->pdev, + pos + PCI_SRIOV_FUNC_LINK, + &link); + link = PCI_DEVFN(PCI_SLOT(oct->pdev->devfn), link); + } + conf->ctrl_mbox_cfg.barmem_addr = (void __iomem *)oct->mmio[2].hw_addr + + CNXK_PEM_BAR4_INDEX_OFFSET + + (link * CTRL_MBOX_SZ); + + conf->fw_info.hb_interval = OCTEP_DEFAULT_FW_HB_INTERVAL; + conf->fw_info.hb_miss_count = OCTEP_DEFAULT_FW_HB_MISS_COUNT; +} + +/* Setup registers for a hardware Tx Queue */ +static void octep_setup_iq_regs_cnxk_pf(struct octep_device *oct, int iq_no) +{ + struct octep_iq *iq = oct->iq[iq_no]; + u32 reset_instr_cnt; + u64 reg_val; + + iq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + reg_val = octep_read_csr64(oct, CNXK_SDP_R_IN_CONTROL(iq_no)); + + /* wait for IDLE to set to 1 */ + if (!(reg_val & CNXK_R_IN_CTL_IDLE)) { + do { + reg_val = octep_read_csr64(oct, CNXK_SDP_R_IN_CONTROL(iq_no)); + } while (!(reg_val & 
CNXK_R_IN_CTL_IDLE)); + } + + reg_val |= CNXK_R_IN_CTL_RDSIZE; + reg_val |= CNXK_R_IN_CTL_IS_64B; + reg_val |= CNXK_R_IN_CTL_ESR; + octep_write_csr64(oct, CNXK_SDP_R_IN_CONTROL(iq_no), reg_val); + + /* Write the start of the input queue's ring and its size */ + octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_BADDR(iq_no), + iq->desc_ring_dma); + octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_RSIZE(iq_no), + iq->max_count); + + /* Remember the doorbell & instruction count register addr + * for this queue + */ + iq->doorbell_reg = oct->mmio[0].hw_addr + + CNXK_SDP_R_IN_INSTR_DBELL(iq_no); + iq->inst_cnt_reg = oct->mmio[0].hw_addr + + CNXK_SDP_R_IN_CNTS(iq_no); + iq->intr_lvl_reg = oct->mmio[0].hw_addr + + CNXK_SDP_R_IN_INT_LEVELS(iq_no); + + /* Store the current instruction counter (used in flush_iq calculation) */ + reset_instr_cnt = readl(iq->inst_cnt_reg); + writel(reset_instr_cnt, iq->inst_cnt_reg); + + /* INTR_THRESHOLD is set to max(FFFFFFFF) to disable the INTR */ + reg_val = CFG_GET_IQ_INTR_THRESHOLD(oct->conf) & 0xffffffff; + octep_write_csr64(oct, CNXK_SDP_R_IN_INT_LEVELS(iq_no), reg_val); +} + +/* Setup registers for a hardware Rx Queue */ +static void octep_setup_oq_regs_cnxk_pf(struct octep_device *oct, int oq_no) +{ + u64 reg_val; + u64 oq_ctl = 0ULL; + u32 time_threshold = 0; + struct octep_oq *oq = oct->oq[oq_no]; + + oq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + reg_val = octep_read_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no)); + + /* wait for IDLE to set to 1 */ + if (!(reg_val & CNXK_R_OUT_CTL_IDLE)) { + do { + reg_val = octep_read_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no)); + } while (!(reg_val & CNXK_R_OUT_CTL_IDLE)); + } + + reg_val &= ~(CNXK_R_OUT_CTL_IMODE); + reg_val &= ~(CNXK_R_OUT_CTL_ROR_P); + reg_val &= ~(CNXK_R_OUT_CTL_NSR_P); + reg_val &= ~(CNXK_R_OUT_CTL_ROR_I); + reg_val &= ~(CNXK_R_OUT_CTL_NSR_I); + reg_val &= ~(CNXK_R_OUT_CTL_ES_I); + reg_val &= ~(CNXK_R_OUT_CTL_ROR_D); + reg_val &= ~(CNXK_R_OUT_CTL_NSR_D); + reg_val &= ~(CNXK_R_OUT_CTL_ES_D); + reg_val |= (CNXK_R_OUT_CTL_ES_P); + + octep_write_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no), reg_val); + octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_BADDR(oq_no), + oq->desc_ring_dma); + octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_RSIZE(oq_no), + oq->max_count); + + oq_ctl = octep_read_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no)); + + /* Clear the ISIZE and BSIZE (22-0) */ + oq_ctl &= ~0x7fffffULL; + + /* Populate the BSIZE (15-0) */ + oq_ctl |= (oq->buffer_size & 0xffff); + octep_write_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no), oq_ctl); + + /* Get the mapped address of the pkt_sent and pkts_credit regs */ + oq->pkts_sent_reg = oct->mmio[0].hw_addr + CNXK_SDP_R_OUT_CNTS(oq_no); + oq->pkts_credit_reg = oct->mmio[0].hw_addr + + CNXK_SDP_R_OUT_SLIST_DBELL(oq_no); + + time_threshold = CFG_GET_OQ_INTR_TIME(oct->conf); + reg_val = ((u64)time_threshold << 32) | + CFG_GET_OQ_INTR_PKT(oct->conf); + octep_write_csr64(oct, CNXK_SDP_R_OUT_INT_LEVELS(oq_no), reg_val); +} + +/* Setup registers for a PF mailbox */ +static void octep_setup_mbox_regs_cnxk_pf(struct octep_device *oct, int q_no) +{ + struct octep_mbox *mbox = oct->mbox[q_no]; + + mbox->q_no = q_no; + + /* PF mbox interrupt reg */ + mbox->mbox_int_reg = oct->mmio[0].hw_addr + CNXK_SDP_EPF_MBOX_RINT(0); + + /* PF to VF DATA reg. PF writes into this reg */ + mbox->mbox_write_reg = oct->mmio[0].hw_addr + CNXK_SDP_R_MBOX_PF_VF_DATA(q_no); + + /* VF to PF DATA reg. 
PF reads from this reg */ + mbox->mbox_read_reg = oct->mmio[0].hw_addr + CNXK_SDP_R_MBOX_VF_PF_DATA(q_no); +} + +/* Poll OEI events like heartbeat */ +static void octep_poll_oei_cnxk_pf(struct octep_device *oct) +{ + u64 reg0; + + /* Check for OEI INTR */ + reg0 = octep_read_csr64(oct, CNXK_SDP_EPF_OEI_RINT); + if (reg0) { + octep_write_csr64(oct, CNXK_SDP_EPF_OEI_RINT, reg0); + if (reg0 & CNXK_SDP_EPF_OEI_RINT_DATA_BIT_MBOX) + queue_work(octep_wq, &oct->ctrl_mbox_task); + if (reg0 & CNXK_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT) + atomic_set(&oct->hb_miss_cnt, 0); + } +} + +/* OEI interrupt handler */ +static irqreturn_t octep_oei_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + + octep_poll_oei_cnxk_pf(oct); + return IRQ_HANDLED; +} + +/* Process non-ioq interrupts required to keep pf interface running. + * OEI_RINT is needed for control mailbox + * MBOX_RINT is needed for pfvf mailbox + */ +static void octep_poll_non_ioq_interrupts_cnxk_pf(struct octep_device *oct) +{ + octep_poll_oei_cnxk_pf(oct); +} + +/* Interrupt handler for input ring error interrupts. */ +static irqreturn_t octep_ire_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; + int i = 0; + + /* Check for IRERR INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_IRERR_RINT); + if (reg_val) { + dev_info(&pdev->dev, + "received IRERR_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CNXK_SDP_EPF_IRERR_RINT, reg_val); + + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) { + reg_val = octep_read_csr64(oct, + CNXK_SDP_R_ERR_TYPE(i)); + if (reg_val) { + dev_info(&pdev->dev, + "Received err type on IQ-%d: 0x%llx\n", + i, reg_val); + octep_write_csr64(oct, CNXK_SDP_R_ERR_TYPE(i), + reg_val); + } + } + } + return IRQ_HANDLED; +} + +/* Interrupt handler for output ring error interrupts. */ +static irqreturn_t octep_ore_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; + int i = 0; + + /* Check for ORERR INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_ORERR_RINT); + if (reg_val) { + dev_info(&pdev->dev, + "Received ORERR_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CNXK_SDP_EPF_ORERR_RINT, reg_val); + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) { + reg_val = octep_read_csr64(oct, CNXK_SDP_R_ERR_TYPE(i)); + if (reg_val) { + dev_info(&pdev->dev, + "Received err type on OQ-%d: 0x%llx\n", + i, reg_val); + octep_write_csr64(oct, CNXK_SDP_R_ERR_TYPE(i), + reg_val); + } + } + } + return IRQ_HANDLED; +} + +/* Interrupt handler for vf input ring error interrupts. */ +static irqreturn_t octep_vfire_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; + + /* Check for VFIRE INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_VFIRE_RINT(0)); + if (reg_val) { + dev_info(&pdev->dev, + "Received VFIRE_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CNXK_SDP_EPF_VFIRE_RINT(0), reg_val); + } + return IRQ_HANDLED; +} + +/* Interrupt handler for vf output ring error interrupts. 
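+ * Any set bits in the VFORE status register are logged and written back
+ * to the register to acknowledge them.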
*/ +static irqreturn_t octep_vfore_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; + + /* Check for VFORE INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_VFORE_RINT(0)); + if (reg_val) { + dev_info(&pdev->dev, + "Received VFORE_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CNXK_SDP_EPF_VFORE_RINT(0), reg_val); + } + return IRQ_HANDLED; +} + +/* Interrupt handler for dpi dma related interrupts. */ +static irqreturn_t octep_dma_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + u64 reg_val = 0; + + /* Check for DMA INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_DMA_RINT); + if (reg_val) + octep_write_csr64(oct, CNXK_SDP_EPF_DMA_RINT, reg_val); + + return IRQ_HANDLED; +} + +/* Interrupt handler for dpi dma transaction error interrupts for VFs */ +static irqreturn_t octep_dma_vf_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; + + /* Check for DMA VF INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_DMA_VF_RINT(0)); + if (reg_val) { + dev_info(&pdev->dev, + "Received DMA_VF_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CNXK_SDP_EPF_DMA_VF_RINT(0), reg_val); + } + return IRQ_HANDLED; +} + +/* Interrupt handler for pp transaction error interrupts for VFs */ +static irqreturn_t octep_pp_vf_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; + + /* Check for PPVF INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_PP_VF_RINT(0)); + if (reg_val) { + dev_info(&pdev->dev, + "Received PP_VF_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CNXK_SDP_EPF_PP_VF_RINT(0), reg_val); + } + return IRQ_HANDLED; +} + +/* Interrupt handler for mac related interrupts. */ +static irqreturn_t octep_misc_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; + + /* Check for MISC INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_MISC_RINT); + if (reg_val) { + dev_info(&pdev->dev, + "Received MISC_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CNXK_SDP_EPF_MISC_RINT, reg_val); + } + return IRQ_HANDLED; +} + +/* Interrupts handler for all reserved interrupts. */ +static irqreturn_t octep_rsvd_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + + dev_info(&pdev->dev, "Reserved interrupts raised; Ignore\n"); + return IRQ_HANDLED; +} + +/* Tx/Rx queue interrupt handler */ +static irqreturn_t octep_ioq_intr_handler_cnxk_pf(void *data) +{ + struct octep_ioq_vector *vector = (struct octep_ioq_vector *)data; + struct octep_oq *oq = vector->oq; + + napi_schedule_irqoff(oq->napi); + return IRQ_HANDLED; +} + +/* soft reset */ +static int octep_soft_reset_cnxk_pf(struct octep_device *oct) +{ + dev_info(&oct->pdev->dev, "CNXKXX: Doing soft reset\n"); + + octep_write_csr64(oct, CNXK_SDP_WIN_WR_MASK_REG, 0xFF); + + /* Firmware status CSR is supposed to be cleared by + * core domain reset, but due to a hw bug, it is not. + * Set it to RUNNING right before reset so that it is not + * left in READY (1) state after a reset. 
This is required + * in addition to the early setting to handle the case where + * the OcteonTX is unexpectedly reset, reboots, and then + * the module is removed. + */ + OCTEP_PCI_WIN_WRITE(oct, CNXK_PEMX_PFX_CSX_PFCFGX(0, 0, CNXK_PCIEEP_VSECST_CTL), + FW_STATUS_RUNNING); + + /* Set chip domain reset bit */ + OCTEP_PCI_WIN_WRITE(oct, CNXK_RST_CHIP_DOMAIN_W1S, 1); + /* Wait till Octeon resets. */ + mdelay(10); + /* restore the reset value */ + octep_write_csr64(oct, CNXK_SDP_WIN_WR_MASK_REG, 0xFF); + + return 0; +} + +/* Re-initialize Octeon hardware registers */ +static void octep_reinit_regs_cnxk_pf(struct octep_device *oct) +{ + u32 i; + + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) + oct->hw_ops.setup_iq_regs(oct, i); + + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) + oct->hw_ops.setup_oq_regs(oct, i); + + oct->hw_ops.enable_interrupts(oct); + oct->hw_ops.enable_io_queues(oct); + + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) + writel(oct->oq[i]->max_count, oct->oq[i]->pkts_credit_reg); +} + +/* Enable all interrupts */ +static void octep_enable_interrupts_cnxk_pf(struct octep_device *oct) +{ + u64 intr_mask = 0ULL; + int srn, num_rings, i; + + srn = CFG_GET_PORTS_PF_SRN(oct->conf); + num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); + + for (i = 0; i < num_rings; i++) + intr_mask |= (0x1ULL << (srn + i)); + + octep_write_csr64(oct, CNXK_SDP_EPF_IRERR_RINT_ENA_W1S, intr_mask); + octep_write_csr64(oct, CNXK_SDP_EPF_ORERR_RINT_ENA_W1S, intr_mask); + octep_write_csr64(oct, CNXK_SDP_EPF_OEI_RINT_ENA_W1S, -1ULL); + + octep_write_csr64(oct, CNXK_SDP_EPF_VFIRE_RINT_ENA_W1S(0), -1ULL); + octep_write_csr64(oct, CNXK_SDP_EPF_VFORE_RINT_ENA_W1S(0), -1ULL); + + octep_write_csr64(oct, CNXK_SDP_EPF_MISC_RINT_ENA_W1S, intr_mask); + octep_write_csr64(oct, CNXK_SDP_EPF_DMA_RINT_ENA_W1S, intr_mask); + + octep_write_csr64(oct, CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1S(0), -1ULL); + octep_write_csr64(oct, CNXK_SDP_EPF_PP_VF_RINT_ENA_W1S(0), -1ULL); +} + +/* Disable all interrupts */ +static void octep_disable_interrupts_cnxk_pf(struct octep_device *oct) +{ + u64 intr_mask = 0ULL; + int srn, num_rings, i; + + srn = CFG_GET_PORTS_PF_SRN(oct->conf); + num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); + + for (i = 0; i < num_rings; i++) + intr_mask |= (0x1ULL << (srn + i)); + + octep_write_csr64(oct, CNXK_SDP_EPF_IRERR_RINT_ENA_W1C, intr_mask); + octep_write_csr64(oct, CNXK_SDP_EPF_ORERR_RINT_ENA_W1C, intr_mask); + octep_write_csr64(oct, CNXK_SDP_EPF_OEI_RINT_ENA_W1C, -1ULL); + + octep_write_csr64(oct, CNXK_SDP_EPF_VFIRE_RINT_ENA_W1C(0), -1ULL); + octep_write_csr64(oct, CNXK_SDP_EPF_VFORE_RINT_ENA_W1C(0), -1ULL); + + octep_write_csr64(oct, CNXK_SDP_EPF_MISC_RINT_ENA_W1C, intr_mask); + octep_write_csr64(oct, CNXK_SDP_EPF_DMA_RINT_ENA_W1C, intr_mask); + + octep_write_csr64(oct, CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1C(0), -1ULL); + octep_write_csr64(oct, CNXK_SDP_EPF_PP_VF_RINT_ENA_W1C(0), -1ULL); +} + +/* Get new Octeon Read Index: index of descriptor that Octeon reads next. 
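+ * The hardware instruction count register accumulates; the difference from
+ * the value seen on the previous call (u32 arithmetic handles counter wrap)
+ * gives the number of descriptors consumed since then, and the read index
+ * advances by that amount modulo the ring size.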
*/ +static u32 octep_update_iq_read_index_cnxk_pf(struct octep_iq *iq) +{ + u32 pkt_in_done = readl(iq->inst_cnt_reg); + u32 last_done, new_idx; + + last_done = pkt_in_done - iq->pkt_in_done; + iq->pkt_in_done = pkt_in_done; + + new_idx = (iq->octep_read_index + last_done) % iq->max_count; + + return new_idx; +} + +/* Enable a hardware Tx Queue */ +static void octep_enable_iq_cnxk_pf(struct octep_device *oct, int iq_no) +{ + u64 loop = HZ; + u64 reg_val; + + iq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + + octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_DBELL(iq_no), 0xFFFFFFFF); + + while (octep_read_csr64(oct, CNXK_SDP_R_IN_INSTR_DBELL(iq_no)) && + loop--) { + schedule_timeout_interruptible(1); + } + + reg_val = octep_read_csr64(oct, CNXK_SDP_R_IN_INT_LEVELS(iq_no)); + reg_val |= (0x1ULL << 62); + octep_write_csr64(oct, CNXK_SDP_R_IN_INT_LEVELS(iq_no), reg_val); + + reg_val = octep_read_csr64(oct, CNXK_SDP_R_IN_ENABLE(iq_no)); + reg_val |= 0x1ULL; + octep_write_csr64(oct, CNXK_SDP_R_IN_ENABLE(iq_no), reg_val); +} + +/* Enable a hardware Rx Queue */ +static void octep_enable_oq_cnxk_pf(struct octep_device *oct, int oq_no) +{ + u64 reg_val = 0ULL; + + oq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + + reg_val = octep_read_csr64(oct, CNXK_SDP_R_OUT_INT_LEVELS(oq_no)); + reg_val |= (0x1ULL << 62); + octep_write_csr64(oct, CNXK_SDP_R_OUT_INT_LEVELS(oq_no), reg_val); + + octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_DBELL(oq_no), 0xFFFFFFFF); + + reg_val = octep_read_csr64(oct, CNXK_SDP_R_OUT_ENABLE(oq_no)); + reg_val |= 0x1ULL; + octep_write_csr64(oct, CNXK_SDP_R_OUT_ENABLE(oq_no), reg_val); +} + +/* Enable all hardware Tx/Rx Queues assined to PF */ +static void octep_enable_io_queues_cnxk_pf(struct octep_device *oct) +{ + u8 q; + + for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) { + octep_enable_iq_cnxk_pf(oct, q); + octep_enable_oq_cnxk_pf(oct, q); + } +} + +/* Disable a hardware Tx Queue assined to PF */ +static void octep_disable_iq_cnxk_pf(struct octep_device *oct, int iq_no) +{ + u64 reg_val = 0ULL; + + iq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + + reg_val = octep_read_csr64(oct, CNXK_SDP_R_IN_ENABLE(iq_no)); + reg_val &= ~0x1ULL; + octep_write_csr64(oct, CNXK_SDP_R_IN_ENABLE(iq_no), reg_val); +} + +/* Disable a hardware Rx Queue assined to PF */ +static void octep_disable_oq_cnxk_pf(struct octep_device *oct, int oq_no) +{ + u64 reg_val = 0ULL; + + oq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + reg_val = octep_read_csr64(oct, CNXK_SDP_R_OUT_ENABLE(oq_no)); + reg_val &= ~0x1ULL; + octep_write_csr64(oct, CNXK_SDP_R_OUT_ENABLE(oq_no), reg_val); +} + +/* Disable all hardware Tx/Rx Queues assined to PF */ +static void octep_disable_io_queues_cnxk_pf(struct octep_device *oct) +{ + int q = 0; + + for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) { + octep_disable_iq_cnxk_pf(oct, q); + octep_disable_oq_cnxk_pf(oct, q); + } +} + +/* Dump hardware registers (including Tx/Rx queues) for debugging. */ +static void octep_dump_registers_cnxk_pf(struct octep_device *oct) +{ + u8 srn, num_rings, q; + + srn = CFG_GET_PORTS_PF_SRN(oct->conf); + num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); + + for (q = srn; q < srn + num_rings; q++) + cnxk_dump_regs(oct, q); +} + +/** + * octep_device_setup_cnxk_pf() - Setup Octeon device. + * + * @oct: Octeon device private data structure. + * + * - initialize hardware operations. + * - get target side pcie port number for the device. + * - setup window access to hardware registers. + * - set initial configuration and max limits. 
+ * - setup hardware mapping of rings to the PF device. + */ +void octep_device_setup_cnxk_pf(struct octep_device *oct) +{ + oct->hw_ops.setup_iq_regs = octep_setup_iq_regs_cnxk_pf; + oct->hw_ops.setup_oq_regs = octep_setup_oq_regs_cnxk_pf; + oct->hw_ops.setup_mbox_regs = octep_setup_mbox_regs_cnxk_pf; + + oct->hw_ops.oei_intr_handler = octep_oei_intr_handler_cnxk_pf; + oct->hw_ops.ire_intr_handler = octep_ire_intr_handler_cnxk_pf; + oct->hw_ops.ore_intr_handler = octep_ore_intr_handler_cnxk_pf; + oct->hw_ops.vfire_intr_handler = octep_vfire_intr_handler_cnxk_pf; + oct->hw_ops.vfore_intr_handler = octep_vfore_intr_handler_cnxk_pf; + oct->hw_ops.dma_intr_handler = octep_dma_intr_handler_cnxk_pf; + oct->hw_ops.dma_vf_intr_handler = octep_dma_vf_intr_handler_cnxk_pf; + oct->hw_ops.pp_vf_intr_handler = octep_pp_vf_intr_handler_cnxk_pf; + oct->hw_ops.misc_intr_handler = octep_misc_intr_handler_cnxk_pf; + oct->hw_ops.rsvd_intr_handler = octep_rsvd_intr_handler_cnxk_pf; + oct->hw_ops.ioq_intr_handler = octep_ioq_intr_handler_cnxk_pf; + oct->hw_ops.soft_reset = octep_soft_reset_cnxk_pf; + oct->hw_ops.reinit_regs = octep_reinit_regs_cnxk_pf; + + oct->hw_ops.enable_interrupts = octep_enable_interrupts_cnxk_pf; + oct->hw_ops.disable_interrupts = octep_disable_interrupts_cnxk_pf; + oct->hw_ops.poll_non_ioq_interrupts = octep_poll_non_ioq_interrupts_cnxk_pf; + + oct->hw_ops.update_iq_read_idx = octep_update_iq_read_index_cnxk_pf; + + oct->hw_ops.enable_iq = octep_enable_iq_cnxk_pf; + oct->hw_ops.enable_oq = octep_enable_oq_cnxk_pf; + oct->hw_ops.enable_io_queues = octep_enable_io_queues_cnxk_pf; + + oct->hw_ops.disable_iq = octep_disable_iq_cnxk_pf; + oct->hw_ops.disable_oq = octep_disable_oq_cnxk_pf; + oct->hw_ops.disable_io_queues = octep_disable_io_queues_cnxk_pf; + oct->hw_ops.reset_io_queues = octep_reset_io_queues_cnxk_pf; + + oct->hw_ops.dump_registers = octep_dump_registers_cnxk_pf; + + octep_setup_pci_window_regs_cnxk_pf(oct); + + oct->pcie_port = octep_read_csr64(oct, CNXK_SDP_MAC_NUMBER) & 0xff; + dev_info(&oct->pdev->dev, + "Octeon device using PCIE Port %d\n", oct->pcie_port); + + octep_init_config_cnxk_pf(oct); + octep_configure_ring_mapping_cnxk_pf(oct); + + /* Firmware status CSR is supposed to be cleared by + * core domain reset, but due to IPBUPEM-38842, it is not. + * Set it to RUNNING early in boot, so that unexpected resets + * leave it in a state that is not READY (1). 
+ */ + OCTEP_PCI_WIN_WRITE(oct, CNXK_PEMX_PFX_CSX_PFCFGX(0, 0, CNXK_PCIEEP_VSECST_CTL), + FW_STATUS_RUNNING); +} diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h index 1622a6ebf036..91cfa19c65b9 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h @@ -13,9 +13,10 @@ #define OCTEP_64BYTE_INSTR 64 /* Tx Queue: maximum descriptors per ring */ +/* This needs to be a power of 2 */ #define OCTEP_IQ_MAX_DESCRIPTORS 1024 /* Minimum input (Tx) requests to be enqueued to ring doorbell */ -#define OCTEP_DB_MIN 1 +#define OCTEP_DB_MIN 8 /* Packet threshold for Tx queue interrupt */ #define OCTEP_IQ_INTR_THRESHOLD 0x0 diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 552970c7dec0..2da00a701df2 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -24,6 +24,10 @@ struct workqueue_struct *octep_wq; static const struct pci_device_id octep_pci_id_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN93_PF)}, {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CNF95N_PF)}, + {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN10KA_PF)}, + {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CNF10KA_PF)}, + {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CNF10KB_PF)}, + {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN10KB_PF)}, {0, }, }; MODULE_DEVICE_TABLE(pci, octep_pci_id_tbl); @@ -777,17 +781,24 @@ static int octep_stop(struct net_device *netdev) */ static inline int octep_iq_full_check(struct octep_iq *iq) { - if (likely((iq->max_count - atomic_read(&iq->instr_pending)) >= + if (likely((IQ_INSTR_SPACE(iq)) > OCTEP_WAKE_QUEUE_THRESHOLD)) return 0; /* Stop the queue if unable to send */ netif_stop_subqueue(iq->netdev, iq->q_no); + /* Allow for pending updates in write index + * from iq_process_completion in other cpus + * to reflect, in case queue gets free + * entries. + */ + smp_mb(); + /* check again and restart the queue, in case NAPI has just freed * enough Tx ring entries. 
*/ - if (unlikely((iq->max_count - atomic_read(&iq->instr_pending)) >= + if (unlikely(IQ_INSTR_SPACE(iq) > OCTEP_WAKE_QUEUE_THRESHOLD)) { netif_start_subqueue(iq->netdev, iq->q_no); iq->stats.restart_cnt++; @@ -818,8 +829,12 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb, struct octep_iq *iq; skb_frag_t *frag; u16 nr_frags, si; + int xmit_more; u16 q_no, wi; + if (skb_put_padto(skb, ETH_ZLEN)) + return NETDEV_TX_OK; + q_no = skb_get_queue_mapping(skb); if (q_no >= oct->num_iqs) { netdev_err(netdev, "Invalid Tx skb->queue_mapping=%d\n", q_no); @@ -827,10 +842,6 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb, } iq = oct->iq[q_no]; - if (octep_iq_full_check(iq)) { - iq->stats.tx_busy++; - return NETDEV_TX_BUSY; - } shinfo = skb_shinfo(skb); nr_frags = shinfo->nr_frags; @@ -869,9 +880,6 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb, if (dma_mapping_error(iq->dev, dma)) goto dma_map_err; - dma_sync_single_for_cpu(iq->dev, tx_buffer->sglist_dma, - OCTEP_SGLIST_SIZE_PER_PKT, - DMA_TO_DEVICE); memset(sglist, 0, OCTEP_SGLIST_SIZE_PER_PKT); sglist[0].len[3] = len; sglist[0].dma_ptr[0] = dma; @@ -891,26 +899,33 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb, frag++; si++; } - dma_sync_single_for_device(iq->dev, tx_buffer->sglist_dma, - OCTEP_SGLIST_SIZE_PER_PKT, - DMA_TO_DEVICE); - hw_desc->dptr = tx_buffer->sglist_dma; } - netdev_tx_sent_queue(iq->netdev_q, skb->len); + xmit_more = netdev_xmit_more(); + + __netdev_tx_sent_queue(iq->netdev_q, skb->len, xmit_more); + skb_tx_timestamp(skb); - atomic_inc(&iq->instr_pending); + iq->fill_cnt++; wi++; - if (wi == iq->max_count) - wi = 0; - iq->host_write_index = wi; + iq->host_write_index = wi & iq->ring_size_mask; + + /* octep_iq_full_check stops the queue and returns + * true if so, in case the queue has become full + * by inserting current packet. If so, we can + * go ahead and ring doorbell. 
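+ * Otherwise, when the stack signals xmit_more and fewer than fill_threshold
+ * descriptors have been queued since the last doorbell, the doorbell write
+ * is deferred so that several packets are posted with a single write.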
+ */ + if (!octep_iq_full_check(iq) && xmit_more && + iq->fill_cnt < iq->fill_threshold) + return NETDEV_TX_OK; + /* Flush the hw descriptor before writing to doorbell */ wmb(); - - /* Ring Doorbell to notify the NIC there is a new packet */ - writel(1, iq->doorbell_reg); - iq->stats.instr_posted++; + /* Ring Doorbell to notify the NIC of new packets */ + writel(iq->fill_cnt, iq->doorbell_reg); + iq->stats.instr_posted += iq->fill_cnt; + iq->fill_cnt = 0; return NETDEV_TX_OK; dma_map_sg_err: @@ -1136,6 +1151,14 @@ static const char *octep_devid_to_str(struct octep_device *oct) return "CN93XX"; case OCTEP_PCI_DEVICE_ID_CNF95N_PF: return "CNF95N"; + case OCTEP_PCI_DEVICE_ID_CN10KA_PF: + return "CN10KA"; + case OCTEP_PCI_DEVICE_ID_CNF10KA_PF: + return "CNF10KA"; + case OCTEP_PCI_DEVICE_ID_CNF10KB_PF: + return "CNF10KB"; + case OCTEP_PCI_DEVICE_ID_CN10KB_PF: + return "CN10KB"; default: return "Unsupported"; } @@ -1181,6 +1204,14 @@ int octep_device_setup(struct octep_device *oct) OCTEP_MINOR_REV(oct)); octep_device_setup_cn93_pf(oct); break; + case OCTEP_PCI_DEVICE_ID_CNF10KA_PF: + case OCTEP_PCI_DEVICE_ID_CN10KA_PF: + case OCTEP_PCI_DEVICE_ID_CNF10KB_PF: + case OCTEP_PCI_DEVICE_ID_CN10KB_PF: + dev_info(&pdev->dev, "Setting up OCTEON %s PF PASS%d.%d\n", + octep_devid_to_str(oct), OCTEP_MAJOR_REV(oct), OCTEP_MINOR_REV(oct)); + octep_device_setup_cnxk_pf(oct); + break; default: dev_err(&pdev->dev, "%s: unsupported device\n", __func__); diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h index 6df902ebb7f3..e2fe8b28eb0e 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h @@ -23,6 +23,11 @@ #define OCTEP_PCI_DEVICE_ID_CNF95N_PF 0xB400 //95N PF +#define OCTEP_PCI_DEVICE_ID_CN10KA_PF 0xB900 //CN10KA PF +#define OCTEP_PCI_DEVICE_ID_CNF10KA_PF 0xBA00 //CNF10KA PF +#define OCTEP_PCI_DEVICE_ID_CNF10KB_PF 0xBC00 //CNF10KB PF +#define OCTEP_PCI_DEVICE_ID_CN10KB_PF 0xBD00 //CN10KB PF + #define OCTEP_MAX_QUEUES 63 #define OCTEP_MAX_IQ OCTEP_MAX_QUEUES #define OCTEP_MAX_OQ OCTEP_MAX_QUEUES @@ -40,6 +45,15 @@ #define OCTEP_OQ_INTR_RESEND_BIT 59 #define OCTEP_MMIO_REGIONS 3 + +#define IQ_INSTR_PENDING(iq) ({ typeof(iq) iq__ = (iq); \ + ((iq__)->host_write_index - (iq__)->flush_index) & \ + (iq__)->ring_size_mask; \ + }) +#define IQ_INSTR_SPACE(iq) ({ typeof(iq) iq_ = (iq); \ + (iq_)->max_count - IQ_INSTR_PENDING(iq_); \ + }) + /* PCI address space mapping information. * Each of the 3 address spaces given by BAR0, BAR2 and BAR4 of * Octeon gets mapped to different physical address spaces in @@ -377,6 +391,7 @@ int octep_setup_oqs(struct octep_device *oct); void octep_free_oqs(struct octep_device *oct); void octep_oq_dbell_init(struct octep_device *oct); void octep_device_setup_cn93_pf(struct octep_device *oct); +void octep_device_setup_cnxk_pf(struct octep_device *oct); int octep_iq_process_completions(struct octep_iq *iq, u16 budget); int octep_oq_process_rx(struct octep_oq *oq, int budget); void octep_set_ethtool_ops(struct net_device *netdev); diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h new file mode 100644 index 000000000000..abe02df8af11 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h @@ -0,0 +1,400 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell Octeon EP (EndPoint) Ethernet Driver + * + * Copyright (C) 2020 Marvell. 
+ * + */ + +#ifndef _OCTEP_REGS_CNXK_PF_H_ +#define _OCTEP_REGS_CNXK_PF_H_ + +/* ############################ RST ######################### */ +#define CNXK_RST_BOOT 0x000087E006001600ULL +#define CNXK_RST_CHIP_DOMAIN_W1S 0x000087E006001810ULL +#define CNXK_RST_CORE_DOMAIN_W1S 0x000087E006001820ULL +#define CNXK_RST_CORE_DOMAIN_W1C 0x000087E006001828ULL + +#define CNXK_CONFIG_XPANSION_BAR 0x38 +#define CNXK_CONFIG_PCIE_CAP 0x70 +#define CNXK_CONFIG_PCIE_DEVCAP 0x74 +#define CNXK_CONFIG_PCIE_DEVCTL 0x78 +#define CNXK_CONFIG_PCIE_LINKCAP 0x7C +#define CNXK_CONFIG_PCIE_LINKCTL 0x80 +#define CNXK_CONFIG_PCIE_SLOTCAP 0x84 +#define CNXK_CONFIG_PCIE_SLOTCTL 0x88 + +#define CNXK_PCIE_SRIOV_FDL 0x188 /* 0x98 */ +#define CNXK_PCIE_SRIOV_FDL_BIT_POS 0x10 +#define CNXK_PCIE_SRIOV_FDL_MASK 0xFF + +#define CNXK_CONFIG_PCIE_FLTMSK 0x720 + +/* ################# Offsets of RING, EPF, MAC ######################### */ +#define CNXK_RING_OFFSET (0x1ULL << 17) +#define CNXK_EPF_OFFSET (0x1ULL << 25) +#define CNXK_MAC_OFFSET (0x1ULL << 4) +#define CNXK_BIT_ARRAY_OFFSET (0x1ULL << 4) +#define CNXK_EPVF_RING_OFFSET (0x1ULL << 4) + +/* ################# Scratch Registers ######################### */ +#define CNXK_SDP_EPF_SCRATCH 0x209E0 + +/* ################# Window Registers ######################### */ +#define CNXK_SDP_WIN_WR_ADDR64 0x20000 +#define CNXK_SDP_WIN_RD_ADDR64 0x20010 +#define CNXK_SDP_WIN_WR_DATA64 0x20020 +#define CNXK_SDP_WIN_WR_MASK_REG 0x20030 +#define CNXK_SDP_WIN_RD_DATA64 0x20040 + +#define CNXK_SDP_MAC_NUMBER 0x2C100 + +/* ################# Global Previliged registers ######################### */ +#define CNXK_SDP_EPF_RINFO 0x209F0 + +#define CNXK_SDP_EPF_RINFO_SRN(val) ((val) & 0x7F) +#define CNXK_SDP_EPF_RINFO_RPVF(val) (((val) >> 32) & 0xF) +#define CNXK_SDP_EPF_RINFO_NVFS(val) (((val) >> 48) & 0x7F) + +/* SDP Function select */ +#define CNXK_SDP_FUNC_SEL_EPF_BIT_POS 7 +#define CNXK_SDP_FUNC_SEL_FUNC_BIT_POS 0 + +/* ##### RING IN (Into device from PCI: Tx Ring) REGISTERS #### */ +#define CNXK_SDP_R_IN_CONTROL_START 0x10000 +#define CNXK_SDP_R_IN_ENABLE_START 0x10010 +#define CNXK_SDP_R_IN_INSTR_BADDR_START 0x10020 +#define CNXK_SDP_R_IN_INSTR_RSIZE_START 0x10030 +#define CNXK_SDP_R_IN_INSTR_DBELL_START 0x10040 +#define CNXK_SDP_R_IN_CNTS_START 0x10050 +#define CNXK_SDP_R_IN_INT_LEVELS_START 0x10060 +#define CNXK_SDP_R_IN_PKT_CNT_START 0x10080 +#define CNXK_SDP_R_IN_BYTE_CNT_START 0x10090 + +#define CNXK_SDP_R_IN_CONTROL(ring) \ + (CNXK_SDP_R_IN_CONTROL_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_ENABLE(ring) \ + (CNXK_SDP_R_IN_ENABLE_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_INSTR_BADDR(ring) \ + (CNXK_SDP_R_IN_INSTR_BADDR_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_INSTR_RSIZE(ring) \ + (CNXK_SDP_R_IN_INSTR_RSIZE_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_INSTR_DBELL(ring) \ + (CNXK_SDP_R_IN_INSTR_DBELL_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_CNTS(ring) \ + (CNXK_SDP_R_IN_CNTS_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_INT_LEVELS(ring) \ + (CNXK_SDP_R_IN_INT_LEVELS_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_PKT_CNT(ring) \ + (CNXK_SDP_R_IN_PKT_CNT_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_BYTE_CNT(ring) \ + (CNXK_SDP_R_IN_BYTE_CNT_START + ((ring) * CNXK_RING_OFFSET)) + +/* Rings per Virtual Function */ +#define CNXK_R_IN_CTL_RPVF_MASK (0xF) +#define CNXK_R_IN_CTL_RPVF_POS (48) + +/* Number of instructions to be read in one MAC read 
request. + * setting to Max value(4) + */ +#define CNXK_R_IN_CTL_IDLE (0x1ULL << 28) +#define CNXK_R_IN_CTL_RDSIZE (0x3ULL << 25) +#define CNXK_R_IN_CTL_IS_64B (0x1ULL << 24) +#define CNXK_R_IN_CTL_D_NSR (0x1ULL << 8) +#define CNXK_R_IN_CTL_D_ESR (0x1ULL << 6) +#define CNXK_R_IN_CTL_D_ROR (0x1ULL << 5) +#define CNXK_R_IN_CTL_NSR (0x1ULL << 3) +#define CNXK_R_IN_CTL_ESR (0x1ULL << 1) +#define CNXK_R_IN_CTL_ROR (0x1ULL << 0) + +#define CNXK_R_IN_CTL_MASK (CNXK_R_IN_CTL_RDSIZE | CNXK_R_IN_CTL_IS_64B) + +/* ##### RING OUT (out from device to PCI host: Rx Ring) REGISTERS #### */ +#define CNXK_SDP_R_OUT_CNTS_START 0x10100 +#define CNXK_SDP_R_OUT_INT_LEVELS_START 0x10110 +#define CNXK_SDP_R_OUT_SLIST_BADDR_START 0x10120 +#define CNXK_SDP_R_OUT_SLIST_RSIZE_START 0x10130 +#define CNXK_SDP_R_OUT_SLIST_DBELL_START 0x10140 +#define CNXK_SDP_R_OUT_CONTROL_START 0x10150 +#define CNXK_SDP_R_OUT_WMARK_START 0x10160 +#define CNXK_SDP_R_OUT_ENABLE_START 0x10170 +#define CNXK_SDP_R_OUT_PKT_CNT_START 0x10180 +#define CNXK_SDP_R_OUT_BYTE_CNT_START 0x10190 + +#define CNXK_SDP_R_OUT_CONTROL(ring) \ + (CNXK_SDP_R_OUT_CONTROL_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_ENABLE(ring) \ + (CNXK_SDP_R_OUT_ENABLE_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_SLIST_BADDR(ring) \ + (CNXK_SDP_R_OUT_SLIST_BADDR_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_SLIST_RSIZE(ring) \ + (CNXK_SDP_R_OUT_SLIST_RSIZE_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_SLIST_DBELL(ring) \ + (CNXK_SDP_R_OUT_SLIST_DBELL_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_CNTS(ring) \ + (CNXK_SDP_R_OUT_CNTS_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_INT_LEVELS(ring) \ + (CNXK_SDP_R_OUT_INT_LEVELS_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_PKT_CNT(ring) \ + (CNXK_SDP_R_OUT_PKT_CNT_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_BYTE_CNT(ring) \ + (CNXK_SDP_R_OUT_BYTE_CNT_START + ((ring) * CNXK_RING_OFFSET)) + +/*------------------ R_OUT Masks ----------------*/ +#define CNXK_R_OUT_INT_LEVELS_BMODE BIT_ULL(63) +#define CNXK_R_OUT_INT_LEVELS_TIMET (32) + +#define CNXK_R_OUT_CTL_IDLE BIT_ULL(40) +#define CNXK_R_OUT_CTL_ES_I BIT_ULL(34) +#define CNXK_R_OUT_CTL_NSR_I BIT_ULL(33) +#define CNXK_R_OUT_CTL_ROR_I BIT_ULL(32) +#define CNXK_R_OUT_CTL_ES_D BIT_ULL(30) +#define CNXK_R_OUT_CTL_NSR_D BIT_ULL(29) +#define CNXK_R_OUT_CTL_ROR_D BIT_ULL(28) +#define CNXK_R_OUT_CTL_ES_P BIT_ULL(26) +#define CNXK_R_OUT_CTL_NSR_P BIT_ULL(25) +#define CNXK_R_OUT_CTL_ROR_P BIT_ULL(24) +#define CNXK_R_OUT_CTL_IMODE BIT_ULL(23) + +/* ############### Interrupt Moderation Registers ############### */ +#define CNXK_SDP_R_IN_INT_MDRT_CTL0_START 0x10280 +#define CNXK_SDP_R_IN_INT_MDRT_CTL1_START 0x102A0 +#define CNXK_SDP_R_IN_INT_MDRT_DBG_START 0x102C0 + +#define CNXK_SDP_R_OUT_INT_MDRT_CTL0_START 0x10380 +#define CNXK_SDP_R_OUT_INT_MDRT_CTL1_START 0x103A0 +#define CNXK_SDP_R_OUT_INT_MDRT_DBG_START 0x103C0 + +#define CNXK_SDP_R_OUT_CNTS_ISM_START 0x10510 +#define CNXK_SDP_R_IN_CNTS_ISM_START 0x10520 + +#define CNXK_SDP_R_IN_INT_MDRT_CTL0(ring) \ + (CNXK_SDP_R_IN_INT_MDRT_CTL0_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_INT_MDRT_CTL1(ring) \ + (CNXK_SDP_R_IN_INT_MDRT_CTL1_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_INT_MDRT_DBG(ring) \ + (CNXK_SDP_R_IN_INT_MDRT_DBG_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_INT_MDRT_CTL0(ring) \ + (CNXK_SDP_R_OUT_INT_MDRT_CTL0_START + ((ring) * 
CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_INT_MDRT_CTL1(ring) \ + (CNXK_SDP_R_OUT_INT_MDRT_CTL1_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_INT_MDRT_DBG(ring) \ + (CNXK_SDP_R_OUT_INT_MDRT_DBG_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_CNTS_ISM(ring) \ + (CNXK_SDP_R_OUT_CNTS_ISM_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_CNTS_ISM(ring) \ + (CNXK_SDP_R_IN_CNTS_ISM_START + ((ring) * CNXK_RING_OFFSET)) + +/* ##################### Mail Box Registers ########################## */ +/* INT register for VF. when a MBOX write from PF happed to a VF, + * corresponding bit will be set in this register as well as in + * PF_VF_INT register. + * + * This is a RO register, the int can be cleared by writing 1 to PF_VF_INT + */ +/* Basically first 3 are from PF to VF. The last one is data from VF to PF */ +#define CNXK_SDP_R_MBOX_PF_VF_DATA_START 0x10210 +#define CNXK_SDP_R_MBOX_PF_VF_INT_START 0x10220 +#define CNXK_SDP_R_MBOX_VF_PF_DATA_START 0x10230 + +#define CNXK_SDP_R_MBOX_PF_VF_DATA(ring) \ + (CNXK_SDP_R_MBOX_PF_VF_DATA_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_MBOX_PF_VF_INT(ring) \ + (CNXK_SDP_R_MBOX_PF_VF_INT_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_MBOX_VF_PF_DATA(ring) \ + (CNXK_SDP_R_MBOX_VF_PF_DATA_START + ((ring) * CNXK_RING_OFFSET)) + +/* ##################### Interrupt Registers ########################## */ +#define CNXK_SDP_R_ERR_TYPE_START 0x10400 + +#define CNXK_SDP_R_ERR_TYPE(ring) \ + (CNXK_SDP_R_ERR_TYPE_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_MBOX_ISM_START 0x10500 +#define CNXK_SDP_R_OUT_CNTS_ISM_START 0x10510 +#define CNXK_SDP_R_IN_CNTS_ISM_START 0x10520 + +#define CNXK_SDP_R_MBOX_ISM(ring) \ + (CNXK_SDP_R_MBOX_ISM_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_CNTS_ISM(ring) \ + (CNXK_SDP_R_OUT_CNTS_ISM_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_CNTS_ISM(ring) \ + (CNXK_SDP_R_IN_CNTS_ISM_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_EPF_MBOX_RINT_START 0x20100 +#define CNXK_SDP_EPF_MBOX_RINT_W1S_START 0x20120 +#define CNXK_SDP_EPF_MBOX_RINT_ENA_W1C_START 0x20140 +#define CNXK_SDP_EPF_MBOX_RINT_ENA_W1S_START 0x20160 + +#define CNXK_SDP_EPF_VFIRE_RINT_START 0x20180 +#define CNXK_SDP_EPF_VFIRE_RINT_W1S_START 0x201A0 +#define CNXK_SDP_EPF_VFIRE_RINT_ENA_W1C_START 0x201C0 +#define CNXK_SDP_EPF_VFIRE_RINT_ENA_W1S_START 0x201E0 + +#define CNXK_SDP_EPF_IRERR_RINT 0x20200 +#define CNXK_SDP_EPF_IRERR_RINT_W1S 0x20210 +#define CNXK_SDP_EPF_IRERR_RINT_ENA_W1C 0x20220 +#define CNXK_SDP_EPF_IRERR_RINT_ENA_W1S 0x20230 + +#define CNXK_SDP_EPF_VFORE_RINT_START 0x20240 +#define CNXK_SDP_EPF_VFORE_RINT_W1S_START 0x20260 +#define CNXK_SDP_EPF_VFORE_RINT_ENA_W1C_START 0x20280 +#define CNXK_SDP_EPF_VFORE_RINT_ENA_W1S_START 0x202A0 + +#define CNXK_SDP_EPF_ORERR_RINT 0x20320 +#define CNXK_SDP_EPF_ORERR_RINT_W1S 0x20330 +#define CNXK_SDP_EPF_ORERR_RINT_ENA_W1C 0x20340 +#define CNXK_SDP_EPF_ORERR_RINT_ENA_W1S 0x20350 + +#define CNXK_SDP_EPF_OEI_RINT 0x20400 +#define CNXK_SDP_EPF_OEI_RINT_W1S 0x20500 +#define CNXK_SDP_EPF_OEI_RINT_ENA_W1C 0x20600 +#define CNXK_SDP_EPF_OEI_RINT_ENA_W1S 0x20700 + +#define CNXK_SDP_EPF_DMA_RINT 0x20800 +#define CNXK_SDP_EPF_DMA_RINT_W1S 0x20810 +#define CNXK_SDP_EPF_DMA_RINT_ENA_W1C 0x20820 +#define CNXK_SDP_EPF_DMA_RINT_ENA_W1S 0x20830 + +#define CNXK_SDP_EPF_DMA_INT_LEVEL_START 0x20840 +#define CNXK_SDP_EPF_DMA_CNT_START 0x20860 +#define CNXK_SDP_EPF_DMA_TIM_START 0x20880 + +#define CNXK_SDP_EPF_MISC_RINT 0x208A0 
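+
+/* For each interrupt source, the *_ENA_W1S register is written to enable it
+ * and the *_ENA_W1C register to disable it (see
+ * octep_enable_interrupts_cnxk_pf() / octep_disable_interrupts_cnxk_pf());
+ * the per-VF-group registers below are indexed in steps of
+ * CNXK_BIT_ARRAY_OFFSET.
+ */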
+#define CNXK_SDP_EPF_MISC_RINT_W1S 0x208B0 +#define CNXK_SDP_EPF_MISC_RINT_ENA_W1C 0x208C0 +#define CNXK_SDP_EPF_MISC_RINT_ENA_W1S 0x208D0 + +#define CNXK_SDP_EPF_DMA_VF_RINT_START 0x208E0 +#define CNXK_SDP_EPF_DMA_VF_RINT_W1S_START 0x20900 +#define CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1C_START 0x20920 +#define CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1S_START 0x20940 + +#define CNXK_SDP_EPF_PP_VF_RINT_START 0x20960 +#define CNXK_SDP_EPF_PP_VF_RINT_W1S_START 0x20980 +#define CNXK_SDP_EPF_PP_VF_RINT_ENA_W1C_START 0x209A0 +#define CNXK_SDP_EPF_PP_VF_RINT_ENA_W1S_START 0x209C0 + +#define CNXK_SDP_EPF_MBOX_RINT(index) \ + (CNXK_SDP_EPF_MBOX_RINT_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_MBOX_RINT_W1S(index) \ + (CNXK_SDP_EPF_MBOX_RINT_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_MBOX_RINT_ENA_W1C(index) \ + (CNXK_SDP_EPF_MBOX_RINT_ENA_W1C_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_MBOX_RINT_ENA_W1S(index) \ + (CNXK_SDP_EPF_MBOX_RINT_ENA_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) + +#define CNXK_SDP_EPF_VFIRE_RINT(index) \ + (CNXK_SDP_EPF_VFIRE_RINT_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_VFIRE_RINT_W1S(index) \ + (CNXK_SDP_EPF_VFIRE_RINT_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_VFIRE_RINT_ENA_W1C(index) \ + (CNXK_SDP_EPF_VFIRE_RINT_ENA_W1C_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_VFIRE_RINT_ENA_W1S(index) \ + (CNXK_SDP_EPF_VFIRE_RINT_ENA_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) + +#define CNXK_SDP_EPF_VFORE_RINT(index) \ + (CNXK_SDP_EPF_VFORE_RINT_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_VFORE_RINT_W1S(index) \ + (CNXK_SDP_EPF_VFORE_RINT_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_VFORE_RINT_ENA_W1C(index) \ + (CNXK_SDP_EPF_VFORE_RINT_ENA_W1C_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_VFORE_RINT_ENA_W1S(index) \ + (CNXK_SDP_EPF_VFORE_RINT_ENA_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) + +#define CNXK_SDP_EPF_DMA_VF_RINT(index) \ + (CNXK_SDP_EPF_DMA_VF_RINT_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_DMA_VF_RINT_W1S(index) \ + (CNXK_SDP_EPF_DMA_VF_RINT_W1S_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1C(index) \ + (CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1C_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1S(index) \ + (CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1S_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) + +#define CNXK_SDP_EPF_PP_VF_RINT(index) \ + (CNXK_SDP_EPF_PP_VF_RINT_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_PP_VF_RINT_W1S(index) \ + (CNXK_SDP_EPF_PP_VF_RINT_W1S_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_PP_VF_RINT_ENA_W1C(index) \ + (CNXK_SDP_EPF_PP_VF_RINT_ENA_W1C_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_PP_VF_RINT_ENA_W1S(index) \ + (CNXK_SDP_EPF_PP_VF_RINT_ENA_W1S_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) + +/*------------------ Interrupt Masks ----------------*/ +#define CNXK_INTR_R_SEND_ISM BIT_ULL(63) +#define CNXK_INTR_R_OUT_INT BIT_ULL(62) +#define CNXK_INTR_R_IN_INT BIT_ULL(61) +#define CNXK_INTR_R_MBOX_INT BIT_ULL(60) +#define CNXK_INTR_R_RESEND BIT_ULL(59) +#define CNXK_INTR_R_CLR_TIM BIT_ULL(58) + +/* ####################### Ring Mapping Registers ################################## */ +#define CNXK_SDP_EPVF_RING_START 0x26000 +#define CNXK_SDP_IN_RING_TB_MAP_START 0x28000 +#define CNXK_SDP_IN_RATE_LIMIT_START 0x2A000 +#define 
CNXK_SDP_MAC_PF_RING_CTL_START 0x2C000 + +#define CNXK_SDP_EPVF_RING(ring) \ + (CNXK_SDP_EPVF_RING_START + ((ring) * CNXK_EPVF_RING_OFFSET)) +#define CNXK_SDP_IN_RING_TB_MAP(ring) \ + (CNXK_SDP_N_RING_TB_MAP_START + ((ring) * CNXK_EPVF_RING_OFFSET)) +#define CNXK_SDP_IN_RATE_LIMIT(ring) \ + (CNXK_SDP_IN_RATE_LIMIT_START + ((ring) * CNXK_EPVF_RING_OFFSET)) +#define CNXK_SDP_MAC_PF_RING_CTL(mac) \ + (CNXK_SDP_MAC_PF_RING_CTL_START + ((mac) * CNXK_MAC_OFFSET)) + +#define CNXK_SDP_MAC_PF_RING_CTL_NPFS(val) ((val) & 0x3) +#define CNXK_SDP_MAC_PF_RING_CTL_SRN(val) (((val) >> 8) & 0x7F) +#define CNXK_SDP_MAC_PF_RING_CTL_RPPF(val) (((val) >> 16) & 0x3F) + +/* Number of non-queue interrupts in CNXKxx */ +#define CNXK_NUM_NON_IOQ_INTR 32 + +/* bit 0 for control mbox interrupt */ +#define CNXK_SDP_EPF_OEI_RINT_DATA_BIT_MBOX BIT_ULL(0) +/* bit 1 for firmware heartbeat interrupt */ +#define CNXK_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT BIT_ULL(1) +#define FW_STATUS_RUNNING 2ULL +#define CNXK_PEMX_PFX_CSX_PFCFGX(pem, pf, offset) ({ typeof(offset) _off = (offset); \ + ((0x8e0000008000 | \ + (uint64_t)(pem) << 36 \ + | (pf) << 18 \ + | ((_off >> 16) & 1) << 16 \ + | (_off >> 3) << 3) \ + + (((_off >> 2) & 1) << 2)); \ + }) + +/* Register defines for use with CNXK_PEMX_PFX_CSX_PFCFGX */ +#define CNXK_PCIEEP_VSECST_CTL 0x418 + +#define CNXK_PEM_BAR4_INDEX 7 +#define CNXK_PEM_BAR4_INDEX_SIZE 0x400000ULL +#define CNXK_PEM_BAR4_INDEX_OFFSET (CNXK_PEM_BAR4_INDEX * CNXK_PEM_BAR4_INDEX_SIZE) + +#endif /* _OCTEP_REGS_CNXK_PF_H_ */ diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c index d0adb82d65c3..06851b78aa28 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c @@ -21,7 +21,6 @@ static void octep_iq_reset_indices(struct octep_iq *iq) iq->flush_index = 0; iq->pkts_processed = 0; iq->pkt_in_done = 0; - atomic_set(&iq->instr_pending, 0); } /** @@ -82,7 +81,6 @@ int octep_iq_process_completions(struct octep_iq *iq, u16 budget) } iq->pkts_processed += compl_pkts; - atomic_sub(compl_pkts, &iq->instr_pending); iq->stats.instr_completed += compl_pkts; iq->stats.bytes_sent += compl_bytes; iq->stats.sgentry_sent += compl_sg; @@ -91,7 +89,7 @@ int octep_iq_process_completions(struct octep_iq *iq, u16 budget) netdev_tx_completed_queue(iq->netdev_q, compl_pkts, compl_bytes); if (unlikely(__netif_subqueue_stopped(iq->netdev, iq->q_no)) && - ((iq->max_count - atomic_read(&iq->instr_pending)) > + (IQ_INSTR_SPACE(iq) > OCTEP_WAKE_QUEUE_THRESHOLD)) netif_wake_subqueue(iq->netdev, iq->q_no); return !budget; @@ -144,7 +142,6 @@ static void octep_iq_free_pending(struct octep_iq *iq) dev_kfree_skb_any(skb); } - atomic_set(&iq->instr_pending, 0); iq->flush_index = fi; netdev_tx_reset_queue(netdev_get_tx_queue(iq->netdev, iq->q_no)); } diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h index 86c98b13fc44..1ba4ff65e54d 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h @@ -172,9 +172,6 @@ struct octep_iq { /* Statistics for this input queue. */ struct octep_iq_stats stats; - /* This field keeps track of the instructions pending in this queue. */ - atomic_t instr_pending; - /* Pointer to the Virtual Base addr of the input ring. 
*/ struct octep_tx_desc_hw *desc_ring; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 3cf6589cfdac..a6e91573f8da 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1159,15 +1159,18 @@ static int mtk_init_fq_dma(struct mtk_eth *eth) phy_ring_tail = eth->phy_scratch_ring + soc->txrx.txd_size * (cnt - 1); for (i = 0; i < cnt; i++) { + dma_addr_t addr = dma_addr + i * MTK_QDMA_PAGE_SIZE; struct mtk_tx_dma_v2 *txd; txd = eth->scratch_ring + i * soc->txrx.txd_size; - txd->txd1 = dma_addr + i * MTK_QDMA_PAGE_SIZE; + txd->txd1 = addr; if (i < cnt - 1) txd->txd2 = eth->phy_scratch_ring + (i + 1) * soc->txrx.txd_size; txd->txd3 = TX_DMA_PLEN0(MTK_QDMA_PAGE_SIZE); + if (MTK_HAS_CAPS(soc->caps, MTK_36BIT_DMA)) + txd->txd3 |= TX_DMA_PREP_ADDR64(addr); txd->txd4 = 0; if (mtk_is_netsys_v2_or_greater(eth)) { txd->txd5 = 0; diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 9a6744c0d458..c895e265ae0e 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -670,7 +670,7 @@ mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev) void *buf; int s; - page = __dev_alloc_pages(GFP_KERNEL, 0); + page = __dev_alloc_page(GFP_KERNEL); if (!page) return -ENOMEM; @@ -691,10 +691,11 @@ mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev) for (s = 0; s < MTK_WED_BUF_PER_PAGE; s++) { struct mtk_wdma_desc *desc = desc_ptr; + u32 ctrl; desc->buf0 = cpu_to_le32(buf_phys); if (!mtk_wed_is_v3_or_greater(dev->hw)) { - u32 txd_size, ctrl; + u32 txd_size; txd_size = dev->wlan.init_buf(buf, buf_phys, token++); @@ -708,11 +709,11 @@ mtk_wed_tx_buffer_alloc(struct mtk_wed_device *dev) ctrl |= MTK_WDMA_DESC_CTRL_LAST_SEG0 | FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1_V2, MTK_WED_BUF_SIZE - txd_size); - desc->ctrl = cpu_to_le32(ctrl); desc->info = 0; } else { - desc->ctrl = cpu_to_le32(token << 16); + ctrl = token << 16 | TX_DMA_PREP_ADDR64(buf_phys); } + desc->ctrl = cpu_to_le32(ctrl); desc_ptr += desc_size; buf += MTK_WED_BUF_SIZE; @@ -811,6 +812,7 @@ mtk_wed_hwrro_buffer_alloc(struct mtk_wed_device *dev) buf_phys = page_phys; for (s = 0; s < MTK_WED_RX_BUF_PER_PAGE; s++) { desc->buf0 = cpu_to_le32(buf_phys); + desc->token = cpu_to_le32(RX_DMA_PREP_ADDR64(buf_phys)); buf_phys += MTK_WED_PAGE_BUF_SIZE; desc++; } diff --git a/drivers/net/ethernet/mediatek/mtk_wed_wo.c b/drivers/net/ethernet/mediatek/mtk_wed_wo.c index 3bd51a3d6650..7ffbd4fca881 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed_wo.c +++ b/drivers/net/ethernet/mediatek/mtk_wed_wo.c @@ -142,7 +142,8 @@ mtk_wed_wo_queue_refill(struct mtk_wed_wo *wo, struct mtk_wed_wo_queue *q, dma_addr_t addr; void *buf; - buf = page_frag_alloc(&q->cache, q->buf_size, GFP_ATOMIC); + buf = page_frag_alloc(&q->cache, q->buf_size, + GFP_ATOMIC | GFP_DMA32); if (!buf) break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c index 28d02749d3c4..7659ad21e6e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c @@ -55,7 +55,10 @@ int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data) ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, MLX5_VSC_LOCK); if (ret) { - mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n"); + if (ret == -EBUSY) + mlx5_core_info(dev, "SW reset semaphore is already in use\n"); + else + 
mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n"); goto unlock_gw; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index b12fe3c5a258..a55452c69f06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -147,6 +147,20 @@ mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv, } } +static void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + u64 dbytes; + u64 dpkts; + + dpkts = priv->stats.rep_stats.vport_rx_packets - rpriv->prev_vf_vport_stats.rx_packets; + dbytes = priv->stats.rep_stats.vport_rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes; + mlx5e_stats_copy_rep_stats(&rpriv->prev_vf_vport_stats, &priv->stats.rep_stats); + flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies, + FLOW_ACTION_HW_STATS_DELAYED); +} + static int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv, struct tc_cls_matchall_offload *ma) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c index 368a95fa77d3..b14cd62edffc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/pedit.c @@ -48,7 +48,8 @@ mlx5e_tc_act_pedit_parse_action(struct mlx5e_priv *priv, struct pedit_headers_action *hdrs, struct netlink_ext_ack *extack) { - u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1; + u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? TCA_PEDIT_KEY_EX_CMD_SET : + TCA_PEDIT_KEY_EX_CMD_ADD; u8 htype = act->mangle.htype; int err = -EOPNOTSUPP; u32 mask, val, offset; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ea58c6917433..3aecdf099a2f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5244,7 +5244,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->gso_partial_features |= NETIF_F_GSO_UDP_L4; netdev->hw_features |= NETIF_F_GSO_UDP_L4; - netdev->features |= NETIF_F_GSO_UDP_L4; mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 3ab682bbcf86..fe0726c7b847 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -112,8 +112,18 @@ static const struct counter_desc vport_rep_stats_desc[] = { tx_vport_rdma_multicast_bytes) }, }; +static const struct counter_desc vport_rep_loopback_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, + vport_loopback_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, + vport_loopback_bytes) }, +}; + #define NUM_VPORT_REP_SW_COUNTERS ARRAY_SIZE(sw_rep_stats_desc) #define NUM_VPORT_REP_HW_COUNTERS ARRAY_SIZE(vport_rep_stats_desc) +#define NUM_VPORT_REP_LOOPBACK_COUNTERS(dev) \ + (MLX5_CAP_GEN(dev, vport_counter_local_loopback) ? 
\ + ARRAY_SIZE(vport_rep_loopback_stats_desc) : 0) static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(sw_rep) { @@ -157,7 +167,8 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw_rep) static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vport_rep) { - return NUM_VPORT_REP_HW_COUNTERS; + return NUM_VPORT_REP_HW_COUNTERS + + NUM_VPORT_REP_LOOPBACK_COUNTERS(priv->mdev); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vport_rep) @@ -166,6 +177,9 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vport_rep) for (i = 0; i < NUM_VPORT_REP_HW_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, vport_rep_stats_desc[i].format); + for (i = 0; i < NUM_VPORT_REP_LOOPBACK_COUNTERS(priv->mdev); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vport_rep_loopback_stats_desc[i].format); return idx; } @@ -176,6 +190,9 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vport_rep) for (i = 0; i < NUM_VPORT_REP_HW_COUNTERS; i++) data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.rep_stats, vport_rep_stats_desc, i); + for (i = 0; i < NUM_VPORT_REP_LOOPBACK_COUNTERS(priv->mdev); i++) + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.rep_stats, + vport_rep_loopback_stats_desc, i); return idx; } @@ -247,6 +264,13 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport_rep) rep_stats->tx_vport_rdma_multicast_bytes = MLX5_GET_CTR(out, received_ib_multicast.octets); + if (MLX5_CAP_GEN(priv->mdev, vport_counter_local_loopback)) { + rep_stats->vport_loopback_packets = + MLX5_GET_CTR(out, local_loopback.packets); + rep_stats->vport_loopback_bytes = + MLX5_GET_CTR(out, local_loopback.octets); + } + out: kvfree(out); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 477c547dcc04..12b3607afecd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -476,6 +476,8 @@ struct mlx5e_rep_stats { u64 tx_vport_rdma_multicast_packets; u64 rx_vport_rdma_multicast_bytes; u64 tx_vport_rdma_multicast_bytes; + u64 vport_loopback_packets; + u64 vport_loopback_bytes; }; struct mlx5e_stats { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 7ca9e5b86778..9dca2809b46f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3206,10 +3206,10 @@ static int offload_pedit_fields(struct mlx5e_priv *priv, headers_c = mlx5e_get_match_headers_criteria(*action_flags, &parse_attr->spec); headers_v = mlx5e_get_match_headers_value(*action_flags, &parse_attr->spec); - set_masks = &hdrs[0].masks; - add_masks = &hdrs[1].masks; - set_vals = &hdrs[0].vals; - add_vals = &hdrs[1].vals; + set_masks = &hdrs[TCA_PEDIT_KEY_EX_CMD_SET].masks; + add_masks = &hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].masks; + set_vals = &hdrs[TCA_PEDIT_KEY_EX_CMD_SET].vals; + add_vals = &hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].vals; for (i = 0; i < ARRAY_SIZE(fields); i++) { bool skip; @@ -5011,22 +5011,6 @@ int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv, return apply_police_params(priv, 0, extack); } -void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, - struct tc_cls_matchall_offload *ma) -{ - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct rtnl_link_stats64 cur_stats; - u64 dbytes; - u64 dpkts; - - mlx5e_stats_copy_rep_stats(&cur_stats, &priv->stats.rep_stats); - dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets; - dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes; - rpriv->prev_vf_vport_stats = cur_stats; 
- flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies, - FLOW_ACTION_HW_STATS_DELAYED); -} - static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index adb39e30f90f..c24bda56b2b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -203,8 +203,6 @@ int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, struct tc_cls_matchall_offload *f); int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv, struct tc_cls_matchall_offload *f); -void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, - struct tc_cls_matchall_offload *ma); struct mlx5e_encap_entry; void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 4aed1768b85f..78eb6b7097e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -181,7 +181,7 @@ struct mlx5_flow_rule { struct mlx5_flow_handle { int num_rules; - struct mlx5_flow_rule *rule[]; + struct mlx5_flow_rule *rule[] __counted_by(num_rules); }; /* Type of children is mlx5_flow_group */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 17fe30a4c06c..0c26d707eed2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -539,7 +539,7 @@ struct mlx5_fc_bulk { u32 base_id; int bulk_len; unsigned long *bitmask; - struct mlx5_fc fcs[]; + struct mlx5_fc fcs[] __counted_by(bulk_len); }; static void mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index b568988e92e3..4b8cb120362b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -325,6 +325,25 @@ static void mlx5_fw_live_patch_event(struct work_struct *work) mlx5_core_err(dev, "Failed to reload FW tracer\n"); } +static const struct pci_device_id mgt_ifc_device_ids[] = { + { PCI_VDEVICE(MELLANOX, 0xc2d2) }, /* BlueField1 MGT interface device ID */ + { PCI_VDEVICE(MELLANOX, 0xc2d3) }, /* BlueField2 MGT interface device ID */ + { PCI_VDEVICE(MELLANOX, 0xc2d4) }, /* BlueField3-Lx MGT interface device ID */ + { PCI_VDEVICE(MELLANOX, 0xc2d5) }, /* BlueField3 MGT interface device ID */ + { PCI_VDEVICE(MELLANOX, 0xc2d6) }, /* BlueField4 MGT interface device ID */ +}; + +static bool mlx5_is_mgt_ifc_pci_device(struct mlx5_core_dev *dev, u16 dev_id) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mgt_ifc_device_ids); ++i) + if (mgt_ifc_device_ids[i].device == dev_id) + return true; + + return false; +} + static int mlx5_check_dev_ids(struct mlx5_core_dev *dev, u16 dev_id) { struct pci_bus *bridge_bus = dev->pdev->bus; @@ -339,10 +358,15 @@ static int mlx5_check_dev_ids(struct mlx5_core_dev *dev, u16 dev_id) err = pci_read_config_word(sdev, PCI_DEVICE_ID, &sdev_id); if (err) return pcibios_err_to_errno(err); - if (sdev_id != dev_id) { - mlx5_core_warn(dev, "unrecognized dev_id (0x%x)\n", sdev_id); - return -EPERM; - } + + if (sdev_id == dev_id) + continue; + + if (mlx5_is_mgt_ifc_pci_device(dev, sdev_id)) + continue; + + mlx5_core_warn(dev, "unrecognized dev_id (0x%x)\n", sdev_id); + return 
-EPERM; } return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 0c83ef174275..0361741632a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -266,9 +266,6 @@ static int mlx5_ptp_settime_real_time(struct mlx5_core_dev *mdev, { u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {}; - if (!mlx5_modify_mtutc_allowed(mdev)) - return 0; - if (ts->tv_sec < 0 || ts->tv_sec > U32_MAX || ts->tv_nsec < 0 || ts->tv_nsec > NSEC_PER_SEC) return -EINVAL; @@ -286,12 +283,15 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 struct mlx5_timer *timer = &clock->timer; struct mlx5_core_dev *mdev; unsigned long flags; - int err; mdev = container_of(clock, struct mlx5_core_dev, clock); - err = mlx5_ptp_settime_real_time(mdev, ts); - if (err) - return err; + + if (mlx5_modify_mtutc_allowed(mdev)) { + int err = mlx5_ptp_settime_real_time(mdev, ts); + + if (err) + return err; + } write_seqlock_irqsave(&clock->lock, flags); timecounter_init(&timer->tc, &timer->cycles, timespec64_to_ns(ts)); @@ -341,9 +341,6 @@ static int mlx5_ptp_adjtime_real_time(struct mlx5_core_dev *mdev, s64 delta) { u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {}; - if (!mlx5_modify_mtutc_allowed(mdev)) - return 0; - /* HW time adjustment range is checked. If out of range, settime instead */ if (!mlx5_is_mtutc_time_adj_cap(mdev, delta)) { struct timespec64 ts; @@ -367,13 +364,16 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) struct mlx5_timer *timer = &clock->timer; struct mlx5_core_dev *mdev; unsigned long flags; - int err; mdev = container_of(clock, struct mlx5_core_dev, clock); - err = mlx5_ptp_adjtime_real_time(mdev, delta); - if (err) - return err; + if (mlx5_modify_mtutc_allowed(mdev)) { + int err = mlx5_ptp_adjtime_real_time(mdev, delta); + + if (err) + return err; + } + write_seqlock_irqsave(&clock->lock, flags); timecounter_adjtime(&timer->tc, delta); mlx5_update_clock_info_page(mdev); @@ -396,15 +396,14 @@ static int mlx5_ptp_freq_adj_real_time(struct mlx5_core_dev *mdev, long scaled_p { u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {}; - if (!mlx5_modify_mtutc_allowed(mdev)) - return 0; - MLX5_SET(mtutc_reg, in, operation, MLX5_MTUTC_OPERATION_ADJUST_FREQ_UTC); - if (MLX5_CAP_MCAM_FEATURE(mdev, mtutc_freq_adj_units)) { + if (MLX5_CAP_MCAM_FEATURE(mdev, mtutc_freq_adj_units) && + scaled_ppm <= S32_MAX && scaled_ppm >= S32_MIN) { + /* HW scaled_ppm support on mlx5 devices only supports a 32-bit value */ MLX5_SET(mtutc_reg, in, freq_adj_units, MLX5_MTUTC_FREQ_ADJ_UNITS_SCALED_PPM); - MLX5_SET(mtutc_reg, in, freq_adjustment, scaled_ppm); + MLX5_SET(mtutc_reg, in, freq_adjustment, (s32)scaled_ppm); } else { MLX5_SET(mtutc_reg, in, freq_adj_units, MLX5_MTUTC_FREQ_ADJ_UNITS_PPB); MLX5_SET(mtutc_reg, in, freq_adjustment, scaled_ppm_to_ppb(scaled_ppm)); @@ -420,13 +419,15 @@ static int mlx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) struct mlx5_core_dev *mdev; unsigned long flags; u32 mult; - int err; mdev = container_of(clock, struct mlx5_core_dev, clock); - err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm); - if (err) - return err; + if (mlx5_modify_mtutc_allowed(mdev)) { + int err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm); + + if (err) + return err; + } mult = (u32)adjust_by_scaled_ppm(timer->nominal_c_mult, scaled_ppm); @@ -1004,14 +1005,38 @@ static void mlx5_init_clock_info(struct mlx5_core_dev *mdev) info->frac = timer->tc.frac; } +static 
void mlx5_init_timer_max_freq_adjustment(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + u32 out[MLX5_ST_SZ_DW(mtutc_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {}; + u8 log_max_freq_adjustment = 0; + int err; + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MTUTC, 0, 0); + if (!err) + log_max_freq_adjustment = + MLX5_GET(mtutc_reg, out, log_max_freq_adjustment); + + if (log_max_freq_adjustment) + clock->ptp_info.max_adj = + min(S32_MAX, 1 << log_max_freq_adjustment); +} + static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev) { struct mlx5_clock *clock = &mdev->clock; + /* Configure the PHC */ + clock->ptp_info = mlx5_ptp_clock_info; + + if (MLX5_CAP_MCAM_REG(mdev, mtutc)) + mlx5_init_timer_max_freq_adjustment(mdev); + mlx5_timecounter_init(mdev); mlx5_init_clock_info(mdev); mlx5_init_overflow_period(clock); - clock->ptp_info = mlx5_ptp_clock_info; if (mlx5_real_time_mode(mdev)) { struct timespec64 ts; @@ -1042,11 +1067,10 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) } seqlock_init(&clock->lock); - mlx5_init_timer_clock(mdev); INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out); - /* Configure the PHC */ - clock->ptp_info = mlx5_ptp_clock_info; + /* Initialize the device clock */ + mlx5_init_timer_clock(mdev); /* Initialize 1PPS data structures */ mlx5_init_pps(mdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index a17152c1cbb2..bccf6e53556c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -219,7 +219,6 @@ static void mlx5_set_driver_version(struct mlx5_core_dev *dev) int driver_ver_sz = MLX5_FLD_SZ_BYTES(set_driver_version_in, driver_version); u8 in[MLX5_ST_SZ_BYTES(set_driver_version_in)] = {}; - int remaining_size = driver_ver_sz; char *string; if (!MLX5_CAP_GEN(dev, driver_version)) @@ -227,22 +226,9 @@ static void mlx5_set_driver_version(struct mlx5_core_dev *dev) string = MLX5_ADDR_OF(set_driver_version_in, in, driver_version); - strncpy(string, "Linux", remaining_size); - - remaining_size = max_t(int, 0, driver_ver_sz - strlen(string)); - strncat(string, ",", remaining_size); - - remaining_size = max_t(int, 0, driver_ver_sz - strlen(string)); - strncat(string, KBUILD_MODNAME, remaining_size); - - remaining_size = max_t(int, 0, driver_ver_sz - strlen(string)); - strncat(string, ",", remaining_size); - - remaining_size = max_t(int, 0, driver_ver_sz - strlen(string)); - - snprintf(string + strlen(string), remaining_size, "%u.%u.%u", - LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL, - LINUX_VERSION_SUBLEVEL); + snprintf(string, driver_ver_sz, "Linux,%s,%u.%u.%u", + KBUILD_MODNAME, LINUX_VERSION_MAJOR, + LINUX_VERSION_PATCHLEVEL, LINUX_VERSION_SUBLEVEL); /*Send the command*/ MLX5_SET(set_driver_version_in, in, opcode, diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h index e827c78be114..e3271c845ee6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h +++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h @@ -282,6 +282,12 @@ MLXSW_ITEM32(cmd_mbox, query_fw, fw_day, 0x14, 0, 8); */ MLXSW_ITEM32(cmd_mbox, query_fw, lag_mode_support, 0x18, 1, 1); +/* cmd_mbox_query_fw_cff_support + * 0: CONFIG_PROFILE.flood_mode = 5 (CFF) is not supported by FW + * 1: CONFIG_PROFILE.flood_mode = 5 (CFF) is supported by FW + */ +MLXSW_ITEM32(cmd_mbox, query_fw, cff_support, 0x18, 2, 1); + /* cmd_mbox_query_fw_clr_int_base_offset * Clear Interrupt 
register's offset from clr_int_bar register * in PCI address space. @@ -779,6 +785,11 @@ enum mlxsw_cmd_mbox_config_profile_flood_mode { * used. */ MLXSW_CMD_MBOX_CONFIG_PROFILE_FLOOD_MODE_CONTROLLED = 4, + /* CFF - Compressed FID Flood (CFF) mode. + * Reserved when legacy bridge model is used. + * Supported only by Spectrum-2+. + */ + MLXSW_CMD_MBOX_CONFIG_PROFILE_FLOOD_MODE_CFF = 5, }; /* cmd_mbox_config_profile_flood_mode diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index f23421f038f3..e4d7739bd7c8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -211,6 +211,13 @@ mlxsw_core_lag_mode(struct mlxsw_core *mlxsw_core) } EXPORT_SYMBOL(mlxsw_core_lag_mode); +enum mlxsw_cmd_mbox_config_profile_flood_mode +mlxsw_core_flood_mode(struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->bus->flood_mode(mlxsw_core->bus_priv); +} +EXPORT_SYMBOL(mlxsw_core_flood_mode); + void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core) { return mlxsw_core->driver_priv; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 764d14bd5bc0..6d11225594dd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -38,6 +38,8 @@ unsigned int mlxsw_core_max_ports(const struct mlxsw_core *mlxsw_core); int mlxsw_core_max_lag(struct mlxsw_core *mlxsw_core, u16 *p_max_lag); enum mlxsw_cmd_mbox_config_profile_lag_mode mlxsw_core_lag_mode(struct mlxsw_core *mlxsw_core); +enum mlxsw_cmd_mbox_config_profile_flood_mode +mlxsw_core_flood_mode(struct mlxsw_core *mlxsw_core); void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core); @@ -322,7 +324,12 @@ struct mlxsw_config_profile { u16 max_regions; u8 max_flood_tables; u8 max_vid_flood_tables; + + /* Flood mode to use if used_flood_mode. If flood_mode_prefer_cff, + * the backup flood mode (if any) when CFF unsupported. 
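 * In mlxsw_pci_config_profile() CFF is chosen when both
 * flood_mode_prefer_cff and the firmware cff_support bit are set; this
 * flood_mode is used as the fallback when only used_flood_mode is set,
 * and the profile is rejected (-EINVAL) when neither is set.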
+ */ u8 flood_mode; + u8 max_fid_offset_flood_tables; u16 fid_offset_flood_table_size; u8 max_fid_flood_tables; @@ -338,6 +345,7 @@ struct mlxsw_config_profile { u8 kvd_hash_double_parts; u8 cqe_time_stamp_type; bool lag_mode_prefer_sw; + bool flood_mode_prefer_cff; struct mlxsw_swid_config swid_config[MLXSW_CONFIG_PROFILE_SWID_COUNT]; }; @@ -489,6 +497,7 @@ struct mlxsw_bus { u32 (*read_utc_sec)(void *bus_priv); u32 (*read_utc_nsec)(void *bus_priv); enum mlxsw_cmd_mbox_config_profile_lag_mode (*lag_mode)(void *bus_priv); + enum mlxsw_cmd_mbox_config_profile_flood_mode (*flood_mode)(void *priv); u8 features; }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index e4b25e187467..0d58f13a7c7d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -106,7 +106,9 @@ struct mlxsw_pci { u64 utc_sec_offset; u64 utc_nsec_offset; bool lag_mode_support; + bool cff_support; enum mlxsw_cmd_mbox_config_profile_lag_mode lag_mode; + enum mlxsw_cmd_mbox_config_profile_flood_mode flood_mode; struct mlxsw_pci_queue_type_group queues[MLXSW_PCI_QUEUE_TYPE_COUNT]; u32 doorbell_offset; struct mlxsw_core *core; @@ -130,6 +132,7 @@ struct mlxsw_pci { const struct pci_device_id *id; enum mlxsw_pci_cqe_v max_cqe_ver; /* Maximal supported CQE version */ u8 num_sdq_cqs; /* Number of CQs used for SDQs */ + bool skip_reset; }; static void mlxsw_pci_queue_tasklet_schedule(struct mlxsw_pci_queue *q) @@ -1245,11 +1248,22 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, mlxsw_cmd_mbox_config_profile_fid_flood_table_size_set( mbox, profile->fid_flood_table_size); } - if (profile->used_flood_mode) { + if (profile->flood_mode_prefer_cff && mlxsw_pci->cff_support) { + enum mlxsw_cmd_mbox_config_profile_flood_mode flood_mode = + MLXSW_CMD_MBOX_CONFIG_PROFILE_FLOOD_MODE_CFF; + + mlxsw_cmd_mbox_config_profile_set_flood_mode_set(mbox, 1); + mlxsw_cmd_mbox_config_profile_flood_mode_set(mbox, flood_mode); + mlxsw_pci->flood_mode = flood_mode; + } else if (profile->used_flood_mode) { mlxsw_cmd_mbox_config_profile_set_flood_mode_set( mbox, 1); mlxsw_cmd_mbox_config_profile_flood_mode_set( mbox, profile->flood_mode); + mlxsw_pci->flood_mode = profile->flood_mode; + } else { + WARN_ON(1); + return -EINVAL; } if (profile->used_max_ib_mc) { mlxsw_cmd_mbox_config_profile_set_max_ib_mc_set( @@ -1476,11 +1490,47 @@ static int mlxsw_pci_sys_ready_wait(struct mlxsw_pci *mlxsw_pci, return -EBUSY; } -static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci, - const struct pci_device_id *id) +static int mlxsw_pci_reset_at_pci_disable(struct mlxsw_pci *mlxsw_pci) { struct pci_dev *pdev = mlxsw_pci->pdev; char mrsr_pl[MLXSW_REG_MRSR_LEN]; + int err; + + mlxsw_reg_mrsr_pack(mrsr_pl, + MLXSW_REG_MRSR_COMMAND_RESET_AT_PCI_DISABLE); + err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl); + if (err) + return err; + + device_lock_assert(&pdev->dev); + + pci_cfg_access_lock(pdev); + pci_save_state(pdev); + + err = __pci_reset_function_locked(pdev); + if (err) + pci_err(pdev, "PCI function reset failed with %d\n", err); + + pci_restore_state(pdev); + pci_cfg_access_unlock(pdev); + + return err; +} + +static int mlxsw_pci_reset_sw(struct mlxsw_pci *mlxsw_pci) +{ + char mrsr_pl[MLXSW_REG_MRSR_LEN]; + + mlxsw_reg_mrsr_pack(mrsr_pl, MLXSW_REG_MRSR_COMMAND_SOFTWARE_RESET); + return mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl); +} + +static int +mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct 
pci_device_id *id) +{ + struct pci_dev *pdev = mlxsw_pci->pdev; + char mcam_pl[MLXSW_REG_MCAM_LEN]; + bool pci_reset_supported; u32 sys_status; int err; @@ -1491,11 +1541,27 @@ static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci, return err; } - mlxsw_reg_mrsr_pack(mrsr_pl); - err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl); + /* PCI core already issued a PCI reset, do not issue another reset. */ + if (mlxsw_pci->skip_reset) + return 0; + + mlxsw_reg_mcam_pack(mcam_pl, + MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES); + err = mlxsw_reg_query(mlxsw_pci->core, MLXSW_REG(mcam), mcam_pl); if (err) return err; + mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET, + &pci_reset_supported); + + if (pci_reset_supported) { + pci_dbg(pdev, "Starting PCI reset flow\n"); + err = mlxsw_pci_reset_at_pci_disable(mlxsw_pci); + } else { + pci_dbg(pdev, "Starting software reset flow\n"); + err = mlxsw_pci_reset_sw(mlxsw_pci); + } + err = mlxsw_pci_sys_ready_wait(mlxsw_pci, id, &sys_status); if (err) { dev_err(&pdev->dev, "Failed to reach system ready status after reset. Status is 0x%x\n", @@ -1537,9 +1603,9 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, if (!mbox) return -ENOMEM; - err = mlxsw_pci_sw_reset(mlxsw_pci, mlxsw_pci->id); + err = mlxsw_pci_reset(mlxsw_pci, mlxsw_pci->id); if (err) - goto err_sw_reset; + goto err_reset; err = mlxsw_pci_alloc_irq_vectors(mlxsw_pci); if (err < 0) { @@ -1601,6 +1667,9 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, mlxsw_pci->lag_mode_support = mlxsw_cmd_mbox_query_fw_lag_mode_support_get(mbox); + mlxsw_pci->cff_support = + mlxsw_cmd_mbox_query_fw_cff_support_get(mbox); + num_pages = mlxsw_cmd_mbox_query_fw_fw_pages_get(mbox); err = mlxsw_pci_fw_area_init(mlxsw_pci, mbox, num_pages); if (err) @@ -1672,7 +1741,7 @@ err_iface_rev: err_query_fw: mlxsw_pci_free_irq_vectors(mlxsw_pci); err_alloc_irq: -err_sw_reset: +err_reset: mbox_put: mlxsw_cmd_mbox_free(mbox); return err; @@ -1917,6 +1986,14 @@ mlxsw_pci_lag_mode(void *bus_priv) return mlxsw_pci->lag_mode; } +static enum mlxsw_cmd_mbox_config_profile_flood_mode +mlxsw_pci_flood_mode(void *bus_priv) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + + return mlxsw_pci->flood_mode; +} + static const struct mlxsw_bus mlxsw_pci_bus = { .kind = "pci", .init = mlxsw_pci_init, @@ -1929,6 +2006,7 @@ static const struct mlxsw_bus mlxsw_pci_bus = { .read_utc_sec = mlxsw_pci_read_utc_sec, .read_utc_nsec = mlxsw_pci_read_utc_nsec, .lag_mode = mlxsw_pci_lag_mode, + .flood_mode = mlxsw_pci_flood_mode, .features = MLXSW_BUS_F_TXRX | MLXSW_BUS_F_RESET, }; @@ -2059,11 +2137,34 @@ static void mlxsw_pci_remove(struct pci_dev *pdev) kfree(mlxsw_pci); } +static void mlxsw_pci_reset_prepare(struct pci_dev *pdev) +{ + struct mlxsw_pci *mlxsw_pci = pci_get_drvdata(pdev); + + mlxsw_core_bus_device_unregister(mlxsw_pci->core, false); +} + +static void mlxsw_pci_reset_done(struct pci_dev *pdev) +{ + struct mlxsw_pci *mlxsw_pci = pci_get_drvdata(pdev); + + mlxsw_pci->skip_reset = true; + mlxsw_core_bus_device_register(&mlxsw_pci->bus_info, &mlxsw_pci_bus, + mlxsw_pci, false, NULL, NULL); + mlxsw_pci->skip_reset = false; +} + +static const struct pci_error_handlers mlxsw_pci_err_handler = { + .reset_prepare = mlxsw_pci_reset_prepare, + .reset_done = mlxsw_pci_reset_done, +}; + int mlxsw_pci_driver_register(struct pci_driver *pci_driver) { pci_driver->probe = mlxsw_pci_probe; pci_driver->remove = mlxsw_pci_remove; pci_driver->shutdown = mlxsw_pci_remove; + 
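/* reset_prepare/reset_done let the PCI core drive a function reset: the
 * device is unregistered before the reset and re-registered afterwards
 * with skip_reset set, so mlxsw_pci_reset() does not issue a second
 * device reset during the re-probe.
 */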
pci_driver->err_handler = &mlxsw_pci_err_handler; return pci_register_driver(pci_driver); } EXPORT_SYMBOL(mlxsw_pci_driver_register); diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 25b294fdeb3d..3aae4467e431 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -1024,6 +1024,8 @@ static inline void mlxsw_reg_spaft_pack(char *payload, u16 local_port, * ------------------------------------------ * The following register controls the association of flooding tables and MIDs * to packet types used for flooding. + * + * Reserved when CONFIG_PROFILE.flood_mode = CFF. */ #define MLXSW_REG_SFGC_ID 0x2011 #define MLXSW_REG_SFGC_LEN 0x14 @@ -1862,6 +1864,7 @@ MLXSW_ITEM32(reg, sfmr, fid, 0x00, 0, 16); * Access: RW * * Note: Reserved when legacy bridge model is used. + * Reserved when CONFIG_PROFILE.flood_mode = CFF. */ MLXSW_ITEM32(reg, sfmr, flood_rsp, 0x08, 31, 1); @@ -1872,6 +1875,7 @@ MLXSW_ITEM32(reg, sfmr, flood_rsp, 0x08, 31, 1); * Access: RW * * Note: Reserved when legacy bridge model is used and when flood_rsp=1. + * Reserved when CONFIG_PROFILE.flood_mode = CFF */ MLXSW_ITEM32(reg, sfmr, flood_bridge_type, 0x08, 28, 1); @@ -1880,6 +1884,8 @@ MLXSW_ITEM32(reg, sfmr, flood_bridge_type, 0x08, 28, 1); * Used to point into the flooding table selected by SFGC register if * the table is of type FID-Offset. Otherwise, this field is reserved. * Access: RW + * + * Note: Reserved when CONFIG_PROFILE.flood_mode = CFF */ MLXSW_ITEM32(reg, sfmr, fid_offset, 0x08, 0, 16); @@ -1938,6 +1944,26 @@ MLXSW_ITEM32(reg, sfmr, irif_v, 0x14, 24, 1); */ MLXSW_ITEM32(reg, sfmr, irif, 0x14, 0, 16); +/* reg_sfmr_cff_mid_base + * Pointer to PGT table. + * Range: 0..(cap_max_pgt-1) + * Access: RW + * + * Note: Reserved when SwitchX/-2 and Spectrum-1. + * Supported when CONFIG_PROFILE.flood_mode = CFF. + */ +MLXSW_ITEM32(reg, sfmr, cff_mid_base, 0x20, 0, 16); + +/* reg_sfmr_cff_prf_id + * Compressed Fid Flooding profile_id + * Range 0..(max_cap_nve_flood_prf-1) + * Access: RW + * + * Note: Reserved when SwitchX/-2 and Spectrum-1 + * Supported only when CONFIG_PROFLE.flood_mode = CFF. + */ +MLXSW_ITEM32(reg, sfmr, cff_prf_id, 0x24, 0, 2); + /* reg_sfmr_smpe_valid * SMPE is valid. * Access: RW @@ -1959,18 +1985,11 @@ MLXSW_ITEM32(reg, sfmr, smpe, 0x28, 0, 16); static inline void mlxsw_reg_sfmr_pack(char *payload, enum mlxsw_reg_sfmr_op op, u16 fid, - u16 fid_offset, bool flood_rsp, - enum mlxsw_reg_bridge_type bridge_type, bool smpe_valid, u16 smpe) { MLXSW_REG_ZERO(sfmr, payload); mlxsw_reg_sfmr_op_set(payload, op); mlxsw_reg_sfmr_fid_set(payload, fid); - mlxsw_reg_sfmr_fid_offset_set(payload, fid_offset); - mlxsw_reg_sfmr_vtfp_set(payload, false); - mlxsw_reg_sfmr_vv_set(payload, false); - mlxsw_reg_sfmr_flood_rsp_set(payload, flood_rsp); - mlxsw_reg_sfmr_flood_bridge_type_set(payload, bridge_type); mlxsw_reg_sfmr_smpe_valid_set(payload, smpe_valid); mlxsw_reg_sfmr_smpe_set(payload, smpe); } @@ -2168,6 +2187,50 @@ static inline void mlxsw_reg_spvc_pack(char *payload, u16 local_port, bool et1, mlxsw_reg_spvc_et0_set(payload, et0); } +/* SFFP - Switch FID Flooding Profiles Register + * -------------------------------------------- + * The SFFP register populates the fid flooding profile tables used for the NVE + * flooding and Compressed-FID Flooding (CFF). + * + * Reserved on Spectrum-1. 
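 * Each (profile_id, traffic type) pair maps to a flood_offset that is
 * added to SFMR.cff_mid_base (CFF FID flood) or to the NVE tunnel flood
 * pointer to reach the final flooding table entry.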
+ */ +#define MLXSW_REG_SFFP_ID 0x2029 +#define MLXSW_REG_SFFP_LEN 0x0C + +MLXSW_REG_DEFINE(sffp, MLXSW_REG_SFFP_ID, MLXSW_REG_SFFP_LEN); + +/* reg_sffp_profile_id + * Profile ID a.k.a. SFMR.nve_flood_prf_id or SFMR.cff_prf_id + * Range 0..max_cap_nve_flood_prf-1 + * Access: Index + */ +MLXSW_ITEM32(reg, sffp, profile_id, 0x00, 16, 2); + +/* reg_sffp_type + * The traffic type to reach the flooding table. + * Same as SFGC.type + * Access: Index + */ +MLXSW_ITEM32(reg, sffp, type, 0x00, 0, 4); + +/* reg_sffp_flood_offset + * Flood offset. Offset to add to SFMR.cff_mid_base to get the final PGT address + * for FID flood; or offset to add to SFMR.nve_tunnel_flood_ptr to get KVD + * pointer for NVE underlay. + * Access: RW + */ +MLXSW_ITEM32(reg, sffp, flood_offset, 0x04, 0, 3); + +static inline void mlxsw_reg_sffp_pack(char *payload, u8 profile_id, + enum mlxsw_reg_sfgc_type type, + u8 flood_offset) +{ + MLXSW_REG_ZERO(sffp, payload); + mlxsw_reg_sffp_profile_id_set(payload, profile_id); + mlxsw_reg_sffp_type_set(payload, type); + mlxsw_reg_sffp_flood_offset_set(payload, flood_offset); +} + /* SPEVET - Switch Port Egress VLAN EtherType * ------------------------------------------ * The switch port egress VLAN EtherType configures which EtherType to push at @@ -10122,6 +10185,15 @@ mlxsw_reg_mgir_unpack(char *payload, u32 *hw_rev, char *fw_info_psid, MLXSW_REG_DEFINE(mrsr, MLXSW_REG_MRSR_ID, MLXSW_REG_MRSR_LEN); +enum mlxsw_reg_mrsr_command { + /* Switch soft reset, does not reset PCI firmware. */ + MLXSW_REG_MRSR_COMMAND_SOFTWARE_RESET = 1, + /* Reset will be done when PCI link will be disabled. + * This command will reset PCI firmware also. + */ + MLXSW_REG_MRSR_COMMAND_RESET_AT_PCI_DISABLE = 6, +}; + /* reg_mrsr_command * Reset/shutdown command * 0 - do nothing @@ -10130,10 +10202,11 @@ MLXSW_REG_DEFINE(mrsr, MLXSW_REG_MRSR_ID, MLXSW_REG_MRSR_LEN); */ MLXSW_ITEM32(reg, mrsr, command, 0x00, 0, 4); -static inline void mlxsw_reg_mrsr_pack(char *payload) +static inline void mlxsw_reg_mrsr_pack(char *payload, + enum mlxsw_reg_mrsr_command command) { MLXSW_REG_ZERO(mrsr, payload); - mlxsw_reg_mrsr_command_set(payload, 1); + mlxsw_reg_mrsr_command_set(payload, command); } /* MLCR - Management LED Control Register @@ -10584,6 +10657,8 @@ MLXSW_ITEM32(reg, mcam, feature_group, 0x00, 16, 8); enum mlxsw_reg_mcam_mng_feature_cap_mask_bits { /* If set, MCIA supports 128 bytes payloads. Otherwise, 48 bytes. */ MLXSW_REG_MCAM_MCIA_128B = 34, + /* If set, MRSR.command=6 is supported. 
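 * (MRSR command 6 is MLXSW_REG_MRSR_COMMAND_RESET_AT_PCI_DISABLE; the
 * driver queries this bit through MCAM before choosing between the PCI
 * reset flow and the legacy software reset.)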
*/ + MLXSW_REG_MCAM_PCI_RESET = 48, }; #define MLXSW_REG_BYTES_PER_DWORD 0x4 @@ -12934,6 +13009,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(spvmlr), MLXSW_REG(spfsr), MLXSW_REG(spvc), + MLXSW_REG(sffp), MLXSW_REG(spevet), MLXSW_REG(smpe), MLXSW_REG(smid2), diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h index 89dd2777ec4d..9d7977ebe186 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/resources.h +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -27,6 +27,7 @@ enum mlxsw_res_id { MLXSW_RES_ID_FID, MLXSW_RES_ID_MAX_LAG, MLXSW_RES_ID_MAX_LAG_MEMBERS, + MLXSW_RES_ID_MAX_NVE_FLOOD_PRF, MLXSW_RES_ID_GUARANTEED_SHARED_BUFFER, MLXSW_RES_ID_CELL_SIZE, MLXSW_RES_ID_MAX_HEADROOM_SIZE, @@ -88,6 +89,7 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_FID] = 0x2512, [MLXSW_RES_ID_MAX_LAG] = 0x2520, [MLXSW_RES_ID_MAX_LAG_MEMBERS] = 0x2521, + [MLXSW_RES_ID_MAX_NVE_FLOOD_PRF] = 0x2522, [MLXSW_RES_ID_GUARANTEED_SHARED_BUFFER] = 0x2805, /* Bytes */ [MLXSW_RES_ID_CELL_SIZE] = 0x2803, /* Bytes */ [MLXSW_RES_ID_MAX_HEADROOM_SIZE] = 0x2811, /* Bytes */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index c70333b460ea..800c461deefa 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -753,6 +753,8 @@ union mlxsw_sp_l3addr { }; u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif); +int mlxsw_sp_rif_subport_port(const struct mlxsw_sp_rif *rif, + u16 *port, bool *is_lag); int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, struct netlink_ext_ack *extack); void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c index e954b8cd2ee8..aad4bb17dfb1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -424,18 +424,35 @@ static enum mlxsw_reg_sfmr_op mlxsw_sp_sfmr_op(bool valid) MLXSW_REG_SFMR_OP_DESTROY_FID; } -static int mlxsw_sp_fid_op(const struct mlxsw_sp_fid *fid, bool valid) +static void mlxsw_sp_fid_pack(char *sfmr_pl, + const struct mlxsw_sp_fid *fid, + enum mlxsw_reg_sfmr_op op) { - struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; - char sfmr_pl[MLXSW_REG_SFMR_LEN]; u16 smpe; smpe = fid->fid_family->smpe_index_valid ? 
fid->fid_index : 0; - mlxsw_reg_sfmr_pack(sfmr_pl, mlxsw_sp_sfmr_op(valid), fid->fid_index, - fid->fid_offset, fid->fid_family->flood_rsp, - fid->fid_family->bridge_type, + mlxsw_reg_sfmr_pack(sfmr_pl, op, fid->fid_index, fid->fid_family->smpe_index_valid, smpe); +} + +static void mlxsw_sp_fid_pack_ctl(char *sfmr_pl, + const struct mlxsw_sp_fid *fid, + enum mlxsw_reg_sfmr_op op) +{ + mlxsw_sp_fid_pack(sfmr_pl, fid, op); + mlxsw_reg_sfmr_fid_offset_set(sfmr_pl, fid->fid_offset); + mlxsw_reg_sfmr_flood_rsp_set(sfmr_pl, fid->fid_family->flood_rsp); + mlxsw_reg_sfmr_flood_bridge_type_set(sfmr_pl, + fid->fid_family->bridge_type); +} + +static int mlxsw_sp_fid_op(const struct mlxsw_sp_fid *fid, bool valid) +{ + struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; + char sfmr_pl[MLXSW_REG_SFMR_LEN]; + + mlxsw_sp_fid_pack_ctl(sfmr_pl, fid, mlxsw_sp_sfmr_op(valid)); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); } @@ -444,15 +461,8 @@ static int mlxsw_sp_fid_edit_op(const struct mlxsw_sp_fid *fid, { struct mlxsw_sp *mlxsw_sp = fid->fid_family->mlxsw_sp; char sfmr_pl[MLXSW_REG_SFMR_LEN]; - u16 smpe; - - smpe = fid->fid_family->smpe_index_valid ? fid->fid_index : 0; - mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, - fid->fid_index, fid->fid_offset, - fid->fid_family->flood_rsp, - fid->fid_family->bridge_type, - fid->fid_family->smpe_index_valid, smpe); + mlxsw_sp_fid_pack_ctl(sfmr_pl, fid, MLXSW_REG_SFMR_OP_CREATE_FID); mlxsw_reg_sfmr_vv_set(sfmr_pl, fid->vni_valid); mlxsw_reg_sfmr_vni_set(sfmr_pl, be32_to_cpu(fid->vni)); mlxsw_reg_sfmr_vtfp_set(sfmr_pl, fid->nve_flood_index_valid); @@ -1687,9 +1697,6 @@ mlxsw_sp_fid_flood_tables_init(struct mlxsw_sp_fid_family *fid_family) int err; int i; - if (!fid_family->nr_flood_tables) - return 0; - pgt_size = mlxsw_sp_fid_family_pgt_size(fid_family); err = mlxsw_sp_pgt_mid_alloc_range(mlxsw_sp, &fid_family->pgt_base, pgt_size); @@ -1718,9 +1725,6 @@ mlxsw_sp_fid_flood_tables_fini(struct mlxsw_sp_fid_family *fid_family) struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp; u16 pgt_size; - if (!fid_family->nr_flood_tables) - return; - pgt_size = mlxsw_sp_fid_family_pgt_size(fid_family); mlxsw_sp_pgt_mid_free_range(mlxsw_sp, fid_family->pgt_base, pgt_size); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 82a95125d9ca..2c255ed9b8a9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -8419,6 +8419,9 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, rif->ops = ops; rif->rif_entries = rif_entries; + if (ops->setup) + ops->setup(rif, params); + if (ops->fid_get) { fid = ops->fid_get(rif, params, extack); if (IS_ERR(fid)) { @@ -8428,9 +8431,6 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, rif->fid = fid; } - if (ops->setup) - ops->setup(rif, params); - err = ops->configure(rif, extack); if (err) goto err_configure; @@ -8660,6 +8660,20 @@ mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif) return container_of(rif, struct mlxsw_sp_rif_subport, common); } +int mlxsw_sp_rif_subport_port(const struct mlxsw_sp_rif *rif, + u16 *port, bool *is_lag) +{ + struct mlxsw_sp_rif_subport *rif_subport; + + if (WARN_ON(rif->ops->type != MLXSW_SP_RIF_TYPE_SUBPORT)) + return -EINVAL; + + rif_subport = mlxsw_sp_rif_subport_rif(rif); + *is_lag = rif_subport->lag; + *port = *is_lag ? 
rif_subport->lag_id : rif_subport->system_port; + return 0; +} + static struct mlxsw_sp_rif * mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_rif_params *params, diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index b648461787d2..be79cb0ae5af 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -1075,7 +1075,7 @@ struct lan743x_adapter { #define DMA_DESCRIPTOR_SPACING_32 (32) #define DMA_DESCRIPTOR_SPACING_64 (64) #define DMA_DESCRIPTOR_SPACING_128 (128) -#define DEFAULT_DMA_DESCRIPTOR_SPACING (L1_CACHE_BYTES) +#define DEFAULT_DMA_DESCRIPTOR_SPACING (DMA_DESCRIPTOR_SPACING_16) #define DMAC_CHANNEL_STATE_SET(start_bit, stop_bit) \ (((start_bit) ? 2 : 0) | ((stop_bit) ? 1 : 0)) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 939cfce15830..bd0e26524417 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -621,6 +621,9 @@ struct nfp_net_dp { * @mbox_amsg.lock: Protect message list * @mbox_amsg.list: List of message to process * @mbox_amsg.work: Work to process message asynchronously + * @fs: Flow steering + * @fs.count: Flow count + * @fs.list: List of flows * @app_priv: APP private data for this vNIC */ struct nfp_net { @@ -728,9 +731,39 @@ struct nfp_net { struct work_struct work; } mbox_amsg; + struct { + u16 count; + struct list_head list; + } fs; + void *app_priv; }; +struct nfp_fs_entry { + struct list_head node; + u32 flow_type; + u32 loc; + struct { + union { + struct { + __be32 sip4; + __be32 dip4; + }; + struct { + __be32 sip6[4]; + __be32 dip6[4]; + }; + }; + union { + __be16 l3_proto; + u8 l4_proto; + }; + __be16 sport; + __be16 dport; + } key, msk; + u64 action; +}; + struct nfp_mbox_amsg_entry { struct list_head list; int (*cfg)(struct nfp_net *nn, struct nfp_mbox_amsg_entry *entry); @@ -987,6 +1020,9 @@ struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn); int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *new, struct netlink_ext_ack *extack); +int nfp_net_fs_add_hw(struct nfp_net *nn, struct nfp_fs_entry *entry); +int nfp_net_fs_del_hw(struct nfp_net *nn, struct nfp_fs_entry *entry); + #ifdef CONFIG_NFP_DEBUG void nfp_net_debugfs_create(void); void nfp_net_debugfs_destroy(void); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index de0a5d5ded30..ac1f4514b1d0 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1763,6 +1763,186 @@ nfp_net_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) return nfp_net_mbox_reconfig_and_unlock(nn, cmd); } +static void +nfp_net_fs_fill_v4(struct nfp_net *nn, struct nfp_fs_entry *entry, u32 op, u32 *addr) +{ + unsigned int i; + + union { + struct { + __be16 loc; + u8 k_proto, m_proto; + __be32 k_sip, m_sip, k_dip, m_dip; + __be16 k_sport, m_sport, k_dport, m_dport; + }; + __be32 val[7]; + } v4_rule; + + nn_writel(nn, *addr, op); + *addr += sizeof(u32); + + v4_rule.loc = cpu_to_be16(entry->loc); + v4_rule.k_proto = entry->key.l4_proto; + v4_rule.m_proto = entry->msk.l4_proto; + v4_rule.k_sip = entry->key.sip4; + v4_rule.m_sip = entry->msk.sip4; + v4_rule.k_dip = entry->key.dip4; + v4_rule.m_dip = entry->msk.dip4; + v4_rule.k_sport = entry->key.sport; + v4_rule.m_sport = entry->msk.sport; + v4_rule.k_dport = 
entry->key.dport; + v4_rule.m_dport = entry->msk.dport; + + for (i = 0; i < ARRAY_SIZE(v4_rule.val); i++, *addr += sizeof(__be32)) + nn_writel(nn, *addr, be32_to_cpu(v4_rule.val[i])); +} + +static void +nfp_net_fs_fill_v6(struct nfp_net *nn, struct nfp_fs_entry *entry, u32 op, u32 *addr) +{ + unsigned int i; + + union { + struct { + __be16 loc; + u8 k_proto, m_proto; + __be32 k_sip[4], m_sip[4], k_dip[4], m_dip[4]; + __be16 k_sport, m_sport, k_dport, m_dport; + }; + __be32 val[19]; + } v6_rule; + + nn_writel(nn, *addr, op); + *addr += sizeof(u32); + + v6_rule.loc = cpu_to_be16(entry->loc); + v6_rule.k_proto = entry->key.l4_proto; + v6_rule.m_proto = entry->msk.l4_proto; + for (i = 0; i < 4; i++) { + v6_rule.k_sip[i] = entry->key.sip6[i]; + v6_rule.m_sip[i] = entry->msk.sip6[i]; + v6_rule.k_dip[i] = entry->key.dip6[i]; + v6_rule.m_dip[i] = entry->msk.dip6[i]; + } + v6_rule.k_sport = entry->key.sport; + v6_rule.m_sport = entry->msk.sport; + v6_rule.k_dport = entry->key.dport; + v6_rule.m_dport = entry->msk.dport; + + for (i = 0; i < ARRAY_SIZE(v6_rule.val); i++, *addr += sizeof(__be32)) + nn_writel(nn, *addr, be32_to_cpu(v6_rule.val[i])); +} + +#define NFP_FS_QUEUE_ID GENMASK(22, 16) +#define NFP_FS_ACT GENMASK(15, 0) +#define NFP_FS_ACT_DROP BIT(0) +#define NFP_FS_ACT_Q BIT(1) +static void +nfp_net_fs_fill_act(struct nfp_net *nn, struct nfp_fs_entry *entry, u32 addr) +{ + u32 action = 0; /* 0 means default passthrough */ + + if (entry->action == RX_CLS_FLOW_DISC) + action = NFP_FS_ACT_DROP; + else if (!(entry->flow_type & FLOW_RSS)) + action = FIELD_PREP(NFP_FS_QUEUE_ID, entry->action) | NFP_FS_ACT_Q; + + nn_writel(nn, addr, action); +} + +int nfp_net_fs_add_hw(struct nfp_net *nn, struct nfp_fs_entry *entry) +{ + u32 addr = nn->tlv_caps.mbox_off + NFP_NET_CFG_MBOX_SIMPLE_VAL; + int err; + + err = nfp_net_mbox_lock(nn, NFP_NET_CFG_FS_SZ); + if (err) + return err; + + switch (entry->flow_type & ~FLOW_RSS) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case SCTP_V4_FLOW: + case IPV4_USER_FLOW: + nfp_net_fs_fill_v4(nn, entry, NFP_NET_CFG_MBOX_CMD_FS_ADD_V4, &addr); + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case SCTP_V6_FLOW: + case IPV6_USER_FLOW: + nfp_net_fs_fill_v6(nn, entry, NFP_NET_CFG_MBOX_CMD_FS_ADD_V6, &addr); + break; + case ETHER_FLOW: + nn_writel(nn, addr, NFP_NET_CFG_MBOX_CMD_FS_ADD_ETHTYPE); + addr += sizeof(u32); + nn_writew(nn, addr, be16_to_cpu(entry->key.l3_proto)); + addr += sizeof(u32); + break; + } + + nfp_net_fs_fill_act(nn, entry, addr); + + err = nfp_net_mbox_reconfig_and_unlock(nn, NFP_NET_CFG_MBOX_CMD_FLOW_STEER); + if (err) { + nn_err(nn, "Add new fs rule failed with %d\n", err); + return -EIO; + } + + return 0; +} + +int nfp_net_fs_del_hw(struct nfp_net *nn, struct nfp_fs_entry *entry) +{ + u32 addr = nn->tlv_caps.mbox_off + NFP_NET_CFG_MBOX_SIMPLE_VAL; + int err; + + err = nfp_net_mbox_lock(nn, NFP_NET_CFG_FS_SZ); + if (err) + return err; + + switch (entry->flow_type & ~FLOW_RSS) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case SCTP_V4_FLOW: + case IPV4_USER_FLOW: + nfp_net_fs_fill_v4(nn, entry, NFP_NET_CFG_MBOX_CMD_FS_DEL_V4, &addr); + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case SCTP_V6_FLOW: + case IPV6_USER_FLOW: + nfp_net_fs_fill_v6(nn, entry, NFP_NET_CFG_MBOX_CMD_FS_DEL_V6, &addr); + break; + case ETHER_FLOW: + nn_writel(nn, addr, NFP_NET_CFG_MBOX_CMD_FS_DEL_ETHTYPE); + addr += sizeof(u32); + nn_writew(nn, addr, be16_to_cpu(entry->key.l3_proto)); + addr += sizeof(u32); + break; + } + + nfp_net_fs_fill_act(nn, entry, addr); + + err = 
nfp_net_mbox_reconfig_and_unlock(nn, NFP_NET_CFG_MBOX_CMD_FLOW_STEER); + if (err) { + nn_err(nn, "Delete fs rule failed with %d\n", err); + return -EIO; + } + + return 0; +} + +static void nfp_net_fs_clean(struct nfp_net *nn) +{ + struct nfp_fs_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &nn->fs.list, node) { + nfp_net_fs_del_hw(nn, entry); + list_del(&entry->node); + kfree(entry); + } +} + static void nfp_net_stat64(struct net_device *netdev, struct rtnl_link_stats64 *stats) { @@ -2740,6 +2920,8 @@ int nfp_net_init(struct nfp_net *nn) INIT_LIST_HEAD(&nn->mbox_amsg.list); INIT_WORK(&nn->mbox_amsg.work, nfp_net_mbox_amsg_work); + INIT_LIST_HEAD(&nn->fs.list); + return register_netdev(nn->dp.netdev); err_clean_mbox: @@ -2759,6 +2941,7 @@ void nfp_net_clean(struct nfp_net *nn) unregister_netdev(nn->dp.netdev); nfp_net_ipsec_clean(nn); nfp_ccm_mbox_clean(nn); + nfp_net_fs_clean(nn); flush_work(&nn->mbox_amsg.work); nfp_net_reconfig_wait_posted(nn); } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index 3e63f6d6a563..eaf4d3c499d1 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -269,6 +269,7 @@ #define NFP_NET_CFG_CTRL_IPSEC (0x1 << 1) /* IPsec offload */ #define NFP_NET_CFG_CTRL_MCAST_FILTER (0x1 << 2) /* Multicast Filter */ #define NFP_NET_CFG_CTRL_FREELIST_EN (0x1 << 6) /* Freelist enable flag bit */ +#define NFP_NET_CFG_CTRL_FLOW_STEER (0x1 << 8) /* Flow steering */ #define NFP_NET_CFG_CAP_WORD1 0x00a4 @@ -418,6 +419,8 @@ #define NFP_NET_CFG_MBOX_CMD_MULTICAST_ADD 8 #define NFP_NET_CFG_MBOX_CMD_MULTICAST_DEL 9 +#define NFP_NET_CFG_MBOX_CMD_FLOW_STEER 10 + /* VLAN filtering using general use mailbox * %NFP_NET_CFG_VLAN_FILTER: Base address of VLAN filter mailbox * %NFP_NET_CFG_VLAN_FILTER_VID: VLAN ID to filter @@ -440,6 +443,18 @@ #define NFP_NET_CFG_MULTICAST_MAC_LO (NFP_NET_CFG_MULTICAST + 6) #define NFP_NET_CFG_MULTICAST_SZ 0x0006 +/* Max size of FS rules in bytes */ +#define NFP_NET_CFG_FS_SZ 0x0054 +/* Sub commands for FS */ +enum { + NFP_NET_CFG_MBOX_CMD_FS_ADD_V4, + NFP_NET_CFG_MBOX_CMD_FS_DEL_V4, + NFP_NET_CFG_MBOX_CMD_FS_ADD_V6, + NFP_NET_CFG_MBOX_CMD_FS_DEL_V6, + NFP_NET_CFG_MBOX_CMD_FS_ADD_ETHTYPE, + NFP_NET_CFG_MBOX_CMD_FS_DEL_ETHTYPE, +}; + /* TLV capabilities * %NFP_NET_CFG_TLV_TYPE: Offset of type within the TLV * %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index e75cbb287625..d7896391b8ba 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -1317,6 +1317,116 @@ static int nfp_net_get_rss_hash_opts(struct nfp_net *nn, return 0; } +#define NFP_FS_MAX_ENTRY 1024 + +static int nfp_net_fs_to_ethtool(struct nfp_fs_entry *entry, struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fs = &cmd->fs; + unsigned int i; + + switch (entry->flow_type & ~FLOW_RSS) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case SCTP_V4_FLOW: + fs->h_u.tcp_ip4_spec.ip4src = entry->key.sip4; + fs->h_u.tcp_ip4_spec.ip4dst = entry->key.dip4; + fs->h_u.tcp_ip4_spec.psrc = entry->key.sport; + fs->h_u.tcp_ip4_spec.pdst = entry->key.dport; + fs->m_u.tcp_ip4_spec.ip4src = entry->msk.sip4; + fs->m_u.tcp_ip4_spec.ip4dst = entry->msk.dip4; + fs->m_u.tcp_ip4_spec.psrc = entry->msk.sport; + fs->m_u.tcp_ip4_spec.pdst = entry->msk.dport; + 
break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case SCTP_V6_FLOW: + for (i = 0; i < 4; i++) { + fs->h_u.tcp_ip6_spec.ip6src[i] = entry->key.sip6[i]; + fs->h_u.tcp_ip6_spec.ip6dst[i] = entry->key.dip6[i]; + fs->m_u.tcp_ip6_spec.ip6src[i] = entry->msk.sip6[i]; + fs->m_u.tcp_ip6_spec.ip6dst[i] = entry->msk.dip6[i]; + } + fs->h_u.tcp_ip6_spec.psrc = entry->key.sport; + fs->h_u.tcp_ip6_spec.pdst = entry->key.dport; + fs->m_u.tcp_ip6_spec.psrc = entry->msk.sport; + fs->m_u.tcp_ip6_spec.pdst = entry->msk.dport; + break; + case IPV4_USER_FLOW: + fs->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4; + fs->h_u.usr_ip4_spec.ip4src = entry->key.sip4; + fs->h_u.usr_ip4_spec.ip4dst = entry->key.dip4; + fs->h_u.usr_ip4_spec.proto = entry->key.l4_proto; + fs->m_u.usr_ip4_spec.ip4src = entry->msk.sip4; + fs->m_u.usr_ip4_spec.ip4dst = entry->msk.dip4; + fs->m_u.usr_ip4_spec.proto = entry->msk.l4_proto; + break; + case IPV6_USER_FLOW: + for (i = 0; i < 4; i++) { + fs->h_u.usr_ip6_spec.ip6src[i] = entry->key.sip6[i]; + fs->h_u.usr_ip6_spec.ip6dst[i] = entry->key.dip6[i]; + fs->m_u.usr_ip6_spec.ip6src[i] = entry->msk.sip6[i]; + fs->m_u.usr_ip6_spec.ip6dst[i] = entry->msk.dip6[i]; + } + fs->h_u.usr_ip6_spec.l4_proto = entry->key.l4_proto; + fs->m_u.usr_ip6_spec.l4_proto = entry->msk.l4_proto; + break; + case ETHER_FLOW: + fs->h_u.ether_spec.h_proto = entry->key.l3_proto; + fs->m_u.ether_spec.h_proto = entry->msk.l3_proto; + break; + default: + return -EINVAL; + } + + fs->flow_type = entry->flow_type; + fs->ring_cookie = entry->action; + + if (fs->flow_type & FLOW_RSS) { + /* Only rss_context of 0 is supported. */ + cmd->rss_context = 0; + /* RSS is used, mask the ring. */ + fs->ring_cookie |= ETHTOOL_RX_FLOW_SPEC_RING; + } + + return 0; +} + +static int nfp_net_get_fs_rule(struct nfp_net *nn, struct ethtool_rxnfc *cmd) +{ + struct nfp_fs_entry *entry; + + if (!(nn->cap_w1 & NFP_NET_CFG_CTRL_FLOW_STEER)) + return -EOPNOTSUPP; + + if (cmd->fs.location >= NFP_FS_MAX_ENTRY) + return -EINVAL; + + list_for_each_entry(entry, &nn->fs.list, node) { + if (entry->loc == cmd->fs.location) + return nfp_net_fs_to_ethtool(entry, cmd); + + if (entry->loc > cmd->fs.location) + /* no need to continue */ + return -ENOENT; + } + + return -ENOENT; +} + +static int nfp_net_get_fs_loc(struct nfp_net *nn, u32 *rule_locs) +{ + struct nfp_fs_entry *entry; + u32 count = 0; + + if (!(nn->cap_w1 & NFP_NET_CFG_CTRL_FLOW_STEER)) + return -EOPNOTSUPP; + + list_for_each_entry(entry, &nn->fs.list, node) + rule_locs[count++] = entry->loc; + + return 0; +} + static int nfp_net_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, u32 *rule_locs) { @@ -1326,6 +1436,14 @@ static int nfp_net_get_rxnfc(struct net_device *netdev, case ETHTOOL_GRXRINGS: cmd->data = nn->dp.num_rx_rings; return 0; + case ETHTOOL_GRXCLSRLCNT: + cmd->rule_cnt = nn->fs.count; + return 0; + case ETHTOOL_GRXCLSRULE: + return nfp_net_get_fs_rule(nn, cmd); + case ETHTOOL_GRXCLSRLALL: + cmd->data = NFP_FS_MAX_ENTRY; + return nfp_net_get_fs_loc(nn, rule_locs); case ETHTOOL_GRXFH: return nfp_net_get_rss_hash_opts(nn, cmd); default: @@ -1385,6 +1503,253 @@ static int nfp_net_set_rss_hash_opt(struct nfp_net *nn, return 0; } +static int nfp_net_fs_from_ethtool(struct nfp_fs_entry *entry, struct ethtool_rx_flow_spec *fs) +{ + unsigned int i; + + /* FLOW_EXT/FLOW_MAC_EXT is not supported. 
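 * The user-supplied keys are ANDed with their masks below, so only the
 * bits covered by each mask are programmed; TCP/UDP/SCTP flow types also
 * get l4_proto forced to the matching IPPROTO value (mask 0xff) in the
 * second switch.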
*/ + switch (fs->flow_type & ~FLOW_RSS) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case SCTP_V4_FLOW: + entry->msk.sip4 = fs->m_u.tcp_ip4_spec.ip4src; + entry->msk.dip4 = fs->m_u.tcp_ip4_spec.ip4dst; + entry->msk.sport = fs->m_u.tcp_ip4_spec.psrc; + entry->msk.dport = fs->m_u.tcp_ip4_spec.pdst; + entry->key.sip4 = fs->h_u.tcp_ip4_spec.ip4src & entry->msk.sip4; + entry->key.dip4 = fs->h_u.tcp_ip4_spec.ip4dst & entry->msk.dip4; + entry->key.sport = fs->h_u.tcp_ip4_spec.psrc & entry->msk.sport; + entry->key.dport = fs->h_u.tcp_ip4_spec.pdst & entry->msk.dport; + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case SCTP_V6_FLOW: + for (i = 0; i < 4; i++) { + entry->msk.sip6[i] = fs->m_u.tcp_ip6_spec.ip6src[i]; + entry->msk.dip6[i] = fs->m_u.tcp_ip6_spec.ip6dst[i]; + entry->key.sip6[i] = fs->h_u.tcp_ip6_spec.ip6src[i] & entry->msk.sip6[i]; + entry->key.dip6[i] = fs->h_u.tcp_ip6_spec.ip6dst[i] & entry->msk.dip6[i]; + } + entry->msk.sport = fs->m_u.tcp_ip6_spec.psrc; + entry->msk.dport = fs->m_u.tcp_ip6_spec.pdst; + entry->key.sport = fs->h_u.tcp_ip6_spec.psrc & entry->msk.sport; + entry->key.dport = fs->h_u.tcp_ip6_spec.pdst & entry->msk.dport; + break; + case IPV4_USER_FLOW: + entry->msk.sip4 = fs->m_u.usr_ip4_spec.ip4src; + entry->msk.dip4 = fs->m_u.usr_ip4_spec.ip4dst; + entry->msk.l4_proto = fs->m_u.usr_ip4_spec.proto; + entry->key.sip4 = fs->h_u.usr_ip4_spec.ip4src & entry->msk.sip4; + entry->key.dip4 = fs->h_u.usr_ip4_spec.ip4dst & entry->msk.dip4; + entry->key.l4_proto = fs->h_u.usr_ip4_spec.proto & entry->msk.l4_proto; + break; + case IPV6_USER_FLOW: + for (i = 0; i < 4; i++) { + entry->msk.sip6[i] = fs->m_u.usr_ip6_spec.ip6src[i]; + entry->msk.dip6[i] = fs->m_u.usr_ip6_spec.ip6dst[i]; + entry->key.sip6[i] = fs->h_u.usr_ip6_spec.ip6src[i] & entry->msk.sip6[i]; + entry->key.dip6[i] = fs->h_u.usr_ip6_spec.ip6dst[i] & entry->msk.dip6[i]; + } + entry->msk.l4_proto = fs->m_u.usr_ip6_spec.l4_proto; + entry->key.l4_proto = fs->h_u.usr_ip6_spec.l4_proto & entry->msk.l4_proto; + break; + case ETHER_FLOW: + entry->msk.l3_proto = fs->m_u.ether_spec.h_proto; + entry->key.l3_proto = fs->h_u.ether_spec.h_proto & entry->msk.l3_proto; + break; + default: + return -EINVAL; + } + + switch (fs->flow_type & ~FLOW_RSS) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + entry->key.l4_proto = IPPROTO_TCP; + entry->msk.l4_proto = 0xff; + break; + case UDP_V4_FLOW: + case UDP_V6_FLOW: + entry->key.l4_proto = IPPROTO_UDP; + entry->msk.l4_proto = 0xff; + break; + case SCTP_V4_FLOW: + case SCTP_V6_FLOW: + entry->key.l4_proto = IPPROTO_SCTP; + entry->msk.l4_proto = 0xff; + break; + } + + entry->flow_type = fs->flow_type; + entry->action = fs->ring_cookie; + entry->loc = fs->location; + + return 0; +} + +static int nfp_net_fs_check_existing(struct nfp_net *nn, struct nfp_fs_entry *new) +{ + struct nfp_fs_entry *entry; + + list_for_each_entry(entry, &nn->fs.list, node) { + if (new->loc != entry->loc && + !((new->flow_type ^ entry->flow_type) & ~FLOW_RSS) && + !memcmp(&new->key, &entry->key, sizeof(new->key)) && + !memcmp(&new->msk, &entry->msk, sizeof(new->msk))) + return entry->loc; + } + + /* -1 means no duplicates */ + return -1; +} + +static int nfp_net_fs_add(struct nfp_net *nn, struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fs = &cmd->fs; + struct nfp_fs_entry *new, *entry; + bool unsupp_mask; + int err, id; + + if (!(nn->cap_w1 & NFP_NET_CFG_CTRL_FLOW_STEER)) + return -EOPNOTSUPP; + + /* Only default RSS context(0) is supported. 
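 * A rule of this kind would typically be installed from user space with
 * something like (hypothetical interface name and addresses):
 *   ethtool -N eth0 flow-type tcp4 dst-ip 192.0.2.1 dst-port 80 action 2 loc 0
 * where "action 2" steers to RX queue 2 and "action -1" drops the flow.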
*/ + if ((fs->flow_type & FLOW_RSS) && cmd->rss_context) + return -EOPNOTSUPP; + + if (fs->location >= NFP_FS_MAX_ENTRY) + return -EINVAL; + + if (fs->ring_cookie != RX_CLS_FLOW_DISC && + fs->ring_cookie >= nn->dp.num_rx_rings) + return -EINVAL; + + /* FLOW_EXT/FLOW_MAC_EXT is not supported. */ + switch (fs->flow_type & ~FLOW_RSS) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case SCTP_V4_FLOW: + unsupp_mask = !!fs->m_u.tcp_ip4_spec.tos; + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case SCTP_V6_FLOW: + unsupp_mask = !!fs->m_u.tcp_ip6_spec.tclass; + break; + case IPV4_USER_FLOW: + unsupp_mask = !!fs->m_u.usr_ip4_spec.l4_4_bytes || + !!fs->m_u.usr_ip4_spec.tos || + !!fs->m_u.usr_ip4_spec.ip_ver; + /* ip_ver must be ETH_RX_NFC_IP4. */ + unsupp_mask |= fs->h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4; + break; + case IPV6_USER_FLOW: + unsupp_mask = !!fs->m_u.usr_ip6_spec.l4_4_bytes || + !!fs->m_u.usr_ip6_spec.tclass; + break; + case ETHER_FLOW: + if (fs->h_u.ether_spec.h_proto == htons(ETH_P_IP) || + fs->h_u.ether_spec.h_proto == htons(ETH_P_IPV6)) { + nn_err(nn, "Please use ip4/ip6 flow type instead.\n"); + return -EOPNOTSUPP; + } + /* Only unmasked ethtype is supported. */ + unsupp_mask = !is_zero_ether_addr(fs->m_u.ether_spec.h_dest) || + !is_zero_ether_addr(fs->m_u.ether_spec.h_source) || + (fs->m_u.ether_spec.h_proto != htons(0xffff)); + break; + default: + return -EOPNOTSUPP; + } + + if (unsupp_mask) + return -EOPNOTSUPP; + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return -ENOMEM; + + nfp_net_fs_from_ethtool(new, fs); + + id = nfp_net_fs_check_existing(nn, new); + if (id >= 0) { + nn_err(nn, "Identical rule is existing in %d.\n", id); + err = -EINVAL; + goto err; + } + + /* Insert to list in ascending order of location. */ + list_for_each_entry(entry, &nn->fs.list, node) { + if (entry->loc == fs->location) { + err = nfp_net_fs_del_hw(nn, entry); + if (err) + goto err; + + nn->fs.count--; + err = nfp_net_fs_add_hw(nn, new); + if (err) + goto err; + + nn->fs.count++; + list_replace(&entry->node, &new->node); + kfree(entry); + + return 0; + } + + if (entry->loc > fs->location) + break; + } + + if (nn->fs.count == NFP_FS_MAX_ENTRY) { + err = -ENOSPC; + goto err; + } + + err = nfp_net_fs_add_hw(nn, new); + if (err) + goto err; + + list_add_tail(&new->node, &entry->node); + nn->fs.count++; + + return 0; + +err: + kfree(new); + return err; +} + +static int nfp_net_fs_del(struct nfp_net *nn, struct ethtool_rxnfc *cmd) +{ + struct nfp_fs_entry *entry; + int err; + + if (!(nn->cap_w1 & NFP_NET_CFG_CTRL_FLOW_STEER)) + return -EOPNOTSUPP; + + if (!nn->fs.count || cmd->fs.location >= NFP_FS_MAX_ENTRY) + return -EINVAL; + + list_for_each_entry(entry, &nn->fs.list, node) { + if (entry->loc == cmd->fs.location) { + err = nfp_net_fs_del_hw(nn, entry); + if (err) + return err; + + list_del(&entry->node); + kfree(entry); + nn->fs.count--; + + return 0; + } else if (entry->loc > cmd->fs.location) { + /* no need to continue */ + break; + } + } + + return -ENOENT; +} + static int nfp_net_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) { @@ -1393,6 +1758,10 @@ static int nfp_net_set_rxnfc(struct net_device *netdev, switch (cmd->cmd) { case ETHTOOL_SRXFH: return nfp_net_set_rss_hash_opt(nn, cmd); + case ETHTOOL_SRXCLSRLINS: + return nfp_net_fs_add(nn, cmd); + case ETHTOOL_SRXCLSRLDEL: + return nfp_net_fs_del(nn, cmd); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 295366a85c63..dbc5c9d354c6 
100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -3100,6 +3100,33 @@ static void rtl_hw_start_8168g_2(struct rtl8169_private *tp) rtl_ephy_init(tp, e_info_8168g_2); } +static void rtl8411b_fix_phy_down(struct rtl8169_private *tp) +{ + static const u16 fix_data[] = { +/* 0xf800 */ 0xe008, 0xe00a, 0xe00c, 0xe00e, 0xe027, 0xe04f, 0xe05e, 0xe065, +/* 0xf810 */ 0xc602, 0xbe00, 0x0000, 0xc502, 0xbd00, 0x074c, 0xc302, 0xbb00, +/* 0xf820 */ 0x080a, 0x6420, 0x48c2, 0x8c20, 0xc516, 0x64a4, 0x49c0, 0xf009, +/* 0xf830 */ 0x74a2, 0x8ca5, 0x74a0, 0xc50e, 0x9ca2, 0x1c11, 0x9ca0, 0xe006, +/* 0xf840 */ 0x74f8, 0x48c4, 0x8cf8, 0xc404, 0xbc00, 0xc403, 0xbc00, 0x0bf2, +/* 0xf850 */ 0x0c0a, 0xe434, 0xd3c0, 0x49d9, 0xf01f, 0xc526, 0x64a5, 0x1400, +/* 0xf860 */ 0xf007, 0x0c01, 0x8ca5, 0x1c15, 0xc51b, 0x9ca0, 0xe013, 0xc519, +/* 0xf870 */ 0x74a0, 0x48c4, 0x8ca0, 0xc516, 0x74a4, 0x48c8, 0x48ca, 0x9ca4, +/* 0xf880 */ 0xc512, 0x1b00, 0x9ba0, 0x1b1c, 0x483f, 0x9ba2, 0x1b04, 0xc508, +/* 0xf890 */ 0x9ba0, 0xc505, 0xbd00, 0xc502, 0xbd00, 0x0300, 0x051e, 0xe434, +/* 0xf8a0 */ 0xe018, 0xe092, 0xde20, 0xd3c0, 0xc50f, 0x76a4, 0x49e3, 0xf007, +/* 0xf8b0 */ 0x49c0, 0xf103, 0xc607, 0xbe00, 0xc606, 0xbe00, 0xc602, 0xbe00, +/* 0xf8c0 */ 0x0c4c, 0x0c28, 0x0c2c, 0xdc00, 0xc707, 0x1d00, 0x8de2, 0x48c1, +/* 0xf8d0 */ 0xc502, 0xbd00, 0x00aa, 0xe0c0, 0xc502, 0xbd00, 0x0132 + }; + unsigned long flags; + int i; + + raw_spin_lock_irqsave(&tp->mac_ocp_lock, flags); + for (i = 0; i < ARRAY_SIZE(fix_data); i++) + __r8168_mac_ocp_write(tp, 0xf800 + 2 * i, fix_data[i]); + raw_spin_unlock_irqrestore(&tp->mac_ocp_lock, flags); +} + static void rtl_hw_start_8411_2(struct rtl8169_private *tp) { static const struct ephy_info e_info_8411_2[] = { @@ -3133,117 +3160,7 @@ static void rtl_hw_start_8411_2(struct rtl8169_private *tp) mdelay(3); r8168_mac_ocp_write(tp, 0xFC26, 0x0000); - r8168_mac_ocp_write(tp, 0xF800, 0xE008); - r8168_mac_ocp_write(tp, 0xF802, 0xE00A); - r8168_mac_ocp_write(tp, 0xF804, 0xE00C); - r8168_mac_ocp_write(tp, 0xF806, 0xE00E); - r8168_mac_ocp_write(tp, 0xF808, 0xE027); - r8168_mac_ocp_write(tp, 0xF80A, 0xE04F); - r8168_mac_ocp_write(tp, 0xF80C, 0xE05E); - r8168_mac_ocp_write(tp, 0xF80E, 0xE065); - r8168_mac_ocp_write(tp, 0xF810, 0xC602); - r8168_mac_ocp_write(tp, 0xF812, 0xBE00); - r8168_mac_ocp_write(tp, 0xF814, 0x0000); - r8168_mac_ocp_write(tp, 0xF816, 0xC502); - r8168_mac_ocp_write(tp, 0xF818, 0xBD00); - r8168_mac_ocp_write(tp, 0xF81A, 0x074C); - r8168_mac_ocp_write(tp, 0xF81C, 0xC302); - r8168_mac_ocp_write(tp, 0xF81E, 0xBB00); - r8168_mac_ocp_write(tp, 0xF820, 0x080A); - r8168_mac_ocp_write(tp, 0xF822, 0x6420); - r8168_mac_ocp_write(tp, 0xF824, 0x48C2); - r8168_mac_ocp_write(tp, 0xF826, 0x8C20); - r8168_mac_ocp_write(tp, 0xF828, 0xC516); - r8168_mac_ocp_write(tp, 0xF82A, 0x64A4); - r8168_mac_ocp_write(tp, 0xF82C, 0x49C0); - r8168_mac_ocp_write(tp, 0xF82E, 0xF009); - r8168_mac_ocp_write(tp, 0xF830, 0x74A2); - r8168_mac_ocp_write(tp, 0xF832, 0x8CA5); - r8168_mac_ocp_write(tp, 0xF834, 0x74A0); - r8168_mac_ocp_write(tp, 0xF836, 0xC50E); - r8168_mac_ocp_write(tp, 0xF838, 0x9CA2); - r8168_mac_ocp_write(tp, 0xF83A, 0x1C11); - r8168_mac_ocp_write(tp, 0xF83C, 0x9CA0); - r8168_mac_ocp_write(tp, 0xF83E, 0xE006); - r8168_mac_ocp_write(tp, 0xF840, 0x74F8); - r8168_mac_ocp_write(tp, 0xF842, 0x48C4); - r8168_mac_ocp_write(tp, 0xF844, 0x8CF8); - r8168_mac_ocp_write(tp, 0xF846, 0xC404); - r8168_mac_ocp_write(tp, 0xF848, 0xBC00); - r8168_mac_ocp_write(tp, 0xF84A, 0xC403); - 
r8168_mac_ocp_write(tp, 0xF84C, 0xBC00); - r8168_mac_ocp_write(tp, 0xF84E, 0x0BF2); - r8168_mac_ocp_write(tp, 0xF850, 0x0C0A); - r8168_mac_ocp_write(tp, 0xF852, 0xE434); - r8168_mac_ocp_write(tp, 0xF854, 0xD3C0); - r8168_mac_ocp_write(tp, 0xF856, 0x49D9); - r8168_mac_ocp_write(tp, 0xF858, 0xF01F); - r8168_mac_ocp_write(tp, 0xF85A, 0xC526); - r8168_mac_ocp_write(tp, 0xF85C, 0x64A5); - r8168_mac_ocp_write(tp, 0xF85E, 0x1400); - r8168_mac_ocp_write(tp, 0xF860, 0xF007); - r8168_mac_ocp_write(tp, 0xF862, 0x0C01); - r8168_mac_ocp_write(tp, 0xF864, 0x8CA5); - r8168_mac_ocp_write(tp, 0xF866, 0x1C15); - r8168_mac_ocp_write(tp, 0xF868, 0xC51B); - r8168_mac_ocp_write(tp, 0xF86A, 0x9CA0); - r8168_mac_ocp_write(tp, 0xF86C, 0xE013); - r8168_mac_ocp_write(tp, 0xF86E, 0xC519); - r8168_mac_ocp_write(tp, 0xF870, 0x74A0); - r8168_mac_ocp_write(tp, 0xF872, 0x48C4); - r8168_mac_ocp_write(tp, 0xF874, 0x8CA0); - r8168_mac_ocp_write(tp, 0xF876, 0xC516); - r8168_mac_ocp_write(tp, 0xF878, 0x74A4); - r8168_mac_ocp_write(tp, 0xF87A, 0x48C8); - r8168_mac_ocp_write(tp, 0xF87C, 0x48CA); - r8168_mac_ocp_write(tp, 0xF87E, 0x9CA4); - r8168_mac_ocp_write(tp, 0xF880, 0xC512); - r8168_mac_ocp_write(tp, 0xF882, 0x1B00); - r8168_mac_ocp_write(tp, 0xF884, 0x9BA0); - r8168_mac_ocp_write(tp, 0xF886, 0x1B1C); - r8168_mac_ocp_write(tp, 0xF888, 0x483F); - r8168_mac_ocp_write(tp, 0xF88A, 0x9BA2); - r8168_mac_ocp_write(tp, 0xF88C, 0x1B04); - r8168_mac_ocp_write(tp, 0xF88E, 0xC508); - r8168_mac_ocp_write(tp, 0xF890, 0x9BA0); - r8168_mac_ocp_write(tp, 0xF892, 0xC505); - r8168_mac_ocp_write(tp, 0xF894, 0xBD00); - r8168_mac_ocp_write(tp, 0xF896, 0xC502); - r8168_mac_ocp_write(tp, 0xF898, 0xBD00); - r8168_mac_ocp_write(tp, 0xF89A, 0x0300); - r8168_mac_ocp_write(tp, 0xF89C, 0x051E); - r8168_mac_ocp_write(tp, 0xF89E, 0xE434); - r8168_mac_ocp_write(tp, 0xF8A0, 0xE018); - r8168_mac_ocp_write(tp, 0xF8A2, 0xE092); - r8168_mac_ocp_write(tp, 0xF8A4, 0xDE20); - r8168_mac_ocp_write(tp, 0xF8A6, 0xD3C0); - r8168_mac_ocp_write(tp, 0xF8A8, 0xC50F); - r8168_mac_ocp_write(tp, 0xF8AA, 0x76A4); - r8168_mac_ocp_write(tp, 0xF8AC, 0x49E3); - r8168_mac_ocp_write(tp, 0xF8AE, 0xF007); - r8168_mac_ocp_write(tp, 0xF8B0, 0x49C0); - r8168_mac_ocp_write(tp, 0xF8B2, 0xF103); - r8168_mac_ocp_write(tp, 0xF8B4, 0xC607); - r8168_mac_ocp_write(tp, 0xF8B6, 0xBE00); - r8168_mac_ocp_write(tp, 0xF8B8, 0xC606); - r8168_mac_ocp_write(tp, 0xF8BA, 0xBE00); - r8168_mac_ocp_write(tp, 0xF8BC, 0xC602); - r8168_mac_ocp_write(tp, 0xF8BE, 0xBE00); - r8168_mac_ocp_write(tp, 0xF8C0, 0x0C4C); - r8168_mac_ocp_write(tp, 0xF8C2, 0x0C28); - r8168_mac_ocp_write(tp, 0xF8C4, 0x0C2C); - r8168_mac_ocp_write(tp, 0xF8C6, 0xDC00); - r8168_mac_ocp_write(tp, 0xF8C8, 0xC707); - r8168_mac_ocp_write(tp, 0xF8CA, 0x1D00); - r8168_mac_ocp_write(tp, 0xF8CC, 0x8DE2); - r8168_mac_ocp_write(tp, 0xF8CE, 0x48C1); - r8168_mac_ocp_write(tp, 0xF8D0, 0xC502); - r8168_mac_ocp_write(tp, 0xF8D2, 0xBD00); - r8168_mac_ocp_write(tp, 0xF8D4, 0x00AA); - r8168_mac_ocp_write(tp, 0xF8D6, 0xE0C0); - r8168_mac_ocp_write(tp, 0xF8D8, 0xC502); - r8168_mac_ocp_write(tp, 0xF8DA, 0xBD00); - r8168_mac_ocp_write(tp, 0xF8DC, 0x0132); + rtl8411b_fix_phy_down(tp); r8168_mac_ocp_write(tp, 0xFC26, 0x8000); diff --git a/drivers/net/ethernet/renesas/Kconfig b/drivers/net/ethernet/renesas/Kconfig index 8ef5b0241e64..733cbb6eb3ed 100644 --- a/drivers/net/ethernet/renesas/Kconfig +++ b/drivers/net/ethernet/renesas/Kconfig @@ -44,7 +44,16 @@ config RENESAS_ETHER_SWITCH select CRC32 select MII select PHYLINK + select RENESAS_GEN4_PTP help Renesas 
Ethernet Switch device driver. +config RENESAS_GEN4_PTP + tristate "Renesas R-Car Gen4 gPTP support" if COMPILE_TEST + select CRC32 + select MII + select PHYLIB + help + Renesas R-Car Gen4 gPTP device driver. + endif # NET_VENDOR_RENESAS diff --git a/drivers/net/ethernet/renesas/Makefile b/drivers/net/ethernet/renesas/Makefile index e8fd85b5fe8f..9070acfd6aaf 100644 --- a/drivers/net/ethernet/renesas/Makefile +++ b/drivers/net/ethernet/renesas/Makefile @@ -8,5 +8,6 @@ obj-$(CONFIG_SH_ETH) += sh_eth.o ravb-objs := ravb_main.o ravb_ptp.o obj-$(CONFIG_RAVB) += ravb.o -rswitch_drv-objs := rswitch.o rcar_gen4_ptp.o -obj-$(CONFIG_RENESAS_ETHER_SWITCH) += rswitch_drv.o +obj-$(CONFIG_RENESAS_ETHER_SWITCH) += rswitch.o + +obj-$(CONFIG_RENESAS_GEN4_PTP) += rcar_gen4_ptp.o diff --git a/drivers/net/ethernet/renesas/rcar_gen4_ptp.c b/drivers/net/ethernet/renesas/rcar_gen4_ptp.c index c007e33c47e1..72e7fcc56693 100644 --- a/drivers/net/ethernet/renesas/rcar_gen4_ptp.c +++ b/drivers/net/ethernet/renesas/rcar_gen4_ptp.c @@ -14,7 +14,7 @@ #include "rcar_gen4_ptp.h" #define ptp_to_priv(ptp) container_of(ptp, struct rcar_gen4_ptp_private, info) -static const struct rcar_gen4_ptp_reg_offset s4_offs = { +static const struct rcar_gen4_ptp_reg_offset gen4_offs = { .enable = PTPTMEC, .disable = PTPTMDC, .increment = PTPTIVC0, @@ -130,25 +130,42 @@ static struct ptp_clock_info rcar_gen4_ptp_info = { .enable = rcar_gen4_ptp_enable, }; -static void rcar_gen4_ptp_set_offs(struct rcar_gen4_ptp_private *ptp_priv, - enum rcar_gen4_ptp_reg_layout layout) +static int rcar_gen4_ptp_set_offs(struct rcar_gen4_ptp_private *ptp_priv, + enum rcar_gen4_ptp_reg_layout layout) { - WARN_ON(layout != RCAR_GEN4_PTP_REG_LAYOUT_S4); + if (layout != RCAR_GEN4_PTP_REG_LAYOUT) + return -EINVAL; - ptp_priv->offs = &s4_offs; + ptp_priv->offs = &gen4_offs; + + return 0; +} + +static s64 rcar_gen4_ptp_rate_to_increment(u32 rate) +{ + /* Timer increment in ns. 
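 * (Worked example: for the 320 MHz clock that used to be hard-coded as
 *  PTPTIVC_INIT, (10^9 << 27) / 320000000 = 0x19000000, i.e. the old
 *  constant.)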
+ * bit[31:27] - integer + * bit[26:0] - decimal + * increment[ns] = perid[ns] * 2^27 => (1ns * 2^27) / rate[hz] + */ + return div_s64(1000000000LL << 27, rate); } int rcar_gen4_ptp_register(struct rcar_gen4_ptp_private *ptp_priv, - enum rcar_gen4_ptp_reg_layout layout, u32 clock) + enum rcar_gen4_ptp_reg_layout layout, u32 rate) { + int ret; + if (ptp_priv->initialized) return 0; spin_lock_init(&ptp_priv->lock); - rcar_gen4_ptp_set_offs(ptp_priv, layout); + ret = rcar_gen4_ptp_set_offs(ptp_priv, layout); + if (ret) + return ret; - ptp_priv->default_addend = clock; + ptp_priv->default_addend = rcar_gen4_ptp_rate_to_increment(rate); iowrite32(ptp_priv->default_addend, ptp_priv->addr + ptp_priv->offs->increment); ptp_priv->clock = ptp_clock_register(&ptp_priv->info, NULL); if (IS_ERR(ptp_priv->clock)) @@ -159,6 +176,7 @@ int rcar_gen4_ptp_register(struct rcar_gen4_ptp_private *ptp_priv, return 0; } +EXPORT_SYMBOL_GPL(rcar_gen4_ptp_register); int rcar_gen4_ptp_unregister(struct rcar_gen4_ptp_private *ptp_priv) { @@ -166,6 +184,7 @@ int rcar_gen4_ptp_unregister(struct rcar_gen4_ptp_private *ptp_priv) return ptp_clock_unregister(ptp_priv->clock); } +EXPORT_SYMBOL_GPL(rcar_gen4_ptp_unregister); struct rcar_gen4_ptp_private *rcar_gen4_ptp_alloc(struct platform_device *pdev) { @@ -179,3 +198,8 @@ struct rcar_gen4_ptp_private *rcar_gen4_ptp_alloc(struct platform_device *pdev) return ptp; } +EXPORT_SYMBOL_GPL(rcar_gen4_ptp_alloc); + +MODULE_AUTHOR("Yoshihiro Shimoda"); +MODULE_DESCRIPTION("Renesas R-Car Gen4 gPTP driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/renesas/rcar_gen4_ptp.h b/drivers/net/ethernet/renesas/rcar_gen4_ptp.h index b1bbea8d3a52..e22da5acd53d 100644 --- a/drivers/net/ethernet/renesas/rcar_gen4_ptp.h +++ b/drivers/net/ethernet/renesas/rcar_gen4_ptp.h @@ -9,13 +9,10 @@ #include <linux/ptp_clock_kernel.h> -#define PTPTIVC_INIT 0x19000000 /* 320MHz */ -#define RCAR_GEN4_PTP_CLOCK_S4 PTPTIVC_INIT #define RCAR_GEN4_GPTP_OFFSET_S4 0x00018000 -/* for rcar_gen4_ptp_init */ enum rcar_gen4_ptp_reg_layout { - RCAR_GEN4_PTP_REG_LAYOUT_S4 + RCAR_GEN4_PTP_REG_LAYOUT }; /* driver's definitions */ @@ -28,7 +25,7 @@ enum rcar_gen4_ptp_reg_layout { #define PTPRO 0 -enum rcar_gen4_ptp_reg_s4 { +enum rcar_gen4_ptp_reg { PTPTMEC = PTPRO + 0x0010, PTPTMDC = PTPRO + 0x0014, PTPTIVC0 = PTPRO + 0x0020, @@ -65,7 +62,7 @@ struct rcar_gen4_ptp_private { }; int rcar_gen4_ptp_register(struct rcar_gen4_ptp_private *ptp_priv, - enum rcar_gen4_ptp_reg_layout layout, u32 clock); + enum rcar_gen4_ptp_reg_layout layout, u32 rate); int rcar_gen4_ptp_unregister(struct rcar_gen4_ptp_private *ptp_priv); struct rcar_gen4_ptp_private *rcar_gen4_ptp_alloc(struct platform_device *pdev); diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 43a7795d6591..d6089429f654 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1828,8 +1828,8 @@ static int rswitch_init(struct rswitch_private *priv) rswitch_fwd_init(priv); - err = rcar_gen4_ptp_register(priv->ptp_priv, RCAR_GEN4_PTP_REG_LAYOUT_S4, - RCAR_GEN4_PTP_CLOCK_S4); + err = rcar_gen4_ptp_register(priv->ptp_priv, RCAR_GEN4_PTP_REG_LAYOUT, + clk_get_rate(priv->clk)); if (err < 0) goto err_ptp_register; diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index e3f650e88f82..6b935922054d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -580,6 +580,7 @@ 
struct mac_device_info { u32 vlan_filter[32]; bool vlan_fail_q_en; u8 vlan_fail_q; + bool hw_vlan_en; }; struct stmmac_rx_routing { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index c6ff1fa0e04d..5f35faf90963 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -1134,6 +1134,35 @@ static int dwmac4_config_l4_filter(struct mac_device_info *hw, u32 filter_no, return 0; } +static void dwmac4_rx_hw_vlan(struct mac_device_info *hw, + struct dma_desc *rx_desc, struct sk_buff *skb) +{ + if (hw->desc->get_rx_vlan_valid(rx_desc)) { + u16 vid = hw->desc->get_rx_vlan_tci(rx_desc); + + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); + } +} + +static void dwmac4_set_hw_vlan_mode(struct mac_device_info *hw) +{ + void __iomem *ioaddr = hw->pcsr; + u32 value = readl(ioaddr + GMAC_VLAN_TAG); + + value &= ~GMAC_VLAN_TAG_CTRL_EVLS_MASK; + + if (hw->hw_vlan_en) + /* Always strip VLAN on Receive */ + value |= GMAC_VLAN_TAG_STRIP_ALL; + else + /* Do not strip VLAN on Receive */ + value |= GMAC_VLAN_TAG_STRIP_NONE; + + /* Enable outer VLAN Tag in Rx DMA descriptor */ + value |= GMAC_VLAN_TAG_CTRL_EVLRXS; + writel(value, ioaddr + GMAC_VLAN_TAG); +} + const struct stmmac_ops dwmac4_ops = { .core_init = dwmac4_core_init, .phylink_get_caps = dwmac4_phylink_get_caps, @@ -1175,6 +1204,8 @@ const struct stmmac_ops dwmac4_ops = { .add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr, .del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr, .restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr, + .rx_hw_vlan = dwmac4_rx_hw_vlan, + .set_hw_vlan_mode = dwmac4_set_hw_vlan_mode, }; const struct stmmac_ops dwmac410_ops = { @@ -1224,6 +1255,8 @@ const struct stmmac_ops dwmac410_ops = { .add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr, .del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr, .restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr, + .rx_hw_vlan = dwmac4_rx_hw_vlan, + .set_hw_vlan_mode = dwmac4_set_hw_vlan_mode, }; const struct stmmac_ops dwmac510_ops = { @@ -1277,6 +1310,8 @@ const struct stmmac_ops dwmac510_ops = { .add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr, .del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr, .restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr, + .rx_hw_vlan = dwmac4_rx_hw_vlan, + .set_hw_vlan_mode = dwmac4_set_hw_vlan_mode, }; static u32 dwmac4_get_num_vlan(void __iomem *ioaddr) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 89a14084c611..1c5802e0d7f4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -198,6 +198,17 @@ static int dwmac4_get_tx_ls(struct dma_desc *p) >> TDES3_LAST_DESCRIPTOR_SHIFT; } +static u16 dwmac4_wrback_get_rx_vlan_tci(struct dma_desc *p) +{ + return (le32_to_cpu(p->des0) & RDES0_VLAN_TAG_MASK); +} + +static bool dwmac4_wrback_get_rx_vlan_valid(struct dma_desc *p) +{ + return ((le32_to_cpu(p->des3) & RDES3_LAST_DESCRIPTOR) && + (le32_to_cpu(p->des3) & RDES3_RDES0_VALID)); +} + static int dwmac4_wrback_get_rx_frame_len(struct dma_desc *p, int rx_coe) { return (le32_to_cpu(p->des3) & RDES3_PACKET_SIZE_MASK); @@ -551,6 +562,8 @@ const struct stmmac_desc_ops dwmac4_desc_ops = { .set_tx_owner = dwmac4_set_tx_owner, .set_rx_owner = dwmac4_set_rx_owner, .get_tx_ls = dwmac4_get_tx_ls, + .get_rx_vlan_tci = dwmac4_wrback_get_rx_vlan_tci, + .get_rx_vlan_valid = 
dwmac4_wrback_get_rx_vlan_valid, .get_rx_frame_len = dwmac4_wrback_get_rx_frame_len, .enable_tx_timestamp = dwmac4_rd_enable_tx_timestamp, .get_tx_timestamp_status = dwmac4_wrback_get_tx_timestamp_status, diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index b95d3e137813..1d424c9bf037 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -56,6 +56,10 @@ struct stmmac_desc_ops { void (*set_tx_ic)(struct dma_desc *p); /* Last tx segment reports the transmit status */ int (*get_tx_ls)(struct dma_desc *p); + /* Get the tag of the descriptor */ + u16 (*get_rx_vlan_tci)(struct dma_desc *p); + /* Get the valid status of descriptor */ + bool (*get_rx_vlan_valid)(struct dma_desc *p); /* Return the transmit status looking at the TDES1 */ int (*tx_status)(struct stmmac_extra_stats *x, struct dma_desc *p, void __iomem *ioaddr); @@ -117,6 +121,10 @@ struct stmmac_desc_ops { stmmac_do_void_callback(__priv, desc, set_tx_ic, __args) #define stmmac_get_tx_ls(__priv, __args...) \ stmmac_do_callback(__priv, desc, get_tx_ls, __args) +#define stmmac_get_rx_vlan_tci(__priv, __args...) \ + stmmac_do_callback(__priv, desc, get_rx_vlan_tci, __args) +#define stmmac_get_rx_vlan_valid(__priv, __args...) \ + stmmac_do_callback(__priv, desc, get_rx_vlan_valid, __args) #define stmmac_tx_status(__priv, __args...) \ stmmac_do_callback(__priv, desc, tx_status, __args) #define stmmac_get_tx_len(__priv, __args...) \ @@ -388,6 +396,9 @@ struct stmmac_ops { void (*update_vlan_hash)(struct mac_device_info *hw, u32 hash, __le16 perfect_match, bool is_double); void (*enable_vlan)(struct mac_device_info *hw, u32 type); + void (*rx_hw_vlan)(struct mac_device_info *hw, struct dma_desc *rx_desc, + struct sk_buff *skb); + void (*set_hw_vlan_mode)(struct mac_device_info *hw); int (*add_hw_vlan_rx_fltr)(struct net_device *dev, struct mac_device_info *hw, __be16 proto, u16 vid); @@ -497,6 +508,10 @@ struct stmmac_ops { stmmac_do_void_callback(__priv, mac, update_vlan_hash, __args) #define stmmac_enable_vlan(__priv, __args...) \ stmmac_do_void_callback(__priv, mac, enable_vlan, __args) +#define stmmac_rx_hw_vlan(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, rx_hw_vlan, __args) +#define stmmac_set_hw_vlan_mode(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, set_hw_vlan_mode, __args) #define stmmac_add_hw_vlan_rx_fltr(__priv, __args...) \ stmmac_do_callback(__priv, mac, add_hw_vlan_rx_fltr, __args) #define stmmac_del_hw_vlan_rx_fltr(__priv, __args...) \ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 2afb2bd25977..8964fc8a955f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3469,6 +3469,8 @@ static int stmmac_hw_setup(struct net_device *dev, bool ptp_register) /* Start the ball rolling... */ stmmac_start_all_dma(priv); + stmmac_set_hw_vlan_mode(priv, priv->hw); + if (priv->dma_cap.fpesel) { stmmac_fpe_start_wq(priv); @@ -4993,7 +4995,12 @@ static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue, } stmmac_get_rx_hwtstamp(priv, p, np, skb); - stmmac_rx_vlan(priv->dev, skb); + if (priv->hw->hw_vlan_en) + /* MAC level stripping. */ + stmmac_rx_hw_vlan(priv, priv->hw, p, skb); + else + /* Driver level stripping. 
*/ + stmmac_rx_vlan(priv->dev, skb); skb->protocol = eth_type_trans(skb, priv->dev); if (unlikely(!coe)) @@ -5509,7 +5516,14 @@ drain_data: /* Got entire packet into SKB. Finish it. */ stmmac_get_rx_hwtstamp(priv, p, np, skb); - stmmac_rx_vlan(priv->dev, skb); + + if (priv->hw->hw_vlan_en) + /* MAC level stripping. */ + stmmac_rx_hw_vlan(priv, priv->hw, p, skb); + else + /* Driver level stripping. */ + stmmac_rx_vlan(priv->dev, skb); + skb->protocol = eth_type_trans(skb, priv->dev); if (unlikely(!coe)) @@ -5818,6 +5832,13 @@ static int stmmac_set_features(struct net_device *netdev, stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan); } + if (features & NETIF_F_HW_VLAN_CTAG_RX) + priv->hw->hw_vlan_en = true; + else + priv->hw->hw_vlan_en = false; + + stmmac_set_hw_vlan_mode(priv, priv->hw); + return 0; } @@ -6180,30 +6201,23 @@ static struct dentry *stmmac_fs_dir; static void sysfs_display_ring(void *head, int size, int extend_desc, struct seq_file *seq, dma_addr_t dma_phy_addr) { - int i; struct dma_extended_desc *ep = (struct dma_extended_desc *)head; struct dma_desc *p = (struct dma_desc *)head; + unsigned int desc_size; dma_addr_t dma_addr; + int i; + desc_size = extend_desc ? sizeof(*ep) : sizeof(*p); for (i = 0; i < size; i++) { - if (extend_desc) { - dma_addr = dma_phy_addr + i * sizeof(*ep); - seq_printf(seq, "%d [%pad]: 0x%x 0x%x 0x%x 0x%x\n", - i, &dma_addr, - le32_to_cpu(ep->basic.des0), - le32_to_cpu(ep->basic.des1), - le32_to_cpu(ep->basic.des2), - le32_to_cpu(ep->basic.des3)); - ep++; - } else { - dma_addr = dma_phy_addr + i * sizeof(*p); - seq_printf(seq, "%d [%pad]: 0x%x 0x%x 0x%x 0x%x\n", - i, &dma_addr, - le32_to_cpu(p->des0), le32_to_cpu(p->des1), - le32_to_cpu(p->des2), le32_to_cpu(p->des3)); + dma_addr = dma_phy_addr + i * desc_size; + seq_printf(seq, "%d [%pad]: 0x%x 0x%x 0x%x 0x%x\n", + i, &dma_addr, + le32_to_cpu(p->des0), le32_to_cpu(p->des1), + le32_to_cpu(p->des2), le32_to_cpu(p->des3)); + if (extend_desc) + p = &(++ep)->basic; + else p++; - } - seq_printf(seq, "\n"); } } @@ -7516,6 +7530,9 @@ int stmmac_dvr_probe(struct device *device, #ifdef STMMAC_VLAN_TAG_USED /* Both mac100 and gmac support receive VLAN tag detection */ ndev->features |= NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX; + ndev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; + priv->hw->hw_vlan_en = true; + if (priv->dma_cap.vlhash) { ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ndev->features |= NETIF_F_HW_VLAN_STAG_FILTER; diff --git a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c index c51e2af91f69..b9e1d568604b 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c +++ b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c @@ -662,6 +662,31 @@ static void am65_cpsw_get_ethtool_stats(struct net_device *ndev, hw_stats[i].offset); } +static void am65_cpsw_get_eth_mac_stats(struct net_device *ndev, + struct ethtool_eth_mac_stats *s) +{ + struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + struct am65_cpsw_stats_regs __iomem *stats; + + stats = port->stat_base; + + s->FramesTransmittedOK = readl_relaxed(&stats->tx_good_frames); + s->SingleCollisionFrames = readl_relaxed(&stats->tx_single_coll_frames); + s->MultipleCollisionFrames = readl_relaxed(&stats->tx_mult_coll_frames); + s->FramesReceivedOK = readl_relaxed(&stats->rx_good_frames); + s->FrameCheckSequenceErrors = readl_relaxed(&stats->rx_crc_errors); + s->AlignmentErrors = readl_relaxed(&stats->rx_align_code_errors); + s->OctetsTransmittedOK = readl_relaxed(&stats->tx_octets); + 
s->FramesWithDeferredXmissions = readl_relaxed(&stats->tx_deferred_frames); + s->LateCollisions = readl_relaxed(&stats->tx_late_collisions); + s->CarrierSenseErrors = readl_relaxed(&stats->tx_carrier_sense_errors); + s->OctetsReceivedOK = readl_relaxed(&stats->rx_octets); + s->MulticastFramesXmittedOK = readl_relaxed(&stats->tx_multicast_frames); + s->BroadcastFramesXmittedOK = readl_relaxed(&stats->tx_broadcast_frames); + s->MulticastFramesReceivedOK = readl_relaxed(&stats->rx_multicast_frames); + s->BroadcastFramesReceivedOK = readl_relaxed(&stats->rx_broadcast_frames); +}; + static int am65_cpsw_get_ethtool_ts_info(struct net_device *ndev, struct ethtool_ts_info *info) { @@ -729,6 +754,7 @@ const struct ethtool_ops am65_cpsw_ethtool_ops_slave = { .get_sset_count = am65_cpsw_get_sset_count, .get_strings = am65_cpsw_get_strings, .get_ethtool_stats = am65_cpsw_get_ethtool_stats, + .get_eth_mac_stats = am65_cpsw_get_eth_mac_stats, .get_ts_info = am65_cpsw_get_ethtool_ts_info, .get_priv_flags = am65_cpsw_get_ethtool_priv_flags, .set_priv_flags = am65_cpsw_set_ethtool_priv_flags, diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index ece9f8df98ae..7992a76ed4d8 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -136,6 +136,8 @@ NETIF_MSG_IFUP | NETIF_MSG_PROBE | NETIF_MSG_IFDOWN | \ NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) +#define AM65_CPSW_DEFAULT_TX_CHNS 8 + static void am65_cpsw_port_set_sl_mac(struct am65_cpsw_port *slave, const u8 *dev_addr) { @@ -367,10 +369,81 @@ static void am65_cpsw_init_host_port_emac(struct am65_cpsw_common *common); static void am65_cpsw_init_port_switch_ale(struct am65_cpsw_port *port); static void am65_cpsw_init_port_emac_ale(struct am65_cpsw_port *port); +static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma) +{ + struct am65_cpsw_rx_chn *rx_chn = data; + struct cppi5_host_desc_t *desc_rx; + struct sk_buff *skb; + dma_addr_t buf_dma; + u32 buf_dma_len; + void **swdata; + + desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); + swdata = cppi5_hdesc_get_swdata(desc_rx); + skb = *swdata; + cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); + k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma); + + dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE); + k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); + + dev_kfree_skb_any(skb); +} + +static void am65_cpsw_nuss_xmit_free(struct am65_cpsw_tx_chn *tx_chn, + struct cppi5_host_desc_t *desc) +{ + struct cppi5_host_desc_t *first_desc, *next_desc; + dma_addr_t buf_dma, next_desc_dma; + u32 buf_dma_len; + + first_desc = desc; + next_desc = first_desc; + + cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len); + k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma); + + dma_unmap_single(tx_chn->dma_dev, buf_dma, buf_dma_len, DMA_TO_DEVICE); + + next_desc_dma = cppi5_hdesc_get_next_hbdesc(first_desc); + k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &next_desc_dma); + while (next_desc_dma) { + next_desc = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, + next_desc_dma); + cppi5_hdesc_get_obuf(next_desc, &buf_dma, &buf_dma_len); + k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma); + + dma_unmap_page(tx_chn->dma_dev, buf_dma, buf_dma_len, + DMA_TO_DEVICE); + + next_desc_dma = cppi5_hdesc_get_next_hbdesc(next_desc); + k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &next_desc_dma); + + k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc); + } + + 
k3_cppi_desc_pool_free(tx_chn->desc_pool, first_desc); +} + +static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma) +{ + struct am65_cpsw_tx_chn *tx_chn = data; + struct cppi5_host_desc_t *desc_tx; + struct sk_buff *skb; + void **swdata; + + desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma); + swdata = cppi5_hdesc_get_swdata(desc_tx); + skb = *(swdata); + am65_cpsw_nuss_xmit_free(tx_chn, desc_tx); + + dev_kfree_skb_any(skb); +} + static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) { struct am65_cpsw_host *host_p = am65_common_get_host(common); - int port_idx, i, ret; + int port_idx, i, ret, tx; struct sk_buff *skb; u32 val, port_mask; @@ -437,8 +510,12 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) AM65_CPSW_MAX_PACKET_SIZE, GFP_KERNEL); if (!skb) { + ret = -ENOMEM; dev_err(common->dev, "cannot allocate skb\n"); - return -ENOMEM; + if (i) + goto fail_rx; + + return ret; } ret = am65_cpsw_nuss_rx_push(common, skb); @@ -447,17 +524,28 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) "cannot submit skb to channel rx, error %d\n", ret); kfree_skb(skb); + if (i) + goto fail_rx; + return ret; } - kmemleak_not_leak(skb); } - k3_udma_glue_enable_rx_chn(common->rx_chns.rx_chn); - for (i = 0; i < common->tx_ch_num; i++) { - ret = k3_udma_glue_enable_tx_chn(common->tx_chns[i].tx_chn); - if (ret) - return ret; - napi_enable(&common->tx_chns[i].napi_tx); + ret = k3_udma_glue_enable_rx_chn(common->rx_chns.rx_chn); + if (ret) { + dev_err(common->dev, "couldn't enable rx chn: %d\n", ret); + goto fail_rx; + } + + for (tx = 0; tx < common->tx_ch_num; tx++) { + ret = k3_udma_glue_enable_tx_chn(common->tx_chns[tx].tx_chn); + if (ret) { + dev_err(common->dev, "couldn't enable tx chn %d: %d\n", + tx, ret); + tx--; + goto fail_tx; + } + napi_enable(&common->tx_chns[tx].napi_tx); } napi_enable(&common->napi_rx); @@ -468,10 +556,22 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) dev_dbg(common->dev, "cpsw_nuss started\n"); return 0; -} -static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma); -static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma); +fail_tx: + while (tx >= 0) { + napi_disable(&common->tx_chns[tx].napi_tx); + k3_udma_glue_disable_tx_chn(common->tx_chns[tx].tx_chn); + tx--; + } + + k3_udma_glue_disable_rx_chn(common->rx_chns.rx_chn); + +fail_rx: + k3_udma_glue_reset_rx_chn(common->rx_chns.rx_chn, 0, + &common->rx_chns, + am65_cpsw_nuss_rx_cleanup, 0); + return ret; +} static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common) { @@ -646,27 +746,6 @@ runtime_put: return ret; } -static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma) -{ - struct am65_cpsw_rx_chn *rx_chn = data; - struct cppi5_host_desc_t *desc_rx; - struct sk_buff *skb; - dma_addr_t buf_dma; - u32 buf_dma_len; - void **swdata; - - desc_rx = k3_cppi_desc_pool_dma2virt(rx_chn->desc_pool, desc_dma); - swdata = cppi5_hdesc_get_swdata(desc_rx); - skb = *swdata; - cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); - k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma); - - dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE); - k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); - - dev_kfree_skb_any(skb); -} - static void am65_cpsw_nuss_rx_ts(struct sk_buff *skb, u32 *psdata) { struct skb_shared_hwtstamps *ssh; @@ -840,56 +919,6 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) return num_rx; } 
-static void am65_cpsw_nuss_xmit_free(struct am65_cpsw_tx_chn *tx_chn, - struct cppi5_host_desc_t *desc) -{ - struct cppi5_host_desc_t *first_desc, *next_desc; - dma_addr_t buf_dma, next_desc_dma; - u32 buf_dma_len; - - first_desc = desc; - next_desc = first_desc; - - cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len); - k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma); - - dma_unmap_single(tx_chn->dma_dev, buf_dma, buf_dma_len, DMA_TO_DEVICE); - - next_desc_dma = cppi5_hdesc_get_next_hbdesc(first_desc); - k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &next_desc_dma); - while (next_desc_dma) { - next_desc = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, - next_desc_dma); - cppi5_hdesc_get_obuf(next_desc, &buf_dma, &buf_dma_len); - k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma); - - dma_unmap_page(tx_chn->dma_dev, buf_dma, buf_dma_len, - DMA_TO_DEVICE); - - next_desc_dma = cppi5_hdesc_get_next_hbdesc(next_desc); - k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &next_desc_dma); - - k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc); - } - - k3_cppi_desc_pool_free(tx_chn->desc_pool, first_desc); -} - -static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma) -{ - struct am65_cpsw_tx_chn *tx_chn = data; - struct cppi5_host_desc_t *desc_tx; - struct sk_buff *skb; - void **swdata; - - desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma); - swdata = cppi5_hdesc_get_swdata(desc_tx); - skb = *(swdata); - am65_cpsw_nuss_xmit_free(tx_chn, desc_tx); - - dev_kfree_skb_any(skb); -} - static struct sk_buff * am65_cpsw_nuss_tx_compl_packet(struct am65_cpsw_tx_chn *tx_chn, dma_addr_t desc_dma) @@ -2897,7 +2926,7 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) common->rx_flow_id_base = -1; init_completion(&common->tdown_complete); - common->tx_ch_num = 1; + common->tx_ch_num = AM65_CPSW_DEFAULT_TX_CHNS; common->pf_p0_rx_ptype_rrobin = false; common->default_vlan = 1; diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig index 0014729b8865..35d96c633a33 100644 --- a/drivers/net/ethernet/xilinx/Kconfig +++ b/drivers/net/ethernet/xilinx/Kconfig @@ -26,6 +26,7 @@ config XILINX_EMACLITE config XILINX_AXI_EMAC tristate "Xilinx 10/100/1000 AXI Ethernet support" depends on HAS_IOMEM + depends on XILINX_DMA select PHYLINK help This driver supports the 10/100/1000 Ethernet from Xilinx for the diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index 575ff9de8985..807ead678551 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -14,6 +14,7 @@ #include <linux/interrupt.h> #include <linux/if_vlan.h> #include <linux/phylink.h> +#include <linux/skbuff.h> /* Packet size info */ #define XAE_HDR_SIZE 14 /* Size of Ethernet header */ @@ -379,6 +380,22 @@ struct axidma_bd { #define XAE_NUM_MISC_CLOCKS 3 /** + * struct skbuf_dma_descriptor - skb for each dma descriptor + * @sgl: Pointer for sglist. + * @desc: Pointer to dma descriptor. + * @dma_address: dma address of sglist. + * @skb: Pointer to SKB transferred using DMA + * @sg_len: number of entries in the sglist. + */ +struct skbuf_dma_descriptor { + struct scatterlist sgl[MAX_SKB_FRAGS + 1]; + struct dma_async_tx_descriptor *desc; + dma_addr_t dma_address; + struct sk_buff *skb; + int sg_len; +}; + +/** * struct axienet_local - axienet private per device data * @ndev: Pointer for net_device to which it will be attached. 
* @dev: Pointer to device structure @@ -435,6 +452,15 @@ struct axidma_bd { * @coalesce_usec_rx: IRQ coalesce delay for RX * @coalesce_count_tx: Store the irq coalesce on TX side. * @coalesce_usec_tx: IRQ coalesce delay for TX + * @use_dmaengine: flag to check dmaengine framework usage. + * @tx_chan: TX DMA channel. + * @rx_chan: RX DMA channel. + * @tx_skb_ring: Pointer to TX skb ring buffer array. + * @rx_skb_ring: Pointer to RX skb ring buffer array. + * @tx_ring_head: TX skb ring buffer head index. + * @tx_ring_tail: TX skb ring buffer tail index. + * @rx_ring_head: RX skb ring buffer head index. + * @rx_ring_tail: RX skb ring buffer tail index. */ struct axienet_local { struct net_device *ndev; @@ -499,6 +525,15 @@ struct axienet_local { u32 coalesce_usec_rx; u32 coalesce_count_tx; u32 coalesce_usec_tx; + u8 use_dmaengine; + struct dma_chan *tx_chan; + struct dma_chan *rx_chan; + struct skbuf_dma_descriptor **tx_skb_ring; + struct skbuf_dma_descriptor **rx_skb_ring; + int tx_ring_head; + int tx_ring_tail; + int rx_ring_head; + int rx_ring_tail; }; /** diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index bf6e33990490..aaf780fd4f5e 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -38,6 +38,11 @@ #include <linux/phy.h> #include <linux/mii.h> #include <linux/ethtool.h> +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/dma/xilinx_dma.h> +#include <linux/circ_buf.h> +#include <net/netdev_queues.h> #include "xilinx_axienet.h" @@ -47,6 +52,9 @@ #define TX_BD_NUM_MIN (MAX_SKB_FRAGS + 1) #define TX_BD_NUM_MAX 4096 #define RX_BD_NUM_MAX 4096 +#define DMA_NUM_APP_WORDS 5 +#define LEN_APP 4 +#define RX_BUF_NUM_DEFAULT 128 /* Must be shorter than length of ethtool_drvinfo.driver field to fit */ #define DRIVER_NAME "xaxienet" @@ -55,6 +63,8 @@ #define AXIENET_REGS_N 40 +static void axienet_rx_submit_desc(struct net_device *ndev); + /* Match table for of_platform binding */ static const struct of_device_id axienet_of_match[] = { { .compatible = "xlnx,axi-ethernet-1.00.a", }, @@ -120,6 +130,16 @@ static struct axienet_option axienet_options[] = { {} }; +static struct skbuf_dma_descriptor *axienet_get_rx_desc(struct axienet_local *lp, int i) +{ + return lp->rx_skb_ring[i & (RX_BUF_NUM_DEFAULT - 1)]; +} + +static struct skbuf_dma_descriptor *axienet_get_tx_desc(struct axienet_local *lp, int i) +{ + return lp->tx_skb_ring[i & (TX_BD_NUM_MAX - 1)]; +} + /** * axienet_dma_in32 - Memory mapped Axi DMA register read * @lp: Pointer to axienet local structure @@ -589,10 +609,6 @@ static int axienet_device_reset(struct net_device *ndev) struct axienet_local *lp = netdev_priv(ndev); int ret; - ret = __axienet_device_reset(lp); - if (ret) - return ret; - lp->max_frm_size = XAE_MAX_VLAN_FRAME_SIZE; lp->options |= XAE_OPTION_VLAN; lp->options &= (~XAE_OPTION_JUMBO); @@ -606,11 +622,17 @@ static int axienet_device_reset(struct net_device *ndev) lp->options |= XAE_OPTION_JUMBO; } - ret = axienet_dma_bd_init(ndev); - if (ret) { - netdev_err(ndev, "%s: descriptor allocation failed\n", - __func__); - return ret; + if (!lp->use_dmaengine) { + ret = __axienet_device_reset(lp); + if (ret) + return ret; + + ret = axienet_dma_bd_init(ndev); + if (ret) { + netdev_err(ndev, "%s: descriptor allocation failed\n", + __func__); + return ret; + } } axienet_status = axienet_ior(lp, XAE_RCW1_OFFSET); @@ -726,6 +748,128 @@ static inline int 
axienet_check_tx_bd_space(struct axienet_local *lp, } /** + * axienet_dma_tx_cb - DMA engine callback for TX channel. + * @data: Pointer to the axienet_local structure. + * @result: error reporting through dmaengine_result. + * This function is called by dmaengine driver for TX channel to notify + * that the transmit is done. + */ +static void axienet_dma_tx_cb(void *data, const struct dmaengine_result *result) +{ + struct skbuf_dma_descriptor *skbuf_dma; + struct axienet_local *lp = data; + struct netdev_queue *txq; + int len; + + skbuf_dma = axienet_get_tx_desc(lp, lp->tx_ring_tail++); + len = skbuf_dma->skb->len; + txq = skb_get_tx_queue(lp->ndev, skbuf_dma->skb); + u64_stats_update_begin(&lp->tx_stat_sync); + u64_stats_add(&lp->tx_bytes, len); + u64_stats_add(&lp->tx_packets, 1); + u64_stats_update_end(&lp->tx_stat_sync); + dma_unmap_sg(lp->dev, skbuf_dma->sgl, skbuf_dma->sg_len, DMA_TO_DEVICE); + dev_consume_skb_any(skbuf_dma->skb); + netif_txq_completed_wake(txq, 1, len, + CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX), + 2 * MAX_SKB_FRAGS); +} + +/** + * axienet_start_xmit_dmaengine - Starts the transmission. + * @skb: sk_buff pointer that contains data to be Txed. + * @ndev: Pointer to net_device structure. + * + * Return: NETDEV_TX_OK on success or any non space errors. + * NETDEV_TX_BUSY when free element in TX skb ring buffer + * is not available. + * + * This function is invoked to initiate transmission. The + * function sets the skbs, register dma callback API and submit + * the dma transaction. + * Additionally if checksum offloading is supported, + * it populates AXI Stream Control fields with appropriate values. + */ +static netdev_tx_t +axienet_start_xmit_dmaengine(struct sk_buff *skb, struct net_device *ndev) +{ + struct dma_async_tx_descriptor *dma_tx_desc = NULL; + struct axienet_local *lp = netdev_priv(ndev); + u32 app_metadata[DMA_NUM_APP_WORDS] = {0}; + struct skbuf_dma_descriptor *skbuf_dma; + struct dma_device *dma_dev; + struct netdev_queue *txq; + u32 csum_start_off; + u32 csum_index_off; + int sg_len; + int ret; + + dma_dev = lp->tx_chan->device; + sg_len = skb_shinfo(skb)->nr_frags + 1; + if (CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX) <= sg_len) { + netif_stop_queue(ndev); + if (net_ratelimit()) + netdev_warn(ndev, "TX ring unexpectedly full\n"); + return NETDEV_TX_BUSY; + } + + skbuf_dma = axienet_get_tx_desc(lp, lp->tx_ring_head); + if (!skbuf_dma) + goto xmit_error_drop_skb; + + lp->tx_ring_head++; + sg_init_table(skbuf_dma->sgl, sg_len); + ret = skb_to_sgvec(skb, skbuf_dma->sgl, 0, skb->len); + if (ret < 0) + goto xmit_error_drop_skb; + + ret = dma_map_sg(lp->dev, skbuf_dma->sgl, sg_len, DMA_TO_DEVICE); + if (!ret) + goto xmit_error_drop_skb; + + /* Fill up app fields for checksum */ + if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (lp->features & XAE_FEATURE_FULL_TX_CSUM) { + /* Tx Full Checksum Offload Enabled */ + app_metadata[0] |= 2; + } else if (lp->features & XAE_FEATURE_PARTIAL_TX_CSUM) { + csum_start_off = skb_transport_offset(skb); + csum_index_off = csum_start_off + skb->csum_offset; + /* Tx Partial Checksum Offload Enabled */ + app_metadata[0] |= 1; + app_metadata[1] = (csum_start_off << 16) | csum_index_off; + } + } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + app_metadata[0] |= 2; /* Tx Full Checksum Offload Enabled */ + } + + dma_tx_desc = dma_dev->device_prep_slave_sg(lp->tx_chan, skbuf_dma->sgl, + sg_len, DMA_MEM_TO_DEV, + DMA_PREP_INTERRUPT, (void *)app_metadata); + if (!dma_tx_desc) + goto 
xmit_error_unmap_sg; + + skbuf_dma->skb = skb; + skbuf_dma->sg_len = sg_len; + dma_tx_desc->callback_param = lp; + dma_tx_desc->callback_result = axienet_dma_tx_cb; + dmaengine_submit(dma_tx_desc); + dma_async_issue_pending(lp->tx_chan); + txq = skb_get_tx_queue(lp->ndev, skb); + netdev_tx_sent_queue(txq, skb->len); + netif_txq_maybe_stop(txq, CIRC_SPACE(lp->tx_ring_head, lp->tx_ring_tail, TX_BD_NUM_MAX), + MAX_SKB_FRAGS + 1, 2 * MAX_SKB_FRAGS); + + return NETDEV_TX_OK; + +xmit_error_unmap_sg: + dma_unmap_sg(lp->dev, skbuf_dma->sgl, sg_len, DMA_TO_DEVICE); +xmit_error_drop_skb: + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +/** * axienet_tx_poll - Invoked once a transmit is completed by the * Axi DMA Tx channel. * @napi: Pointer to NAPI structure. @@ -892,6 +1036,42 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) } /** + * axienet_dma_rx_cb - DMA engine callback for RX channel. + * @data: Pointer to the skbuf_dma_descriptor structure. + * @result: error reporting through dmaengine_result. + * This function is called by dmaengine driver for RX channel to notify + * that the packet is received. + */ +static void axienet_dma_rx_cb(void *data, const struct dmaengine_result *result) +{ + struct skbuf_dma_descriptor *skbuf_dma; + size_t meta_len, meta_max_len, rx_len; + struct axienet_local *lp = data; + struct sk_buff *skb; + u32 *app_metadata; + + skbuf_dma = axienet_get_rx_desc(lp, lp->rx_ring_tail++); + skb = skbuf_dma->skb; + app_metadata = dmaengine_desc_get_metadata_ptr(skbuf_dma->desc, &meta_len, + &meta_max_len); + dma_unmap_single(lp->dev, skbuf_dma->dma_address, lp->max_frm_size, + DMA_FROM_DEVICE); + /* TODO: Derive app word index programmatically */ + rx_len = (app_metadata[LEN_APP] & 0xFFFF); + skb_put(skb, rx_len); + skb->protocol = eth_type_trans(skb, lp->ndev); + skb->ip_summed = CHECKSUM_NONE; + + __netif_rx(skb); + u64_stats_update_begin(&lp->rx_stat_sync); + u64_stats_add(&lp->rx_packets, 1); + u64_stats_add(&lp->rx_bytes, rx_len); + u64_stats_update_end(&lp->rx_stat_sync); + axienet_rx_submit_desc(lp->ndev); + dma_async_issue_pending(lp->rx_chan); +} + +/** * axienet_rx_poll - Triggered by RX ISR to complete the BD processing. * @napi: Pointer to NAPI structure. * @budget: Max number of RX packets to process. @@ -1125,40 +1305,158 @@ static irqreturn_t axienet_eth_irq(int irq, void *_ndev) static void axienet_dma_err_handler(struct work_struct *work); /** - * axienet_open - Driver open routine. - * @ndev: Pointer to net_device structure + * axienet_rx_submit_desc - Submit the rx descriptors to dmaengine. + * allocate skbuff, map the scatterlist and obtain a descriptor + * and then add the callback information and submit descriptor. 
+ * + * @ndev: net_device pointer + * + */ +static void axienet_rx_submit_desc(struct net_device *ndev) +{ + struct dma_async_tx_descriptor *dma_rx_desc = NULL; + struct axienet_local *lp = netdev_priv(ndev); + struct skbuf_dma_descriptor *skbuf_dma; + struct sk_buff *skb; + dma_addr_t addr; + + skbuf_dma = axienet_get_rx_desc(lp, lp->rx_ring_head); + if (!skbuf_dma) + return; + + lp->rx_ring_head++; + skb = netdev_alloc_skb(ndev, lp->max_frm_size); + if (!skb) + return; + + sg_init_table(skbuf_dma->sgl, 1); + addr = dma_map_single(lp->dev, skb->data, lp->max_frm_size, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(lp->dev, addr))) { + if (net_ratelimit()) + netdev_err(ndev, "DMA mapping error\n"); + goto rx_submit_err_free_skb; + } + sg_dma_address(skbuf_dma->sgl) = addr; + sg_dma_len(skbuf_dma->sgl) = lp->max_frm_size; + dma_rx_desc = dmaengine_prep_slave_sg(lp->rx_chan, skbuf_dma->sgl, + 1, DMA_DEV_TO_MEM, + DMA_PREP_INTERRUPT); + if (!dma_rx_desc) + goto rx_submit_err_unmap_skb; + + skbuf_dma->skb = skb; + skbuf_dma->dma_address = sg_dma_address(skbuf_dma->sgl); + skbuf_dma->desc = dma_rx_desc; + dma_rx_desc->callback_param = lp; + dma_rx_desc->callback_result = axienet_dma_rx_cb; + dmaengine_submit(dma_rx_desc); + + return; + +rx_submit_err_unmap_skb: + dma_unmap_single(lp->dev, addr, lp->max_frm_size, DMA_FROM_DEVICE); +rx_submit_err_free_skb: + dev_kfree_skb(skb); +} + +/** + * axienet_init_dmaengine - init the dmaengine code. + * @ndev: Pointer to net_device structure * * Return: 0, on success. - * non-zero error value on failure + * non-zero error value on failure * - * This is the driver open routine. It calls phylink_start to start the - * PHY device. - * It also allocates interrupt service routines, enables the interrupt lines - * and ISR handling. Axi Ethernet core is reset through Axi DMA core. Buffer - * descriptors are initialized. + * This is the dmaengine initialization code. */ -static int axienet_open(struct net_device *ndev) +static int axienet_init_dmaengine(struct net_device *ndev) { - int ret; struct axienet_local *lp = netdev_priv(ndev); + struct skbuf_dma_descriptor *skbuf_dma; + int i, ret; - dev_dbg(&ndev->dev, "axienet_open()\n"); + lp->tx_chan = dma_request_chan(lp->dev, "tx_chan0"); + if (IS_ERR(lp->tx_chan)) { + dev_err(lp->dev, "No Ethernet DMA (TX) channel found\n"); + return PTR_ERR(lp->tx_chan); + } - /* When we do an Axi Ethernet reset, it resets the complete core - * including the MDIO. MDIO must be disabled before resetting. - * Hold MDIO bus lock to avoid MDIO accesses during the reset. 
- */ - axienet_lock_mii(lp); - ret = axienet_device_reset(ndev); - axienet_unlock_mii(lp); + lp->rx_chan = dma_request_chan(lp->dev, "rx_chan0"); + if (IS_ERR(lp->rx_chan)) { + ret = PTR_ERR(lp->rx_chan); + dev_err(lp->dev, "No Ethernet DMA (RX) channel found\n"); + goto err_dma_release_tx; + } - ret = phylink_of_phy_connect(lp->phylink, lp->dev->of_node, 0); - if (ret) { - dev_err(lp->dev, "phylink_of_phy_connect() failed: %d\n", ret); - return ret; + lp->tx_ring_tail = 0; + lp->tx_ring_head = 0; + lp->rx_ring_tail = 0; + lp->rx_ring_head = 0; + lp->tx_skb_ring = kcalloc(TX_BD_NUM_MAX, sizeof(*lp->tx_skb_ring), + GFP_KERNEL); + if (!lp->tx_skb_ring) { + ret = -ENOMEM; + goto err_dma_release_rx; + } + for (i = 0; i < TX_BD_NUM_MAX; i++) { + skbuf_dma = kzalloc(sizeof(*skbuf_dma), GFP_KERNEL); + if (!skbuf_dma) { + ret = -ENOMEM; + goto err_free_tx_skb_ring; + } + lp->tx_skb_ring[i] = skbuf_dma; } - phylink_start(lp->phylink); + lp->rx_skb_ring = kcalloc(RX_BUF_NUM_DEFAULT, sizeof(*lp->rx_skb_ring), + GFP_KERNEL); + if (!lp->rx_skb_ring) { + ret = -ENOMEM; + goto err_free_tx_skb_ring; + } + for (i = 0; i < RX_BUF_NUM_DEFAULT; i++) { + skbuf_dma = kzalloc(sizeof(*skbuf_dma), GFP_KERNEL); + if (!skbuf_dma) { + ret = -ENOMEM; + goto err_free_rx_skb_ring; + } + lp->rx_skb_ring[i] = skbuf_dma; + } + /* TODO: Instead of BD_NUM_DEFAULT use runtime support */ + for (i = 0; i < RX_BUF_NUM_DEFAULT; i++) + axienet_rx_submit_desc(ndev); + dma_async_issue_pending(lp->rx_chan); + + return 0; + +err_free_rx_skb_ring: + for (i = 0; i < RX_BUF_NUM_DEFAULT; i++) + kfree(lp->rx_skb_ring[i]); + kfree(lp->rx_skb_ring); +err_free_tx_skb_ring: + for (i = 0; i < TX_BD_NUM_MAX; i++) + kfree(lp->tx_skb_ring[i]); + kfree(lp->tx_skb_ring); +err_dma_release_rx: + dma_release_channel(lp->rx_chan); +err_dma_release_tx: + dma_release_channel(lp->tx_chan); + return ret; +} + +/** + * axienet_init_legacy_dma - init the dma legacy code. + * @ndev: Pointer to net_device structure + * + * Return: 0, on success. + * non-zero error value on failure + * + * This is the dma initialization code. It also allocates interrupt + * service routines, enables the interrupt lines and ISR handling. + * + */ +static int axienet_init_legacy_dma(struct net_device *ndev) +{ + int ret; + struct axienet_local *lp = netdev_priv(ndev); /* Enable worker thread for Axi DMA error handling */ INIT_WORK(&lp->dma_err_task, axienet_dma_err_handler); @@ -1193,14 +1491,77 @@ err_rx_irq: err_tx_irq: napi_disable(&lp->napi_tx); napi_disable(&lp->napi_rx); - phylink_stop(lp->phylink); - phylink_disconnect_phy(lp->phylink); cancel_work_sync(&lp->dma_err_task); dev_err(lp->dev, "request_irq() failed\n"); return ret; } /** + * axienet_open - Driver open routine. + * @ndev: Pointer to net_device structure + * + * Return: 0, on success. + * non-zero error value on failure + * + * This is the driver open routine. It calls phylink_start to start the + * PHY device. + * It also allocates interrupt service routines, enables the interrupt lines + * and ISR handling. Axi Ethernet core is reset through Axi DMA core. Buffer + * descriptors are initialized. + */ +static int axienet_open(struct net_device *ndev) +{ + int ret; + struct axienet_local *lp = netdev_priv(ndev); + + dev_dbg(&ndev->dev, "%s\n", __func__); + + /* When we do an Axi Ethernet reset, it resets the complete core + * including the MDIO. MDIO must be disabled before resetting. + * Hold MDIO bus lock to avoid MDIO accesses during the reset. 
+ */ + axienet_lock_mii(lp); + ret = axienet_device_reset(ndev); + axienet_unlock_mii(lp); + + ret = phylink_of_phy_connect(lp->phylink, lp->dev->of_node, 0); + if (ret) { + dev_err(lp->dev, "phylink_of_phy_connect() failed: %d\n", ret); + return ret; + } + + phylink_start(lp->phylink); + + if (lp->use_dmaengine) { + /* Enable interrupts for Axi Ethernet core (if defined) */ + if (lp->eth_irq > 0) { + ret = request_irq(lp->eth_irq, axienet_eth_irq, IRQF_SHARED, + ndev->name, ndev); + if (ret) + goto err_phy; + } + + ret = axienet_init_dmaengine(ndev); + if (ret < 0) + goto err_free_eth_irq; + } else { + ret = axienet_init_legacy_dma(ndev); + if (ret) + goto err_phy; + } + + return 0; + +err_free_eth_irq: + if (lp->eth_irq > 0) + free_irq(lp->eth_irq, ndev); +err_phy: + phylink_stop(lp->phylink); + phylink_disconnect_phy(lp->phylink); + return ret; +} + +/** * axienet_stop - Driver stop routine. * @ndev: Pointer to net_device structure * @@ -1213,11 +1574,14 @@ err_tx_irq: static int axienet_stop(struct net_device *ndev) { struct axienet_local *lp = netdev_priv(ndev); + int i; dev_dbg(&ndev->dev, "axienet_close()\n"); - napi_disable(&lp->napi_tx); - napi_disable(&lp->napi_rx); + if (!lp->use_dmaengine) { + napi_disable(&lp->napi_tx); + napi_disable(&lp->napi_rx); + } phylink_stop(lp->phylink); phylink_disconnect_phy(lp->phylink); @@ -1225,18 +1589,33 @@ static int axienet_stop(struct net_device *ndev) axienet_setoptions(ndev, lp->options & ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN)); - axienet_dma_stop(lp); + if (!lp->use_dmaengine) { + axienet_dma_stop(lp); + cancel_work_sync(&lp->dma_err_task); + free_irq(lp->tx_irq, ndev); + free_irq(lp->rx_irq, ndev); + axienet_dma_bd_release(ndev); + } else { + dmaengine_terminate_sync(lp->tx_chan); + dmaengine_synchronize(lp->tx_chan); + dmaengine_terminate_sync(lp->rx_chan); + dmaengine_synchronize(lp->rx_chan); + + for (i = 0; i < TX_BD_NUM_MAX; i++) + kfree(lp->tx_skb_ring[i]); + kfree(lp->tx_skb_ring); + for (i = 0; i < RX_BUF_NUM_DEFAULT; i++) + kfree(lp->rx_skb_ring[i]); + kfree(lp->rx_skb_ring); + + dma_release_channel(lp->rx_chan); + dma_release_channel(lp->tx_chan); + } axienet_iow(lp, XAE_IE_OFFSET, 0); - cancel_work_sync(&lp->dma_err_task); - if (lp->eth_irq > 0) free_irq(lp->eth_irq, ndev); - free_irq(lp->tx_irq, ndev); - free_irq(lp->rx_irq, ndev); - - axienet_dma_bd_release(ndev); return 0; } @@ -1333,6 +1712,18 @@ static const struct net_device_ops axienet_netdev_ops = { #endif }; +static const struct net_device_ops axienet_netdev_dmaengine_ops = { + .ndo_open = axienet_open, + .ndo_stop = axienet_stop, + .ndo_start_xmit = axienet_start_xmit_dmaengine, + .ndo_get_stats64 = axienet_get_stats64, + .ndo_change_mtu = axienet_change_mtu, + .ndo_set_mac_address = netdev_set_mac_address, + .ndo_validate_addr = eth_validate_addr, + .ndo_eth_ioctl = axienet_ioctl, + .ndo_set_rx_mode = axienet_set_multicast_list, +}; + /** * axienet_ethtools_get_drvinfo - Get various Axi Ethernet driver information. 
* @ndev: Pointer to net_device structure @@ -1412,14 +1803,16 @@ static void axienet_ethtools_get_regs(struct net_device *ndev, data[29] = axienet_ior(lp, XAE_FMI_OFFSET); data[30] = axienet_ior(lp, XAE_AF0_OFFSET); data[31] = axienet_ior(lp, XAE_AF1_OFFSET); - data[32] = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); - data[33] = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET); - data[34] = axienet_dma_in32(lp, XAXIDMA_TX_CDESC_OFFSET); - data[35] = axienet_dma_in32(lp, XAXIDMA_TX_TDESC_OFFSET); - data[36] = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); - data[37] = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET); - data[38] = axienet_dma_in32(lp, XAXIDMA_RX_CDESC_OFFSET); - data[39] = axienet_dma_in32(lp, XAXIDMA_RX_TDESC_OFFSET); + if (!lp->use_dmaengine) { + data[32] = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); + data[33] = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET); + data[34] = axienet_dma_in32(lp, XAXIDMA_TX_CDESC_OFFSET); + data[35] = axienet_dma_in32(lp, XAXIDMA_TX_TDESC_OFFSET); + data[36] = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); + data[37] = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET); + data[38] = axienet_dma_in32(lp, XAXIDMA_RX_CDESC_OFFSET); + data[39] = axienet_dma_in32(lp, XAXIDMA_RX_TDESC_OFFSET); + } } static void @@ -1863,7 +2256,6 @@ static int axienet_probe(struct platform_device *pdev) SET_NETDEV_DEV(ndev, &pdev->dev); ndev->flags &= ~IFF_MULTICAST; /* clear multicast */ ndev->features = NETIF_F_SG; - ndev->netdev_ops = &axienet_netdev_ops; ndev->ethtool_ops = &axienet_ethtool_ops; /* MTU range: 64 - 9000 */ @@ -1880,9 +2272,6 @@ static int axienet_probe(struct platform_device *pdev) u64_stats_init(&lp->rx_stat_sync); u64_stats_init(&lp->tx_stat_sync); - netif_napi_add(ndev, &lp->napi_rx, axienet_rx_poll); - netif_napi_add(ndev, &lp->napi_tx, axienet_tx_poll); - lp->axi_clk = devm_clk_get_optional(&pdev->dev, "s_axi_lite_clk"); if (!lp->axi_clk) { /* For backward compatibility, if named AXI clock is not present, @@ -2008,82 +2397,118 @@ static int axienet_probe(struct platform_device *pdev) goto cleanup_clk; } - /* Find the DMA node, map the DMA registers, and decode the DMA IRQs */ - np = of_parse_phandle(pdev->dev.of_node, "axistream-connected", 0); - if (np) { - struct resource dmares; + if (!of_find_property(pdev->dev.of_node, "dmas", NULL)) { + /* Find the DMA node, map the DMA registers, and decode the DMA IRQs */ + np = of_parse_phandle(pdev->dev.of_node, "axistream-connected", 0); - ret = of_address_to_resource(np, 0, &dmares); - if (ret) { - dev_err(&pdev->dev, - "unable to get DMA resource\n"); + if (np) { + struct resource dmares; + + ret = of_address_to_resource(np, 0, &dmares); + if (ret) { + dev_err(&pdev->dev, + "unable to get DMA resource\n"); + of_node_put(np); + goto cleanup_clk; + } + lp->dma_regs = devm_ioremap_resource(&pdev->dev, + &dmares); + lp->rx_irq = irq_of_parse_and_map(np, 1); + lp->tx_irq = irq_of_parse_and_map(np, 0); of_node_put(np); + lp->eth_irq = platform_get_irq_optional(pdev, 0); + } else { + /* Check for these resources directly on the Ethernet node. 
*/ + lp->dma_regs = devm_platform_get_and_ioremap_resource(pdev, 1, NULL); + lp->rx_irq = platform_get_irq(pdev, 1); + lp->tx_irq = platform_get_irq(pdev, 0); + lp->eth_irq = platform_get_irq_optional(pdev, 2); + } + if (IS_ERR(lp->dma_regs)) { + dev_err(&pdev->dev, "could not map DMA regs\n"); + ret = PTR_ERR(lp->dma_regs); + goto cleanup_clk; + } + if (lp->rx_irq <= 0 || lp->tx_irq <= 0) { + dev_err(&pdev->dev, "could not determine irqs\n"); + ret = -ENOMEM; goto cleanup_clk; } - lp->dma_regs = devm_ioremap_resource(&pdev->dev, - &dmares); - lp->rx_irq = irq_of_parse_and_map(np, 1); - lp->tx_irq = irq_of_parse_and_map(np, 0); - of_node_put(np); - lp->eth_irq = platform_get_irq_optional(pdev, 0); - } else { - /* Check for these resources directly on the Ethernet node. */ - lp->dma_regs = devm_platform_get_and_ioremap_resource(pdev, 1, NULL); - lp->rx_irq = platform_get_irq(pdev, 1); - lp->tx_irq = platform_get_irq(pdev, 0); - lp->eth_irq = platform_get_irq_optional(pdev, 2); - } - if (IS_ERR(lp->dma_regs)) { - dev_err(&pdev->dev, "could not map DMA regs\n"); - ret = PTR_ERR(lp->dma_regs); - goto cleanup_clk; - } - if ((lp->rx_irq <= 0) || (lp->tx_irq <= 0)) { - dev_err(&pdev->dev, "could not determine irqs\n"); - ret = -ENOMEM; - goto cleanup_clk; - } - /* Reset core now that clocks are enabled, prior to accessing MDIO */ - ret = __axienet_device_reset(lp); - if (ret) - goto cleanup_clk; + /* Reset core now that clocks are enabled, prior to accessing MDIO */ + ret = __axienet_device_reset(lp); + if (ret) + goto cleanup_clk; + + /* Autodetect the need for 64-bit DMA pointers. + * When the IP is configured for a bus width bigger than 32 bits, + * writing the MSB registers is mandatory, even if they are all 0. + * We can detect this case by writing all 1's to one such register + * and see if that sticks: when the IP is configured for 32 bits + * only, those registers are RES0. + * Those MSB registers were introduced in IP v7.1, which we check first. + */ + if ((axienet_ior(lp, XAE_ID_OFFSET) >> 24) >= 0x9) { + void __iomem *desc = lp->dma_regs + XAXIDMA_TX_CDESC_OFFSET + 4; - /* Autodetect the need for 64-bit DMA pointers. - * When the IP is configured for a bus width bigger than 32 bits, - * writing the MSB registers is mandatory, even if they are all 0. - * We can detect this case by writing all 1's to one such register - * and see if that sticks: when the IP is configured for 32 bits - * only, those registers are RES0. - * Those MSB registers were introduced in IP v7.1, which we check first. 
- */ - if ((axienet_ior(lp, XAE_ID_OFFSET) >> 24) >= 0x9) { - void __iomem *desc = lp->dma_regs + XAXIDMA_TX_CDESC_OFFSET + 4; - - iowrite32(0x0, desc); - if (ioread32(desc) == 0) { /* sanity check */ - iowrite32(0xffffffff, desc); - if (ioread32(desc) > 0) { - lp->features |= XAE_FEATURE_DMA_64BIT; - addr_width = 64; - dev_info(&pdev->dev, - "autodetected 64-bit DMA range\n"); - } iowrite32(0x0, desc); + if (ioread32(desc) == 0) { /* sanity check */ + iowrite32(0xffffffff, desc); + if (ioread32(desc) > 0) { + lp->features |= XAE_FEATURE_DMA_64BIT; + addr_width = 64; + dev_info(&pdev->dev, + "autodetected 64-bit DMA range\n"); + } + iowrite32(0x0, desc); + } + } + if (!IS_ENABLED(CONFIG_64BIT) && lp->features & XAE_FEATURE_DMA_64BIT) { + dev_err(&pdev->dev, "64-bit addressable DMA is not compatible with 32-bit archecture\n"); + ret = -EINVAL; + goto cleanup_clk; } - } - if (!IS_ENABLED(CONFIG_64BIT) && lp->features & XAE_FEATURE_DMA_64BIT) { - dev_err(&pdev->dev, "64-bit addressable DMA is not compatible with 32-bit archecture\n"); - ret = -EINVAL; - goto cleanup_clk; - } - ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(addr_width)); - if (ret) { - dev_err(&pdev->dev, "No suitable DMA available\n"); - goto cleanup_clk; + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(addr_width)); + if (ret) { + dev_err(&pdev->dev, "No suitable DMA available\n"); + goto cleanup_clk; + } + netif_napi_add(ndev, &lp->napi_rx, axienet_rx_poll); + netif_napi_add(ndev, &lp->napi_tx, axienet_tx_poll); + } else { + struct xilinx_vdma_config cfg; + struct dma_chan *tx_chan; + + lp->eth_irq = platform_get_irq_optional(pdev, 0); + if (lp->eth_irq < 0 && lp->eth_irq != -ENXIO) { + ret = lp->eth_irq; + goto cleanup_clk; + } + tx_chan = dma_request_chan(lp->dev, "tx_chan0"); + if (IS_ERR(tx_chan)) { + ret = PTR_ERR(tx_chan); + dev_err_probe(lp->dev, ret, "No Ethernet DMA (TX) channel found\n"); + goto cleanup_clk; + } + + cfg.reset = 1; + /* As name says VDMA but it has support for DMA channel reset */ + ret = xilinx_vdma_channel_set_config(tx_chan, &cfg); + if (ret < 0) { + dev_err(&pdev->dev, "Reset channel failed\n"); + dma_release_channel(tx_chan); + goto cleanup_clk; + } + + dma_release_channel(tx_chan); + lp->use_dmaengine = 1; } + if (lp->use_dmaengine) + ndev->netdev_ops = &axienet_netdev_dmaengine_ops; + else + ndev->netdev_ops = &axienet_netdev_ops; /* Check for Ethernet core IRQ (optional) */ if (lp->eth_irq <= 0) dev_info(&pdev->dev, "Ethernet core IRQ not defined\n"); @@ -2099,8 +2524,8 @@ static int axienet_probe(struct platform_device *pdev) } lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD; - lp->coalesce_usec_rx = XAXIDMA_DFT_RX_USEC; lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD; + lp->coalesce_usec_rx = XAXIDMA_DFT_RX_USEC; lp->coalesce_usec_tx = XAXIDMA_DFT_TX_USEC; ret = axienet_mdio_setup(lp); diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index c8da94af4161..88dd8a2245b0 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1086,20 +1086,8 @@ static int macvlan_ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) { struct net_device *real_dev = macvlan_dev_real_dev(dev); - const struct ethtool_ops *ops = real_dev->ethtool_ops; - struct phy_device *phydev = real_dev->phydev; - if (phy_has_tsinfo(phydev)) { - return phy_ts_info(phydev, info); - } else if (ops->get_ts_info) { - return ops->get_ts_info(real_dev, info); - } else { - info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; - info->phc_index 
= -1; - } - - return 0; + return ethtool_get_ts_info_by_layer(real_dev, info); } static netdev_features_t macvlan_fix_features(struct net_device *dev, diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 421d2b62918f..25cfc5ded1da 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -96,10 +96,7 @@ config ADIN1100_PHY Currently supports the: - ADIN1100 - Robust,Industrial, Low Power 10BASE-T1L Ethernet PHY -config AQUANTIA_PHY - tristate "Aquantia PHYs" - help - Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405 +source "drivers/net/phy/aquantia/Kconfig" config AX88796B_PHY tristate "Asix PHYs" diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index c945ed9bd14b..f65e85c91fc1 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -35,11 +35,7 @@ obj-y += $(sfp-obj-y) $(sfp-obj-m) obj-$(CONFIG_ADIN_PHY) += adin.o obj-$(CONFIG_ADIN1100_PHY) += adin1100.o obj-$(CONFIG_AMD_PHY) += amd.o -aquantia-objs += aquantia_main.o -ifdef CONFIG_HWMON -aquantia-objs += aquantia_hwmon.o -endif -obj-$(CONFIG_AQUANTIA_PHY) += aquantia.o +obj-$(CONFIG_AQUANTIA_PHY) += aquantia/ obj-$(CONFIG_AT803X_PHY) += at803x.o obj-$(CONFIG_AX88796B_PHY) += ax88796b.o obj-$(CONFIG_BCM54140_PHY) += bcm54140.o diff --git a/drivers/net/phy/aquantia.h b/drivers/net/phy/aquantia.h deleted file mode 100644 index c684b65c642c..000000000000 --- a/drivers/net/phy/aquantia.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* HWMON driver for Aquantia PHY - * - * Author: Nikita Yushchenko <nikita.yoush@cogentembedded.com> - * Author: Andrew Lunn <andrew@lunn.ch> - * Author: Heiner Kallweit <hkallweit1@gmail.com> - */ - -#include <linux/device.h> -#include <linux/phy.h> - -#if IS_REACHABLE(CONFIG_HWMON) -int aqr_hwmon_probe(struct phy_device *phydev); -#else -static inline int aqr_hwmon_probe(struct phy_device *phydev) { return 0; } -#endif diff --git a/drivers/net/phy/aquantia/Kconfig b/drivers/net/phy/aquantia/Kconfig new file mode 100644 index 000000000000..a35de4b9b554 --- /dev/null +++ b/drivers/net/phy/aquantia/Kconfig @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +config AQUANTIA_PHY + tristate "Aquantia PHYs" + select CRC_CCITT + help + Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405 diff --git a/drivers/net/phy/aquantia/Makefile b/drivers/net/phy/aquantia/Makefile new file mode 100644 index 000000000000..aa77fb63c8ec --- /dev/null +++ b/drivers/net/phy/aquantia/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +aquantia-objs += aquantia_main.o aquantia_firmware.o +ifdef CONFIG_HWMON +aquantia-objs += aquantia_hwmon.o +endif +obj-$(CONFIG_AQUANTIA_PHY) += aquantia.o diff --git a/drivers/net/phy/aquantia/aquantia.h b/drivers/net/phy/aquantia/aquantia.h new file mode 100644 index 000000000000..9ed38972abdb --- /dev/null +++ b/drivers/net/phy/aquantia/aquantia.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* HWMON driver for Aquantia PHY + * + * Author: Nikita Yushchenko <nikita.yoush@cogentembedded.com> + * Author: Andrew Lunn <andrew@lunn.ch> + * Author: Heiner Kallweit <hkallweit1@gmail.com> + */ + +#include <linux/device.h> +#include <linux/phy.h> + +/* Vendor specific 1, MDIO_MMD_VEND1 */ +#define VEND1_GLOBAL_SC 0x0 +#define VEND1_GLOBAL_SC_SOFT_RESET BIT(15) +#define VEND1_GLOBAL_SC_LOW_POWER BIT(11) + +#define VEND1_GLOBAL_FW_ID 0x0020 +#define VEND1_GLOBAL_FW_ID_MAJOR GENMASK(15, 8) +#define VEND1_GLOBAL_FW_ID_MINOR GENMASK(7, 0) + +#define 
VEND1_GLOBAL_MAILBOX_INTERFACE1 0x0200 +#define VEND1_GLOBAL_MAILBOX_INTERFACE1_EXECUTE BIT(15) +#define VEND1_GLOBAL_MAILBOX_INTERFACE1_WRITE BIT(14) +#define VEND1_GLOBAL_MAILBOX_INTERFACE1_CRC_RESET BIT(12) +#define VEND1_GLOBAL_MAILBOX_INTERFACE1_BUSY BIT(8) + +#define VEND1_GLOBAL_MAILBOX_INTERFACE2 0x0201 +#define VEND1_GLOBAL_MAILBOX_INTERFACE3 0x0202 +#define VEND1_GLOBAL_MAILBOX_INTERFACE3_MSW_ADDR_MASK GENMASK(15, 0) +#define VEND1_GLOBAL_MAILBOX_INTERFACE3_MSW_ADDR(x) FIELD_PREP(VEND1_GLOBAL_MAILBOX_INTERFACE3_MSW_ADDR_MASK, (u16)((x) >> 16)) +#define VEND1_GLOBAL_MAILBOX_INTERFACE4 0x0203 +#define VEND1_GLOBAL_MAILBOX_INTERFACE4_LSW_ADDR_MASK GENMASK(15, 2) +#define VEND1_GLOBAL_MAILBOX_INTERFACE4_LSW_ADDR(x) FIELD_PREP(VEND1_GLOBAL_MAILBOX_INTERFACE4_LSW_ADDR_MASK, (u16)(x)) + +#define VEND1_GLOBAL_MAILBOX_INTERFACE5 0x0204 +#define VEND1_GLOBAL_MAILBOX_INTERFACE5_MSW_DATA_MASK GENMASK(15, 0) +#define VEND1_GLOBAL_MAILBOX_INTERFACE5_MSW_DATA(x) FIELD_PREP(VEND1_GLOBAL_MAILBOX_INTERFACE5_MSW_DATA_MASK, (u16)((x) >> 16)) +#define VEND1_GLOBAL_MAILBOX_INTERFACE6 0x0205 +#define VEND1_GLOBAL_MAILBOX_INTERFACE6_LSW_DATA_MASK GENMASK(15, 0) +#define VEND1_GLOBAL_MAILBOX_INTERFACE6_LSW_DATA(x) FIELD_PREP(VEND1_GLOBAL_MAILBOX_INTERFACE6_LSW_DATA_MASK, (u16)(x)) + +/* The following registers all have similar layouts; first the registers... */ +#define VEND1_GLOBAL_CFG_10M 0x0310 +#define VEND1_GLOBAL_CFG_100M 0x031b +#define VEND1_GLOBAL_CFG_1G 0x031c +#define VEND1_GLOBAL_CFG_2_5G 0x031d +#define VEND1_GLOBAL_CFG_5G 0x031e +#define VEND1_GLOBAL_CFG_10G 0x031f +/* ...and now the fields */ +#define VEND1_GLOBAL_CFG_RATE_ADAPT GENMASK(8, 7) +#define VEND1_GLOBAL_CFG_RATE_ADAPT_NONE 0 +#define VEND1_GLOBAL_CFG_RATE_ADAPT_USX 1 +#define VEND1_GLOBAL_CFG_RATE_ADAPT_PAUSE 2 + +/* Vendor specific 1, MDIO_MMD_VEND2 */ +#define VEND1_GLOBAL_CONTROL2 0xc001 +#define VEND1_GLOBAL_CONTROL2_UP_RUN_STALL_RST BIT(15) +#define VEND1_GLOBAL_CONTROL2_UP_RUN_STALL_OVD BIT(6) +#define VEND1_GLOBAL_CONTROL2_UP_RUN_STALL BIT(0) + +#define VEND1_THERMAL_PROV_HIGH_TEMP_FAIL 0xc421 +#define VEND1_THERMAL_PROV_LOW_TEMP_FAIL 0xc422 +#define VEND1_THERMAL_PROV_HIGH_TEMP_WARN 0xc423 +#define VEND1_THERMAL_PROV_LOW_TEMP_WARN 0xc424 +#define VEND1_THERMAL_STAT1 0xc820 +#define VEND1_THERMAL_STAT2 0xc821 +#define VEND1_THERMAL_STAT2_VALID BIT(0) +#define VEND1_GENERAL_STAT1 0xc830 +#define VEND1_GENERAL_STAT1_HIGH_TEMP_FAIL BIT(14) +#define VEND1_GENERAL_STAT1_LOW_TEMP_FAIL BIT(13) +#define VEND1_GENERAL_STAT1_HIGH_TEMP_WARN BIT(12) +#define VEND1_GENERAL_STAT1_LOW_TEMP_WARN BIT(11) + +#define VEND1_GLOBAL_GEN_STAT2 0xc831 +#define VEND1_GLOBAL_GEN_STAT2_OP_IN_PROG BIT(15) + +#define VEND1_GLOBAL_RSVD_STAT1 0xc885 +#define VEND1_GLOBAL_RSVD_STAT1_FW_BUILD_ID GENMASK(7, 4) +#define VEND1_GLOBAL_RSVD_STAT1_PROV_ID GENMASK(3, 0) + +#define VEND1_GLOBAL_RSVD_STAT9 0xc88d +#define VEND1_GLOBAL_RSVD_STAT9_MODE GENMASK(7, 0) +#define VEND1_GLOBAL_RSVD_STAT9_1000BT2 0x23 + +#define VEND1_GLOBAL_INT_STD_STATUS 0xfc00 +#define VEND1_GLOBAL_INT_VEND_STATUS 0xfc01 + +#define VEND1_GLOBAL_INT_STD_MASK 0xff00 +#define VEND1_GLOBAL_INT_STD_MASK_PMA1 BIT(15) +#define VEND1_GLOBAL_INT_STD_MASK_PMA2 BIT(14) +#define VEND1_GLOBAL_INT_STD_MASK_PCS1 BIT(13) +#define VEND1_GLOBAL_INT_STD_MASK_PCS2 BIT(12) +#define VEND1_GLOBAL_INT_STD_MASK_PCS3 BIT(11) +#define VEND1_GLOBAL_INT_STD_MASK_PHY_XS1 BIT(10) +#define VEND1_GLOBAL_INT_STD_MASK_PHY_XS2 BIT(9) +#define VEND1_GLOBAL_INT_STD_MASK_AN1 BIT(8) +#define VEND1_GLOBAL_INT_STD_MASK_AN2 
BIT(7) +#define VEND1_GLOBAL_INT_STD_MASK_GBE BIT(6) +#define VEND1_GLOBAL_INT_STD_MASK_ALL BIT(0) + +#define VEND1_GLOBAL_INT_VEND_MASK 0xff01 +#define VEND1_GLOBAL_INT_VEND_MASK_PMA BIT(15) +#define VEND1_GLOBAL_INT_VEND_MASK_PCS BIT(14) +#define VEND1_GLOBAL_INT_VEND_MASK_PHY_XS BIT(13) +#define VEND1_GLOBAL_INT_VEND_MASK_AN BIT(12) +#define VEND1_GLOBAL_INT_VEND_MASK_GBE BIT(11) +#define VEND1_GLOBAL_INT_VEND_MASK_GLOBAL1 BIT(2) +#define VEND1_GLOBAL_INT_VEND_MASK_GLOBAL2 BIT(1) +#define VEND1_GLOBAL_INT_VEND_MASK_GLOBAL3 BIT(0) + +#if IS_REACHABLE(CONFIG_HWMON) +int aqr_hwmon_probe(struct phy_device *phydev); +#else +static inline int aqr_hwmon_probe(struct phy_device *phydev) { return 0; } +#endif + +int aqr_firmware_load(struct phy_device *phydev); diff --git a/drivers/net/phy/aquantia/aquantia_firmware.c b/drivers/net/phy/aquantia/aquantia_firmware.c new file mode 100644 index 000000000000..c5f292b1c4c8 --- /dev/null +++ b/drivers/net/phy/aquantia/aquantia_firmware.c @@ -0,0 +1,370 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bitfield.h> +#include <linux/of.h> +#include <linux/firmware.h> +#include <linux/crc-ccitt.h> +#include <linux/nvmem-consumer.h> + +#include <asm/unaligned.h> + +#include "aquantia.h" + +#define UP_RESET_SLEEP 100 + +/* addresses of memory segments in the phy */ +#define DRAM_BASE_ADDR 0x3FFE0000 +#define IRAM_BASE_ADDR 0x40000000 + +/* firmware image format constants */ +#define VERSION_STRING_SIZE 0x40 +#define VERSION_STRING_OFFSET 0x0200 +/* primary offset is written at an offset from the start of the fw blob */ +#define PRIMARY_OFFSET_OFFSET 0x8 +/* primary offset needs to be then added to a base offset */ +#define PRIMARY_OFFSET_SHIFT 12 +#define PRIMARY_OFFSET(x) ((x) << PRIMARY_OFFSET_SHIFT) +#define HEADER_OFFSET 0x300 + +struct aqr_fw_header { + u32 padding; + u8 iram_offset[3]; + u8 iram_size[3]; + u8 dram_offset[3]; + u8 dram_size[3]; +} __packed; + +enum aqr_fw_src { + AQR_FW_SRC_NVMEM = 0, + AQR_FW_SRC_FS, +}; + +static const char * const aqr_fw_src_string[] = { + [AQR_FW_SRC_NVMEM] = "NVMEM", + [AQR_FW_SRC_FS] = "FS", +}; + +/* AQR firmware doesn't have fixed offsets for iram and dram section + * but instead provide an header with the offset to use on reading + * and parsing the firmware. + * + * AQR firmware can't be trusted and each offset is validated to be + * not negative and be in the size of the firmware itself. 
+ */ +static bool aqr_fw_validate_get(size_t size, size_t offset, size_t get_size) +{ + return offset + get_size <= size; +} + +static int aqr_fw_get_be16(const u8 *data, size_t offset, size_t size, u16 *value) +{ + if (!aqr_fw_validate_get(size, offset, sizeof(u16))) + return -EINVAL; + + *value = get_unaligned_be16(data + offset); + + return 0; +} + +static int aqr_fw_get_le16(const u8 *data, size_t offset, size_t size, u16 *value) +{ + if (!aqr_fw_validate_get(size, offset, sizeof(u16))) + return -EINVAL; + + *value = get_unaligned_le16(data + offset); + + return 0; +} + +static int aqr_fw_get_le24(const u8 *data, size_t offset, size_t size, u32 *value) +{ + if (!aqr_fw_validate_get(size, offset, sizeof(u8) * 3)) + return -EINVAL; + + *value = get_unaligned_le24(data + offset); + + return 0; +} + +/* load data into the phy's memory */ +static int aqr_fw_load_memory(struct phy_device *phydev, u32 addr, + const u8 *data, size_t len) +{ + u16 crc = 0, up_crc; + size_t pos; + + /* PHY expect addr in LE */ + addr = (__force u32)cpu_to_le32(addr); + + phy_write_mmd(phydev, MDIO_MMD_VEND1, + VEND1_GLOBAL_MAILBOX_INTERFACE1, + VEND1_GLOBAL_MAILBOX_INTERFACE1_CRC_RESET); + phy_write_mmd(phydev, MDIO_MMD_VEND1, + VEND1_GLOBAL_MAILBOX_INTERFACE3, + VEND1_GLOBAL_MAILBOX_INTERFACE3_MSW_ADDR(addr)); + phy_write_mmd(phydev, MDIO_MMD_VEND1, + VEND1_GLOBAL_MAILBOX_INTERFACE4, + VEND1_GLOBAL_MAILBOX_INTERFACE4_LSW_ADDR(addr)); + + /* We assume and enforce the size to be word aligned. + * If a firmware that is not word aligned is found, please report upstream. + */ + for (pos = 0; pos < len; pos += sizeof(u32)) { + u32 word; + + /* FW data is always stored in little-endian */ + word = get_unaligned((const u32 *)(data + pos)); + + phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_MAILBOX_INTERFACE5, + VEND1_GLOBAL_MAILBOX_INTERFACE5_MSW_DATA(word)); + phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_MAILBOX_INTERFACE6, + VEND1_GLOBAL_MAILBOX_INTERFACE6_LSW_DATA(word)); + + phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_MAILBOX_INTERFACE1, + VEND1_GLOBAL_MAILBOX_INTERFACE1_EXECUTE | + VEND1_GLOBAL_MAILBOX_INTERFACE1_WRITE); + + /* calculate CRC as we load data to the mailbox. + * We convert word to big-endian as PHY is BE and mailbox will + * return a BE CRC. + */ + word = (__force u32)cpu_to_be32(word); + crc = crc_ccitt_false(crc, (u8 *)&word, sizeof(word)); + } + + up_crc = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_MAILBOX_INTERFACE2); + if (crc != up_crc) { + phydev_err(phydev, "CRC mismatch: calculated 0x%04x PHY 0x%04x\n", + crc, up_crc); + return -EINVAL; + } + + return 0; +} + +static int aqr_fw_boot(struct phy_device *phydev, const u8 *data, size_t size, + enum aqr_fw_src fw_src) +{ + u16 calculated_crc, read_crc, read_primary_offset; + u32 iram_offset = 0, iram_size = 0; + u32 dram_offset = 0, dram_size = 0; + char version[VERSION_STRING_SIZE]; + u32 primary_offset = 0; + int ret; + + /* extract saved CRC at the end of the fw + * CRC is saved in big-endian as PHY is BE + */ + ret = aqr_fw_get_be16(data, size - sizeof(u16), size, &read_crc); + if (ret) { + phydev_err(phydev, "bad firmware CRC in firmware\n"); + return ret; + } + calculated_crc = crc_ccitt_false(0, data, size - sizeof(u16)); + if (read_crc != calculated_crc) { + phydev_err(phydev, "bad firmware CRC: file 0x%04x calculated 0x%04x\n", + read_crc, calculated_crc); + return -EINVAL; + } + + /* Get the primary offset to extract DRAM and IRAM sections. 
*/ + ret = aqr_fw_get_le16(data, PRIMARY_OFFSET_OFFSET, size, &read_primary_offset); + if (ret) { + phydev_err(phydev, "bad primary offset in firmware\n"); + return ret; + } + primary_offset = PRIMARY_OFFSET(read_primary_offset); + + /* Find the DRAM and IRAM sections within the firmware file. + * Make sure the fw_header is correctly in the firmware. + */ + if (!aqr_fw_validate_get(size, primary_offset + HEADER_OFFSET, + sizeof(struct aqr_fw_header))) { + phydev_err(phydev, "bad fw_header in firmware\n"); + return -EINVAL; + } + + /* offset are in LE and values needs to be converted to cpu endian */ + ret = aqr_fw_get_le24(data, primary_offset + HEADER_OFFSET + + offsetof(struct aqr_fw_header, iram_offset), + size, &iram_offset); + if (ret) { + phydev_err(phydev, "bad iram offset in firmware\n"); + return ret; + } + ret = aqr_fw_get_le24(data, primary_offset + HEADER_OFFSET + + offsetof(struct aqr_fw_header, iram_size), + size, &iram_size); + if (ret) { + phydev_err(phydev, "invalid iram size in firmware\n"); + return ret; + } + ret = aqr_fw_get_le24(data, primary_offset + HEADER_OFFSET + + offsetof(struct aqr_fw_header, dram_offset), + size, &dram_offset); + if (ret) { + phydev_err(phydev, "bad dram offset in firmware\n"); + return ret; + } + ret = aqr_fw_get_le24(data, primary_offset + HEADER_OFFSET + + offsetof(struct aqr_fw_header, dram_size), + size, &dram_size); + if (ret) { + phydev_err(phydev, "invalid dram size in firmware\n"); + return ret; + } + + /* Increment the offset with the primary offset. + * Validate iram/dram offset and size. + */ + iram_offset += primary_offset; + if (iram_size % sizeof(u32)) { + phydev_err(phydev, "iram size if not aligned to word size. Please report this upstream!\n"); + return -EINVAL; + } + if (!aqr_fw_validate_get(size, iram_offset, iram_size)) { + phydev_err(phydev, "invalid iram offset for iram size\n"); + return -EINVAL; + } + + dram_offset += primary_offset; + if (dram_size % sizeof(u32)) { + phydev_err(phydev, "dram size if not aligned to word size. 
Please report this upstream!\n"); + return -EINVAL; + } + if (!aqr_fw_validate_get(size, dram_offset, dram_size)) { + phydev_err(phydev, "invalid iram offset for iram size\n"); + return -EINVAL; + } + + phydev_dbg(phydev, "primary %d IRAM offset=%d size=%d DRAM offset=%d size=%d\n", + primary_offset, iram_offset, iram_size, dram_offset, dram_size); + + if (!aqr_fw_validate_get(size, dram_offset + VERSION_STRING_OFFSET, + VERSION_STRING_SIZE)) { + phydev_err(phydev, "invalid version in firmware\n"); + return -EINVAL; + } + strscpy(version, (char *)data + dram_offset + VERSION_STRING_OFFSET, + VERSION_STRING_SIZE); + if (version[0] == '\0') { + phydev_err(phydev, "invalid version in firmware\n"); + return -EINVAL; + } + phydev_info(phydev, "loading firmware version '%s' from '%s'\n", version, + aqr_fw_src_string[fw_src]); + + /* stall the microcprocessor */ + phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_CONTROL2, + VEND1_GLOBAL_CONTROL2_UP_RUN_STALL | VEND1_GLOBAL_CONTROL2_UP_RUN_STALL_OVD); + + phydev_dbg(phydev, "loading DRAM 0x%08x from offset=%d size=%d\n", + DRAM_BASE_ADDR, dram_offset, dram_size); + ret = aqr_fw_load_memory(phydev, DRAM_BASE_ADDR, data + dram_offset, + dram_size); + if (ret) + return ret; + + phydev_dbg(phydev, "loading IRAM 0x%08x from offset=%d size=%d\n", + IRAM_BASE_ADDR, iram_offset, iram_size); + ret = aqr_fw_load_memory(phydev, IRAM_BASE_ADDR, data + iram_offset, + iram_size); + if (ret) + return ret; + + /* make sure soft reset and low power mode are clear */ + phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_SC, + VEND1_GLOBAL_SC_SOFT_RESET | VEND1_GLOBAL_SC_LOW_POWER); + + /* Release the microprocessor. UP_RESET must be held for 100 usec. */ + phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_CONTROL2, + VEND1_GLOBAL_CONTROL2_UP_RUN_STALL | + VEND1_GLOBAL_CONTROL2_UP_RUN_STALL_OVD | + VEND1_GLOBAL_CONTROL2_UP_RUN_STALL_RST); + usleep_range(UP_RESET_SLEEP, UP_RESET_SLEEP * 2); + + phy_write_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_CONTROL2, + VEND1_GLOBAL_CONTROL2_UP_RUN_STALL_OVD); + + return 0; +} + +static int aqr_firmware_load_nvmem(struct phy_device *phydev) +{ + struct nvmem_cell *cell; + size_t size; + u8 *buf; + int ret; + + cell = nvmem_cell_get(&phydev->mdio.dev, "firmware"); + if (IS_ERR(cell)) + return PTR_ERR(cell); + + buf = nvmem_cell_read(cell, &size); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto exit; + } + + ret = aqr_fw_boot(phydev, buf, size, AQR_FW_SRC_NVMEM); + if (ret) + phydev_err(phydev, "firmware loading failed: %d\n", ret); + + kfree(buf); +exit: + nvmem_cell_put(cell); + + return ret; +} + +static int aqr_firmware_load_fs(struct phy_device *phydev) +{ + struct device *dev = &phydev->mdio.dev; + const struct firmware *fw; + const char *fw_name; + int ret; + + ret = of_property_read_string(dev->of_node, "firmware-name", + &fw_name); + if (ret) + return ret; + + ret = request_firmware(&fw, fw_name, dev); + if (ret) { + phydev_err(phydev, "failed to find FW file %s (%d)\n", + fw_name, ret); + return ret; + } + + ret = aqr_fw_boot(phydev, fw->data, fw->size, AQR_FW_SRC_FS); + if (ret) + phydev_err(phydev, "firmware loading failed: %d\n", ret); + + release_firmware(fw); + + return ret; +} + +int aqr_firmware_load(struct phy_device *phydev) +{ + int ret; + + /* Check if the firmware is not already loaded by pooling + * the current version returned by the PHY. If 0 is returned, + * no firmware is loaded. 
+ */ + ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VEND1_GLOBAL_FW_ID); + if (ret > 0) + goto exit; + + ret = aqr_firmware_load_nvmem(phydev); + if (!ret) + goto exit; + + ret = aqr_firmware_load_fs(phydev); + if (ret) + return ret; + +exit: + return 0; +} diff --git a/drivers/net/phy/aquantia_hwmon.c b/drivers/net/phy/aquantia/aquantia_hwmon.c index 0da451e46f69..7b3c49c3bf49 100644 --- a/drivers/net/phy/aquantia_hwmon.c +++ b/drivers/net/phy/aquantia/aquantia_hwmon.c @@ -13,20 +13,6 @@ #include "aquantia.h" -/* Vendor specific 1, MDIO_MMD_VEND2 */ -#define VEND1_THERMAL_PROV_HIGH_TEMP_FAIL 0xc421 -#define VEND1_THERMAL_PROV_LOW_TEMP_FAIL 0xc422 -#define VEND1_THERMAL_PROV_HIGH_TEMP_WARN 0xc423 -#define VEND1_THERMAL_PROV_LOW_TEMP_WARN 0xc424 -#define VEND1_THERMAL_STAT1 0xc820 -#define VEND1_THERMAL_STAT2 0xc821 -#define VEND1_THERMAL_STAT2_VALID BIT(0) -#define VEND1_GENERAL_STAT1 0xc830 -#define VEND1_GENERAL_STAT1_HIGH_TEMP_FAIL BIT(14) -#define VEND1_GENERAL_STAT1_LOW_TEMP_FAIL BIT(13) -#define VEND1_GENERAL_STAT1_HIGH_TEMP_WARN BIT(12) -#define VEND1_GENERAL_STAT1_LOW_TEMP_WARN BIT(11) - #if IS_REACHABLE(CONFIG_HWMON) static umode_t aqr_hwmon_is_visible(const void *data, diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia/aquantia_main.c index 334a6904ca5a..cc4a97741c4a 100644 --- a/drivers/net/phy/aquantia_main.c +++ b/drivers/net/phy/aquantia/aquantia_main.c @@ -91,61 +91,6 @@ #define MDIO_C22EXT_STAT_SGMII_TX_FRAME_ALIGN_ERR 0xd31a #define MDIO_C22EXT_STAT_SGMII_TX_RUNT_FRAMES 0xd31b -/* Vendor specific 1, MDIO_MMD_VEND1 */ -#define VEND1_GLOBAL_FW_ID 0x0020 -#define VEND1_GLOBAL_FW_ID_MAJOR GENMASK(15, 8) -#define VEND1_GLOBAL_FW_ID_MINOR GENMASK(7, 0) - -#define VEND1_GLOBAL_GEN_STAT2 0xc831 -#define VEND1_GLOBAL_GEN_STAT2_OP_IN_PROG BIT(15) - -/* The following registers all have similar layouts; first the registers... 
*/ -#define VEND1_GLOBAL_CFG_10M 0x0310 -#define VEND1_GLOBAL_CFG_100M 0x031b -#define VEND1_GLOBAL_CFG_1G 0x031c -#define VEND1_GLOBAL_CFG_2_5G 0x031d -#define VEND1_GLOBAL_CFG_5G 0x031e -#define VEND1_GLOBAL_CFG_10G 0x031f -/* ...and now the fields */ -#define VEND1_GLOBAL_CFG_RATE_ADAPT GENMASK(8, 7) -#define VEND1_GLOBAL_CFG_RATE_ADAPT_NONE 0 -#define VEND1_GLOBAL_CFG_RATE_ADAPT_USX 1 -#define VEND1_GLOBAL_CFG_RATE_ADAPT_PAUSE 2 - -#define VEND1_GLOBAL_RSVD_STAT1 0xc885 -#define VEND1_GLOBAL_RSVD_STAT1_FW_BUILD_ID GENMASK(7, 4) -#define VEND1_GLOBAL_RSVD_STAT1_PROV_ID GENMASK(3, 0) - -#define VEND1_GLOBAL_RSVD_STAT9 0xc88d -#define VEND1_GLOBAL_RSVD_STAT9_MODE GENMASK(7, 0) -#define VEND1_GLOBAL_RSVD_STAT9_1000BT2 0x23 - -#define VEND1_GLOBAL_INT_STD_STATUS 0xfc00 -#define VEND1_GLOBAL_INT_VEND_STATUS 0xfc01 - -#define VEND1_GLOBAL_INT_STD_MASK 0xff00 -#define VEND1_GLOBAL_INT_STD_MASK_PMA1 BIT(15) -#define VEND1_GLOBAL_INT_STD_MASK_PMA2 BIT(14) -#define VEND1_GLOBAL_INT_STD_MASK_PCS1 BIT(13) -#define VEND1_GLOBAL_INT_STD_MASK_PCS2 BIT(12) -#define VEND1_GLOBAL_INT_STD_MASK_PCS3 BIT(11) -#define VEND1_GLOBAL_INT_STD_MASK_PHY_XS1 BIT(10) -#define VEND1_GLOBAL_INT_STD_MASK_PHY_XS2 BIT(9) -#define VEND1_GLOBAL_INT_STD_MASK_AN1 BIT(8) -#define VEND1_GLOBAL_INT_STD_MASK_AN2 BIT(7) -#define VEND1_GLOBAL_INT_STD_MASK_GBE BIT(6) -#define VEND1_GLOBAL_INT_STD_MASK_ALL BIT(0) - -#define VEND1_GLOBAL_INT_VEND_MASK 0xff01 -#define VEND1_GLOBAL_INT_VEND_MASK_PMA BIT(15) -#define VEND1_GLOBAL_INT_VEND_MASK_PCS BIT(14) -#define VEND1_GLOBAL_INT_VEND_MASK_PHY_XS BIT(13) -#define VEND1_GLOBAL_INT_VEND_MASK_AN BIT(12) -#define VEND1_GLOBAL_INT_VEND_MASK_GBE BIT(11) -#define VEND1_GLOBAL_INT_VEND_MASK_GLOBAL1 BIT(2) -#define VEND1_GLOBAL_INT_VEND_MASK_GLOBAL2 BIT(1) -#define VEND1_GLOBAL_INT_VEND_MASK_GLOBAL3 BIT(0) - /* Sleep and timeout for checking if the Processor-Intensive * MDIO operation is finished */ @@ -713,11 +658,17 @@ static int aqr107_resume(struct phy_device *phydev) static int aqr107_probe(struct phy_device *phydev) { + int ret; + phydev->priv = devm_kzalloc(&phydev->mdio.dev, sizeof(struct aqr107_priv), GFP_KERNEL); if (!phydev->priv) return -ENOMEM; + ret = aqr_firmware_load(phydev); + if (ret) + return ret; + return aqr_hwmon_probe(phydev); } diff --git a/drivers/net/phy/bcm-phy-ptp.c b/drivers/net/phy/bcm-phy-ptp.c index cb4b91af5e17..617d384d4551 100644 --- a/drivers/net/phy/bcm-phy-ptp.c +++ b/drivers/net/phy/bcm-phy-ptp.c @@ -782,16 +782,13 @@ out: } static int bcm_ptp_hwtstamp(struct mii_timestamper *mii_ts, - struct ifreq *ifr) + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { struct bcm_ptp_private *priv = mii2priv(mii_ts); - struct hwtstamp_config cfg; u16 mode, ctrl; - if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) - return -EFAULT; - - switch (cfg.rx_filter) { + switch (cfg->rx_filter) { case HWTSTAMP_FILTER_NONE: priv->hwts_rx = false; break; @@ -804,14 +801,14 @@ static int bcm_ptp_hwtstamp(struct mii_timestamper *mii_ts, case HWTSTAMP_FILTER_PTP_V2_EVENT: case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; priv->hwts_rx = true; break; default: return -ERANGE; } - priv->tx_type = cfg.tx_type; + priv->tx_type = cfg->tx_type; ctrl = priv->hwts_rx ? SLICE_RX_EN : 0; ctrl |= priv->tx_type != HWTSTAMP_TX_OFF ? 
SLICE_TX_EN : 0; @@ -840,7 +837,7 @@ static int bcm_ptp_hwtstamp(struct mii_timestamper *mii_ts, /* purge existing data */ skb_queue_purge(&priv->tx_queue); - return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; + return 0; } static int bcm_ptp_ts_info(struct mii_timestamper *mii_ts, diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 3a627105675a..312a8bb35d78 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -1135,6 +1135,8 @@ static struct phy_driver broadcom_drivers[] = { .handle_interrupt = bcm_phy_handle_interrupt, .link_change_notify = bcm54xx_link_change_notify, .led_brightness_set = bcm_phy_led_brightness_set, + .suspend = bcm54xx_suspend, + .resume = bcm54xx_resume, }, { .phy_id = PHY_ID_BCM54616S, .phy_id_mask = 0xfffffff0, diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 2657be7cc049..5c42c47dc564 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -1207,22 +1207,20 @@ static irqreturn_t dp83640_handle_interrupt(struct phy_device *phydev) return IRQ_HANDLED; } -static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) +static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { struct dp83640_private *dp83640 = container_of(mii_ts, struct dp83640_private, mii_ts); - struct hwtstamp_config cfg; u16 txcfg0, rxcfg0; - if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) - return -EFAULT; - - if (cfg.tx_type < 0 || cfg.tx_type > HWTSTAMP_TX_ONESTEP_SYNC) + if (cfg->tx_type < 0 || cfg->tx_type > HWTSTAMP_TX_ONESTEP_SYNC) return -ERANGE; - dp83640->hwts_tx_en = cfg.tx_type; + dp83640->hwts_tx_en = cfg->tx_type; - switch (cfg.rx_filter) { + switch (cfg->rx_filter) { case HWTSTAMP_FILTER_NONE: dp83640->hwts_rx_en = 0; dp83640->layer = 0; @@ -1234,7 +1232,7 @@ static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) dp83640->hwts_rx_en = 1; dp83640->layer = PTP_CLASS_L4; dp83640->version = PTP_CLASS_V1; - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT; break; case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: @@ -1242,7 +1240,7 @@ static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) dp83640->hwts_rx_en = 1; dp83640->layer = PTP_CLASS_L4; dp83640->version = PTP_CLASS_V2; - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT; break; case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: @@ -1250,7 +1248,7 @@ static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) dp83640->hwts_rx_en = 1; dp83640->layer = PTP_CLASS_L2; dp83640->version = PTP_CLASS_V2; - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; break; case HWTSTAMP_FILTER_PTP_V2_EVENT: case HWTSTAMP_FILTER_PTP_V2_SYNC: @@ -1258,7 +1256,7 @@ static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) dp83640->hwts_rx_en = 1; dp83640->layer = PTP_CLASS_L4 | PTP_CLASS_L2; dp83640->version = PTP_CLASS_V2; - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; break; default: return -ERANGE; @@ -1292,7 +1290,7 @@ static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) mutex_unlock(&dp83640->clock->extreg_lock); - return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? 
-EFAULT : 0; + return 0; } static void rx_timestamp_work(struct work_struct *work) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 08e3915001c3..bd4cd082662f 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2395,24 +2395,22 @@ static void lan8814_flush_fifo(struct phy_device *phydev, bool egress) lanphy_read_page_reg(phydev, 5, PTP_TSU_INT_STS); } -static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) +static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct kszphy_ptp_priv *ptp_priv = container_of(mii_ts, struct kszphy_ptp_priv, mii_ts); struct phy_device *phydev = ptp_priv->phydev; struct lan8814_shared_priv *shared = phydev->shared->priv; struct lan8814_ptp_rx_ts *rx_ts, *tmp; - struct hwtstamp_config config; int txcfg = 0, rxcfg = 0; int pkt_ts_enable; - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; + ptp_priv->hwts_tx_type = config->tx_type; + ptp_priv->rx_filter = config->rx_filter; - ptp_priv->hwts_tx_type = config.tx_type; - ptp_priv->rx_filter = config.rx_filter; - - switch (config.rx_filter) { + switch (config->rx_filter) { case HWTSTAMP_FILTER_NONE: ptp_priv->layer = 0; ptp_priv->version = 0; @@ -2458,13 +2456,13 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD, PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_); - if (config.rx_filter != HWTSTAMP_FILTER_NONE) + if (config->rx_filter != HWTSTAMP_FILTER_NONE) lan8814_config_ts_intr(ptp_priv->phydev, true); else lan8814_config_ts_intr(ptp_priv->phydev, false); mutex_lock(&shared->shared_lock); - if (config.rx_filter != HWTSTAMP_FILTER_NONE) + if (config->rx_filter != HWTSTAMP_FILTER_NONE) shared->ref++; else shared->ref--; @@ -2488,7 +2486,7 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) lan8814_flush_fifo(ptp_priv->phydev, false); lan8814_flush_fifo(ptp_priv->phydev, true); - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? -EFAULT : 0; + return 0; } static void lan8814_txtstamp(struct mii_timestamper *mii_ts, @@ -3631,12 +3629,8 @@ static int lan8841_ts_info(struct mii_timestamper *mii_ts, info->phc_index = ptp_priv->ptp_clock ? 
ptp_clock_index(ptp_priv->ptp_clock) : -1; - if (info->phc_index == -1) { - info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; + if (info->phc_index == -1) return 0; - } info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | @@ -3703,21 +3697,19 @@ static void lan8841_ptp_enable_processing(struct kszphy_ptp_priv *ptp_priv, #define LAN8841_PTP_TX_TIMESTAMP_EN 443 #define LAN8841_PTP_TX_MOD 445 -static int lan8841_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) +static int lan8841_hwtstamp(struct mii_timestamper *mii_ts, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct kszphy_ptp_priv *ptp_priv = container_of(mii_ts, struct kszphy_ptp_priv, mii_ts); struct phy_device *phydev = ptp_priv->phydev; - struct hwtstamp_config config; int txcfg = 0, rxcfg = 0; int pkt_ts_enable; - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; + ptp_priv->hwts_tx_type = config->tx_type; + ptp_priv->rx_filter = config->rx_filter; - ptp_priv->hwts_tx_type = config.tx_type; - ptp_priv->rx_filter = config.rx_filter; - - switch (config.rx_filter) { + switch (config->rx_filter) { case HWTSTAMP_FILTER_NONE: ptp_priv->layer = 0; ptp_priv->version = 0; @@ -3771,13 +3763,13 @@ static int lan8841_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) /* Now enable/disable the timestamping */ lan8841_ptp_enable_processing(ptp_priv, - config.rx_filter != HWTSTAMP_FILTER_NONE); + config->rx_filter != HWTSTAMP_FILTER_NONE); skb_queue_purge(&ptp_priv->tx_queue); lan8841_ptp_flush_fifo(ptp_priv); - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? -EFAULT : 0; + return 0; } static bool lan8841_rxtstamp(struct mii_timestamper *mii_ts, diff --git a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c index cf728bfd83e2..eb0b032cb613 100644 --- a/drivers/net/phy/mscc/mscc_ptp.c +++ b/drivers/net/phy/mscc/mscc_ptp.c @@ -1045,19 +1045,17 @@ static void vsc85xx_ts_reset_fifo(struct phy_device *phydev) val); } -static int vsc85xx_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) +static int vsc85xx_hwtstamp(struct mii_timestamper *mii_ts, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { struct vsc8531_private *vsc8531 = container_of(mii_ts, struct vsc8531_private, mii_ts); struct phy_device *phydev = vsc8531->ptp->phydev; - struct hwtstamp_config cfg; bool one_step = false; u32 val; - if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) - return -EFAULT; - - switch (cfg.tx_type) { + switch (cfg->tx_type) { case HWTSTAMP_TX_ONESTEP_SYNC: one_step = true; break; @@ -1069,9 +1067,9 @@ static int vsc85xx_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) return -ERANGE; } - vsc8531->ptp->tx_type = cfg.tx_type; + vsc8531->ptp->tx_type = cfg->tx_type; - switch (cfg.rx_filter) { + switch (cfg->rx_filter) { case HWTSTAMP_FILTER_NONE: break; case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: @@ -1084,7 +1082,7 @@ static int vsc85xx_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) return -ERANGE; } - vsc8531->ptp->rx_filter = cfg.rx_filter; + vsc8531->ptp->rx_filter = cfg->rx_filter; mutex_lock(&vsc8531->ts_lock); @@ -1132,7 +1130,7 @@ static int vsc85xx_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) vsc8531->ptp->configured = 1; mutex_unlock(&vsc8531->ts_lock); - return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? 
-EFAULT : 0; + return 0; } static int vsc85xx_ts_info(struct mii_timestamper *mii_ts, diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index 7ab080ff02df..780ad353cf55 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -1022,24 +1022,21 @@ static bool nxp_c45_rxtstamp(struct mii_timestamper *mii_ts, } static int nxp_c45_hwtstamp(struct mii_timestamper *mii_ts, - struct ifreq *ifreq) + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { struct nxp_c45_phy *priv = container_of(mii_ts, struct nxp_c45_phy, mii_ts); struct phy_device *phydev = priv->phydev; const struct nxp_c45_phy_data *data; - struct hwtstamp_config cfg; - if (copy_from_user(&cfg, ifreq->ifr_data, sizeof(cfg))) - return -EFAULT; - - if (cfg.tx_type < 0 || cfg.tx_type > HWTSTAMP_TX_ON) + if (cfg->tx_type < 0 || cfg->tx_type > HWTSTAMP_TX_ON) return -ERANGE; data = nxp_c45_get_data(phydev); - priv->hwts_tx = cfg.tx_type; + priv->hwts_tx = cfg->tx_type; - switch (cfg.rx_filter) { + switch (cfg->rx_filter) { case HWTSTAMP_FILTER_NONE: priv->hwts_rx = 0; break; @@ -1047,7 +1044,7 @@ static int nxp_c45_hwtstamp(struct mii_timestamper *mii_ts, case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: priv->hwts_rx = 1; - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; break; default: return -ERANGE; @@ -1074,7 +1071,7 @@ static int nxp_c45_hwtstamp(struct mii_timestamper *mii_ts, nxp_c45_clear_reg_field(phydev, &data->regmap->irq_egr_ts_en); nxp_c45_no_ptp_irq: - return copy_to_user(ifreq->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; + return 0; } static int nxp_c45_ts_info(struct mii_timestamper *mii_ts, diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index a5fa077650e8..3376e58e2b88 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -325,9 +325,13 @@ EXPORT_SYMBOL(phy_ethtool_ksettings_get); int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) { struct mii_ioctl_data *mii_data = if_mii(ifr); + struct kernel_hwtstamp_config kernel_cfg; + struct netlink_ext_ack extack = {}; u16 val = mii_data->val_in; bool change_autoneg = false; + struct hwtstamp_config cfg; int prtad, devad; + int ret; switch (cmd) { case SIOCGMIIPHY: @@ -411,8 +415,21 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) return 0; case SIOCSHWTSTAMP: - if (phydev->mii_ts && phydev->mii_ts->hwtstamp) - return phydev->mii_ts->hwtstamp(phydev->mii_ts, ifr); + if (phydev->mii_ts && phydev->mii_ts->hwtstamp) { + if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) + return -EFAULT; + + hwtstamp_config_to_kernel(&kernel_cfg, &cfg); + ret = phydev->mii_ts->hwtstamp(phydev->mii_ts, &kernel_cfg, &extack); + if (ret) + return ret; + + hwtstamp_config_from_kernel(&cfg, &kernel_cfg); + if (copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg))) + return -EFAULT; + + return 0; + } fallthrough; default: @@ -469,7 +486,7 @@ int __phy_hwtstamp_get(struct phy_device *phydev, if (!phydev) return -ENODEV; - return phy_mii_ioctl(phydev, config->ifr, SIOCGHWTSTAMP); + return -EOPNOTSUPP; } /** @@ -486,7 +503,10 @@ int __phy_hwtstamp_set(struct phy_device *phydev, if (!phydev) return -ENODEV; - return phy_mii_ioctl(phydev, config->ifr, SIOCSHWTSTAMP); + if (phydev->mii_ts && phydev->mii_ts->hwtstamp) + return phydev->mii_ts->hwtstamp(phydev->mii_ts, config, extack); + + return -EOPNOTSUPP; } /** diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c 
index 25c19496a336..c276f9482f78 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -697,18 +697,16 @@ static int phylink_validate_mask(struct phylink *pl, unsigned long *supported, __ETHTOOL_DECLARE_LINK_MODE_MASK(all_s) = { 0, }; __ETHTOOL_DECLARE_LINK_MODE_MASK(s); struct phylink_link_state t; - int intf; + int interface; - for (intf = 0; intf < PHY_INTERFACE_MODE_MAX; intf++) { - if (test_bit(intf, interfaces)) { - linkmode_copy(s, supported); + for_each_set_bit(interface, interfaces, PHY_INTERFACE_MODE_MAX) { + linkmode_copy(s, supported); - t = *state; - t.interface = intf; - if (!phylink_validate_mac_and_pcs(pl, s, &t)) { - linkmode_or(all_s, all_s, s); - linkmode_or(all_adv, all_adv, t.advertising); - } + t = *state; + t.interface = interface; + if (!phylink_validate_mac_and_pcs(pl, s, &t)) { + linkmode_or(all_s, all_s, s); + linkmode_or(all_adv, all_adv, t.advertising); } } @@ -805,7 +803,7 @@ static int phylink_parse_fixedlink(struct phylink *pl, phylink_warn(pl, "fixed link specifies half duplex for %dMbps link?\n", pl->link_config.speed); - bitmap_fill(pl->supported, __ETHTOOL_LINK_MODE_MASK_NBITS); + linkmode_fill(pl->supported); linkmode_copy(pl->link_config.advertising, pl->supported); phylink_validate(pl, pl->supported, &pl->link_config); @@ -1640,7 +1638,7 @@ struct phylink *phylink_create(struct phylink_config *config, __set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state); timer_setup(&pl->link_poll, phylink_fixed_poll, 0); - bitmap_fill(pl->supported, __ETHTOOL_LINK_MODE_MASK_NBITS); + linkmode_fill(pl->supported); linkmode_copy(pl->link_config.advertising, pl->supported); phylink_validate(pl, pl->supported, &pl->link_config); diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 208a9393c2df..6fa679b36290 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -328,7 +328,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, * modules use 2500Mbaud rather than 3100 or 3200Mbaud for * 2500BASE-X, so we allow some slack here. 
*/ - if (bitmap_empty(modes, __ETHTOOL_LINK_MODE_MASK_NBITS) && br_nom) { + if (linkmode_empty(modes) && br_nom) { if (br_min <= 1300 && br_max >= 1200) { phylink_set(modes, 1000baseX_Full); __set_bit(PHY_INTERFACE_MODE_1000BASEX, interfaces); diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 4ea0e155bb0d..8d835fbc4316 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1298,8 +1298,6 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) netif_set_tso_max_size(dev->net, 16384); - ax88179_reset(dev); - return 0; } diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 412c3c0b6990..764ea02ff911 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2379,7 +2379,17 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, else udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX); #if IS_ENABLED(CONFIG_IPV6) - key.label = vxlan->cfg.label; + switch (vxlan->cfg.label_policy) { + case VXLAN_LABEL_FIXED: + key.label = vxlan->cfg.label; + break; + case VXLAN_LABEL_INHERIT: + key.label = ip_tunnel_get_flowlabel(old_iph, skb); + break; + default: + DEBUG_NET_WARN_ON_ONCE(1); + goto drop; + } #endif } else { if (!info) { @@ -3366,6 +3376,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_DF] = { .type = NLA_U8 }, [IFLA_VXLAN_VNIFILTER] = { .type = NLA_U8 }, [IFLA_VXLAN_LOCALBYPASS] = NLA_POLICY_MAX(NLA_U8, 1), + [IFLA_VXLAN_LABEL_POLICY] = NLA_POLICY_MAX(NLA_U32, VXLAN_LABEL_MAX), }; static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[], @@ -3740,6 +3751,12 @@ static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf, return -EINVAL; } + if (conf->label_policy && !use_ipv6) { + NL_SET_ERR_MSG(extack, + "Label policy only applies to IPv6 VXLAN devices"); + return -EINVAL; + } + if (conf->remote_ifindex) { struct net_device *lowerdev; @@ -4082,6 +4099,8 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], if (data[IFLA_VXLAN_LABEL]) conf->label = nla_get_be32(data[IFLA_VXLAN_LABEL]) & IPV6_FLOWLABEL_MASK; + if (data[IFLA_VXLAN_LABEL_POLICY]) + conf->label_policy = nla_get_u32(data[IFLA_VXLAN_LABEL_POLICY]); if (data[IFLA_VXLAN_LEARNING]) { err = vxlan_nl2flag(conf, data, IFLA_VXLAN_LEARNING, @@ -4398,6 +4417,7 @@ static size_t vxlan_get_size(const struct net_device *dev) nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_DF */ nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */ + nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LABEL_POLICY */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_RSC */ @@ -4471,6 +4491,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) || nla_put_u8(skb, IFLA_VXLAN_DF, vxlan->cfg.df) || nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) || + nla_put_u32(skb, IFLA_VXLAN_LABEL_POLICY, vxlan->cfg.label_policy) || nla_put_u8(skb, IFLA_VXLAN_LEARNING, !!(vxlan->cfg.flags & VXLAN_F_LEARN)) || nla_put_u8(skb, IFLA_VXLAN_PROXY, diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 55bc3576a985..69d20d585f88 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1219,6 +1219,9 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout) if (delay > PCI_RESET_WAIT) pci_info(dev, "ready %dms 
after %s\n", delay - 1, reset_type); + else + pci_dbg(dev, "ready %dms after %s\n", delay - 1, + reset_type); return 0; } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index ea476252280a..d208047d1b8f 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3787,6 +3787,19 @@ DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_ATI, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, 8, quirk_no_pm_reset); /* + * Spectrum-{1,2,3,4} devices report that a D3hot->D0 transition causes a reset + * (i.e., they advertise NoSoftRst-). However, this transition does not have + * any effect on the device: It continues to be operational and network ports + * remain up. Advertising this support makes it seem as if a PM reset is viable + * for these devices. Mark it as unavailable to skip it when testing reset + * methods. + */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcb84, quirk_no_pm_reset); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf6c, quirk_no_pm_reset); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf70, quirk_no_pm_reset); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf80, quirk_no_pm_reset); + +/* * Thunderbolt controllers with broken MSI hotplug signaling: * Entire 1st generation (Light Ridge, Eagle Ridge, Light Peak) and part * of the 2nd generation (Cactus Ridge 4C up to revision 1, Port Ridge). diff --git a/drivers/ptp/ptp_ines.c b/drivers/ptp/ptp_ines.c index ed215b458183..1d2940a78455 100644 --- a/drivers/ptp/ptp_ines.c +++ b/drivers/ptp/ptp_ines.c @@ -328,17 +328,15 @@ static u64 ines_find_txts(struct ines_port *port, struct sk_buff *skb) return ns; } -static int ines_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) +static int ines_hwtstamp(struct mii_timestamper *mii_ts, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { struct ines_port *port = container_of(mii_ts, struct ines_port, mii_ts); u32 cm_one_step = 0, port_conf, ts_stat_rx, ts_stat_tx; - struct hwtstamp_config cfg; unsigned long flags; - if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) - return -EFAULT; - - switch (cfg.tx_type) { + switch (cfg->tx_type) { case HWTSTAMP_TX_OFF: ts_stat_tx = 0; break; @@ -353,7 +351,7 @@ static int ines_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) return -ERANGE; } - switch (cfg.rx_filter) { + switch (cfg->rx_filter) { case HWTSTAMP_FILTER_NONE: ts_stat_rx = 0; break; @@ -372,7 +370,7 @@ static int ines_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: ts_stat_rx = TS_ENABLE; - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + cfg->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; break; default: return -ERANGE; @@ -393,7 +391,7 @@ static int ines_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr) spin_unlock_irqrestore(&port->lock, flags); - return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? 
-EFAULT : 0; + return 0; } static void ines_link_state(struct mii_timestamper *mii_ts, diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6762dac3ef76..258ba232e302 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -186,8 +186,8 @@ enum btf_field_type { BPF_LIST_NODE = (1 << 6), BPF_RB_ROOT = (1 << 7), BPF_RB_NODE = (1 << 8), - BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD | - BPF_RB_NODE | BPF_RB_ROOT, + BPF_GRAPH_NODE = BPF_RB_NODE | BPF_LIST_NODE, + BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD, BPF_REFCOUNT = (1 << 9), }; @@ -1226,6 +1226,8 @@ enum bpf_dynptr_type { int bpf_dynptr_check_size(u32 size); u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); +const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len); +void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len); #ifdef CONFIG_BPF_JIT int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index aa4d19d0bc94..d99a636d36a7 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -618,6 +618,7 @@ struct bpf_verifier_env { int stack_size; /* number of states to be processed */ bool strict_alignment; /* perform strict pointer alignment checks */ bool test_state_freq; /* test verifier with different pruning frequency */ + bool test_reg_invariants; /* fail verification on register invariants violations */ struct bpf_verifier_state *cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ struct bpf_verifier_state_list *free_list; @@ -695,6 +696,10 @@ int bpf_vlog_init(struct bpf_verifier_log *log, u32 log_level, void bpf_vlog_reset(struct bpf_verifier_log *log, u64 new_pos); int bpf_vlog_finalize(struct bpf_verifier_log *log, u32 *log_size_actual); +__printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env, + u32 insn_off, + const char *prefix_fmt, ...); + static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env) { struct bpf_verifier_state *cur = env->cur_state; @@ -794,4 +799,76 @@ static inline bool bpf_type_has_unsafe_modifiers(u32 type) return type_flag(type) & ~BPF_REG_TRUSTED_MODIFIERS; } +static inline bool type_is_ptr_alloc_obj(u32 type) +{ + return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC; +} + +static inline bool type_is_non_owning_ref(u32 type) +{ + return type_is_ptr_alloc_obj(type) && type_flag(type) & NON_OWN_REF; +} + +static inline bool type_is_pkt_pointer(enum bpf_reg_type type) +{ + type = base_type(type); + return type == PTR_TO_PACKET || + type == PTR_TO_PACKET_META; +} + +static inline bool type_is_sk_pointer(enum bpf_reg_type type) +{ + return type == PTR_TO_SOCKET || + type == PTR_TO_SOCK_COMMON || + type == PTR_TO_TCP_SOCK || + type == PTR_TO_XDP_SOCK; +} + +static inline void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno) +{ + env->scratched_regs |= 1U << regno; +} + +static inline void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi) +{ + env->scratched_stack_slots |= 1ULL << spi; +} + +static inline bool reg_scratched(const struct bpf_verifier_env *env, u32 regno) +{ + return (env->scratched_regs >> regno) & 1; +} + +static inline bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno) +{ + return (env->scratched_stack_slots >> regno) & 1; +} + +static inline bool verifier_state_scratched(const struct bpf_verifier_env *env) +{ + return env->scratched_regs || 
env->scratched_stack_slots; +} + +static inline void mark_verifier_state_clean(struct bpf_verifier_env *env) +{ + env->scratched_regs = 0U; + env->scratched_stack_slots = 0ULL; +} + +/* Used for printing the entire verifier state. */ +static inline void mark_verifier_state_scratched(struct bpf_verifier_env *env) +{ + env->scratched_regs = ~0U; + env->scratched_stack_slots = ~0ULL; +} + +const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type); +const char *dynptr_type_str(enum bpf_dynptr_type type); +const char *iter_type_str(const struct btf *btf, u32 btf_id); +const char *iter_state_str(enum bpf_iter_state state); + +void print_verifier_state(struct bpf_verifier_env *env, + const struct bpf_func_state *state, bool print_all); +void print_insn_state(struct bpf_verifier_env *env, const struct bpf_func_state *state); + #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 4a6b6b77ccb6..4caab0c6b361 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -563,6 +563,7 @@ struct cgroup_root { /* A list running through the active hierarchies */ struct list_head root_list; + struct rcu_head rcu; /* Hierarchy-specific flags */ unsigned int flags; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 0ef0af66080e..34aaf0e87def 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -69,6 +69,7 @@ struct css_task_iter { extern struct file_system_type cgroup_fs_type; extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; +extern spinlock_t css_set_lock; #define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys; #include <linux/cgroup_subsys.h> @@ -386,7 +387,6 @@ static inline void cgroup_unlock(void) * as locks used during the cgroup_subsys::attach() methods. */ #ifdef CONFIG_PROVE_RCU -extern spinlock_t css_set_lock; #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ rcu_read_lock_sched_held() || \ @@ -853,4 +853,6 @@ static inline void cgroup_bpf_put(struct cgroup *cgrp) {} #endif /* CONFIG_CGROUP_BPF */ +struct cgroup *task_get_cgroup1(struct task_struct *tsk, int hierarchy_id); + #endif /* _LINUX_CGROUP_H */ diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 2ceba3fe4ec1..aebb65bf95a7 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -136,7 +136,7 @@ #endif #define __diag_ignore_all(option, comment) \ - __diag_GCC(8, ignore, option) + __diag(__diag_GCC_ignore option) /* * Prior to 9.1, -Wno-alloc-size-larger-than (and therefore the "alloc_size" diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 689028257fcc..c2bb74143eda 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1044,6 +1044,14 @@ static inline int ethtool_mm_frag_size_min_to_add(u32 val_min, u32 *val_add, } /** + * ethtool_get_ts_info_by_layer - Obtains time stamping capabilities from the MAC or PHY layer. + * @dev: pointer to net_device structure + * @info: buffer to hold the result + * Returns zero on success, non-zero otherwise. 
+ */ +int ethtool_get_ts_info_by_layer(struct net_device *dev, struct ethtool_ts_info *info); + +/** * ethtool_sprintf - Write formatted string to ethtool string data * @data: Pointer to a pointer to the start of string to update * @fmt: Format of string to write diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h index c1c76a70a6ce..adb83a42a6b9 100644 --- a/include/linux/indirect_call_wrapper.h +++ b/include/linux/indirect_call_wrapper.h @@ -11,7 +11,7 @@ * @__VA_ARGS__: arguments for @f * * Avoid retpoline overhead for known builtin, checking @f vs each of them and - * eventually invoking directly the builtin function. The functions are check + * eventually invoking directly the builtin function. The functions are checked * in the given order. Fallback to the indirect call. */ #define INDIRECT_CALL_1(f, f1, ...) \ diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h index 7303b4bc2ce0..287f590ed56b 100644 --- a/include/linux/linkmode.h +++ b/include/linux/linkmode.h @@ -10,6 +10,11 @@ static inline void linkmode_zero(unsigned long *dst) bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS); } +static inline void linkmode_fill(unsigned long *dst) +{ + bitmap_fill(dst, __ETHTOOL_LINK_MODE_MASK_NBITS); +} + static inline void linkmode_copy(unsigned long *dst, const unsigned long *src) { bitmap_copy(dst, src, __ETHTOOL_LINK_MODE_MASK_NBITS); diff --git a/include/linux/mii_timestamper.h b/include/linux/mii_timestamper.h index fa940bbaf8ae..26b04f73f214 100644 --- a/include/linux/mii_timestamper.h +++ b/include/linux/mii_timestamper.h @@ -9,6 +9,7 @@ #include <linux/device.h> #include <linux/ethtool.h> #include <linux/skbuff.h> +#include <linux/net_tstamp.h> struct phy_device; @@ -51,7 +52,8 @@ struct mii_timestamper { struct sk_buff *skb, int type); int (*hwtstamp)(struct mii_timestamper *mii_ts, - struct ifreq *ifreq); + struct kernel_hwtstamp_config *kernel_config, + struct netlink_ext_ack *extack); void (*link_state)(struct mii_timestamper *mii_ts, struct phy_device *phydev); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6f3631425f38..ce2e71cd6d2a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10103,7 +10103,10 @@ enum { struct mlx5_ifc_mtutc_reg_bits { u8 reserved_at_0[0x5]; u8 freq_adj_units[0x3]; - u8 reserved_at_8[0x14]; + u8 reserved_at_8[0x3]; + u8 log_max_freq_adjustment[0x5]; + + u8 reserved_at_10[0xc]; u8 operation[0x4]; u8 freq_adjustment[0x20]; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2564e209465e..e87caa81f70c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3964,6 +3964,9 @@ int generic_hwtstamp_get_lower(struct net_device *dev, int generic_hwtstamp_set_lower(struct net_device *dev, struct kernel_hwtstamp_config *kernel_cfg, struct netlink_ext_ack *extack); +int dev_set_hwtstamp_phylib(struct net_device *dev, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack); int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata); unsigned int dev_get_flags(const struct net_device *); int __dev_change_flags(struct net_device *dev, unsigned int flags, diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 75d7de34c908..abe91ed6b9aa 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -351,5 +351,6 @@ bool netlink_ns_capable(const struct sk_buff *skb, struct user_namespace *ns, int cap); bool netlink_capable(const struct sk_buff *skb, int cap); bool 
netlink_net_capable(const struct sk_buff *skb, int cap); +struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast); #endif /* __LINUX_NETLINK_H */ diff --git a/include/linux/phy.h b/include/linux/phy.h index 3cc52826f18e..e5f1f41e399c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1560,9 +1560,11 @@ static inline bool phy_has_txtstamp(struct phy_device *phydev) return phydev && phydev->mii_ts && phydev->mii_ts->txtstamp; } -static inline int phy_hwtstamp(struct phy_device *phydev, struct ifreq *ifr) +static inline int phy_hwtstamp(struct phy_device *phydev, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { - return phydev->mii_ts->hwtstamp(phydev->mii_ts, ifr); + return phydev->mii_ts->hwtstamp(phydev->mii_ts, cfg, extack); } static inline bool phy_rxtstamp(struct phy_device *phydev, struct sk_buff *skb, diff --git a/include/linux/tnum.h b/include/linux/tnum.h index 1c3948a1d6ad..3c13240077b8 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -106,6 +106,10 @@ int tnum_sbin(char *str, size_t size, struct tnum a); struct tnum tnum_subreg(struct tnum a); /* Returns the tnum with the lower 32-bit subreg cleared */ struct tnum tnum_clear_subreg(struct tnum a); +/* Returns the tnum with the lower 32-bit subreg in *reg* set to the lower + * 32-bit subreg in *subreg* + */ +struct tnum tnum_with_subreg(struct tnum reg, struct tnum subreg); /* Returns the tnum with the lower 32-bit subreg set to value */ struct tnum tnum_const_subreg(struct tnum a, u32 value); /* Returns true if 32-bit subreg @a is a known constant*/ diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index f346b4efbc30..2d746f4c9a0a 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -416,6 +416,17 @@ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph, return 0; } +static inline __be32 ip_tunnel_get_flowlabel(const struct iphdr *iph, + const struct sk_buff *skb) +{ + __be16 payload_protocol = skb_protocol(skb, true); + + if (payload_protocol == htons(ETH_P_IPV6)) + return ip6_flowlabel((const struct ipv6hdr *)iph); + else + return 0; +} + static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph, const struct sk_buff *skb) { diff --git a/include/net/netlink.h b/include/net/netlink.h index 83bdf787aeee..167b91348e57 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -1011,6 +1011,20 @@ static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags) } /** + * nlmsg_new_large - Allocate a new netlink message with non-contiguous + * physical memory + * @payload: size of the message payload + * + * The allocated skb is unable to have frag page for shinfo->frags*, + * as the NULL setting for skb->head in netlink_skb_destructor() will + * bypass most of the handling in skb_release_data() + */ +static inline struct sk_buff *nlmsg_new_large(size_t payload) +{ + return netlink_alloc_large_skb(nlmsg_total_size(payload), 0); +} + +/** * nlmsg_end - Finalize a netlink message * @skb: socket buffer the message is stored in * @nlh: netlink message header diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 6fc5134095ed..e1bb92c192de 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -54,18 +54,22 @@ struct pp_alloc_cache { * @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV */ struct page_pool_params { - unsigned int flags; - unsigned int order; - unsigned int pool_size; - int nid; - struct device *dev; - struct napi_struct *napi; - enum 
dma_data_direction dma_dir; - unsigned int max_len; - unsigned int offset; + struct_group_tagged(page_pool_params_fast, fast, + unsigned int flags; + unsigned int order; + unsigned int pool_size; + int nid; + struct device *dev; + struct napi_struct *napi; + enum dma_data_direction dma_dir; + unsigned int max_len; + unsigned int offset; + ); + struct_group_tagged(page_pool_params_slow, slow, /* private: used by test code only */ - void (*init_callback)(struct page *page, void *arg); - void *init_arg; + void (*init_callback)(struct page *page, void *arg); + void *init_arg; + ); }; #ifdef CONFIG_PAGE_POOL_STATS @@ -119,7 +123,9 @@ struct page_pool_stats { #endif struct page_pool { - struct page_pool_params p; + struct page_pool_params_fast p; + + bool has_init_callback; long frag_users; struct page *frag_page; @@ -178,6 +184,9 @@ struct page_pool { refcount_t user_cnt; u64 destroy_cnt; + + /* Slow/Control-path information follows */ + struct page_pool_params_slow slow; }; struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 6a9f8a5f387c..33ba6fc151cf 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -210,22 +210,23 @@ struct vxlan_rdst { }; struct vxlan_config { - union vxlan_addr remote_ip; - union vxlan_addr saddr; - __be32 vni; - int remote_ifindex; - int mtu; - __be16 dst_port; - u16 port_min; - u16 port_max; - u8 tos; - u8 ttl; - __be32 label; - u32 flags; - unsigned long age_interval; - unsigned int addrmax; - bool no_share; - enum ifla_vxlan_df df; + union vxlan_addr remote_ip; + union vxlan_addr saddr; + __be32 vni; + int remote_ifindex; + int mtu; + __be16 dst_port; + u16 port_min; + u16 port_max; + u8 tos; + u8 ttl; + __be32 label; + enum ifla_vxlan_label_policy label_policy; + u32 flags; + unsigned long age_interval; + unsigned int addrmax; + bool no_share; + enum ifla_vxlan_df df; }; enum { diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h index 9204e4494b25..6e25753015df 100644 --- a/include/uapi/linux/batadv_packet.h +++ b/include/uapi/linux/batadv_packet.h @@ -116,6 +116,9 @@ enum batadv_icmp_packettype { * only need routable IPv4 multicast packets we signed up for explicitly * @BATADV_MCAST_WANT_NO_RTR6: we have no IPv6 multicast router and therefore * only need routable IPv6 multicast packets we signed up for explicitly + * @BATADV_MCAST_HAVE_MC_PTYPE_CAPA: we can parse, receive and forward + * batman-adv multicast packets with a multicast tracker TVLV. And all our + * hard interfaces have an MTU of at least 1280 bytes. 
*/ enum batadv_mcast_flags { BATADV_MCAST_WANT_ALL_UNSNOOPABLES = 1UL << 0, @@ -123,6 +126,7 @@ enum batadv_mcast_flags { BATADV_MCAST_WANT_ALL_IPV6 = 1UL << 2, BATADV_MCAST_WANT_NO_RTR4 = 1UL << 3, BATADV_MCAST_WANT_NO_RTR6 = 1UL << 4, + BATADV_MCAST_HAVE_MC_PTYPE_CAPA = 1UL << 5, }; /* tt data subtypes */ @@ -174,14 +178,16 @@ enum batadv_bla_claimframe { * @BATADV_TVLV_TT: translation table tvlv * @BATADV_TVLV_ROAM: roaming advertisement tvlv * @BATADV_TVLV_MCAST: multicast capability tvlv + * @BATADV_TVLV_MCAST_TRACKER: multicast tracker tvlv */ enum batadv_tvlv_type { - BATADV_TVLV_GW = 0x01, - BATADV_TVLV_DAT = 0x02, - BATADV_TVLV_NC = 0x03, - BATADV_TVLV_TT = 0x04, - BATADV_TVLV_ROAM = 0x05, - BATADV_TVLV_MCAST = 0x06, + BATADV_TVLV_GW = 0x01, + BATADV_TVLV_DAT = 0x02, + BATADV_TVLV_NC = 0x03, + BATADV_TVLV_TT = 0x04, + BATADV_TVLV_ROAM = 0x05, + BATADV_TVLV_MCAST = 0x06, + BATADV_TVLV_MCAST_TRACKER = 0x07, }; #pragma pack(2) @@ -488,6 +494,25 @@ struct batadv_bcast_packet { }; /** + * struct batadv_mcast_packet - multicast packet for network payload + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the general header + * @ttl: time to live for this packet, part of the general header + * @reserved: reserved byte for alignment + * @tvlv_len: length of the appended tvlv buffer (in bytes) + */ +struct batadv_mcast_packet { + __u8 packet_type; + __u8 version; + __u8 ttl; + __u8 reserved; + __be16 tvlv_len; + /* "4 bytes boundary + 2 bytes" long to make the payload after the + * following ethernet header again 4 bytes boundary aligned + */ +}; + +/** * struct batadv_coded_packet - network coded packet * @packet_type: batman-adv packet type, part of the general header * @version: batman-adv protocol version, part of the general header @@ -628,6 +653,14 @@ struct batadv_tvlv_mcast_data { __u8 reserved[3]; }; +/** + * struct batadv_tvlv_mcast_tracker - payload of a multicast tracker tvlv + * @num_dests: number of subsequent destination originator MAC addresses + */ +struct batadv_tvlv_mcast_tracker { + __be16 num_dests; +}; + #pragma pack() #endif /* _UAPI_LINUX_BATADV_PACKET_H_ */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0f6cdf52b1da..7a5498242eaa 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1200,6 +1200,9 @@ enum bpf_perf_event_type { */ #define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6) +/* The verifier internal test flag. Behavior is undefined */ +#define BPF_F_TEST_REG_INVARIANTS (1U << 7) + /* link_create.kprobe_multi.flags used in LINK_CREATE command for * BPF_TRACE_KPROBE_MULTI attach type to create return probe. */ @@ -4517,6 +4520,8 @@ union bpf_attr { * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. + * Note: the user stack will only be populated if the *task* is + * the current task; all other tasks will return -EOPNOTSUPP. * To achieve this, the helper needs *task*, which is a valid * pointer to **struct task_struct**. To store the stacktrace, the * bpf program provides *buf* with a nonnegative *size*. @@ -4528,6 +4533,7 @@ union bpf_attr { * * **BPF_F_USER_STACK** * Collect a user space stack instead of a kernel stack. + * The *task* must be the current task. * **BPF_F_USER_BUILD_ID** * Collect buildid+offset instead of ips for user stack, * only valid if **BPF_F_USER_STACK** is also specified. 
@@ -7151,40 +7157,31 @@ struct bpf_spin_lock { }; struct bpf_timer { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_dynptr { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_head { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_node { - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[3]; } __attribute__((aligned(8))); struct bpf_rb_root { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_rb_node { - __u64 :64; - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[4]; } __attribute__((aligned(8))); struct bpf_refcount { - __u32 :32; + __u32 __opaque[1]; } __attribute__((aligned(4))); struct bpf_sysctl { diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 29ff80da2775..8181ef23a7a2 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -856,6 +856,7 @@ enum { IFLA_VXLAN_DF, IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */ IFLA_VXLAN_LOCALBYPASS, + IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */ __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) @@ -873,6 +874,13 @@ enum ifla_vxlan_df { VXLAN_DF_MAX = __VXLAN_DF_END - 1, }; +enum ifla_vxlan_label_policy { + VXLAN_LABEL_FIXED = 0, + VXLAN_LABEL_INHERIT = 1, + __VXLAN_LABEL_END, + VXLAN_LABEL_MAX = __VXLAN_LABEL_END - 1, +}; + /* GENEVE section */ enum { IFLA_GENEVE_UNSPEC, diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 15d71d2986d3..63cf4128fc05 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3840,9 +3840,6 @@ end: return ERR_PTR(ret); } -#define GRAPH_ROOT_MASK (BPF_LIST_HEAD | BPF_RB_ROOT) -#define GRAPH_NODE_MASK (BPF_LIST_NODE | BPF_RB_NODE) - int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) { int i; @@ -3855,13 +3852,13 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) * Hence we only need to ensure that bpf_{list_head,rb_root} ownership * does not form cycles. */ - if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & GRAPH_ROOT_MASK)) + if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & BPF_GRAPH_ROOT)) return 0; for (i = 0; i < rec->cnt; i++) { struct btf_struct_meta *meta; u32 btf_id; - if (!(rec->fields[i].type & GRAPH_ROOT_MASK)) + if (!(rec->fields[i].type & BPF_GRAPH_ROOT)) continue; btf_id = rec->fields[i].graph_root.value_btf_id; meta = btf_find_struct_meta(btf, btf_id); @@ -3873,7 +3870,7 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) * to check ownership cycle for a type unless it's also a * node type. */ - if (!(rec->field_mask & GRAPH_NODE_MASK)) + if (!(rec->field_mask & BPF_GRAPH_NODE)) continue; /* We need to ensure ownership acyclicity among all types. The @@ -3909,7 +3906,7 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) * - A is both an root and node. * - B is only an node. 
*/ - if (meta->record->field_mask & GRAPH_ROOT_MASK) + if (meta->record->field_mask & BPF_GRAPH_ROOT) return -ELOOP; } return 0; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 56b0c1f678ee..b45a8381f9bd 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1937,10 +1937,7 @@ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu) ma = &bpf_global_percpu_ma; else ma = &bpf_global_ma; - if (rec && rec->refcount_off >= 0) - bpf_mem_free_rcu(ma, p); - else - bpf_mem_free(ma, p); + bpf_mem_free_rcu(ma, p); } __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) @@ -2231,6 +2228,25 @@ __bpf_kfunc long bpf_task_under_cgroup(struct task_struct *task, rcu_read_unlock(); return ret; } + +/** + * bpf_task_get_cgroup1 - Acquires the associated cgroup of a task within a + * specific cgroup1 hierarchy. The cgroup1 hierarchy is identified by its + * hierarchy ID. + * @task: The target task + * @hierarchy_id: The ID of a cgroup1 hierarchy + * + * On success, the cgroup is returen. On failure, NULL is returned. + */ +__bpf_kfunc struct cgroup * +bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) +{ + struct cgroup *cgrp = task_get_cgroup1(task, hierarchy_id); + + if (IS_ERR(cgrp)) + return NULL; + return cgrp; +} #endif /* CONFIG_CGROUPS */ /** @@ -2520,7 +2536,7 @@ BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_percpu_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE) BTF_ID_FLAGS(func, bpf_percpu_obj_drop_impl, KF_RELEASE) -BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL | KF_RCU) BTF_ID_FLAGS(func, bpf_list_push_front_impl) BTF_ID_FLAGS(func, bpf_list_push_back_impl) BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL) @@ -2537,6 +2553,7 @@ BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU) +BTF_ID_FLAGS(func, bpf_task_get_cgroup1, KF_ACQUIRE | KF_RCU | KF_RET_NULL) #endif BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_throw) @@ -2618,3 +2635,22 @@ static int __init kfunc_init(void) } late_initcall(kfunc_init); + +/* Get a pointer to dynptr data up to len bytes for read only access. If + * the dynptr doesn't have continuous data up to len bytes, return NULL. + */ +const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len) +{ + return bpf_dynptr_slice(ptr, 0, NULL, len); +} + +/* Get a pointer to dynptr data up to len bytes for read write access. If + * the dynptr doesn't have continuous data up to len bytes, or the dynptr + * is read only, return NULL. + */ +void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len) +{ + if (__bpf_dynptr_is_rdonly(ptr)) + return NULL; + return (void *)__bpf_dynptr_data(ptr, len); +} diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 850494423530..3505f3e5ae96 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -10,6 +10,8 @@ #include <linux/bpf_verifier.h> #include <linux/math64.h> +#define verbose(env, fmt, args...) 
bpf_verifier_log_write(env, fmt, ##args) + static bool bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log) { /* ubuf and len_total should both be specified (or not) together */ @@ -325,3 +327,481 @@ __printf(2, 3) void bpf_log(struct bpf_verifier_log *log, va_end(args); } EXPORT_SYMBOL_GPL(bpf_log); + +static const struct bpf_line_info * +find_linfo(const struct bpf_verifier_env *env, u32 insn_off) +{ + const struct bpf_line_info *linfo; + const struct bpf_prog *prog; + u32 i, nr_linfo; + + prog = env->prog; + nr_linfo = prog->aux->nr_linfo; + + if (!nr_linfo || insn_off >= prog->len) + return NULL; + + linfo = prog->aux->linfo; + for (i = 1; i < nr_linfo; i++) + if (insn_off < linfo[i].insn_off) + break; + + return &linfo[i - 1]; +} + +static const char *ltrim(const char *s) +{ + while (isspace(*s)) + s++; + + return s; +} + +__printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env, + u32 insn_off, + const char *prefix_fmt, ...) +{ + const struct bpf_line_info *linfo; + + if (!bpf_verifier_log_needed(&env->log)) + return; + + linfo = find_linfo(env, insn_off); + if (!linfo || linfo == env->prev_linfo) + return; + + if (prefix_fmt) { + va_list args; + + va_start(args, prefix_fmt); + bpf_verifier_vlog(&env->log, prefix_fmt, args); + va_end(args); + } + + verbose(env, "%s\n", + ltrim(btf_name_by_offset(env->prog->aux->btf, + linfo->line_off))); + + env->prev_linfo = linfo; +} + +static const char *btf_type_name(const struct btf *btf, u32 id) +{ + return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off); +} + +/* string representation of 'enum bpf_reg_type' + * + * Note that reg_type_str() can not appear more than once in a single verbose() + * statement. + */ +const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type) +{ + char postfix[16] = {0}, prefix[64] = {0}; + static const char * const str[] = { + [NOT_INIT] = "?", + [SCALAR_VALUE] = "scalar", + [PTR_TO_CTX] = "ctx", + [CONST_PTR_TO_MAP] = "map_ptr", + [PTR_TO_MAP_VALUE] = "map_value", + [PTR_TO_STACK] = "fp", + [PTR_TO_PACKET] = "pkt", + [PTR_TO_PACKET_META] = "pkt_meta", + [PTR_TO_PACKET_END] = "pkt_end", + [PTR_TO_FLOW_KEYS] = "flow_keys", + [PTR_TO_SOCKET] = "sock", + [PTR_TO_SOCK_COMMON] = "sock_common", + [PTR_TO_TCP_SOCK] = "tcp_sock", + [PTR_TO_TP_BUFFER] = "tp_buffer", + [PTR_TO_XDP_SOCK] = "xdp_sock", + [PTR_TO_BTF_ID] = "ptr_", + [PTR_TO_MEM] = "mem", + [PTR_TO_BUF] = "buf", + [PTR_TO_FUNC] = "func", + [PTR_TO_MAP_KEY] = "map_key", + [CONST_PTR_TO_DYNPTR] = "dynptr_ptr", + }; + + if (type & PTR_MAYBE_NULL) { + if (base_type(type) == PTR_TO_BTF_ID) + strncpy(postfix, "or_null_", 16); + else + strncpy(postfix, "_or_null", 16); + } + + snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s%s", + type & MEM_RDONLY ? "rdonly_" : "", + type & MEM_RINGBUF ? "ringbuf_" : "", + type & MEM_USER ? "user_" : "", + type & MEM_PERCPU ? "percpu_" : "", + type & MEM_RCU ? "rcu_" : "", + type & PTR_UNTRUSTED ? "untrusted_" : "", + type & PTR_TRUSTED ? 
"trusted_" : "" + ); + + snprintf(env->tmp_str_buf, TMP_STR_BUF_LEN, "%s%s%s", + prefix, str[base_type(type)], postfix); + return env->tmp_str_buf; +} + +const char *dynptr_type_str(enum bpf_dynptr_type type) +{ + switch (type) { + case BPF_DYNPTR_TYPE_LOCAL: + return "local"; + case BPF_DYNPTR_TYPE_RINGBUF: + return "ringbuf"; + case BPF_DYNPTR_TYPE_SKB: + return "skb"; + case BPF_DYNPTR_TYPE_XDP: + return "xdp"; + case BPF_DYNPTR_TYPE_INVALID: + return "<invalid>"; + default: + WARN_ONCE(1, "unknown dynptr type %d\n", type); + return "<unknown>"; + } +} + +const char *iter_type_str(const struct btf *btf, u32 btf_id) +{ + if (!btf || btf_id == 0) + return "<invalid>"; + + /* we already validated that type is valid and has conforming name */ + return btf_type_name(btf, btf_id) + sizeof(ITER_PREFIX) - 1; +} + +const char *iter_state_str(enum bpf_iter_state state) +{ + switch (state) { + case BPF_ITER_STATE_ACTIVE: + return "active"; + case BPF_ITER_STATE_DRAINED: + return "drained"; + case BPF_ITER_STATE_INVALID: + return "<invalid>"; + default: + WARN_ONCE(1, "unknown iter state %d\n", state); + return "<unknown>"; + } +} + +static char slot_type_char[] = { + [STACK_INVALID] = '?', + [STACK_SPILL] = 'r', + [STACK_MISC] = 'm', + [STACK_ZERO] = '0', + [STACK_DYNPTR] = 'd', + [STACK_ITER] = 'i', +}; + +static void print_liveness(struct bpf_verifier_env *env, + enum bpf_reg_liveness live) +{ + if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE)) + verbose(env, "_"); + if (live & REG_LIVE_READ) + verbose(env, "r"); + if (live & REG_LIVE_WRITTEN) + verbose(env, "w"); + if (live & REG_LIVE_DONE) + verbose(env, "D"); +} + +#define UNUM_MAX_DECIMAL U16_MAX +#define SNUM_MAX_DECIMAL S16_MAX +#define SNUM_MIN_DECIMAL S16_MIN + +static bool is_unum_decimal(u64 num) +{ + return num <= UNUM_MAX_DECIMAL; +} + +static bool is_snum_decimal(s64 num) +{ + return num >= SNUM_MIN_DECIMAL && num <= SNUM_MAX_DECIMAL; +} + +static void verbose_unum(struct bpf_verifier_env *env, u64 num) +{ + if (is_unum_decimal(num)) + verbose(env, "%llu", num); + else + verbose(env, "%#llx", num); +} + +static void verbose_snum(struct bpf_verifier_env *env, s64 num) +{ + if (is_snum_decimal(num)) + verbose(env, "%lld", num); + else + verbose(env, "%#llx", num); +} + +static void print_scalar_ranges(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, + const char **sep) +{ + /* For signed ranges, we want to unify 64-bit and 32-bit values in the + * output as much as possible, but there is a bit of a complication. + * If we choose to print values as decimals, this is natural to do, + * because negative 64-bit and 32-bit values >= -S32_MIN have the same + * representation due to sign extension. But if we choose to print + * them in hex format (see is_snum_decimal()), then sign extension is + * misleading. + * E.g., smin=-2 and smin32=-2 are exactly the same in decimal, but in + * hex they will be smin=0xfffffffffffffffe and smin32=0xfffffffe, two + * very different numbers. + * So we avoid sign extension if we choose to print values in hex. + */ + struct { + const char *name; + u64 val; + bool omit; + } minmaxs[] = { + {"smin", reg->smin_value, reg->smin_value == S64_MIN}, + {"smax", reg->smax_value, reg->smax_value == S64_MAX}, + {"umin", reg->umin_value, reg->umin_value == 0}, + {"umax", reg->umax_value, reg->umax_value == U64_MAX}, + {"smin32", + is_snum_decimal((s64)reg->s32_min_value) + ? 
(s64)reg->s32_min_value + : (u32)reg->s32_min_value, reg->s32_min_value == S32_MIN}, + {"smax32", + is_snum_decimal((s64)reg->s32_max_value) + ? (s64)reg->s32_max_value + : (u32)reg->s32_max_value, reg->s32_max_value == S32_MAX}, + {"umin32", reg->u32_min_value, reg->u32_min_value == 0}, + {"umax32", reg->u32_max_value, reg->u32_max_value == U32_MAX}, + }, *m1, *m2, *mend = &minmaxs[ARRAY_SIZE(minmaxs)]; + bool neg1, neg2; + + for (m1 = &minmaxs[0]; m1 < mend; m1++) { + if (m1->omit) + continue; + + neg1 = m1->name[0] == 's' && (s64)m1->val < 0; + + verbose(env, "%s%s=", *sep, m1->name); + *sep = ","; + + for (m2 = m1 + 2; m2 < mend; m2 += 2) { + if (m2->omit || m2->val != m1->val) + continue; + /* don't mix negatives with positives */ + neg2 = m2->name[0] == 's' && (s64)m2->val < 0; + if (neg2 != neg1) + continue; + m2->omit = true; + verbose(env, "%s=", m2->name); + } + + if (m1->name[0] == 's') + verbose_snum(env, m1->val); + else + verbose_unum(env, m1->val); + } +} + +static bool type_is_map_ptr(enum bpf_reg_type t) { + switch (base_type(t)) { + case CONST_PTR_TO_MAP: + case PTR_TO_MAP_KEY: + case PTR_TO_MAP_VALUE: + return true; + default: + return false; + } +} + +static void print_reg_state(struct bpf_verifier_env *env, + const struct bpf_func_state *state, + const struct bpf_reg_state *reg) +{ + enum bpf_reg_type t; + const char *sep = ""; + + t = reg->type; + if (t == SCALAR_VALUE && reg->precise) + verbose(env, "P"); + if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) { + /* reg->off should be 0 for SCALAR_VALUE */ + verbose_snum(env, reg->var_off.value + reg->off); + return; + } +/* + * _a stands for append, was shortened to avoid multiline statements below. + * This macro is used to output a comma separated list of attributes. + */ +#define verbose_a(fmt, ...) 
({ verbose(env, "%s" fmt, sep, ##__VA_ARGS__); sep = ","; }) + + verbose(env, "%s", reg_type_str(env, t)); + if (t == PTR_TO_STACK) { + if (state->frameno != reg->frameno) + verbose(env, "[%d]", reg->frameno); + if (tnum_is_const(reg->var_off)) { + verbose_snum(env, reg->var_off.value + reg->off); + return; + } + } + if (base_type(t) == PTR_TO_BTF_ID) + verbose(env, "%s", btf_type_name(reg->btf, reg->btf_id)); + verbose(env, "("); + if (reg->id) + verbose_a("id=%d", reg->id); + if (reg->ref_obj_id) + verbose_a("ref_obj_id=%d", reg->ref_obj_id); + if (type_is_non_owning_ref(reg->type)) + verbose_a("%s", "non_own_ref"); + if (type_is_map_ptr(t)) { + if (reg->map_ptr->name[0]) + verbose_a("map=%s", reg->map_ptr->name); + verbose_a("ks=%d,vs=%d", + reg->map_ptr->key_size, + reg->map_ptr->value_size); + } + if (t != SCALAR_VALUE && reg->off) { + verbose_a("off="); + verbose_snum(env, reg->off); + } + if (type_is_pkt_pointer(t)) { + verbose_a("r="); + verbose_unum(env, reg->range); + } + if (tnum_is_const(reg->var_off)) { + /* a pointer register with fixed offset */ + if (reg->var_off.value) { + verbose_a("imm="); + verbose_snum(env, reg->var_off.value); + } + } else { + print_scalar_ranges(env, reg, &sep); + if (!tnum_is_unknown(reg->var_off)) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose_a("var_off=%s", tn_buf); + } + } + verbose(env, ")"); + +#undef verbose_a +} + +void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_func_state *state, + bool print_all) +{ + const struct bpf_reg_state *reg; + int i; + + if (state->frameno) + verbose(env, " frame%d:", state->frameno); + for (i = 0; i < MAX_BPF_REG; i++) { + reg = &state->regs[i]; + if (reg->type == NOT_INIT) + continue; + if (!print_all && !reg_scratched(env, i)) + continue; + verbose(env, " R%d", i); + print_liveness(env, reg->live); + verbose(env, "="); + print_reg_state(env, state, reg); + } + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { + char types_buf[BPF_REG_SIZE + 1]; + bool valid = false; + u8 slot_type; + int j; + + if (!print_all && !stack_slot_scratched(env, i)) + continue; + + for (j = 0; j < BPF_REG_SIZE; j++) { + slot_type = state->stack[i].slot_type[j]; + if (slot_type != STACK_INVALID) + valid = true; + types_buf[j] = slot_type_char[slot_type]; + } + types_buf[BPF_REG_SIZE] = 0; + if (!valid) + continue; + + reg = &state->stack[i].spilled_ptr; + switch (state->stack[i].slot_type[BPF_REG_SIZE - 1]) { + case STACK_SPILL: + /* print MISC/ZERO/INVALID slots above subreg spill */ + for (j = 0; j < BPF_REG_SIZE; j++) + if (state->stack[i].slot_type[j] == STACK_SPILL) + break; + types_buf[j] = '\0'; + + verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); + print_liveness(env, reg->live); + verbose(env, "=%s", types_buf); + print_reg_state(env, state, reg); + break; + case STACK_DYNPTR: + /* skip to main dynptr slot */ + i += BPF_DYNPTR_NR_SLOTS - 1; + reg = &state->stack[i].spilled_ptr; + + verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); + print_liveness(env, reg->live); + verbose(env, "=dynptr_%s", dynptr_type_str(reg->dynptr.type)); + if (reg->ref_obj_id) + verbose(env, "(ref_id=%d)", reg->ref_obj_id); + break; + case STACK_ITER: + /* only main slot has ref_obj_id set; skip others */ + if (!reg->ref_obj_id) + continue; + + verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); + print_liveness(env, reg->live); + verbose(env, "=iter_%s(ref_id=%d,state=%s,depth=%u)", + iter_type_str(reg->iter.btf, reg->iter.btf_id), + reg->ref_obj_id, iter_state_str(reg->iter.state), + 
reg->iter.depth); + break; + case STACK_MISC: + case STACK_ZERO: + default: + verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); + print_liveness(env, reg->live); + verbose(env, "=%s", types_buf); + break; + } + } + if (state->acquired_refs && state->refs[0].id) { + verbose(env, " refs=%d", state->refs[0].id); + for (i = 1; i < state->acquired_refs; i++) + if (state->refs[i].id) + verbose(env, ",%d", state->refs[i].id); + } + if (state->in_callback_fn) + verbose(env, " cb"); + if (state->in_async_callback_fn) + verbose(env, " async_cb"); + verbose(env, "\n"); + if (!print_all) + mark_verifier_state_clean(env); +} + +static inline u32 vlog_alignment(u32 pos) +{ + return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT), + BPF_LOG_MIN_ALIGNMENT) - pos - 1; +} + +void print_insn_state(struct bpf_verifier_env *env, const struct bpf_func_state *state) +{ + if (env->prev_log_pos && env->prev_log_pos == env->log.end_pos) { + /* remove new line character */ + bpf_vlog_reset(&env->log, env->prev_log_pos - 1); + verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_pos), ' '); + } else { + verbose(env, "%d:", env->insn_idx); + } + print_verifier_state(env, state, false); +} diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 17c7e7782a1f..b32be680da6c 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -231,6 +231,9 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key) struct lpm_trie_node *node, *found = NULL; struct bpf_lpm_trie_key *key = _key; + if (key->prefixlen > trie->max_prefixlen) + return NULL; + /* Start walking the trie from the root node ... */ for (node = rcu_dereference_check(trie->root, rcu_read_lock_bh_held()); diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index d6b277482085..dff7ba539701 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -388,6 +388,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, { u32 trace_nr, copy_len, elem_size, num_elem, max_depth; bool user_build_id = flags & BPF_F_USER_BUILD_ID; + bool crosstask = task && task != current; u32 skip = flags & BPF_F_SKIP_FIELD_MASK; bool user = flags & BPF_F_USER_STACK; struct perf_callchain_entry *trace; @@ -410,6 +411,14 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, if (task && user && !user_mode(regs)) goto err_fault; + /* get_perf_callchain does not support crosstask user stack walking + * but returns an empty stack instead of NULL. 
+ */ + if (crosstask && user) { + err = -EOPNOTSUPP; + goto clear; + } + num_elem = size / elem_size; max_depth = num_elem + skip; if (sysctl_perf_event_max_stack < max_depth) @@ -421,7 +430,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, trace = get_callchain_entry_for_task(task, max_depth); else trace = get_perf_callchain(regs, 0, kernel, user, max_depth, - false, false); + crosstask, false); if (unlikely(!trace)) goto err_fault; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0ed286b8a0f0..5e43ddd1b83f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2573,7 +2573,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) BPF_F_SLEEPABLE | BPF_F_TEST_RND_HI32 | BPF_F_XDP_HAS_FRAGS | - BPF_F_XDP_DEV_BOUND_ONLY)) + BPF_F_XDP_DEV_BOUND_ONLY | + BPF_F_TEST_REG_INVARIANTS)) return -EINVAL; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 26082b97894d..e5c3500443c6 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -70,15 +70,13 @@ static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_comm return NULL; retry: - task = next_thread(task); + task = __next_thread(task); + if (!task) + return NULL; next_tid = __task_pid_nr_ns(task, PIDTYPE_PID, common->ns); - if (!next_tid || next_tid == common->pid) { - /* Run out of tasks of a process. The tasks of a - * thread_group are linked as circular linked list. - */ - return NULL; - } + if (!next_tid) + goto retry; if (skip_if_dup_files && task->files == task->group_leader->files) goto retry; @@ -980,7 +978,6 @@ __bpf_kfunc int bpf_iter_task_new(struct bpf_iter_task *it, BUILD_BUG_ON(__alignof__(struct bpf_iter_task_kern) != __alignof__(struct bpf_iter_task)); - kit->task = kit->pos = NULL; switch (flags) { case BPF_TASK_ITER_ALL_THREADS: case BPF_TASK_ITER_ALL_PROCS: @@ -1017,20 +1014,16 @@ __bpf_kfunc struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it) if (flags == BPF_TASK_ITER_ALL_PROCS) goto get_next_task; - kit->pos = next_thread(kit->pos); - if (kit->pos == kit->task) { - if (flags == BPF_TASK_ITER_PROC_THREADS) { - kit->pos = NULL; - return pos; - } - } else + kit->pos = __next_thread(kit->pos); + if (kit->pos || flags == BPF_TASK_ITER_PROC_THREADS) return pos; get_next_task: - kit->pos = next_task(kit->pos); - kit->task = kit->pos; - if (kit->pos == &init_task) + kit->task = next_task(kit->task); + if (kit->task == &init_task) kit->pos = NULL; + else + kit->pos = kit->task; return pos; } diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c index 3d7127f439a1..f4c91c9b27d7 100644 --- a/kernel/bpf/tnum.c +++ b/kernel/bpf/tnum.c @@ -208,7 +208,12 @@ struct tnum tnum_clear_subreg(struct tnum a) return tnum_lshift(tnum_rshift(a, 32), 32); } +struct tnum tnum_with_subreg(struct tnum reg, struct tnum subreg) +{ + return tnum_or(tnum_clear_subreg(reg), tnum_subreg(subreg)); +} + struct tnum tnum_const_subreg(struct tnum a, u32 value) { - return tnum_or(tnum_clear_subreg(a), tnum_const(value)); + return tnum_with_subreg(a, tnum_const(value)); } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index af2819d5c8ee..405da1f9e724 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -342,27 +342,6 @@ struct btf *btf_vmlinux; static DEFINE_MUTEX(bpf_verifier_lock); static DEFINE_MUTEX(bpf_percpu_ma_lock); -static const struct bpf_line_info * -find_linfo(const struct bpf_verifier_env *env, u32 insn_off) -{ - const struct 
bpf_line_info *linfo; - const struct bpf_prog *prog; - u32 i, nr_linfo; - - prog = env->prog; - nr_linfo = prog->aux->nr_linfo; - - if (!nr_linfo || insn_off >= prog->len) - return NULL; - - linfo = prog->aux->linfo; - for (i = 1; i < nr_linfo; i++) - if (insn_off < linfo[i].insn_off) - break; - - return &linfo[i - 1]; -} - __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...) { struct bpf_verifier_env *env = private_data; @@ -376,42 +355,6 @@ __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...) va_end(args); } -static const char *ltrim(const char *s) -{ - while (isspace(*s)) - s++; - - return s; -} - -__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env, - u32 insn_off, - const char *prefix_fmt, ...) -{ - const struct bpf_line_info *linfo; - - if (!bpf_verifier_log_needed(&env->log)) - return; - - linfo = find_linfo(env, insn_off); - if (!linfo || linfo == env->prev_linfo) - return; - - if (prefix_fmt) { - va_list args; - - va_start(args, prefix_fmt); - bpf_verifier_vlog(&env->log, prefix_fmt, args); - va_end(args); - } - - verbose(env, "%s\n", - ltrim(btf_name_by_offset(env->prog->aux->btf, - linfo->line_off))); - - env->prev_linfo = linfo; -} - static void verbose_invalid_scalar(struct bpf_verifier_env *env, struct bpf_reg_state *reg, struct tnum *range, const char *ctx, @@ -430,21 +373,6 @@ static void verbose_invalid_scalar(struct bpf_verifier_env *env, verbose(env, " should have been in %s\n", tn_buf); } -static bool type_is_pkt_pointer(enum bpf_reg_type type) -{ - type = base_type(type); - return type == PTR_TO_PACKET || - type == PTR_TO_PACKET_META; -} - -static bool type_is_sk_pointer(enum bpf_reg_type type) -{ - return type == PTR_TO_SOCKET || - type == PTR_TO_SOCK_COMMON || - type == PTR_TO_TCP_SOCK || - type == PTR_TO_XDP_SOCK; -} - static bool type_may_be_null(u32 type) { return type & PTR_MAYBE_NULL; @@ -468,16 +396,6 @@ static bool reg_not_null(const struct bpf_reg_state *reg) type == PTR_TO_MEM; } -static bool type_is_ptr_alloc_obj(u32 type) -{ - return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC; -} - -static bool type_is_non_owning_ref(u32 type) -{ - return type_is_ptr_alloc_obj(type) && type_flag(type) & NON_OWN_REF; -} - static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg) { struct btf_record *rec = NULL; @@ -605,83 +523,6 @@ static bool is_cmpxchg_insn(const struct bpf_insn *insn) insn->imm == BPF_CMPXCHG; } -/* string representation of 'enum bpf_reg_type' - * - * Note that reg_type_str() can not appear more than once in a single verbose() - * statement. 
- */ -static const char *reg_type_str(struct bpf_verifier_env *env, - enum bpf_reg_type type) -{ - char postfix[16] = {0}, prefix[64] = {0}; - static const char * const str[] = { - [NOT_INIT] = "?", - [SCALAR_VALUE] = "scalar", - [PTR_TO_CTX] = "ctx", - [CONST_PTR_TO_MAP] = "map_ptr", - [PTR_TO_MAP_VALUE] = "map_value", - [PTR_TO_STACK] = "fp", - [PTR_TO_PACKET] = "pkt", - [PTR_TO_PACKET_META] = "pkt_meta", - [PTR_TO_PACKET_END] = "pkt_end", - [PTR_TO_FLOW_KEYS] = "flow_keys", - [PTR_TO_SOCKET] = "sock", - [PTR_TO_SOCK_COMMON] = "sock_common", - [PTR_TO_TCP_SOCK] = "tcp_sock", - [PTR_TO_TP_BUFFER] = "tp_buffer", - [PTR_TO_XDP_SOCK] = "xdp_sock", - [PTR_TO_BTF_ID] = "ptr_", - [PTR_TO_MEM] = "mem", - [PTR_TO_BUF] = "buf", - [PTR_TO_FUNC] = "func", - [PTR_TO_MAP_KEY] = "map_key", - [CONST_PTR_TO_DYNPTR] = "dynptr_ptr", - }; - - if (type & PTR_MAYBE_NULL) { - if (base_type(type) == PTR_TO_BTF_ID) - strncpy(postfix, "or_null_", 16); - else - strncpy(postfix, "_or_null", 16); - } - - snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s%s", - type & MEM_RDONLY ? "rdonly_" : "", - type & MEM_RINGBUF ? "ringbuf_" : "", - type & MEM_USER ? "user_" : "", - type & MEM_PERCPU ? "percpu_" : "", - type & MEM_RCU ? "rcu_" : "", - type & PTR_UNTRUSTED ? "untrusted_" : "", - type & PTR_TRUSTED ? "trusted_" : "" - ); - - snprintf(env->tmp_str_buf, TMP_STR_BUF_LEN, "%s%s%s", - prefix, str[base_type(type)], postfix); - return env->tmp_str_buf; -} - -static char slot_type_char[] = { - [STACK_INVALID] = '?', - [STACK_SPILL] = 'r', - [STACK_MISC] = 'm', - [STACK_ZERO] = '0', - [STACK_DYNPTR] = 'd', - [STACK_ITER] = 'i', -}; - -static void print_liveness(struct bpf_verifier_env *env, - enum bpf_reg_liveness live) -{ - if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE)) - verbose(env, "_"); - if (live & REG_LIVE_READ) - verbose(env, "r"); - if (live & REG_LIVE_WRITTEN) - verbose(env, "w"); - if (live & REG_LIVE_DONE) - verbose(env, "D"); -} - static int __get_spi(s32 off) { return (-off - 1) / BPF_REG_SIZE; @@ -751,87 +592,6 @@ static const char *btf_type_name(const struct btf *btf, u32 id) return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off); } -static const char *dynptr_type_str(enum bpf_dynptr_type type) -{ - switch (type) { - case BPF_DYNPTR_TYPE_LOCAL: - return "local"; - case BPF_DYNPTR_TYPE_RINGBUF: - return "ringbuf"; - case BPF_DYNPTR_TYPE_SKB: - return "skb"; - case BPF_DYNPTR_TYPE_XDP: - return "xdp"; - case BPF_DYNPTR_TYPE_INVALID: - return "<invalid>"; - default: - WARN_ONCE(1, "unknown dynptr type %d\n", type); - return "<unknown>"; - } -} - -static const char *iter_type_str(const struct btf *btf, u32 btf_id) -{ - if (!btf || btf_id == 0) - return "<invalid>"; - - /* we already validated that type is valid and has conforming name */ - return btf_type_name(btf, btf_id) + sizeof(ITER_PREFIX) - 1; -} - -static const char *iter_state_str(enum bpf_iter_state state) -{ - switch (state) { - case BPF_ITER_STATE_ACTIVE: - return "active"; - case BPF_ITER_STATE_DRAINED: - return "drained"; - case BPF_ITER_STATE_INVALID: - return "<invalid>"; - default: - WARN_ONCE(1, "unknown iter state %d\n", state); - return "<unknown>"; - } -} - -static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno) -{ - env->scratched_regs |= 1U << regno; -} - -static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi) -{ - env->scratched_stack_slots |= 1ULL << spi; -} - -static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno) -{ - return (env->scratched_regs >> regno) & 
1; -} - -static bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno) -{ - return (env->scratched_stack_slots >> regno) & 1; -} - -static bool verifier_state_scratched(const struct bpf_verifier_env *env) -{ - return env->scratched_regs || env->scratched_stack_slots; -} - -static void mark_verifier_state_clean(struct bpf_verifier_env *env) -{ - env->scratched_regs = 0U; - env->scratched_stack_slots = 0ULL; -} - -/* Used for printing the entire verifier state. */ -static void mark_verifier_state_scratched(struct bpf_verifier_env *env) -{ - env->scratched_regs = ~0U; - env->scratched_stack_slots = ~0ULL; -} - static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type) { switch (arg_type & DYNPTR_TYPE_FLAG_MASK) { @@ -1371,226 +1131,6 @@ static void scrub_spilled_slot(u8 *stype) *stype = STACK_MISC; } -static void print_scalar_ranges(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, - const char **sep) -{ - struct { - const char *name; - u64 val; - bool omit; - } minmaxs[] = { - {"smin", reg->smin_value, reg->smin_value == S64_MIN}, - {"smax", reg->smax_value, reg->smax_value == S64_MAX}, - {"umin", reg->umin_value, reg->umin_value == 0}, - {"umax", reg->umax_value, reg->umax_value == U64_MAX}, - {"smin32", (s64)reg->s32_min_value, reg->s32_min_value == S32_MIN}, - {"smax32", (s64)reg->s32_max_value, reg->s32_max_value == S32_MAX}, - {"umin32", reg->u32_min_value, reg->u32_min_value == 0}, - {"umax32", reg->u32_max_value, reg->u32_max_value == U32_MAX}, - }, *m1, *m2, *mend = &minmaxs[ARRAY_SIZE(minmaxs)]; - bool neg1, neg2; - - for (m1 = &minmaxs[0]; m1 < mend; m1++) { - if (m1->omit) - continue; - - neg1 = m1->name[0] == 's' && (s64)m1->val < 0; - - verbose(env, "%s%s=", *sep, m1->name); - *sep = ","; - - for (m2 = m1 + 2; m2 < mend; m2 += 2) { - if (m2->omit || m2->val != m1->val) - continue; - /* don't mix negatives with positives */ - neg2 = m2->name[0] == 's' && (s64)m2->val < 0; - if (neg2 != neg1) - continue; - m2->omit = true; - verbose(env, "%s=", m2->name); - } - - verbose(env, m1->name[0] == 's' ? "%lld" : "%llu", m1->val); - } -} - -static void print_verifier_state(struct bpf_verifier_env *env, - const struct bpf_func_state *state, - bool print_all) -{ - const struct bpf_reg_state *reg; - enum bpf_reg_type t; - int i; - - if (state->frameno) - verbose(env, " frame%d:", state->frameno); - for (i = 0; i < MAX_BPF_REG; i++) { - reg = &state->regs[i]; - t = reg->type; - if (t == NOT_INIT) - continue; - if (!print_all && !reg_scratched(env, i)) - continue; - verbose(env, " R%d", i); - print_liveness(env, reg->live); - verbose(env, "="); - if (t == SCALAR_VALUE && reg->precise) - verbose(env, "P"); - if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && - tnum_is_const(reg->var_off)) { - /* reg->off should be 0 for SCALAR_VALUE */ - verbose(env, "%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t)); - verbose(env, "%lld", reg->var_off.value + reg->off); - } else { - const char *sep = ""; - - verbose(env, "%s", reg_type_str(env, t)); - if (base_type(t) == PTR_TO_BTF_ID) - verbose(env, "%s", btf_type_name(reg->btf, reg->btf_id)); - verbose(env, "("); -/* - * _a stands for append, was shortened to avoid multiline statements below. - * This macro is used to output a comma separated list of attributes. - */ -#define verbose_a(fmt, ...) 
({ verbose(env, "%s" fmt, sep, __VA_ARGS__); sep = ","; }) - - if (reg->id) - verbose_a("id=%d", reg->id); - if (reg->ref_obj_id) - verbose_a("ref_obj_id=%d", reg->ref_obj_id); - if (type_is_non_owning_ref(reg->type)) - verbose_a("%s", "non_own_ref"); - if (t != SCALAR_VALUE) - verbose_a("off=%d", reg->off); - if (type_is_pkt_pointer(t)) - verbose_a("r=%d", reg->range); - else if (base_type(t) == CONST_PTR_TO_MAP || - base_type(t) == PTR_TO_MAP_KEY || - base_type(t) == PTR_TO_MAP_VALUE) - verbose_a("ks=%d,vs=%d", - reg->map_ptr->key_size, - reg->map_ptr->value_size); - if (tnum_is_const(reg->var_off)) { - /* Typically an immediate SCALAR_VALUE, but - * could be a pointer whose offset is too big - * for reg->off - */ - verbose_a("imm=%llx", reg->var_off.value); - } else { - print_scalar_ranges(env, reg, &sep); - if (!tnum_is_unknown(reg->var_off)) { - char tn_buf[48]; - - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose_a("var_off=%s", tn_buf); - } - } -#undef verbose_a - - verbose(env, ")"); - } - } - for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { - char types_buf[BPF_REG_SIZE + 1]; - bool valid = false; - int j; - - for (j = 0; j < BPF_REG_SIZE; j++) { - if (state->stack[i].slot_type[j] != STACK_INVALID) - valid = true; - types_buf[j] = slot_type_char[state->stack[i].slot_type[j]]; - } - types_buf[BPF_REG_SIZE] = 0; - if (!valid) - continue; - if (!print_all && !stack_slot_scratched(env, i)) - continue; - switch (state->stack[i].slot_type[BPF_REG_SIZE - 1]) { - case STACK_SPILL: - reg = &state->stack[i].spilled_ptr; - t = reg->type; - - verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); - print_liveness(env, reg->live); - verbose(env, "=%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t)); - if (t == SCALAR_VALUE && reg->precise) - verbose(env, "P"); - if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) - verbose(env, "%lld", reg->var_off.value + reg->off); - break; - case STACK_DYNPTR: - i += BPF_DYNPTR_NR_SLOTS - 1; - reg = &state->stack[i].spilled_ptr; - - verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); - print_liveness(env, reg->live); - verbose(env, "=dynptr_%s", dynptr_type_str(reg->dynptr.type)); - if (reg->ref_obj_id) - verbose(env, "(ref_id=%d)", reg->ref_obj_id); - break; - case STACK_ITER: - /* only main slot has ref_obj_id set; skip others */ - reg = &state->stack[i].spilled_ptr; - if (!reg->ref_obj_id) - continue; - - verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); - print_liveness(env, reg->live); - verbose(env, "=iter_%s(ref_id=%d,state=%s,depth=%u)", - iter_type_str(reg->iter.btf, reg->iter.btf_id), - reg->ref_obj_id, iter_state_str(reg->iter.state), - reg->iter.depth); - break; - case STACK_MISC: - case STACK_ZERO: - default: - reg = &state->stack[i].spilled_ptr; - - for (j = 0; j < BPF_REG_SIZE; j++) - types_buf[j] = slot_type_char[state->stack[i].slot_type[j]]; - types_buf[BPF_REG_SIZE] = 0; - - verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE); - print_liveness(env, reg->live); - verbose(env, "=%s", types_buf); - break; - } - } - if (state->acquired_refs && state->refs[0].id) { - verbose(env, " refs=%d", state->refs[0].id); - for (i = 1; i < state->acquired_refs; i++) - if (state->refs[i].id) - verbose(env, ",%d", state->refs[i].id); - } - if (state->in_callback_fn) - verbose(env, " cb"); - if (state->in_async_callback_fn) - verbose(env, " async_cb"); - verbose(env, "\n"); - if (!print_all) - mark_verifier_state_clean(env); -} - -static inline u32 vlog_alignment(u32 pos) -{ - return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT), - 
BPF_LOG_MIN_ALIGNMENT) - pos - 1; -} - -static void print_insn_state(struct bpf_verifier_env *env, - const struct bpf_func_state *state) -{ - if (env->prev_log_pos && env->prev_log_pos == env->log.end_pos) { - /* remove new line character */ - bpf_vlog_reset(&env->log, env->prev_log_pos - 1); - verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_pos), ' '); - } else { - verbose(env, "%d:", env->insn_idx); - } - print_verifier_state(env, state, false); -} - /* copy array src of length n * size bytes to dst. dst is reallocated if it's too * small to hold src. This is different from krealloc since we don't want to preserve * the contents of dst. @@ -2341,69 +1881,214 @@ static void __update_reg_bounds(struct bpf_reg_state *reg) /* Uses signed min/max values to inform unsigned, and vice-versa */ static void __reg32_deduce_bounds(struct bpf_reg_state *reg) { - /* Learn sign from signed bounds. - * If we cannot cross the sign boundary, then signed and unsigned bounds - * are the same, so combine. This works even in the negative case, e.g. - * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff. + /* If upper 32 bits of u64/s64 range don't change, we can use lower 32 + * bits to improve our u32/s32 boundaries. + * + * E.g., the case where we have upper 32 bits as zero ([10, 20] in + * u64) is pretty trivial, it's obvious that in u32 we'll also have + * [10, 20] range. But this property holds for any 64-bit range as + * long as upper 32 bits in that entire range of values stay the same. + * + * E.g., u64 range [0x10000000A, 0x10000000F] ([4294967306, 4294967311] + * in decimal) has the same upper 32 bits throughout all the values in + * that range. As such, lower 32 bits form a valid [0xA, 0xF] ([10, 15]) + * range. + * + * Note also, that [0xA, 0xF] is a valid range both in u32 and in s32, + * following the rules outlined below about u64/s64 correspondence + * (which equally applies to u32 vs s32 correspondence). In general it + * depends on actual hexadecimal values of 32-bit range. They can form + * only valid u32, or only valid s32 ranges in some cases. + * + * So we use all these insights to derive bounds for subregisters here. 
*/ - if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) { - reg->s32_min_value = reg->u32_min_value = - max_t(u32, reg->s32_min_value, reg->u32_min_value); - reg->s32_max_value = reg->u32_max_value = - min_t(u32, reg->s32_max_value, reg->u32_max_value); - return; + if ((reg->umin_value >> 32) == (reg->umax_value >> 32)) { + /* u64 to u32 casting preserves validity of low 32 bits as + * a range, if upper 32 bits are the same + */ + reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->umin_value); + reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->umax_value); + + if ((s32)reg->umin_value <= (s32)reg->umax_value) { + reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value); + reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value); + } + } + if ((reg->smin_value >> 32) == (reg->smax_value >> 32)) { + /* low 32 bits should form a proper u32 range */ + if ((u32)reg->smin_value <= (u32)reg->smax_value) { + reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)reg->smin_value); + reg->u32_max_value = min_t(u32, reg->u32_max_value, (u32)reg->smax_value); + } + /* low 32 bits should form a proper s32 range */ + if ((s32)reg->smin_value <= (s32)reg->smax_value) { + reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value); + reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value); + } + } + /* Special case where upper bits form a small sequence of two + * sequential numbers (in 32-bit unsigned space, so 0xffffffff to + * 0x00000000 is also valid), while lower bits form a proper s32 range + * going from negative numbers to positive numbers. E.g., let's say we + * have s64 range [-1, 1] ([0xffffffffffffffff, 0x0000000000000001]). + * Possible s64 values are {-1, 0, 1} ({0xffffffffffffffff, + * 0x0000000000000000, 0x00000000000001}). Ignoring upper 32 bits, + * we still get a valid s32 range [-1, 1] ([0xffffffff, 0x00000001]). + * Note that it doesn't have to be 0xffffffff going to 0x00000000 in + * upper 32 bits. As a random example, s64 range + * [0xfffffff0fffffff0; 0xfffffff100000010], forms a valid s32 range + * [-16, 16] ([0xfffffff0; 0x00000010]) in its 32 bit subregister. + */ + if ((u32)(reg->umin_value >> 32) + 1 == (u32)(reg->umax_value >> 32) && + (s32)reg->umin_value < 0 && (s32)reg->umax_value >= 0) { + reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->umin_value); + reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->umax_value); + } + if ((u32)(reg->smin_value >> 32) + 1 == (u32)(reg->smax_value >> 32) && + (s32)reg->smin_value < 0 && (s32)reg->smax_value >= 0) { + reg->s32_min_value = max_t(s32, reg->s32_min_value, (s32)reg->smin_value); + reg->s32_max_value = min_t(s32, reg->s32_max_value, (s32)reg->smax_value); + } + /* if u32 range forms a valid s32 range (due to matching sign bit), + * try to learn from that + */ + if ((s32)reg->u32_min_value <= (s32)reg->u32_max_value) { + reg->s32_min_value = max_t(s32, reg->s32_min_value, reg->u32_min_value); + reg->s32_max_value = min_t(s32, reg->s32_max_value, reg->u32_max_value); } - /* Learn sign from unsigned bounds. Signed bounds cross the sign - * boundary, so we must be careful. + /* If we cannot cross the sign boundary, then signed and unsigned bounds + * are the same, so combine. This works even in the negative case, e.g. + * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff. */ - if ((s32)reg->u32_max_value >= 0) { - /* Positive. We can't learn anything from the smin, but smax - * is positive, hence safe. 
- */ - reg->s32_min_value = reg->u32_min_value; - reg->s32_max_value = reg->u32_max_value = - min_t(u32, reg->s32_max_value, reg->u32_max_value); - } else if ((s32)reg->u32_min_value < 0) { - /* Negative. We can't learn anything from the smax, but smin - * is negative, hence safe. - */ - reg->s32_min_value = reg->u32_min_value = - max_t(u32, reg->s32_min_value, reg->u32_min_value); - reg->s32_max_value = reg->u32_max_value; + if ((u32)reg->s32_min_value <= (u32)reg->s32_max_value) { + reg->u32_min_value = max_t(u32, reg->s32_min_value, reg->u32_min_value); + reg->u32_max_value = min_t(u32, reg->s32_max_value, reg->u32_max_value); } } static void __reg64_deduce_bounds(struct bpf_reg_state *reg) { - /* Learn sign from signed bounds. - * If we cannot cross the sign boundary, then signed and unsigned bounds + /* If u64 range forms a valid s64 range (due to matching sign bit), + * try to learn from that. Let's do a bit of ASCII art to see when + * this is happening. Let's take u64 range first: + * + * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX + * |-------------------------------|--------------------------------| + * + * Valid u64 range is formed when umin and umax are anywhere in the + * range [0, U64_MAX], and umin <= umax. u64 case is simple and + * straightforward. Let's see how s64 range maps onto the same range + * of values, annotated below the line for comparison: + * + * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX + * |-------------------------------|--------------------------------| + * 0 S64_MAX S64_MIN -1 + * + * So s64 values basically start in the middle and they are logically + * contiguous to the right of it, wrapping around from -1 to 0, and + * then finishing as S64_MAX (0x7fffffffffffffff) right before + * S64_MIN. We can try drawing the continuity of u64 vs s64 values + * more visually as mapped to sign-agnostic range of hex values. + * + * u64 start u64 end + * _______________________________________________________________ + * / \ + * 0 0x7fffffffffffffff 0x8000000000000000 U64_MAX + * |-------------------------------|--------------------------------| + * 0 S64_MAX S64_MIN -1 + * / \ + * >------------------------------ -------------------------------> + * s64 continues... s64 end s64 start s64 "midpoint" + * + * What this means is that, in general, we can't always derive + * something new about u64 from any random s64 range, and vice versa. + * + * But we can do that in two particular cases. One is when entire + * u64/s64 range is *entirely* contained within left half of the above + * diagram or when it is *entirely* contained in the right half. I.e.: + * + * |-------------------------------|--------------------------------| + * ^ ^ ^ ^ + * A B C D + * + * [A, B] and [C, D] are contained entirely in their respective halves + * and form valid contiguous ranges as both u64 and s64 values. [A, B] + * will be non-negative both as u64 and s64 (and in fact it will be + * identical ranges no matter the signedness). [C, D] treated as s64 + * will be a range of negative values, while in u64 it will be + * non-negative range of values larger than 0x8000000000000000. + * + * Now, any other range here can't be represented in both u64 and s64 + * simultaneously. E.g., [A, C], [A, D], [B, C], [B, D] are valid + * contiguous u64 ranges, but they are discontinuous in s64. [B, C] + * in s64 would be properly presented as [S64_MIN, C] and [B, S64_MAX], + * for example. 
Similarly, valid s64 range [D, A] (going from negative + * to positive values), would be two separate [D, U64_MAX] and [0, A] + * ranges as u64. Currently reg_state can't represent two segments per + * numeric domain, so in such situations we can only derive maximal + * possible range ([0, U64_MAX] for u64, and [S64_MIN, S64_MAX] for s64). + * + * So we use these facts to derive umin/umax from smin/smax and vice + * versa only if they stay within the same "half". This is equivalent + * to checking sign bit: lower half will have sign bit as zero, upper + * half have sign bit 1. Below in code we simplify this by just + * casting umin/umax as smin/smax and checking if they form valid + * range, and vice versa. Those are equivalent checks. + */ + if ((s64)reg->umin_value <= (s64)reg->umax_value) { + reg->smin_value = max_t(s64, reg->smin_value, reg->umin_value); + reg->smax_value = min_t(s64, reg->smax_value, reg->umax_value); + } + /* If we cannot cross the sign boundary, then signed and unsigned bounds * are the same, so combine. This works even in the negative case, e.g. * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff. */ - if (reg->smin_value >= 0 || reg->smax_value < 0) { - reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value, - reg->umin_value); - reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value, - reg->umax_value); - return; + if ((u64)reg->smin_value <= (u64)reg->smax_value) { + reg->umin_value = max_t(u64, reg->smin_value, reg->umin_value); + reg->umax_value = min_t(u64, reg->smax_value, reg->umax_value); } - /* Learn sign from unsigned bounds. Signed bounds cross the sign - * boundary, so we must be careful. +} + +static void __reg_deduce_mixed_bounds(struct bpf_reg_state *reg) +{ + /* Try to tighten 64-bit bounds from 32-bit knowledge, using 32-bit + * values on both sides of 64-bit range in hope to have tigher range. + * E.g., if r1 is [0x1'00000000, 0x3'80000000], and we learn from + * 32-bit signed > 0 operation that s32 bounds are now [1; 0x7fffffff]. + * With this, we can substitute 1 as low 32-bits of _low_ 64-bit bound + * (0x100000000 -> 0x100000001) and 0x7fffffff as low 32-bits of + * _high_ 64-bit bound (0x380000000 -> 0x37fffffff) and arrive at a + * better overall bounds for r1 as [0x1'000000001; 0x3'7fffffff]. + * We just need to make sure that derived bounds we are intersecting + * with are well-formed ranges in respecitve s64 or u64 domain, just + * like we do with similar kinds of 32-to-64 or 64-to-32 adjustments. */ - if ((s64)reg->umax_value >= 0) { - /* Positive. We can't learn anything from the smin, but smax - * is positive, hence safe. - */ - reg->smin_value = reg->umin_value; - reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value, - reg->umax_value); - } else if ((s64)reg->umin_value < 0) { - /* Negative. We can't learn anything from the smax, but smin - * is negative, hence safe. 
- */ - reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value, - reg->umin_value); - reg->smax_value = reg->umax_value; + __u64 new_umin, new_umax; + __s64 new_smin, new_smax; + + /* u32 -> u64 tightening, it's always well-formed */ + new_umin = (reg->umin_value & ~0xffffffffULL) | reg->u32_min_value; + new_umax = (reg->umax_value & ~0xffffffffULL) | reg->u32_max_value; + reg->umin_value = max_t(u64, reg->umin_value, new_umin); + reg->umax_value = min_t(u64, reg->umax_value, new_umax); + /* u32 -> s64 tightening, u32 range embedded into s64 preserves range validity */ + new_smin = (reg->smin_value & ~0xffffffffULL) | reg->u32_min_value; + new_smax = (reg->smax_value & ~0xffffffffULL) | reg->u32_max_value; + reg->smin_value = max_t(s64, reg->smin_value, new_smin); + reg->smax_value = min_t(s64, reg->smax_value, new_smax); + + /* if s32 can be treated as valid u32 range, we can use it as well */ + if ((u32)reg->s32_min_value <= (u32)reg->s32_max_value) { + /* s32 -> u64 tightening */ + new_umin = (reg->umin_value & ~0xffffffffULL) | (u32)reg->s32_min_value; + new_umax = (reg->umax_value & ~0xffffffffULL) | (u32)reg->s32_max_value; + reg->umin_value = max_t(u64, reg->umin_value, new_umin); + reg->umax_value = min_t(u64, reg->umax_value, new_umax); + /* s32 -> s64 tightening */ + new_smin = (reg->smin_value & ~0xffffffffULL) | (u32)reg->s32_min_value; + new_smax = (reg->smax_value & ~0xffffffffULL) | (u32)reg->s32_max_value; + reg->smin_value = max_t(s64, reg->smin_value, new_smin); + reg->smax_value = min_t(s64, reg->smax_value, new_smax); } } @@ -2411,6 +2096,7 @@ static void __reg_deduce_bounds(struct bpf_reg_state *reg) { __reg32_deduce_bounds(reg); __reg64_deduce_bounds(reg); + __reg_deduce_mixed_bounds(reg); } /* Attempts to improve var_off based on unsigned min/max information */ @@ -2432,6 +2118,7 @@ static void reg_bounds_sync(struct bpf_reg_state *reg) __update_reg_bounds(reg); /* We might have learned something about the sign bit. */ __reg_deduce_bounds(reg); + __reg_deduce_bounds(reg); /* We might have learned some bits from the bounds. 
*/ __reg_bound_offset(reg); /* Intersecting with the old var_off might have improved our bounds @@ -2441,6 +2128,56 @@ static void reg_bounds_sync(struct bpf_reg_state *reg) __update_reg_bounds(reg); } +static int reg_bounds_sanity_check(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, const char *ctx) +{ + const char *msg; + + if (reg->umin_value > reg->umax_value || + reg->smin_value > reg->smax_value || + reg->u32_min_value > reg->u32_max_value || + reg->s32_min_value > reg->s32_max_value) { + msg = "range bounds violation"; + goto out; + } + + if (tnum_is_const(reg->var_off)) { + u64 uval = reg->var_off.value; + s64 sval = (s64)uval; + + if (reg->umin_value != uval || reg->umax_value != uval || + reg->smin_value != sval || reg->smax_value != sval) { + msg = "const tnum out of sync with range bounds"; + goto out; + } + } + + if (tnum_subreg_is_const(reg->var_off)) { + u32 uval32 = tnum_subreg(reg->var_off).value; + s32 sval32 = (s32)uval32; + + if (reg->u32_min_value != uval32 || reg->u32_max_value != uval32 || + reg->s32_min_value != sval32 || reg->s32_max_value != sval32) { + msg = "const subreg tnum out of sync with range bounds"; + goto out; + } + } + + return 0; +out: + verbose(env, "REG INVARIANTS VIOLATION (%s): %s u64=[%#llx, %#llx] " + "s64=[%#llx, %#llx] u32=[%#x, %#x] s32=[%#x, %#x] var_off=(%#llx, %#llx)\n", + ctx, msg, reg->umin_value, reg->umax_value, + reg->smin_value, reg->smax_value, + reg->u32_min_value, reg->u32_max_value, + reg->s32_min_value, reg->s32_max_value, + reg->var_off.value, reg->var_off.mask); + if (env->test_reg_invariants) + return -EFAULT; + __mark_reg_unbounded(reg); + return 0; +} + static bool __reg32_bound_s64(s32 a) { return a >= 0 && a <= S32_MAX; @@ -2465,51 +2202,6 @@ static void __reg_assign_32_into_64(struct bpf_reg_state *reg) } } -static void __reg_combine_32_into_64(struct bpf_reg_state *reg) -{ - /* special case when 64-bit register has upper 32-bit register - * zeroed. Typically happens after zext or <<32, >>32 sequence - * allowing us to use 32-bit bounds directly, - */ - if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) { - __reg_assign_32_into_64(reg); - } else { - /* Otherwise the best we can do is push lower 32bit known and - * unknown bits into register (var_off set from jmp logic) - * then learn as much as possible from the 64-bit tnum - * known and unknown bits. The previous smin/smax bounds are - * invalid here because of jmp32 compare so mark them unknown - * so they do not impact tnum bounds calculation. - */ - __mark_reg64_unbounded(reg); - } - reg_bounds_sync(reg); -} - -static bool __reg64_bound_s32(s64 a) -{ - return a >= S32_MIN && a <= S32_MAX; -} - -static bool __reg64_bound_u32(u64 a) -{ - return a >= U32_MIN && a <= U32_MAX; -} - -static void __reg_combine_64_into_32(struct bpf_reg_state *reg) -{ - __mark_reg32_unbounded(reg); - if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) { - reg->s32_min_value = (s32)reg->smin_value; - reg->s32_max_value = (s32)reg->smax_value; - } - if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) { - reg->u32_min_value = (u32)reg->umin_value; - reg->u32_max_value = (u32)reg->umax_value; - } - reg_bounds_sync(reg); -} - /* Mark a register as having a completely unknown (scalar) value. 
*/ static void __mark_reg_unknown(const struct bpf_verifier_env *env, struct bpf_reg_state *reg) @@ -4592,9 +4284,17 @@ static bool register_is_null(struct bpf_reg_state *reg) return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0); } -static bool register_is_const(struct bpf_reg_state *reg) +/* check if register is a constant scalar value */ +static bool is_reg_const(struct bpf_reg_state *reg, bool subreg32) { - return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off); + return reg->type == SCALAR_VALUE && + tnum_is_const(subreg32 ? tnum_subreg(reg->var_off) : reg->var_off); +} + +/* assuming is_reg_const() is true, return constant value of a register */ +static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32) +{ + return subreg32 ? tnum_subreg(reg->var_off).value : reg->var_off.value; } static bool __is_scalar_unbounded(struct bpf_reg_state *reg) @@ -5451,10 +5151,23 @@ BTF_SET_END(rcu_protected_types) static bool rcu_protected_object(const struct btf *btf, u32 btf_id) { if (!btf_is_kernel(btf)) - return false; + return true; return btf_id_set_contains(&rcu_protected_types, btf_id); } +static struct btf_record *kptr_pointee_btf_record(struct btf_field *kptr_field) +{ + struct btf_struct_meta *meta; + + if (btf_is_kernel(kptr_field->kptr.btf)) + return NULL; + + meta = btf_find_struct_meta(kptr_field->kptr.btf, + kptr_field->kptr.btf_id); + + return meta ? meta->record : NULL; +} + static bool rcu_safe_kptr(const struct btf_field *field) { const struct btf_field_kptr *kptr = &field->kptr; @@ -5465,12 +5178,25 @@ static bool rcu_safe_kptr(const struct btf_field *field) static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field) { + struct btf_record *rec; + u32 ret; + + ret = PTR_MAYBE_NULL; if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) { - if (kptr_field->type != BPF_KPTR_PERCPU) - return PTR_MAYBE_NULL | MEM_RCU; - return PTR_MAYBE_NULL | MEM_RCU | MEM_PERCPU; + ret |= MEM_RCU; + if (kptr_field->type == BPF_KPTR_PERCPU) + ret |= MEM_PERCPU; + else if (!btf_is_kernel(kptr_field->kptr.btf)) + ret |= MEM_ALLOC; + + rec = kptr_pointee_btf_record(kptr_field); + if (rec && btf_record_has_field(rec, BPF_GRAPH_NODE)) + ret |= NON_OWN_REF; + } else { + ret |= PTR_UNTRUSTED; } - return PTR_MAYBE_NULL | PTR_UNTRUSTED; + + return ret; } static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno, @@ -6244,9 +5970,10 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) * values are also truncated so we push 64-bit bounds into * 32-bit bounds. Above were truncated < 32-bits already. 
*/ - if (size >= 4) - return; - __reg_combine_64_into_32(reg); + if (size < 4) { + __mark_reg32_unbounded(reg); + reg_bounds_sync(reg); + } } static void set_sext64_default_val(struct bpf_reg_state *reg, int size) @@ -8626,6 +8353,54 @@ static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env, return state->stack[spi].spilled_ptr.dynptr.type; } +static int check_reg_const_str(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, u32 regno) +{ + struct bpf_map *map = reg->map_ptr; + int err; + int map_off; + u64 map_addr; + char *str_ptr; + + if (reg->type != PTR_TO_MAP_VALUE) + return -EINVAL; + + if (!bpf_map_is_rdonly(map)) { + verbose(env, "R%d does not point to a readonly map'\n", regno); + return -EACCES; + } + + if (!tnum_is_const(reg->var_off)) { + verbose(env, "R%d is not a constant address'\n", regno); + return -EACCES; + } + + if (!map->ops->map_direct_value_addr) { + verbose(env, "no direct value access support for this map type\n"); + return -EACCES; + } + + err = check_map_access(env, regno, reg->off, + map->value_size - reg->off, false, + ACCESS_HELPER); + if (err) + return err; + + map_off = reg->off + reg->var_off.value; + err = map->ops->map_direct_value_addr(map, &map_addr, map_off); + if (err) { + verbose(env, "direct value access on string failed\n"); + return err; + } + + str_ptr = (char *)(long)(map_addr); + if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) { + verbose(env, "string is not zero-terminated\n"); + return -EINVAL; + } + return 0; +} + static int check_func_arg(struct bpf_verifier_env *env, u32 arg, struct bpf_call_arg_meta *meta, const struct bpf_func_proto *fn, @@ -8870,44 +8645,9 @@ skip_type_check: } case ARG_PTR_TO_CONST_STR: { - struct bpf_map *map = reg->map_ptr; - int map_off; - u64 map_addr; - char *str_ptr; - - if (!bpf_map_is_rdonly(map)) { - verbose(env, "R%d does not point to a readonly map'\n", regno); - return -EACCES; - } - - if (!tnum_is_const(reg->var_off)) { - verbose(env, "R%d is not a constant address'\n", regno); - return -EACCES; - } - - if (!map->ops->map_direct_value_addr) { - verbose(env, "no direct value access support for this map type\n"); - return -EACCES; - } - - err = check_map_access(env, regno, reg->off, - map->value_size - reg->off, false, - ACCESS_HELPER); + err = check_reg_const_str(env, reg, regno); if (err) return err; - - map_off = reg->off + reg->var_off.value; - err = map->ops->map_direct_value_addr(map, &map_addr, map_off); - if (err) { - verbose(env, "direct value access on string failed\n"); - return err; - } - - str_ptr = (char *)(long)(map_addr); - if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) { - verbose(env, "string is not zero-terminated\n"); - return -EINVAL; - } break; } case ARG_PTR_TO_KPTR: @@ -9896,14 +9636,15 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) return 0; } -static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, - int func_id, - struct bpf_call_arg_meta *meta) +static int do_refine_retval_range(struct bpf_verifier_env *env, + struct bpf_reg_state *regs, int ret_type, + int func_id, + struct bpf_call_arg_meta *meta) { struct bpf_reg_state *ret_reg = ®s[BPF_REG_0]; if (ret_type != RET_INTEGER) - return; + return 0; switch (func_id) { case BPF_FUNC_get_stack: @@ -9929,6 +9670,8 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, reg_bounds_sync(ret_reg); break; } + + return reg_bounds_sanity_check(env, ret_reg, "retval"); } static int @@ -9998,7 +9741,7 @@ 
record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, val = reg->var_off.value; max = map->max_entries; - if (!(register_is_const(reg) && val < max)) { + if (!(is_reg_const(reg, false) && val < max)) { bpf_map_key_store(aux, BPF_MAP_KEY_POISON); return 0; } @@ -10593,7 +10336,9 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs[BPF_REG_0].ref_obj_id = id; } - do_refine_retval_range(regs, fn->ret_type, func_id, &meta); + err = do_refine_retval_range(env, regs, fn->ret_type, func_id, &meta); + if (err) + return err; err = check_map_func_compatibility(env, meta.map_ptr, func_id); if (err) @@ -10771,6 +10516,11 @@ static bool is_kfunc_arg_nullable(const struct btf *btf, const struct btf_param return __kfunc_param_match_suffix(btf, arg, "__nullable"); } +static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param *arg) +{ + return __kfunc_param_match_suffix(btf, arg, "__str"); +} + static bool is_kfunc_arg_scalar_with_name(const struct btf *btf, const struct btf_param *arg, const char *name) @@ -10914,6 +10664,7 @@ enum kfunc_ptr_arg_type { KF_ARG_PTR_TO_RB_ROOT, KF_ARG_PTR_TO_RB_NODE, KF_ARG_PTR_TO_NULL, + KF_ARG_PTR_TO_CONST_STR, }; enum special_kfunc_type { @@ -11064,6 +10815,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, if (is_kfunc_arg_rbtree_node(meta->btf, &args[argno])) return KF_ARG_PTR_TO_RB_NODE; + if (is_kfunc_arg_const_str(meta->btf, &args[argno])) + return KF_ARG_PTR_TO_CONST_STR; + if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) { if (!btf_type_is_struct(ref_t)) { verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n", @@ -11695,6 +11449,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_MEM_SIZE: case KF_ARG_PTR_TO_CALLBACK: case KF_ARG_PTR_TO_REFCOUNTED_KPTR: + case KF_ARG_PTR_TO_CONST_STR: /* Trusted by default */ break; default: @@ -11966,6 +11721,15 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ meta->arg_btf = reg->btf; meta->arg_btf_id = reg->btf_id; break; + case KF_ARG_PTR_TO_CONST_STR: + if (reg->type != PTR_TO_MAP_VALUE) { + verbose(env, "arg#%d doesn't point to a const string\n", i); + return -EINVAL; + } + ret = check_reg_const_str(env, reg, regno); + if (ret) + return ret; + break; } } @@ -14086,13 +13850,12 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) /* check dest operand */ err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); + err = err ?: adjust_reg_min_max_vals(env, insn); if (err) return err; - - return adjust_reg_min_max_vals(env, insn); } - return 0; + return reg_bounds_sanity_check(env, ®s[insn->dst_reg], "alu"); } static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, @@ -14174,161 +13937,130 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, })); } -static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode) -{ - struct tnum subreg = tnum_subreg(reg->var_off); - s32 sval = (s32)val; - - switch (opcode) { - case BPF_JEQ: - if (tnum_is_const(subreg)) - return !!tnum_equals_const(subreg, val); - else if (val < reg->u32_min_value || val > reg->u32_max_value) - return 0; - else if (sval < reg->s32_min_value || sval > reg->s32_max_value) - return 0; - break; - case BPF_JNE: - if (tnum_is_const(subreg)) - return !tnum_equals_const(subreg, val); - else if (val < reg->u32_min_value || val > reg->u32_max_value) - return 1; - else if 
(sval < reg->s32_min_value || sval > reg->s32_max_value) - return 1; - break; - case BPF_JSET: - if ((~subreg.mask & subreg.value) & val) - return 1; - if (!((subreg.mask | subreg.value) & val)) - return 0; - break; - case BPF_JGT: - if (reg->u32_min_value > val) - return 1; - else if (reg->u32_max_value <= val) - return 0; - break; - case BPF_JSGT: - if (reg->s32_min_value > sval) - return 1; - else if (reg->s32_max_value <= sval) - return 0; - break; - case BPF_JLT: - if (reg->u32_max_value < val) - return 1; - else if (reg->u32_min_value >= val) - return 0; - break; - case BPF_JSLT: - if (reg->s32_max_value < sval) - return 1; - else if (reg->s32_min_value >= sval) - return 0; - break; - case BPF_JGE: - if (reg->u32_min_value >= val) - return 1; - else if (reg->u32_max_value < val) - return 0; - break; - case BPF_JSGE: - if (reg->s32_min_value >= sval) - return 1; - else if (reg->s32_max_value < sval) - return 0; - break; - case BPF_JLE: - if (reg->u32_max_value <= val) - return 1; - else if (reg->u32_min_value > val) - return 0; - break; - case BPF_JSLE: - if (reg->s32_max_value <= sval) - return 1; - else if (reg->s32_min_value > sval) - return 0; - break; - } - - return -1; -} - - -static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) -{ - s64 sval = (s64)val; +/* + * <reg1> <op> <reg2>, currently assuming reg2 is a constant + */ +static int is_scalar_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2, + u8 opcode, bool is_jmp32) +{ + struct tnum t1 = is_jmp32 ? tnum_subreg(reg1->var_off) : reg1->var_off; + struct tnum t2 = is_jmp32 ? tnum_subreg(reg2->var_off) : reg2->var_off; + u64 umin1 = is_jmp32 ? (u64)reg1->u32_min_value : reg1->umin_value; + u64 umax1 = is_jmp32 ? (u64)reg1->u32_max_value : reg1->umax_value; + s64 smin1 = is_jmp32 ? (s64)reg1->s32_min_value : reg1->smin_value; + s64 smax1 = is_jmp32 ? (s64)reg1->s32_max_value : reg1->smax_value; + u64 umin2 = is_jmp32 ? (u64)reg2->u32_min_value : reg2->umin_value; + u64 umax2 = is_jmp32 ? (u64)reg2->u32_max_value : reg2->umax_value; + s64 smin2 = is_jmp32 ? (s64)reg2->s32_min_value : reg2->smin_value; + s64 smax2 = is_jmp32 ? 
(s64)reg2->s32_max_value : reg2->smax_value; switch (opcode) { case BPF_JEQ: - if (tnum_is_const(reg->var_off)) - return !!tnum_equals_const(reg->var_off, val); - else if (val < reg->umin_value || val > reg->umax_value) + /* constants, umin/umax and smin/smax checks would be + * redundant in this case because they all should match + */ + if (tnum_is_const(t1) && tnum_is_const(t2)) + return t1.value == t2.value; + /* non-overlapping ranges */ + if (umin1 > umax2 || umax1 < umin2) return 0; - else if (sval < reg->smin_value || sval > reg->smax_value) + if (smin1 > smax2 || smax1 < smin2) return 0; + if (!is_jmp32) { + /* if 64-bit ranges are inconclusive, see if we can + * utilize 32-bit subrange knowledge to eliminate + * branches that can't be taken a priori + */ + if (reg1->u32_min_value > reg2->u32_max_value || + reg1->u32_max_value < reg2->u32_min_value) + return 0; + if (reg1->s32_min_value > reg2->s32_max_value || + reg1->s32_max_value < reg2->s32_min_value) + return 0; + } break; case BPF_JNE: - if (tnum_is_const(reg->var_off)) - return !tnum_equals_const(reg->var_off, val); - else if (val < reg->umin_value || val > reg->umax_value) + /* constants, umin/umax and smin/smax checks would be + * redundant in this case because they all should match + */ + if (tnum_is_const(t1) && tnum_is_const(t2)) + return t1.value != t2.value; + /* non-overlapping ranges */ + if (umin1 > umax2 || umax1 < umin2) return 1; - else if (sval < reg->smin_value || sval > reg->smax_value) + if (smin1 > smax2 || smax1 < smin2) return 1; + if (!is_jmp32) { + /* if 64-bit ranges are inconclusive, see if we can + * utilize 32-bit subrange knowledge to eliminate + * branches that can't be taken a priori + */ + if (reg1->u32_min_value > reg2->u32_max_value || + reg1->u32_max_value < reg2->u32_min_value) + return 1; + if (reg1->s32_min_value > reg2->s32_max_value || + reg1->s32_max_value < reg2->s32_min_value) + return 1; + } break; case BPF_JSET: - if ((~reg->var_off.mask & reg->var_off.value) & val) + if (!is_reg_const(reg2, is_jmp32)) { + swap(reg1, reg2); + swap(t1, t2); + } + if (!is_reg_const(reg2, is_jmp32)) + return -1; + if ((~t1.mask & t1.value) & t2.value) return 1; - if (!((reg->var_off.mask | reg->var_off.value) & val)) + if (!((t1.mask | t1.value) & t2.value)) return 0; break; case BPF_JGT: - if (reg->umin_value > val) + if (umin1 > umax2) return 1; - else if (reg->umax_value <= val) + else if (umax1 <= umin2) return 0; break; case BPF_JSGT: - if (reg->smin_value > sval) + if (smin1 > smax2) return 1; - else if (reg->smax_value <= sval) + else if (smax1 <= smin2) return 0; break; case BPF_JLT: - if (reg->umax_value < val) + if (umax1 < umin2) return 1; - else if (reg->umin_value >= val) + else if (umin1 >= umax2) return 0; break; case BPF_JSLT: - if (reg->smax_value < sval) + if (smax1 < smin2) return 1; - else if (reg->smin_value >= sval) + else if (smin1 >= smax2) return 0; break; case BPF_JGE: - if (reg->umin_value >= val) + if (umin1 >= umax2) return 1; - else if (reg->umax_value < val) + else if (umax1 < umin2) return 0; break; case BPF_JSGE: - if (reg->smin_value >= sval) + if (smin1 >= smax2) return 1; - else if (reg->smax_value < sval) + else if (smax1 < smin2) return 0; break; case BPF_JLE: - if (reg->umax_value <= val) + if (umax1 <= umin2) return 1; - else if (reg->umin_value > val) + else if (umin1 > umax2) return 0; break; case BPF_JSLE: - if (reg->smax_value <= sval) + if (smax1 <= smin2) return 1; - else if (reg->smin_value > sval) + else if (smin1 > smax2) return 0; break; } @@ -14336,41 
+14068,6 @@ static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) return -1; } -/* compute branch direction of the expression "if (reg opcode val) goto target;" - * and return: - * 1 - branch will be taken and "goto target" will be executed - * 0 - branch will not be taken and fall-through to next insn - * -1 - unknown. Example: "if (reg < 5)" is unknown when register value - * range [0,10] - */ -static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode, - bool is_jmp32) -{ - if (__is_pointer_value(false, reg)) { - if (!reg_not_null(reg)) - return -1; - - /* If pointer is valid tests against zero will fail so we can - * use this to direct branch taken. - */ - if (val != 0) - return -1; - - switch (opcode) { - case BPF_JEQ: - return 0; - case BPF_JNE: - return 1; - default: - return -1; - } - } - - if (is_jmp32) - return is_branch32_taken(reg, val, opcode); - return is_branch64_taken(reg, val, opcode); -} - static int flip_opcode(u32 opcode) { /* How can we transform "a <op> b" into "b <op> a"? */ @@ -14432,216 +14129,244 @@ static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg, return -1; } -/* Adjusts the register min/max values in the case that the dst_reg is the - * variable register that we are working on, and src_reg is a constant or we're - * simply doing a BPF_K check. - * In JEQ/JNE cases we also adjust the var_off values. +/* compute branch direction of the expression "if (<reg1> opcode <reg2>) goto target;" + * and return: + * 1 - branch will be taken and "goto target" will be executed + * 0 - branch will not be taken and fall-through to next insn + * -1 - unknown. Example: "if (reg1 < 5)" is unknown when register value + * range [0,10] */ -static void reg_set_min_max(struct bpf_reg_state *true_reg, - struct bpf_reg_state *false_reg, - u64 val, u32 val32, - u8 opcode, bool is_jmp32) -{ - struct tnum false_32off = tnum_subreg(false_reg->var_off); - struct tnum false_64off = false_reg->var_off; - struct tnum true_32off = tnum_subreg(true_reg->var_off); - struct tnum true_64off = true_reg->var_off; - s64 sval = (s64)val; - s32 sval32 = (s32)val32; - - /* If the dst_reg is a pointer, we can't learn anything about its - * variable offset from the compare (unless src_reg were a pointer into - * the same object, but we don't bother with that. - * Since false_reg and true_reg have the same type by construction, we - * only need to check one of them for pointerness. - */ - if (__is_pointer_value(false, false_reg)) - return; +static int is_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2, + u8 opcode, bool is_jmp32) +{ + if (reg_is_pkt_pointer_any(reg1) && reg_is_pkt_pointer_any(reg2) && !is_jmp32) + return is_pkt_ptr_branch_taken(reg1, reg2, opcode); + + if (__is_pointer_value(false, reg1) || __is_pointer_value(false, reg2)) { + u64 val; + + /* arrange that reg2 is a scalar, and reg1 is a pointer */ + if (!is_reg_const(reg2, is_jmp32)) { + opcode = flip_opcode(opcode); + swap(reg1, reg2); + } + /* and ensure that reg2 is a constant */ + if (!is_reg_const(reg2, is_jmp32)) + return -1; + + if (!reg_not_null(reg1)) + return -1; + + /* If pointer is valid tests against zero will fail so we can + * use this to direct branch taken. 
+ */ + val = reg_const_value(reg2, is_jmp32); + if (val != 0) + return -1; + + switch (opcode) { + case BPF_JEQ: + return 0; + case BPF_JNE: + return 1; + default: + return -1; + } + } + /* now deal with two scalars, but not necessarily constants */ + return is_scalar_branch_taken(reg1, reg2, opcode, is_jmp32); +} + +/* Opcode that corresponds to a *false* branch condition. + * E.g., if r1 < r2, then reverse (false) condition is r1 >= r2 + */ +static u8 rev_opcode(u8 opcode) +{ switch (opcode) { - /* JEQ/JNE comparison doesn't change the register equivalence. - * - * r1 = r2; - * if (r1 == 42) goto label; - * ... - * label: // here both r1 and r2 are known to be 42. - * - * Hence when marking register as known preserve it's ID. + case BPF_JEQ: return BPF_JNE; + case BPF_JNE: return BPF_JEQ; + /* JSET doesn't have it's reverse opcode in BPF, so add + * BPF_X flag to denote the reverse of that operation */ + case BPF_JSET: return BPF_JSET | BPF_X; + case BPF_JSET | BPF_X: return BPF_JSET; + case BPF_JGE: return BPF_JLT; + case BPF_JGT: return BPF_JLE; + case BPF_JLE: return BPF_JGT; + case BPF_JLT: return BPF_JGE; + case BPF_JSGE: return BPF_JSLT; + case BPF_JSGT: return BPF_JSLE; + case BPF_JSLE: return BPF_JSGT; + case BPF_JSLT: return BPF_JSGE; + default: return 0; + } +} + +/* Refine range knowledge for <reg1> <op> <reg>2 conditional operation. */ +static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state *reg2, + u8 opcode, bool is_jmp32) +{ + struct tnum t; + u64 val; + +again: + switch (opcode) { case BPF_JEQ: if (is_jmp32) { - __mark_reg32_known(true_reg, val32); - true_32off = tnum_subreg(true_reg->var_off); + reg1->u32_min_value = max(reg1->u32_min_value, reg2->u32_min_value); + reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value); + reg1->s32_min_value = max(reg1->s32_min_value, reg2->s32_min_value); + reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value); + reg2->u32_min_value = reg1->u32_min_value; + reg2->u32_max_value = reg1->u32_max_value; + reg2->s32_min_value = reg1->s32_min_value; + reg2->s32_max_value = reg1->s32_max_value; + + t = tnum_intersect(tnum_subreg(reg1->var_off), tnum_subreg(reg2->var_off)); + reg1->var_off = tnum_with_subreg(reg1->var_off, t); + reg2->var_off = tnum_with_subreg(reg2->var_off, t); } else { - ___mark_reg_known(true_reg, val); - true_64off = true_reg->var_off; + reg1->umin_value = max(reg1->umin_value, reg2->umin_value); + reg1->umax_value = min(reg1->umax_value, reg2->umax_value); + reg1->smin_value = max(reg1->smin_value, reg2->smin_value); + reg1->smax_value = min(reg1->smax_value, reg2->smax_value); + reg2->umin_value = reg1->umin_value; + reg2->umax_value = reg1->umax_value; + reg2->smin_value = reg1->smin_value; + reg2->smax_value = reg1->smax_value; + + reg1->var_off = tnum_intersect(reg1->var_off, reg2->var_off); + reg2->var_off = reg1->var_off; } break; case BPF_JNE: - if (is_jmp32) { - __mark_reg32_known(false_reg, val32); - false_32off = tnum_subreg(false_reg->var_off); - } else { - ___mark_reg_known(false_reg, val); - false_64off = false_reg->var_off; - } + /* we don't derive any new information for inequality yet */ break; case BPF_JSET: + if (!is_reg_const(reg2, is_jmp32)) + swap(reg1, reg2); + if (!is_reg_const(reg2, is_jmp32)) + break; + val = reg_const_value(reg2, is_jmp32); + /* BPF_JSET (i.e., TRUE branch, *not* BPF_JSET | BPF_X) + * requires single bit to learn something useful. 
E.g., if we + * know that `r1 & 0x3` is true, then which bits (0, 1, or both) + * are actually set? We can learn something definite only if + * it's a single-bit value to begin with. + * + * BPF_JSET | BPF_X (i.e., negation of BPF_JSET) doesn't have + * this restriction. I.e., !(r1 & 0x3) means neither bit 0 nor + * bit 1 is set, which we can readily use in adjustments. + */ + if (!is_power_of_2(val)) + break; if (is_jmp32) { - false_32off = tnum_and(false_32off, tnum_const(~val32)); - if (is_power_of_2(val32)) - true_32off = tnum_or(true_32off, - tnum_const(val32)); + t = tnum_or(tnum_subreg(reg1->var_off), tnum_const(val)); + reg1->var_off = tnum_with_subreg(reg1->var_off, t); } else { - false_64off = tnum_and(false_64off, tnum_const(~val)); - if (is_power_of_2(val)) - true_64off = tnum_or(true_64off, - tnum_const(val)); + reg1->var_off = tnum_or(reg1->var_off, tnum_const(val)); } break; - case BPF_JGE: - case BPF_JGT: - { + case BPF_JSET | BPF_X: /* reverse of BPF_JSET, see rev_opcode() */ + if (!is_reg_const(reg2, is_jmp32)) + swap(reg1, reg2); + if (!is_reg_const(reg2, is_jmp32)) + break; + val = reg_const_value(reg2, is_jmp32); if (is_jmp32) { - u32 false_umax = opcode == BPF_JGT ? val32 : val32 - 1; - u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32; - - false_reg->u32_max_value = min(false_reg->u32_max_value, - false_umax); - true_reg->u32_min_value = max(true_reg->u32_min_value, - true_umin); + t = tnum_and(tnum_subreg(reg1->var_off), tnum_const(~val)); + reg1->var_off = tnum_with_subreg(reg1->var_off, t); } else { - u64 false_umax = opcode == BPF_JGT ? val : val - 1; - u64 true_umin = opcode == BPF_JGT ? val + 1 : val; - - false_reg->umax_value = min(false_reg->umax_value, false_umax); - true_reg->umin_value = max(true_reg->umin_value, true_umin); + reg1->var_off = tnum_and(reg1->var_off, tnum_const(~val)); } break; - } - case BPF_JSGE: - case BPF_JSGT: - { + case BPF_JLE: if (is_jmp32) { - s32 false_smax = opcode == BPF_JSGT ? sval32 : sval32 - 1; - s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32; - - false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax); - true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin); + reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value); + reg2->u32_min_value = max(reg1->u32_min_value, reg2->u32_min_value); } else { - s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1; - s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval; - - false_reg->smax_value = min(false_reg->smax_value, false_smax); - true_reg->smin_value = max(true_reg->smin_value, true_smin); + reg1->umax_value = min(reg1->umax_value, reg2->umax_value); + reg2->umin_value = max(reg1->umin_value, reg2->umin_value); } break; - } - case BPF_JLE: case BPF_JLT: - { if (is_jmp32) { - u32 false_umin = opcode == BPF_JLT ? val32 : val32 + 1; - u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32; - - false_reg->u32_min_value = max(false_reg->u32_min_value, - false_umin); - true_reg->u32_max_value = min(true_reg->u32_max_value, - true_umax); + reg1->u32_max_value = min(reg1->u32_max_value, reg2->u32_max_value - 1); + reg2->u32_min_value = max(reg1->u32_min_value + 1, reg2->u32_min_value); } else { - u64 false_umin = opcode == BPF_JLT ? val : val + 1; - u64 true_umax = opcode == BPF_JLT ? 
val - 1 : val; - - false_reg->umin_value = max(false_reg->umin_value, false_umin); - true_reg->umax_value = min(true_reg->umax_value, true_umax); + reg1->umax_value = min(reg1->umax_value, reg2->umax_value - 1); + reg2->umin_value = max(reg1->umin_value + 1, reg2->umin_value); } break; - } case BPF_JSLE: + if (is_jmp32) { + reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value); + reg2->s32_min_value = max(reg1->s32_min_value, reg2->s32_min_value); + } else { + reg1->smax_value = min(reg1->smax_value, reg2->smax_value); + reg2->smin_value = max(reg1->smin_value, reg2->smin_value); + } + break; case BPF_JSLT: - { if (is_jmp32) { - s32 false_smin = opcode == BPF_JSLT ? sval32 : sval32 + 1; - s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32; - - false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin); - true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax); + reg1->s32_max_value = min(reg1->s32_max_value, reg2->s32_max_value - 1); + reg2->s32_min_value = max(reg1->s32_min_value + 1, reg2->s32_min_value); } else { - s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1; - s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval; - - false_reg->smin_value = max(false_reg->smin_value, false_smin); - true_reg->smax_value = min(true_reg->smax_value, true_smax); + reg1->smax_value = min(reg1->smax_value, reg2->smax_value - 1); + reg2->smin_value = max(reg1->smin_value + 1, reg2->smin_value); } break; - } + case BPF_JGE: + case BPF_JGT: + case BPF_JSGE: + case BPF_JSGT: + /* just reuse LE/LT logic above */ + opcode = flip_opcode(opcode); + swap(reg1, reg2); + goto again; default: return; } - - if (is_jmp32) { - false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off), - tnum_subreg(false_32off)); - true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off), - tnum_subreg(true_32off)); - __reg_combine_32_into_64(false_reg); - __reg_combine_32_into_64(true_reg); - } else { - false_reg->var_off = false_64off; - true_reg->var_off = true_64off; - __reg_combine_64_into_32(false_reg); - __reg_combine_64_into_32(true_reg); - } } -/* Same as above, but for the case that dst_reg holds a constant and src_reg is - * the variable reg. +/* Adjusts the register min/max values in the case that the dst_reg and + * src_reg are both SCALAR_VALUE registers (or we are simply doing a BPF_K + * check, in which case we have a fake SCALAR_VALUE representing insn->imm). + * Technically we can do similar adjustments for pointers to the same object, + * but we don't support that right now. */ -static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, - struct bpf_reg_state *false_reg, - u64 val, u32 val32, - u8 opcode, bool is_jmp32) +static int reg_set_min_max(struct bpf_verifier_env *env, + struct bpf_reg_state *true_reg1, + struct bpf_reg_state *true_reg2, + struct bpf_reg_state *false_reg1, + struct bpf_reg_state *false_reg2, + u8 opcode, bool is_jmp32) { - opcode = flip_opcode(opcode); - /* This uses zero as "not present in table"; luckily the zero opcode, - * BPF_JA, can't get here. + int err; + + /* If either register is a pointer, we can't learn anything about its + * variable offset from the compare (unless they were a pointer into + * the same object, but we don't bother with that).
*/ - if (opcode) - reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32); -} - -/* Regs are known to be equal, so intersect their min/max/var_off */ -static void __reg_combine_min_max(struct bpf_reg_state *src_reg, - struct bpf_reg_state *dst_reg) -{ - src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value, - dst_reg->umin_value); - src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value, - dst_reg->umax_value); - src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value, - dst_reg->smin_value); - src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value, - dst_reg->smax_value); - src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off, - dst_reg->var_off); - reg_bounds_sync(src_reg); - reg_bounds_sync(dst_reg); -} + if (false_reg1->type != SCALAR_VALUE || false_reg2->type != SCALAR_VALUE) + return 0; -static void reg_combine_min_max(struct bpf_reg_state *true_src, - struct bpf_reg_state *true_dst, - struct bpf_reg_state *false_src, - struct bpf_reg_state *false_dst, - u8 opcode) -{ - switch (opcode) { - case BPF_JEQ: - __reg_combine_min_max(true_src, true_dst); - break; - case BPF_JNE: - __reg_combine_min_max(false_src, false_dst); - break; - } + /* fallthrough (FALSE) branch */ + regs_refine_cond_op(false_reg1, false_reg2, rev_opcode(opcode), is_jmp32); + reg_bounds_sync(false_reg1); + reg_bounds_sync(false_reg2); + + /* jump (TRUE) branch */ + regs_refine_cond_op(true_reg1, true_reg2, opcode, is_jmp32); + reg_bounds_sync(true_reg1); + reg_bounds_sync(true_reg2); + + err = reg_bounds_sanity_check(env, true_reg1, "true_reg1"); + err = err ?: reg_bounds_sanity_check(env, true_reg2, "true_reg2"); + err = err ?: reg_bounds_sanity_check(env, false_reg1, "false_reg1"); + err = err ?: reg_bounds_sanity_check(env, false_reg2, "false_reg2"); + return err; } static void mark_ptr_or_null_reg(struct bpf_func_state *state, @@ -14839,6 +14564,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs; struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL; struct bpf_reg_state *eq_branch_regs; + struct bpf_reg_state fake_reg = {}; u8 opcode = BPF_OP(insn->code); bool is_jmp32; int pred = -1; @@ -14879,42 +14605,13 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); return -EINVAL; } + src_reg = &fake_reg; + src_reg->type = SCALAR_VALUE; + __mark_reg_known(src_reg, insn->imm); } is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; - - if (BPF_SRC(insn->code) == BPF_K) { - pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32); - } else if (src_reg->type == SCALAR_VALUE && - is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) { - pred = is_branch_taken(dst_reg, - tnum_subreg(src_reg->var_off).value, - opcode, - is_jmp32); - } else if (src_reg->type == SCALAR_VALUE && - !is_jmp32 && tnum_is_const(src_reg->var_off)) { - pred = is_branch_taken(dst_reg, - src_reg->var_off.value, - opcode, - is_jmp32); - } else if (dst_reg->type == SCALAR_VALUE && - is_jmp32 && tnum_is_const(tnum_subreg(dst_reg->var_off))) { - pred = is_branch_taken(src_reg, - tnum_subreg(dst_reg->var_off).value, - flip_opcode(opcode), - is_jmp32); - } else if (dst_reg->type == SCALAR_VALUE && - !is_jmp32 && tnum_is_const(dst_reg->var_off)) { - pred = is_branch_taken(src_reg, - dst_reg->var_off.value, - flip_opcode(opcode), - is_jmp32); - } else if (reg_is_pkt_pointer_any(dst_reg) && - 
reg_is_pkt_pointer_any(src_reg) && - !is_jmp32) { - pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode); - } - + pred = is_branch_taken(dst_reg, src_reg, opcode, is_jmp32); if (pred >= 0) { /* If we get here with a dst_reg pointer type it is because * above is_branch_taken() special cased the 0 comparison. @@ -14962,53 +14659,27 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, return -EFAULT; other_branch_regs = other_branch->frame[other_branch->curframe]->regs; - /* detect if we are comparing against a constant value so we can adjust - * our min/max values for our dst register. - * this is only legit if both are scalars (or pointers to the same - * object, I suppose, see the PTR_MAYBE_NULL related if block below), - * because otherwise the different base pointers mean the offsets aren't - * comparable. - */ if (BPF_SRC(insn->code) == BPF_X) { - struct bpf_reg_state *src_reg = ®s[insn->src_reg]; - - if (dst_reg->type == SCALAR_VALUE && - src_reg->type == SCALAR_VALUE) { - if (tnum_is_const(src_reg->var_off) || - (is_jmp32 && - tnum_is_const(tnum_subreg(src_reg->var_off)))) - reg_set_min_max(&other_branch_regs[insn->dst_reg], - dst_reg, - src_reg->var_off.value, - tnum_subreg(src_reg->var_off).value, - opcode, is_jmp32); - else if (tnum_is_const(dst_reg->var_off) || - (is_jmp32 && - tnum_is_const(tnum_subreg(dst_reg->var_off)))) - reg_set_min_max_inv(&other_branch_regs[insn->src_reg], - src_reg, - dst_reg->var_off.value, - tnum_subreg(dst_reg->var_off).value, - opcode, is_jmp32); - else if (!is_jmp32 && - (opcode == BPF_JEQ || opcode == BPF_JNE)) - /* Comparing for equality, we can combine knowledge */ - reg_combine_min_max(&other_branch_regs[insn->src_reg], - &other_branch_regs[insn->dst_reg], - src_reg, dst_reg, opcode); - if (src_reg->id && - !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) { - find_equal_scalars(this_branch, src_reg); - find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]); - } - - } - } else if (dst_reg->type == SCALAR_VALUE) { - reg_set_min_max(&other_branch_regs[insn->dst_reg], - dst_reg, insn->imm, (u32)insn->imm, - opcode, is_jmp32); + err = reg_set_min_max(env, + &other_branch_regs[insn->dst_reg], + &other_branch_regs[insn->src_reg], + dst_reg, src_reg, opcode, is_jmp32); + } else /* BPF_SRC(insn->code) == BPF_K */ { + err = reg_set_min_max(env, + &other_branch_regs[insn->dst_reg], + src_reg /* fake one */, + dst_reg, src_reg /* same fake one */, + opcode, is_jmp32); } + if (err) + return err; + if (BPF_SRC(insn->code) == BPF_X && + src_reg->type == SCALAR_VALUE && src_reg->id && + !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) { + find_equal_scalars(this_branch, src_reg); + find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]); + } if (dst_reg->type == SCALAR_VALUE && dst_reg->id && !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) { find_equal_scalars(this_branch, dst_reg); @@ -17541,10 +17212,8 @@ static int do_check(struct bpf_verifier_env *env) insn->off, BPF_SIZE(insn->code), BPF_READ, insn->dst_reg, false, BPF_MODE(insn->code) == BPF_MEMSX); - if (err) - return err; - - err = save_aux_ptr_type(env, src_reg_type, true); + err = err ?: save_aux_ptr_type(env, src_reg_type, true); + err = err ?: reg_bounds_sanity_check(env, ®s[insn->dst_reg], "ldx"); if (err) return err; } else if (class == BPF_STX) { @@ -20831,6 +20500,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (is_priv) env->test_state_freq = attr->prog_flags & 
BPF_F_TEST_STATE_FREQ; + env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS; env->explored_states = kvcalloc(state_htab_size(env), sizeof(struct bpf_verifier_state_list *), diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index c56071f150f2..520b90dd97ec 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -164,13 +164,13 @@ struct cgroup_mgctx { #define DEFINE_CGROUP_MGCTX(name) \ struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name) -extern spinlock_t css_set_lock; extern struct cgroup_subsys *cgroup_subsys[]; extern struct list_head cgroup_roots; /* iterate across the hierarchies */ #define for_each_root(root) \ - list_for_each_entry((root), &cgroup_roots, root_list) + list_for_each_entry_rcu((root), &cgroup_roots, root_list, \ + lockdep_is_held(&cgroup_mutex)) /** * for_each_subsys - iterate all enabled cgroup subsystems diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 76db6c67e39a..04d11a7dd95f 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1262,6 +1262,40 @@ int cgroup1_get_tree(struct fs_context *fc) return ret; } +/** + * task_get_cgroup1 - Acquires the associated cgroup of a task within a + * specific cgroup1 hierarchy. The cgroup1 hierarchy is identified by its + * hierarchy ID. + * @tsk: The target task + * @hierarchy_id: The ID of a cgroup1 hierarchy + * + * On success, the cgroup is returned. On failure, ERR_PTR is returned. + * We limit it to cgroup1 only. + */ +struct cgroup *task_get_cgroup1(struct task_struct *tsk, int hierarchy_id) +{ + struct cgroup *cgrp = ERR_PTR(-ENOENT); + struct cgroup_root *root; + unsigned long flags; + + rcu_read_lock(); + for_each_root(root) { + /* cgroup1 only*/ + if (root == &cgrp_dfl_root) + continue; + if (root->hierarchy_id != hierarchy_id) + continue; + spin_lock_irqsave(&css_set_lock, flags); + cgrp = task_cgroup_from_root(tsk, root); + if (!cgrp || !cgroup_tryget(cgrp)) + cgrp = ERR_PTR(-ENOENT); + spin_unlock_irqrestore(&css_set_lock, flags); + break; + } + rcu_read_unlock(); + return cgrp; +} + static int __init cgroup1_wq_init(void) { /* diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 4b9ff41ca603..8f3cef1a4d8a 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1315,7 +1315,7 @@ static void cgroup_exit_root_id(struct cgroup_root *root) void cgroup_free_root(struct cgroup_root *root) { - kfree(root); + kfree_rcu(root, rcu); } static void cgroup_destroy_root(struct cgroup_root *root) @@ -1347,10 +1347,9 @@ static void cgroup_destroy_root(struct cgroup_root *root) spin_unlock_irq(&css_set_lock); - if (!list_empty(&root->root_list)) { - list_del(&root->root_list); - cgroup_root_count--; - } + WARN_ON_ONCE(list_empty(&root->root_list)); + list_del_rcu(&root->root_list); + cgroup_root_count--; if (!have_favordynmods) cgroup_favor_dynmods(root, false); @@ -1390,7 +1389,15 @@ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset, } } - BUG_ON(!res_cgroup); + /* + * If cgroup_mutex is not held, the cgrp_cset_link will be freed + * before we remove the cgroup root from the root_list. Consequently, + * when accessing a cgroup root, the cset_link may have already been + * freed, resulting in a NULL res_cgroup. However, by holding the + * cgroup_mutex, we ensure that res_cgroup can't be NULL. + * If we don't hold cgroup_mutex in the caller, we must do the NULL + * check. 
+ */ return res_cgroup; } @@ -1413,6 +1420,11 @@ current_cgns_cgroup_from_root(struct cgroup_root *root) rcu_read_unlock(); + /* + * The namespace_sem is held by current, so the root cgroup can't + * be umounted. Therefore, we can ensure that the res is non-NULL. + */ + WARN_ON_ONCE(!res); return res; } @@ -1449,7 +1461,6 @@ static struct cgroup *current_cgns_cgroup_dfl(void) static struct cgroup *cset_cgroup_from_root(struct css_set *cset, struct cgroup_root *root) { - lockdep_assert_held(&cgroup_mutex); lockdep_assert_held(&css_set_lock); return __cset_cgroup_from_root(cset, root); @@ -1457,7 +1468,9 @@ static struct cgroup *cset_cgroup_from_root(struct css_set *cset, /* * Return the cgroup for "task" from the given hierarchy. Must be - * called with cgroup_mutex and css_set_lock held. + * called with css_set_lock held to prevent task's groups from being modified. + * Must be called with either cgroup_mutex or rcu read lock to prevent the + * cgroup root from being destroyed. */ struct cgroup *task_cgroup_from_root(struct task_struct *task, struct cgroup_root *root) @@ -2032,7 +2045,7 @@ void init_cgroup_root(struct cgroup_fs_context *ctx) struct cgroup_root *root = ctx->root; struct cgroup *cgrp = &root->cgrp; - INIT_LIST_HEAD(&root->root_list); + INIT_LIST_HEAD_RCU(&root->root_list); atomic_set(&root->nr_cgrps, 1); cgrp->root = root; init_cgroup_housekeeping(cgrp); @@ -2115,7 +2128,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) * care of subsystems' refcounts, which are explicitly dropped in * the failure exit path. */ - list_add(&root->root_list, &cgroup_roots); + list_add_rcu(&root->root_list, &cgroup_roots); cgroup_root_count++; /* @@ -6265,7 +6278,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, if (!buf) goto out; - cgroup_lock(); + rcu_read_lock(); spin_lock_irq(&css_set_lock); for_each_root(root) { @@ -6276,6 +6289,11 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, if (root == &cgrp_dfl_root && !READ_ONCE(cgrp_dfl_visible)) continue; + cgrp = task_cgroup_from_root(tsk, root); + /* The root has already been unmounted. */ + if (!cgrp) + continue; + seq_printf(m, "%d:", root->hierarchy_id); if (root != &cgrp_dfl_root) for_each_subsys(ss, ssid) @@ -6286,9 +6304,6 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, seq_printf(m, "%sname=%s", count ? "," : "", root->name); seq_putc(m, ':'); - - cgrp = task_cgroup_from_root(tsk, root); - /* * On traditional hierarchies, all zombie tasks show up as * belonging to the root cgroup. 
On the default hierarchy, @@ -6320,7 +6335,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, retval = 0; out_unlock: spin_unlock_irq(&css_set_lock); - cgroup_unlock(); + rcu_read_unlock(); kfree(buf); out: return retval; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 84e8a0f6e4e0..f0b8b7c29126 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1376,6 +1376,8 @@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, struct bpf_dynptr_kern *sig_ptr, struct bpf_key *trusted_keyring) { + const void *data, *sig; + u32 data_len, sig_len; int ret; if (trusted_keyring->has_ref) { @@ -1392,10 +1394,12 @@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, return ret; } - return verify_pkcs7_signature(data_ptr->data, - __bpf_dynptr_size(data_ptr), - sig_ptr->data, - __bpf_dynptr_size(sig_ptr), + data_len = __bpf_dynptr_size(data_ptr); + data = __bpf_dynptr_data(data_ptr, data_len); + sig_len = __bpf_dynptr_size(sig_ptr); + sig = __bpf_dynptr_data(sig_ptr, sig_len); + + return verify_pkcs7_signature(data, data_len, sig, sig_len, trusted_keyring->key, VERIFYING_UNSPECIFIED_SIGNATURE, NULL, NULL); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 7916503e6a6a..c148f8d1e564 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -5144,22 +5144,6 @@ static struct bpf_test tests[] = { { }, { { 0, 0x1 } }, }, - { - "ALU_MOVSX | BPF_W", - .u.insns_int = { - BPF_LD_IMM64(R2, 0x00000000deadbeefLL), - BPF_LD_IMM64(R3, 0xdeadbeefdeadbeefLL), - BPF_MOVSX32_REG(R1, R3, 32), - BPF_JMP_REG(BPF_JEQ, R2, R1, 2), - BPF_MOV32_IMM(R0, 2), - BPF_EXIT_INSN(), - BPF_MOV32_IMM(R0, 1), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 0x1 } }, - }, /* MOVSX64 REG */ { "ALU64_MOVSX | BPF_B", diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 2a7f1b15714a..407b2335f091 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -702,20 +702,7 @@ static int vlan_ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) { const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); - const struct ethtool_ops *ops = vlan->real_dev->ethtool_ops; - struct phy_device *phydev = vlan->real_dev->phydev; - - if (phy_has_tsinfo(phydev)) { - return phy_ts_info(phydev, info); - } else if (ops->get_ts_info) { - return ops->get_ts_info(vlan->real_dev, info); - } else { - info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; - info->phc_index = -1; - } - - return 0; + return ethtool_get_ts_info_by_layer(vlan->real_dev, info); } static void vlan_dev_get_stats64(struct net_device *dev, diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index 3bd0760c76a2..b51d8b071b56 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -20,6 +20,7 @@ batman-adv-y += hash.o batman-adv-$(CONFIG_BATMAN_ADV_DEBUG) += log.o batman-adv-y += main.o batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o +batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast_forw.o batman-adv-y += netlink.o batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o batman-adv-y += originator.o diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 37ce6cfb3520..5f46ca3d4bb8 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -20,7 +20,6 @@ #include <linux/if_vlan.h> #include <linux/jhash.h> #include <linux/jiffies.h> -#include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> #include 
<linux/lockdep.h> @@ -31,6 +30,7 @@ #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/sprintf.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/workqueue.h> diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index c120c7c6d25f..757c084ac2d1 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -25,7 +25,6 @@ #include "hard-interface.h" #include "originator.h" -#include "routing.h" #include "send.h" /** @@ -351,18 +350,14 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb, struct batadv_orig_node *orig_node_src) { struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); - struct batadv_orig_node *orig_node_dst; struct batadv_neigh_node *neigh_node = NULL; struct batadv_frag_packet *packet; u16 total_size; bool ret = false; packet = (struct batadv_frag_packet *)skb->data; - orig_node_dst = batadv_orig_hash_find(bat_priv, packet->dest); - if (!orig_node_dst) - goto out; - neigh_node = batadv_find_router(bat_priv, orig_node_dst, recv_if); + neigh_node = batadv_orig_to_router(bat_priv, packet->dest, recv_if); if (!neigh_node) goto out; @@ -381,7 +376,6 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb, } out: - batadv_orig_node_put(orig_node_dst); batadv_neigh_node_put(neigh_node); return ret; } diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index d26124bc27e1..0ddd8b4b3f4c 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -18,7 +18,6 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/ipv6.h> -#include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> @@ -29,6 +28,7 @@ #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/sprintf.h> #include <linux/stddef.h> #include <linux/udp.h> #include <net/sock.h> diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index e8a449915566..5fc754b0b3f7 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -6,6 +6,7 @@ #include "main.h" +#include <linux/array_size.h> #include <linux/atomic.h> #include <linux/build_bug.h> #include <linux/byteorder/generic.h> @@ -20,7 +21,6 @@ #include <linux/init.h> #include <linux/ip.h> #include <linux/ipv6.h> -#include <linux/kernel.h> #include <linux/kobject.h> #include <linux/kref.h> #include <linux/list.h> @@ -33,6 +33,7 @@ #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/sprintf.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/workqueue.h> @@ -532,6 +533,8 @@ static void batadv_recv_handler_init(void) /* broadcast packet */ batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet; + /* multicast packet */ + batadv_rx_handler[BATADV_MCAST] = batadv_recv_mcast_packet; /* unicast packets ... */ /* unicast with 4 addresses packet */ diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 10007c5894a1..870dcd7f1786 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -13,7 +13,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2023.3" +#define BATADV_SOURCE_VERSION "2024.0" #endif /* B.A.T.M.A.N. 
parameters */ diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 315394f12c55..d982daea8329 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -25,7 +25,6 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/jiffies.h> -#include <linux/kernel.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> @@ -36,6 +35,7 @@ #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/sprintf.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> @@ -236,6 +236,37 @@ static u8 batadv_mcast_mla_rtr_flags_get(struct batadv_priv *bat_priv, } /** + * batadv_mcast_mla_forw_flags_get() - get multicast forwarding flags + * @bat_priv: the bat priv with all the soft interface information + * + * Checks if all active hard interfaces have an MTU larger or equal to 1280 + * bytes (IPv6 minimum MTU). + * + * Return: BATADV_MCAST_HAVE_MC_PTYPE_CAPA if yes, BATADV_NO_FLAGS otherwise. + */ +static u8 batadv_mcast_mla_forw_flags_get(struct batadv_priv *bat_priv) +{ + const struct batadv_hard_iface *hard_iface; + + rcu_read_lock(); + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { + if (hard_iface->if_status != BATADV_IF_ACTIVE) + continue; + + if (hard_iface->soft_iface != bat_priv->soft_iface) + continue; + + if (hard_iface->net_dev->mtu < IPV6_MIN_MTU) { + rcu_read_unlock(); + return BATADV_NO_FLAGS; + } + } + rcu_read_unlock(); + + return BATADV_MCAST_HAVE_MC_PTYPE_CAPA; +} + +/** * batadv_mcast_mla_flags_get() - get the new multicast flags * @bat_priv: the bat priv with all the soft interface information * @@ -256,6 +287,7 @@ batadv_mcast_mla_flags_get(struct batadv_priv *bat_priv) mla_flags.enabled = 1; mla_flags.tvlv_flags |= batadv_mcast_mla_rtr_flags_get(bat_priv, bridge); + mla_flags.tvlv_flags |= batadv_mcast_mla_forw_flags_get(bat_priv); if (!bridge) return mla_flags; @@ -806,23 +838,25 @@ static void batadv_mcast_flags_log(struct batadv_priv *bat_priv, u8 flags) { bool old_enabled = bat_priv->mcast.mla_flags.enabled; u8 old_flags = bat_priv->mcast.mla_flags.tvlv_flags; - char str_old_flags[] = "[.... . ]"; + char str_old_flags[] = "[.... . .]"; - sprintf(str_old_flags, "[%c%c%c%s%s]", + sprintf(str_old_flags, "[%c%c%c%s%s%c]", (old_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.', (old_flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.', (old_flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.', !(old_flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ", - !(old_flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". "); + !(old_flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ", + !(old_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) ? 'P' : '.'); batadv_dbg(BATADV_DBG_MCAST, bat_priv, - "Changing multicast flags from '%s' to '[%c%c%c%s%s]'\n", + "Changing multicast flags from '%s' to '[%c%c%c%s%s%c]'\n", old_enabled ? str_old_flags : "<undefined>", (flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.', (flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.', (flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.', !(flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ", - !(flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". "); + !(flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ", + !(flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) ? 
'P' : '.'); } /** @@ -1136,16 +1170,61 @@ static int batadv_mcast_forw_rtr_count(struct batadv_priv *bat_priv, } /** + * batadv_mcast_forw_mode_by_count() - get forwarding mode by count + * @bat_priv: the bat priv with all the soft interface information + * @skb: the multicast packet to check + * @vid: the vlan identifier + * @is_routable: stores whether the destination is routable + * @count: the number of originators the multicast packet need to be sent to + * + * For a multicast packet with multiple destination originators, checks which + * mode to use. For BATADV_FORW_MCAST it also encapsulates the packet with a + * complete batman-adv multicast header. + * + * Return: + * BATADV_FORW_MCAST: If all nodes have multicast packet routing + * capabilities and an MTU >= 1280 on all hard interfaces (including us) + * and the encapsulated multicast packet with all destination addresses + * would still fit into an 1280 bytes batman-adv multicast packet + * (excluding the outer ethernet frame) and we could successfully push + * the full batman-adv multicast packet header. + * BATADV_FORW_UCASTS: If the packet cannot be sent in a batman-adv + * multicast packet and the amount of batman-adv unicast packets needed + * is smaller or equal to the configured multicast fanout. + * BATADV_FORW_BCAST: Otherwise. + */ +static enum batadv_forw_mode +batadv_mcast_forw_mode_by_count(struct batadv_priv *bat_priv, + struct sk_buff *skb, unsigned short vid, + int is_routable, int count) +{ + unsigned int mcast_hdrlen = batadv_mcast_forw_packet_hdrlen(count); + u8 own_tvlv_flags = bat_priv->mcast.mla_flags.tvlv_flags; + + if (!atomic_read(&bat_priv->mcast.num_no_mc_ptype_capa) && + own_tvlv_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA && + skb->len + mcast_hdrlen <= IPV6_MIN_MTU && + batadv_mcast_forw_push(bat_priv, skb, vid, is_routable, count)) + return BATADV_FORW_MCAST; + + if (count <= atomic_read(&bat_priv->multicast_fanout)) + return BATADV_FORW_UCASTS; + + return BATADV_FORW_BCAST; +} + +/** * batadv_mcast_forw_mode() - check on how to forward a multicast packet * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to check + * @vid: the vlan identifier * @is_routable: stores whether the destination is routable * * Return: The forwarding mode as enum batadv_forw_mode. */ enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, - int *is_routable) + unsigned short vid, int *is_routable) { int ret, tt_count, ip_count, unsnoop_count, total_count; bool is_unsnoopable = false; @@ -1175,10 +1254,8 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, else if (unsnoop_count) return BATADV_FORW_BCAST; - if (total_count <= atomic_read(&bat_priv->multicast_fanout)) - return BATADV_FORW_UCASTS; - - return BATADV_FORW_BCAST; + return batadv_mcast_forw_mode_by_count(bat_priv, skb, vid, *is_routable, + total_count); } /** @@ -1739,6 +1816,31 @@ static void batadv_mcast_want_rtr6_update(struct batadv_priv *bat_priv, } /** + * batadv_mcast_have_mc_ptype_update() - update multicast packet type counter + * @bat_priv: the bat priv with all the soft interface information + * @orig: the orig_node which multicast state might have changed of + * @mcast_flags: flags indicating the new multicast state + * + * If the BATADV_MCAST_HAVE_MC_PTYPE_CAPA flag of this originator, orig, has + * toggled then this method updates the counter accordingly. 
+ */ +static void batadv_mcast_have_mc_ptype_update(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig, + u8 mcast_flags) +{ + lockdep_assert_held(&orig->mcast_handler_lock); + + /* switched from flag set to unset */ + if (!(mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) && + orig->mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) + atomic_inc(&bat_priv->mcast.num_no_mc_ptype_capa); + /* switched from flag unset to set */ + else if (mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA && + !(orig->mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA)) + atomic_dec(&bat_priv->mcast.num_no_mc_ptype_capa); +} + +/** * batadv_mcast_tvlv_flags_get() - get multicast flags from an OGM TVLV * @enabled: whether the originator has multicast TVLV support enabled * @tvlv_value: tvlv buffer containing the multicast flags @@ -1806,6 +1908,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv, batadv_mcast_want_ipv6_update(bat_priv, orig, mcast_flags); batadv_mcast_want_rtr4_update(bat_priv, orig, mcast_flags); batadv_mcast_want_rtr6_update(bat_priv, orig, mcast_flags); + batadv_mcast_have_mc_ptype_update(bat_priv, orig, mcast_flags); orig->mcast_flags = mcast_flags; spin_unlock_bh(&orig->mcast_handler_lock); @@ -1820,6 +1923,10 @@ void batadv_mcast_init(struct batadv_priv *bat_priv) batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler, NULL, NULL, BATADV_TVLV_MCAST, 2, BATADV_TVLV_HANDLER_OGM_CIFNOTFND); + batadv_tvlv_handler_register(bat_priv, NULL, NULL, + batadv_mcast_forw_tracker_tvlv_handler, + BATADV_TVLV_MCAST_TRACKER, 1, + BATADV_TVLV_HANDLER_OGM_CIFNOTFND); INIT_DELAYED_WORK(&bat_priv->mcast.work, batadv_mcast_mla_update); batadv_mcast_start_timer(bat_priv); diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index a9770d8d6d36..d97ee51d26f2 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -11,6 +11,7 @@ #include <linux/netlink.h> #include <linux/skbuff.h> +#include <linux/types.h> /** * enum batadv_forw_mode - the way a packet should be forwarded as @@ -28,6 +29,12 @@ enum batadv_forw_mode { */ BATADV_FORW_UCASTS, + /** + * @BATADV_FORW_MCAST: forward the packet to some nodes via a + * batman-adv multicast packet + */ + BATADV_FORW_MCAST, + /** @BATADV_FORW_NONE: don't forward, drop it */ BATADV_FORW_NONE, }; @@ -36,7 +43,7 @@ enum batadv_forw_mode { enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, - int *is_routable); + unsigned short vid, int *is_routable); int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid, int is_routable); @@ -52,11 +59,23 @@ void batadv_mcast_free(struct batadv_priv *bat_priv); void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node); +/* multicast_forw.c */ + +int batadv_mcast_forw_tracker_tvlv_handler(struct batadv_priv *bat_priv, + struct sk_buff *skb); + +unsigned int batadv_mcast_forw_packet_hdrlen(unsigned int num_dests); + +bool batadv_mcast_forw_push(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, int is_routable, int count); + +int batadv_mcast_forw_mcsend(struct batadv_priv *bat_priv, struct sk_buff *skb); + #else static inline enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, - int *is_routable) + unsigned short vid, int *is_routable) { return BATADV_FORW_BCAST; } @@ -94,6 +113,13 @@ static inline void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node) { } +static inline int batadv_mcast_forw_mcsend(struct 
batadv_priv *bat_priv, + struct sk_buff *skb) +{ + kfree_skb(skb); + return NET_XMIT_DROP; +} + #endif /* CONFIG_BATMAN_ADV_MCAST */ #endif /* _NET_BATMAN_ADV_MULTICAST_H_ */ diff --git a/net/batman-adv/multicast_forw.c b/net/batman-adv/multicast_forw.c new file mode 100644 index 000000000000..fafd6ba8c056 --- /dev/null +++ b/net/batman-adv/multicast_forw.c @@ -0,0 +1,1178 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) B.A.T.M.A.N. contributors: + * + * Linus Lüssing + */ + +#include "multicast.h" +#include "main.h" + +#include <linux/bug.h> +#include <linux/build_bug.h> +#include <linux/byteorder/generic.h> +#include <linux/compiler.h> +#include <linux/errno.h> +#include <linux/etherdevice.h> +#include <linux/gfp.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/ipv6.h> +#include <linux/limits.h> +#include <linux/netdevice.h> +#include <linux/rculist.h> +#include <linux/rcupdate.h> +#include <linux/skbuff.h> +#include <linux/stddef.h> +#include <linux/string.h> +#include <linux/types.h> +#include <uapi/linux/batadv_packet.h> + +#include "bridge_loop_avoidance.h" +#include "originator.h" +#include "send.h" +#include "translation-table.h" + +#define batadv_mcast_forw_tracker_for_each_dest(dest, num_dests) \ + for (; num_dests; num_dests--, (dest) += ETH_ALEN) + +#define batadv_mcast_forw_tracker_for_each_dest2(dest1, dest2, num_dests) \ + for (; num_dests; num_dests--, (dest1) += ETH_ALEN, (dest2) += ETH_ALEN) + +/** + * batadv_mcast_forw_skb_push() - skb_push and memorize amount of pushed bytes + * @skb: the skb to push onto + * @size: the amount of bytes to push + * @len: stores the total amount of bytes pushed + * + * Performs an skb_push() onto the given skb and adds the amount of pushed bytes + * to the given len pointer. + * + * Return: the return value of the skb_push() call. + */ +static void *batadv_mcast_forw_skb_push(struct sk_buff *skb, size_t size, + unsigned short *len) +{ + *len += size; + return skb_push(skb, size); +} + +/** + * batadv_mcast_forw_push_padding() - push 2 padding bytes to skb's front + * @skb: the skb to push onto + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Pushes two padding bytes to the front of the given skb. + * + * Return: On success a pointer to the first byte of the two pushed padding + * bytes within the skb. NULL otherwise. + */ +static char * +batadv_mcast_forw_push_padding(struct sk_buff *skb, unsigned short *tvlv_len) +{ + const int pad_len = 2; + char *padding; + + if (skb_headroom(skb) < pad_len) + return NULL; + + padding = batadv_mcast_forw_skb_push(skb, pad_len, tvlv_len); + memset(padding, 0, pad_len); + + return padding; +} + +/** + * batadv_mcast_forw_push_est_padding() - push padding bytes if necessary + * @skb: the skb to potentially push the padding onto + * @count: the (estimated) number of originators the multicast packet needs to + * be sent to + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * If the number of destination entries is even then this adds two + * padding bytes to the end of the tracker TVLV. + * + * Return: true on success or if no padding is needed, false otherwise. 
+ */ +static bool +batadv_mcast_forw_push_est_padding(struct sk_buff *skb, int count, + unsigned short *tvlv_len) +{ + if (!(count % 2) && !batadv_mcast_forw_push_padding(skb, tvlv_len)) + return false; + + return true; +} + +/** + * batadv_mcast_forw_orig_entry() - get orig_node from an hlist node + * @node: the hlist node to get the orig_node from + * @entry_offset: the offset of the hlist node within the orig_node struct + * + * Return: The orig_node containing the hlist node on success, NULL on error. + */ +static struct batadv_orig_node * +batadv_mcast_forw_orig_entry(struct hlist_node *node, + size_t entry_offset) +{ + /* sanity check */ + switch (entry_offset) { + case offsetof(struct batadv_orig_node, mcast_want_all_ipv4_node): + case offsetof(struct batadv_orig_node, mcast_want_all_ipv6_node): + case offsetof(struct batadv_orig_node, mcast_want_all_rtr4_node): + case offsetof(struct batadv_orig_node, mcast_want_all_rtr6_node): + break; + default: + WARN_ON(1); + return NULL; + } + + return (struct batadv_orig_node *)((void *)node - entry_offset); +} + +/** + * batadv_mcast_forw_push_dest() - push an originator MAC address onto an skb + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb to push the destination address onto + * @vid: the vlan identifier + * @orig_node: the originator node to get the MAC address from + * @num_dests: a pointer to store the number of pushed addresses in + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * If the orig_node is a BLA backbone gateway, if there is not enough skb + * headroom available or if num_dests is already at its maximum (65535) then + * neither the skb nor num_dests is changed. Otherwise the originator's MAC + * address is pushed onto the given skb and num_dests incremented by one. + * + * Return: true if the orig_node is a backbone gateway or if an orig address + * was pushed successfully, false otherwise. + */ +static bool batadv_mcast_forw_push_dest(struct batadv_priv *bat_priv, + struct sk_buff *skb, unsigned short vid, + struct batadv_orig_node *orig_node, + unsigned short *num_dests, + unsigned short *tvlv_len) +{ + BUILD_BUG_ON(sizeof_field(struct batadv_tvlv_mcast_tracker, num_dests) + != sizeof(__be16)); + + /* Avoid sending to other BLA gateways - they already got the frame from + * the LAN side we share with them. + * TODO: Refactor to take BLA into account earlier in mode check. + */ + if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig, vid)) + return true; + + if (skb_headroom(skb) < ETH_ALEN || *num_dests == U16_MAX) + return false; + + batadv_mcast_forw_skb_push(skb, ETH_ALEN, tvlv_len); + ether_addr_copy(skb->data, orig_node->orig); + (*num_dests)++; + + return true; +} + +/** + * batadv_mcast_forw_push_dests_list() - push originators from list onto an skb + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb to push the destination addresses onto + * @vid: the vlan identifier + * @head: the list to gather originators from + * @entry_offset: offset of an hlist node in an orig_node structure + * @num_dests: a pointer to store the number of pushed addresses in + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Push the MAC addresses of all originators in the given list onto the given + * skb. + * + * Return: true on success, false otherwise. 
+ */ +static int batadv_mcast_forw_push_dests_list(struct batadv_priv *bat_priv, + struct sk_buff *skb, + unsigned short vid, + struct hlist_head *head, + size_t entry_offset, + unsigned short *num_dests, + unsigned short *tvlv_len) +{ + struct hlist_node *node; + struct batadv_orig_node *orig_node; + + rcu_read_lock(); + __hlist_for_each_rcu(node, head) { + orig_node = batadv_mcast_forw_orig_entry(node, entry_offset); + if (!orig_node || + !batadv_mcast_forw_push_dest(bat_priv, skb, vid, orig_node, + num_dests, tvlv_len)) { + rcu_read_unlock(); + return false; + } + } + rcu_read_unlock(); + + return true; +} + +/** + * batadv_mcast_forw_push_tt() - push originators with interest through TT + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb to push the destination addresses onto + * @vid: the vlan identifier + * @num_dests: a pointer to store the number of pushed addresses in + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Push the MAC addresses of all originators which have indicated interest in + * this multicast packet through the translation table onto the given skb. + * + * Return: true on success, false otherwise. + */ +static bool +batadv_mcast_forw_push_tt(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, unsigned short *num_dests, + unsigned short *tvlv_len) +{ + struct batadv_tt_orig_list_entry *orig_entry; + + struct batadv_tt_global_entry *tt_global; + const u8 *addr = eth_hdr(skb)->h_dest; + + /* ok */ + int ret = true; + + tt_global = batadv_tt_global_hash_find(bat_priv, addr, vid); + if (!tt_global) + goto out; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_entry, &tt_global->orig_list, list) { + if (!batadv_mcast_forw_push_dest(bat_priv, skb, vid, + orig_entry->orig_node, + num_dests, tvlv_len)) { + ret = false; + break; + } + } + rcu_read_unlock(); + + batadv_tt_global_entry_put(tt_global); + +out: + return ret; +} + +/** + * batadv_mcast_forw_push_want_all() - push originators with want-all flag + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb to push the destination addresses onto + * @vid: the vlan identifier + * @num_dests: a pointer to store the number of pushed addresses in + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Push the MAC addresses of all originators which have indicated interest in + * this multicast packet through the want-all flag onto the given skb. + * + * Return: true on success, false otherwise. 
+ */ +static bool batadv_mcast_forw_push_want_all(struct batadv_priv *bat_priv, + struct sk_buff *skb, + unsigned short vid, + unsigned short *num_dests, + unsigned short *tvlv_len) +{ + struct hlist_head *head = NULL; + size_t offset; + int ret; + + switch (eth_hdr(skb)->h_proto) { + case htons(ETH_P_IP): + head = &bat_priv->mcast.want_all_ipv4_list; + offset = offsetof(struct batadv_orig_node, + mcast_want_all_ipv4_node); + break; + case htons(ETH_P_IPV6): + head = &bat_priv->mcast.want_all_ipv6_list; + offset = offsetof(struct batadv_orig_node, + mcast_want_all_ipv6_node); + break; + default: + return false; + } + + ret = batadv_mcast_forw_push_dests_list(bat_priv, skb, vid, head, + offset, num_dests, tvlv_len); + if (!ret) + return false; + + return true; +} + +/** + * batadv_mcast_forw_push_want_rtr() - push originators with want-router flag + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb to push the destination addresses onto + * @vid: the vlan identifier + * @num_dests: a pointer to store the number of pushed addresses in + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Push the MAC addresses of all originators which have indicated interest in + * this multicast packet through the want-all-rtr flag onto the given skb. + * + * Return: true on success, false otherwise. + */ +static bool batadv_mcast_forw_push_want_rtr(struct batadv_priv *bat_priv, + struct sk_buff *skb, + unsigned short vid, + unsigned short *num_dests, + unsigned short *tvlv_len) +{ + struct hlist_head *head = NULL; + size_t offset; + int ret; + + switch (eth_hdr(skb)->h_proto) { + case htons(ETH_P_IP): + head = &bat_priv->mcast.want_all_rtr4_list; + offset = offsetof(struct batadv_orig_node, + mcast_want_all_rtr4_node); + break; + case htons(ETH_P_IPV6): + head = &bat_priv->mcast.want_all_rtr6_list; + offset = offsetof(struct batadv_orig_node, + mcast_want_all_rtr6_node); + break; + default: + return false; + } + + ret = batadv_mcast_forw_push_dests_list(bat_priv, skb, vid, head, + offset, num_dests, tvlv_len); + if (!ret) + return false; + + return true; +} + +/** + * batadv_mcast_forw_scrape() - remove bytes within skb data + * @skb: the skb to remove bytes from + * @offset: the offset from the skb data from which to scrape + * @len: the amount of bytes to scrape starting from the offset + * + * Scrapes/removes len bytes from the given skb at the given offset from the + * skb data. + * + * Caller needs to ensure that the region from the skb data's start up + * to/including the to be removed bytes are linearized. + */ +static void batadv_mcast_forw_scrape(struct sk_buff *skb, + unsigned short offset, + unsigned short len) +{ + char *to, *from; + + SKB_LINEAR_ASSERT(skb); + + to = skb_pull(skb, len); + from = to - len; + + memmove(to, from, offset); +} + +/** + * batadv_mcast_forw_push_scrape_padding() - remove TVLV padding + * @skb: the skb to potentially adjust the TVLV's padding on + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Remove two padding bytes from the end of the multicast tracker TVLV, + * from before the payload data. + * + * Caller needs to ensure that the TVLV bytes are linearized. 
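The scrape operation above removes bytes from the middle of a linear buffer by advancing the data start and moving the leading bytes up behind it. A standalone sketch of the same move over plain memory instead of an skb (illustrative)::

  #include <assert.h>
  #include <string.h>

  static char *scrape(char *data, size_t offset, size_t len)
  {
      char *to = data + len;     /* new start, like skb_pull(skb, len) */

      memmove(to, data, offset); /* keep the first offset bytes */
      return to;
  }

  int main(void)
  {
      char buf[] = "HEADERppPAYLOAD"; /* "pp" is 2 bytes of padding at offset 6 */
      char *new_start = scrape(buf, 6, 2);

      assert(strcmp(new_start, "HEADERPAYLOAD") == 0);
      return 0;
  }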
+ */ +static void batadv_mcast_forw_push_scrape_padding(struct sk_buff *skb, + unsigned short *tvlv_len) +{ + const int pad_len = 2; + + batadv_mcast_forw_scrape(skb, *tvlv_len - pad_len, pad_len); + *tvlv_len -= pad_len; +} + +/** + * batadv_mcast_forw_push_insert_padding() - insert TVLV padding + * @skb: the skb to potentially adjust the TVLV's padding on + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Inserts two padding bytes at the end of the multicast tracker TVLV, + * before the payload data in the given skb. + * + * Return: true on success, false otherwise. + */ +static bool batadv_mcast_forw_push_insert_padding(struct sk_buff *skb, + unsigned short *tvlv_len) +{ + unsigned short offset = *tvlv_len; + char *to, *from = skb->data; + + to = batadv_mcast_forw_push_padding(skb, tvlv_len); + if (!to) + return false; + + memmove(to, from, offset); + memset(to + offset, 0, *tvlv_len - offset); + return true; +} + +/** + * batadv_mcast_forw_push_adjust_padding() - adjust padding if necessary + * @skb: the skb to potentially adjust the TVLV's padding on + * @count: the estimated number of originators the multicast packet needs to + * be sent to + * @num_dests_pushed: the number of originators that were actually added to the + * multicast packet's tracker TVLV + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Adjusts the padding in the multicast packet's tracker TVLV depending on the + * initially estimated amount of destinations versus the amount of destinations + * that were actually added to the tracker TVLV. + * + * If the initial estimate was correct or at least the oddness was the same then + * no padding adjustment is performed. + * If the initially estimated number was even, so padding was initially added, + * but it turned out to be odd then padding is removed. + * If the initially estimated number was odd, so no padding was initially added, + * but it turned out to be even then padding is added. + * + * Return: true if no padding adjustment is needed or the adjustment was + * successful, false otherwise. + */ +static bool +batadv_mcast_forw_push_adjust_padding(struct sk_buff *skb, int *count, + unsigned short num_dests_pushed, + unsigned short *tvlv_len) +{ + int ret = true; + + if (likely((num_dests_pushed % 2) == (*count % 2))) + goto out; + + /** + * estimated even number of destinations, but turned out to be odd + * -> remove padding + */ + if (!(*count % 2) && (num_dests_pushed % 2)) + batadv_mcast_forw_push_scrape_padding(skb, tvlv_len); + /** + * estimated odd number of destinations, but turned out to be even + * -> add padding + */ + else if ((*count % 2) && (!(num_dests_pushed % 2))) + ret = batadv_mcast_forw_push_insert_padding(skb, tvlv_len); + +out: + *count = num_dests_pushed; + return ret; +} + +/** + * batadv_mcast_forw_push_dests() - push originator addresses onto an skb + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb to push the destination addresses onto + * @vid: the vlan identifier + * @is_routable: indicates whether the destination is routable + * @count: the number of originators the multicast packet needs to be sent to + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Push the MAC addresses of all originators which have indicated interest in + * this multicast packet onto the given skb. + * + * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on + * success 0. 
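The padding adjustment above only depends on whether the estimated and the actually pushed destination counts share the same parity. A standalone sketch of that decision (illustrative)::

  #include <assert.h>

  enum pad_action { PAD_KEEP, PAD_REMOVE, PAD_ADD };

  static enum pad_action pad_adjust(int estimated, int pushed)
  {
      if ((estimated % 2) == (pushed % 2))
          return PAD_KEEP;   /* estimate had the right parity */
      if (estimated % 2 == 0)
          return PAD_REMOVE; /* padding was added but is not needed */
      return PAD_ADD;        /* padding was skipped but is needed */
  }

  int main(void)
  {
      assert(pad_adjust(4, 4) == PAD_KEEP);
      assert(pad_adjust(4, 3) == PAD_REMOVE);
      assert(pad_adjust(3, 2) == PAD_ADD);
      assert(pad_adjust(3, 1) == PAD_KEEP);
      return 0;
  }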
+ */ +static int +batadv_mcast_forw_push_dests(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, int is_routable, int *count, + unsigned short *tvlv_len) +{ + unsigned short num_dests = 0; + + if (!batadv_mcast_forw_push_est_padding(skb, *count, tvlv_len)) + goto err; + + if (!batadv_mcast_forw_push_tt(bat_priv, skb, vid, &num_dests, + tvlv_len)) + goto err; + + if (!batadv_mcast_forw_push_want_all(bat_priv, skb, vid, &num_dests, + tvlv_len)) + goto err; + + if (is_routable && + !batadv_mcast_forw_push_want_rtr(bat_priv, skb, vid, &num_dests, + tvlv_len)) + goto err; + + if (!batadv_mcast_forw_push_adjust_padding(skb, count, num_dests, + tvlv_len)) + goto err; + + return 0; +err: + return -ENOMEM; +} + +/** + * batadv_mcast_forw_push_tracker() - push a multicast tracker TVLV header + * @skb: the skb to push the tracker TVLV onto + * @num_dests: the number of destination addresses to set in the header + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Pushes a multicast tracker TVLV header onto the given skb, including the + * generic TVLV header but excluding the destination MAC addresses. + * + * The provided num_dests value is taken into consideration to set the + * num_dests field in the tracker header and to set the appropriate TVLV length + * value fields. + * + * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on + * success 0. + */ +static int batadv_mcast_forw_push_tracker(struct sk_buff *skb, int num_dests, + unsigned short *tvlv_len) +{ + struct batadv_tvlv_mcast_tracker *mcast_tracker; + struct batadv_tvlv_hdr *tvlv_hdr; + unsigned int tvlv_value_len; + + if (skb_headroom(skb) < sizeof(*mcast_tracker) + sizeof(*tvlv_hdr)) + return -ENOMEM; + + tvlv_value_len = sizeof(*mcast_tracker) + *tvlv_len; + if (tvlv_value_len + sizeof(*tvlv_hdr) > U16_MAX) + return -ENOMEM; + + batadv_mcast_forw_skb_push(skb, sizeof(*mcast_tracker), tvlv_len); + mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb->data; + mcast_tracker->num_dests = htons(num_dests); + + skb_reset_network_header(skb); + + batadv_mcast_forw_skb_push(skb, sizeof(*tvlv_hdr), tvlv_len); + tvlv_hdr = (struct batadv_tvlv_hdr *)skb->data; + tvlv_hdr->type = BATADV_TVLV_MCAST_TRACKER; + tvlv_hdr->version = 1; + tvlv_hdr->len = htons(tvlv_value_len); + + return 0; +} + +/** + * batadv_mcast_forw_push_tvlvs() - push a multicast tracker TVLV onto an skb + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb to push the tracker TVLV onto + * @vid: the vlan identifier + * @is_routable: indicates whether the destination is routable + * @count: the number of originators the multicast packet needs to be sent to + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Pushes a multicast tracker TVLV onto the given skb, including the collected + * destination MAC addresses and the generic TVLV header. + * + * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on + * success 0. 
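batadv_mcast_forw_push_tracker() above leaves the packet with a generic TVLV header directly followed by the 16 bit destination count and then the MAC entries. A standalone sketch of serializing that layout into a flat buffer (illustrative; the field widths and the TVLV type value used here are assumptions, not copied from batadv_packet.h)::

  #include <arpa/inet.h>
  #include <assert.h>
  #include <stdint.h>
  #include <string.h>

  struct tvlv_hdr { uint8_t type; uint8_t version; uint16_t len; };

  static size_t push_tracker(uint8_t *buf, uint16_t num_dests, uint16_t dest_bytes)
  {
      struct tvlv_hdr hdr = {
          .type = 0x06,  /* hypothetical TVLV type value */
          .version = 1,
          .len = htons(sizeof(uint16_t) + dest_bytes),
      };
      uint16_t count = htons(num_dests);

      memcpy(buf, &hdr, sizeof(hdr));
      memcpy(buf + sizeof(hdr), &count, sizeof(count));
      return sizeof(hdr) + sizeof(count);
  }

  int main(void)
  {
      uint8_t buf[8];
      size_t used = push_tracker(buf, 3, 3 * 6);

      assert(used == 6);
      assert(buf[0] == 0x06 && buf[1] == 1);
      return 0;
  }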
+ */ +static int +batadv_mcast_forw_push_tvlvs(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, int is_routable, int count, + unsigned short *tvlv_len) +{ + int ret; + + ret = batadv_mcast_forw_push_dests(bat_priv, skb, vid, is_routable, + &count, tvlv_len); + if (ret < 0) + return ret; + + ret = batadv_mcast_forw_push_tracker(skb, count, tvlv_len); + if (ret < 0) + return ret; + + return 0; +} + +/** + * batadv_mcast_forw_push_hdr() - push a multicast packet header onto an skb + * @skb: the skb to push the header onto + * @tvlv_len: the total TVLV length value to set in the header + * + * Pushes a batman-adv multicast packet header onto the given skb and sets + * the provided total TVLV length value in it. + * + * Caller needs to ensure enough skb headroom is available. + * + * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on + * success 0. + */ +static int +batadv_mcast_forw_push_hdr(struct sk_buff *skb, unsigned short tvlv_len) +{ + struct batadv_mcast_packet *mcast_packet; + + if (skb_headroom(skb) < sizeof(*mcast_packet)) + return -ENOMEM; + + skb_push(skb, sizeof(*mcast_packet)); + + mcast_packet = (struct batadv_mcast_packet *)skb->data; + mcast_packet->version = BATADV_COMPAT_VERSION; + mcast_packet->ttl = BATADV_TTL; + mcast_packet->packet_type = BATADV_MCAST; + mcast_packet->reserved = 0; + mcast_packet->tvlv_len = htons(tvlv_len); + + return 0; +} + +/** + * batadv_mcast_forw_scrub_dests() - scrub destinations in a tracker TVLV + * @bat_priv: the bat priv with all the soft interface information + * @comp_neigh: next hop neighbor to scrub+collect destinations for + * @dest: start MAC entry in original skb's tracker TVLV + * @next_dest: start MAC entry in to be sent skb's tracker TVLV + * @num_dests: number of remaining destination MAC entries to iterate over + * + * This sorts destination entries into either the original batman-adv + * multicast packet or the skb (copy) that is going to be sent to comp_neigh + * next. + * + * In preparation for the next, to be (unicast) transmitted batman-adv multicast + * packet skb to be sent to the given neighbor node, tries to collect all + * originator MAC addresses that have the given neighbor node as their next hop + * in the to be transmitted skb (copy), which next_dest points into. That is we + * zero all destination entries in next_dest which do not have comp_neigh as + * their next hop. And zero all destination entries in the original skb that + * would have comp_neigh as their next hop (to avoid redundant transmissions and + * duplicated payload later). 
+ */ +static void +batadv_mcast_forw_scrub_dests(struct batadv_priv *bat_priv, + struct batadv_neigh_node *comp_neigh, u8 *dest, + u8 *next_dest, u16 num_dests) +{ + struct batadv_neigh_node *next_neigh; + + /* skip first entry, this is what we are comparing with */ + eth_zero_addr(dest); + dest += ETH_ALEN; + next_dest += ETH_ALEN; + num_dests--; + + batadv_mcast_forw_tracker_for_each_dest2(dest, next_dest, num_dests) { + if (is_zero_ether_addr(next_dest)) + continue; + + /* sanity check, we expect unicast destinations */ + if (is_multicast_ether_addr(next_dest)) { + eth_zero_addr(dest); + eth_zero_addr(next_dest); + continue; + } + + next_neigh = batadv_orig_to_router(bat_priv, next_dest, NULL); + if (!next_neigh) { + eth_zero_addr(next_dest); + continue; + } + + if (!batadv_compare_eth(next_neigh->addr, comp_neigh->addr)) { + eth_zero_addr(next_dest); + batadv_neigh_node_put(next_neigh); + continue; + } + + /* found an entry for our next packet to transmit, so remove it + * from the original packet + */ + eth_zero_addr(dest); + batadv_neigh_node_put(next_neigh); + } +} + +/** + * batadv_mcast_forw_shrink_fill() - swap slot with next non-zero destination + * @slot: the to be filled zero-MAC destination entry in a tracker TVLV + * @num_dests_slot: remaining entries in tracker TVLV from/including slot + * + * Searches for the next non-zero-MAC destination entry in a tracker TVLV after + * the given slot pointer. And if found, swaps it with the zero-MAC destination + * entry which the slot points to. + * + * Return: true if slot was swapped/filled successfully, false otherwise. + */ +static bool batadv_mcast_forw_shrink_fill(u8 *slot, u16 num_dests_slot) +{ + u16 num_dests_filler; + u8 *filler; + + /* sanity check, should not happen */ + if (!num_dests_slot) + return false; + + num_dests_filler = num_dests_slot - 1; + filler = slot + ETH_ALEN; + + /* find a candidate to fill the empty slot */ + batadv_mcast_forw_tracker_for_each_dest(filler, num_dests_filler) { + if (is_zero_ether_addr(filler)) + continue; + + ether_addr_copy(slot, filler); + eth_zero_addr(filler); + return true; + } + + return false; +} + +/** + * batadv_mcast_forw_shrink_pack_dests() - pack destinations of a tracker TVLV + * @skb: the batman-adv multicast packet to compact destinations in + * + * Compacts the originator destination MAC addresses in the multicast tracker + * TVLV of the given multicast packet. This is done by moving all non-zero + * MAC addresses in direction of the skb head and all zero MAC addresses in skb + * tail direction, within the multicast tracker TVLV. + * + * Return: The number of consecutive zero MAC address destinations which are + * now at the end of the multicast tracker TVLV. + */ +static int batadv_mcast_forw_shrink_pack_dests(struct sk_buff *skb) +{ + struct batadv_tvlv_mcast_tracker *mcast_tracker; + unsigned char *skb_net_hdr; + u16 num_dests_slot; + u8 *slot; + + skb_net_hdr = skb_network_header(skb); + mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr; + num_dests_slot = ntohs(mcast_tracker->num_dests); + + slot = (u8 *)mcast_tracker + sizeof(*mcast_tracker); + + batadv_mcast_forw_tracker_for_each_dest(slot, num_dests_slot) { + /* find an empty slot */ + if (!is_zero_ether_addr(slot)) + continue; + + if (!batadv_mcast_forw_shrink_fill(slot, num_dests_slot)) + /* could not find a filler, so we successfully packed + * and can stop - and must not reduce num_dests_slot! 
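The hole filling in batadv_mcast_forw_shrink_fill() above, driven by the packing loop it serves, compacts the destination list so that all zeroed entries end up at the tail. A standalone sketch of the same compaction over a flat array (illustrative)::

  #include <assert.h>
  #include <stdbool.h>
  #include <string.h>

  #define ETH_ALEN 6

  static bool mac_is_zero(const unsigned char *mac)
  {
      static const unsigned char zero[ETH_ALEN];

      return memcmp(mac, zero, ETH_ALEN) == 0;
  }

  /* returns the number of all-zero entries left at the tail after packing */
  static unsigned int pack_dests(unsigned char *dests, unsigned int num)
  {
      for (unsigned int slot = 0; slot < num; slot++) {
          unsigned char *cur = dests + slot * ETH_ALEN;
          bool filled = false;

          if (!mac_is_zero(cur))
              continue;

          /* look for a later non-zero entry to fill this hole */
          for (unsigned int i = slot + 1; i < num; i++) {
              unsigned char *cand = dests + i * ETH_ALEN;

              if (mac_is_zero(cand))
                  continue;
              memcpy(cur, cand, ETH_ALEN);
              memset(cand, 0, ETH_ALEN);
              filled = true;
              break;
          }
          if (!filled)
              return num - slot; /* everything from here on is zero */
      }
      return 0; /* no holes at all */
  }

  int main(void)
  {
      unsigned char dests[4 * ETH_ALEN] = {
          0, 0, 0, 0, 0, 0,       /* hole */
          1, 2, 3, 4, 5, 6,
          0, 0, 0, 0, 0, 0,       /* hole */
          7, 8, 9, 10, 11, 12,
      };

      assert(pack_dests(dests, 4) == 2); /* two zeroed entries at the end */
      assert(dests[0] == 1 && dests[6] == 7);
      return 0;
  }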
+ */ + break; + } + + /* num_dests_slot is now the amount of reduced, zeroed + * destinations at the end of the tracker TVLV + */ + return num_dests_slot; +} + +/** + * batadv_mcast_forw_shrink_align_offset() - get new alignment offset + * @num_dests_old: the old, to be updated amount of destination nodes + * @num_dests_reduce: the number of destinations that were removed + * + * Calculates the amount of potential extra alignment offset that is needed to + * adjust the TVLV padding after the change in destination nodes. + * + * Return: + * 0: If no change to padding is needed. + * 2: If padding needs to be removed. + * -2: If padding needs to be added. + */ +static short +batadv_mcast_forw_shrink_align_offset(unsigned int num_dests_old, + unsigned int num_dests_reduce) +{ + /* even amount of removed destinations -> no alignment change */ + if (!(num_dests_reduce % 2)) + return 0; + + /* even to odd amount of destinations -> remove padding */ + if (!(num_dests_old % 2)) + return 2; + + /* odd to even amount of destinations -> add padding */ + return -2; +} + +/** + * batadv_mcast_forw_shrink_update_headers() - update shrunk mc packet headers + * @skb: the batman-adv multicast packet to update headers of + * @num_dests_reduce: the number of destinations that were removed + * + * This updates any fields of a batman-adv multicast packet that are affected + * by the reduced number of destinations in the multicast tracket TVLV. In + * particular this updates: + * + * The num_dest field of the multicast tracker TVLV. + * The TVLV length field of the according generic TVLV header. + * The batman-adv multicast packet's total TVLV length field. + * + * Return: The offset in skb's tail direction at which the new batman-adv + * multicast packet header needs to start. + */ +static unsigned int +batadv_mcast_forw_shrink_update_headers(struct sk_buff *skb, + unsigned int num_dests_reduce) +{ + struct batadv_tvlv_mcast_tracker *mcast_tracker; + struct batadv_mcast_packet *mcast_packet; + struct batadv_tvlv_hdr *tvlv_hdr; + unsigned char *skb_net_hdr; + unsigned int offset; + short align_offset; + u16 num_dests; + + skb_net_hdr = skb_network_header(skb); + mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr; + num_dests = ntohs(mcast_tracker->num_dests); + + align_offset = batadv_mcast_forw_shrink_align_offset(num_dests, + num_dests_reduce); + offset = ETH_ALEN * num_dests_reduce + align_offset; + num_dests -= num_dests_reduce; + + /* update tracker header */ + mcast_tracker->num_dests = htons(num_dests); + + /* update tracker's tvlv header's length field */ + tvlv_hdr = (struct batadv_tvlv_hdr *)(skb_network_header(skb) - + sizeof(*tvlv_hdr)); + tvlv_hdr->len = htons(ntohs(tvlv_hdr->len) - offset); + + /* update multicast packet header's tvlv length field */ + mcast_packet = (struct batadv_mcast_packet *)skb->data; + mcast_packet->tvlv_len = htons(ntohs(mcast_packet->tvlv_len) - offset); + + return offset; +} + +/** + * batadv_mcast_forw_shrink_move_headers() - move multicast headers by offset + * @skb: the batman-adv multicast packet to move headers for + * @offset: a non-negative offset to move headers by, towards the skb tail + * + * Moves the batman-adv multicast packet header, its multicast tracker TVLV and + * any TVLVs in between by the given offset in direction towards the tail. 
+ */ +static void +batadv_mcast_forw_shrink_move_headers(struct sk_buff *skb, unsigned int offset) +{ + struct batadv_tvlv_mcast_tracker *mcast_tracker; + unsigned char *skb_net_hdr; + unsigned int len; + u16 num_dests; + + skb_net_hdr = skb_network_header(skb); + mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr; + num_dests = ntohs(mcast_tracker->num_dests); + len = skb_network_offset(skb) + sizeof(*mcast_tracker); + len += num_dests * ETH_ALEN; + + batadv_mcast_forw_scrape(skb, len, offset); +} + +/** + * batadv_mcast_forw_shrink_tracker() - remove zero addresses in a tracker tvlv + * @skb: the batman-adv multicast packet to (potentially) shrink + * + * Removes all destinations with a zero MAC addresses (00:00:00:00:00:00) from + * the given batman-adv multicast packet's tracker TVLV and updates headers + * accordingly to maintain a valid batman-adv multicast packet. + */ +static void batadv_mcast_forw_shrink_tracker(struct sk_buff *skb) +{ + unsigned int offset; + u16 dests_reduced; + + dests_reduced = batadv_mcast_forw_shrink_pack_dests(skb); + if (!dests_reduced) + return; + + offset = batadv_mcast_forw_shrink_update_headers(skb, dests_reduced); + batadv_mcast_forw_shrink_move_headers(skb, offset); +} + +/** + * batadv_mcast_forw_packet() - forward a batman-adv multicast packet + * @bat_priv: the bat priv with all the soft interface information + * @skb: the received or locally generated batman-adv multicast packet + * @local_xmit: indicates that the packet was locally generated and not received + * + * Parses the tracker TVLV of a batman-adv multicast packet and forwards the + * packet as indicated in this TVLV. + * + * Caller needs to set the skb network header to the start of the multicast + * tracker TVLV (excluding the generic TVLV header) and the skb transport header + * to the next byte after this multicast tracker TVLV. + * + * Caller needs to free the skb. + * + * Return: NET_RX_SUCCESS or NET_RX_DROP on success or a negative error + * code on failure. NET_RX_SUCCESS if the received packet is supposed to be + * decapsulated and forwarded to the own soft interface, NET_RX_DROP otherwise. 
+ */ +static int batadv_mcast_forw_packet(struct batadv_priv *bat_priv, + struct sk_buff *skb, bool local_xmit) +{ + struct batadv_tvlv_mcast_tracker *mcast_tracker; + struct batadv_neigh_node *neigh_node; + unsigned long offset, num_dests_off; + struct sk_buff *nexthop_skb; + unsigned char *skb_net_hdr; + bool local_recv = false; + unsigned int tvlv_len; + bool xmitted = false; + u8 *dest, *next_dest; + u16 num_dests; + int ret; + + /* (at least) TVLV part needs to be linearized */ + SKB_LINEAR_ASSERT(skb); + + /* check if num_dests is within skb length */ + num_dests_off = offsetof(struct batadv_tvlv_mcast_tracker, num_dests); + if (num_dests_off > skb_network_header_len(skb)) + return -EINVAL; + + skb_net_hdr = skb_network_header(skb); + mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr; + num_dests = ntohs(mcast_tracker->num_dests); + + dest = (u8 *)mcast_tracker + sizeof(*mcast_tracker); + + /* check if full tracker tvlv is within skb length */ + tvlv_len = sizeof(*mcast_tracker) + ETH_ALEN * num_dests; + if (tvlv_len > skb_network_header_len(skb)) + return -EINVAL; + + /* invalidate checksum: */ + skb->ip_summed = CHECKSUM_NONE; + + batadv_mcast_forw_tracker_for_each_dest(dest, num_dests) { + if (is_zero_ether_addr(dest)) + continue; + + /* only unicast originator addresses supported */ + if (is_multicast_ether_addr(dest)) { + eth_zero_addr(dest); + continue; + } + + if (batadv_is_my_mac(bat_priv, dest)) { + eth_zero_addr(dest); + local_recv = true; + continue; + } + + neigh_node = batadv_orig_to_router(bat_priv, dest, NULL); + if (!neigh_node) { + eth_zero_addr(dest); + continue; + } + + nexthop_skb = skb_copy(skb, GFP_ATOMIC); + if (!nexthop_skb) { + batadv_neigh_node_put(neigh_node); + return -ENOMEM; + } + + offset = dest - skb->data; + next_dest = nexthop_skb->data + offset; + + batadv_mcast_forw_scrub_dests(bat_priv, neigh_node, dest, + next_dest, num_dests); + batadv_mcast_forw_shrink_tracker(nexthop_skb); + + batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_TX); + batadv_add_counter(bat_priv, BATADV_CNT_MCAST_TX_BYTES, + nexthop_skb->len + ETH_HLEN); + xmitted = true; + ret = batadv_send_unicast_skb(nexthop_skb, neigh_node); + + batadv_neigh_node_put(neigh_node); + + if (ret < 0) + return ret; + } + + if (xmitted) { + if (local_xmit) { + batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_TX_LOCAL); + batadv_add_counter(bat_priv, + BATADV_CNT_MCAST_TX_LOCAL_BYTES, + skb->len - + skb_transport_offset(skb)); + } else { + batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_FWD); + batadv_add_counter(bat_priv, BATADV_CNT_MCAST_FWD_BYTES, + skb->len + ETH_HLEN); + } + } + + if (local_recv) + return NET_RX_SUCCESS; + else + return NET_RX_DROP; +} + +/** + * batadv_mcast_forw_tracker_tvlv_handler() - handle an mcast tracker tvlv + * @bat_priv: the bat priv with all the soft interface information + * @skb: the received batman-adv multicast packet + * + * Parses the tracker TVLV of an incoming batman-adv multicast packet and + * forwards the packet as indicated in this TVLV. + * + * Caller needs to set the skb network header to the start of the multicast + * tracker TVLV (excluding the generic TVLV header) and the skb transport header + * to the next byte after this multicast tracker TVLV. + * + * Caller needs to free the skb. + * + * Return: NET_RX_SUCCESS or NET_RX_DROP on success or a negative error + * code on failure. NET_RX_SUCCESS if the received packet is supposed to be + * decapsulated and forwarded to the own soft interface, NET_RX_DROP otherwise. 
+ */ +int batadv_mcast_forw_tracker_tvlv_handler(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + return batadv_mcast_forw_packet(bat_priv, skb, false); +} + +/** + * batadv_mcast_forw_packet_hdrlen() - multicast packet header length + * @num_dests: number of destination nodes + * + * Calculates the total batman-adv multicast packet header length for a given + * number of destination nodes (excluding the outer ethernet frame). + * + * Return: The calculated total batman-adv multicast packet header length. + */ +unsigned int batadv_mcast_forw_packet_hdrlen(unsigned int num_dests) +{ + /** + * If the number of destination entries is even then we need to add + * two byte padding to the tracker TVLV. + */ + int padding = (!(num_dests % 2)) ? 2 : 0; + + return padding + num_dests * ETH_ALEN + + sizeof(struct batadv_tvlv_mcast_tracker) + + sizeof(struct batadv_tvlv_hdr) + + sizeof(struct batadv_mcast_packet); +} + +/** + * batadv_mcast_forw_expand_head() - expand headroom for an mcast packet + * @bat_priv: the bat priv with all the soft interface information + * @skb: the multicast packet to send + * + * Tries to expand an skb's headroom so that its head to tail is 1298 + * bytes (minimum IPv6 MTU + vlan ethernet header size) large. + * + * Return: -EINVAL if the given skb's length is too large or -ENOMEM on memory + * allocation failure. Otherwise, on success, zero is returned. + */ +static int batadv_mcast_forw_expand_head(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + int hdr_size = VLAN_ETH_HLEN + IPV6_MIN_MTU - skb->len; + + /* TODO: Could be tightened to actual number of destination nodes? + * But it's tricky, number of destinations might have increased since + * we last checked. + */ + if (hdr_size < 0) { + /* batadv_mcast_forw_mode_check_count() should ensure we do not + * end up here + */ + WARN_ON(1); + return -EINVAL; + } + + if (skb_headroom(skb) < hdr_size && + pskb_expand_head(skb, hdr_size, 0, GFP_ATOMIC) < 0) + return -ENOMEM; + + return 0; +} + +/** + * batadv_mcast_forw_push() - encapsulate skb in a batman-adv multicast packet + * @bat_priv: the bat priv with all the soft interface information + * @skb: the multicast packet to encapsulate and send + * @vid: the vlan identifier + * @is_routable: indicates whether the destination is routable + * @count: the number of originators the multicast packet needs to be sent to + * + * Encapsulates the given multicast packet in a batman-adv multicast packet. + * A multicast tracker TVLV with destination originator addresses for any node + * that signaled interest in it, that is either via the translation table or the + * according want-all flags, is attached accordingly. + * + * Return: true on success, false otherwise. 
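The 1298 byte figure used by batadv_mcast_forw_expand_head() above is simply the minimum IPv6 MTU plus a VLAN tagged Ethernet header. A standalone check of the arithmetic (illustrative)::

  #include <assert.h>

  #define IPV6_MIN_MTU  1280 /* RFC 8200 minimum link MTU */
  #define VLAN_ETH_HLEN 18   /* 14 byte Ethernet header + 4 byte 802.1Q tag */

  int main(void)
  {
      assert(VLAN_ETH_HLEN + IPV6_MIN_MTU == 1298);

      /* e.g. a 100 byte frame would need 1198 more bytes of headroom */
      assert(VLAN_ETH_HLEN + IPV6_MIN_MTU - 100 == 1198);
      return 0;
  }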
+ */ +bool batadv_mcast_forw_push(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, int is_routable, int count) +{ + unsigned short tvlv_len = 0; + int ret; + + if (batadv_mcast_forw_expand_head(bat_priv, skb) < 0) + goto err; + + skb_reset_transport_header(skb); + + ret = batadv_mcast_forw_push_tvlvs(bat_priv, skb, vid, is_routable, + count, &tvlv_len); + if (ret < 0) + goto err; + + ret = batadv_mcast_forw_push_hdr(skb, tvlv_len); + if (ret < 0) + goto err; + + return true; + +err: + if (tvlv_len) + skb_pull(skb, tvlv_len); + + return false; +} + +/** + * batadv_mcast_forw_mcsend() - send a self prepared batman-adv multicast packet + * @bat_priv: the bat priv with all the soft interface information + * @skb: the multicast packet to encapsulate and send + * + * Transmits a batman-adv multicast packet that was locally prepared and + * consumes/frees it. + * + * Return: NET_XMIT_DROP on memory allocation failure. NET_XMIT_SUCCESS + * otherwise. + */ +int batadv_mcast_forw_mcsend(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + int ret = batadv_mcast_forw_packet(bat_priv, skb, true); + + if (ret < 0) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + + consume_skb(skb); + return NET_XMIT_SUCCESS; +} diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 0c64d81a7761..1f7ed9d4f6fd 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -7,6 +7,7 @@ #include "netlink.h" #include "main.h" +#include <linux/array_size.h> #include <linux/atomic.h> #include <linux/bitops.h> #include <linux/bug.h> @@ -20,7 +21,6 @@ #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <linux/init.h> -#include <linux/kernel.h> #include <linux/limits.h> #include <linux/list.h> #include <linux/minmax.h> diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 34903df4fe93..71c143d4b6d0 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -312,6 +312,33 @@ batadv_orig_router_get(struct batadv_orig_node *orig_node, } /** + * batadv_orig_to_router() - get next hop neighbor to an orig address + * @bat_priv: the bat priv with all the soft interface information + * @orig_addr: the originator MAC address to search the best next hop router for + * @if_outgoing: the interface where the payload packet has been received or + * the OGM should be sent to + * + * Return: A neighbor node which is the best router towards the given originator + * address. 
+ */ +struct batadv_neigh_node * +batadv_orig_to_router(struct batadv_priv *bat_priv, u8 *orig_addr, + struct batadv_hard_iface *if_outgoing) +{ + struct batadv_neigh_node *neigh_node; + struct batadv_orig_node *orig_node; + + orig_node = batadv_orig_hash_find(bat_priv, orig_addr); + if (!orig_node) + return NULL; + + neigh_node = batadv_find_router(bat_priv, orig_node, if_outgoing); + batadv_orig_node_put(orig_node); + + return neigh_node; +} + +/** * batadv_orig_ifinfo_get() - find the ifinfo from an orig_node * @orig_node: the orig node to be queried * @if_outgoing: the interface for which the ifinfo should be acquired @@ -942,6 +969,7 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, #ifdef CONFIG_BATMAN_ADV_MCAST orig_node->mcast_flags = BATADV_MCAST_WANT_NO_RTR4; orig_node->mcast_flags |= BATADV_MCAST_WANT_NO_RTR6; + orig_node->mcast_flags |= BATADV_MCAST_HAVE_MC_PTYPE_CAPA; INIT_HLIST_NODE(&orig_node->mcast_want_all_unsnoopables_node); INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv4_node); INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv6_node); diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index ea3d69e4e670..db0c55128170 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -36,6 +36,9 @@ void batadv_neigh_node_release(struct kref *ref); struct batadv_neigh_node * batadv_orig_router_get(struct batadv_orig_node *orig_node, const struct batadv_hard_iface *if_outgoing); +struct batadv_neigh_node * +batadv_orig_to_router(struct batadv_priv *bat_priv, u8 *orig_addr, + struct batadv_hard_iface *if_outgoing); struct batadv_neigh_ifinfo * batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh, struct batadv_hard_iface *if_outgoing); diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 163cd43c4821..f1061985149f 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -1270,3 +1270,73 @@ out: batadv_orig_node_put(orig_node); return ret; } + +#ifdef CONFIG_BATMAN_ADV_MCAST +/** + * batadv_recv_mcast_packet() - process received batman-adv multicast packet + * @skb: the received batman-adv multicast packet + * @recv_if: interface that the skb is received on + * + * Parses the given, received batman-adv multicast packet. Depending on the + * contents of its TVLV forwards it and/or decapsulates it to hand it to the + * soft interface. + * + * Return: NET_RX_DROP if the skb is not consumed, NET_RX_SUCCESS otherwise. + */ +int batadv_recv_mcast_packet(struct sk_buff *skb, + struct batadv_hard_iface *recv_if) +{ + struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_mcast_packet *mcast_packet; + int hdr_size = sizeof(*mcast_packet); + unsigned char *tvlv_buff; + int ret = NET_RX_DROP; + u16 tvlv_buff_len; + + if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) + goto free_skb; + + /* create a copy of the skb, if needed, to modify it. 
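batadv_recv_mcast_packet(), continued just below, relies on a post-decrementing TTL check: packets arriving with a TTL of 0 or 1 are dropped, anything else is processed with the TTL already reduced by one. A standalone sketch of that idiom (illustrative)::

  #include <assert.h>
  #include <stdbool.h>
  #include <stdint.h>

  static bool ttl_check_and_decrement(uint8_t *ttl)
  {
      if ((*ttl)-- < 2)
          return false; /* drop */
      return true;      /* forward, *ttl is now old value - 1 */
  }

  int main(void)
  {
      uint8_t ttl;

      ttl = 1;
      assert(!ttl_check_and_decrement(&ttl));

      ttl = 5;
      assert(ttl_check_and_decrement(&ttl) && ttl == 4);
      return 0;
  }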
*/ + if (skb_cow(skb, ETH_HLEN) < 0) + goto free_skb; + + /* packet needs to be linearized to access the tvlv content */ + if (skb_linearize(skb) < 0) + goto free_skb; + + mcast_packet = (struct batadv_mcast_packet *)skb->data; + if (mcast_packet->ttl-- < 2) + goto free_skb; + + tvlv_buff = (unsigned char *)(skb->data + hdr_size); + tvlv_buff_len = ntohs(mcast_packet->tvlv_len); + + if (tvlv_buff_len > skb->len - hdr_size) + goto free_skb; + + ret = batadv_tvlv_containers_process(bat_priv, BATADV_MCAST, NULL, skb, + tvlv_buff, tvlv_buff_len); + if (ret >= 0) { + batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_RX); + batadv_add_counter(bat_priv, BATADV_CNT_MCAST_RX_BYTES, + skb->len + ETH_HLEN); + } + + hdr_size += tvlv_buff_len; + + if (ret == NET_RX_SUCCESS && (skb->len - hdr_size >= ETH_HLEN)) { + batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_RX_LOCAL); + batadv_add_counter(bat_priv, BATADV_CNT_MCAST_RX_LOCAL_BYTES, + skb->len - hdr_size); + + batadv_interface_rx(bat_priv->soft_iface, skb, hdr_size, NULL); + /* skb was consumed */ + skb = NULL; + } + +free_skb: + kfree_skb(skb); + + return ret; +} +#endif /* CONFIG_BATMAN_ADV_MCAST */ diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index afd15b3879f1..e9849f032a24 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -27,6 +27,17 @@ int batadv_recv_frag_packet(struct sk_buff *skb, struct batadv_hard_iface *iface); int batadv_recv_bcast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); +#ifdef CONFIG_BATMAN_ADV_MCAST +int batadv_recv_mcast_packet(struct sk_buff *skb, + struct batadv_hard_iface *recv_if); +#else +static inline int batadv_recv_mcast_packet(struct sk_buff *skb, + struct batadv_hard_iface *recv_if) +{ + kfree_skb(skb); + return NET_RX_DROP; +} +#endif int batadv_recv_unicast_tvlv(struct sk_buff *skb, struct batadv_hard_iface *recv_if); int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb, diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 1bf1232a4f75..89c51b3cf430 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -301,12 +301,13 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, send: if (do_bcast && !is_broadcast_ether_addr(ethhdr->h_dest)) { - forw_mode = batadv_mcast_forw_mode(bat_priv, skb, + forw_mode = batadv_mcast_forw_mode(bat_priv, skb, vid, &mcast_is_routable); switch (forw_mode) { case BATADV_FORW_BCAST: break; case BATADV_FORW_UCASTS: + case BATADV_FORW_MCAST: do_bcast = false; break; case BATADV_FORW_NONE: @@ -365,6 +366,8 @@ send: } else if (forw_mode == BATADV_FORW_UCASTS) { ret = batadv_mcast_forw_send(bat_priv, skb, vid, mcast_is_routable); + } else if (forw_mode == BATADV_FORW_MCAST) { + ret = batadv_mcast_forw_mcsend(bat_priv, skb); } else { if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb)) @@ -762,6 +765,7 @@ static int batadv_softif_init_late(struct net_device *dev) atomic_set(&bat_priv->mcast.num_want_all_unsnoopables, 0); atomic_set(&bat_priv->mcast.num_want_all_ipv4, 0); atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0); + atomic_set(&bat_priv->mcast.num_no_mc_ptype_capa, 0); #endif atomic_set(&bat_priv->gw.mode, BATADV_GW_MODE_OFF); atomic_set(&bat_priv->gw.bandwidth_down, 100); @@ -925,6 +929,18 @@ static const struct { { "tt_response_rx" }, { "tt_roam_adv_tx" }, { "tt_roam_adv_rx" }, +#ifdef CONFIG_BATMAN_ADV_MCAST + { "mcast_tx" }, + { "mcast_tx_bytes" }, + { "mcast_tx_local" }, + { "mcast_tx_local_bytes" }, + { "mcast_rx" }, + { "mcast_rx_bytes" }, + { 
"mcast_rx_local" }, + { "mcast_rx_local_bytes" }, + { "mcast_fwd" }, + { "mcast_fwd_bytes" }, +#endif #ifdef CONFIG_BATMAN_ADV_DAT { "dat_get_tx" }, { "dat_get_rx" }, diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 17d5ea1d8e84..00840d5784fe 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -862,6 +862,70 @@ enum batadv_counters { */ BATADV_CNT_TT_ROAM_ADV_RX, +#ifdef CONFIG_BATMAN_ADV_MCAST + /** + * @BATADV_CNT_MCAST_TX: transmitted batman-adv multicast packets + * counter + */ + BATADV_CNT_MCAST_TX, + + /** + * @BATADV_CNT_MCAST_TX_BYTES: transmitted batman-adv multicast packets + * bytes counter + */ + BATADV_CNT_MCAST_TX_BYTES, + + /** + * @BATADV_CNT_MCAST_TX_LOCAL: counter for multicast packets which + * were locally encapsulated and transmitted as batman-adv multicast + * packets + */ + BATADV_CNT_MCAST_TX_LOCAL, + + /** + * @BATADV_CNT_MCAST_TX_LOCAL_BYTES: bytes counter for multicast packets + * which were locally encapsulated and transmitted as batman-adv + * multicast packets + */ + BATADV_CNT_MCAST_TX_LOCAL_BYTES, + + /** + * @BATADV_CNT_MCAST_RX: received batman-adv multicast packet counter + */ + BATADV_CNT_MCAST_RX, + + /** + * @BATADV_CNT_MCAST_RX_BYTES: received batman-adv multicast packet + * bytes counter + */ + BATADV_CNT_MCAST_RX_BYTES, + + /** + * @BATADV_CNT_MCAST_RX_LOCAL: counter for received batman-adv multicast + * packets which were forwarded to the local soft interface + */ + BATADV_CNT_MCAST_RX_LOCAL, + + /** + * @BATADV_CNT_MCAST_RX_LOCAL_BYTES: bytes counter for received + * batman-adv multicast packets which were forwarded to the local soft + * interface + */ + BATADV_CNT_MCAST_RX_LOCAL_BYTES, + + /** + * @BATADV_CNT_MCAST_FWD: counter for received batman-adv multicast + * packets which were forwarded to other, neighboring nodes + */ + BATADV_CNT_MCAST_FWD, + + /** + * @BATADV_CNT_MCAST_FWD_BYTES: bytes counter for received batman-adv + * multicast packets which were forwarded to other, neighboring nodes + */ + BATADV_CNT_MCAST_FWD_BYTES, +#endif + #ifdef CONFIG_BATMAN_ADV_DAT /** * @BATADV_CNT_DAT_GET_TX: transmitted dht GET traffic packet counter @@ -1279,6 +1343,12 @@ struct batadv_priv_mcast { atomic_t num_want_all_rtr6; /** + * @num_no_mc_ptype_capa: counter for number of nodes without the + * BATADV_MCAST_HAVE_MC_PTYPE_CAPA flag + */ + atomic_t num_no_mc_ptype_capa; + + /** * @want_lists_lock: lock for protecting modifications to mcasts * want_all_{unsnoopables,ipv4,ipv6}_list (traversals are rcu-locked) */ diff --git a/net/core/dev.c b/net/core/dev.c index c879246be48d..3950ced396b5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11236,17 +11236,19 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, dev_net_set(dev, net); dev->ifindex = new_ifindex; - /* Send a netdev-add uevent to the new namespace */ - kobject_uevent(&dev->dev.kobj, KOBJ_ADD); - netdev_adjacent_add_links(dev); - if (new_name[0]) /* Rename the netdev to prepared name */ strscpy(dev->name, new_name, IFNAMSIZ); /* Fixup kobjects */ + dev_set_uevent_suppress(&dev->dev, 1); err = device_rename(&dev->dev, dev->name); + dev_set_uevent_suppress(&dev->dev, 0); WARN_ON(err); + /* Send a netdev-add uevent to the new namespace */ + kobject_uevent(&dev->dev.kobj, KOBJ_ADD); + netdev_adjacent_add_links(dev); + /* Adapt owner in case owning user namespace of target network * namespace is different from the original one. 
*/ diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index feeddf95f450..9a66cf5015f2 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -322,9 +322,9 @@ static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr) * frames and not forward them), it must set IFF_SEE_ALL_HWTSTAMP_REQUESTS in * dev->priv_flags. */ -static int dev_set_hwtstamp_phylib(struct net_device *dev, - struct kernel_hwtstamp_config *cfg, - struct netlink_ext_ack *extack) +int dev_set_hwtstamp_phylib(struct net_device *dev, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; bool phy_ts = phy_has_hwtstamp(dev->phydev); @@ -363,6 +363,7 @@ static int dev_set_hwtstamp_phylib(struct net_device *dev, return 0; } +EXPORT_SYMBOL_GPL(dev_set_hwtstamp_phylib); static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr) { diff --git a/net/core/page_pool.c b/net/core/page_pool.c index dec544337236..df2a06d7da52 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -173,7 +173,8 @@ static int page_pool_init(struct page_pool *pool, { unsigned int ring_qsize = 1024; /* Default */ - memcpy(&pool->p, params, sizeof(pool->p)); + memcpy(&pool->p, ¶ms->fast, sizeof(pool->p)); + memcpy(&pool->slow, ¶ms->slow, sizeof(pool->slow)); /* Validate only known flags were used */ if (pool->p.flags & ~(PP_FLAG_ALL)) @@ -211,6 +212,8 @@ static int page_pool_init(struct page_pool *pool, */ } + pool->has_init_callback = !!pool->slow.init_callback; + #ifdef CONFIG_PAGE_POOL_STATS pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats); if (!pool->recycle_stats) @@ -388,8 +391,8 @@ static void page_pool_set_pp_info(struct page_pool *pool, * the overhead is negligible. */ page_pool_fragment_page(page, 1); - if (pool->p.init_callback) - pool->p.init_callback(page, pool->p.init_arg); + if (pool->has_init_callback) + pool->slow.init_callback(page, pool->slow.init_arg); } static void page_pool_clear_pp_info(struct page *page) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e8431c6c8490..592164c2a540 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3849,7 +3849,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; err = -ENOBUFS; - nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL); + nskb = nlmsg_new_large(if_nlmsg_size(dev, ext_filter_mask)); if (nskb == NULL) goto out; diff --git a/net/devlink/core.c b/net/devlink/core.c index 6984877e9f10..4275a2bc6d8e 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -503,14 +503,14 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) * all devlink instances from this namespace into init_net. 
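The page_pool change above splits the driver supplied parameters into a hot "fast" part copied into the frequently accessed head of the pool and a cold "slow" part, with a cached boolean so the allocation hot path never has to dereference the cold fields. A standalone sketch of that pattern (illustrative; the structs below are generic stand-ins, not the real page_pool definitions)::

  #include <assert.h>
  #include <stdbool.h>
  #include <stddef.h>

  struct params_fast { unsigned int order; unsigned int pool_size; };
  struct params_slow { void (*init_callback)(void *arg); void *init_arg; };
  struct params { struct params_fast fast; struct params_slow slow; };

  struct pool {
      struct params_fast p;    /* hot: read on every allocation */
      bool has_init_callback;  /* hot: cached from the slow part */
      struct params_slow slow; /* cold: only read when actually needed */
  };

  static void pool_init(struct pool *pool, const struct params *params)
  {
      pool->p = params->fast;
      pool->slow = params->slow;
      pool->has_init_callback = pool->slow.init_callback != NULL;
  }

  int main(void)
  {
      struct params params = { .fast = { .order = 0, .pool_size = 256 } };
      struct pool pool;

      pool_init(&pool, &params);
      assert(!pool.has_init_callback && pool.p.pool_size == 256);
      return 0;
  }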
*/ devlinks_xa_for_each_registered_get(net, index, devlink) { - devl_lock(devlink); + devl_dev_lock(devlink, true); err = 0; if (devl_is_registered(devlink)) err = devlink_reload(devlink, &init_net, DEVLINK_RELOAD_ACTION_DRIVER_REINIT, DEVLINK_RELOAD_LIMIT_UNSPEC, &actions_performed, NULL); - devl_unlock(devlink); + devl_dev_unlock(devlink, true); devlink_put(devlink); if (err && err != -EOPNOTSUPP) pr_warn("Failed to reload devlink instance into init_net\n"); diff --git a/net/devlink/dev.c b/net/devlink/dev.c index 4fc7adb32663..ea6a92f2e6a2 100644 --- a/net/devlink/dev.c +++ b/net/devlink/dev.c @@ -4,6 +4,7 @@ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> */ +#include <linux/device.h> #include <net/genetlink.h> #include <net/sock.h> #include "devl_internal.h" @@ -433,6 +434,13 @@ int devlink_reload(struct devlink *devlink, struct net *dest_net, struct net *curr_net; int err; + /* Make sure the reload operations are invoked with the device lock + * held to allow drivers to trigger functionality that expects it + * (e.g., PCI reset) and to close possible races between these + * operations and probe/remove. + */ + device_lock_assert(devlink->dev); + memcpy(remote_reload_stats, devlink->stats.remote_reload_stats, sizeof(remote_reload_stats)); diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index 183dbe3807ab..5ea2e2012e93 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -3,6 +3,7 @@ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> */ +#include <linux/device.h> #include <linux/etherdevice.h> #include <linux/mutex.h> #include <linux/netdevice.h> @@ -96,6 +97,20 @@ static inline bool devl_is_registered(struct devlink *devlink) return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); } +static inline void devl_dev_lock(struct devlink *devlink, bool dev_lock) +{ + if (dev_lock) + device_lock(devlink->dev); + devl_lock(devlink); +} + +static inline void devl_dev_unlock(struct devlink *devlink, bool dev_lock) +{ + devl_unlock(devlink); + if (dev_lock) + device_unlock(devlink->dev); +} + typedef void devlink_rel_notify_cb_t(struct devlink *devlink, u32 obj_index); typedef void devlink_rel_cleanup_cb_t(struct devlink *devlink, u32 obj_index, u32 rel_index); @@ -111,9 +126,6 @@ int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink, bool *msg_updated); /* Netlink */ -#define DEVLINK_NL_FLAG_NEED_PORT BIT(0) -#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1) - enum devlink_multicast_groups { DEVLINK_MCGRP_CONFIG, }; @@ -140,7 +152,8 @@ typedef int devlink_nl_dump_one_func_t(struct sk_buff *msg, int flags); struct devlink * -devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs); +devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs, + bool dev_lock); int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb, devlink_nl_dump_one_func_t *dump_one); diff --git a/net/devlink/health.c b/net/devlink/health.c index 695df61f8ac2..71ae121dc739 100644 --- a/net/devlink/health.c +++ b/net/devlink/health.c @@ -1151,7 +1151,8 @@ devlink_health_reporter_get_from_cb_lock(struct netlink_callback *cb) struct nlattr **attrs = info->attrs; struct devlink *devlink; - devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs); + devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs, + false); if (IS_ERR(devlink)) return NULL; diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c index d0b90ebc8b15..fa9afe3e6d9b 100644 --- a/net/devlink/netlink.c +++ 
b/net/devlink/netlink.c @@ -9,6 +9,10 @@ #include "devl_internal.h" +#define DEVLINK_NL_FLAG_NEED_PORT BIT(0) +#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1) +#define DEVLINK_NL_FLAG_NEED_DEV_LOCK BIT(2) + static const struct genl_multicast_group devlink_nl_mcgrps[] = { [DEVLINK_MCGRP_CONFIG] = { .name = DEVLINK_GENL_MCGRP_CONFIG_NAME }, }; @@ -61,7 +65,8 @@ int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info) } struct devlink * -devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs) +devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs, + bool dev_lock) { struct devlink *devlink; unsigned long index; @@ -75,12 +80,12 @@ devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs) devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]); devlinks_xa_for_each_registered_get(net, index, devlink) { - devl_lock(devlink); + devl_dev_lock(devlink, dev_lock); if (devl_is_registered(devlink) && strcmp(devlink->dev->bus->name, busname) == 0 && strcmp(dev_name(devlink->dev), devname) == 0) return devlink; - devl_unlock(devlink); + devl_dev_unlock(devlink, dev_lock); devlink_put(devlink); } @@ -90,11 +95,13 @@ devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs) static int __devlink_nl_pre_doit(struct sk_buff *skb, struct genl_info *info, u8 flags) { + bool dev_lock = flags & DEVLINK_NL_FLAG_NEED_DEV_LOCK; struct devlink_port *devlink_port; struct devlink *devlink; int err; - devlink = devlink_get_from_attrs_lock(genl_info_net(info), info->attrs); + devlink = devlink_get_from_attrs_lock(genl_info_net(info), info->attrs, + dev_lock); if (IS_ERR(devlink)) return PTR_ERR(devlink); @@ -114,7 +121,7 @@ static int __devlink_nl_pre_doit(struct sk_buff *skb, struct genl_info *info, return 0; unlock: - devl_unlock(devlink); + devl_dev_unlock(devlink, dev_lock); devlink_put(devlink); return err; } @@ -131,6 +138,12 @@ int devlink_nl_pre_doit_port(const struct genl_split_ops *ops, return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_PORT); } +int devlink_nl_pre_doit_dev_lock(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEV_LOCK); +} + int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info) @@ -138,16 +151,30 @@ int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops, return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT); } -void devlink_nl_post_doit(const struct genl_split_ops *ops, - struct sk_buff *skb, struct genl_info *info) +static void __devlink_nl_post_doit(struct sk_buff *skb, struct genl_info *info, + u8 flags) { + bool dev_lock = flags & DEVLINK_NL_FLAG_NEED_DEV_LOCK; struct devlink *devlink; devlink = info->user_ptr[0]; - devl_unlock(devlink); + devl_dev_unlock(devlink, dev_lock); devlink_put(devlink); } +void devlink_nl_post_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + __devlink_nl_post_doit(skb, info, 0); +} + +void +devlink_nl_post_doit_dev_lock(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + __devlink_nl_post_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEV_LOCK); +} + static int devlink_nl_inst_single_dumpit(struct sk_buff *msg, struct netlink_callback *cb, int flags, devlink_nl_dump_one_func_t *dump_one, @@ -156,7 +183,7 @@ static int devlink_nl_inst_single_dumpit(struct sk_buff *msg, struct devlink *devlink; int err; - devlink = 
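The devl_dev_lock()/devl_dev_unlock() helpers and the dev_lock aware pre/post doit handlers above implement a conditional two-level locking scheme: the outer device lock is only taken for operations that need it (such as reload), and the locks are always released in reverse order. A standalone pthread-based sketch of that scheme (illustrative analogy only, not the kernel locking primitives)::

  #include <pthread.h>
  #include <stdbool.h>

  struct instance {
      pthread_mutex_t dev_lock;  /* outer, e.g. the struct device lock */
      pthread_mutex_t inst_lock; /* inner, e.g. the devlink instance lock */
  };

  static void instance_lock(struct instance *inst, bool dev_lock)
  {
      if (dev_lock)
          pthread_mutex_lock(&inst->dev_lock);
      pthread_mutex_lock(&inst->inst_lock);
  }

  static void instance_unlock(struct instance *inst, bool dev_lock)
  {
      pthread_mutex_unlock(&inst->inst_lock);
      if (dev_lock)
          pthread_mutex_unlock(&inst->dev_lock);
  }

  int main(void)
  {
      struct instance inst = {
          .dev_lock = PTHREAD_MUTEX_INITIALIZER,
          .inst_lock = PTHREAD_MUTEX_INITIALIZER,
      };

      instance_lock(&inst, true);   /* e.g. a reload-style operation */
      instance_unlock(&inst, true);

      instance_lock(&inst, false);  /* e.g. a plain get/dump operation */
      instance_unlock(&inst, false);
      return 0;
  }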
devlink_get_from_attrs_lock(sock_net(msg->sk), attrs); + devlink = devlink_get_from_attrs_lock(sock_net(msg->sk), attrs, false); if (IS_ERR(devlink)) return PTR_ERR(devlink); err = dump_one(msg, devlink, cb, flags | NLM_F_DUMP_FILTERED); diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index 788dfdc498a9..95f9b4350ab7 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -846,9 +846,9 @@ const struct genl_split_ops devlink_nl_ops[73] = { { .cmd = DEVLINK_CMD_RELOAD, .validate = GENL_DONT_VALIDATE_STRICT, - .pre_doit = devlink_nl_pre_doit, + .pre_doit = devlink_nl_pre_doit_dev_lock, .doit = devlink_nl_reload_doit, - .post_doit = devlink_nl_post_doit, + .post_doit = devlink_nl_post_doit_dev_lock, .policy = devlink_reload_nl_policy, .maxattr = DEVLINK_ATTR_RELOAD_LIMITS, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h index 0e9e89c31c31..02f3c0bfae0e 100644 --- a/net/devlink/netlink_gen.h +++ b/net/devlink/netlink_gen.h @@ -22,12 +22,17 @@ int devlink_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); int devlink_nl_pre_doit_port(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); +int devlink_nl_pre_doit_dev_lock(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info); int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); void devlink_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); +void +devlink_nl_post_doit_dev_lock(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info); int devlink_nl_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); diff --git a/net/devlink/region.c b/net/devlink/region.c index 0aab7b82d678..e3bab458db94 100644 --- a/net/devlink/region.c +++ b/net/devlink/region.c @@ -883,7 +883,8 @@ int devlink_nl_region_read_dumpit(struct sk_buff *skb, start_offset = state->start_offset; - devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs); + devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs, + false); if (IS_ERR(devlink)) return PTR_ERR(devlink); diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c index 4da5bad1a7aa..a019226ec6d2 100644 --- a/net/dsa/tag_rtl4_a.c +++ b/net/dsa/tag_rtl4_a.c @@ -23,7 +23,6 @@ #define RTL4_A_NAME "rtl4a" #define RTL4_A_HDR_LEN 4 -#define RTL4_A_ETHERTYPE 0x8899 #define RTL4_A_PROTOCOL_SHIFT 12 /* * 0x1 = Realtek Remote Control protocol (RRCP) @@ -54,7 +53,7 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb, /* Set Ethertype */ p = (__be16 *)tag; - *p = htons(RTL4_A_ETHERTYPE); + *p = htons(ETH_P_REALTEK); out = (RTL4_A_PROTOCOL_RTL8366RB << RTL4_A_PROTOCOL_SHIFT); /* The lower bits indicate the port number */ @@ -82,7 +81,7 @@ static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb, tag = dsa_etype_header_pos_rx(skb); p = (__be16 *)tag; etype = ntohs(*p); - if (etype != RTL4_A_ETHERTYPE) { + if (etype != ETH_P_REALTEK) { /* Not custom, just pass through */ netdev_dbg(dev, "non-realtek ethertype 0x%04x\n", etype); return skb; diff --git a/net/ethtool/common.c b/net/ethtool/common.c index b4419fb6df6a..11d8797f63f6 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -661,6 +661,12 @@ int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index) } 
EXPORT_SYMBOL(ethtool_get_phc_vclocks); +int ethtool_get_ts_info_by_layer(struct net_device *dev, struct ethtool_ts_info *info) +{ + return __ethtool_get_ts_info(dev, info); +} +EXPORT_SYMBOL(ethtool_get_ts_info_by_layer); + const struct ethtool_phy_ops *ethtool_phy_ops; void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 306f942c3b28..7ceb9ac6e730 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -173,7 +173,24 @@ static int hsr_dev_open(struct net_device *dev) static int hsr_dev_close(struct net_device *dev) { - /* Nothing to do here. */ + struct hsr_port *port; + struct hsr_priv *hsr; + + hsr = netdev_priv(dev); + hsr_for_each_port(hsr, port) { + if (port->type == HSR_PT_MASTER) + continue; + switch (port->type) { + case HSR_PT_SLAVE_A: + case HSR_PT_SLAVE_B: + dev_uc_unsync(port->dev, dev); + dev_mc_unsync(port->dev, dev); + break; + default: + break; + } + } + return 0; } @@ -404,12 +421,60 @@ void hsr_del_ports(struct hsr_priv *hsr) hsr_del_port(port); } +static void hsr_set_rx_mode(struct net_device *dev) +{ + struct hsr_port *port; + struct hsr_priv *hsr; + + hsr = netdev_priv(dev); + + hsr_for_each_port(hsr, port) { + if (port->type == HSR_PT_MASTER) + continue; + switch (port->type) { + case HSR_PT_SLAVE_A: + case HSR_PT_SLAVE_B: + dev_mc_sync_multiple(port->dev, dev); + dev_uc_sync_multiple(port->dev, dev); + break; + default: + break; + } + } +} + +static void hsr_change_rx_flags(struct net_device *dev, int change) +{ + struct hsr_port *port; + struct hsr_priv *hsr; + + hsr = netdev_priv(dev); + + hsr_for_each_port(hsr, port) { + if (port->type == HSR_PT_MASTER) + continue; + switch (port->type) { + case HSR_PT_SLAVE_A: + case HSR_PT_SLAVE_B: + if (change & IFF_ALLMULTI) + dev_set_allmulti(port->dev, + dev->flags & + IFF_ALLMULTI ? 1 : -1); + break; + default: + break; + } + } +} + static const struct net_device_ops hsr_device_ops = { .ndo_change_mtu = hsr_dev_change_mtu, .ndo_open = hsr_dev_open, .ndo_stop = hsr_dev_close, .ndo_start_xmit = hsr_dev_xmit, + .ndo_change_rx_flags = hsr_change_rx_flags, .ndo_fix_features = hsr_fix_features, + .ndo_set_rx_mode = hsr_set_rx_mode, }; static struct device_type hsr_type = { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5f693bbd578d..86cc6d36f818 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -482,6 +482,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) const int code = icmp_hdr(skb)->code; struct sock *sk; struct request_sock *fastopen; + bool harderr = false; u32 seq, snd_una; int err; struct net *net = dev_net(skb->dev); @@ -555,6 +556,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) goto out; case ICMP_PARAMETERPROB: err = EPROTO; + harderr = true; break; case ICMP_DEST_UNREACH: if (code > NR_ICMP_UNREACH) @@ -579,6 +581,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) } err = icmp_err_convert[code].errno; + harderr = icmp_err_convert[code].fatal; /* check if this ICMP message allows revert of backoff. * (see RFC 6069) */ @@ -604,6 +607,9 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) ip_icmp_error(sk, skb, err, th->dest, info, (u8 *)th); + if (!harderr) + break; + if (!sock_owned_by_user(sk)) { WRITE_ONCE(sk->sk_err, err); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 1f9f6c1c196b..d1ad20ce1c8c 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -626,7 +626,6 @@ void tcp_retransmit_timer(struct sock *sk) * implemented ftp to mars will work nicely. 
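The tcp_v4_err() change above distinguishes hard ICMP errors, which are surfaced through sk_err, from soft ones, which are only recorded so the connection keeps retransmitting. A standalone sketch of that split (illustrative; the toy socket below is not the kernel's struct sock, and the error mappings are just plausible examples)::

  #include <assert.h>
  #include <errno.h>
  #include <stdbool.h>

  struct toy_sock { int sk_err; int sk_err_soft; };

  static void handle_icmp_error(struct toy_sock *sk, int err, bool harderr)
  {
      if (!harderr) {
          sk->sk_err_soft = err; /* remembered only, keep retransmitting */
          return;
      }
      sk->sk_err = err;          /* surfaced to the application */
  }

  int main(void)
  {
      struct toy_sock sk = { 0 };

      handle_icmp_error(&sk, EHOSTUNREACH, false); /* e.g. host unreachable */
      assert(sk.sk_err == 0 && sk.sk_err_soft == EHOSTUNREACH);

      handle_icmp_error(&sk, ECONNREFUSED, true);  /* e.g. port unreachable */
      assert(sk.sk_err == ECONNREFUSED);
      return 0;
  }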
We will have to fix * the 120 second clamps though! */ - icsk->icsk_backoff++; out_reset_timer: /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is @@ -647,11 +646,12 @@ out_reset_timer: tcp_rto_min(sk), TCP_RTO_MAX); } else if (sk->sk_state != TCP_SYN_SENT || - icsk->icsk_backoff > + tp->total_rto > READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) { /* Use normal (exponential) backoff unless linear timeouts are * activated. */ + icsk->icsk_backoff++; icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 937a02c2e534..43deda49cc52 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -381,7 +381,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct tcp_sock *tp; __u32 seq, snd_una; struct sock *sk; - bool fatal; + bool harderr; int err; sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, @@ -402,9 +402,9 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; } seq = ntohl(th->seq); - fatal = icmpv6_err_convert(type, code, &err); + harderr = icmpv6_err_convert(type, code, &err); if (sk->sk_state == TCP_NEW_SYN_RECV) { - tcp_req_err(sk, seq, fatal); + tcp_req_err(sk, seq, harderr); return 0; } @@ -489,6 +489,9 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th); + if (!harderr) + break; + if (!sock_owned_by_user(sk)) { WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 03757e76bb6b..374412ed780b 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -105,8 +105,11 @@ enum { struct ncsi_channel_version { - u32 version; /* Supported BCD encoded NCSI version */ - u32 alpha2; /* Supported BCD encoded NCSI version */ + u8 major; /* NCSI version major */ + u8 minor; /* NCSI version minor */ + u8 update; /* NCSI version update */ + char alpha1; /* NCSI version alpha1 */ + char alpha2; /* NCSI version alpha2 */ u8 fw_name[12]; /* Firmware name string */ u32 fw_version; /* Firmware version */ u16 pci_ids[4]; /* PCI identification */ diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c index fd2236ee9a79..b3ff37a181d7 100644 --- a/net/ncsi/ncsi-cmd.c +++ b/net/ncsi/ncsi-cmd.c @@ -270,7 +270,8 @@ static struct ncsi_cmd_handler { { NCSI_PKT_CMD_GPS, 0, ncsi_cmd_handler_default }, { NCSI_PKT_CMD_OEM, -1, ncsi_cmd_handler_oem }, { NCSI_PKT_CMD_PLDM, 0, NULL }, - { NCSI_PKT_CMD_GPUUID, 0, ncsi_cmd_handler_default } + { NCSI_PKT_CMD_GPUUID, 0, ncsi_cmd_handler_default }, + { NCSI_PKT_CMD_GMCMA, 0, ncsi_cmd_handler_default } }; static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca) diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index d9da942ad53d..745c788f1d1d 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -689,8 +689,6 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, return 0; } -#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY) - static int ncsi_oem_keep_phy_intel(struct ncsi_cmd_arg *nca) { unsigned char data[NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN]; @@ -716,10 +714,6 @@ static int ncsi_oem_keep_phy_intel(struct ncsi_cmd_arg *nca) return ret; } -#endif - -#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) - /* NCSI OEM Command APIs */ static int ncsi_oem_gma_handler_bcm(struct ncsi_cmd_arg *nca) { @@ -856,8 +850,6 @@ 
static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id) return nch->handler(nca); } -#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */ - /* Determine if a given channel from the channel_queue should be used for Tx */ static bool ncsi_channel_is_tx(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc) @@ -1039,20 +1031,23 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) goto error; } - nd->state = ncsi_dev_state_config_oem_gma; + nd->state = IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) + ? ncsi_dev_state_config_oem_gma + : ncsi_dev_state_config_clear_vids; break; case ncsi_dev_state_config_oem_gma: nd->state = ncsi_dev_state_config_clear_vids; - ret = -1; -#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) - nca.type = NCSI_PKT_CMD_OEM; nca.package = np->id; nca.channel = nc->id; ndp->pending_req_num = 1; - ret = ncsi_gma_handler(&nca, nc->version.mf_id); -#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */ - + if (nc->version.major >= 1 && nc->version.minor >= 2) { + nca.type = NCSI_PKT_CMD_GMCMA; + ret = ncsi_xmit_cmd(&nca); + } else { + nca.type = NCSI_PKT_CMD_OEM; + ret = ncsi_gma_handler(&nca, nc->version.mf_id); + } if (ret < 0) schedule_work(&ndp->work); @@ -1404,7 +1399,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) schedule_work(&ndp->work); break; -#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) case ncsi_dev_state_probe_mlx_gma: ndp->pending_req_num = 1; @@ -1429,7 +1423,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_probe_cis; break; -#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */ case ncsi_dev_state_probe_cis: ndp->pending_req_num = NCSI_RESERVED_CHANNEL; @@ -1447,7 +1440,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY)) nd->state = ncsi_dev_state_probe_keep_phy; break; -#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY) case ncsi_dev_state_probe_keep_phy: ndp->pending_req_num = 1; @@ -1460,7 +1452,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_probe_gvi; break; -#endif /* CONFIG_NCSI_OEM_CMD_KEEP_PHY */ case ncsi_dev_state_probe_gvi: case ncsi_dev_state_probe_gc: case ncsi_dev_state_probe_gls: diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index a3a6753a1db7..2f872d064396 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -71,8 +71,8 @@ static int ncsi_write_channel_info(struct sk_buff *skb, if (nc == nc->package->preferred_channel) nla_put_flag(skb, NCSI_CHANNEL_ATTR_FORCED); - nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.version); - nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.alpha2); + nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.major); + nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.minor); nla_put_string(skb, NCSI_CHANNEL_ATTR_VERSION_STR, nc->version.fw_name); vid_nest = nla_nest_start_noflag(skb, NCSI_CHANNEL_ATTR_VLAN_LIST); diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h index ba66c7dc3a21..f2f3b5c1b941 100644 --- a/net/ncsi/ncsi-pkt.h +++ b/net/ncsi/ncsi-pkt.h @@ -197,9 +197,12 @@ struct ncsi_rsp_gls_pkt { /* Get Version ID */ struct ncsi_rsp_gvi_pkt { struct ncsi_rsp_pkt_hdr rsp; /* Response header */ - __be32 ncsi_version; /* NCSI version */ + unsigned char major; /* NCSI version major */ + unsigned char minor; /* NCSI version minor */ + unsigned char update; /* NCSI version update */ + unsigned char alpha1; /* NCSI version alpha1 */ unsigned char reserved[3]; /* Reserved */ - unsigned char alpha2; /* NCSI version 
*/ + unsigned char alpha2; /* NCSI version alpha2 */ unsigned char fw_name[12]; /* f/w name string */ __be32 fw_version; /* f/w version */ __be16 pci_ids[4]; /* PCI IDs */ @@ -335,6 +338,14 @@ struct ncsi_rsp_gpuuid_pkt { __be32 checksum; }; +/* Get MC MAC Address */ +struct ncsi_rsp_gmcma_pkt { + struct ncsi_rsp_pkt_hdr rsp; + unsigned char address_count; + unsigned char reserved[3]; + unsigned char addresses[][ETH_ALEN]; +}; + /* AEN: Link State Change */ struct ncsi_aen_lsc_pkt { struct ncsi_aen_pkt_hdr aen; /* AEN header */ @@ -395,6 +406,7 @@ struct ncsi_aen_hncdsc_pkt { #define NCSI_PKT_CMD_GPUUID 0x52 /* Get package UUID */ #define NCSI_PKT_CMD_QPNPR 0x56 /* Query Pending NC PLDM request */ #define NCSI_PKT_CMD_SNPR 0x57 /* Send NC PLDM Reply */ +#define NCSI_PKT_CMD_GMCMA 0x58 /* Get MC MAC Address */ /* NCSI packet responses */ @@ -430,6 +442,7 @@ struct ncsi_aen_hncdsc_pkt { #define NCSI_PKT_RSP_GPUUID (NCSI_PKT_CMD_GPUUID + 0x80) #define NCSI_PKT_RSP_QPNPR (NCSI_PKT_CMD_QPNPR + 0x80) #define NCSI_PKT_RSP_SNPR (NCSI_PKT_CMD_SNPR + 0x80) +#define NCSI_PKT_RSP_GMCMA (NCSI_PKT_CMD_GMCMA + 0x80) /* NCSI response code/reason */ #define NCSI_PKT_RSP_C_COMPLETED 0x0000 /* Command Completed */ diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 069c2659074b..bee290d0f48b 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -19,6 +19,19 @@ #include "ncsi-pkt.h" #include "ncsi-netlink.h" +/* Nibbles within [0xA, 0xF] add zero "0" to the returned value. + * Optional fields (encoded as 0xFF) will default to zero. + */ +static u8 decode_bcd_u8(u8 x) +{ + int lo = x & 0xF; + int hi = x >> 4; + + lo = lo < 0xA ? lo : 0; + hi = hi < 0xA ? hi : 0; + return lo + hi * 10; +} + static int ncsi_validate_rsp_pkt(struct ncsi_request *nr, unsigned short payload) { @@ -755,9 +768,18 @@ static int ncsi_rsp_handler_gvi(struct ncsi_request *nr) if (!nc) return -ENODEV; - /* Update to channel's version info */ + /* Update channel's version info + * + * Major, minor, and update fields are supposed to be + * unsigned integers encoded as packed BCD. + * + * Alpha1 and alpha2 are ISO/IEC 8859-1 characters. 
+ */ ncv = &nc->version; - ncv->version = ntohl(rsp->ncsi_version); + ncv->major = decode_bcd_u8(rsp->major); + ncv->minor = decode_bcd_u8(rsp->minor); + ncv->update = decode_bcd_u8(rsp->update); + ncv->alpha1 = rsp->alpha1; ncv->alpha2 = rsp->alpha2; memcpy(ncv->fw_name, rsp->fw_name, 12); ncv->fw_version = ntohl(rsp->fw_version); @@ -1069,6 +1091,44 @@ static int ncsi_rsp_handler_netlink(struct ncsi_request *nr) return ret; } +static int ncsi_rsp_handler_gmcma(struct ncsi_request *nr) +{ + struct ncsi_dev_priv *ndp = nr->ndp; + struct net_device *ndev = ndp->ndev.dev; + struct ncsi_rsp_gmcma_pkt *rsp; + struct sockaddr saddr; + int ret = -1; + int i; + + rsp = (struct ncsi_rsp_gmcma_pkt *)skb_network_header(nr->rsp); + saddr.sa_family = ndev->type; + ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + + netdev_info(ndev, "NCSI: Received %d provisioned MAC addresses\n", + rsp->address_count); + for (i = 0; i < rsp->address_count; i++) { + netdev_info(ndev, "NCSI: MAC address %d: %02x:%02x:%02x:%02x:%02x:%02x\n", + i, rsp->addresses[i][0], rsp->addresses[i][1], + rsp->addresses[i][2], rsp->addresses[i][3], + rsp->addresses[i][4], rsp->addresses[i][5]); + } + + for (i = 0; i < rsp->address_count; i++) { + memcpy(saddr.sa_data, &rsp->addresses[i], ETH_ALEN); + ret = ndev->netdev_ops->ndo_set_mac_address(ndev, &saddr); + if (ret < 0) { + netdev_warn(ndev, "NCSI: Unable to assign %pM to device\n", + saddr.sa_data); + continue; + } + netdev_warn(ndev, "NCSI: Set MAC address to %pM\n", saddr.sa_data); + break; + } + + ndp->gma_flag = ret == 0; + return ret; +} + static struct ncsi_rsp_handler { unsigned char type; int payload; @@ -1105,7 +1165,8 @@ static struct ncsi_rsp_handler { { NCSI_PKT_RSP_PLDM, -1, ncsi_rsp_handler_pldm }, { NCSI_PKT_RSP_GPUUID, 20, ncsi_rsp_handler_gpuuid }, { NCSI_PKT_RSP_QPNPR, -1, ncsi_rsp_handler_pldm }, - { NCSI_PKT_RSP_SNPR, -1, ncsi_rsp_handler_pldm } + { NCSI_PKT_RSP_SNPR, -1, ncsi_rsp_handler_pldm }, + { NCSI_PKT_RSP_GMCMA, -1, ncsi_rsp_handler_gmcma }, }; int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index eb086b06d60d..177126fb0484 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1204,8 +1204,7 @@ struct sock *netlink_getsockbyfilp(struct file *filp) return sock; } -static struct sk_buff *netlink_alloc_large_skb(unsigned int size, - int broadcast) +struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast) { struct sk_buff *skb; void *data; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index d5bdfd4a7655..289e1755c26b 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -71,7 +71,7 @@ struct tc_u_hnode { struct tc_u_hnode __rcu *next; u32 handle; u32 prio; - int refcnt; + refcount_t refcnt; unsigned int divisor; struct idr handle_idr; bool is_root; @@ -86,7 +86,7 @@ struct tc_u_hnode { struct tc_u_common { struct tc_u_hnode __rcu *hlist; void *ptr; - int refcnt; + refcount_t refcnt; struct idr handle_idr; struct hlist_node hnode; long knodes; @@ -359,7 +359,7 @@ static int u32_init(struct tcf_proto *tp) if (root_ht == NULL) return -ENOBUFS; - root_ht->refcnt++; + refcount_set(&root_ht->refcnt, 1); root_ht->handle = tp_c ? 
gen_new_htid(tp_c, root_ht) : 0x80000000; root_ht->prio = tp->prio; root_ht->is_root = true; @@ -371,18 +371,20 @@ static int u32_init(struct tcf_proto *tp) kfree(root_ht); return -ENOBUFS; } + refcount_set(&tp_c->refcnt, 1); tp_c->ptr = key; INIT_HLIST_NODE(&tp_c->hnode); idr_init(&tp_c->handle_idr); hlist_add_head(&tp_c->hnode, tc_u_hash(key)); + } else { + refcount_inc(&tp_c->refcnt); } - tp_c->refcnt++; RCU_INIT_POINTER(root_ht->next, tp_c->hlist); rcu_assign_pointer(tp_c->hlist, root_ht); - root_ht->refcnt++; + /* root_ht must be destroyed when tcf_proto is destroyed */ rcu_assign_pointer(tp->root, root_ht); tp->data = tp_c; return 0; @@ -393,7 +395,7 @@ static void __u32_destroy_key(struct tc_u_knode *n) struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); tcf_exts_destroy(&n->exts); - if (ht && --ht->refcnt == 0) + if (ht && refcount_dec_and_test(&ht->refcnt)) kfree(ht); kfree(n); } @@ -601,8 +603,6 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, struct tc_u_hnode __rcu **hn; struct tc_u_hnode *phn; - WARN_ON(--ht->refcnt); - u32_clear_hnode(tp, ht, extack); hn = &tp_c->hlist; @@ -630,10 +630,10 @@ static void u32_destroy(struct tcf_proto *tp, bool rtnl_held, WARN_ON(root_ht == NULL); - if (root_ht && --root_ht->refcnt == 1) + if (root_ht && refcount_dec_and_test(&root_ht->refcnt)) u32_destroy_hnode(tp, root_ht, extack); - if (--tp_c->refcnt == 0) { + if (refcount_dec_and_test(&tp_c->refcnt)) { struct tc_u_hnode *ht; hlist_del(&tp_c->hnode); @@ -645,7 +645,7 @@ static void u32_destroy(struct tcf_proto *tp, bool rtnl_held, /* u32_destroy_key() will later free ht for us, if it's * still referenced by some knode */ - if (--ht->refcnt == 0) + if (refcount_dec_and_test(&ht->refcnt)) kfree_rcu(ht, rcu); } @@ -674,7 +674,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, return -EINVAL; } - if (ht->refcnt == 1) { + if (refcount_dec_if_one(&ht->refcnt)) { u32_destroy_hnode(tp, ht, extack); } else { NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter"); @@ -682,7 +682,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, } out: - *last = tp_c->refcnt == 1 && tp_c->knodes == 0; + *last = refcount_read(&tp_c->refcnt) == 1 && tp_c->knodes == 0; return ret; } @@ -766,14 +766,14 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, NL_SET_ERR_MSG_MOD(extack, "Not linking to root node"); return -EINVAL; } - ht_down->refcnt++; + refcount_inc(&ht_down->refcnt); } ht_old = rtnl_dereference(n->ht_down); rcu_assign_pointer(n->ht_down, ht_down); if (ht_old) - ht_old->refcnt--; + refcount_dec(&ht_old->refcnt); } if (ifindex >= 0) @@ -852,7 +852,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, /* bump reference count as long as we hold pointer to structure */ if (ht) - ht->refcnt++; + refcount_inc(&ht->refcnt); return new; } @@ -932,7 +932,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, ht_old = rtnl_dereference(n->ht_down); if (ht_old) - ht_old->refcnt++; + refcount_inc(&ht_old->refcnt); } __u32_destroy_key(new); return err; @@ -980,7 +980,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return err; } } - ht->refcnt = 1; + refcount_set(&ht->refcnt, 1); ht->divisor = divisor; ht->handle = handle; ht->prio = tp->prio; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 4195a4bc26ca..8dd0e5925342 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -522,8 +522,9 @@ static void dev_watchdog(struct timer_list *t) if 
(unlikely(timedout_ms)) { trace_net_dev_xmit_timeout(dev, i); - WARN_ONCE(1, "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out %u ms\n", - dev->name, netdev_drivername(dev), i, timedout_ms); + netdev_crit(dev, "NETDEV WATCHDOG: CPU: %d: transmit queue %u timed out %u ms\n", + raw_smp_processor_id(), + i, timedout_ms); netif_freeze_queues(dev); dev->netdev_ops->ndo_tx_timeout(dev, i); netif_unfreeze_queues(dev); diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index c763008a8adb..079aebb16ed8 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -168,7 +168,7 @@ static struct sk_buff *tipc_get_err_tlv(char *str) int str_len = strlen(str) + 1; struct sk_buff *buf; - buf = tipc_tlv_alloc(TLV_SPACE(str_len)); + buf = tipc_tlv_alloc(str_len); if (buf) tipc_add_tlv(buf, TIPC_TLV_ERROR_STRING, str, str_len); diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 6965c94dfdaf..09e4f2ff5658 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -20,7 +20,7 @@ SYNOPSIS **bpftool** **version** - *OBJECT* := { **map** | **program** | **link** | **cgroup** | **perf** | **net** | **feature** | + *OBJECT* := { **map** | **prog** | **link** | **cgroup** | **perf** | **net** | **feature** | **btf** | **gen** | **struct_ops** | **iter** } *OPTIONS* := { { **-V** | **--version** } | |COMMON_OPTIONS| } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0f6cdf52b1da..7a5498242eaa 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1200,6 +1200,9 @@ enum bpf_perf_event_type { */ #define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6) +/* The verifier internal test flag. Behavior is undefined */ +#define BPF_F_TEST_REG_INVARIANTS (1U << 7) + /* link_create.kprobe_multi.flags used in LINK_CREATE command for * BPF_TRACE_KPROBE_MULTI attach type to create return probe. */ @@ -4517,6 +4520,8 @@ union bpf_attr { * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. + * Note: the user stack will only be populated if the *task* is + * the current task; all other tasks will return -EOPNOTSUPP. * To achieve this, the helper needs *task*, which is a valid * pointer to **struct task_struct**. To store the stacktrace, the * bpf program provides *buf* with a nonnegative *size*. @@ -4528,6 +4533,7 @@ union bpf_attr { * * **BPF_F_USER_STACK** * Collect a user space stack instead of a kernel stack. + * The *task* must be the current task. * **BPF_F_USER_BUILD_ID** * Collect buildid+offset instead of ips for user stack, * only valid if **BPF_F_USER_STACK** is also specified. 
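For context on the bpf_get_task_stack() documentation change above, here is a minimal, illustrative sketch (not part of this series) of a task-iterator program that collects the current task's user stack; it loosely mirrors the get_task_user_stacks/num_user_stacks selftest touched later in this series, and the program name and entry count are made up:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

int num_user_stacks = 0;

SEC("iter/task")
int dump_current_user_stack(struct bpf_iter__task *ctx)
{
	struct task_struct *task = ctx->task;
	__u64 entries[16];	/* buffer size must be a multiple of 8 */
	long nr_bytes;

	if (!task)
		return 0;

	/* Per the note added above, this only succeeds when 'task' is the
	 * current task; for any other task the helper returns -EOPNOTSUPP.
	 */
	nr_bytes = bpf_get_task_stack(task, entries, sizeof(entries),
				      BPF_F_USER_STACK);
	if (nr_bytes > 0)
		num_user_stacks++;

	return 0;
}

On success the helper returns the number of bytes written to the buffer, so a positive return value here means a user stack was actually captured.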
@@ -7151,40 +7157,31 @@ struct bpf_spin_lock { }; struct bpf_timer { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_dynptr { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_head { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_list_node { - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[3]; } __attribute__((aligned(8))); struct bpf_rb_root { - __u64 :64; - __u64 :64; + __u64 __opaque[2]; } __attribute__((aligned(8))); struct bpf_rb_node { - __u64 :64; - __u64 :64; - __u64 :64; - __u64 :64; + __u64 __opaque[4]; } __attribute__((aligned(8))); struct bpf_refcount { - __u32 :32; + __u32 __opaque[1]; } __attribute__((aligned(4))); struct bpf_sysctl { diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h index b7060f254486..8fe248e14eb6 100644 --- a/tools/lib/bpf/libbpf_common.h +++ b/tools/lib/bpf/libbpf_common.h @@ -79,11 +79,14 @@ */ #define LIBBPF_OPTS_RESET(NAME, ...) \ do { \ - memset(&NAME, 0, sizeof(NAME)); \ - NAME = (typeof(NAME)) { \ - .sz = sizeof(NAME), \ - __VA_ARGS__ \ - }; \ + typeof(NAME) ___##NAME = ({ \ + memset(&___##NAME, 0, sizeof(NAME)); \ + (typeof(NAME)) { \ + .sz = sizeof(NAME), \ + __VA_ARGS__ \ + }; \ + }); \ + memcpy(&NAME, &___##NAME, sizeof(NAME)); \ } while (0) #endif /* __LIBBPF_LIBBPF_COMMON_H */ diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 5b1da2a32ea7..5aa133bf3688 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -45,9 +45,12 @@ #define format_parent_cgroup_path(buf, path) \ format_cgroup_path_pid(buf, path, getppid()) -#define format_classid_path(buf) \ - snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \ - CGROUP_WORK_DIR) +#define format_classid_path_pid(buf, pid) \ + snprintf(buf, sizeof(buf), "%s%s%d", NETCLS_MOUNT_PATH, \ + CGROUP_WORK_DIR, pid) + +#define format_classid_path(buf) \ + format_classid_path_pid(buf, getpid()) static __thread bool cgroup_workdir_mounted; @@ -419,26 +422,23 @@ int create_and_get_cgroup(const char *relative_path) } /** - * get_cgroup_id() - Get cgroup id for a particular cgroup path - * @relative_path: The cgroup path, relative to the workdir, to join + * get_cgroup_id_from_path - Get cgroup id for a particular cgroup path + * @cgroup_workdir: The absolute cgroup path * * On success, it returns the cgroup id. On failure it returns 0, * which is an invalid cgroup id. * If there is a failure, it prints the error to stderr. 
*/ -unsigned long long get_cgroup_id(const char *relative_path) +unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir) { int dirfd, err, flags, mount_id, fhsize; union { unsigned long long cgid; unsigned char raw_bytes[8]; } id; - char cgroup_workdir[PATH_MAX + 1]; struct file_handle *fhp, *fhp2; unsigned long long ret = 0; - format_cgroup_path(cgroup_workdir, relative_path); - dirfd = AT_FDCWD; flags = 0; fhsize = sizeof(*fhp); @@ -474,6 +474,14 @@ free_mem: return ret; } +unsigned long long get_cgroup_id(const char *relative_path) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_cgroup_path(cgroup_workdir, relative_path); + return get_cgroup_id_from_path(cgroup_workdir); +} + int cgroup_setup_and_join(const char *path) { int cg_fd; @@ -523,10 +531,20 @@ int setup_classid_environment(void) return 1; } - if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") && - errno != EBUSY) { - log_err("mount cgroup net_cls"); - return 1; + if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls")) { + if (errno != EBUSY) { + log_err("mount cgroup net_cls"); + return 1; + } + + if (rmdir(NETCLS_MOUNT_PATH)) { + log_err("rmdir cgroup net_cls"); + return 1; + } + if (umount(CGROUP_MOUNT_DFLT)) { + log_err("umount cgroup base"); + return 1; + } } cleanup_classid_environment(); @@ -541,15 +559,16 @@ int setup_classid_environment(void) /** * set_classid() - Set a cgroupv1 net_cls classid - * @id: the numeric classid * - * Writes the passed classid into the cgroup work dir's net_cls.classid + * Writes the classid into the cgroup work dir's net_cls.classid * file in order to later on trigger socket tagging. * + * We leverage the current pid as the classid, ensuring unique identification. + * * On success, it returns 0, otherwise on failure it returns 1. If there * is a failure, it prints the error to stderr. */ -int set_classid(unsigned int id) +int set_classid(void) { char cgroup_workdir[PATH_MAX - 42]; char cgroup_classid_path[PATH_MAX + 1]; @@ -565,7 +584,7 @@ int set_classid(unsigned int id) return 1; } - if (dprintf(fd, "%u\n", id) < 0) { + if (dprintf(fd, "%u\n", getpid()) < 0) { log_err("Setting cgroup classid"); rc = 1; } @@ -607,3 +626,66 @@ void cleanup_classid_environment(void) join_cgroup_from_top(NETCLS_MOUNT_PATH); nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); } + +/** + * get_classid_cgroup_id - Get the cgroup id of a net_cls cgroup + */ +unsigned long long get_classid_cgroup_id(void) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_classid_path(cgroup_workdir); + return get_cgroup_id_from_path(cgroup_workdir); +} + +/** + * get_cgroup1_hierarchy_id - Retrieves the ID of a cgroup1 hierarchy from the cgroup1 subsys name. + * @subsys_name: The cgroup1 subsys name, which can be retrieved from /proc/self/cgroup. It can be + * a named cgroup like "name=systemd", a controller name like "net_cls", or multi-controllers like + * "net_cls,net_prio".
+ */ +int get_cgroup1_hierarchy_id(const char *subsys_name) +{ + char *c, *c2, *c3, *c4; + bool found = false; + char line[1024]; + FILE *file; + int i, id; + + if (!subsys_name) + return -1; + + file = fopen("/proc/self/cgroup", "r"); + if (!file) { + log_err("fopen /proc/self/cgroup"); + return -1; + } + + while (fgets(line, 1024, file)) { + i = 0; + for (c = strtok_r(line, ":", &c2); c && i < 2; c = strtok_r(NULL, ":", &c2)) { + if (i == 0) { + id = strtol(c, NULL, 10); + } else if (i == 1) { + if (!strcmp(c, subsys_name)) { + found = true; + break; + } + + /* Multiple subsystems may share one single mount point */ + for (c3 = strtok_r(c, ",", &c4); c3; + c3 = strtok_r(NULL, ",", &c4)) { + if (!strcmp(c, subsys_name)) { + found = true; + break; + } + } + } + i++; + } + if (found) + break; + } + fclose(file); + return found ? id : -1; +} diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 5c2cb9c8b546..ee053641c026 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -20,6 +20,7 @@ int get_root_cgroup(void); int create_and_get_cgroup(const char *relative_path); void remove_cgroup(const char *relative_path); unsigned long long get_cgroup_id(const char *relative_path); +int get_cgroup1_hierarchy_id(const char *subsys_name); int join_cgroup(const char *relative_path); int join_root_cgroup(void); @@ -29,8 +30,9 @@ int setup_cgroup_environment(void); void cleanup_cgroup_environment(void); /* cgroupv1 related */ -int set_classid(unsigned int id); +int set_classid(void); int join_classid(void); +unsigned long long get_classid_cgroup_id(void); int setup_classid_environment(void); void cleanup_classid_environment(void); diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 index 253821494884..29c8635c5722 100644 --- a/tools/testing/selftests/bpf/config.aarch64 +++ b/tools/testing/selftests/bpf/config.aarch64 @@ -1,4 +1,3 @@ -CONFIG_9P_FS=y CONFIG_ARCH_VEXPRESS=y CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y CONFIG_ARM_SMMU_V3=y @@ -37,6 +36,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=y CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_INFO_REDUCED=n CONFIG_DEBUG_LIST=y CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_NOTIFIERS=y @@ -46,7 +46,6 @@ CONFIG_DEBUG_SG=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_DEVTMPFS=y -CONFIG_DRM_VIRTIO_GPU=y CONFIG_DRM=y CONFIG_DUMMY=y CONFIG_EXPERT=y @@ -67,7 +66,6 @@ CONFIG_HAVE_KRETPROBES=y CONFIG_HEADERS_INSTALL=y CONFIG_HIGH_RES_TIMERS=y CONFIG_HUGETLBFS=y -CONFIG_HW_RANDOM_VIRTIO=y CONFIG_HW_RANDOM=y CONFIG_HZ_100=y CONFIG_IDLE_PAGE_TRACKING=y @@ -99,8 +97,6 @@ CONFIG_MEMCG=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_NAMESPACES=y -CONFIG_NET_9P_VIRTIO=y -CONFIG_NET_9P=y CONFIG_NET_ACT_BPF=y CONFIG_NET_ACT_GACT=y CONFIG_NETDEVICES=y @@ -140,7 +136,6 @@ CONFIG_SCHED_TRACER=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SCAN_ASYNC=y -CONFIG_SCSI_VIRTIO=y CONFIG_SCSI=y CONFIG_SECURITY_NETWORK=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y @@ -167,16 +162,6 @@ CONFIG_UPROBES=y CONFIG_USELIB=y CONFIG_USER_NS=y CONFIG_VETH=y -CONFIG_VIRTIO_BALLOON=y -CONFIG_VIRTIO_BLK=y -CONFIG_VIRTIO_CONSOLE=y -CONFIG_VIRTIO_FS=y -CONFIG_VIRTIO_INPUT=y -CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y -CONFIG_VIRTIO_MMIO=y -CONFIG_VIRTIO_NET=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y diff --git 
a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x index 2ba92167be35..e93330382849 100644 --- a/tools/testing/selftests/bpf/config.s390x +++ b/tools/testing/selftests/bpf/config.s390x @@ -1,4 +1,3 @@ -CONFIG_9P_FS=y CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y CONFIG_AUDIT=y CONFIG_BLK_CGROUP=y @@ -84,8 +83,6 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_NAMESPACES=y CONFIG_NET=y -CONFIG_NET_9P=y -CONFIG_NET_9P_VIRTIO=y CONFIG_NET_ACT_BPF=y CONFIG_NET_ACT_GACT=y CONFIG_NET_KEY=y @@ -114,7 +111,6 @@ CONFIG_SAMPLE_SECCOMP=y CONFIG_SAMPLES=y CONFIG_SCHED_TRACER=y CONFIG_SCSI=y -CONFIG_SCSI_VIRTIO=y CONFIG_SECURITY_NETWORK=y CONFIG_STACK_TRACER=y CONFIG_STATIC_KEYS_SELFTEST=y @@ -136,11 +132,6 @@ CONFIG_UPROBES=y CONFIG_USELIB=y CONFIG_USER_NS=y CONFIG_VETH=y -CONFIG_VIRTIO_BALLOON=y -CONFIG_VIRTIO_BLK=y -CONFIG_VIRTIO_NET=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y diff --git a/tools/testing/selftests/bpf/config.vm b/tools/testing/selftests/bpf/config.vm new file mode 100644 index 000000000000..a9746ca78777 --- /dev/null +++ b/tools/testing/selftests/bpf/config.vm @@ -0,0 +1,12 @@ +CONFIG_9P_FS=y +CONFIG_9P_FS_POSIX_ACL=y +CONFIG_9P_FS_SECURITY=y +CONFIG_CRYPTO_DEV_VIRTIO=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_VSOCKETS_COMMON=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index 2e70a6048278..f7bfb2b09c82 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -1,6 +1,3 @@ -CONFIG_9P_FS=y -CONFIG_9P_FS_POSIX_ACL=y -CONFIG_9P_FS_SECURITY=y CONFIG_AGP=y CONFIG_AGP_AMD64=y CONFIG_AGP_INTEL=y @@ -45,7 +42,6 @@ CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPUSETS=y CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_BLAKE2B=y -CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y CONFIG_DCB=y @@ -145,8 +141,6 @@ CONFIG_MEMORY_FAILURE=y CONFIG_MINIX_SUBPARTITION=y CONFIG_NAMESPACES=y CONFIG_NET=y -CONFIG_NET_9P=y -CONFIG_NET_9P_VIRTIO=y CONFIG_NET_ACT_BPF=y CONFIG_NET_CLS_CGROUP=y CONFIG_NET_EMATCH=y @@ -228,12 +222,6 @@ CONFIG_USER_NS=y CONFIG_VALIDATE_FS_PARSER=y CONFIG_VETH=y CONFIG_VIRT_DRIVERS=y -CONFIG_VIRTIO_BALLOON=y -CONFIG_VIRTIO_BLK=y -CONFIG_VIRTIO_CONSOLE=y -CONFIG_VIRTIO_NET=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c index 8bf497a9843e..2ea36408816b 100644 --- a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c +++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c @@ -131,10 +131,17 @@ static bool is_lru(__u32 map_type) map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; } +static bool is_percpu(__u32 map_type) +{ + return map_type == BPF_MAP_TYPE_PERCPU_HASH || + map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; +} + struct upsert_opts { __u32 map_type; int map_fd; __u32 n; + bool retry_for_nomem; }; static int create_small_hash(void) @@ -148,19 +155,38 @@ static int create_small_hash(void) return map_fd; } +static bool retry_for_nomem_fn(int err) +{ + return err == ENOMEM; +} + static void *patch_map_thread(void *arg) { + /* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. 
*/ + static __u8 blob[8 << 10]; struct upsert_opts *opts = arg; + void *val_ptr; int val; int ret; int i; for (i = 0; i < opts->n; i++) { - if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) + if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { val = create_small_hash(); - else + val_ptr = &val; + } else if (is_percpu(opts->map_type)) { + val_ptr = blob; + } else { val = rand(); - ret = bpf_map_update_elem(opts->map_fd, &i, &val, 0); + val_ptr = &val; + } + + /* 2 seconds may be enough ? */ + if (opts->retry_for_nomem) + ret = map_update_retriable(opts->map_fd, &i, val_ptr, 0, + 40, retry_for_nomem_fn); + else + ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0); CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno)); if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) @@ -281,6 +307,13 @@ static void __test(int map_fd) else opts.n /= 2; + /* per-cpu bpf memory allocator may not be able to allocate per-cpu + * pointer successfully and it can not refill free llist timely, and + * bpf_map_update_elem() will return -ENOMEM. so just retry to mitigate + * the problem temporarily. + */ + opts.retry_for_nomem = is_percpu(opts.map_type) && (info.map_flags & BPF_F_NO_PREALLOC); + /* * Upsert keys [0, n) under some competition: with random values from * N_THREADS threads. Check values, then delete all elements and check diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index 465c1c3a3d3c..4ebd0da898f5 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -40,7 +40,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, "R10", "fp0"}, {0, "R3_w", "2"}, {1, "R3_w", "4"}, @@ -68,7 +68,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, "R10", "fp0"}, {0, "R3_w", "1"}, {1, "R3_w", "2"}, @@ -97,7 +97,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, "R10", "fp0"}, {0, "R3_w", "4"}, {1, "R3_w", "8"}, @@ -119,7 +119,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, "R10", "fp0"}, {0, "R3_w", "7"}, {1, "R3_w", "7"}, @@ -162,13 +162,13 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {6, "R0_w", "pkt(off=8,r=8,imm=0)"}, + {6, "R0_w", "pkt(off=8,r=8)"}, {6, "R3_w", "var_off=(0x0; 0xff)"}, {7, "R3_w", "var_off=(0x0; 0x1fe)"}, {8, "R3_w", "var_off=(0x0; 0x3fc)"}, {9, "R3_w", "var_off=(0x0; 0x7f8)"}, {10, "R3_w", "var_off=(0x0; 0xff0)"}, - {12, "R3_w", "pkt_end(off=0,imm=0)"}, + {12, "R3_w", "pkt_end()"}, {17, "R4_w", "var_off=(0x0; 0xff)"}, {18, "R4_w", "var_off=(0x0; 0x1fe0)"}, {19, "R4_w", "var_off=(0x0; 0xff0)"}, @@ -235,11 +235,11 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {2, "R5_w", "pkt(off=0,r=0,imm=0)"}, - {4, "R5_w", "pkt(off=14,r=0,imm=0)"}, - {5, "R4_w", "pkt(off=14,r=0,imm=0)"}, - {9, "R2", "pkt(off=0,r=18,imm=0)"}, - {10, "R5", "pkt(off=14,r=18,imm=0)"}, + {2, "R5_w", "pkt(r=0)"}, + {4, "R5_w", "pkt(off=14,r=0)"}, + {5, "R4_w", "pkt(off=14,r=0)"}, + {9, "R2", "pkt(r=18)"}, + {10, "R5", "pkt(off=14,r=18)"}, {10, "R4_w", 
"var_off=(0x0; 0xff)"}, {13, "R4_w", "var_off=(0x0; 0xffff)"}, {14, "R4_w", "var_off=(0x0; 0xffff)"}, @@ -299,7 +299,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {7, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Offset is added to packet pointer R5, resulting in * known fixed offset, and variable offset from R6. @@ -337,7 +337,7 @@ static struct bpf_align_test tests[] = { /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - {26, "R5_w", "pkt(off=14,r=8,"}, + {26, "R5_w", "pkt(off=14,r=8)"}, /* Variable offset is added to R5, resulting in a * variable offset of (4n). See comment for insn #18 * for R4 = R5 trick. @@ -397,7 +397,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {7, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Adding 14 makes R6 be (4n+2) */ {8, "R6_w", "var_off=(0x2; 0x7fc)"}, @@ -459,7 +459,7 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .matches = { - {3, "R5_w", "pkt_end(off=0,imm=0)"}, + {3, "R5_w", "pkt_end()"}, /* (ptr - ptr) << 2 == unknown, (4n) */ {5, "R5_w", "var_off=(0x0; 0xfffffffffffffffc)"}, /* (4n) + 14 == (4n+2). We blow our bounds, because @@ -513,7 +513,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {8, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Adding 14 makes R6 be (4n+2) */ {9, "R6_w", "var_off=(0x2; 0x7fc)"}, @@ -566,7 +566,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {9, "R6_w", "var_off=(0x0; 0x3c)"}, /* Adding 14 makes R6 be (4n+2) */ {10, "R6_w", "var_off=(0x2; 0x7c)"}, @@ -659,14 +659,14 @@ static int do_test_single(struct bpf_align_test *test) /* Check the next line as well in case the previous line * did not have a corresponding bpf insn. Example: * func#0 @0 - * 0: R1=ctx(off=0,imm=0) R10=fp0 + * 0: R1=ctx() R10=fp0 * 0: (b7) r3 = 2 ; R3_w=2 * * Sometimes it's actually two lines below, e.g. 
when * searching for "6: R3_w=scalar(umax=255,var_off=(0x0; 0xff))": - * from 4 to 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 - * 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 - * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(off=0,r=8,imm=0) R3_w=scalar(umax=255,var_off=(0x0; 0xff)) + * from 4 to 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0 + * 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0 + * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(r=8) R3_w=scalar(umax=255,var_off=(0x0; 0xff)) */ while (!(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) { cur_line = -1; diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c index a1766a298bb7..f7cd129cb82b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bind_perm.c +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c @@ -9,8 +9,6 @@ #include "cap_helpers.h" #include "bind_perm.skel.h" -static int duration; - static int create_netns(void) { if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) @@ -27,7 +25,7 @@ void try_bind(int family, int port, int expected_errno) int fd = -1; fd = socket(family, SOCK_STREAM, 0); - if (CHECK(fd < 0, "fd", "errno %d", errno)) + if (!ASSERT_GE(fd, 0, "socket")) goto close_socket; if (family == AF_INET) { @@ -60,7 +58,7 @@ void test_bind_perm(void) return; cgroup_fd = test__join_cgroup("/bind_perm"); - if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno)) + if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup")) return; skel = bind_perm__open_and_load(); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index e3498f607b49..618af9dfae9b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -34,8 +34,6 @@ #include "bpf_iter_ksym.skel.h" #include "bpf_iter_sockmap.skel.h" -static int duration; - static void test_btf_id_or_null(void) { struct bpf_iter_test_kern3 *skel; @@ -64,7 +62,7 @@ static void do_dummy_read_opts(struct bpf_program *prog, struct bpf_iter_attach_ /* not check contents, but ensure read() ends without error */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)); + ASSERT_GE(len, 0, "read"); close(iter_fd); @@ -334,6 +332,8 @@ static void test_task_stack(void) do_dummy_read(skel->progs.dump_task_stack); do_dummy_read(skel->progs.get_task_user_stacks); + ASSERT_EQ(skel->bss->num_user_stacks, 1, "num_user_stacks"); + bpf_iter_task_stack__destroy(skel); } @@ -413,7 +413,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel) goto free_link; } - if (CHECK(err < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(err, 0, "read")) goto free_link; ASSERT_HAS_SUBSTR(taskbuf, "(struct task_struct)", @@ -526,11 +526,11 @@ static int do_read_with_fd(int iter_fd, const char *expected, start = 0; while ((len = read(iter_fd, buf + start, read_buf_len)) > 0) { start += len; - if (CHECK(start >= 16, "read", "read len %d\n", len)) + if (!ASSERT_LT(start, 16, "read")) return -1; read_buf_len = read_one_char ? 
1 : 16 - start; } - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) return -1; if (!ASSERT_STREQ(buf, expected, "read")) @@ -571,8 +571,7 @@ static int do_read(const char *path, const char *expected) int err, iter_fd; iter_fd = open(path, O_RDONLY); - if (CHECK(iter_fd < 0, "open", "open %s failed: %s\n", - path, strerror(errno))) + if (!ASSERT_GE(iter_fd, 0, "open")) return -1; err = do_read_with_fd(iter_fd, expected, false); @@ -600,7 +599,7 @@ static void test_file_iter(void) unlink(path); err = bpf_link__pin(link, path); - if (CHECK(err, "pin_iter", "pin_iter to %s failed: %d\n", path, err)) + if (!ASSERT_OK(err, "pin_iter")) goto free_link; err = do_read(path, "abcd"); @@ -651,12 +650,10 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) * overflow and needs restart. */ map1_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL); - if (CHECK(map1_fd < 0, "bpf_map_create", - "map_creation failed: %s\n", strerror(errno))) + if (!ASSERT_GE(map1_fd, 0, "bpf_map_create")) goto out; map2_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL); - if (CHECK(map2_fd < 0, "bpf_map_create", - "map_creation failed: %s\n", strerror(errno))) + if (!ASSERT_GE(map2_fd, 0, "bpf_map_create")) goto free_map1; /* bpf_seq_printf kernel buffer is 8 pages, so one map @@ -685,14 +682,12 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) /* setup filtering map_id in bpf program */ map_info_len = sizeof(map_info); err = bpf_map_get_info_by_fd(map1_fd, &map_info, &map_info_len); - if (CHECK(err, "get_map_info", "get map info failed: %s\n", - strerror(errno))) + if (!ASSERT_OK(err, "get_map_info")) goto free_map2; skel->bss->map1_id = map_info.id; err = bpf_map_get_info_by_fd(map2_fd, &map_info, &map_info_len); - if (CHECK(err, "get_map_info", "get map info failed: %s\n", - strerror(errno))) + if (!ASSERT_OK(err, "get_map_info")) goto free_map2; skel->bss->map2_id = map_info.id; @@ -705,7 +700,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) goto free_link; buf = malloc(expected_read_len); - if (!buf) + if (!ASSERT_OK_PTR(buf, "malloc")) goto close_iter; /* do read */ @@ -714,16 +709,14 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) while ((len = read(iter_fd, buf, expected_read_len)) > 0) total_read_len += len; - CHECK(len != -1 || errno != E2BIG, "read", - "expected ret -1, errno E2BIG, but get ret %d, error %s\n", - len, strerror(errno)); + ASSERT_EQ(len, -1, "read"); + ASSERT_EQ(errno, E2BIG, "read"); goto free_buf; } else if (!ret1) { while ((len = read(iter_fd, buf, expected_read_len)) > 0) total_read_len += len; - if (CHECK(len < 0, "read", "read failed: %s\n", - strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto free_buf; } else { do { @@ -732,8 +725,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) total_read_len += len; } while (len > 0 || len == -EAGAIN); - if (CHECK(len < 0, "read", "read failed: %s\n", - strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto free_buf; } @@ -836,7 +828,7 @@ static void test_bpf_hash_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ @@ -878,6 +870,8 @@ static void test_bpf_percpu_hash_map(void) skel->rodata->num_cpus = bpf_num_possible_cpus(); val = malloc(8 * bpf_num_possible_cpus()); + if (!ASSERT_OK_PTR(val, "malloc")) + goto out; err = 
bpf_iter_bpf_percpu_hash_map__load(skel); if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_hash_map__load")) @@ -917,7 +911,7 @@ static void test_bpf_percpu_hash_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ @@ -983,17 +977,14 @@ static void test_bpf_array_map(void) start = 0; while ((len = read(iter_fd, buf + start, sizeof(buf) - start)) > 0) start += len; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ res_first_key = *(__u32 *)buf; res_first_val = *(__u64 *)(buf + sizeof(__u32)); - if (CHECK(res_first_key != 0 || res_first_val != first_val, - "bpf_seq_write", - "seq_write failure: first key %u vs expected 0, " - " first value %llu vs expected %llu\n", - res_first_key, res_first_val, first_val)) + if (!ASSERT_EQ(res_first_key, 0, "bpf_seq_write") || + !ASSERT_EQ(res_first_val, first_val, "bpf_seq_write")) goto close_iter; if (!ASSERT_EQ(skel->bss->key_sum, expected_key, "key_sum")) @@ -1057,6 +1048,8 @@ static void test_bpf_percpu_array_map(void) skel->rodata->num_cpus = bpf_num_possible_cpus(); val = malloc(8 * bpf_num_possible_cpus()); + if (!ASSERT_OK_PTR(val, "malloc")) + goto out; err = bpf_iter_bpf_percpu_array_map__load(skel); if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_array_map__load")) @@ -1092,7 +1085,7 @@ static void test_bpf_percpu_array_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ @@ -1131,6 +1124,7 @@ static void test_bpf_sk_storage_delete(void) sock_fd = socket(AF_INET6, SOCK_STREAM, 0); if (!ASSERT_GE(sock_fd, 0, "socket")) goto out; + err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST); if (!ASSERT_OK(err, "map_update")) goto out; @@ -1151,14 +1145,19 @@ static void test_bpf_sk_storage_delete(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); - if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", - "map value wasn't deleted (err=%d, errno=%d)\n", err, errno)) - goto close_iter; + + /* Note: The following assertions serve to ensure + * the value was deleted. It does so by asserting + * that bpf_map_lookup_elem has failed. This might + * seem counterintuitive at first. 
+ */ + ASSERT_ERR(err, "bpf_map_lookup_elem"); + ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem"); close_iter: close(iter_fd); @@ -1203,17 +1202,15 @@ static void test_bpf_sk_storage_get(void) do_dummy_read(skel->progs.fill_socket_owner); err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); - if (CHECK(err || val != getpid(), "bpf_map_lookup_elem", - "map value wasn't set correctly (expected %d, got %d, err=%d)\n", - getpid(), val, err)) + if (!ASSERT_OK(err, "bpf_map_lookup_elem") || + !ASSERT_EQ(val, getpid(), "bpf_map_lookup_elem")) goto close_socket; do_dummy_read(skel->progs.negate_socket_local_storage); err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); - CHECK(err || val != -getpid(), "bpf_map_lookup_elem", - "map value wasn't set correctly (expected %d, got %d, err=%d)\n", - -getpid(), val, err); + ASSERT_OK(err, "bpf_map_lookup_elem"); + ASSERT_EQ(val, -getpid(), "bpf_map_lookup_elem"); close_socket: close(sock_fd); @@ -1290,7 +1287,7 @@ static void test_bpf_sk_storage_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c index 675b90b15280..f09d6ac2ef09 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c @@ -25,7 +25,7 @@ void serial_test_bpf_obj_id(void) */ __u32 map_ids[nr_iters + 1]; char jited_insns[128], xlated_insns[128], zeros[128], tp_name[128]; - __u32 i, next_id, info_len, nr_id_found, duration = 0; + __u32 i, next_id, info_len, nr_id_found; struct timespec real_time_ts, boot_time_ts; int err = 0; __u64 array_value; @@ -33,16 +33,16 @@ void serial_test_bpf_obj_id(void) time_t now, load_time; err = bpf_prog_get_fd_by_id(0); - CHECK(err >= 0 || errno != ENOENT, - "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno); + ASSERT_LT(err, 0, "bpf_prog_get_fd_by_id"); + ASSERT_EQ(errno, ENOENT, "bpf_prog_get_fd_by_id"); err = bpf_map_get_fd_by_id(0); - CHECK(err >= 0 || errno != ENOENT, - "get-fd-by-notexist-map-id", "err %d errno %d\n", err, errno); + ASSERT_LT(err, 0, "bpf_map_get_fd_by_id"); + ASSERT_EQ(errno, ENOENT, "bpf_map_get_fd_by_id"); err = bpf_link_get_fd_by_id(0); - CHECK(err >= 0 || errno != ENOENT, - "get-fd-by-notexist-link-id", "err %d errno %d\n", err, errno); + ASSERT_LT(err, 0, "bpf_map_get_fd_by_id"); + ASSERT_EQ(errno, ENOENT, "bpf_map_get_fd_by_id"); /* Check bpf_map_get_info_by_fd() */ bzero(zeros, sizeof(zeros)); @@ -53,25 +53,26 @@ void serial_test_bpf_obj_id(void) /* test_obj_id.o is a dumb prog. It should never fail * to load. 
*/ - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "bpf_prog_test_load")) continue; /* Insert a magic value to the map */ map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id"); - if (CHECK_FAIL(map_fds[i] < 0)) + if (!ASSERT_GE(map_fds[i], 0, "bpf_find_map")) goto done; + err = bpf_map_update_elem(map_fds[i], &array_key, &array_magic_value, 0); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "bpf_map_update_elem")) goto done; - prog = bpf_object__find_program_by_name(objs[i], - "test_obj_id"); - if (CHECK_FAIL(!prog)) + prog = bpf_object__find_program_by_name(objs[i], "test_obj_id"); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) goto done; + links[i] = bpf_program__attach(prog); err = libbpf_get_error(links[i]); - if (CHECK(err, "prog_attach", "prog #%d, err %d\n", i, err)) { + if (!ASSERT_OK(err, "bpf_program__attach")) { links[i] = NULL; goto done; } @@ -81,24 +82,14 @@ void serial_test_bpf_obj_id(void) bzero(&map_infos[i], info_len); err = bpf_map_get_info_by_fd(map_fds[i], &map_infos[i], &info_len); - if (CHECK(err || - map_infos[i].type != BPF_MAP_TYPE_ARRAY || - map_infos[i].key_size != sizeof(__u32) || - map_infos[i].value_size != sizeof(__u64) || - map_infos[i].max_entries != 1 || - map_infos[i].map_flags != 0 || - info_len != sizeof(struct bpf_map_info) || - strcmp((char *)map_infos[i].name, expected_map_name), - "get-map-info(fd)", - "err %d errno %d type %d(%d) info_len %u(%zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n", - err, errno, - map_infos[i].type, BPF_MAP_TYPE_ARRAY, - info_len, sizeof(struct bpf_map_info), - map_infos[i].key_size, - map_infos[i].value_size, - map_infos[i].max_entries, - map_infos[i].map_flags, - map_infos[i].name, expected_map_name)) + if (!ASSERT_OK(err, "bpf_map_get_info_by_fd") || + !ASSERT_EQ(map_infos[i].type, BPF_MAP_TYPE_ARRAY, "map_type") || + !ASSERT_EQ(map_infos[i].key_size, sizeof(__u32), "key_size") || + !ASSERT_EQ(map_infos[i].value_size, sizeof(__u64), "value_size") || + !ASSERT_EQ(map_infos[i].max_entries, 1, "max_entries") || + !ASSERT_EQ(map_infos[i].map_flags, 0, "map_flags") || + !ASSERT_EQ(info_len, sizeof(struct bpf_map_info), "map_info_len") || + !ASSERT_STREQ((char *)map_infos[i].name, expected_map_name, "map_name")) goto done; /* Check getting prog info */ @@ -112,48 +103,34 @@ void serial_test_bpf_obj_id(void) prog_infos[i].xlated_prog_len = sizeof(xlated_insns); prog_infos[i].map_ids = ptr_to_u64(map_ids + i); prog_infos[i].nr_map_ids = 2; + err = clock_gettime(CLOCK_REALTIME, &real_time_ts); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "clock_gettime")) goto done; + err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "clock_gettime")) goto done; + err = bpf_prog_get_info_by_fd(prog_fds[i], &prog_infos[i], &info_len); load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec) + (prog_infos[i].load_time / nsec_per_sec); - if (CHECK(err || - prog_infos[i].type != BPF_PROG_TYPE_RAW_TRACEPOINT || - info_len != sizeof(struct bpf_prog_info) || - (env.jit_enabled && !prog_infos[i].jited_prog_len) || - (env.jit_enabled && - !memcmp(jited_insns, zeros, sizeof(zeros))) || - !prog_infos[i].xlated_prog_len || - !memcmp(xlated_insns, zeros, sizeof(zeros)) || - load_time < now - 60 || load_time > now + 60 || - prog_infos[i].created_by_uid != my_uid || - prog_infos[i].nr_map_ids != 1 || - *(int *)(long)prog_infos[i].map_ids != map_infos[i].id || - strcmp((char *)prog_infos[i].name, expected_prog_name), - "get-prog-info(fd)", - "err %d errno %d i %d type 
%d(%d) info_len %u(%zu) " - "jit_enabled %d jited_prog_len %u xlated_prog_len %u " - "jited_prog %d xlated_prog %d load_time %lu(%lu) " - "uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) " - "name %s(%s)\n", - err, errno, i, - prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER, - info_len, sizeof(struct bpf_prog_info), - env.jit_enabled, - prog_infos[i].jited_prog_len, - prog_infos[i].xlated_prog_len, - !!memcmp(jited_insns, zeros, sizeof(zeros)), - !!memcmp(xlated_insns, zeros, sizeof(zeros)), - load_time, now, - prog_infos[i].created_by_uid, my_uid, - prog_infos[i].nr_map_ids, 1, - *(int *)(long)prog_infos[i].map_ids, map_infos[i].id, - prog_infos[i].name, expected_prog_name)) + + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd") || + !ASSERT_EQ(prog_infos[i].type, BPF_PROG_TYPE_RAW_TRACEPOINT, "prog_type") || + !ASSERT_EQ(info_len, sizeof(struct bpf_prog_info), "prog_info_len") || + !ASSERT_FALSE((env.jit_enabled && !prog_infos[i].jited_prog_len), "jited_prog_len") || + !ASSERT_FALSE((env.jit_enabled && !memcmp(jited_insns, zeros, sizeof(zeros))), + "jited_insns") || + !ASSERT_NEQ(prog_infos[i].xlated_prog_len, 0, "xlated_prog_len") || + !ASSERT_NEQ(memcmp(xlated_insns, zeros, sizeof(zeros)), 0, "xlated_insns") || + !ASSERT_GE(load_time, (now - 60), "load_time") || + !ASSERT_LE(load_time, (now + 60), "load_time") || + !ASSERT_EQ(prog_infos[i].created_by_uid, my_uid, "created_by_uid") || + !ASSERT_EQ(prog_infos[i].nr_map_ids, 1, "nr_map_ids") || + !ASSERT_EQ(*(int *)(long)prog_infos[i].map_ids, map_infos[i].id, "map_ids") || + !ASSERT_STREQ((char *)prog_infos[i].name, expected_prog_name, "prog_name")) goto done; /* Check getting link info */ @@ -163,25 +140,12 @@ void serial_test_bpf_obj_id(void) link_infos[i].raw_tracepoint.tp_name_len = sizeof(tp_name); err = bpf_link_get_info_by_fd(bpf_link__fd(links[i]), &link_infos[i], &info_len); - if (CHECK(err || - link_infos[i].type != BPF_LINK_TYPE_RAW_TRACEPOINT || - link_infos[i].prog_id != prog_infos[i].id || - link_infos[i].raw_tracepoint.tp_name != ptr_to_u64(&tp_name) || - strcmp(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), - "sys_enter") || - info_len != sizeof(struct bpf_link_info), - "get-link-info(fd)", - "err %d errno %d info_len %u(%zu) type %d(%d) id %d " - "prog_id %d (%d) tp_name %s(%s)\n", - err, errno, - info_len, sizeof(struct bpf_link_info), - link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT, - link_infos[i].id, - link_infos[i].prog_id, prog_infos[i].id, - (const char *)u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), - "sys_enter")) + if (!ASSERT_OK(err, "bpf_link_get_info_by_fd") || + !ASSERT_EQ(link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT, "link_type") || + !ASSERT_EQ(link_infos[i].prog_id, prog_infos[i].id, "prog_id") || + !ASSERT_EQ(link_infos[i].raw_tracepoint.tp_name, ptr_to_u64(&tp_name), "&tp_name") || + !ASSERT_STREQ(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), "sys_enter", "tp_name")) goto done; - } /* Check bpf_prog_get_next_id() */ @@ -190,7 +154,7 @@ void serial_test_bpf_obj_id(void) while (!bpf_prog_get_next_id(next_id, &next_id)) { struct bpf_prog_info prog_info = {}; __u32 saved_map_id; - int prog_fd; + int prog_fd, cmp_res; info_len = sizeof(prog_info); @@ -198,9 +162,7 @@ void serial_test_bpf_obj_id(void) if (prog_fd < 0 && errno == ENOENT) /* The bpf_prog is in the dead row */ continue; - if (CHECK(prog_fd < 0, "get-prog-fd(next_id)", - "prog_fd %d next_id %d errno %d\n", - prog_fd, next_id, errno)) + if (!ASSERT_GE(prog_fd, 0, "bpf_prog_get_fd_by_id")) break; for (i = 0; i < nr_iters; i++) @@ -218,9 
+180,8 @@ void serial_test_bpf_obj_id(void) */ prog_info.nr_map_ids = 1; err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len); - if (CHECK(!err || errno != EFAULT, - "get-prog-fd-bad-nr-map-ids", "err %d errno %d(%d)", - err, errno, EFAULT)) + if (!ASSERT_ERR(err, "bpf_prog_get_info_by_fd") || + !ASSERT_EQ(errno, EFAULT, "bpf_prog_get_info_by_fd")) break; bzero(&prog_info, sizeof(prog_info)); info_len = sizeof(prog_info); @@ -231,27 +192,22 @@ void serial_test_bpf_obj_id(void) err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len); prog_infos[i].jited_prog_insns = 0; prog_infos[i].xlated_prog_insns = 0; - CHECK(err || info_len != sizeof(struct bpf_prog_info) || - memcmp(&prog_info, &prog_infos[i], info_len) || - *(int *)(long)prog_info.map_ids != saved_map_id, - "get-prog-info(next_id->fd)", - "err %d errno %d info_len %u(%zu) memcmp %d map_id %u(%u)\n", - err, errno, info_len, sizeof(struct bpf_prog_info), - memcmp(&prog_info, &prog_infos[i], info_len), - *(int *)(long)prog_info.map_ids, saved_map_id); + cmp_res = memcmp(&prog_info, &prog_infos[i], info_len); + + ASSERT_OK(err, "bpf_prog_get_info_by_fd"); + ASSERT_EQ(info_len, sizeof(struct bpf_prog_info), "prog_info_len"); + ASSERT_OK(cmp_res, "memcmp"); + ASSERT_EQ(*(int *)(long)prog_info.map_ids, saved_map_id, "map_id"); close(prog_fd); } - CHECK(nr_id_found != nr_iters, - "check total prog id found by get_next_id", - "nr_id_found %u(%u)\n", - nr_id_found, nr_iters); + ASSERT_EQ(nr_id_found, nr_iters, "prog_nr_id_found"); /* Check bpf_map_get_next_id() */ nr_id_found = 0; next_id = 0; while (!bpf_map_get_next_id(next_id, &next_id)) { struct bpf_map_info map_info = {}; - int map_fd; + int map_fd, cmp_res; info_len = sizeof(map_info); @@ -259,9 +215,7 @@ void serial_test_bpf_obj_id(void) if (map_fd < 0 && errno == ENOENT) /* The bpf_map is in the dead row */ continue; - if (CHECK(map_fd < 0, "get-map-fd(next_id)", - "map_fd %d next_id %u errno %d\n", - map_fd, next_id, errno)) + if (!ASSERT_GE(map_fd, 0, "bpf_map_get_fd_by_id")) break; for (i = 0; i < nr_iters; i++) @@ -274,25 +228,19 @@ void serial_test_bpf_obj_id(void) nr_id_found++; err = bpf_map_lookup_elem(map_fd, &array_key, &array_value); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) goto done; err = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len); - CHECK(err || info_len != sizeof(struct bpf_map_info) || - memcmp(&map_info, &map_infos[i], info_len) || - array_value != array_magic_value, - "check get-map-info(next_id->fd)", - "err %d errno %d info_len %u(%zu) memcmp %d array_value %llu(%llu)\n", - err, errno, info_len, sizeof(struct bpf_map_info), - memcmp(&map_info, &map_infos[i], info_len), - array_value, array_magic_value); + cmp_res = memcmp(&map_info, &map_infos[i], info_len); + ASSERT_OK(err, "bpf_map_get_info_by_fd"); + ASSERT_EQ(info_len, sizeof(struct bpf_map_info), "info_len"); + ASSERT_OK(cmp_res, "memcmp"); + ASSERT_EQ(array_value, array_magic_value, "array_value"); close(map_fd); } - CHECK(nr_id_found != nr_iters, - "check total map id found by get_next_id", - "nr_id_found %u(%u)\n", - nr_id_found, nr_iters); + ASSERT_EQ(nr_id_found, nr_iters, "map_nr_id_found"); /* Check bpf_link_get_next_id() */ nr_id_found = 0; @@ -308,9 +256,7 @@ void serial_test_bpf_obj_id(void) if (link_fd < 0 && errno == ENOENT) /* The bpf_link is in the dead row */ continue; - if (CHECK(link_fd < 0, "get-link-fd(next_id)", - "link_fd %d next_id %u errno %d\n", - link_fd, next_id, errno)) + if (!ASSERT_GE(link_fd, 0, "bpf_link_get_fd_by_id")) break; 
for (i = 0; i < nr_iters; i++) @@ -325,17 +271,13 @@ void serial_test_bpf_obj_id(void) err = bpf_link_get_info_by_fd(link_fd, &link_info, &info_len); cmp_res = memcmp(&link_info, &link_infos[i], offsetof(struct bpf_link_info, raw_tracepoint)); - CHECK(err || info_len != sizeof(link_info) || cmp_res, - "check get-link-info(next_id->fd)", - "err %d errno %d info_len %u(%zu) memcmp %d\n", - err, errno, info_len, sizeof(struct bpf_link_info), - cmp_res); + ASSERT_OK(err, "bpf_link_get_info_by_fd"); + ASSERT_EQ(info_len, sizeof(link_info), "info_len"); + ASSERT_OK(cmp_res, "memcmp"); close(link_fd); } - CHECK(nr_id_found != nr_iters, - "check total link id found by get_next_id", - "nr_id_found %u(%u)\n", nr_id_found, nr_iters); + ASSERT_EQ(nr_id_found, nr_iters, "link_nr_id_found"); done: for (i = 0; i < nr_iters; i++) { diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 4aabeaa525d4..a88e6e07e4f5 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -20,15 +20,14 @@ static const unsigned int total_bytes = 10 * 1024 * 1024; static int expected_stg = 0xeB9F; -static int stop, duration; +static int stop; static int settcpca(int fd, const char *tcp_ca) { int err; err = setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, tcp_ca, strlen(tcp_ca)); - if (CHECK(err == -1, "setsockopt(fd, TCP_CONGESTION)", "errno:%d\n", - errno)) + if (!ASSERT_NEQ(err, -1, "setsockopt")) return -1; return 0; @@ -65,8 +64,7 @@ static void *server(void *arg) bytes += nr_sent; } - CHECK(bytes != total_bytes, "send", "%zd != %u nr_sent:%zd errno:%d\n", - bytes, total_bytes, nr_sent, errno); + ASSERT_EQ(bytes, total_bytes, "send"); done: if (fd >= 0) @@ -92,10 +90,11 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) WRITE_ONCE(stop, 0); lfd = socket(AF_INET6, SOCK_STREAM, 0); - if (CHECK(lfd == -1, "socket", "errno:%d\n", errno)) + if (!ASSERT_NEQ(lfd, -1, "socket")) return; + fd = socket(AF_INET6, SOCK_STREAM, 0); - if (CHECK(fd == -1, "socket", "errno:%d\n", errno)) { + if (!ASSERT_NEQ(fd, -1, "socket")) { close(lfd); return; } @@ -108,26 +107,27 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) sa6.sin6_family = AF_INET6; sa6.sin6_addr = in6addr_loopback; err = bind(lfd, (struct sockaddr *)&sa6, addrlen); - if (CHECK(err == -1, "bind", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "bind")) goto done; + err = getsockname(lfd, (struct sockaddr *)&sa6, &addrlen); - if (CHECK(err == -1, "getsockname", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "getsockname")) goto done; + err = listen(lfd, 1); - if (CHECK(err == -1, "listen", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "listen")) goto done; if (sk_stg_map) { err = bpf_map_update_elem(bpf_map__fd(sk_stg_map), &fd, &expected_stg, BPF_NOEXIST); - if (CHECK(err, "bpf_map_update_elem(sk_stg_map)", - "err:%d errno:%d\n", err, errno)) + if (!ASSERT_OK(err, "bpf_map_update_elem(sk_stg_map)")) goto done; } /* connect to server */ err = connect(fd, (struct sockaddr *)&sa6, addrlen); - if (CHECK(err == -1, "connect", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "connect")) goto done; if (sk_stg_map) { @@ -135,14 +135,13 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) err = bpf_map_lookup_elem(bpf_map__fd(sk_stg_map), &fd, &tmp_stg); - if (CHECK(!err || errno != ENOENT, - "bpf_map_lookup_elem(sk_stg_map)", - "err:%d errno:%d\n", err, errno)) + if 
(!ASSERT_ERR(err, "bpf_map_lookup_elem(sk_stg_map)") || + !ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem(sk_stg_map)")) goto done; } err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd); - if (CHECK(err != 0, "pthread_create", "err:%d errno:%d\n", err, errno)) + if (!ASSERT_OK(err, "pthread_create")) goto done; /* recv total_bytes */ @@ -156,13 +155,12 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) bytes += nr_recv; } - CHECK(bytes != total_bytes, "recv", "%zd != %u nr_recv:%zd errno:%d\n", - bytes, total_bytes, nr_recv, errno); + ASSERT_EQ(bytes, total_bytes, "recv"); WRITE_ONCE(stop, 1); pthread_join(srv_thread, &thread_ret); - CHECK(IS_ERR(thread_ret), "pthread_join", "thread_ret:%ld", - PTR_ERR(thread_ret)); + ASSERT_OK(IS_ERR(thread_ret), "thread_ret"); + done: close(lfd); close(fd); @@ -174,7 +172,7 @@ static void test_cubic(void) struct bpf_link *link; cubic_skel = bpf_cubic__open_and_load(); - if (CHECK(!cubic_skel, "bpf_cubic__open_and_load", "failed\n")) + if (!ASSERT_OK_PTR(cubic_skel, "bpf_cubic__open_and_load")) return; link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic); @@ -197,7 +195,7 @@ static void test_dctcp(void) struct bpf_link *link; dctcp_skel = bpf_dctcp__open_and_load(); - if (CHECK(!dctcp_skel, "bpf_dctcp__open_and_load", "failed\n")) + if (!ASSERT_OK_PTR(dctcp_skel, "bpf_dctcp__open_and_load")) return; link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp); @@ -207,9 +205,7 @@ static void test_dctcp(void) } do_test("bpf_dctcp", dctcp_skel->maps.sk_stg_map); - CHECK(dctcp_skel->bss->stg_result != expected_stg, - "Unexpected stg_result", "stg_result (%x) != expected_stg (%x)\n", - dctcp_skel->bss->stg_result, expected_stg); + ASSERT_EQ(dctcp_skel->bss->stg_result, expected_stg, "stg_result"); bpf_link__destroy(link); bpf_dctcp__destroy(dctcp_skel); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index 731c343897d8..e770912fc1d2 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -35,7 +35,7 @@ static int check_load(const char *file, enum bpf_prog_type type) } bpf_program__set_type(prog, type); - bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); + bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS); bpf_program__set_log_level(prog, 4 | extra_prog_load_log_flags); err = bpf_object__load(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 92d51f377fe5..8fb4a04fbbc0 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -5265,6 +5265,7 @@ static size_t get_pprint_mapv_size(enum pprint_mapv_kind_t mapv_kind) #endif assert(0); + return 0; } static void set_pprint_mapv(enum pprint_mapv_kind_t mapv_kind, diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c new file mode 100644 index 000000000000..74d6d7546f40 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */ + +#include <sys/types.h> +#include <unistd.h> +#include <test_progs.h> +#include "cgroup_helpers.h" +#include "test_cgroup1_hierarchy.skel.h" + +static void bpf_cgroup1(struct test_cgroup1_hierarchy *skel) +{ + struct bpf_link *lsm_link, 
*fentry_link; + int err; + + /* Attach LSM prog first */ + lsm_link = bpf_program__attach_lsm(skel->progs.lsm_run); + if (!ASSERT_OK_PTR(lsm_link, "lsm_attach")) + return; + + /* LSM prog will be triggered when attaching fentry */ + fentry_link = bpf_program__attach_trace(skel->progs.fentry_run); + ASSERT_NULL(fentry_link, "fentry_attach_fail"); + + err = bpf_link__destroy(lsm_link); + ASSERT_OK(err, "destroy_lsm"); +} + +static void bpf_cgroup1_sleepable(struct test_cgroup1_hierarchy *skel) +{ + struct bpf_link *lsm_link, *fentry_link; + int err; + + /* Attach LSM prog first */ + lsm_link = bpf_program__attach_lsm(skel->progs.lsm_s_run); + if (!ASSERT_OK_PTR(lsm_link, "lsm_attach")) + return; + + /* LSM prog will be triggered when attaching fentry */ + fentry_link = bpf_program__attach_trace(skel->progs.fentry_run); + ASSERT_NULL(fentry_link, "fentry_attach_fail"); + + err = bpf_link__destroy(lsm_link); + ASSERT_OK(err, "destroy_lsm"); +} + +static void bpf_cgroup1_invalid_id(struct test_cgroup1_hierarchy *skel) +{ + struct bpf_link *lsm_link, *fentry_link; + int err; + + /* Attach LSM prog first */ + lsm_link = bpf_program__attach_lsm(skel->progs.lsm_run); + if (!ASSERT_OK_PTR(lsm_link, "lsm_attach")) + return; + + /* LSM prog will be triggered when attaching fentry */ + fentry_link = bpf_program__attach_trace(skel->progs.fentry_run); + if (!ASSERT_OK_PTR(fentry_link, "fentry_attach_success")) + goto cleanup; + + err = bpf_link__destroy(fentry_link); + ASSERT_OK(err, "destroy_lsm"); + +cleanup: + err = bpf_link__destroy(lsm_link); + ASSERT_OK(err, "destroy_fentry"); +} + +void test_cgroup1_hierarchy(void) +{ + struct test_cgroup1_hierarchy *skel; + __u64 current_cgid; + int hid, err; + + skel = test_cgroup1_hierarchy__open(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + skel->bss->target_pid = getpid(); + + err = bpf_program__set_attach_target(skel->progs.fentry_run, 0, "bpf_fentry_test1"); + if (!ASSERT_OK(err, "fentry_set_target")) + goto destroy; + + err = test_cgroup1_hierarchy__load(skel); + if (!ASSERT_OK(err, "load")) + goto destroy; + + /* Setup cgroup1 hierarchy */ + err = setup_classid_environment(); + if (!ASSERT_OK(err, "setup_classid_environment")) + goto destroy; + + err = join_classid(); + if (!ASSERT_OK(err, "join_cgroup1")) + goto cleanup; + + current_cgid = get_classid_cgroup_id(); + if (!ASSERT_GE(current_cgid, 0, "cgroup1 id")) + goto cleanup; + + hid = get_cgroup1_hierarchy_id("net_cls"); + if (!ASSERT_GE(hid, 0, "cgroup1 id")) + goto cleanup; + skel->bss->target_hid = hid; + + if (test__start_subtest("test_cgroup1_hierarchy")) { + skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1(skel); + } + + if (test__start_subtest("test_root_cgid")) { + skel->bss->target_ancestor_cgid = 1; + skel->bss->target_ancestor_level = 0; + bpf_cgroup1(skel); + } + + if (test__start_subtest("test_invalid_level")) { + skel->bss->target_ancestor_cgid = 1; + skel->bss->target_ancestor_level = 1; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_invalid_cgid")) { + skel->bss->target_ancestor_cgid = 0; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_invalid_hid")) { + skel->bss->target_ancestor_cgid = 1; + skel->bss->target_ancestor_level = 0; + skel->bss->target_hid = -1; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_invalid_cgrp_name")) { + skel->bss->target_hid = get_cgroup1_hierarchy_id("net_cl"); + skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1_invalid_id(skel); + } + + if 
(test__start_subtest("test_invalid_cgrp_name2")) { + skel->bss->target_hid = get_cgroup1_hierarchy_id("net_cls,"); + skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_sleepable_prog")) { + skel->bss->target_hid = hid; + skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1_sleepable(skel); + } + +cleanup: + cleanup_classid_environment(); +destroy: + test_cgroup1_hierarchy__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c index 9026b42914d3..addf720428f7 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c @@ -71,7 +71,7 @@ void test_cgroup_v1v2(void) } ASSERT_OK(run_test(cgroup_fd, server_fd, false), "cgroup-v2-only"); setup_classid_environment(); - set_classid(42); + set_classid(); ASSERT_OK(run_test(cgroup_fd, server_fd, true), "cgroup-v1v2"); cleanup_classid_environment(); close(server_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c index b25b870f87ba..e6e50a394472 100644 --- a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c @@ -73,6 +73,37 @@ static void test_local_kptr_stash_unstash(void) local_kptr_stash__destroy(skel); } +static void test_refcount_acquire_without_unstash(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct local_kptr_stash *skel; + int ret; + + skel = local_kptr_stash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash), + &opts); + ASSERT_OK(ret, "refcount_acquire_without_unstash run"); + ASSERT_EQ(opts.retval, 2, "refcount_acquire_without_unstash retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_refcounted_node), &opts); + ASSERT_OK(ret, "stash_refcounted_node run"); + ASSERT_OK(opts.retval, "stash_refcounted_node retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash), + &opts); + ASSERT_OK(ret, "refcount_acquire_without_unstash (2) run"); + ASSERT_EQ(opts.retval, 42, "refcount_acquire_without_unstash (2) retval"); + + local_kptr_stash__destroy(skel); +} + static void test_local_kptr_stash_fail(void) { RUN_TESTS(local_kptr_stash_fail); @@ -86,6 +117,8 @@ void test_local_kptr_stash(void) test_local_kptr_stash_plain(); if (test__start_subtest("local_kptr_stash_unstash")) test_local_kptr_stash_unstash(); + if (test__start_subtest("refcount_acquire_without_unstash")) + test_refcount_acquire_without_unstash(); if (test__start_subtest("local_kptr_stash_fail")) test_local_kptr_stash_fail(); } diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c index fe9a23e65ef4..0f7ea4d7d9f6 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_buf.c +++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c @@ -78,7 +78,7 @@ static void obj_load_log_buf(void) ASSERT_OK_PTR(strstr(libbpf_log_buf, "prog 'bad_prog': BPF program load failed"), "libbpf_log_not_empty"); ASSERT_OK_PTR(strstr(obj_log_buf, "DATASEC license"), "obj_log_not_empty"); - ASSERT_OK_PTR(strstr(good_log_buf, "0: R1=ctx(off=0,imm=0) R10=fp0"), + ASSERT_OK_PTR(strstr(good_log_buf, "0: 
R1=ctx() R10=fp0"), "good_log_verbose"); ASSERT_OK_PTR(strstr(bad_log_buf, "invalid access to map value, value_size=16 off=16000 size=4"), "bad_log_not_empty"); @@ -175,7 +175,7 @@ static void bpf_prog_load_log_buf(void) opts.log_level = 2; fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "good_prog", "GPL", good_prog_insns, good_prog_insn_cnt, &opts); - ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx(off=0,imm=0) R10=fp0"), "good_log_2"); + ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx() R10=fp0"), "good_log_2"); ASSERT_GE(fd, 0, "good_fd2"); if (fd >= 0) close(fd); diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c new file mode 100644 index 000000000000..0c9abd279e18 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -0,0 +1,2124 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#define _GNU_SOURCE +#include <limits.h> +#include <test_progs.h> +#include <linux/filter.h> +#include <linux/bpf.h> + +/* ================================= + * SHORT AND CONSISTENT NUMBER TYPES + * ================================= + */ +#define U64_MAX ((u64)UINT64_MAX) +#define U32_MAX ((u32)UINT_MAX) +#define U16_MAX ((u32)UINT_MAX) +#define S64_MIN ((s64)INT64_MIN) +#define S64_MAX ((s64)INT64_MAX) +#define S32_MIN ((s32)INT_MIN) +#define S32_MAX ((s32)INT_MAX) +#define S16_MIN ((s16)0x80000000) +#define S16_MAX ((s16)0x7fffffff) + +typedef unsigned long long ___u64; +typedef unsigned int ___u32; +typedef long long ___s64; +typedef int ___s32; + +/* avoid conflicts with already defined types in kernel headers */ +#define u64 ___u64 +#define u32 ___u32 +#define s64 ___s64 +#define s32 ___s32 + +/* ================================== + * STRING BUF ABSTRACTION AND HELPERS + * ================================== + */ +struct strbuf { + size_t buf_sz; + int pos; + char buf[0]; +}; + +#define DEFINE_STRBUF(name, N) \ + struct { struct strbuf buf; char data[(N)]; } ___##name; \ + struct strbuf *name = (___##name.buf.buf_sz = (N), ___##name.buf.pos = 0, &___##name.buf) + +__printf(2, 3) +static inline void snappendf(struct strbuf *s, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + s->pos += vsnprintf(s->buf + s->pos, + s->pos < s->buf_sz ? s->buf_sz - s->pos : 0, + fmt, args); + va_end(args); +} + +/* ================================== + * GENERIC NUMBER TYPE AND OPERATIONS + * ================================== + */ +enum num_t { U64, first_t = U64, U32, S64, S32, last_t = S32 }; + +static __always_inline u64 min_t(enum num_t t, u64 x, u64 y) +{ + switch (t) { + case U64: return (u64)x < (u64)y ? (u64)x : (u64)y; + case U32: return (u32)x < (u32)y ? (u32)x : (u32)y; + case S64: return (s64)x < (s64)y ? (s64)x : (s64)y; + case S32: return (s32)x < (s32)y ? (s32)x : (s32)y; + default: printf("min_t!\n"); exit(1); + } +} + +static __always_inline u64 max_t(enum num_t t, u64 x, u64 y) +{ + switch (t) { + case U64: return (u64)x > (u64)y ? (u64)x : (u64)y; + case U32: return (u32)x > (u32)y ? (u32)x : (u32)y; + case S64: return (s64)x > (s64)y ? (s64)x : (s64)y; + case S32: return (s32)x > (s32)y ? 
(u32)(s32)x : (u32)(s32)y; + default: printf("max_t!\n"); exit(1); + } +} + +static __always_inline u64 cast_t(enum num_t t, u64 x) +{ + switch (t) { + case U64: return (u64)x; + case U32: return (u32)x; + case S64: return (s64)x; + case S32: return (u32)(s32)x; + default: printf("cast_t!\n"); exit(1); + } +} + +static const char *t_str(enum num_t t) +{ + switch (t) { + case U64: return "u64"; + case U32: return "u32"; + case S64: return "s64"; + case S32: return "s32"; + default: printf("t_str!\n"); exit(1); + } +} + +static enum num_t t_is_32(enum num_t t) +{ + switch (t) { + case U64: return false; + case U32: return true; + case S64: return false; + case S32: return true; + default: printf("t_is_32!\n"); exit(1); + } +} + +static enum num_t t_signed(enum num_t t) +{ + switch (t) { + case U64: return S64; + case U32: return S32; + case S64: return S64; + case S32: return S32; + default: printf("t_signed!\n"); exit(1); + } +} + +static enum num_t t_unsigned(enum num_t t) +{ + switch (t) { + case U64: return U64; + case U32: return U32; + case S64: return U64; + case S32: return U32; + default: printf("t_unsigned!\n"); exit(1); + } +} + +#define UNUM_MAX_DECIMAL U16_MAX +#define SNUM_MAX_DECIMAL S16_MAX +#define SNUM_MIN_DECIMAL S16_MIN + +static bool num_is_small(enum num_t t, u64 x) +{ + switch (t) { + case U64: return (u64)x <= UNUM_MAX_DECIMAL; + case U32: return (u32)x <= UNUM_MAX_DECIMAL; + case S64: return (s64)x >= SNUM_MIN_DECIMAL && (s64)x <= SNUM_MAX_DECIMAL; + case S32: return (s32)x >= SNUM_MIN_DECIMAL && (s32)x <= SNUM_MAX_DECIMAL; + default: printf("num_is_small!\n"); exit(1); + } +} + +static void snprintf_num(enum num_t t, struct strbuf *sb, u64 x) +{ + bool is_small = num_is_small(t, x); + + if (is_small) { + switch (t) { + case U64: return snappendf(sb, "%llu", (u64)x); + case U32: return snappendf(sb, "%u", (u32)x); + case S64: return snappendf(sb, "%lld", (s64)x); + case S32: return snappendf(sb, "%d", (s32)x); + default: printf("snprintf_num!\n"); exit(1); + } + } else { + switch (t) { + case U64: + if (x == U64_MAX) + return snappendf(sb, "U64_MAX"); + else if (x >= U64_MAX - 256) + return snappendf(sb, "U64_MAX-%llu", U64_MAX - x); + else + return snappendf(sb, "%#llx", (u64)x); + case U32: + if ((u32)x == U32_MAX) + return snappendf(sb, "U32_MAX"); + else if ((u32)x >= U32_MAX - 256) + return snappendf(sb, "U32_MAX-%u", U32_MAX - (u32)x); + else + return snappendf(sb, "%#x", (u32)x); + case S64: + if ((s64)x == S64_MAX) + return snappendf(sb, "S64_MAX"); + else if ((s64)x >= S64_MAX - 256) + return snappendf(sb, "S64_MAX-%lld", S64_MAX - (s64)x); + else if ((s64)x == S64_MIN) + return snappendf(sb, "S64_MIN"); + else if ((s64)x <= S64_MIN + 256) + return snappendf(sb, "S64_MIN+%lld", (s64)x - S64_MIN); + else + return snappendf(sb, "%#llx", (s64)x); + case S32: + if ((s32)x == S32_MAX) + return snappendf(sb, "S32_MAX"); + else if ((s32)x >= S32_MAX - 256) + return snappendf(sb, "S32_MAX-%d", S32_MAX - (s32)x); + else if ((s32)x == S32_MIN) + return snappendf(sb, "S32_MIN"); + else if ((s32)x <= S32_MIN + 256) + return snappendf(sb, "S32_MIN+%d", (s32)x - S32_MIN); + else + return snappendf(sb, "%#x", (s32)x); + default: printf("snprintf_num!\n"); exit(1); + } + } +} + +/* =================================== + * GENERIC RANGE STRUCT AND OPERATIONS + * =================================== + */ +struct range { + u64 a, b; +}; + +static void snprintf_range(enum num_t t, struct strbuf *sb, struct range x) +{ + if (x.a == x.b) + return snprintf_num(t, sb, x.a); + + 
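	/*
	 * Illustrative usage sketch (not part of the original patch): a
	 * non-singleton range is rendered by the calls below as "[<lo>; <hi>]",
	 * each bound going through snprintf_num(). For example:
	 *
	 *	DEFINE_STRBUF(sb, 64);
	 *	snprintf_range(U64, sb, range(U64, 0, U64_MAX));
	 *	printf("%s\n", sb->buf);	// prints "[0; U64_MAX]"
	 *
	 * 0 is small enough to print as a plain decimal, while U64_MAX hits
	 * the named-constant branch of snprintf_num().
	 */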
snappendf(sb, "["); + snprintf_num(t, sb, x.a); + snappendf(sb, "; "); + snprintf_num(t, sb, x.b); + snappendf(sb, "]"); +} + +static void print_range(enum num_t t, struct range x, const char *sfx) +{ + DEFINE_STRBUF(sb, 128); + + snprintf_range(t, sb, x); + printf("%s%s", sb->buf, sfx); +} + +static const struct range unkn[] = { + [U64] = { 0, U64_MAX }, + [U32] = { 0, U32_MAX }, + [S64] = { (u64)S64_MIN, (u64)S64_MAX }, + [S32] = { (u64)(u32)S32_MIN, (u64)(u32)S32_MAX }, +}; + +static struct range unkn_subreg(enum num_t t) +{ + switch (t) { + case U64: return unkn[U32]; + case U32: return unkn[U32]; + case S64: return unkn[U32]; + case S32: return unkn[S32]; + default: printf("unkn_subreg!\n"); exit(1); + } +} + +static struct range range(enum num_t t, u64 a, u64 b) +{ + switch (t) { + case U64: return (struct range){ (u64)a, (u64)b }; + case U32: return (struct range){ (u32)a, (u32)b }; + case S64: return (struct range){ (s64)a, (s64)b }; + case S32: return (struct range){ (u32)(s32)a, (u32)(s32)b }; + default: printf("range!\n"); exit(1); + } +} + +static __always_inline u32 sign64(u64 x) { return (x >> 63) & 1; } +static __always_inline u32 sign32(u64 x) { return ((u32)x >> 31) & 1; } +static __always_inline u32 upper32(u64 x) { return (u32)(x >> 32); } +static __always_inline u64 swap_low32(u64 x, u32 y) { return (x & 0xffffffff00000000ULL) | y; } + +static bool range_eq(struct range x, struct range y) +{ + return x.a == y.a && x.b == y.b; +} + +static struct range range_cast_to_s32(struct range x) +{ + u64 a = x.a, b = x.b; + + /* if upper 32 bits are constant, lower 32 bits should form a proper + * s32 range to be correct + */ + if (upper32(a) == upper32(b) && (s32)a <= (s32)b) + return range(S32, a, b); + + /* Special case where upper bits form a small sequence of two + * sequential numbers (in 32-bit unsigned space, so 0xffffffff to + * 0x00000000 is also valid), while lower bits form a proper s32 range + * going from negative numbers to positive numbers. + * + * E.g.: [0xfffffff0ffffff00; 0xfffffff100000010]. Iterating + * over full 64-bit numbers range will form a proper [-16, 16] + * ([0xffffff00; 0x00000010]) range in its lower 32 bits. 
+ */ + if (upper32(a) + 1 == upper32(b) && (s32)a < 0 && (s32)b >= 0) + return range(S32, a, b); + + /* otherwise we can't derive much meaningful information */ + return unkn[S32]; +} + +static struct range range_cast_u64(enum num_t to_t, struct range x) +{ + u64 a = (u64)x.a, b = (u64)x.b; + + switch (to_t) { + case U64: + return x; + case U32: + if (upper32(a) != upper32(b)) + return unkn[U32]; + return range(U32, a, b); + case S64: + if (sign64(a) != sign64(b)) + return unkn[S64]; + return range(S64, a, b); + case S32: + return range_cast_to_s32(x); + default: printf("range_cast_u64!\n"); exit(1); + } +} + +static struct range range_cast_s64(enum num_t to_t, struct range x) +{ + s64 a = (s64)x.a, b = (s64)x.b; + + switch (to_t) { + case U64: + /* equivalent to (s64)a <= (s64)b check */ + if (sign64(a) != sign64(b)) + return unkn[U64]; + return range(U64, a, b); + case U32: + if (upper32(a) != upper32(b) || sign32(a) != sign32(b)) + return unkn[U32]; + return range(U32, a, b); + case S64: + return x; + case S32: + return range_cast_to_s32(x); + default: printf("range_cast_s64!\n"); exit(1); + } +} + +static struct range range_cast_u32(enum num_t to_t, struct range x) +{ + u32 a = (u32)x.a, b = (u32)x.b; + + switch (to_t) { + case U64: + case S64: + /* u32 is always a valid zero-extended u64/s64 */ + return range(to_t, a, b); + case U32: + return x; + case S32: + return range_cast_to_s32(range(U32, a, b)); + default: printf("range_cast_u32!\n"); exit(1); + } +} + +static struct range range_cast_s32(enum num_t to_t, struct range x) +{ + s32 a = (s32)x.a, b = (s32)x.b; + + switch (to_t) { + case U64: + case U32: + case S64: + if (sign32(a) != sign32(b)) + return unkn[to_t]; + return range(to_t, a, b); + case S32: + return x; + default: printf("range_cast_s32!\n"); exit(1); + } +} + +/* Reinterpret range in *from_t* domain as a range in *to_t* domain preserving + * all possible information. 
Worst case, it will be unknown range within + * *to_t* domain, if nothing more specific can be guaranteed during the + * conversion + */ +static struct range range_cast(enum num_t from_t, enum num_t to_t, struct range from) +{ + switch (from_t) { + case U64: return range_cast_u64(to_t, from); + case U32: return range_cast_u32(to_t, from); + case S64: return range_cast_s64(to_t, from); + case S32: return range_cast_s32(to_t, from); + default: printf("range_cast!\n"); exit(1); + } +} + +static bool is_valid_num(enum num_t t, u64 x) +{ + switch (t) { + case U64: return true; + case U32: return upper32(x) == 0; + case S64: return true; + case S32: return upper32(x) == 0; + default: printf("is_valid_num!\n"); exit(1); + } +} + +static bool is_valid_range(enum num_t t, struct range x) +{ + if (!is_valid_num(t, x.a) || !is_valid_num(t, x.b)) + return false; + + switch (t) { + case U64: return (u64)x.a <= (u64)x.b; + case U32: return (u32)x.a <= (u32)x.b; + case S64: return (s64)x.a <= (s64)x.b; + case S32: return (s32)x.a <= (s32)x.b; + default: printf("is_valid_range!\n"); exit(1); + } +} + +static struct range range_improve(enum num_t t, struct range old, struct range new) +{ + return range(t, max_t(t, old.a, new.a), min_t(t, old.b, new.b)); +} + +static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, struct range y) +{ + struct range y_cast; + + y_cast = range_cast(y_t, x_t, y); + + /* the case when new range knowledge, *y*, is a 32-bit subregister + * range, while previous range knowledge, *x*, is a full register + * 64-bit range, needs special treatment to take into account upper 32 + * bits of full register range + */ + if (t_is_32(y_t) && !t_is_32(x_t)) { + struct range x_swap; + + /* some combinations of upper 32 bits and sign bit can lead to + * invalid ranges, in such cases it's easier to detect them + * after cast/swap than try to enumerate all the conditions + * under which transformation and knowledge transfer is valid + */ + x_swap = range(x_t, swap_low32(x.a, y_cast.a), swap_low32(x.b, y_cast.b)); + if (!is_valid_range(x_t, x_swap)) + return x; + return range_improve(x_t, x, x_swap); + } + + /* otherwise, plain range cast and intersection works */ + return range_improve(x_t, x, y_cast); +} + +/* ======================= + * GENERIC CONDITIONAL OPS + * ======================= + */ +enum op { OP_LT, OP_LE, OP_GT, OP_GE, OP_EQ, OP_NE, first_op = OP_LT, last_op = OP_NE }; + +static enum op complement_op(enum op op) +{ + switch (op) { + case OP_LT: return OP_GE; + case OP_LE: return OP_GT; + case OP_GT: return OP_LE; + case OP_GE: return OP_LT; + case OP_EQ: return OP_NE; + case OP_NE: return OP_EQ; + default: printf("complement_op!\n"); exit(1); + } +} + +static const char *op_str(enum op op) +{ + switch (op) { + case OP_LT: return "<"; + case OP_LE: return "<="; + case OP_GT: return ">"; + case OP_GE: return ">="; + case OP_EQ: return "=="; + case OP_NE: return "!="; + default: printf("op_str!\n"); exit(1); + } +} + +/* Can register with range [x.a, x.b] *EVER* satisfy + * OP (<, <=, >, >=, ==, !=) relation to + * a regsiter with range [y.a, y.b] + * _in *num_t* domain_ + */ +static bool range_canbe_op(enum num_t t, struct range x, struct range y, enum op op) +{ +#define range_canbe(T) do { \ + switch (op) { \ + case OP_LT: return (T)x.a < (T)y.b; \ + case OP_LE: return (T)x.a <= (T)y.b; \ + case OP_GT: return (T)x.b > (T)y.a; \ + case OP_GE: return (T)x.b >= (T)y.a; \ + case OP_EQ: return (T)max_t(t, x.a, y.a) <= (T)min_t(t, x.b, y.b); \ + case OP_NE: return 
!((T)x.a == (T)x.b && (T)y.a == (T)y.b && (T)x.a == (T)y.a); \ + default: printf("range_canbe op %d\n", op); exit(1); \ + } \ +} while (0) + + switch (t) { + case U64: { range_canbe(u64); } + case U32: { range_canbe(u32); } + case S64: { range_canbe(s64); } + case S32: { range_canbe(s32); } + default: printf("range_canbe!\n"); exit(1); + } +#undef range_canbe +} + +/* Does register with range [x.a, x.b] *ALWAYS* satisfy + * OP (<, <=, >, >=, ==, !=) relation to + * a regsiter with range [y.a, y.b] + * _in *num_t* domain_ + */ +static bool range_always_op(enum num_t t, struct range x, struct range y, enum op op) +{ + /* always op <=> ! canbe complement(op) */ + return !range_canbe_op(t, x, y, complement_op(op)); +} + +/* Does register with range [x.a, x.b] *NEVER* satisfy + * OP (<, <=, >, >=, ==, !=) relation to + * a regsiter with range [y.a, y.b] + * _in *num_t* domain_ + */ +static bool range_never_op(enum num_t t, struct range x, struct range y, enum op op) +{ + return !range_canbe_op(t, x, y, op); +} + +/* similar to verifier's is_branch_taken(): + * 1 - always taken; + * 0 - never taken, + * -1 - unsure. + */ +static int range_branch_taken_op(enum num_t t, struct range x, struct range y, enum op op) +{ + if (range_always_op(t, x, y, op)) + return 1; + if (range_never_op(t, x, y, op)) + return 0; + return -1; +} + +/* What would be the new estimates for register x and y ranges assuming truthful + * OP comparison between them. I.e., (x OP y == true) => x <- newx, y <- newy. + * + * We assume "interesting" cases where ranges overlap. Cases where it's + * obvious that (x OP y) is either always true or false should be filtered with + * range_never and range_always checks. + */ +static void range_cond(enum num_t t, struct range x, struct range y, + enum op op, struct range *newx, struct range *newy) +{ + if (!range_canbe_op(t, x, y, op)) { + /* nothing to adjust, can't happen, return original values */ + *newx = x; + *newy = y; + return; + } + switch (op) { + case OP_LT: + *newx = range(t, x.a, min_t(t, x.b, y.b - 1)); + *newy = range(t, max_t(t, x.a + 1, y.a), y.b); + break; + case OP_LE: + *newx = range(t, x.a, min_t(t, x.b, y.b)); + *newy = range(t, max_t(t, x.a, y.a), y.b); + break; + case OP_GT: + *newx = range(t, max_t(t, x.a, y.a + 1), x.b); + *newy = range(t, y.a, min_t(t, x.b - 1, y.b)); + break; + case OP_GE: + *newx = range(t, max_t(t, x.a, y.a), x.b); + *newy = range(t, y.a, min_t(t, x.b, y.b)); + break; + case OP_EQ: + *newx = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b)); + *newy = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b)); + break; + case OP_NE: + /* generic case, can't derive more information */ + *newx = range(t, x.a, x.b); + *newy = range(t, y.a, y.b); + break; + + /* below extended logic is not supported by verifier just yet */ + if (x.a == x.b && x.a == y.a) { + /* X is a constant matching left side of Y */ + *newx = range(t, x.a, x.b); + *newy = range(t, y.a + 1, y.b); + } else if (x.a == x.b && x.b == y.b) { + /* X is a constant matching rigth side of Y */ + *newx = range(t, x.a, x.b); + *newy = range(t, y.a, y.b - 1); + } else if (y.a == y.b && x.a == y.a) { + /* Y is a constant matching left side of X */ + *newx = range(t, x.a + 1, x.b); + *newy = range(t, y.a, y.b); + } else if (y.a == y.b && x.b == y.b) { + /* Y is a constant matching rigth side of X */ + *newx = range(t, x.a, x.b - 1); + *newy = range(t, y.a, y.b); + } else { + /* generic case, can't derive more information */ + *newx = range(t, x.a, x.b); + *newy = range(t, y.a, y.b); + } + + break; + 
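	/*
	 * Worked example (illustrative, not part of the original patch): for
	 * t == U64, x = [0; 10], y = [5; 7] and op == OP_LT, the OP_LT arm
	 * above yields
	 *	*newx = [x.a; min(x.b, y.b - 1)] = [0; 6]
	 *	*newy = [max(x.a + 1, y.a); y.b] = [5; 7]
	 * i.e. x is capped one below y's upper bound and y must stay strictly
	 * above x's lower bound.
	 */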
default: + break; + } +} + +/* ======================= + * REGISTER STATE HANDLING + * ======================= + */ +struct reg_state { + struct range r[4]; /* indexed by enum num_t: U64, U32, S64, S32 */ + bool valid; +}; + +static void print_reg_state(struct reg_state *r, const char *sfx) +{ + DEFINE_STRBUF(sb, 512); + enum num_t t; + int cnt = 0; + + if (!r->valid) { + printf("<not found>%s", sfx); + return; + } + + snappendf(sb, "scalar("); + for (t = first_t; t <= last_t; t++) { + snappendf(sb, "%s%s=", cnt++ ? "," : "", t_str(t)); + snprintf_range(t, sb, r->r[t]); + } + snappendf(sb, ")"); + + printf("%s%s", sb->buf, sfx); +} + +static void print_refinement(enum num_t s_t, struct range src, + enum num_t d_t, struct range old, struct range new, + const char *ctx) +{ + printf("REFINING (%s) (%s)SRC=", ctx, t_str(s_t)); + print_range(s_t, src, ""); + printf(" (%s)DST_OLD=", t_str(d_t)); + print_range(d_t, old, ""); + printf(" (%s)DST_NEW=", t_str(d_t)); + print_range(d_t, new, "\n"); +} + +static void reg_state_refine(struct reg_state *r, enum num_t t, struct range x, const char *ctx) +{ + enum num_t d_t, s_t; + struct range old; + bool keep_going = false; + +again: + /* try to derive new knowledge from just learned range x of type t */ + for (d_t = first_t; d_t <= last_t; d_t++) { + old = r->r[d_t]; + r->r[d_t] = range_refine(d_t, r->r[d_t], t, x); + if (!range_eq(r->r[d_t], old)) { + keep_going = true; + if (env.verbosity >= VERBOSE_VERY) + print_refinement(t, x, d_t, old, r->r[d_t], ctx); + } + } + + /* now see if we can derive anything new from updated reg_state's ranges */ + for (s_t = first_t; s_t <= last_t; s_t++) { + for (d_t = first_t; d_t <= last_t; d_t++) { + old = r->r[d_t]; + r->r[d_t] = range_refine(d_t, r->r[d_t], s_t, r->r[s_t]); + if (!range_eq(r->r[d_t], old)) { + keep_going = true; + if (env.verbosity >= VERBOSE_VERY) + print_refinement(s_t, r->r[s_t], d_t, old, r->r[d_t], ctx); + } + } + } + + /* keep refining until we converge */ + if (keep_going) { + keep_going = false; + goto again; + } +} + +static void reg_state_set_const(struct reg_state *rs, enum num_t t, u64 val) +{ + enum num_t tt; + + rs->valid = true; + for (tt = first_t; tt <= last_t; tt++) + rs->r[tt] = tt == t ? 
range(t, val, val) : unkn[tt]; + + reg_state_refine(rs, t, rs->r[t], "CONST"); +} + +static void reg_state_cond(enum num_t t, struct reg_state *x, struct reg_state *y, enum op op, + struct reg_state *newx, struct reg_state *newy, const char *ctx) +{ + char buf[32]; + enum num_t ts[2]; + struct reg_state xx = *x, yy = *y; + int i, t_cnt; + struct range z1, z2; + + if (op == OP_EQ || op == OP_NE) { + /* OP_EQ and OP_NE are sign-agnostic, so we need to process + * both signed and unsigned domains at the same time + */ + ts[0] = t_unsigned(t); + ts[1] = t_signed(t); + t_cnt = 2; + } else { + ts[0] = t; + t_cnt = 1; + } + + for (i = 0; i < t_cnt; i++) { + t = ts[i]; + z1 = x->r[t]; + z2 = y->r[t]; + + range_cond(t, z1, z2, op, &z1, &z2); + + if (newx) { + snprintf(buf, sizeof(buf), "%s R1", ctx); + reg_state_refine(&xx, t, z1, buf); + } + if (newy) { + snprintf(buf, sizeof(buf), "%s R2", ctx); + reg_state_refine(&yy, t, z2, buf); + } + } + + if (newx) + *newx = xx; + if (newy) + *newy = yy; +} + +static int reg_state_branch_taken_op(enum num_t t, struct reg_state *x, struct reg_state *y, + enum op op) +{ + if (op == OP_EQ || op == OP_NE) { + /* OP_EQ and OP_NE are sign-agnostic */ + enum num_t tu = t_unsigned(t); + enum num_t ts = t_signed(t); + int br_u, br_s, br; + + br_u = range_branch_taken_op(tu, x->r[tu], y->r[tu], op); + br_s = range_branch_taken_op(ts, x->r[ts], y->r[ts], op); + + if (br_u >= 0 && br_s >= 0 && br_u != br_s) + ASSERT_FALSE(true, "branch taken inconsistency!\n"); + + /* if 64-bit ranges are indecisive, use 32-bit subranges to + * eliminate always/never taken branches, if possible + */ + if (br_u == -1 && (t == U64 || t == S64)) { + br = range_branch_taken_op(U32, x->r[U32], y->r[U32], op); + /* we can only reject for OP_EQ, never take branch + * based on lower 32 bits + */ + if (op == OP_EQ && br == 0) + return 0; + /* for OP_NEQ we can be conclusive only if lower 32 bits + * differ and thus inequality branch is always taken + */ + if (op == OP_NE && br == 1) + return 1; + + br = range_branch_taken_op(S32, x->r[S32], y->r[S32], op); + if (op == OP_EQ && br == 0) + return 0; + if (op == OP_NE && br == 1) + return 1; + } + + return br_u >= 0 ? br_u : br_s; + } + return range_branch_taken_op(t, x->r[t], y->r[t], op); +} + +/* ===================================== + * BPF PROGS GENERATION AND VERIFICATION + * ===================================== + */ +struct case_spec { + /* whether to init full register (r1) or sub-register (w1) */ + bool init_subregs; + /* whether to establish initial value range on full register (r1) or + * sub-register (w1) + */ + bool setup_subregs; + /* whether to establish initial value range using signed or unsigned + * comparisons (i.e., initialize umin/umax or smin/smax directly) + */ + bool setup_signed; + /* whether to perform comparison on full registers or sub-registers */ + bool compare_subregs; + /* whether to perform comparison using signed or unsigned operations */ + bool compare_signed; +}; + +/* Generate test BPF program based on provided test ranges, operation, and + * specifications about register bitness and signedness. 
+ */ +static int load_range_cmp_prog(struct range x, struct range y, enum op op, + int branch_taken, struct case_spec spec, + char *log_buf, size_t log_sz, + int *false_pos, int *true_pos) +{ +#define emit(insn) ({ \ + struct bpf_insn __insns[] = { insn }; \ + int __i; \ + for (__i = 0; __i < ARRAY_SIZE(__insns); __i++) \ + insns[cur_pos + __i] = __insns[__i]; \ + cur_pos += __i; \ +}) +#define JMP_TO(target) (target - cur_pos - 1) + int cur_pos = 0, exit_pos, fd, op_code; + struct bpf_insn insns[64]; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .log_level = 2, + .log_buf = log_buf, + .log_size = log_sz, + .prog_flags = BPF_F_TEST_REG_INVARIANTS, + ); + + /* ; skip exit block below + * goto +2; + */ + emit(BPF_JMP_A(2)); + exit_pos = cur_pos; + /* ; exit block for all the preparatory conditionals + * out: + * r0 = 0; + * exit; + */ + emit(BPF_MOV64_IMM(BPF_REG_0, 0)); + emit(BPF_EXIT_INSN()); + /* + * ; assign r6/w6 and r7/w7 unpredictable u64/u32 value + * call bpf_get_current_pid_tgid; + * r6 = r0; | w6 = w0; + * call bpf_get_current_pid_tgid; + * r7 = r0; | w7 = w0; + */ + emit(BPF_EMIT_CALL(BPF_FUNC_get_current_pid_tgid)); + if (spec.init_subregs) + emit(BPF_MOV32_REG(BPF_REG_6, BPF_REG_0)); + else + emit(BPF_MOV64_REG(BPF_REG_6, BPF_REG_0)); + emit(BPF_EMIT_CALL(BPF_FUNC_get_current_pid_tgid)); + if (spec.init_subregs) + emit(BPF_MOV32_REG(BPF_REG_7, BPF_REG_0)); + else + emit(BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); + /* ; setup initial r6/w6 possible value range ([x.a, x.b]) + * r1 = %[x.a] ll; | w1 = %[x.a]; + * r2 = %[x.b] ll; | w2 = %[x.b]; + * if r6 < r1 goto out; | if w6 < w1 goto out; + * if r6 > r2 goto out; | if w6 > w2 goto out; + */ + if (spec.setup_subregs) { + emit(BPF_MOV32_IMM(BPF_REG_1, (s32)x.a)); + emit(BPF_MOV32_IMM(BPF_REG_2, (s32)x.b)); + emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_6, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT, + BPF_REG_6, BPF_REG_2, JMP_TO(exit_pos))); + } else { + emit(BPF_LD_IMM64(BPF_REG_1, x.a)); + emit(BPF_LD_IMM64(BPF_REG_2, x.b)); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_6, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT, + BPF_REG_6, BPF_REG_2, JMP_TO(exit_pos))); + } + /* ; setup initial r7/w7 possible value range ([y.a, y.b]) + * r1 = %[y.a] ll; | w1 = %[y.a]; + * r2 = %[y.b] ll; | w2 = %[y.b]; + * if r7 < r1 goto out; | if w7 < w1 goto out; + * if r7 > r2 goto out; | if w7 > w2 goto out; + */ + if (spec.setup_subregs) { + emit(BPF_MOV32_IMM(BPF_REG_1, (s32)y.a)); + emit(BPF_MOV32_IMM(BPF_REG_2, (s32)y.b)); + emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_7, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT, + BPF_REG_7, BPF_REG_2, JMP_TO(exit_pos))); + } else { + emit(BPF_LD_IMM64(BPF_REG_1, y.a)); + emit(BPF_LD_IMM64(BPF_REG_2, y.b)); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_7, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT, + BPF_REG_7, BPF_REG_2, JMP_TO(exit_pos))); + } + /* ; range test instruction + * if r6 <op> r7 goto +3; | if w6 <op> w7 goto +3; + */ + switch (op) { + case OP_LT: op_code = spec.compare_signed ? BPF_JSLT : BPF_JLT; break; + case OP_LE: op_code = spec.compare_signed ? BPF_JSLE : BPF_JLE; break; + case OP_GT: op_code = spec.compare_signed ? BPF_JSGT : BPF_JGT; break; + case OP_GE: op_code = spec.compare_signed ? 
BPF_JSGE : BPF_JGE; break; + case OP_EQ: op_code = BPF_JEQ; break; + case OP_NE: op_code = BPF_JNE; break; + default: + printf("unrecognized op %d\n", op); + return -ENOTSUP; + } + /* ; BEFORE conditional, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably + * ; this is used for debugging, as verifier doesn't always print + * ; registers states as of condition jump instruction (e.g., when + * ; precision marking happens) + * r0 = r6; | w0 = w6; + * r0 = r7; | w0 = w7; + */ + if (spec.compare_subregs) { + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7)); + } else { + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); + } + if (spec.compare_subregs) + emit(BPF_JMP32_REG(op_code, BPF_REG_6, BPF_REG_7, 3)); + else + emit(BPF_JMP_REG(op_code, BPF_REG_6, BPF_REG_7, 3)); + /* ; FALSE branch, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably + * r0 = r6; | w0 = w6; + * r0 = r7; | w0 = w7; + * exit; + */ + *false_pos = cur_pos; + if (spec.compare_subregs) { + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7)); + } else { + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); + } + if (branch_taken == 1) /* false branch is never taken */ + emit(BPF_EMIT_CALL(0xDEAD)); /* poison this branch */ + else + emit(BPF_EXIT_INSN()); + /* ; TRUE branch, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably + * r0 = r6; | w0 = w6; + * r0 = r7; | w0 = w7; + * exit; + */ + *true_pos = cur_pos; + if (spec.compare_subregs) { + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7)); + } else { + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); + } + if (branch_taken == 0) /* true branch is never taken */ + emit(BPF_EMIT_CALL(0xDEAD)); /* poison this branch */ + emit(BPF_EXIT_INSN()); /* last instruction has to be exit */ + + fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "reg_bounds_test", + "GPL", insns, cur_pos, &opts); + if (fd < 0) + return fd; + + close(fd); + return 0; +#undef emit +#undef JMP_TO +} + +#define str_has_pfx(str, pfx) (strncmp(str, pfx, strlen(pfx)) == 0) + +/* Parse register state from verifier log. + * `s` should point to the start of "Rx = ..." substring in the verifier log. + */ +static int parse_reg_state(const char *s, struct reg_state *reg) +{ + /* There are two generic forms for SCALAR register: + * - known constant: R6_rwD=P%lld + * - range: R6_rwD=scalar(id=1,...), where "..." is a comma-separated + * list of optional range specifiers: + * - umin=%llu, if missing, assumed 0; + * - umax=%llu, if missing, assumed U64_MAX; + * - smin=%lld, if missing, assumed S64_MIN; + * - smax=%lld, if missing, assummed S64_MAX; + * - umin32=%d, if missing, assumed 0; + * - umax32=%d, if missing, assumed U32_MAX; + * - smin32=%d, if missing, assumed S32_MIN; + * - smax32=%d, if missing, assummed S32_MAX; + * - var_off=(%#llx; %#llx), tnum part, we don't care about it. + * + * If some of the values are equal, they will be grouped (but min/max + * are not mixed together, and similarly negative values are not + * grouped with non-negative ones). E.g.: + * + * R6_w=Pscalar(smin=smin32=0, smax=umax=umax32=1000) + * + * _rwD part is optional (and any of the letters can be missing). + * P (precision mark) is optional as well. + * + * Anything inside scalar() is optional, including id, of course. 
+ */ + struct { + const char *pfx; + u64 *dst, def; + bool is_32, is_set; + } *f, fields[8] = { + {"smin=", ®->r[S64].a, S64_MIN}, + {"smax=", ®->r[S64].b, S64_MAX}, + {"umin=", ®->r[U64].a, 0}, + {"umax=", ®->r[U64].b, U64_MAX}, + {"smin32=", ®->r[S32].a, (u32)S32_MIN, true}, + {"smax32=", ®->r[S32].b, (u32)S32_MAX, true}, + {"umin32=", ®->r[U32].a, 0, true}, + {"umax32=", ®->r[U32].b, U32_MAX, true}, + }; + const char *p; + int i; + + p = strchr(s, '='); + if (!p) + return -EINVAL; + p++; + if (*p == 'P') + p++; + + if (!str_has_pfx(p, "scalar(")) { + long long sval; + enum num_t t; + + if (p[0] == '0' && p[1] == 'x') { + if (sscanf(p, "%llx", &sval) != 1) + return -EINVAL; + } else { + if (sscanf(p, "%lld", &sval) != 1) + return -EINVAL; + } + + reg->valid = true; + for (t = first_t; t <= last_t; t++) { + reg->r[t] = range(t, sval, sval); + } + return 0; + } + + p += sizeof("scalar"); + while (p) { + int midxs[ARRAY_SIZE(fields)], mcnt = 0; + u64 val; + + for (i = 0; i < ARRAY_SIZE(fields); i++) { + f = &fields[i]; + if (!str_has_pfx(p, f->pfx)) + continue; + midxs[mcnt++] = i; + p += strlen(f->pfx); + } + + if (mcnt) { + /* populate all matched fields */ + if (p[0] == '0' && p[1] == 'x') { + if (sscanf(p, "%llx", &val) != 1) + return -EINVAL; + } else { + if (sscanf(p, "%lld", &val) != 1) + return -EINVAL; + } + + for (i = 0; i < mcnt; i++) { + f = &fields[midxs[i]]; + f->is_set = true; + *f->dst = f->is_32 ? (u64)(u32)val : val; + } + } else if (str_has_pfx(p, "var_off")) { + /* skip "var_off=(0x0; 0x3f)" part completely */ + p = strchr(p, ')'); + if (!p) + return -EINVAL; + p++; + } + + p = strpbrk(p, ",)"); + if (*p == ')') + break; + if (p) + p++; + } + + reg->valid = true; + + for (i = 0; i < ARRAY_SIZE(fields); i++) { + f = &fields[i]; + if (!f->is_set) + *f->dst = f->def; + } + + return 0; +} + + +/* Parse all register states (TRUE/FALSE branches and DST/SRC registers) + * out of the verifier log for a corresponding test case BPF program. + */ +static int parse_range_cmp_log(const char *log_buf, struct case_spec spec, + int false_pos, int true_pos, + struct reg_state *false1_reg, struct reg_state *false2_reg, + struct reg_state *true1_reg, struct reg_state *true2_reg) +{ + struct { + int insn_idx; + int reg_idx; + const char *reg_upper; + struct reg_state *state; + } specs[] = { + {false_pos, 6, "R6=", false1_reg}, + {false_pos + 1, 7, "R7=", false2_reg}, + {true_pos, 6, "R6=", true1_reg}, + {true_pos + 1, 7, "R7=", true2_reg}, + }; + char buf[32]; + const char *p = log_buf, *q; + int i, err; + + for (i = 0; i < 4; i++) { + sprintf(buf, "%d: (%s) %s = %s%d", specs[i].insn_idx, + spec.compare_subregs ? "bc" : "bf", + spec.compare_subregs ? "w0" : "r0", + spec.compare_subregs ? 
"w" : "r", specs[i].reg_idx); + + q = strstr(p, buf); + if (!q) { + *specs[i].state = (struct reg_state){.valid = false}; + continue; + } + p = strstr(q, specs[i].reg_upper); + if (!p) + return -EINVAL; + err = parse_reg_state(p, specs[i].state); + if (err) + return -EINVAL; + } + return 0; +} + +/* Validate ranges match, and print details if they don't */ +static bool assert_range_eq(enum num_t t, struct range x, struct range y, + const char *ctx1, const char *ctx2) +{ + DEFINE_STRBUF(sb, 512); + + if (range_eq(x, y)) + return true; + + snappendf(sb, "MISMATCH %s.%s: ", ctx1, ctx2); + snprintf_range(t, sb, x); + snappendf(sb, " != "); + snprintf_range(t, sb, y); + + printf("%s\n", sb->buf); + + return false; +} + +/* Validate that register states match, and print details if they don't */ +static bool assert_reg_state_eq(struct reg_state *r, struct reg_state *e, const char *ctx) +{ + bool ok = true; + enum num_t t; + + if (r->valid != e->valid) { + printf("MISMATCH %s: actual %s != expected %s\n", ctx, + r->valid ? "<valid>" : "<invalid>", + e->valid ? "<valid>" : "<invalid>"); + return false; + } + + if (!r->valid) + return true; + + for (t = first_t; t <= last_t; t++) { + if (!assert_range_eq(t, r->r[t], e->r[t], ctx, t_str(t))) + ok = false; + } + + return ok; +} + +/* Printf verifier log, filtering out irrelevant noise */ +static void print_verifier_log(const char *buf) +{ + const char *p; + + while (buf[0]) { + p = strchrnul(buf, '\n'); + + /* filter out irrelevant precision backtracking logs */ + if (str_has_pfx(buf, "mark_precise: ")) + goto skip_line; + + printf("%.*s\n", (int)(p - buf), buf); + +skip_line: + buf = *p == '\0' ? p : p + 1; + } +} + +/* Simulate provided test case purely with our own range-based logic. + * This is done to set up expectations for verifier's branch_taken logic and + * verifier's register states in the verifier log. + */ +static void sim_case(enum num_t init_t, enum num_t cond_t, + struct range x, struct range y, enum op op, + struct reg_state *fr1, struct reg_state *fr2, + struct reg_state *tr1, struct reg_state *tr2, + int *branch_taken) +{ + const u64 A = x.a; + const u64 B = x.b; + const u64 C = y.a; + const u64 D = y.b; + struct reg_state rc; + enum op rev_op = complement_op(op); + enum num_t t; + + fr1->valid = fr2->valid = true; + tr1->valid = tr2->valid = true; + for (t = first_t; t <= last_t; t++) { + /* if we are initializing using 32-bit subregisters, + * full registers get upper 32 bits zeroed automatically + */ + struct range z = t_is_32(init_t) ? 
unkn_subreg(t) : unkn[t]; + + fr1->r[t] = fr2->r[t] = tr1->r[t] = tr2->r[t] = z; + } + + /* step 1: r1 >= A, r2 >= C */ + reg_state_set_const(&rc, init_t, A); + reg_state_cond(init_t, fr1, &rc, OP_GE, fr1, NULL, "r1>=A"); + reg_state_set_const(&rc, init_t, C); + reg_state_cond(init_t, fr2, &rc, OP_GE, fr2, NULL, "r2>=C"); + *tr1 = *fr1; + *tr2 = *fr2; + if (env.verbosity >= VERBOSE_VERY) { + printf("STEP1 (%s) R1: ", t_str(init_t)); print_reg_state(fr1, "\n"); + printf("STEP1 (%s) R2: ", t_str(init_t)); print_reg_state(fr2, "\n"); + } + + /* step 2: r1 <= B, r2 <= D */ + reg_state_set_const(&rc, init_t, B); + reg_state_cond(init_t, fr1, &rc, OP_LE, fr1, NULL, "r1<=B"); + reg_state_set_const(&rc, init_t, D); + reg_state_cond(init_t, fr2, &rc, OP_LE, fr2, NULL, "r2<=D"); + *tr1 = *fr1; + *tr2 = *fr2; + if (env.verbosity >= VERBOSE_VERY) { + printf("STEP2 (%s) R1: ", t_str(init_t)); print_reg_state(fr1, "\n"); + printf("STEP2 (%s) R2: ", t_str(init_t)); print_reg_state(fr2, "\n"); + } + + /* step 3: r1 <op> r2 */ + *branch_taken = reg_state_branch_taken_op(cond_t, fr1, fr2, op); + fr1->valid = fr2->valid = false; + tr1->valid = tr2->valid = false; + if (*branch_taken != 1) { /* FALSE is possible */ + fr1->valid = fr2->valid = true; + reg_state_cond(cond_t, fr1, fr2, rev_op, fr1, fr2, "FALSE"); + } + if (*branch_taken != 0) { /* TRUE is possible */ + tr1->valid = tr2->valid = true; + reg_state_cond(cond_t, tr1, tr2, op, tr1, tr2, "TRUE"); + } + if (env.verbosity >= VERBOSE_VERY) { + printf("STEP3 (%s) FALSE R1:", t_str(cond_t)); print_reg_state(fr1, "\n"); + printf("STEP3 (%s) FALSE R2:", t_str(cond_t)); print_reg_state(fr2, "\n"); + printf("STEP3 (%s) TRUE R1:", t_str(cond_t)); print_reg_state(tr1, "\n"); + printf("STEP3 (%s) TRUE R2:", t_str(cond_t)); print_reg_state(tr2, "\n"); + } +} + +/* =============================== + * HIGH-LEVEL TEST CASE VALIDATION + * =============================== + */ +static u32 upper_seeds[] = { + 0, + 1, + U32_MAX, + U32_MAX - 1, + S32_MAX, + (u32)S32_MIN, +}; + +static u32 lower_seeds[] = { + 0, + 1, + 2, (u32)-2, + 255, (u32)-255, + UINT_MAX, + UINT_MAX - 1, + INT_MAX, + (u32)INT_MIN, +}; + +struct ctx { + int val_cnt, subval_cnt, range_cnt, subrange_cnt; + u64 uvals[ARRAY_SIZE(upper_seeds) * ARRAY_SIZE(lower_seeds)]; + s64 svals[ARRAY_SIZE(upper_seeds) * ARRAY_SIZE(lower_seeds)]; + u32 usubvals[ARRAY_SIZE(lower_seeds)]; + s32 ssubvals[ARRAY_SIZE(lower_seeds)]; + struct range *uranges, *sranges; + struct range *usubranges, *ssubranges; + int max_failure_cnt, cur_failure_cnt; + int total_case_cnt, case_cnt; + int rand_case_cnt; + unsigned rand_seed; + __u64 start_ns; + char progress_ctx[64]; +}; + +static void cleanup_ctx(struct ctx *ctx) +{ + free(ctx->uranges); + free(ctx->sranges); + free(ctx->usubranges); + free(ctx->ssubranges); +} + +struct subtest_case { + enum num_t init_t; + enum num_t cond_t; + struct range x; + struct range y; + enum op op; +}; + +static void subtest_case_str(struct strbuf *sb, struct subtest_case *t, bool use_op) +{ + snappendf(sb, "(%s)", t_str(t->init_t)); + snprintf_range(t->init_t, sb, t->x); + snappendf(sb, " (%s)%s ", t_str(t->cond_t), use_op ? 
op_str(t->op) : "<op>"); + snprintf_range(t->init_t, sb, t->y); +} + +/* Generate and validate test case based on specific combination of setup + * register ranges (including their expected num_t domain), and conditional + * operation to perform (including num_t domain in which it has to be + * performed) + */ +static int verify_case_op(enum num_t init_t, enum num_t cond_t, + struct range x, struct range y, enum op op) +{ + char log_buf[256 * 1024]; + size_t log_sz = sizeof(log_buf); + int err, false_pos = 0, true_pos = 0, branch_taken; + struct reg_state fr1, fr2, tr1, tr2; + struct reg_state fe1, fe2, te1, te2; + bool failed = false; + struct case_spec spec = { + .init_subregs = (init_t == U32 || init_t == S32), + .setup_subregs = (init_t == U32 || init_t == S32), + .setup_signed = (init_t == S64 || init_t == S32), + .compare_subregs = (cond_t == U32 || cond_t == S32), + .compare_signed = (cond_t == S64 || cond_t == S32), + }; + + log_buf[0] = '\0'; + + sim_case(init_t, cond_t, x, y, op, &fe1, &fe2, &te1, &te2, &branch_taken); + + err = load_range_cmp_prog(x, y, op, branch_taken, spec, + log_buf, log_sz, &false_pos, &true_pos); + if (err) { + ASSERT_OK(err, "load_range_cmp_prog"); + failed = true; + } + + err = parse_range_cmp_log(log_buf, spec, false_pos, true_pos, + &fr1, &fr2, &tr1, &tr2); + if (err) { + ASSERT_OK(err, "parse_range_cmp_log"); + failed = true; + } + + if (!assert_reg_state_eq(&fr1, &fe1, "false_reg1") || + !assert_reg_state_eq(&fr2, &fe2, "false_reg2") || + !assert_reg_state_eq(&tr1, &te1, "true_reg1") || + !assert_reg_state_eq(&tr2, &te2, "true_reg2")) { + failed = true; + } + + if (failed || env.verbosity >= VERBOSE_NORMAL) { + if (failed || env.verbosity >= VERBOSE_VERY) { + printf("VERIFIER LOG:\n========================\n"); + print_verifier_log(log_buf); + printf("=====================\n"); + } + printf("ACTUAL FALSE1: "); print_reg_state(&fr1, "\n"); + printf("EXPECTED FALSE1: "); print_reg_state(&fe1, "\n"); + printf("ACTUAL FALSE2: "); print_reg_state(&fr2, "\n"); + printf("EXPECTED FALSE2: "); print_reg_state(&fe2, "\n"); + printf("ACTUAL TRUE1: "); print_reg_state(&tr1, "\n"); + printf("EXPECTED TRUE1: "); print_reg_state(&te1, "\n"); + printf("ACTUAL TRUE2: "); print_reg_state(&tr2, "\n"); + printf("EXPECTED TRUE2: "); print_reg_state(&te2, "\n"); + + return failed ? 
-EINVAL : 0; + } + + return 0; +} + +/* Given setup ranges and number types, go over all supported operations, + * generating individual subtest for each allowed combination + */ +static int verify_case_opt(struct ctx *ctx, enum num_t init_t, enum num_t cond_t, + struct range x, struct range y, bool is_subtest) +{ + DEFINE_STRBUF(sb, 256); + int err; + struct subtest_case sub = { + .init_t = init_t, + .cond_t = cond_t, + .x = x, + .y = y, + }; + + sb->pos = 0; /* reset position in strbuf */ + subtest_case_str(sb, &sub, false /* ignore op */); + if (is_subtest && !test__start_subtest(sb->buf)) + return 0; + + for (sub.op = first_op; sub.op <= last_op; sub.op++) { + sb->pos = 0; /* reset position in strbuf */ + subtest_case_str(sb, &sub, true /* print op */); + + if (env.verbosity >= VERBOSE_NORMAL) /* this speeds up debugging */ + printf("TEST CASE: %s\n", sb->buf); + + err = verify_case_op(init_t, cond_t, x, y, sub.op); + if (err || env.verbosity >= VERBOSE_NORMAL) + ASSERT_OK(err, sb->buf); + if (err) { + ctx->cur_failure_cnt++; + if (ctx->cur_failure_cnt > ctx->max_failure_cnt) + return err; + return 0; /* keep testing other cases */ + } + ctx->case_cnt++; + if ((ctx->case_cnt % 10000) == 0) { + double progress = (ctx->case_cnt + 0.0) / ctx->total_case_cnt; + u64 elapsed_ns = get_time_ns() - ctx->start_ns; + double remain_ns = elapsed_ns / progress * (1 - progress); + + fprintf(env.stderr, "PROGRESS (%s): %d/%d (%.2lf%%), " + "elapsed %llu mins (%.2lf hrs), " + "ETA %.0lf mins (%.2lf hrs)\n", + ctx->progress_ctx, + ctx->case_cnt, ctx->total_case_cnt, 100.0 * progress, + elapsed_ns / 1000000000 / 60, + elapsed_ns / 1000000000.0 / 3600, + remain_ns / 1000000000.0 / 60, + remain_ns / 1000000000.0 / 3600); + } + } + + return 0; +} + +static int verify_case(struct ctx *ctx, enum num_t init_t, enum num_t cond_t, + struct range x, struct range y) +{ + return verify_case_opt(ctx, init_t, cond_t, x, y, true /* is_subtest */); +} + +/* ================================ + * GENERATED CASES FROM SEED VALUES + * ================================ + */ +static int u64_cmp(const void *p1, const void *p2) +{ + u64 x1 = *(const u64 *)p1, x2 = *(const u64 *)p2; + + return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0; +} + +static int u32_cmp(const void *p1, const void *p2) +{ + u32 x1 = *(const u32 *)p1, x2 = *(const u32 *)p2; + + return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0; +} + +static int s64_cmp(const void *p1, const void *p2) +{ + s64 x1 = *(const s64 *)p1, x2 = *(const s64 *)p2; + + return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0; +} + +static int s32_cmp(const void *p1, const void *p2) +{ + s32 x1 = *(const s32 *)p1, x2 = *(const s32 *)p2; + + return x1 != x2 ? (x1 < x2 ? 
-1 : 1) : 0; +} + +/* Generate valid unique constants from seeds, both signed and unsigned */ +static void gen_vals(struct ctx *ctx) +{ + int i, j, cnt = 0; + + for (i = 0; i < ARRAY_SIZE(upper_seeds); i++) { + for (j = 0; j < ARRAY_SIZE(lower_seeds); j++) { + ctx->uvals[cnt++] = (((u64)upper_seeds[i]) << 32) | lower_seeds[j]; + } + } + + /* sort and compact uvals (i.e., it's `sort | uniq`) */ + qsort(ctx->uvals, cnt, sizeof(*ctx->uvals), u64_cmp); + for (i = 1, j = 0; i < cnt; i++) { + if (ctx->uvals[j] == ctx->uvals[i]) + continue; + j++; + ctx->uvals[j] = ctx->uvals[i]; + } + ctx->val_cnt = j + 1; + + /* we have exactly the same number of s64 values, they are just in + * a different order than u64s, so just sort them differently + */ + for (i = 0; i < ctx->val_cnt; i++) + ctx->svals[i] = ctx->uvals[i]; + qsort(ctx->svals, ctx->val_cnt, sizeof(*ctx->svals), s64_cmp); + + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + for (i = 0; i < ctx->val_cnt; i++) { + sb1->pos = sb2->pos = 0; + snprintf_num(U64, sb1, ctx->uvals[i]); + snprintf_num(S64, sb2, ctx->svals[i]); + printf("SEED #%d: u64=%-20s s64=%-20s\n", i, sb1->buf, sb2->buf); + } + } + + /* 32-bit values are generated separately */ + cnt = 0; + for (i = 0; i < ARRAY_SIZE(lower_seeds); i++) { + ctx->usubvals[cnt++] = lower_seeds[i]; + } + + /* sort and compact usubvals (i.e., it's `sort | uniq`) */ + qsort(ctx->usubvals, cnt, sizeof(*ctx->usubvals), u32_cmp); + for (i = 1, j = 0; i < cnt; i++) { + if (ctx->usubvals[j] == ctx->usubvals[i]) + continue; + j++; + ctx->usubvals[j] = ctx->usubvals[i]; + } + ctx->subval_cnt = j + 1; + + for (i = 0; i < ctx->subval_cnt; i++) + ctx->ssubvals[i] = ctx->usubvals[i]; + qsort(ctx->ssubvals, ctx->subval_cnt, sizeof(*ctx->ssubvals), s32_cmp); + + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + for (i = 0; i < ctx->subval_cnt; i++) { + sb1->pos = sb2->pos = 0; + snprintf_num(U32, sb1, ctx->usubvals[i]); + snprintf_num(S32, sb2, ctx->ssubvals[i]); + printf("SUBSEED #%d: u32=%-10s s32=%-10s\n", i, sb1->buf, sb2->buf); + } + } +} + +/* Generate valid ranges from upper/lower seeds */ +static int gen_ranges(struct ctx *ctx) +{ + int i, j, cnt = 0; + + for (i = 0; i < ctx->val_cnt; i++) { + for (j = i; j < ctx->val_cnt; j++) { + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + sb1->pos = sb2->pos = 0; + snprintf_range(U64, sb1, range(U64, ctx->uvals[i], ctx->uvals[j])); + snprintf_range(S64, sb2, range(S64, ctx->svals[i], ctx->svals[j])); + printf("RANGE #%d: u64=%-40s s64=%-40s\n", cnt, sb1->buf, sb2->buf); + } + cnt++; + } + } + ctx->range_cnt = cnt; + + ctx->uranges = calloc(ctx->range_cnt, sizeof(*ctx->uranges)); + if (!ASSERT_OK_PTR(ctx->uranges, "uranges_calloc")) + return -EINVAL; + ctx->sranges = calloc(ctx->range_cnt, sizeof(*ctx->sranges)); + if (!ASSERT_OK_PTR(ctx->sranges, "sranges_calloc")) + return -EINVAL; + + cnt = 0; + for (i = 0; i < ctx->val_cnt; i++) { + for (j = i; j < ctx->val_cnt; j++) { + ctx->uranges[cnt] = range(U64, ctx->uvals[i], ctx->uvals[j]); + ctx->sranges[cnt] = range(S64, ctx->svals[i], ctx->svals[j]); + cnt++; + } + } + + cnt = 0; + for (i = 0; i < ctx->subval_cnt; i++) { + for (j = i; j < ctx->subval_cnt; j++) { + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + sb1->pos = sb2->pos = 0; + snprintf_range(U32, sb1, range(U32, ctx->usubvals[i], ctx->usubvals[j])); + snprintf_range(S32, 
sb2, range(S32, ctx->ssubvals[i], ctx->ssubvals[j])); + printf("SUBRANGE #%d: u32=%-20s s32=%-20s\n", cnt, sb1->buf, sb2->buf); + } + cnt++; + } + } + ctx->subrange_cnt = cnt; + + ctx->usubranges = calloc(ctx->subrange_cnt, sizeof(*ctx->usubranges)); + if (!ASSERT_OK_PTR(ctx->usubranges, "usubranges_calloc")) + return -EINVAL; + ctx->ssubranges = calloc(ctx->subrange_cnt, sizeof(*ctx->ssubranges)); + if (!ASSERT_OK_PTR(ctx->ssubranges, "ssubranges_calloc")) + return -EINVAL; + + cnt = 0; + for (i = 0; i < ctx->subval_cnt; i++) { + for (j = i; j < ctx->subval_cnt; j++) { + ctx->usubranges[cnt] = range(U32, ctx->usubvals[i], ctx->usubvals[j]); + ctx->ssubranges[cnt] = range(S32, ctx->ssubvals[i], ctx->ssubvals[j]); + cnt++; + } + } + + return 0; +} + +static int parse_env_vars(struct ctx *ctx) +{ + const char *s; + + if ((s = getenv("REG_BOUNDS_MAX_FAILURE_CNT"))) { + errno = 0; + ctx->max_failure_cnt = strtol(s, NULL, 10); + if (errno || ctx->max_failure_cnt < 0) { + ASSERT_OK(-errno, "REG_BOUNDS_MAX_FAILURE_CNT"); + return -EINVAL; + } + } + + if ((s = getenv("REG_BOUNDS_RAND_CASE_CNT"))) { + errno = 0; + ctx->rand_case_cnt = strtol(s, NULL, 10); + if (errno || ctx->rand_case_cnt < 0) { + ASSERT_OK(-errno, "REG_BOUNDS_RAND_CASE_CNT"); + return -EINVAL; + } + } + + if ((s = getenv("REG_BOUNDS_RAND_SEED"))) { + errno = 0; + ctx->rand_seed = strtoul(s, NULL, 10); + if (errno) { + ASSERT_OK(-errno, "REG_BOUNDS_RAND_SEED"); + return -EINVAL; + } + } + + return 0; +} + +static int prepare_gen_tests(struct ctx *ctx) +{ + const char *s; + int err; + + if (!(s = getenv("SLOW_TESTS")) || strcmp(s, "1") != 0) { + test__skip(); + return -ENOTSUP; + } + + err = parse_env_vars(ctx); + if (err) + return err; + + gen_vals(ctx); + err = gen_ranges(ctx); + if (err) { + ASSERT_OK(err, "gen_ranges"); + return err; + } + + return 0; +} + +/* Go over generated constants and ranges and validate various supported + * combinations of them + */ +static void validate_gen_range_vs_const_64(enum num_t init_t, enum num_t cond_t) +{ + struct ctx ctx; + struct range rconst; + const struct range *ranges; + const u64 *vals; + int i, j; + + memset(&ctx, 0, sizeof(ctx)); + + if (prepare_gen_tests(&ctx)) + goto cleanup; + + ranges = init_t == U64 ? ctx.uranges : ctx.sranges; + vals = init_t == U64 ? ctx.uvals : (const u64 *)ctx.svals; + + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.range_cnt * ctx.val_cnt); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "RANGE x CONST, %s -> %s", + t_str(init_t), t_str(cond_t)); + + for (i = 0; i < ctx.val_cnt; i++) { + for (j = 0; j < ctx.range_cnt; j++) { + rconst = range(init_t, vals[i], vals[i]); + + /* (u64|s64)(<range> x <const>) */ + if (verify_case(&ctx, init_t, cond_t, ranges[j], rconst)) + goto cleanup; + /* (u64|s64)(<const> x <range>) */ + if (verify_case(&ctx, init_t, cond_t, rconst, ranges[j])) + goto cleanup; + } + } + +cleanup: + cleanup_ctx(&ctx); +} + +static void validate_gen_range_vs_const_32(enum num_t init_t, enum num_t cond_t) +{ + struct ctx ctx; + struct range rconst; + const struct range *ranges; + const u32 *vals; + int i, j; + + memset(&ctx, 0, sizeof(ctx)); + + if (prepare_gen_tests(&ctx)) + goto cleanup; + + ranges = init_t == U32 ? ctx.usubranges : ctx.ssubranges; + vals = init_t == U32 ? 
ctx.usubvals : (const u32 *)ctx.ssubvals; + + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.subrange_cnt * ctx.subval_cnt); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "RANGE x CONST, %s -> %s", + t_str(init_t), t_str(cond_t)); + + for (i = 0; i < ctx.subval_cnt; i++) { + for (j = 0; j < ctx.subrange_cnt; j++) { + rconst = range(init_t, vals[i], vals[i]); + + /* (u32|s32)(<range> x <const>) */ + if (verify_case(&ctx, init_t, cond_t, ranges[j], rconst)) + goto cleanup; + /* (u32|s32)(<const> x <range>) */ + if (verify_case(&ctx, init_t, cond_t, rconst, ranges[j])) + goto cleanup; + } + } + +cleanup: + cleanup_ctx(&ctx); +} + +static void validate_gen_range_vs_range(enum num_t init_t, enum num_t cond_t) +{ + struct ctx ctx; + const struct range *ranges; + int i, j, rcnt; + + memset(&ctx, 0, sizeof(ctx)); + + if (prepare_gen_tests(&ctx)) + goto cleanup; + + switch (init_t) + { + case U64: + ranges = ctx.uranges; + rcnt = ctx.range_cnt; + break; + case U32: + ranges = ctx.usubranges; + rcnt = ctx.subrange_cnt; + break; + case S64: + ranges = ctx.sranges; + rcnt = ctx.range_cnt; + break; + case S32: + ranges = ctx.ssubranges; + rcnt = ctx.subrange_cnt; + break; + default: + printf("validate_gen_range_vs_range!\n"); + exit(1); + } + + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * rcnt * (rcnt + 1) / 2); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "RANGE x RANGE, %s -> %s", + t_str(init_t), t_str(cond_t)); + + for (i = 0; i < rcnt; i++) { + for (j = i; j < rcnt; j++) { + /* (<range> x <range>) */ + if (verify_case(&ctx, init_t, cond_t, ranges[i], ranges[j])) + goto cleanup; + if (verify_case(&ctx, init_t, cond_t, ranges[j], ranges[i])) + goto cleanup; + } + } + +cleanup: + cleanup_ctx(&ctx); +} + +/* Go over thousands of test cases generated from initial seed values. + * Given this takes a long time, guard this behind the SLOW_TESTS=1 envvar. If + * the envvar is not set, this test is skipped during test_progs testing. + * + * We split this up into smaller subsets based on initialization and + * conditional numeric domains to get easy parallelization with test_progs' + * -j argument.
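 + * + * For example, one way to run only these generated subtests in parallel + * (a sketch, assuming test_progs' -t substring selector) is: + * + *   SLOW_TESTS=1 ./test_progs -t reg_bounds_gen -j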
+ */ + +/* RANGE x CONST, U64 initial range */ +void test_reg_bounds_gen_consts_u64_u64(void) { validate_gen_range_vs_const_64(U64, U64); } +void test_reg_bounds_gen_consts_u64_s64(void) { validate_gen_range_vs_const_64(U64, S64); } +void test_reg_bounds_gen_consts_u64_u32(void) { validate_gen_range_vs_const_64(U64, U32); } +void test_reg_bounds_gen_consts_u64_s32(void) { validate_gen_range_vs_const_64(U64, S32); } +/* RANGE x CONST, S64 initial range */ +void test_reg_bounds_gen_consts_s64_u64(void) { validate_gen_range_vs_const_64(S64, U64); } +void test_reg_bounds_gen_consts_s64_s64(void) { validate_gen_range_vs_const_64(S64, S64); } +void test_reg_bounds_gen_consts_s64_u32(void) { validate_gen_range_vs_const_64(S64, U32); } +void test_reg_bounds_gen_consts_s64_s32(void) { validate_gen_range_vs_const_64(S64, S32); } +/* RANGE x CONST, U32 initial range */ +void test_reg_bounds_gen_consts_u32_u64(void) { validate_gen_range_vs_const_32(U32, U64); } +void test_reg_bounds_gen_consts_u32_s64(void) { validate_gen_range_vs_const_32(U32, S64); } +void test_reg_bounds_gen_consts_u32_u32(void) { validate_gen_range_vs_const_32(U32, U32); } +void test_reg_bounds_gen_consts_u32_s32(void) { validate_gen_range_vs_const_32(U32, S32); } +/* RANGE x CONST, S32 initial range */ +void test_reg_bounds_gen_consts_s32_u64(void) { validate_gen_range_vs_const_32(S32, U64); } +void test_reg_bounds_gen_consts_s32_s64(void) { validate_gen_range_vs_const_32(S32, S64); } +void test_reg_bounds_gen_consts_s32_u32(void) { validate_gen_range_vs_const_32(S32, U32); } +void test_reg_bounds_gen_consts_s32_s32(void) { validate_gen_range_vs_const_32(S32, S32); } + +/* RANGE x RANGE, U64 initial range */ +void test_reg_bounds_gen_ranges_u64_u64(void) { validate_gen_range_vs_range(U64, U64); } +void test_reg_bounds_gen_ranges_u64_s64(void) { validate_gen_range_vs_range(U64, S64); } +void test_reg_bounds_gen_ranges_u64_u32(void) { validate_gen_range_vs_range(U64, U32); } +void test_reg_bounds_gen_ranges_u64_s32(void) { validate_gen_range_vs_range(U64, S32); } +/* RANGE x RANGE, S64 initial range */ +void test_reg_bounds_gen_ranges_s64_u64(void) { validate_gen_range_vs_range(S64, U64); } +void test_reg_bounds_gen_ranges_s64_s64(void) { validate_gen_range_vs_range(S64, S64); } +void test_reg_bounds_gen_ranges_s64_u32(void) { validate_gen_range_vs_range(S64, U32); } +void test_reg_bounds_gen_ranges_s64_s32(void) { validate_gen_range_vs_range(S64, S32); } +/* RANGE x RANGE, U32 initial range */ +void test_reg_bounds_gen_ranges_u32_u64(void) { validate_gen_range_vs_range(U32, U64); } +void test_reg_bounds_gen_ranges_u32_s64(void) { validate_gen_range_vs_range(U32, S64); } +void test_reg_bounds_gen_ranges_u32_u32(void) { validate_gen_range_vs_range(U32, U32); } +void test_reg_bounds_gen_ranges_u32_s32(void) { validate_gen_range_vs_range(U32, S32); } +/* RANGE x RANGE, S32 initial range */ +void test_reg_bounds_gen_ranges_s32_u64(void) { validate_gen_range_vs_range(S32, U64); } +void test_reg_bounds_gen_ranges_s32_s64(void) { validate_gen_range_vs_range(S32, S64); } +void test_reg_bounds_gen_ranges_s32_u32(void) { validate_gen_range_vs_range(S32, U32); } +void test_reg_bounds_gen_ranges_s32_s32(void) { validate_gen_range_vs_range(S32, S32); } + +#define DEFAULT_RAND_CASE_CNT 100 + +#define RAND_21BIT_MASK ((1 << 22) - 1) + +static u64 rand_u64() +{ + /* RAND_MAX is guaranteed to be at least 1<<15, but in practice it + * seems to be 1<<31, so we need to call it thrice to get full u64; + * we'll use a roughly equal split: 22 + 21 + 21 bits
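 + * (intended layout: the first call supplies the top 22 bits via the << 42 + * shift, the second the middle 21 bits, and the third the low 21 bits)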
+ */ + return ((u64)random() << 42) | + (((u64)random() & RAND_21BIT_MASK) << 21) | + (random() & RAND_21BIT_MASK); +} + +static u64 rand_const(enum num_t t) +{ + return cast_t(t, rand_u64()); +} + +static struct range rand_range(enum num_t t) +{ + u64 x = rand_const(t), y = rand_const(t); + + return range(t, min_t(t, x, y), max_t(t, x, y)); +} + +static void validate_rand_ranges(enum num_t init_t, enum num_t cond_t, bool const_range) +{ + struct ctx ctx; + struct range range1, range2; + int err, i; + u64 t; + + memset(&ctx, 0, sizeof(ctx)); + + err = parse_env_vars(&ctx); + if (err) { + ASSERT_OK(err, "parse_env_vars"); + return; + } + + if (ctx.rand_case_cnt == 0) + ctx.rand_case_cnt = DEFAULT_RAND_CASE_CNT; + if (ctx.rand_seed == 0) + ctx.rand_seed = (unsigned)get_time_ns(); + + srandom(ctx.rand_seed); + + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.rand_case_cnt); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "[RANDOM SEED %u] RANGE x %s, %s -> %s", + ctx.rand_seed, const_range ? "CONST" : "RANGE", + t_str(init_t), t_str(cond_t)); + + for (i = 0; i < ctx.rand_case_cnt; i++) { + range1 = rand_range(init_t); + if (const_range) { + t = rand_const(init_t); + range2 = range(init_t, t, t); + } else { + range2 = rand_range(init_t); + } + + /* <range1> x <range2> */ + if (verify_case_opt(&ctx, init_t, cond_t, range1, range2, false /* !is_subtest */)) + goto cleanup; + /* <range2> x <range1> */ + if (verify_case_opt(&ctx, init_t, cond_t, range2, range1, false /* !is_subtest */)) + goto cleanup; + } + +cleanup: + /* make sure we report random seed for reproducing */ + ASSERT_TRUE(true, ctx.progress_ctx); + cleanup_ctx(&ctx); +} + +/* [RANDOM] RANGE x CONST, U64 initial range */ +void test_reg_bounds_rand_consts_u64_u64(void) { validate_rand_ranges(U64, U64, true /* const */); } +void test_reg_bounds_rand_consts_u64_s64(void) { validate_rand_ranges(U64, S64, true /* const */); } +void test_reg_bounds_rand_consts_u64_u32(void) { validate_rand_ranges(U64, U32, true /* const */); } +void test_reg_bounds_rand_consts_u64_s32(void) { validate_rand_ranges(U64, S32, true /* const */); } +/* [RANDOM] RANGE x CONST, S64 initial range */ +void test_reg_bounds_rand_consts_s64_u64(void) { validate_rand_ranges(S64, U64, true /* const */); } +void test_reg_bounds_rand_consts_s64_s64(void) { validate_rand_ranges(S64, S64, true /* const */); } +void test_reg_bounds_rand_consts_s64_u32(void) { validate_rand_ranges(S64, U32, true /* const */); } +void test_reg_bounds_rand_consts_s64_s32(void) { validate_rand_ranges(S64, S32, true /* const */); } +/* [RANDOM] RANGE x CONST, U32 initial range */ +void test_reg_bounds_rand_consts_u32_u64(void) { validate_rand_ranges(U32, U64, true /* const */); } +void test_reg_bounds_rand_consts_u32_s64(void) { validate_rand_ranges(U32, S64, true /* const */); } +void test_reg_bounds_rand_consts_u32_u32(void) { validate_rand_ranges(U32, U32, true /* const */); } +void test_reg_bounds_rand_consts_u32_s32(void) { validate_rand_ranges(U32, S32, true /* const */); } +/* [RANDOM] RANGE x CONST, S32 initial range */ +void test_reg_bounds_rand_consts_s32_u64(void) { validate_rand_ranges(S32, U64, true /* const */); } +void test_reg_bounds_rand_consts_s32_s64(void) { validate_rand_ranges(S32, S64, true /* const */); } +void test_reg_bounds_rand_consts_s32_u32(void) { validate_rand_ranges(S32, U32, true /* const */); } +void test_reg_bounds_rand_consts_s32_s32(void) { validate_rand_ranges(S32, S32, true /* const */); } + +/* [RANDOM] 
RANGE x RANGE, U64 initial range */ +void test_reg_bounds_rand_ranges_u64_u64(void) { validate_rand_ranges(U64, U64, false /* range */); } +void test_reg_bounds_rand_ranges_u64_s64(void) { validate_rand_ranges(U64, S64, false /* range */); } +void test_reg_bounds_rand_ranges_u64_u32(void) { validate_rand_ranges(U64, U32, false /* range */); } +void test_reg_bounds_rand_ranges_u64_s32(void) { validate_rand_ranges(U64, S32, false /* range */); } +/* [RANDOM] RANGE x RANGE, S64 initial range */ +void test_reg_bounds_rand_ranges_s64_u64(void) { validate_rand_ranges(S64, U64, false /* range */); } +void test_reg_bounds_rand_ranges_s64_s64(void) { validate_rand_ranges(S64, S64, false /* range */); } +void test_reg_bounds_rand_ranges_s64_u32(void) { validate_rand_ranges(S64, U32, false /* range */); } +void test_reg_bounds_rand_ranges_s64_s32(void) { validate_rand_ranges(S64, S32, false /* range */); } +/* [RANDOM] RANGE x RANGE, U32 initial range */ +void test_reg_bounds_rand_ranges_u32_u64(void) { validate_rand_ranges(U32, U64, false /* range */); } +void test_reg_bounds_rand_ranges_u32_s64(void) { validate_rand_ranges(U32, S64, false /* range */); } +void test_reg_bounds_rand_ranges_u32_u32(void) { validate_rand_ranges(U32, U32, false /* range */); } +void test_reg_bounds_rand_ranges_u32_s32(void) { validate_rand_ranges(U32, S32, false /* range */); } +/* [RANDOM] RANGE x RANGE, S32 initial range */ +void test_reg_bounds_rand_ranges_s32_u64(void) { validate_rand_ranges(S32, U64, false /* range */); } +void test_reg_bounds_rand_ranges_s32_s64(void) { validate_rand_ranges(S32, S64, false /* range */); } +void test_reg_bounds_rand_ranges_s32_u32(void) { validate_rand_ranges(S32, U32, false /* range */); } +void test_reg_bounds_rand_ranges_s32_s32(void) { validate_rand_ranges(S32, S32, false /* range */); } + +/* A set of hard-coded "interesting" cases to validate as part of normal + * test_progs test runs + */ +static struct subtest_case crafted_cases[] = { + {U64, U64, {0, 0xffffffff}, {0, 0}}, + {U64, U64, {0, 0x80000000}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x100000100ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x180000000ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x1ffffff00ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x1ffffff01ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x1fffffffeULL}, {0, 0}}, + {U64, U64, {0x100000001ULL, 0x1000000ffULL}, {0, 0}}, + + /* single point overlap, interesting BPF_EQ and BPF_NE interactions */ + {U64, U64, {0, 1}, {1, 0x80000000}}, + {U64, S64, {0, 1}, {1, 0x80000000}}, + {U64, U32, {0, 1}, {1, 0x80000000}}, + {U64, S32, {0, 1}, {1, 0x80000000}}, + + {U64, S64, {0, 0xffffffff00000000ULL}, {0, 0}}, + {U64, S64, {0x7fffffffffffffffULL, 0xffffffff00000000ULL}, {0, 0}}, + {U64, S64, {0x7fffffff00000001ULL, 0xffffffff00000000ULL}, {0, 0}}, + {U64, S64, {0, 0xffffffffULL}, {1, 1}}, + {U64, S64, {0, 0xffffffffULL}, {0x7fffffff, 0x7fffffff}}, + + {U64, U32, {0, 0x100000000}, {0, 0}}, + {U64, U32, {0xfffffffe, 0x100000000}, {0x80000000, 0x80000000}}, + + {U64, S32, {0, 0xffffffff00000000ULL}, {0, 0}}, + /* these are tricky cases where lower 32 bits allow to tighten 64 + * bit boundaries based on tightened lower 32 bit boundaries + */ + {U64, S32, {0, 0x0ffffffffULL}, {0, 0}}, + {U64, S32, {0, 0x100000000ULL}, {0, 0}}, + {U64, S32, {0, 0x100000001ULL}, {0, 0}}, + {U64, S32, {0, 0x180000000ULL}, {0, 0}}, + {U64, S32, {0, 0x17fffffffULL}, {0, 0}}, + {U64, S32, {0, 0x180000001ULL}, {0, 0}}, + + /* verifier knows about [-1, 0] range for s32 for this case already */ + 
{S64, S64, {0xffffffffffffffffULL, 0}, {0xffffffff00000000ULL, 0xffffffff00000000ULL}}, + /* but didn't know about these cases initially */ + {U64, U64, {0xffffffff, 0x100000000ULL}, {0, 0}}, /* s32: [-1, 0] */ + {U64, U64, {0xffffffff, 0x100000001ULL}, {0, 0}}, /* s32: [-1, 1] */ + + /* longer convergence case: learning from u64 -> s64 -> u64 -> u32, + * arriving at u32: [1, U32_MAX] (instead of more pessimistic [0, U32_MAX]) + */ + {S64, U64, {0xffffffff00000001ULL, 0}, {0xffffffff00000000ULL, 0xffffffff00000000ULL}}, + + {U32, U32, {1, U32_MAX}, {0, 0}}, + + {U32, S32, {0, U32_MAX}, {U32_MAX, U32_MAX}}, + + {S32, U64, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}, {(u32)(s32)-255, 0}}, + {S32, S64, {(u32)(s32)S32_MIN, (u32)(s32)-255}, {(u32)(s32)-2, 0}}, + {S32, S64, {0, 1}, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}}, + {S32, U32, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}}, +}; + +/* Go over crafted hard-coded cases. This is fast, so we do it as part of + * normal test_progs run. + */ +void test_reg_bounds_crafted(void) +{ + struct ctx ctx; + int i; + + memset(&ctx, 0, sizeof(ctx)); + + for (i = 0; i < ARRAY_SIZE(crafted_cases); i++) { + struct subtest_case *c = &crafted_cases[i]; + + verify_case(&ctx, c->init_t, c->cond_t, c->x, c->y); + verify_case(&ctx, c->init_t, c->cond_t, c->y, c->x); + } + + cleanup_ctx(&ctx); +} diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c index f29c08d93beb..18d451be57c8 100644 --- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c @@ -13,22 +13,22 @@ static struct { const char *err_msg; } spin_lock_fail_tests[] = { { "lock_id_kptr_preserve", - "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) " - "R1_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n" + "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2) " + "R1_w=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n" "R1 type=ptr_ expected=percpu_ptr_" }, { "lock_id_global_zero", - "; R1_w=map_value(off=0,ks=4,vs=4,imm=0)\n2: (85) call bpf_this_cpu_ptr#154\n" + "; R1_w=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mapval_preserve", "[0-9]\\+: (bf) r1 = r0 ;" - " R0_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)" - " R1_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)\n" + " R0_w=map_value(id=1,map=array_map,ks=4,vs=8)" + " R1_w=map_value(id=1,map=array_map,ks=4,vs=8)\n" "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_innermapval_preserve", "[0-9]\\+: (bf) r1 = r0 ;" - " R0=map_value(id=2,off=0,ks=4,vs=8,imm=0)" - " R1_w=map_value(id=2,off=0,ks=4,vs=8,imm=0)\n" + " R0=map_value(id=2,ks=4,vs=8)" + " R1_w=map_value(id=2,ks=4,vs=8)\n" "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" }, diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c index 51883ccb8020..196abf223465 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c @@ -2387,12 +2387,9 @@ static int generate_dummy_prog(void) const size_t prog_insn_cnt = sizeof(prog_insns) / sizeof(struct bpf_insn); LIBBPF_OPTS(bpf_prog_load_opts, opts); const size_t log_buf_sz = 256; - char *log_buf; + char 
log_buf[log_buf_sz]; int fd = -1; - log_buf = malloc(log_buf_sz); - if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc")) - return fd; opts.log_buf = log_buf; opts.log_size = log_buf_sz; @@ -2402,7 +2399,6 @@ static int generate_dummy_prog(void) prog_insns, prog_insn_cnt, &opts); ASSERT_STREQ(log_buf, "", "log_0"); ASSERT_GE(fd, 0, "prog_fd"); - free(log_buf); return fd; } diff --git a/tools/testing/selftests/bpf/prog_tests/vmlinux.c b/tools/testing/selftests/bpf/prog_tests/vmlinux.c index 72310cfc6474..6fb2217d940b 100644 --- a/tools/testing/selftests/bpf/prog_tests/vmlinux.c +++ b/tools/testing/selftests/bpf/prog_tests/vmlinux.c @@ -16,27 +16,27 @@ static void nsleep() void test_vmlinux(void) { - int duration = 0, err; + int err; struct test_vmlinux* skel; struct test_vmlinux__bss *bss; skel = test_vmlinux__open_and_load(); - if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + if (!ASSERT_OK_PTR(skel, "test_vmlinux__open_and_load")) return; bss = skel->bss; err = test_vmlinux__attach(skel); - if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + if (!ASSERT_OK(err, "test_vmlinux__attach")) goto cleanup; /* trigger everything */ nsleep(); - CHECK(!bss->tp_called, "tp", "not called\n"); - CHECK(!bss->raw_tp_called, "raw_tp", "not called\n"); - CHECK(!bss->tp_btf_called, "tp_btf", "not called\n"); - CHECK(!bss->kprobe_called, "kprobe", "not called\n"); - CHECK(!bss->fentry_called, "fentry", "not called\n"); + ASSERT_TRUE(bss->tp_called, "tp"); + ASSERT_TRUE(bss->raw_tp_called, "raw_tp"); + ASSERT_TRUE(bss->tp_btf_called, "tp_btf"); + ASSERT_TRUE(bss->kprobe_called, "kprobe"); + ASSERT_TRUE(bss->fentry_called, "fentry"); cleanup: test_vmlinux__destroy(skel); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c index f2b8167b72a8..442f4ca39fd7 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c @@ -35,6 +35,8 @@ int dump_task_stack(struct bpf_iter__task *ctx) return 0; } +int num_user_stacks = 0; + SEC("iter/task") int get_task_user_stacks(struct bpf_iter__task *ctx) { @@ -51,6 +53,9 @@ int get_task_user_stacks(struct bpf_iter__task *ctx) if (res <= 0) return 0; + /* Only one task, the current one, should succeed */ + ++num_user_stacks; + buf_sz += res; /* If the verifier doesn't refine bpf_get_task_stack res, and instead diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c index e1e5c54a6a11..49efaed143fc 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_assert.c +++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c @@ -18,48 +18,48 @@ return *(u64 *)num; \ } -__msg(": R0_w=-2147483648 R10=fp0") +__msg(": R0_w=0xffffffff80000000 R10=fp0") check_assert(s64, eq, int_min, INT_MIN); -__msg(": R0_w=2147483647 R10=fp0") +__msg(": R0_w=0x7fffffff R10=fp0") check_assert(s64, eq, int_max, INT_MAX); __msg(": R0_w=0 R10=fp0") check_assert(s64, eq, zero, 0); -__msg(": R0_w=-9223372036854775808 R1_w=-9223372036854775808 R10=fp0") +__msg(": R0_w=0x8000000000000000 R1_w=0x8000000000000000 R10=fp0") check_assert(s64, eq, llong_min, LLONG_MIN); -__msg(": R0_w=9223372036854775807 R1_w=9223372036854775807 R10=fp0") +__msg(": R0_w=0x7fffffffffffffff R1_w=0x7fffffffffffffff R10=fp0") check_assert(s64, eq, llong_max, LLONG_MAX); -__msg(": R0_w=scalar(smax=2147483646) R10=fp0") +__msg(": R0_w=scalar(smax=0x7ffffffe) R10=fp0") check_assert(s64, lt, pos, INT_MAX); 
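+/* The updated expectations spell wide bounds in hex; as a sanity check, + * INT_MIN (-2147483648) as a u64 bit pattern is 0xffffffff80000000 and + * LLONG_MIN is 0x8000000000000000, matching the decimal values they replace. + */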
-__msg(": R0_w=scalar(smax=-1,umin=9223372036854775808,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, lt, zero, 0); -__msg(": R0_w=scalar(smax=-2147483649,umin=9223372036854775808,umax=18446744071562067967,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smax=0xffffffff7fffffff,umin=0x8000000000000000,umax=0xffffffff7fffffff,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, lt, neg, INT_MIN); -__msg(": R0_w=scalar(smax=2147483647) R10=fp0") +__msg(": R0_w=scalar(smax=0x7fffffff) R10=fp0") check_assert(s64, le, pos, INT_MAX); __msg(": R0_w=scalar(smax=0) R10=fp0") check_assert(s64, le, zero, 0); -__msg(": R0_w=scalar(smax=-2147483648,umin=9223372036854775808,umax=18446744071562067968,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smax=0xffffffff80000000,umin=0x8000000000000000,umax=0xffffffff80000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, le, neg, INT_MIN); -__msg(": R0_w=scalar(smin=umin=2147483648,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, gt, pos, INT_MAX); -__msg(": R0_w=scalar(smin=umin=1,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, gt, zero, 0); -__msg(": R0_w=scalar(smin=-2147483647) R10=fp0") +__msg(": R0_w=scalar(smin=0xffffffff80000001) R10=fp0") check_assert(s64, gt, neg, INT_MIN); -__msg(": R0_w=scalar(smin=umin=2147483647,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, ge, pos, INT_MAX); -__msg(": R0_w=scalar(smin=0,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0") +__msg(": R0_w=scalar(smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0") check_assert(s64, ge, zero, 0); -__msg(": R0_w=scalar(smin=-2147483648) R10=fp0") +__msg(": R0_w=scalar(smin=0xffffffff80000000) R10=fp0") check_assert(s64, ge, neg, INT_MIN); SEC("?tc") __log_level(2) __failure -__msg(": R0=0 R1=ctx(off=0,imm=0) R2=scalar(smin=smin32=-2147483646,smax=smax32=2147483645) R10=fp0") +__msg(": R0=0 R1=ctx() R2=scalar(smin=0xffffffff80000002,smax=smax32=0x7ffffffd,smin32=0x80000002) R10=fp0") int check_assert_range_s64(struct __sk_buff *ctx) { struct bpf_sock *sk = ctx->sk; @@ -75,7 +75,7 @@ int check_assert_range_s64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R1=ctx(off=0,imm=0) R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))") +__msg(": R1=ctx() R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))") int check_assert_range_u64(struct __sk_buff *ctx) { u64 num = ctx->len; @@ -86,7 +86,7 @@ int check_assert_range_u64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R0=0 R1=ctx(off=0,imm=0) R2=4096 R10=fp0") +__msg(": R0=0 R1=ctx() R2=4096 R10=fp0") int check_assert_single_range_s64(struct __sk_buff *ctx) { struct bpf_sock *sk = ctx->sk; @@ -103,7 +103,7 @@ int check_assert_single_range_s64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R1=ctx(off=0,imm=0) R2=4096 R10=fp0") +__msg(": R1=ctx() R2=4096 
R10=fp0") int check_assert_single_range_u64(struct __sk_buff *ctx) { u64 num = ctx->len; @@ -114,7 +114,7 @@ int check_assert_single_range_u64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R1=pkt(off=64,r=64,imm=0) R2=pkt_end(off=0,imm=0) R6=pkt(off=0,r=64,imm=0) R10=fp0") +__msg(": R1=pkt(off=64,r=64) R2=pkt_end() R6=pkt(r=64) R10=fp0") int check_assert_generic(struct __sk_buff *ctx) { u8 *data_end = (void *)(long)ctx->data_end; diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index c20c4e38b71c..b2181f850d3e 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -1411,4 +1411,26 @@ __naked int checkpoint_states_deletion(void) ); } +struct { + int data[32]; + int n; +} loop_data; + +SEC("raw_tp") +__success +int iter_arr_with_actual_elem_count(const void *ctx) +{ + int i, n = loop_data.n, sum = 0; + + if (n > ARRAY_SIZE(loop_data.data)) + return 0; + + bpf_for(i, 0, n) { + /* no rechecking of i against ARRAY_SIZE(loop_data.n) */ + sum += loop_data.data[i]; + } + + return sum; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c index b567a666d2b8..1769fdff6aea 100644 --- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c @@ -14,6 +14,24 @@ struct node_data { struct bpf_rb_node node; }; +struct refcounted_node { + long data; + struct bpf_rb_node rb_node; + struct bpf_refcount refcount; +}; + +struct stash { + struct bpf_spin_lock l; + struct refcounted_node __kptr *stashed; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct stash); + __uint(max_entries, 10); +} refcounted_node_stash SEC(".maps"); + struct plain_local { long key; long data; @@ -38,6 +56,7 @@ struct map_value { * Had to do the same w/ bpf_kfunc_call_test_release below */ struct node_data *just_here_because_btf_bug; +struct refcounted_node *just_here_because_btf_bug2; struct { __uint(type, BPF_MAP_TYPE_ARRAY); @@ -132,4 +151,56 @@ long stash_test_ref_kfunc(void *ctx) return 0; } +SEC("tc") +long refcount_acquire_without_unstash(void *ctx) +{ + struct refcounted_node *p; + struct stash *s; + int ret = 0; + + s = bpf_map_lookup_elem(&refcounted_node_stash, &ret); + if (!s) + return 1; + + if (!s->stashed) + /* refcount_acquire failure is expected when no refcounted_node + * has been stashed before this program executes + */ + return 2; + + p = bpf_refcount_acquire(s->stashed); + if (!p) + return 3; + + ret = s->stashed ? 
s->stashed->data : -1; + bpf_obj_drop(p); + return ret; +} + +/* Helper for refcount_acquire_without_unstash test */ +SEC("tc") +long stash_refcounted_node(void *ctx) +{ + struct refcounted_node *p; + struct stash *s; + int key = 0; + + s = bpf_map_lookup_elem(&refcounted_node_stash, &key); + if (!s) + return 1; + + p = bpf_obj_new(typeof(*p)); + if (!p) + return 2; + p->data = 42; + + p = bpf_kptr_xchg(&s->stashed, p); + if (p) { + bpf_obj_drop(p); + return 3; + } + + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/pyperf180.c b/tools/testing/selftests/bpf/progs/pyperf180.c index c39f559d3100..42c4a8b62e36 100644 --- a/tools/testing/selftests/bpf/progs/pyperf180.c +++ b/tools/testing/selftests/bpf/progs/pyperf180.c @@ -1,4 +1,26 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook #define STACK_MAX_LEN 180 + +/* llvm upstream commit at clang18 + * https://github.com/llvm/llvm-project/commit/1a2e77cf9e11dbf56b5720c607313a566eebb16e + * changed inlining behavior and caused compilation failure as some branch + * target distance exceeded 16bit representation which is the maximum for + * cpu v1/v2/v3. Macro __BPF_CPU_VERSION__ is later implemented in clang18 + * to specify which cpu version is used for compilation. So a smaller + * unroll_count can be set if __BPF_CPU_VERSION__ is less than 4, which + * reduced some branch target distances and resolved the compilation failure. + * + * To capture the case where a developer/ci uses clang18 but the corresponding + * repo checkpoint does not have __BPF_CPU_VERSION__, a smaller unroll_count + * will be set as well to prevent potential compilation failures. + */ +#ifdef __BPF_CPU_VERSION__ +#if __BPF_CPU_VERSION__ < 4 +#define UNROLL_COUNT 90 +#endif +#elif __clang_major__ == 18 +#define UNROLL_COUNT 90 +#endif + #include "pyperf.h" diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c index 1ef07f6ee580..1553b9c16aa7 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c @@ -54,6 +54,25 @@ long rbtree_refcounted_node_ref_escapes(void *ctx) } SEC("?tc") +__failure __msg("Possibly NULL pointer passed to trusted arg0") +long refcount_acquire_maybe_null(void *ctx) +{ + struct node_acquire *n, *m; + + n = bpf_obj_new(typeof(*n)); + /* Intentionally not testing !n + * it's MAYBE_NULL for refcount_acquire + */ + m = bpf_refcount_acquire(n); + if (m) + bpf_obj_drop(m); + if (n) + bpf_obj_drop(n); + + return 0; +} + +SEC("?tc") __failure __msg("Unreleased reference id=3 alloc_insn=9") long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx) { diff --git a/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c new file mode 100644 index 000000000000..44628865fe1d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +__u32 target_ancestor_level; +__u64 target_ancestor_cgid; +int target_pid, target_hid; + +struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym; +struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym; +void bpf_cgroup_release(struct cgroup *cgrp) 
__ksym; + +static int bpf_link_create_verify(int cmd) +{ + struct cgroup *cgrp, *ancestor; + struct task_struct *task; + int ret = 0; + + if (cmd != BPF_LINK_CREATE) + return 0; + + task = bpf_get_current_task_btf(); + + /* Then it can run in parallel with others */ + if (task->pid != target_pid) + return 0; + + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + /* Refuse it if its cgid or its ancestor's cgid is the target cgid */ + if (cgrp->kn->id == target_ancestor_cgid) + ret = -1; + + ancestor = bpf_cgroup_ancestor(cgrp, target_ancestor_level); + if (!ancestor) + goto out; + + if (ancestor->kn->id == target_ancestor_cgid) + ret = -1; + bpf_cgroup_release(ancestor); + +out: + bpf_cgroup_release(cgrp); + return ret; +} + +SEC("lsm/bpf") +int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size) +{ + return bpf_link_create_verify(cmd); +} + +SEC("lsm.s/bpf") +int BPF_PROG(lsm_s_run, int cmd, union bpf_attr *attr, unsigned int size) +{ + return bpf_link_create_verify(cmd); +} + +SEC("fentry") +int BPF_PROG(fentry_run) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index c5588a14fe2e..ec430b71730b 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -965,6 +965,7 @@ l0_%=: r0 = 0; \ SEC("xdp") __description("bound check with JMP_JSLT for crossing 64-bit signed boundary") __success __retval(0) +__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */ __naked void crossing_64_bit_signed_boundary_2(void) { asm volatile (" \ @@ -1046,6 +1047,7 @@ l0_%=: r0 = 0; \ SEC("xdp") __description("bound check with JMP32_JSLT for crossing 32-bit signed boundary") __success __retval(0) +__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */ __naked void crossing_32_bit_signed_boundary_2(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 37ffa57f28a1..a350ecdfba4a 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -153,6 +153,14 @@ static int parse_retval(const char *str, int *val, const char *name) return parse_int(str, val, name); } +static void update_flags(int *flags, int flag, bool clear) +{ + if (clear) + *flags &= ~flag; + else + *flags |= flag; +} + /* Uses btf_decl_tag attributes to describe the expected test * behavior, see bpf_misc.h for detailed description of each attribute * and attribute combinations. 
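 * For example, with the '!' prefix handled below, a test can clear one of the * default flags, e.g. __flag(!BPF_F_TEST_REG_INVARIANTS) as used in * verifier_bounds.c above.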
@@ -171,6 +179,7 @@ static int parse_test_spec(struct test_loader *tester, memset(spec, 0, sizeof(*spec)); spec->prog_name = bpf_program__name(prog); + spec->prog_flags = BPF_F_TEST_REG_INVARIANTS; /* by default be strict */ btf = bpf_object__btf(obj); if (!btf) { @@ -187,7 +196,8 @@ static int parse_test_spec(struct test_loader *tester, for (i = 1; i < btf__type_cnt(btf); i++) { const char *s, *val, *msg; const struct btf_type *t; - int tmp; + bool clear; + int flags; t = btf__type_by_id(btf, i); if (!btf_is_decl_tag(t)) @@ -253,23 +263,30 @@ static int parse_test_spec(struct test_loader *tester, goto cleanup; } else if (str_has_pfx(s, TEST_TAG_PROG_FLAGS_PFX)) { val = s + sizeof(TEST_TAG_PROG_FLAGS_PFX) - 1; + + clear = val[0] == '!'; + if (clear) + val++; + if (strcmp(val, "BPF_F_STRICT_ALIGNMENT") == 0) { - spec->prog_flags |= BPF_F_STRICT_ALIGNMENT; + update_flags(&spec->prog_flags, BPF_F_STRICT_ALIGNMENT, clear); } else if (strcmp(val, "BPF_F_ANY_ALIGNMENT") == 0) { - spec->prog_flags |= BPF_F_ANY_ALIGNMENT; + update_flags(&spec->prog_flags, BPF_F_ANY_ALIGNMENT, clear); } else if (strcmp(val, "BPF_F_TEST_RND_HI32") == 0) { - spec->prog_flags |= BPF_F_TEST_RND_HI32; + update_flags(&spec->prog_flags, BPF_F_TEST_RND_HI32, clear); } else if (strcmp(val, "BPF_F_TEST_STATE_FREQ") == 0) { - spec->prog_flags |= BPF_F_TEST_STATE_FREQ; + update_flags(&spec->prog_flags, BPF_F_TEST_STATE_FREQ, clear); } else if (strcmp(val, "BPF_F_SLEEPABLE") == 0) { - spec->prog_flags |= BPF_F_SLEEPABLE; + update_flags(&spec->prog_flags, BPF_F_SLEEPABLE, clear); } else if (strcmp(val, "BPF_F_XDP_HAS_FRAGS") == 0) { - spec->prog_flags |= BPF_F_XDP_HAS_FRAGS; + update_flags(&spec->prog_flags, BPF_F_XDP_HAS_FRAGS, clear); + } else if (strcmp(val, "BPF_F_TEST_REG_INVARIANTS") == 0) { + update_flags(&spec->prog_flags, BPF_F_TEST_REG_INVARIANTS, clear); } else /* assume numeric value */ { - err = parse_int(val, &tmp, "test prog flags"); + err = parse_int(val, &flags, "test prog flags"); if (err) goto cleanup; - spec->prog_flags |= tmp; + update_flags(&spec->prog_flags, flags, clear); } } } diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 7fc00e423e4d..767e0693df10 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1396,13 +1396,18 @@ static void test_map_stress(void) #define MAX_DELAY_US 50000 #define MIN_DELAY_RANGE_US 5000 -static int map_update_retriable(int map_fd, const void *key, const void *value, - int flags, int attempts) +static bool retry_for_again_or_busy(int err) +{ + return (err == EAGAIN || err == EBUSY); +} + +int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts, + retry_for_error_fn need_retry) { int delay = rand() % MIN_DELAY_RANGE_US; while (bpf_map_update_elem(map_fd, key, value, flags)) { - if (!attempts || (errno != EAGAIN && errno != EBUSY)) + if (!attempts || !need_retry(errno)) return -errno; if (delay <= MAX_DELAY_US / 2) @@ -1445,11 +1450,13 @@ static void test_update_delete(unsigned int fn, void *data) key = value = i; if (do_update) { - err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES); + err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES, + retry_for_again_or_busy); if (err) printf("error %d %d\n", err, errno); assert(err == 0); - err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES); + err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES, + retry_for_again_or_busy); if 
(err) printf("error %d %d\n", err, errno); assert(err == 0); diff --git a/tools/testing/selftests/bpf/test_maps.h b/tools/testing/selftests/bpf/test_maps.h index f6fbca761732..e4ac704a536c 100644 --- a/tools/testing/selftests/bpf/test_maps.h +++ b/tools/testing/selftests/bpf/test_maps.h @@ -4,6 +4,7 @@ #include <stdio.h> #include <stdlib.h> +#include <stdbool.h> #define CHECK(condition, tag, format...) ({ \ int __ret = !!(condition); \ @@ -16,4 +17,8 @@ extern int skips; +typedef bool (*retry_for_error_fn)(int err); +int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts, + retry_for_error_fn need_retry); + #endif diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index 2c89674fc62c..b0068a9d2cfe 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -679,7 +679,7 @@ static int load_path(const struct sock_addr_test *test, const char *path) bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR); bpf_program__set_expected_attach_type(prog, test->expected_attach_type); - bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); + bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS); err = bpf_object__load(obj); if (err) { diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 98107e0452d3..f36e41435be7 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1588,7 +1588,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (fixup_skips != skips) return; - pflags = BPF_F_TEST_RND_HI32; + pflags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS; if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT) pflags |= BPF_F_STRICT_ALIGNMENT; if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 8d994884c7b4..d2458c1b1671 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -276,7 +276,7 @@ int bpf_prog_test_load(const char *file, enum bpf_prog_type type, if (type != BPF_PROG_TYPE_UNSPEC && bpf_program__type(prog) != type) bpf_program__set_type(prog, type); - flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32; + flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS; bpf_program__set_flags(prog, flags); err = bpf_object__load(obj); @@ -299,7 +299,7 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, { LIBBPF_OPTS(bpf_prog_load_opts, opts, .kern_version = kern_version, - .prog_flags = BPF_F_TEST_RND_HI32, + .prog_flags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS, .log_level = extra_prog_load_log_flags, .log_buf = log_buf, .log_size = log_buf_sz, diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 655095810d4a..1d418d66e375 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -18,6 +18,7 @@ #include <libelf.h> #include <gelf.h> #include <float.h> +#include <math.h> #ifndef ARRAY_SIZE #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) @@ -99,6 +100,7 @@ struct stat_specs { enum stat_id ids[ALL_STATS_CNT]; enum stat_variant variants[ALL_STATS_CNT]; bool asc[ALL_STATS_CNT]; + bool abs[ALL_STATS_CNT]; int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */ }; @@ -133,6 +135,7 @@ 
struct filter { int stat_id; enum stat_variant stat_var; long value; + bool abs; }; static struct env { @@ -142,10 +145,12 @@ static struct env { bool debug; bool quiet; bool force_checkpoints; + bool force_reg_invariants; enum resfmt out_fmt; bool show_version; bool comparison_mode; bool replay_mode; + int top_n; int log_level; int log_size; @@ -210,8 +215,7 @@ static const struct argp_option opts[] = { { "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" }, { "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" }, { "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" }, - { "test-states", 't', NULL, 0, - "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" }, + { "top-n", 'n', "N", 0, "Emit only up to first N results." }, { "quiet", 'q', NULL, 0, "Quiet mode" }, { "emit", 'e', "SPEC", 0, "Specify stats to be emitted" }, { "sort", 's', "SPEC", 0, "Specify sort order" }, @@ -219,6 +223,10 @@ static const struct argp_option opts[] = { { "compare", 'C', NULL, 0, "Comparison mode" }, { "replay", 'R', NULL, 0, "Replay mode" }, { "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." }, + { "test-states", 't', NULL, 0, + "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" }, + { "test-reg-invariants", 'r', NULL, 0, + "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" }, {}, }; @@ -290,6 +298,16 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 't': env.force_checkpoints = true; break; + case 'r': + env.force_reg_invariants = true; + break; + case 'n': + errno = 0; + env.top_n = strtol(arg, NULL, 10); + if (errno) { + fprintf(stderr, "invalid top N specifier: %s\n", arg); + argp_usage(state); + } case 'C': env.comparison_mode = true; break; @@ -455,7 +473,8 @@ static struct { { OP_EQ, "=" }, }; -static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var); +static bool parse_stat_id_var(const char *name, size_t len, int *id, + enum stat_variant *var, bool *is_abs); static int append_filter(struct filter **filters, int *cnt, const char *str) { @@ -488,13 +507,14 @@ static int append_filter(struct filter **filters, int *cnt, const char *str) long val; const char *end = str; const char *op_str; + bool is_abs; op_str = operators[i].op_str; p = strstr(str, op_str); if (!p) continue; - if (!parse_stat_id_var(str, p - str, &id, &var)) { + if (!parse_stat_id_var(str, p - str, &id, &var, &is_abs)) { fprintf(stderr, "Unrecognized stat name in '%s'!\n", str); return -EINVAL; } @@ -533,6 +553,7 @@ static int append_filter(struct filter **filters, int *cnt, const char *str) f->stat_id = id; f->stat_var = var; f->op = operators[i].op_kind; + f->abs = true; f->value = val; *cnt += 1; @@ -657,7 +678,8 @@ static struct stat_def { [MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, }, }; -static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var) +static bool parse_stat_id_var(const char *name, size_t len, int *id, + enum stat_variant *var, bool *is_abs) { static const char *var_sfxs[] = { [VARIANT_A] = "_a", @@ -667,6 +689,14 @@ static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_v }; int i, j, k; + /* |<stat>| means we take absolute value of given stat */ + *is_abs = false; + if (len > 2 && name[0] == 
'|' && name[len - 1] == '|') { + *is_abs = true; + name += 1; + len -= 2; + } + for (i = 0; i < ARRAY_SIZE(stat_defs); i++) { struct stat_def *def = &stat_defs[i]; size_t alias_len, sfx_len; @@ -722,7 +752,7 @@ static bool is_desc_sym(char c) static int parse_stat(const char *stat_name, struct stat_specs *specs) { int id; - bool has_order = false, is_asc = false; + bool has_order = false, is_asc = false, is_abs = false; size_t len = strlen(stat_name); enum stat_variant var; @@ -737,7 +767,7 @@ static int parse_stat(const char *stat_name, struct stat_specs *specs) len -= 1; } - if (!parse_stat_id_var(stat_name, len, &id, &var)) { + if (!parse_stat_id_var(stat_name, len, &id, &var, &is_abs)) { fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name); return -ESRCH; } @@ -745,6 +775,7 @@ static int parse_stat(const char *stat_name, struct stat_specs *specs) specs->ids[specs->spec_cnt] = id; specs->variants[specs->spec_cnt] = var; specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default; + specs->abs[specs->spec_cnt] = is_abs; specs->spec_cnt++; return 0; @@ -997,6 +1028,8 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf if (env.force_checkpoints) bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ); + if (env.force_reg_invariants) + bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS); err = bpf_object__load(obj); env.progs_processed++; @@ -1103,7 +1136,7 @@ cleanup: } static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, - enum stat_id id, bool asc) + enum stat_id id, bool asc, bool abs) { int cmp = 0; @@ -1124,6 +1157,11 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, long v1 = s1->stats[id]; long v2 = s2->stats[id]; + if (abs) { + v1 = v1 < 0 ? -v1 : v1; + v2 = v2 < 0 ? -v2 : v2; + } + if (v1 != v2) cmp = v1 < v2 ? 
		-1 : 1;
		break;
@@ -1142,7 +1180,8 @@ static int cmp_prog_stats(const void *v1, const void *v2)
	int i, cmp;

	for (i = 0; i < env.sort_spec.spec_cnt; i++) {
-		cmp = cmp_stat(s1, s2, env.sort_spec.ids[i], env.sort_spec.asc[i]);
+		cmp = cmp_stat(s1, s2, env.sort_spec.ids[i],
+			       env.sort_spec.asc[i], env.sort_spec.abs[i]);
		if (cmp != 0)
			return cmp;
	}
@@ -1211,7 +1250,8 @@ static void fetch_join_stat_value(const struct verif_stats_join *s,

 static int cmp_join_stat(const struct verif_stats_join *s1,
			 const struct verif_stats_join *s2,
-			 enum stat_id id, enum stat_variant var, bool asc)
+			 enum stat_id id, enum stat_variant var,
+			 bool asc, bool abs)
 {
	const char *str1 = NULL, *str2 = NULL;
	double v1, v2;
@@ -1220,6 +1260,11 @@ static int cmp_join_stat(const struct verif_stats_join *s1,
	fetch_join_stat_value(s1, id, var, &str1, &v1);
	fetch_join_stat_value(s2, id, var, &str2, &v2);

+	if (abs) {
+		v1 = fabs(v1);
+		v2 = fabs(v2);
+	}
+
	if (str1)
		cmp = strcmp(str1, str2);
	else if (v1 != v2)
@@ -1237,7 +1282,8 @@ static int cmp_join_stats(const void *v1, const void *v2)
		cmp = cmp_join_stat(s1, s2,
				    env.sort_spec.ids[i],
				    env.sort_spec.variants[i],
-				    env.sort_spec.asc[i]);
+				    env.sort_spec.asc[i],
+				    env.sort_spec.abs[i]);
		if (cmp != 0)
			return cmp;
	}
@@ -1720,6 +1766,9 @@ static bool is_join_stat_filter_matched(struct filter *f, const struct verif_sta
	fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value);

+	if (f->abs)
+		value = fabs(value);
+
	switch (f->op) {
	case OP_EQ: return value > f->value - eps && value < f->value + eps;
	case OP_NEQ: return value < f->value - eps || value > f->value + eps;
@@ -1766,7 +1815,7 @@ static int handle_comparison_mode(void)
	struct stat_specs base_specs = {}, comp_specs = {};
	struct stat_specs tmp_sort_spec;
	enum resfmt cur_fmt;
-	int err, i, j, last_idx;
+	int err, i, j, last_idx, cnt;

	if (env.filename_cnt != 2) {
		fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n");
@@ -1879,7 +1928,7 @@ static int handle_comparison_mode(void)
		env.join_stat_cnt += 1;
	}

-	/* now sort joined results accorsing to sort spec */
+	/* now sort joined results according to sort spec */
	qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats);

	/* for human-readable table output we need to do extra pass to
@@ -1896,16 +1945,22 @@ one_more_time:
	output_comp_headers(cur_fmt);

	last_idx = -1;
+	cnt = 0;
	for (i = 0; i < env.join_stat_cnt; i++) {
		const struct verif_stats_join *join = &env.join_stats[i];

		if (!should_output_join_stats(join))
			continue;

+		if (env.top_n && cnt >= env.top_n)
+			break;
+
		if (cur_fmt == RESFMT_TABLE_CALCLEN)
			last_idx = i;

		output_comp_stats(join, cur_fmt, i == last_idx);
+
+		cnt++;
	}

	if (cur_fmt == RESFMT_TABLE_CALCLEN) {
@@ -1920,6 +1975,9 @@ static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *s
 {
	long value = stats->stats[f->stat_id];

+	if (f->abs)
+		value = value < 0 ? -value : value;
+
	switch (f->op) {
	case OP_EQ: return value == f->value;
	case OP_NEQ: return value != f->value;
@@ -1964,7 +2022,7 @@ static bool should_output_stats(const struct verif_stats *stats)
 static void output_prog_stats(void)
 {
	const struct verif_stats *stats;
-	int i, last_stat_idx = 0;
+	int i, last_stat_idx = 0, cnt = 0;

	if (env.out_fmt == RESFMT_TABLE) {
		/* calculate column widths */
@@ -1984,7 +2042,10 @@ static void output_prog_stats(void)
		stats = &env.prog_stats[i];
		if (!should_output_stats(stats))
			continue;
+		if (env.top_n && cnt >= env.top_n)
+			break;
		output_stats(stats, env.out_fmt, i == last_stat_idx);
+		cnt++;
	}
 }
diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index 685034528018..65d14f3bbe30 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -36,7 +36,9 @@ DEFAULT_COMMAND="./test_progs"
 MOUNT_DIR="mnt"
 ROOTFS_IMAGE="root.img"
 OUTPUT_DIR="$HOME/.bpf_selftests"
-KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config" "tools/testing/selftests/bpf/config.${ARCH}")
+KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config"
+	"tools/testing/selftests/bpf/config.vm"
+	"tools/testing/selftests/bpf/config.${ARCH}")
 INDEX_URL="https://raw.githubusercontent.com/libbpf/ci/master/INDEX"
 NUM_COMPILE_JOBS="$(nproc)"
 LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh
new file mode 100755
index 000000000000..fe0343b95e6c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that PCI reset works correctly by verifying that only the expected reset
+# methods are supported and that after issuing the reset the ifindex of the
+# port changes.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	pci_reset_test
+"
+NUM_NETIFS=1
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+pci_reset_test()
+{
+	RET=0
+
+	local bus=$(echo $DEVLINK_DEV | cut -d '/' -f 1)
+	local bdf=$(echo $DEVLINK_DEV | cut -d '/' -f 2)
+
+	if [ $bus != "pci" ]; then
+		check_err 1 "devlink device is not a PCI device"
+		log_test "pci reset"
+		return
+	fi
+
+	if [ ! -f /sys/bus/pci/devices/$bdf/reset_method ]; then
+		check_err 1 "reset is not supported"
+		log_test "pci reset"
+		return
+	fi
+
+	[[ $(cat /sys/bus/pci/devices/$bdf/reset_method) == "bus" ]]
+	check_err $? "only \"bus\" reset method should be supported"
+
+	local ifindex_pre=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]')
+
+	echo 1 > /sys/bus/pci/devices/$bdf/reset
+	check_err $? "reset failed"
+
+	# Wait for udev to rename newly created netdev.
+	udevadm settle
+
+	local ifindex_post=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]')
+
+	[[ $ifindex_pre != $ifindex_post ]]
+	check_err $? "reset not performed"
+
+	log_test "pci reset"
+}
+
+swp1=${NETIFS[p1]}
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 5b2aca4c5f10..9274edfb76ff 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -91,6 +91,7 @@ TEST_PROGS += test_bridge_neigh_suppress.sh
 TEST_PROGS += test_vxlan_nolocalbypass.sh
 TEST_PROGS += test_bridge_backup_port.sh
 TEST_PROGS += fdb_flush.sh
+TEST_PROGS += fq_band_pktlimit.sh
 TEST_FILES := settings
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index 24b21b15ed3f..8d7575389f58 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -45,11 +45,13 @@ struct options {
	const char *host;
	const char *service;
	unsigned int size;
+	unsigned int num_pkt;
	struct {
		unsigned int mark;
		unsigned int dontfrag;
		unsigned int tclass;
		unsigned int hlimit;
+		unsigned int priority;
	} sockopt;
	struct {
		unsigned int family;
@@ -72,6 +74,7 @@ struct options {
	} v6;
 } opt = {
	.size = 13,
+	.num_pkt = 1,
	.sock = {
		.family = AF_UNSPEC,
		.type = SOCK_DGRAM,
@@ -112,7 +115,7 @@ static void cs_parse_args(int argc, char *argv[])
 {
	int o;

-	while ((o = getopt(argc, argv, "46sS:p:m:M:d:tf:F:c:C:l:L:H:")) != -1) {
+	while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) {
		switch (o) {
		case 's':
			opt.silent_send = true;
@@ -138,7 +141,9 @@ static void cs_parse_args(int argc, char *argv[])
				cs_usage(argv[0]);
			}
			break;
-
+		case 'P':
+			opt.sockopt.priority = atoi(optarg);
+			break;
		case 'm':
			opt.mark.ena = true;
			opt.mark.val = atoi(optarg);
@@ -146,6 +151,9 @@ static void cs_parse_args(int argc, char *argv[])
		case 'M':
			opt.sockopt.mark = atoi(optarg);
			break;
+		case 'n':
+			opt.num_pkt = atoi(optarg);
+			break;
		case 'd':
			opt.txtime.ena = true;
			opt.txtime.delay = atoi(optarg);
@@ -410,6 +418,10 @@ static void ca_set_sockopts(int fd)
	    setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS,
		       &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit)))
		error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT");
+	if (opt.sockopt.priority &&
+	    setsockopt(fd, SOL_SOCKET, SO_PRIORITY,
+		       &opt.sockopt.priority, sizeof(opt.sockopt.priority)))
+		error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY");
 }

 int main(int argc, char *argv[])
@@ -421,6 +433,7 @@ int main(int argc, char *argv[])
	char *buf;
	int err;
	int fd;
+	int i;

	cs_parse_args(argc, argv);
@@ -480,24 +493,27 @@ int main(int argc, char *argv[])
	cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf));

-	err = sendmsg(fd, &msg, 0);
-	if (err < 0) {
-		if (!opt.silent_send)
-			fprintf(stderr, "send failed: %s\n", strerror(errno));
-		err = ERN_SEND;
-		goto err_out;
-	} else if (err != (int)opt.size) {
-		fprintf(stderr, "short send\n");
-		err = ERN_SEND_SHORT;
-		goto err_out;
-	} else {
-		err = ERN_SUCCESS;
+	for (i = 0; i < opt.num_pkt; i++) {
+		err = sendmsg(fd, &msg, 0);
+		if (err < 0) {
+			if (!opt.silent_send)
+				fprintf(stderr, "send failed: %s\n", strerror(errno));
+			err = ERN_SEND;
+			goto err_out;
+		} else if (err != (int)opt.size) {
+			fprintf(stderr, "short send\n");
+			err = ERN_SEND_SHORT;
+			goto err_out;
+		}
	}
+	err = ERN_SUCCESS;

-	/* Make sure all timestamps have time to loop back */
-	usleep(opt.txtime.delay);
+	if (opt.ts.ena) {
+		/* Make sure all timestamps have time to loop back */
+		usleep(opt.txtime.delay);

-	cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf));
+		cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf));
+	}

 err_out:
	close(fd);
diff --git a/tools/testing/selftests/net/fq_band_pktlimit.sh b/tools/testing/selftests/net/fq_band_pktlimit.sh
new file mode 100755
index 000000000000..24b77bdf41ff
--- /dev/null
+++ b/tools/testing/selftests/net/fq_band_pktlimit.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Verify that FQ has a packet limit per band:
+#
+# 1. set the limit to 10 per band
+# 2. send 20 pkts on band A: verify that 10 are queued, 10 dropped
+# 3. send 20 pkts on band A: verify that 0 are queued, 20 dropped
+# 4. send 20 pkts on band B: verify that 10 are queued, 10 dropped
+#
+# Send packets with a 100ms delay to ensure that previously sent
+# packets are still queued when later ones are sent.
+# Use SO_TXTIME for this.
+
+die() {
+	echo "$1"
+	exit 1
+}
+
+# run inside private netns
+if [[ $# -eq 0 ]]; then
+	./in_netns.sh "$0" __subprocess
+	exit
+fi
+
+ip link add type dummy
+ip link set dev dummy0 up
+ip -6 addr add fdaa::1/128 dev dummy0
+ip -6 route add fdaa::/64 dev dummy0
+tc qdisc replace dev dummy0 root handle 1: fq quantum 1514 initial_quantum 1514 limit 10
+
+./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000
+OUT1="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000
+OUT2="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+./cmsg_sender -6 -p u -d 100000 -n 20 -P 7 fdaa::2 8000
+OUT3="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+# Initial stats will report zero sent, as all packets are still
+# queued in FQ. Sleep for the delay period (100ms) and see that
+# twenty are now sent.
+sleep 0.1
+OUT4="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+# Log the output after the test
+echo "${OUT1}"
+echo "${OUT2}"
+echo "${OUT3}"
+echo "${OUT4}"
+
+# Test the output for expected values
+echo "${OUT1}" | grep -q '0\ pkt\ (dropped\ 10' || die "unexpected drop count at 1"
+echo "${OUT2}" | grep -q '0\ pkt\ (dropped\ 30' || die "unexpected drop count at 2"
+echo "${OUT3}" | grep -q '0\ pkt\ (dropped\ 40' || die "unexpected drop count at 3"
+echo "${OUT4}" | grep -q '20\ pkt\ (dropped\ 40' || die "unexpected accept count at 4"
diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh
new file mode 100755
index 000000000000..4fe0befa13fb
--- /dev/null
+++ b/tools/testing/selftests/net/net_helper.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Helper functions
+
+wait_local_port_listen()
+{
+	local listener_ns="${1}"
+	local port="${2}"
+	local protocol="${3}"
+	local port_hex
+	local i
+
+	port_hex="$(printf "%04X" "${port}")"
+	for i in $(seq 10); do
+		if ip netns exec "${listener_ns}" cat /proc/net/"${protocol}"* | \
+		   grep -q "${port_hex}"; then
+			break
+		fi
+		sleep 0.1
+	done
+}
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index 0c743752669a..af5dc57c8ce9 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -3,6 +3,8 @@
 #
 # Run a series of udpgro functional tests.

+source net_helper.sh
+
 readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
 BPF_FILE="../bpf/xdp_dummy.bpf.o"
@@ -51,8 +53,7 @@ run_one() {
		echo "ok" || \
		echo "failed" &

-	# Hack: let bg programs complete the startup
-	sleep 0.2
+	wait_local_port_listen ${PEER_NS} 8000 udp
	./udpgso_bench_tx ${tx_args}
	ret=$?
	wait $(jobs -p)
@@ -97,7 +98,7 @@ run_one_nat() {
		echo "ok" || \
		echo "failed"&

-	sleep 0.1
+	wait_local_port_listen "${PEER_NS}" 8000 udp
	./udpgso_bench_tx ${tx_args}
	ret=$?
	kill -INT $pid
@@ -118,11 +119,9 @@ run_one_2sock() {
		echo "ok" || \
		echo "failed" &

-	# Hack: let bg programs complete the startup
-	sleep 0.2
+	wait_local_port_listen "${PEER_NS}" 12345 udp
	./udpgso_bench_tx ${tx_args} -p 12345
-	sleep 0.1
-	# first UDP GSO socket should be closed at this point
+	wait_local_port_listen "${PEER_NS}" 8000 udp
	./udpgso_bench_tx ${tx_args}
	ret=$?
	wait $(jobs -p)
diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh
index 894972877e8b..cb664679b434 100755
--- a/tools/testing/selftests/net/udpgro_bench.sh
+++ b/tools/testing/selftests/net/udpgro_bench.sh
@@ -3,6 +3,8 @@
 #
 # Run a series of udpgro benchmarks

+source net_helper.sh
+
 readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
 BPF_FILE="../bpf/xdp_dummy.bpf.o"
@@ -40,8 +42,7 @@ run_one() {
	ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
	ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r &

-	# Hack: let bg programs complete the startup
-	sleep 0.2
+	wait_local_port_listen "${PEER_NS}" 8000 udp
	./udpgso_bench_tx ${tx_args}
 }
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
index 0a6359bed0b9..dd47fa96f6b3 100755
--- a/tools/testing/selftests/net/udpgro_frglist.sh
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -3,6 +3,8 @@
 #
 # Run a series of udpgro benchmarks

+source net_helper.sh
+
 readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
 BPF_FILE="../bpf/xdp_dummy.bpf.o"
@@ -45,8 +47,7 @@ run_one() {
	echo ${rx_args}
	ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &

-	# Hack: let bg programs complete the startup
-	sleep 0.2
+	wait_local_port_listen "${PEER_NS}" 8000 udp
	./udpgso_bench_tx ${tx_args}
 }
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
index b62429b0fcdb..65c8f3f983b9 100644
--- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
+++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
@@ -9,43 +9,13 @@ from TdcPlugin import TdcPlugin
 from tdc_config import *

-def prepare_suite(obj, test):
-    original = obj.args.NAMES
-
-    if 'skip' in test and test['skip'] == 'yes':
-        return
-
-    if 'nsPlugin' not in test['plugins']:
-        return
-
-    shadow = {}
-    shadow['IP'] = original['IP']
-    shadow['TC'] = original['TC']
-    shadow['NS'] = '{}-{}'.format(original['NS'], test['random'])
-    shadow['DEV0'] = '{}id{}'.format(original['DEV0'], test['id'])
-    shadow['DEV1'] = '{}id{}'.format(original['DEV1'], test['id'])
-    shadow['DUMMY'] = '{}id{}'.format(original['DUMMY'], test['id'])
-    shadow['DEV2'] = original['DEV2']
-    obj.args.NAMES = shadow
-
-    if obj.args.namespace:
-        obj._ns_create()
-    else:
-        obj._ports_create()
-
-    # Make sure the netns is visible in the fs
-    while True:
-        obj._proc_check()
-        try:
-            ns = obj.args.NAMES['NS']
-            f = open('/run/netns/{}'.format(ns))
-            f.close()
-            break
-        except:
-            time.sleep(0.1)
-            continue
-
-    obj.args.NAMES = original
+try:
+    from pyroute2 import netns
+    from pyroute2 import IPRoute
+    netlink = True
+except ImportError:
+    netlink = False
+    print("!!! Consider installing pyroute2 !!!")

 class SubPlugin(TdcPlugin):
     def __init__(self):
@@ -57,60 +27,69 @@ class SubPlugin(TdcPlugin):
         super().pre_suite(testcount, testlist)

-        print("Setting up namespaces and devices...")
+    def prepare_test(self, test):
+        if 'skip' in test and test['skip'] == 'yes':
+            return

-        with Pool(self.args.mp) as p:
-            it = zip(cycle([self]), testlist)
-            p.starmap(prepare_suite, it)
+        if 'nsPlugin' not in test['plugins']:
+            return

-    def pre_case(self, caseinfo, test_skip):
+        if netlink == True:
+            self._nl_ns_create()
+        else:
+            self._ns_create()
+
+        # Make sure the netns is visible in the fs
+        ticks = 20
+        while True:
+            if ticks == 0:
+                raise TimeoutError
+            self._proc_check()
+            try:
+                ns = self.args.NAMES['NS']
+                f = open('/run/netns/{}'.format(ns))
+                f.close()
+                break
+            except:
+                time.sleep(0.1)
+                ticks -= 1
+                continue
+
+    def pre_case(self, test, test_skip):
         if self.args.verbose:
             print('{}.pre_case'.format(self.sub_class))

         if test_skip:
             return
+        self.prepare_test(test)

     def post_case(self):
         if self.args.verbose:
             print('{}.post_case'.format(self.sub_class))

-        if self.args.namespace:
-            self._ns_destroy()
+        if netlink == True:
+            self._nl_ns_destroy()
         else:
-            self._ports_destroy()
+            self._ns_destroy()

     def post_suite(self, index):
         if self.args.verbose:
             print('{}.post_suite'.format(self.sub_class))

         # Make sure we don't leak resources
-        for f in os.listdir('/run/netns/'):
-            cmd = self._replace_keywords("$IP netns del {}".format(f))
-
-            if self.args.verbose > 3:
-                print('_exec_cmd: command "{}"'.format(cmd))
+        cmd = "$IP -a netns del"

-            subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+        if self.args.verbose > 3:
+            print('_exec_cmd: command "{}"'.format(cmd))

-    def add_args(self, parser):
-        super().add_args(parser)
-        self.argparser_group = self.argparser.add_argument_group(
-            'netns',
-            'options for nsPlugin(run commands in net namespace)')
-        self.argparser_group.add_argument(
-            '-N', '--no-namespace', action='store_false', default=True,
-            dest='namespace', help='Don\'t run commands in namespace')
-        return self.argparser
+        subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

     def adjust_command(self, stage, command):
         super().adjust_command(stage, command)
         cmdform = 'list'
         cmdlist = list()

-        if not self.args.namespace:
-            return command
-
         if self.args.verbose:
             print('{}.adjust_command'.format(self.sub_class))
@@ -138,41 +117,67 @@ class SubPlugin(TdcPlugin):
             print('adjust_command: return command [{}]'.format(command))
         return command

-    def _ports_create_cmds(self):
-        cmds = []
-
-        cmds.append(self._replace_keywords('link add $DEV0 type veth peer name $DEV1'))
-        cmds.append(self._replace_keywords('link set $DEV0 up'))
-        cmds.append(self._replace_keywords('link add $DUMMY type dummy'))
-        if not self.args.namespace:
-            cmds.append(self._replace_keywords('link set $DEV1 up'))
-
-        return cmds
+    def _nl_ns_create(self):
+        ns = self.args.NAMES["NS"];
+        dev0 = self.args.NAMES["DEV0"];
+        dev1 = self.args.NAMES["DEV1"];
+        dummy = self.args.NAMES["DUMMY"];

-    def _ports_create(self):
-        self._exec_cmd_batched('pre', self._ports_create_cmds())
-
-    def _ports_destroy_cmd(self):
-        return self._replace_keywords('link del $DEV0')
-
-    def _ports_destroy(self):
-        self._exec_cmd('post', self._ports_destroy_cmd())
+        if self.args.verbose:
+            print('{}._nl_ns_create'.format(self.sub_class))
+
+        netns.create(ns)
+        netns.pushns(newns=ns)
+        with IPRoute() as ip:
+            ip.link('add', ifname=dev1, kind='veth', peer={'ifname': dev0, 'net_ns_fd':'/proc/1/ns/net'})
+            ip.link('add', ifname=dummy, kind='dummy')
+            ticks = 20
+            while True:
+                if ticks == 0:
+                    raise TimeoutError
+                try:
+                    dev1_idx = ip.link_lookup(ifname=dev1)[0]
+                    dummy_idx = ip.link_lookup(ifname=dummy)[0]
+                    ip.link('set', index=dev1_idx, state='up')
+                    ip.link('set', index=dummy_idx, state='up')
+                    break
+                except:
+                    time.sleep(0.1)
+                    ticks -= 1
+                    continue
+        netns.popns()
+
+        with IPRoute() as ip:
+            ticks = 20
+            while True:
+                if ticks == 0:
+                    raise TimeoutError
+                try:
+                    dev0_idx = ip.link_lookup(ifname=dev0)[0]
+                    ip.link('set', index=dev0_idx, state='up')
+                    break
+                except:
+                    time.sleep(0.1)
+                    ticks -= 1
+                    continue

     def _ns_create_cmds(self):
         cmds = []

-        if self.args.namespace:
-            ns = self.args.NAMES['NS']
+        ns = self.args.NAMES['NS']

-            cmds.append(self._replace_keywords('netns add {}'.format(ns)))
-            cmds.append(self._replace_keywords('link set $DEV1 netns {}'.format(ns)))
-            cmds.append(self._replace_keywords('link set $DUMMY netns {}'.format(ns)))
-            cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV1 up'.format(ns)))
-            cmds.append(self._replace_keywords('netns exec {} $IP link set $DUMMY up'.format(ns)))
+        cmds.append(self._replace_keywords('netns add {}'.format(ns)))
+        cmds.append(self._replace_keywords('link add $DEV1 type veth peer name $DEV0'))
+        cmds.append(self._replace_keywords('link set $DEV1 netns {}'.format(ns)))
+        cmds.append(self._replace_keywords('link add $DUMMY type dummy'.format(ns)))
+        cmds.append(self._replace_keywords('link set $DUMMY netns {}'.format(ns)))
+        cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV1 up'.format(ns)))
+        cmds.append(self._replace_keywords('netns exec {} $IP link set $DUMMY up'.format(ns)))
+        cmds.append(self._replace_keywords('link set $DEV0 up'.format(ns)))

-            if self.args.device:
-                cmds.append(self._replace_keywords('link set $DEV2 netns {}'.format(ns)))
-                cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV2 up'.format(ns)))
+        if self.args.device:
+            cmds.append(self._replace_keywords('link set $DEV2 netns {}'.format(ns)))
+            cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV2 up'.format(ns)))

         return cmds
@@ -181,9 +186,12 @@ class SubPlugin(TdcPlugin):
         Create the network namespace in which the tests will be run and set up
         the required network devices for it.
         '''
-        self._ports_create()
         self._exec_cmd_batched('pre', self._ns_create_cmds())

+    def _nl_ns_destroy(self):
+        ns = self.args.NAMES['NS']
+        netns.remove(ns)
+
     def _ns_destroy_cmd(self):
         return self._replace_keywords('netns delete {}'.format(self.args.NAMES['NS']))
@@ -192,9 +200,7 @@ class SubPlugin(TdcPlugin):
         Destroy the network namespace for testing (and any associated network
         devices as well)
         '''
-        if self.args.namespace:
-            self._exec_cmd('post', self._ns_destroy_cmd())
-        self._ports_destroy()
+        self._exec_cmd('post', self._ns_destroy_cmd())

     @cached_property
     def _proc(self):
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
index ddc7c355be0a..24bd0c2a3014 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
@@ -272,5 +272,62 @@
         "teardown": [
             "$TC qdisc del dev $DEV1 parent root drr"
         ]
+    },
+    {
+        "id": "bd32",
+        "name": "Try to delete hashtable referenced by another u32 filter",
+        "category": [
+            "filter",
+            "u32"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 1: u32 divisor 1",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 1:"
+        ],
+        "cmdUnderTest": "$TC filter delete dev $DEV1 parent 10: prio 2 handle 1: u32",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter show dev $DEV1",
+        "matchPattern": "protocol ip pref 2 u32 chain 0 fh 1:",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 parent root drr"
+        ]
+    },
+    {
+        "id": "4585",
+        "name": "Delete small tree of u32 hashtables and filters",
+        "category": [
+            "filter",
+            "u32"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 1: u32 divisor 1",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 2: u32 divisor 1",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 3: u32 divisor 2",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 4: u32 divisor 1",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 1: match ip src any action drop",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 2: match ip src any action drop",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 3: match ip src any link 2:",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 3: match ip src any link 1:",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 4: match ip src any action drop",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 3:",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 4:"
+        ],
+        "cmdUnderTest": "$TC filter delete dev $DEV1 parent 10:",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1",
+        "matchPattern": "protocol ip pref 2 u32",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 parent root drr"
+        ]
+    }
 ]
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index a6718192aff3..669ec89ebfe1 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -616,7 +616,7 @@ def test_runner_mp(pm, args, alltests):
         batches.insert(0, serial)

     print("Executing {} tests in parallel and {} in serial".format(len(parallel), len(serial)))
-    print("Using {} batches".format(len(batches)))
+    print("Using {} batches and {} workers".format(len(batches), args.mp))

     # We can't pickle these objects so workaround them
     global mp_pm
@@ -1017,6 +1017,7 @@ def main():
     parser = pm.call_add_args(parser)
     (args, remaining) = parser.parse_known_args()
     args.NAMES = NAMES
+    args.mp = min(args.mp, 4)
     pm.set_args(args)
     check_default_settings(args, remaining, pm)
     if args.verbose > 2:
diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh
index eb357bd7923c..4dbe50bde5a0 100755
--- a/tools/testing/selftests/tc-testing/tdc.sh
+++ b/tools/testing/selftests/tc-testing/tdc.sh
@@ -1,7 +1,68 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0

-modprobe netdevsim
-modprobe sch_teql
-./tdc.py -c actions --nobuildebpf
-./tdc.py -c qdisc
+# If a module is required and was not compiled
+# the test that requires it will fail anyways
+try_modprobe() {
+	modprobe -q -R "$1"
+	if [ $? -ne 0 ]; then
+		echo "Module $1 not found... skipping."
+	else
+		modprobe "$1"
+	fi
+}
+
+try_modprobe netdevsim
+try_modprobe act_bpf
+try_modprobe act_connmark
+try_modprobe act_csum
+try_modprobe act_ct
+try_modprobe act_ctinfo
+try_modprobe act_gact
+try_modprobe act_gate
+try_modprobe act_ipt
+try_modprobe act_mirred
+try_modprobe act_mpls
+try_modprobe act_nat
+try_modprobe act_pedit
+try_modprobe act_police
+try_modprobe act_sample
+try_modprobe act_simple
+try_modprobe act_skbedit
+try_modprobe act_skbmod
+try_modprobe act_tunnel_key
+try_modprobe act_vlan
+try_modprobe cls_basic
+try_modprobe cls_bpf
+try_modprobe cls_cgroup
+try_modprobe cls_flow
+try_modprobe cls_flower
+try_modprobe cls_fw
+try_modprobe cls_matchall
+try_modprobe cls_route
+try_modprobe cls_u32
+try_modprobe em_canid
+try_modprobe em_cmp
+try_modprobe em_ipset
+try_modprobe em_ipt
+try_modprobe em_meta
+try_modprobe em_nbyte
+try_modprobe em_text
+try_modprobe em_u32
+try_modprobe sch_cake
+try_modprobe sch_cbs
+try_modprobe sch_choke
+try_modprobe sch_codel
+try_modprobe sch_drr
+try_modprobe sch_etf
+try_modprobe sch_ets
+try_modprobe sch_fq
+try_modprobe sch_fq_codel
+try_modprobe sch_fq_pie
+try_modprobe sch_gred
+try_modprobe sch_hfsc
+try_modprobe sch_hhf
+try_modprobe sch_htb
+try_modprobe sch_teql
+./tdc.py -J`nproc` -c actions --nobuildebpf
+./tdc.py -J`nproc` -c qdisc |
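Several of the selftest scripts in this series replace fixed sleeps with the new wait_local_port_listen helper from net_helper.sh, which polls /proc/net until the listener's port appears. A minimal usage sketch follows; receiver_cmd, sender_cmd and the PEER_NS value are illustrative placeholders, not names taken from the tree:

    source net_helper.sh

    ip netns exec "${PEER_NS}" ./receiver_cmd &     # placeholder: background receiver bound to UDP port 8000
    wait_local_port_listen "${PEER_NS}" 8000 udp    # polls /proc/net/udp* for 1F40 (8000 in hex), roughly 1s at most
    ./sender_cmd                                    # placeholder: start transmitting only once the port is visible
    wait $(jobs -p)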