summaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/mellanox/mlxsw
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-04-26 16:07:23 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-04-26 16:07:23 -0700
commit6e98b09da931a00bf4e0477d0fa52748bf28fcce (patch)
tree9c658ed95add5693f42f29f63df80a2ede3f6ec2 /drivers/net/ethernet/mellanox/mlxsw
parentb68ee1c6131c540a62ecd443be89c406401df091 (diff)
parent9b78d919632b7149d311aaad5a977e4b48b10321 (diff)
downloadlinux-stable-6e98b09da931a00bf4e0477d0fa52748bf28fcce.tar.gz
linux-stable-6e98b09da931a00bf4e0477d0fa52748bf28fcce.tar.bz2
linux-stable-6e98b09da931a00bf4e0477d0fa52748bf28fcce.zip
Merge tag 'net-next-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Paolo Abeni: "Core: - Introduce a config option to tweak MAX_SKB_FRAGS. Increasing the default value allows for better BIG TCP performances - Reduce compound page head access for zero-copy data transfers - RPS/RFS improvements, avoiding unneeded NET_RX_SOFTIRQ when possible - Threaded NAPI improvements, adding defer skb free support and unneeded softirq avoidance - Address dst_entry reference count scalability issues, via false sharing avoidance and optimize refcount tracking - Add lockless accesses annotation to sk_err[_soft] - Optimize again the skb struct layout - Extends the skb drop reasons to make it usable by multiple subsystems - Better const qualifier awareness for socket casts BPF: - Add skb and XDP typed dynptrs which allow BPF programs for more ergonomic and less brittle iteration through data and variable-sized accesses - Add a new BPF netfilter program type and minimal support to hook BPF programs to netfilter hooks such as prerouting or forward - Add more precise memory usage reporting for all BPF map types - Adds support for using {FOU,GUE} encap with an ipip device operating in collect_md mode and add a set of BPF kfuncs for controlling encap params - Allow BPF programs to detect at load time whether a particular kfunc exists or not, and also add support for this in light skeleton - Bigger batch of BPF verifier improvements to prepare for upcoming BPF open-coded iterators allowing for less restrictive looping capabilities - Rework RCU enforcement in the verifier, add kptr_rcu and enforce BPF programs to NULL-check before passing such pointers into kfunc - Add support for kptrs in percpu hashmaps, percpu LRU hashmaps and in local storage maps - Enable RCU semantics for task BPF kptrs and allow referenced kptr tasks to be stored in BPF maps - Add support for refcounted local kptrs to the verifier for allowing shared ownership, useful for adding a node to both the BPF list and rbtree - Add BPF verifier support for ST instructions in convert_ctx_access() which will help new -mcpu=v4 clang flag to start emitting them - Add ARM32 USDT support to libbpf - Improve bpftool's visual program dump which produces the control flow graph in a DOT format by adding C source inline annotations Protocols: - IPv4: Allow adding to IPv4 address a 'protocol' tag. Such value indicates the provenance of the IP address - IPv6: optimize route lookup, dropping unneeded R/W lock acquisition - Add the handshake upcall mechanism, allowing the user-space to implement generic TLS handshake on kernel's behalf - Bridge: support per-{Port, VLAN} neighbor suppression, increasing resilience to nodes failures - SCTP: add support for Fair Capacity and Weighted Fair Queueing schedulers - MPTCP: delay first subflow allocation up to its first usage. This will allow for later better LSM interaction - xfrm: Remove inner/outer modes from input/output path. These are not needed anymore - WiFi: - reduced neighbor report (RNR) handling for AP mode - HW timestamping support - support for randomized auth/deauth TA for PASN privacy - per-link debugfs for multi-link - TC offload support for mac80211 drivers - mac80211 mesh fast-xmit and fast-rx support - enable Wi-Fi 7 (EHT) mesh support Netfilter: - Add nf_tables 'brouting' support, to force a packet to be routed instead of being bridged - Update bridge netfilter and ovs conntrack helpers to handle IPv6 Jumbo packets properly, i.e. fetch the packet length from hop-by-hop extension header. This is needed for BIT TCP support - The iptables 32bit compat interface isn't compiled in by default anymore - Move ip(6)tables builtin icmp matches to the udptcp one. This has the advantage that icmp/icmpv6 match doesn't load the iptables/ip6tables modules anymore when iptables-nft is used - Extended netlink error report for netdevice in flowtables and netdev/chains. Allow for incrementally add/delete devices to netdev basechain. Allow to create netdev chain without device Driver API: - Remove redundant Device Control Error Reporting Enable, as PCI core has already error reporting enabled at enumeration time - Move Multicast DB netlink handlers to core, allowing devices other then bridge to use them - Allow the page_pool to directly recycle the pages from safely localized NAPI - Implement lockless TX queue stop/wake combo macros, allowing for further code de-duplication and sanitization - Add YNL support for user headers and struct attrs - Add partial YNL specification for devlink - Add partial YNL specification for ethtool - Add tc-mqprio and tc-taprio support for preemptible traffic classes - Add tx push buf len param to ethtool, specifies the maximum number of bytes of a transmitted packet a driver can push directly to the underlying device - Add basic LED support for switch/phy - Add NAPI documentation, stop relaying on external links - Convert dsa_master_ioctl() to netdev notifier. This is a preparatory work to make the hardware timestamping layer selectable by user space - Add transceiver support and improve the error messages for CAN-FD controllers New hardware / drivers: - Ethernet: - AMD/Pensando core device support - MediaTek MT7981 SoC - MediaTek MT7988 SoC - Broadcom BCM53134 embedded switch - Texas Instruments CPSW9G ethernet switch - Qualcomm EMAC3 DWMAC ethernet - StarFive JH7110 SoC - NXP CBTX ethernet PHY - WiFi: - Apple M1 Pro/Max devices - RealTek rtl8710bu/rtl8188gu - RealTek rtl8822bs, rtl8822cs and rtl8821cs SDIO chipset - Bluetooth: - Realtek RTL8821CS, RTL8851B, RTL8852BS - Mediatek MT7663, MT7922 - NXP w8997 - Actions Semi ATS2851 - QTI WCN6855 - Marvell 88W8997 - Can: - STMicroelectronics bxcan stm32f429 Drivers: - Ethernet NICs: - Intel (1G, icg): - add tracking and reporting of QBV config errors - add support for configuring max SDU for each Tx queue - Intel (100G, ice): - refactor mailbox overflow detection to support Scalable IOV - GNSS interface optimization - Intel (i40e): - support XDP multi-buffer - nVidia/Mellanox: - add the support for linux bridge multicast offload - enable TC offload for egress and engress MACVLAN over bond - add support for VxLAN GBP encap/decap flows offload - extend packet offload to fully support libreswan - support tunnel mode in mlx5 IPsec packet offload - extend XDP multi-buffer support - support MACsec VLAN offload - add support for dynamic msix vectors allocation - drop RX page_cache and fully use page_pool - implement thermal zone to report NIC temperature - Netronome/Corigine: - add support for multi-zone conntrack offload - Solarflare/Xilinx: - support offloading TC VLAN push/pop actions to the MAE - support TC decap rules - support unicast PTP - Other NICs: - Broadcom (bnxt): enforce software based freq adjustments only on shared PHC NIC - RealTek (r8169): refactor to addess ASPM issues during NAPI poll - Micrel (lan8841): add support for PTP_PF_PEROUT - Cadence (macb): enable PTP unicast - Engleder (tsnep): add XDP socket zero-copy support - virtio-net: implement exact header length guest feature - veth: add page_pool support for page recycling - vxlan: add MDB data path support - gve: add XDP support for GQI-QPL format - geneve: accept every ethertype - macvlan: allow some packets to bypass broadcast queue - mana: add support for jumbo frame - Ethernet high-speed switches: - Microchip (sparx5): Add support for TC flower templates - Ethernet embedded switches: - Broadcom (b54): - configure 6318 and 63268 RGMII ports - Marvell (mv88e6xxx): - faster C45 bus scan - Microchip: - lan966x: - add support for IS1 VCAP - better TX/RX from/to CPU performances - ksz9477: add ETS Qdisc support - ksz8: enhance static MAC table operations and error handling - sama7g5: add PTP capability - NXP (ocelot): - add support for external ports - add support for preemptible traffic classes - Texas Instruments: - add CPSWxG SGMII support for J7200 and J721E - Intel WiFi (iwlwifi): - preparation for Wi-Fi 7 EHT and multi-link support - EHT (Wi-Fi 7) sniffer support - hardware timestamping support for some devices/firwmares - TX beacon protection on newer hardware - Qualcomm 802.11ax WiFi (ath11k): - MU-MIMO parameters support - ack signal support for management packets - RealTek WiFi (rtw88): - SDIO bus support - better support for some SDIO devices (e.g. MAC address from efuse) - RealTek WiFi (rtw89): - HW scan support for 8852b - better support for 6 GHz scanning - support for various newer firmware APIs - framework firmware backwards compatibility - MediaTek WiFi (mt76): - P2P support - mesh A-MSDU support - EHT (Wi-Fi 7) support - coredump support" * tag 'net-next-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2078 commits) net: phy: hide the PHYLIB_LEDS knob net: phy: marvell-88x2222: remove unnecessary (void*) conversions tcp/udp: Fix memleaks of sk and zerocopy skbs with TX timestamp. net: amd: Fix link leak when verifying config failed net: phy: marvell: Fix inconsistent indenting in led_blink_set lan966x: Don't use xdp_frame when action is XDP_TX tsnep: Add XDP socket zero-copy TX support tsnep: Add XDP socket zero-copy RX support tsnep: Move skb receive action to separate function tsnep: Add functions for queue enable/disable tsnep: Rework TX/RX queue initialization tsnep: Replace modulo operation with mask net: phy: dp83867: Add led_brightness_set support net: phy: Fix reading LED reg property drivers: nfc: nfcsim: remove return value check of `dev_dir` net: phy: dp83867: Remove unnecessary (void*) conversions net: ethtool: coalesce: try to make user settings stick twice net: mana: Check if netdev/napi_alloc_frag returns single page net: mana: Rename mana_refill_rxoob and remove some empty lines net: veth: add page_pool stats ...
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlxsw')
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_thermal.c165
1 files changed, 36 insertions, 129 deletions
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
index 66dd42a8e72f..70d7fff24fa2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -19,6 +19,9 @@
#define MLXSW_THERMAL_ASIC_TEMP_NORM 75000 /* 75C */
#define MLXSW_THERMAL_ASIC_TEMP_HIGH 85000 /* 85C */
#define MLXSW_THERMAL_ASIC_TEMP_HOT 105000 /* 105C */
+#define MLXSW_THERMAL_MODULE_TEMP_NORM 55000 /* 55C */
+#define MLXSW_THERMAL_MODULE_TEMP_HIGH 65000 /* 65C */
+#define MLXSW_THERMAL_MODULE_TEMP_HOT 80000 /* 80C */
#define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */
#define MLXSW_THERMAL_MODULE_TEMP_SHIFT (MLXSW_THERMAL_HYSTERESIS_TEMP * 2)
#define MLXSW_THERMAL_MAX_STATE 10
@@ -30,12 +33,6 @@ static char * const mlxsw_thermal_external_allowed_cdev[] = {
"mlxreg_fan",
};
-enum mlxsw_thermal_trips {
- MLXSW_THERMAL_TEMP_TRIP_NORM,
- MLXSW_THERMAL_TEMP_TRIP_HIGH,
- MLXSW_THERMAL_TEMP_TRIP_HOT,
-};
-
struct mlxsw_cooling_states {
int min_state;
int max_state;
@@ -59,6 +56,24 @@ static const struct thermal_trip default_thermal_trips[] = {
},
};
+static const struct thermal_trip default_thermal_module_trips[] = {
+ { /* In range - 0-40% PWM */
+ .type = THERMAL_TRIP_ACTIVE,
+ .temperature = MLXSW_THERMAL_MODULE_TEMP_NORM,
+ .hysteresis = MLXSW_THERMAL_HYSTERESIS_TEMP,
+ },
+ {
+ /* In range - 40-100% PWM */
+ .type = THERMAL_TRIP_ACTIVE,
+ .temperature = MLXSW_THERMAL_MODULE_TEMP_HIGH,
+ .hysteresis = MLXSW_THERMAL_HYSTERESIS_TEMP,
+ },
+ { /* Warning */
+ .type = THERMAL_TRIP_HOT,
+ .temperature = MLXSW_THERMAL_MODULE_TEMP_HOT,
+ },
+};
+
static const struct mlxsw_cooling_states default_cooling_states[] = {
{
.min_state = 0,
@@ -140,63 +155,6 @@ static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal,
return -ENODEV;
}
-static void
-mlxsw_thermal_module_trips_reset(struct mlxsw_thermal_module *tz)
-{
- tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temperature = 0;
- tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temperature = 0;
- tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temperature = 0;
-}
-
-static int
-mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core,
- struct mlxsw_thermal_module *tz,
- int crit_temp, int emerg_temp)
-{
- int err;
-
- /* Do not try to query temperature thresholds directly from the module's
- * EEPROM if we got valid thresholds from MTMP.
- */
- if (!emerg_temp || !crit_temp) {
- err = mlxsw_env_module_temp_thresholds_get(core, tz->slot_index,
- tz->module,
- SFP_TEMP_HIGH_WARN,
- &crit_temp);
- if (err)
- return err;
-
- err = mlxsw_env_module_temp_thresholds_get(core, tz->slot_index,
- tz->module,
- SFP_TEMP_HIGH_ALARM,
- &emerg_temp);
- if (err)
- return err;
- }
-
- if (crit_temp > emerg_temp) {
- dev_warn(dev, "%s : Critical threshold %d is above emergency threshold %d\n",
- thermal_zone_device_type(tz->tzdev), crit_temp, emerg_temp);
- return 0;
- }
-
- /* According to the system thermal requirements, the thermal zones are
- * defined with three trip points. The critical and emergency
- * temperature thresholds, provided by QSFP module are set as "active"
- * and "hot" trip points, "normal" trip point is derived from "active"
- * by subtracting double hysteresis value.
- */
- if (crit_temp >= MLXSW_THERMAL_MODULE_TEMP_SHIFT)
- tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temperature = crit_temp -
- MLXSW_THERMAL_MODULE_TEMP_SHIFT;
- else
- tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temperature = crit_temp;
- tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temperature = crit_temp;
- tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temperature = emerg_temp;
-
- return 0;
-}
-
static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev,
struct thermal_cooling_device *cdev)
{
@@ -325,59 +283,22 @@ static int mlxsw_thermal_module_unbind(struct thermal_zone_device *tzdev,
return err;
}
-static void
-mlxsw_thermal_module_temp_and_thresholds_get(struct mlxsw_core *core,
- u8 slot_index, u16 sensor_index,
- int *p_temp, int *p_crit_temp,
- int *p_emerg_temp)
-{
- char mtmp_pl[MLXSW_REG_MTMP_LEN];
- int err;
-
- /* Read module temperature and thresholds. */
- mlxsw_reg_mtmp_pack(mtmp_pl, slot_index, sensor_index,
- false, false);
- err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl);
- if (err) {
- /* Set temperature and thresholds to zero to avoid passing
- * uninitialized data back to the caller.
- */
- *p_temp = 0;
- *p_crit_temp = 0;
- *p_emerg_temp = 0;
-
- return;
- }
- mlxsw_reg_mtmp_unpack(mtmp_pl, p_temp, NULL, p_crit_temp, p_emerg_temp,
- NULL);
-}
-
static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev,
int *p_temp)
{
struct mlxsw_thermal_module *tz = thermal_zone_device_priv(tzdev);
struct mlxsw_thermal *thermal = tz->parent;
- int temp, crit_temp, emerg_temp;
- struct device *dev;
+ char mtmp_pl[MLXSW_REG_MTMP_LEN];
u16 sensor_index;
+ int err;
- dev = thermal->bus_info->dev;
sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + tz->module;
-
- /* Read module temperature and thresholds. */
- mlxsw_thermal_module_temp_and_thresholds_get(thermal->core,
- tz->slot_index,
- sensor_index, &temp,
- &crit_temp, &emerg_temp);
- *p_temp = temp;
-
- if (!temp)
- return 0;
-
- /* Update trip points. */
- mlxsw_thermal_module_trips_update(dev, thermal->core, tz,
- crit_temp, emerg_temp);
-
+ mlxsw_reg_mtmp_pack(mtmp_pl, tz->slot_index, sensor_index,
+ false, false);
+ err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
+ if (err)
+ return err;
+ mlxsw_reg_mtmp_unpack(mtmp_pl, p_temp, NULL, NULL, NULL, NULL);
return 0;
}
@@ -521,36 +442,26 @@ static void mlxsw_thermal_module_tz_fini(struct thermal_zone_device *tzdev)
thermal_zone_device_unregister(tzdev);
}
-static int
+static void
mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core,
struct mlxsw_thermal *thermal,
struct mlxsw_thermal_area *area, u8 module)
{
struct mlxsw_thermal_module *module_tz;
- int dummy_temp, crit_temp, emerg_temp;
- u16 sensor_index;
- sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + module;
module_tz = &area->tz_module_arr[module];
/* Skip if parent is already set (case of port split). */
if (module_tz->parent)
- return 0;
+ return;
module_tz->module = module;
module_tz->slot_index = area->slot_index;
module_tz->parent = thermal;
- memcpy(module_tz->trips, default_thermal_trips,
+ BUILD_BUG_ON(ARRAY_SIZE(default_thermal_module_trips) !=
+ MLXSW_THERMAL_NUM_TRIPS);
+ memcpy(module_tz->trips, default_thermal_module_trips,
sizeof(thermal->trips));
memcpy(module_tz->cooling_states, default_cooling_states,
sizeof(thermal->cooling_states));
- /* Initialize all trip point. */
- mlxsw_thermal_module_trips_reset(module_tz);
- /* Read module temperature and thresholds. */
- mlxsw_thermal_module_temp_and_thresholds_get(core, area->slot_index,
- sensor_index, &dummy_temp,
- &crit_temp, &emerg_temp);
- /* Update trip point according to the module data. */
- return mlxsw_thermal_module_trips_update(dev, core, module_tz,
- crit_temp, emerg_temp);
}
static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz)
@@ -589,11 +500,8 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core,
if (!area->tz_module_arr)
return -ENOMEM;
- for (i = 0; i < area->tz_module_num; i++) {
- err = mlxsw_thermal_module_init(dev, core, thermal, area, i);
- if (err)
- goto err_thermal_module_init;
- }
+ for (i = 0; i < area->tz_module_num; i++)
+ mlxsw_thermal_module_init(dev, core, thermal, area, i);
for (i = 0; i < area->tz_module_num; i++) {
module_tz = &area->tz_module_arr[i];
@@ -607,7 +515,6 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core,
return 0;
err_thermal_module_tz_init:
-err_thermal_module_init:
for (i = area->tz_module_num - 1; i >= 0; i--)
mlxsw_thermal_module_fini(&area->tz_module_arr[i]);
kfree(area->tz_module_arr);