summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-class-thermal2
-rw-r--r--Documentation/devicetree/bindings/thermal/exynos-thermal.txt106
-rw-r--r--Documentation/devicetree/bindings/thermal/qcom-lmh.yaml1
-rw-r--r--Documentation/devicetree/bindings/thermal/qcom-tsens.yaml1
-rw-r--r--Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml184
-rw-r--r--Documentation/driver-api/thermal/index.rst1
-rw-r--r--Documentation/driver-api/thermal/intel_dptf.rst272
-rw-r--r--Documentation/x86/index.rst1
-rw-r--r--Documentation/x86/intel-hfi.rst72
-rw-r--r--MAINTAINERS7
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/msr-index.h6
-rw-r--r--drivers/thermal/broadcom/brcmstb_thermal.c2
-rw-r--r--drivers/thermal/intel/Kconfig14
-rw-r--r--drivers/thermal/intel/Makefile1
-rw-r--r--drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c23
-rw-r--r--drivers/thermal/intel/int340x_thermal/int3400_thermal.c153
-rw-r--r--drivers/thermal/intel/intel_hfi.c569
-rw-r--r--drivers/thermal/intel/intel_hfi.h17
-rw-r--r--drivers/thermal/intel/intel_powerclamp.c9
-rw-r--r--drivers/thermal/intel/therm_throt.c22
-rw-r--r--drivers/thermal/qcom/lmh.c62
-rw-r--r--drivers/thermal/qcom/tsens.c5
-rw-r--r--drivers/thermal/tegra/tegra-bpmp-thermal.c13
-rw-r--r--drivers/thermal/thermal_netlink.c53
-rw-r--r--drivers/thermal/thermal_netlink.h14
-rw-r--r--drivers/thermal/ti-soc-thermal/ti-thermal-common.c12
-rw-r--r--include/uapi/linux/thermal.h6
-rw-r--r--tools/power/x86/intel-speed-select/Build2
-rw-r--r--tools/power/x86/intel-speed-select/Makefile10
-rw-r--r--tools/power/x86/intel-speed-select/hfi-events.c309
-rw-r--r--tools/power/x86/intel-speed-select/isst-config.c54
-rw-r--r--tools/power/x86/intel-speed-select/isst-daemon.c244
-rw-r--r--tools/power/x86/intel-speed-select/isst.h13
34 files changed, 2026 insertions, 235 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-thermal b/Documentation/ABI/testing/sysfs-class-thermal
index 2c52bb1f864c..8eee37982b2a 100644
--- a/Documentation/ABI/testing/sysfs-class-thermal
+++ b/Documentation/ABI/testing/sysfs-class-thermal
@@ -203,7 +203,7 @@ Description:
- for generic ACPI: should be "Fan", "Processor" or "LCD"
- for memory controller device on intel_menlow platform:
- should be "Memory controller".
+ should be "Memory controller".
RO, Required
diff --git a/Documentation/devicetree/bindings/thermal/exynos-thermal.txt b/Documentation/devicetree/bindings/thermal/exynos-thermal.txt
deleted file mode 100644
index 33004ce7e5df..000000000000
--- a/Documentation/devicetree/bindings/thermal/exynos-thermal.txt
+++ /dev/null
@@ -1,106 +0,0 @@
-* Exynos Thermal Management Unit (TMU)
-
-** Required properties:
-
-- compatible : One of the following:
- "samsung,exynos3250-tmu"
- "samsung,exynos4412-tmu"
- "samsung,exynos4210-tmu"
- "samsung,exynos5250-tmu"
- "samsung,exynos5260-tmu"
- "samsung,exynos5420-tmu" for TMU channel 0, 1 on Exynos5420
- "samsung,exynos5420-tmu-ext-triminfo" for TMU channels 2, 3 and 4
- Exynos5420 (Must pass triminfo base and triminfo clock)
- "samsung,exynos5433-tmu"
- "samsung,exynos7-tmu"
-- reg : Address range of the thermal registers. For soc's which has multiple
- instances of TMU and some registers are shared across all TMU's like
- interrupt related then 2 set of register has to supplied. First set
- belongs to register set of TMU instance and second set belongs to
- registers shared with the TMU instance.
-
- NOTE: On Exynos5420, the TRIMINFO register is misplaced for TMU
- channels 2, 3 and 4
- Use "samsung,exynos5420-tmu-ext-triminfo" in cases, there is a misplaced
- register, also provide clock to access that base.
-
- TRIMINFO at 0x1006c000 contains data for TMU channel 3
- TRIMINFO at 0x100a0000 contains data for TMU channel 4
- TRIMINFO at 0x10068000 contains data for TMU channel 2
-
-- interrupts : Should contain interrupt for thermal system
-- clocks : The main clocks for TMU device
- -- 1. operational clock for TMU channel
- -- 2. optional clock to access the shared registers of TMU channel
- -- 3. optional special clock for functional operation
-- clock-names : Thermal system clock name
- -- "tmu_apbif" operational clock for current TMU channel
- -- "tmu_triminfo_apbif" clock to access the shared triminfo register
- for current TMU channel
- -- "tmu_sclk" clock for functional operation of the current TMU
- channel
-
-The Exynos TMU supports generating interrupts when reaching given
-temperature thresholds. Number of supported thermal trip points depends
-on the SoC (only first trip points defined in DT will be configured):
- - most of SoC: 4
- - samsung,exynos5433-tmu: 8
- - samsung,exynos7-tmu: 8
-
-** Optional properties:
-
-- vtmu-supply: This entry is optional and provides the regulator node supplying
- voltage to TMU. If needed this entry can be placed inside
- board/platform specific dts file.
-
-Example 1):
-
- tmu@100c0000 {
- compatible = "samsung,exynos4412-tmu";
- interrupt-parent = <&combiner>;
- reg = <0x100C0000 0x100>;
- interrupts = <2 4>;
- clocks = <&clock 383>;
- clock-names = "tmu_apbif";
- vtmu-supply = <&tmu_regulator_node>;
- #thermal-sensor-cells = <0>;
- };
-
-Example 2): (In case of Exynos5420 "with misplaced TRIMINFO register")
- tmu_cpu2: tmu@10068000 {
- compatible = "samsung,exynos5420-tmu-ext-triminfo";
- reg = <0x10068000 0x100>, <0x1006c000 0x4>;
- interrupts = <0 184 0>;
- clocks = <&clock 318>, <&clock 318>;
- clock-names = "tmu_apbif", "tmu_triminfo_apbif";
- #thermal-sensor-cells = <0>;
- };
-
- tmu_cpu3: tmu@1006c000 {
- compatible = "samsung,exynos5420-tmu-ext-triminfo";
- reg = <0x1006c000 0x100>, <0x100a0000 0x4>;
- interrupts = <0 185 0>;
- clocks = <&clock 318>, <&clock 319>;
- clock-names = "tmu_apbif", "tmu_triminfo_apbif";
- #thermal-sensor-cells = <0>;
- };
-
- tmu_gpu: tmu@100a0000 {
- compatible = "samsung,exynos5420-tmu-ext-triminfo";
- reg = <0x100a0000 0x100>, <0x10068000 0x4>;
- interrupts = <0 215 0>;
- clocks = <&clock 319>, <&clock 318>;
- clock-names = "tmu_apbif", "tmu_triminfo_apbif";
- #thermal-sensor-cells = <0>;
- };
-
-Note: For multi-instance tmu each instance should have an alias correctly
-numbered in "aliases" node.
-
-Example:
-
-aliases {
- tmuctrl0 = &tmuctrl_0;
- tmuctrl1 = &tmuctrl_1;
- tmuctrl2 = &tmuctrl_2;
-};
diff --git a/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml b/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml
index 289e9a845600..a9b7388ca9ac 100644
--- a/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml
+++ b/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml
@@ -19,6 +19,7 @@ properties:
compatible:
enum:
- qcom,sdm845-lmh
+ - qcom,sm8150-lmh
reg:
items:
diff --git a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
index d3b9e9b600a2..b6406bcc683f 100644
--- a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
+++ b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
@@ -43,6 +43,7 @@ properties:
- description: v2 of TSENS
items:
- enum:
+ - qcom,msm8953-tsens
- qcom,msm8996-tsens
- qcom,msm8998-tsens
- qcom,sc7180-tsens
diff --git a/Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml b/Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml
new file mode 100644
index 000000000000..17129f75d962
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml
@@ -0,0 +1,184 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/thermal/samsung,exynos-thermal.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung Exynos SoC Thermal Management Unit (TMU)
+
+maintainers:
+ - Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+
+description: |
+ For multi-instance tmu each instance should have an alias correctly numbered
+ in "aliases" node.
+
+properties:
+ compatible:
+ enum:
+ - samsung,exynos3250-tmu
+ - samsung,exynos4412-tmu
+ - samsung,exynos4210-tmu
+ - samsung,exynos5250-tmu
+ - samsung,exynos5260-tmu
+ # For TMU channel 0, 1 on Exynos5420:
+ - samsung,exynos5420-tmu
+ # For TMU channels 2, 3 and 4 of Exynos5420:
+ - samsung,exynos5420-tmu-ext-triminfo
+ - samsung,exynos5433-tmu
+ - samsung,exynos7-tmu
+
+ clocks:
+ minItems: 1
+ maxItems: 3
+
+ clock-names:
+ minItems: 1
+ maxItems: 3
+
+ interrupts:
+ description: |
+ The Exynos TMU supports generating interrupts when reaching given
+ temperature thresholds. Number of supported thermal trip points depends
+ on the SoC (only first trip points defined in DT will be configured)::
+ - most of SoC: 4
+ - samsung,exynos5433-tmu: 8
+ - samsung,exynos7-tmu: 8
+ maxItems: 1
+
+ reg:
+ items:
+ - description: TMU instance registers.
+ - description: |
+ Shared TMU registers.
+
+ Note:: On Exynos5420, the TRIMINFO register is misplaced for TMU
+ channels 2, 3 and 4 Use "samsung,exynos5420-tmu-ext-triminfo" in
+ cases, there is a misplaced register, also provide clock to access
+ that base.
+ TRIMINFO at 0x1006c000 contains data for TMU channel 3
+ TRIMINFO at 0x100a0000 contains data for TMU channel 4
+ TRIMINFO at 0x10068000 contains data for TMU channel 2
+ minItems: 1
+
+ '#thermal-sensor-cells': true
+
+ vtmu-supply:
+ description: The regulator node supplying voltage to TMU.
+
+required:
+ - compatible
+ - clocks
+ - clock-names
+ - interrupts
+ - reg
+
+allOf:
+ - $ref: /schemas/thermal/thermal-sensor.yaml
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: samsung,exynos5420-tmu-ext-triminfo
+ then:
+ properties:
+ clocks:
+ items:
+ - description:
+ Operational clock for TMU channel.
+ - description:
+ Optional clock to access the shared registers (e.g. TRIMINFO) of TMU
+ channel.
+ clock-names:
+ items:
+ - const: tmu_apbif
+ - const: tmu_triminfo_apbif
+ reg:
+ minItems: 2
+ maxItems: 2
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - samsung,exynos5433-tmu
+ - samsung,exynos7-tmu
+ then:
+ properties:
+ clocks:
+ items:
+ - description:
+ Operational clock for TMU channel.
+ - description:
+ Optional special clock for functional operation of TMU channel.
+ clock-names:
+ items:
+ - const: tmu_apbif
+ - const: tmu_sclk
+ reg:
+ minItems: 1
+ maxItems: 1
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - samsung,exynos3250-tmu
+ - samsung,exynos4412-tmu
+ - samsung,exynos4210-tmu
+ - samsung,exynos5250-tmu
+ - samsung,exynos5260-tmu
+ - samsung,exynos5420-tmu
+ then:
+ properties:
+ clocks:
+ minItems: 1
+ maxItems: 1
+ reg:
+ minItems: 1
+ maxItems: 1
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/exynos4.h>
+
+ tmu@100c0000 {
+ compatible = "samsung,exynos4412-tmu";
+ reg = <0x100C0000 0x100>;
+ interrupt-parent = <&combiner>;
+ interrupts = <2 4>;
+ #thermal-sensor-cells = <0>;
+ clocks = <&clock CLK_TMU_APBIF>;
+ clock-names = "tmu_apbif";
+ vtmu-supply = <&ldo10_reg>;
+ };
+
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ tmu@10068000 {
+ compatible = "samsung,exynos5420-tmu-ext-triminfo";
+ reg = <0x10068000 0x100>, <0x1006c000 0x4>;
+ interrupts = <GIC_SPI 184 IRQ_TYPE_LEVEL_HIGH>;
+ #thermal-sensor-cells = <0>;
+ clocks = <&clock 318>, <&clock 318>; /* CLK_TMU */
+ clock-names = "tmu_apbif", "tmu_triminfo_apbif";
+ vtmu-supply = <&ldo7_reg>;
+ };
+
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ tmu@10060000 {
+ compatible = "samsung,exynos5433-tmu";
+ reg = <0x10060000 0x200>;
+ interrupts = <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>;
+ #thermal-sensor-cells = <0>;
+ clocks = <&cmu_peris 3>, /* CLK_PCLK_TMU0_APBIF */
+ <&cmu_peris 35>; /* CLK_SCLK_TMU0 */
+ clock-names = "tmu_apbif", "tmu_sclk";
+ vtmu-supply = <&ldo3_reg>;
+ };
diff --git a/Documentation/driver-api/thermal/index.rst b/Documentation/driver-api/thermal/index.rst
index 4cb0b9b6bfb8..030306ffa408 100644
--- a/Documentation/driver-api/thermal/index.rst
+++ b/Documentation/driver-api/thermal/index.rst
@@ -17,3 +17,4 @@ Thermal
intel_powerclamp
nouveau_thermal
x86_pkg_temperature_thermal
+ intel_dptf
diff --git a/Documentation/driver-api/thermal/intel_dptf.rst b/Documentation/driver-api/thermal/intel_dptf.rst
new file mode 100644
index 000000000000..96668dca753a
--- /dev/null
+++ b/Documentation/driver-api/thermal/intel_dptf.rst
@@ -0,0 +1,272 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================================================
+Intel(R) Dynamic Platform and Thermal Framework Sysfs Interface
+===============================================================
+
+:Copyright: |copy| 2022 Intel Corporation
+
+:Author: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+
+Introduction
+------------
+
+Intel(R) Dynamic Platform and Thermal Framework (DPTF) is a platform
+level hardware/software solution for power and thermal management.
+
+As a container for multiple power/thermal technologies, DPTF provides
+a coordinated approach for different policies to effect the hardware
+state of a system.
+
+Since it is a platform level framework, this has several components.
+Some parts of the technology is implemented in the firmware and uses
+ACPI and PCI devices to expose various features for monitoring and
+control. Linux has a set of kernel drivers exposing hardware interface
+to user space. This allows user space thermal solutions like
+"Linux Thermal Daemon" to read platform specific thermal and power
+tables to deliver adequate performance while keeping the system under
+thermal limits.
+
+DPTF ACPI Drivers interface
+----------------------------
+
+:file:`/sys/bus/platform/devices/<N>/uuids`, where <N>
+=INT3400|INTC1040|INTC1041|INTC10A0
+
+``available_uuids`` (RO)
+ A set of UUIDs strings presenting available policies
+ which should be notified to the firmware when the
+ user space can support those policies.
+
+ UUID strings:
+
+ "42A441D6-AE6A-462b-A84B-4A8CE79027D3" : Passive 1
+
+ "3A95C389-E4B8-4629-A526-C52C88626BAE" : Active
+
+ "97C68AE7-15FA-499c-B8C9-5DA81D606E0A" : Critical
+
+ "63BE270F-1C11-48FD-A6F7-3AF253FF3E2D" : Adaptive performance
+
+ "5349962F-71E6-431D-9AE8-0A635B710AEE" : Emergency call
+
+ "9E04115A-AE87-4D1C-9500-0F3E340BFE75" : Passive 2
+
+ "F5A35014-C209-46A4-993A-EB56DE7530A1" : Power Boss
+
+ "6ED722A7-9240-48A5-B479-31EEF723D7CF" : Virtual Sensor
+
+ "16CAF1B7-DD38-40ED-B1C1-1B8A1913D531" : Cooling mode
+
+ "BE84BABF-C4D4-403D-B495-3128FD44dAC1" : HDC
+
+``current_uuid`` (RW)
+ User space can write strings from available UUIDs, one at a
+ time.
+
+:file:`/sys/bus/platform/devices/<N>/`, where <N>
+=INT3400|INTC1040|INTC1041|INTC10A0
+
+``imok`` (WO)
+ User space daemon write 1 to respond to firmware event
+ for sending keep alive notification. User space receives
+ THERMAL_EVENT_KEEP_ALIVE kobject uevent notification when
+ firmware calls for user space to respond with imok ACPI
+ method.
+
+``odvp*`` (RO)
+ Firmware thermal status variable values. Thermal tables
+ calls for different processing based on these variable
+ values.
+
+``data_vault`` (RO)
+ Binary thermal table. Refer to
+ https:/github.com/intel/thermal_daemon for decoding
+ thermal table.
+
+
+ACPI Thermal Relationship table interface
+------------------------------------------
+
+:file:`/dev/acpi_thermal_rel`
+
+ This device provides IOCTL interface to read standard ACPI
+ thermal relationship tables via ACPI methods _TRT and _ART.
+ These IOCTLs are defined in
+ drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.h
+
+ IOCTLs:
+
+ ACPI_THERMAL_GET_TRT_LEN: Get length of TRT table
+
+ ACPI_THERMAL_GET_ART_LEN: Get length of ART table
+
+ ACPI_THERMAL_GET_TRT_COUNT: Number of records in TRT table
+
+ ACPI_THERMAL_GET_ART_COUNT: Number of records in ART table
+
+ ACPI_THERMAL_GET_TRT: Read binary TRT table, length to read is
+ provided via argument to ioctl().
+
+ ACPI_THERMAL_GET_ART: Read binary ART table, length to read is
+ provided via argument to ioctl().
+
+DPTF ACPI Sensor drivers
+-------------------------
+
+DPTF Sensor drivers are presented as standard thermal sysfs thermal_zone.
+
+
+DPTF ACPI Cooling drivers
+--------------------------
+
+DPTF cooling drivers are presented as standard thermal sysfs cooling_device.
+
+
+DPTF Processor thermal PCI Driver interface
+--------------------------------------------
+
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/power_limits/`
+
+Refer to Documentation/power/powercap/powercap.rst for powercap
+ABI.
+
+``power_limit_0_max_uw`` (RO)
+ Maximum powercap sysfs constraint_0_power_limit_uw for Intel RAPL
+
+``power_limit_0_step_uw`` (RO)
+ Power limit increment/decrements for Intel RAPL constraint 0 power limit
+
+``power_limit_0_min_uw`` (RO)
+ Minimum powercap sysfs constraint_0_power_limit_uw for Intel RAPL
+
+``power_limit_0_tmin_us`` (RO)
+ Minimum powercap sysfs constraint_0_time_window_us for Intel RAPL
+
+``power_limit_0_tmax_us`` (RO)
+ Maximum powercap sysfs constraint_0_time_window_us for Intel RAPL
+
+``power_limit_1_max_uw`` (RO)
+ Maximum powercap sysfs constraint_1_power_limit_uw for Intel RAPL
+
+``power_limit_1_step_uw`` (RO)
+ Power limit increment/decrements for Intel RAPL constraint 1 power limit
+
+``power_limit_1_min_uw`` (RO)
+ Minimum powercap sysfs constraint_1_power_limit_uw for Intel RAPL
+
+``power_limit_1_tmin_us`` (RO)
+ Minimum powercap sysfs constraint_1_time_window_us for Intel RAPL
+
+``power_limit_1_tmax_us`` (RO)
+ Maximum powercap sysfs constraint_1_time_window_us for Intel RAPL
+
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/`
+
+``tcc_offset_degree_celsius`` (RW)
+ TCC offset from the critical temperature where hardware will throttle
+ CPU.
+
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/workload_request`
+
+``workload_available_types`` (RO)
+ Available workload types. User space can specify one of the workload type
+ it is currently executing via workload_type. For example: idle, bursty,
+ sustained etc.
+
+``workload_type`` (RW)
+ User space can specify any one of the available workload type using
+ this interface.
+
+DPTF Processor thermal RFIM interface
+--------------------------------------------
+
+RFIM interface allows adjustment of FIVR (Fully Integrated Voltage Regulator)
+and DDR (Double Data Rate)frequencies to avoid RF interference with WiFi and 5G.
+
+Switching voltage regulators (VR) generate radiated EMI or RFI at the
+fundamental frequency and its harmonics. Some harmonics may interfere
+with very sensitive wireless receivers such as Wi-Fi and cellular that
+are integrated into host systems like notebook PCs. One of mitigation
+methods is requesting SOC integrated VR (IVR) switching frequency to a
+small % and shift away the switching noise harmonic interference from
+radio channels. OEM or ODMs can use the driver to control SOC IVR
+operation within the range where it does not impact IVR performance.
+
+DRAM devices of DDR IO interface and their power plane can generate EMI
+at the data rates. Similar to IVR control mechanism, Intel offers a
+mechanism by which DDR data rates can be changed if several conditions
+are met: there is strong RFI interference because of DDR; CPU power
+management has no other restriction in changing DDR data rates;
+PC ODMs enable this feature (real time DDR RFI Mitigation referred to as
+DDR-RFIM) for Wi-Fi from BIOS.
+
+
+FIVR attributes
+
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/fivr/`
+
+``vco_ref_code_lo`` (RW)
+ The VCO reference code is an 11-bit field and controls the FIVR
+ switching frequency. This is the 3-bit LSB field.
+
+``vco_ref_code_hi`` (RW)
+ The VCO reference code is an 11-bit field and controls the FIVR
+ switching frequency. This is the 8-bit MSB field.
+
+``spread_spectrum_pct`` (RW)
+ Set the FIVR spread spectrum clocking percentage
+
+``spread_spectrum_clk_enable`` (RW)
+ Enable/disable of the FIVR spread spectrum clocking feature
+
+``rfi_vco_ref_code`` (RW)
+ This field is a read only status register which reflects the
+ current FIVR switching frequency
+
+``fivr_fffc_rev`` (RW)
+ This field indicated the revision of the FIVR HW.
+
+
+DVFS attributes
+
+:file:`/sys/bus/pci/devices/0000\:00\:04.0/dvfs/`
+
+``rfi_restriction_run_busy`` (RW)
+ Request the restriction of specific DDR data rate and set this
+ value 1. Self reset to 0 after operation.
+
+``rfi_restriction_err_code`` (RW)
+ 0 :Request is accepted, 1:Feature disabled,
+ 2: the request restricts more points than it is allowed
+
+``rfi_restriction_data_rate_Delta`` (RW)
+ Restricted DDR data rate for RFI protection: Lower Limit
+
+``rfi_restriction_data_rate_Base`` (RW)
+ Restricted DDR data rate for RFI protection: Upper Limit
+
+``ddr_data_rate_point_0`` (RO)
+ DDR data rate selection 1st point
+
+``ddr_data_rate_point_1`` (RO)
+ DDR data rate selection 2nd point
+
+``ddr_data_rate_point_2`` (RO)
+ DDR data rate selection 3rd point
+
+``ddr_data_rate_point_3`` (RO)
+ DDR data rate selection 4th point
+
+``rfi_disable (RW)``
+ Disable DDR rate change feature
+
+DPTF Power supply and Battery Interface
+----------------------------------------
+
+Refer to Documentation/ABI/testing/sysfs-platform-dptf
+
+DPTF Fan Control
+----------------------------------------
+
+Refer to Documentation/admin-guide/acpi/fan_performance_states.rst
diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
index f498f1d36cd3..982c8af853b9 100644
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@@ -21,6 +21,7 @@ x86-specific Documentation
tlb
mtrr
pat
+ intel-hfi
intel-iommu
intel_txt
amd-memory-encryption
diff --git a/Documentation/x86/intel-hfi.rst b/Documentation/x86/intel-hfi.rst
new file mode 100644
index 000000000000..49dea58ea4fb
--- /dev/null
+++ b/Documentation/x86/intel-hfi.rst
@@ -0,0 +1,72 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================================
+Hardware-Feedback Interface for scheduling on Intel Hardware
+============================================================
+
+Overview
+--------
+
+Intel has described the Hardware Feedback Interface (HFI) in the Intel 64 and
+IA-32 Architectures Software Developer's Manual (Intel SDM) Volume 3 Section
+14.6 [1]_.
+
+The HFI gives the operating system a performance and energy efficiency
+capability data for each CPU in the system. Linux can use the information from
+the HFI to influence task placement decisions.
+
+The Hardware Feedback Interface
+-------------------------------
+
+The Hardware Feedback Interface provides to the operating system information
+about the performance and energy efficiency of each CPU in the system. Each
+capability is given as a unit-less quantity in the range [0-255]. Higher values
+indicate higher capability. Energy efficiency and performance are reported in
+separate capabilities. Even though on some systems these two metrics may be
+related, they are specified as independent capabilities in the Intel SDM.
+
+These capabilities may change at runtime as a result of changes in the
+operating conditions of the system or the action of external factors. The rate
+at which these capabilities are updated is specific to each processor model. On
+some models, capabilities are set at boot time and never change. On others,
+capabilities may change every tens of milliseconds. For instance, a remote
+mechanism may be used to lower Thermal Design Power. Such change can be
+reflected in the HFI. Likewise, if the system needs to be throttled due to
+excessive heat, the HFI may reflect reduced performance on specific CPUs.
+
+The kernel or a userspace policy daemon can use these capabilities to modify
+task placement decisions. For instance, if either the performance or energy
+capabilities of a given logical processor becomes zero, it is an indication that
+the hardware recommends to the operating system to not schedule any tasks on
+that processor for performance or energy efficiency reasons, respectively.
+
+Implementation details for Linux
+--------------------------------
+
+The infrastructure to handle thermal event interrupts has two parts. In the
+Local Vector Table of a CPU's local APIC, there exists a register for the
+Thermal Monitor Register. This register controls how interrupts are delivered
+to a CPU when the thermal monitor generates and interrupt. Further details
+can be found in the Intel SDM Vol. 3 Section 10.5 [1]_.
+
+The thermal monitor may generate interrupts per CPU or per package. The HFI
+generates package-level interrupts. This monitor is configured and initialized
+via a set of machine-specific registers. Specifically, the HFI interrupt and
+status are controlled via designated bits in the IA32_PACKAGE_THERM_INTERRUPT
+and IA32_PACKAGE_THERM_STATUS registers, respectively. There exists one HFI
+table per package. Further details can be found in the Intel SDM Vol. 3
+Section 14.9 [1]_.
+
+The hardware issues an HFI interrupt after updating the HFI table and is ready
+for the operating system to consume it. CPUs receive such interrupt via the
+thermal entry in the Local APIC's Local Vector Table.
+
+When servicing such interrupt, the HFI driver parses the updated table and
+relays the update to userspace using the thermal notification framework. Given
+that there may be many HFI updates every second, the updates relayed to
+userspace are throttled at a rate of CONFIG_HZ jiffies.
+
+References
+----------
+
+.. [1] https://www.intel.com/sdm
diff --git a/MAINTAINERS b/MAINTAINERS
index 4e8d9b1d6138..ac4508914b3a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17143,11 +17143,12 @@ S: Supported
F: drivers/net/ethernet/samsung/sxgbe/
SAMSUNG THERMAL DRIVER
-M: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
+M: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
+M: Krzysztof Kozlowski <krzk@kernel.org>
L: linux-pm@vger.kernel.org
L: linux-samsung-soc@vger.kernel.org
-S: Supported
-T: git https://github.com/lmajewski/linux-samsung-thermal.git
+S: Maintained
+F: Documentation/devicetree/bindings/thermal/samsung,exynos-thermal.yaml
F: drivers/thermal/samsung/
SAMSUNG USB2 PHY DRIVER
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index f7436fccc076..3edf05e98e58 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -327,6 +327,7 @@
#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
+#define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */
/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index a4a39c3e0f19..0e7f303542bf 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -705,12 +705,14 @@
#define PACKAGE_THERM_STATUS_PROCHOT (1 << 0)
#define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10)
+#define PACKAGE_THERM_STATUS_HFI_UPDATED (1 << 26)
#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2
#define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0)
#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1)
#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24)
+#define PACKAGE_THERM_INT_HFI_ENABLE (1 << 25)
/* Thermal Thresholds Support */
#define THERM_INT_THRESHOLD0_ENABLE (1 << 15)
@@ -959,4 +961,8 @@
#define MSR_VM_IGNNE 0xc0010115
#define MSR_VM_HSAVE_PA 0xc0010117
+/* Hardware Feedback Interface */
+#define MSR_IA32_HW_FEEDBACK_PTR 0x17d0
+#define MSR_IA32_HW_FEEDBACK_CONFIG 0x17d1
+
#endif /* _ASM_X86_MSR_INDEX_H */
diff --git a/drivers/thermal/broadcom/brcmstb_thermal.c b/drivers/thermal/broadcom/brcmstb_thermal.c
index 8df5edef1ded..0cedb8b4f00a 100644
--- a/drivers/thermal/broadcom/brcmstb_thermal.c
+++ b/drivers/thermal/broadcom/brcmstb_thermal.c
@@ -351,7 +351,7 @@ static int brcmstb_thermal_probe(struct platform_device *pdev)
priv->thermal = thermal;
- irq = platform_get_irq(pdev, 0);
+ irq = platform_get_irq_optional(pdev, 0);
if (irq >= 0) {
ret = devm_request_threaded_irq(&pdev->dev, irq, NULL,
brcmstb_tmon_irq_thread,
diff --git a/drivers/thermal/intel/Kconfig b/drivers/thermal/intel/Kconfig
index c83ea5d04a1d..f0c845679250 100644
--- a/drivers/thermal/intel/Kconfig
+++ b/drivers/thermal/intel/Kconfig
@@ -99,3 +99,17 @@ config INTEL_MENLOW
Intel Menlow platform.
If unsure, say N.
+
+config INTEL_HFI_THERMAL
+ bool "Intel Hardware Feedback Interface"
+ depends on NET
+ depends on CPU_SUP_INTEL
+ depends on X86_THERMAL_VECTOR
+ select THERMAL_NETLINK
+ help
+ Select this option to enable the Hardware Feedback Interface. If
+ selected, hardware provides guidance to the operating system on
+ the performance and energy efficiency capabilities of each CPU.
+ These capabilities may change as a result of changes in the operating
+ conditions of the system such power and thermal limits. If selected,
+ the kernel relays updates in CPUs' capabilities to userspace.
diff --git a/drivers/thermal/intel/Makefile b/drivers/thermal/intel/Makefile
index 960b56268b4a..9a8d8054f316 100644
--- a/drivers/thermal/intel/Makefile
+++ b/drivers/thermal/intel/Makefile
@@ -13,3 +13,4 @@ obj-$(CONFIG_INTEL_PCH_THERMAL) += intel_pch_thermal.o
obj-$(CONFIG_INTEL_TCC_COOLING) += intel_tcc_cooling.o
obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
obj-$(CONFIG_INTEL_MENLOW) += intel_menlow.o
+obj-$(CONFIG_INTEL_HFI_THERMAL) += intel_hfi.o
diff --git a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c
index e90690a234c4..01b80331eab6 100644
--- a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c
+++ b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c
@@ -72,7 +72,6 @@ int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp,
int i;
int nr_bad_entries = 0;
struct trt *trts;
- struct acpi_device *adev;
union acpi_object *p;
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
struct acpi_buffer element = { 0, NULL };
@@ -112,12 +111,10 @@ int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp,
if (!create_dev)
continue;
- result = acpi_bus_get_device(trt->source, &adev);
- if (result)
+ if (!acpi_fetch_acpi_dev(trt->source))
pr_warn("Failed to get source ACPI device\n");
- result = acpi_bus_get_device(trt->target, &adev);
- if (result)
+ if (!acpi_fetch_acpi_dev(trt->target))
pr_warn("Failed to get target ACPI device\n");
}
@@ -149,7 +146,6 @@ int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp,
int i;
int nr_bad_entries = 0;
struct art *arts;
- struct acpi_device *adev;
union acpi_object *p;
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
struct acpi_buffer element = { 0, NULL };
@@ -191,16 +187,11 @@ int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp,
if (!create_dev)
continue;
- if (art->source) {
- result = acpi_bus_get_device(art->source, &adev);
- if (result)
- pr_warn("Failed to get source ACPI device\n");
- }
- if (art->target) {
- result = acpi_bus_get_device(art->target, &adev);
- if (result)
- pr_warn("Failed to get target ACPI device\n");
- }
+ if (!acpi_fetch_acpi_dev(art->source))
+ pr_warn("Failed to get source ACPI device\n");
+
+ if (!acpi_fetch_acpi_dev(art->target))
+ pr_warn("Failed to get target ACPI device\n");
}
*artp = arts;
diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
index 4f478812cb51..4954800b9850 100644
--- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
+++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
@@ -17,8 +17,8 @@
#define INT3400_KEEP_ALIVE 0xA0
enum int3400_thermal_uuid {
+ INT3400_THERMAL_ACTIVE = 0,
INT3400_THERMAL_PASSIVE_1,
- INT3400_THERMAL_ACTIVE,
INT3400_THERMAL_CRITICAL,
INT3400_THERMAL_ADAPTIVE_PERFORMANCE,
INT3400_THERMAL_EMERGENCY_CALL_MODE,
@@ -31,8 +31,8 @@ enum int3400_thermal_uuid {
};
static char *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = {
- "42A441D6-AE6A-462b-A84B-4A8CE79027D3",
"3A95C389-E4B8-4629-A526-C52C88626BAE",
+ "42A441D6-AE6A-462b-A84B-4A8CE79027D3",
"97C68AE7-15FA-499c-B8C9-5DA81D606E0A",
"63BE270F-1C11-48FD-A6F7-3AF253FF3E2D",
"5349962F-71E6-431D-9AE8-0A635B710AEE",
@@ -53,12 +53,13 @@ struct int3400_thermal_priv {
struct art *arts;
int trt_count;
struct trt *trts;
- u8 uuid_bitmap;
+ u32 uuid_bitmap;
int rel_misc_dev_res;
int current_uuid_index;
char *data_vault;
int odvp_count;
int *odvp;
+ u32 os_uuid_mask;
struct odvp_attr *odvp_attrs;
};
@@ -142,12 +143,55 @@ static ssize_t current_uuid_show(struct device *dev,
struct device_attribute *devattr, char *buf)
{
struct int3400_thermal_priv *priv = dev_get_drvdata(dev);
+ int i, length = 0;
- if (priv->current_uuid_index == -1)
- return sprintf(buf, "INVALID\n");
+ if (priv->current_uuid_index > 0)
+ return sprintf(buf, "%s\n",
+ int3400_thermal_uuids[priv->current_uuid_index]);
- return sprintf(buf, "%s\n",
- int3400_thermal_uuids[priv->current_uuid_index]);
+ for (i = 0; i <= INT3400_THERMAL_CRITICAL; i++) {
+ if (priv->os_uuid_mask & BIT(i))
+ length += scnprintf(&buf[length],
+ PAGE_SIZE - length,
+ "%s\n",
+ int3400_thermal_uuids[i]);
+ }
+
+ if (length)
+ return length;
+
+ return sprintf(buf, "INVALID\n");
+}
+
+static int int3400_thermal_run_osc(acpi_handle handle, char *uuid_str, int *enable)
+{
+ u32 ret, buf[2];
+ acpi_status status;
+ int result = 0;
+ struct acpi_osc_context context = {
+ .uuid_str = NULL,
+ .rev = 1,
+ .cap.length = 8,
+ };
+
+ context.uuid_str = uuid_str;
+
+ buf[OSC_QUERY_DWORD] = 0;
+ buf[OSC_SUPPORT_DWORD] = *enable;
+
+ context.cap.pointer = buf;
+
+ status = acpi_run_osc(handle, &context);
+ if (ACPI_SUCCESS(status)) {
+ ret = *((u32 *)(context.ret.pointer + 4));
+ if (ret != *enable)
+ result = -EPERM;
+ } else
+ result = -EPERM;
+
+ kfree(context.ret.pointer);
+
+ return result;
}
static ssize_t current_uuid_store(struct device *dev,
@@ -164,16 +208,47 @@ static ssize_t current_uuid_store(struct device *dev,
* If we have a list of supported UUIDs, make sure
* this one is supported.
*/
- if (priv->uuid_bitmap &&
- !(priv->uuid_bitmap & (1 << i)))
+ if (priv->uuid_bitmap & BIT(i)) {
+ priv->current_uuid_index = i;
+ return count;
+ }
+
+ /*
+ * There is support of only 3 policies via the new
+ * _OSC to inform OS capability:
+ * INT3400_THERMAL_ACTIVE
+ * INT3400_THERMAL_PASSIVE_1
+ * INT3400_THERMAL_CRITICAL
+ */
+
+ if (i > INT3400_THERMAL_CRITICAL)
return -EINVAL;
- priv->current_uuid_index = i;
- return count;
+ priv->os_uuid_mask |= BIT(i);
+
+ break;
}
}
- return -EINVAL;
+ if (priv->os_uuid_mask) {
+ int cap, ret;
+
+ /*
+ * Capability bits:
+ * Bit 0: set to 1 to indicate DPTF is active
+ * Bi1 1: set to 1 to active cooling is supported by user space daemon
+ * Bit 2: set to 1 to passive cooling is supported by user space daemon
+ * Bit 3: set to 1 to critical trip is handled by user space daemon
+ */
+ cap = ((priv->os_uuid_mask << 1) | 0x01);
+ ret = int3400_thermal_run_osc(priv->adev->handle,
+ "b23ba85d-c8b7-3542-88de-8de2ffcfd698",
+ &cap);
+ if (ret)
+ return ret;
+ }
+
+ return count;
}
static DEVICE_ATTR_RW(current_uuid);
@@ -236,41 +311,6 @@ end:
return result;
}
-static int int3400_thermal_run_osc(acpi_handle handle,
- enum int3400_thermal_uuid uuid, bool enable)
-{
- u32 ret, buf[2];
- acpi_status status;
- int result = 0;
- struct acpi_osc_context context = {
- .uuid_str = NULL,
- .rev = 1,
- .cap.length = 8,
- };
-
- if (uuid < 0 || uuid >= INT3400_THERMAL_MAXIMUM_UUID)
- return -EINVAL;
-
- context.uuid_str = int3400_thermal_uuids[uuid];
-
- buf[OSC_QUERY_DWORD] = 0;
- buf[OSC_SUPPORT_DWORD] = enable;
-
- context.cap.pointer = buf;
-
- status = acpi_run_osc(handle, &context);
- if (ACPI_SUCCESS(status)) {
- ret = *((u32 *)(context.ret.pointer + 4));
- if (ret != enable)
- result = -EPERM;
- } else
- result = -EPERM;
-
- kfree(context.ret.pointer);
-
- return result;
-}
-
static ssize_t odvp_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
@@ -426,10 +466,18 @@ static int int3400_thermal_change_mode(struct thermal_zone_device *thermal,
if (!priv)
return -EINVAL;
- if (mode != thermal->mode)
+ if (mode != thermal->mode) {
+ int enabled;
+
+ if (priv->current_uuid_index < 0 ||
+ priv->current_uuid_index >= INT3400_THERMAL_MAXIMUM_UUID)
+ return -EINVAL;
+
+ enabled = (mode == THERMAL_DEVICE_ENABLED);
result = int3400_thermal_run_osc(priv->adev->handle,
- priv->current_uuid_index,
- mode == THERMAL_DEVICE_ENABLED);
+ int3400_thermal_uuids[priv->current_uuid_index],
+ &enabled);
+ }
evaluate_odvp(priv);
@@ -468,6 +516,11 @@ static void int3400_setup_gddv(struct int3400_thermal_priv *priv)
priv->data_vault = kmemdup(obj->package.elements[0].buffer.pointer,
obj->package.elements[0].buffer.length,
GFP_KERNEL);
+ if (!priv->data_vault) {
+ kfree(buffer.pointer);
+ return;
+ }
+
bin_attr_data_vault.private = priv->data_vault;
bin_attr_data_vault.size = obj->package.elements[0].buffer.length;
kfree(buffer.pointer);
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
new file mode 100644
index 000000000000..730fd121df6e
--- /dev/null
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -0,0 +1,569 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Hardware Feedback Interface Driver
+ *
+ * Copyright (c) 2021, Intel Corporation.
+ *
+ * Authors: Aubrey Li <aubrey.li@linux.intel.com>
+ * Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+ *
+ *
+ * The Hardware Feedback Interface provides a performance and energy efficiency
+ * capability information for each CPU in the system. Depending on the processor
+ * model, hardware may periodically update these capabilities as a result of
+ * changes in the operating conditions (e.g., power limits or thermal
+ * constraints). On other processor models, there is a single HFI update
+ * at boot.
+ *
+ * This file provides functionality to process HFI updates and relay these
+ * updates to userspace.
+ */
+
+#define pr_fmt(fmt) "intel-hfi: " fmt
+
+#include <linux/bitops.h>
+#include <linux/cpufeature.h>
+#include <linux/cpumask.h>
+#include <linux/gfp.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/math.h>
+#include <linux/mutex.h>
+#include <linux/percpu-defs.h>
+#include <linux/printk.h>
+#include <linux/processor.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/topology.h>
+#include <linux/workqueue.h>
+
+#include <asm/msr.h>
+
+#include "../thermal_core.h"
+#include "intel_hfi.h"
+
+#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | \
+ BIT(9) | BIT(11) | BIT(26))
+
+/* Hardware Feedback Interface MSR configuration bits */
+#define HW_FEEDBACK_PTR_VALID_BIT BIT(0)
+#define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT BIT(0)
+
+/* CPUID detection and enumeration definitions for HFI */
+
+#define CPUID_HFI_LEAF 6
+
+union hfi_capabilities {
+ struct {
+ u8 performance:1;
+ u8 energy_efficiency:1;
+ u8 __reserved:6;
+ } split;
+ u8 bits;
+};
+
+union cpuid6_edx {
+ struct {
+ union hfi_capabilities capabilities;
+ u32 table_pages:4;
+ u32 __reserved:4;
+ s32 index:16;
+ } split;
+ u32 full;
+};
+
+/**
+ * struct hfi_cpu_data - HFI capabilities per CPU
+ * @perf_cap: Performance capability
+ * @ee_cap: Energy efficiency capability
+ *
+ * Capabilities of a logical processor in the HFI table. These capabilities are
+ * unitless.
+ */
+struct hfi_cpu_data {
+ u8 perf_cap;
+ u8 ee_cap;
+} __packed;
+
+/**
+ * struct hfi_hdr - Header of the HFI table
+ * @perf_updated: Hardware updated performance capabilities
+ * @ee_updated: Hardware updated energy efficiency capabilities
+ *
+ * Properties of the data in an HFI table.
+ */
+struct hfi_hdr {
+ u8 perf_updated;
+ u8 ee_updated;
+} __packed;
+
+/**
+ * struct hfi_instance - Representation of an HFI instance (i.e., a table)
+ * @local_table: Base of the local copy of the HFI table
+ * @timestamp: Timestamp of the last update of the local table.
+ * Located at the base of the local table.
+ * @hdr: Base address of the header of the local table
+ * @data: Base address of the data of the local table
+ * @cpus: CPUs represented in this HFI table instance
+ * @hw_table: Pointer to the HFI table of this instance
+ * @update_work: Delayed work to process HFI updates
+ * @table_lock: Lock to protect acceses to the table of this instance
+ * @event_lock: Lock to process HFI interrupts
+ *
+ * A set of parameters to parse and navigate a specific HFI table.
+ */
+struct hfi_instance {
+ union {
+ void *local_table;
+ u64 *timestamp;
+ };
+ void *hdr;
+ void *data;
+ cpumask_var_t cpus;
+ void *hw_table;
+ struct delayed_work update_work;
+ raw_spinlock_t table_lock;
+ raw_spinlock_t event_lock;
+};
+
+/**
+ * struct hfi_features - Supported HFI features
+ * @nr_table_pages: Size of the HFI table in 4KB pages
+ * @cpu_stride: Stride size to locate the capability data of a logical
+ * processor within the table (i.e., row stride)
+ * @hdr_size: Size of the table header
+ *
+ * Parameters and supported features that are common to all HFI instances
+ */
+struct hfi_features {
+ unsigned int nr_table_pages;
+ unsigned int cpu_stride;
+ unsigned int hdr_size;
+};
+
+/**
+ * struct hfi_cpu_info - Per-CPU attributes to consume HFI data
+ * @index: Row of this CPU in its HFI table
+ * @hfi_instance: Attributes of the HFI table to which this CPU belongs
+ *
+ * Parameters to link a logical processor to an HFI table and a row within it.
+ */
+struct hfi_cpu_info {
+ s16 index;
+ struct hfi_instance *hfi_instance;
+};
+
+static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 };
+
+static int max_hfi_instances;
+static struct hfi_instance *hfi_instances;
+
+static struct hfi_features hfi_features;
+static DEFINE_MUTEX(hfi_instance_lock);
+
+static struct workqueue_struct *hfi_updates_wq;
+#define HFI_UPDATE_INTERVAL HZ
+#define HFI_MAX_THERM_NOTIFY_COUNT 16
+
+static void get_hfi_caps(struct hfi_instance *hfi_instance,
+ struct thermal_genl_cpu_caps *cpu_caps)
+{
+ int cpu, i = 0;
+
+ raw_spin_lock_irq(&hfi_instance->table_lock);
+ for_each_cpu(cpu, hfi_instance->cpus) {
+ struct hfi_cpu_data *caps;
+ s16 index;
+
+ index = per_cpu(hfi_cpu_info, cpu).index;
+ caps = hfi_instance->data + index * hfi_features.cpu_stride;
+ cpu_caps[i].cpu = cpu;
+
+ /*
+ * Scale performance and energy efficiency to
+ * the [0, 1023] interval that thermal netlink uses.
+ */
+ cpu_caps[i].performance = caps->perf_cap << 2;
+ cpu_caps[i].efficiency = caps->ee_cap << 2;
+
+ ++i;
+ }
+ raw_spin_unlock_irq(&hfi_instance->table_lock);
+}
+
+/*
+ * Call update_capabilities() when there are changes in the HFI table.
+ */
+static void update_capabilities(struct hfi_instance *hfi_instance)
+{
+ struct thermal_genl_cpu_caps *cpu_caps;
+ int i = 0, cpu_count;
+
+ /* CPUs may come online/offline while processing an HFI update. */
+ mutex_lock(&hfi_instance_lock);
+
+ cpu_count = cpumask_weight(hfi_instance->cpus);
+
+ /* No CPUs to report in this hfi_instance. */
+ if (!cpu_count)
+ goto out;
+
+ cpu_caps = kcalloc(cpu_count, sizeof(*cpu_caps), GFP_KERNEL);
+ if (!cpu_caps)
+ goto out;
+
+ get_hfi_caps(hfi_instance, cpu_caps);
+
+ if (cpu_count < HFI_MAX_THERM_NOTIFY_COUNT)
+ goto last_cmd;
+
+ /* Process complete chunks of HFI_MAX_THERM_NOTIFY_COUNT capabilities. */
+ for (i = 0;
+ (i + HFI_MAX_THERM_NOTIFY_COUNT) <= cpu_count;
+ i += HFI_MAX_THERM_NOTIFY_COUNT)
+ thermal_genl_cpu_capability_event(HFI_MAX_THERM_NOTIFY_COUNT,
+ &cpu_caps[i]);
+
+ cpu_count = cpu_count - i;
+
+last_cmd:
+ /* Process the remaining capabilities if any. */
+ if (cpu_count)
+ thermal_genl_cpu_capability_event(cpu_count, &cpu_caps[i]);
+
+ kfree(cpu_caps);
+out:
+ mutex_unlock(&hfi_instance_lock);
+}
+
+static void hfi_update_work_fn(struct work_struct *work)
+{
+ struct hfi_instance *hfi_instance;
+
+ hfi_instance = container_of(to_delayed_work(work), struct hfi_instance,
+ update_work);
+ if (!hfi_instance)
+ return;
+
+ update_capabilities(hfi_instance);
+}
+
+void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
+{
+ struct hfi_instance *hfi_instance;
+ int cpu = smp_processor_id();
+ struct hfi_cpu_info *info;
+ u64 new_timestamp;
+
+ if (!pkg_therm_status_msr_val)
+ return;
+
+ info = &per_cpu(hfi_cpu_info, cpu);
+ if (!info)
+ return;
+
+ /*
+ * A CPU is linked to its HFI instance before the thermal vector in the
+ * local APIC is unmasked. Hence, info->hfi_instance cannot be NULL
+ * when receiving an HFI event.
+ */
+ hfi_instance = info->hfi_instance;
+ if (unlikely(!hfi_instance)) {
+ pr_debug("Received event on CPU %d but instance was null", cpu);
+ return;
+ }
+
+ /*
+ * On most systems, all CPUs in the package receive a package-level
+ * thermal interrupt when there is an HFI update. It is sufficient to
+ * let a single CPU to acknowledge the update and queue work to
+ * process it. The remaining CPUs can resume their work.
+ */
+ if (!raw_spin_trylock(&hfi_instance->event_lock))
+ return;
+
+ /* Skip duplicated updates. */
+ new_timestamp = *(u64 *)hfi_instance->hw_table;
+ if (*hfi_instance->timestamp == new_timestamp) {
+ raw_spin_unlock(&hfi_instance->event_lock);
+ return;
+ }
+
+ raw_spin_lock(&hfi_instance->table_lock);
+
+ /*
+ * Copy the updated table into our local copy. This includes the new
+ * timestamp.
+ */
+ memcpy(hfi_instance->local_table, hfi_instance->hw_table,
+ hfi_features.nr_table_pages << PAGE_SHIFT);
+
+ raw_spin_unlock(&hfi_instance->table_lock);
+ raw_spin_unlock(&hfi_instance->event_lock);
+
+ /*
+ * Let hardware know that we are done reading the HFI table and it is
+ * free to update it again.
+ */
+ pkg_therm_status_msr_val &= THERM_STATUS_CLEAR_PKG_MASK &
+ ~PACKAGE_THERM_STATUS_HFI_UPDATED;
+ wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, pkg_therm_status_msr_val);
+
+ queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work,
+ HFI_UPDATE_INTERVAL);
+}
+
+static void init_hfi_cpu_index(struct hfi_cpu_info *info)
+{
+ union cpuid6_edx edx;
+
+ /* Do not re-read @cpu's index if it has already been initialized. */
+ if (info->index > -1)
+ return;
+
+ edx.full = cpuid_edx(CPUID_HFI_LEAF);
+ info->index = edx.split.index;
+}
+
+/*
+ * The format of the HFI table depends on the number of capabilities that the
+ * hardware supports. Keep a data structure to navigate the table.
+ */
+static void init_hfi_instance(struct hfi_instance *hfi_instance)
+{
+ /* The HFI header is below the time-stamp. */
+ hfi_instance->hdr = hfi_instance->local_table +
+ sizeof(*hfi_instance->timestamp);
+
+ /* The HFI data starts below the header. */
+ hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size;
+}
+
+/**
+ * intel_hfi_online() - Enable HFI on @cpu
+ * @cpu: CPU in which the HFI will be enabled
+ *
+ * Enable the HFI to be used in @cpu. The HFI is enabled at the die/package
+ * level. The first CPU in the die/package to come online does the full HFI
+ * initialization. Subsequent CPUs will just link themselves to the HFI
+ * instance of their die/package.
+ *
+ * This function is called before enabling the thermal vector in the local APIC
+ * in order to ensure that @cpu has an associated HFI instance when it receives
+ * an HFI event.
+ */
+void intel_hfi_online(unsigned int cpu)
+{
+ struct hfi_instance *hfi_instance;
+ struct hfi_cpu_info *info;
+ phys_addr_t hw_table_pa;
+ u64 msr_val;
+ u16 die_id;
+
+ /* Nothing to do if hfi_instances are missing. */
+ if (!hfi_instances)
+ return;
+
+ /*
+ * Link @cpu to the HFI instance of its package/die. It does not
+ * matter whether the instance has been initialized.
+ */
+ info = &per_cpu(hfi_cpu_info, cpu);
+ die_id = topology_logical_die_id(cpu);
+ hfi_instance = info->hfi_instance;
+ if (!hfi_instance) {
+ if (die_id < 0 || die_id >= max_hfi_instances)
+ return;
+
+ hfi_instance = &hfi_instances[die_id];
+ info->hfi_instance = hfi_instance;
+ }
+
+ init_hfi_cpu_index(info);
+
+ /*
+ * Now check if the HFI instance of the package/die of @cpu has been
+ * initialized (by checking its header). In such case, all we have to
+ * do is to add @cpu to this instance's cpumask.
+ */
+ mutex_lock(&hfi_instance_lock);
+ if (hfi_instance->hdr) {
+ cpumask_set_cpu(cpu, hfi_instance->cpus);
+ goto unlock;
+ }
+
+ /*
+ * Hardware is programmed with the physical address of the first page
+ * frame of the table. Hence, the allocated memory must be page-aligned.
+ */
+ hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!hfi_instance->hw_table)
+ goto unlock;
+
+ hw_table_pa = virt_to_phys(hfi_instance->hw_table);
+
+ /*
+ * Allocate memory to keep a local copy of the table that
+ * hardware generates.
+ */
+ hfi_instance->local_table = kzalloc(hfi_features.nr_table_pages << PAGE_SHIFT,
+ GFP_KERNEL);
+ if (!hfi_instance->local_table)
+ goto free_hw_table;
+
+ /*
+ * Program the address of the feedback table of this die/package. On
+ * some processors, hardware remembers the old address of the HFI table
+ * even after having been reprogrammed and re-enabled. Thus, do not free
+ * the pages allocated for the table or reprogram the hardware with a
+ * new base address. Namely, program the hardware only once.
+ */
+ msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT;
+ wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
+
+ init_hfi_instance(hfi_instance);
+
+ INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn);
+ raw_spin_lock_init(&hfi_instance->table_lock);
+ raw_spin_lock_init(&hfi_instance->event_lock);
+
+ cpumask_set_cpu(cpu, hfi_instance->cpus);
+
+ /*
+ * Enable the hardware feedback interface and never disable it. See
+ * comment on programming the address of the table.
+ */
+ rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+ msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
+ wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+
+unlock:
+ mutex_unlock(&hfi_instance_lock);
+ return;
+
+free_hw_table:
+ free_pages_exact(hfi_instance->hw_table, hfi_features.nr_table_pages);
+ goto unlock;
+}
+
+/**
+ * intel_hfi_offline() - Disable HFI on @cpu
+ * @cpu: CPU in which the HFI will be disabled
+ *
+ * Remove @cpu from those covered by its HFI instance.
+ *
+ * On some processors, hardware remembers previous programming settings even
+ * after being reprogrammed. Thus, keep HFI enabled even if all CPUs in the
+ * die/package of @cpu are offline. See note in intel_hfi_online().
+ */
+void intel_hfi_offline(unsigned int cpu)
+{
+ struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, cpu);
+ struct hfi_instance *hfi_instance;
+
+ /*
+ * Check if @cpu as an associated, initialized (i.e., with a non-NULL
+ * header). Also, HFI instances are only initialized if X86_FEATURE_HFI
+ * is present.
+ */
+ hfi_instance = info->hfi_instance;
+ if (!hfi_instance)
+ return;
+
+ if (!hfi_instance->hdr)
+ return;
+
+ mutex_lock(&hfi_instance_lock);
+ cpumask_clear_cpu(cpu, hfi_instance->cpus);
+ mutex_unlock(&hfi_instance_lock);
+}
+
+static __init int hfi_parse_features(void)
+{
+ unsigned int nr_capabilities;
+ union cpuid6_edx edx;
+
+ if (!boot_cpu_has(X86_FEATURE_HFI))
+ return -ENODEV;
+
+ /*
+ * If we are here we know that CPUID_HFI_LEAF exists. Parse the
+ * supported capabilities and the size of the HFI table.
+ */
+ edx.full = cpuid_edx(CPUID_HFI_LEAF);
+
+ if (!edx.split.capabilities.split.performance) {
+ pr_debug("Performance reporting not supported! Not using HFI\n");
+ return -ENODEV;
+ }
+
+ /*
+ * The number of supported capabilities determines the number of
+ * columns in the HFI table. Exclude the reserved bits.
+ */
+ edx.split.capabilities.split.__reserved = 0;
+ nr_capabilities = hweight8(edx.split.capabilities.bits);
+
+ /* The number of 4KB pages required by the table */
+ hfi_features.nr_table_pages = edx.split.table_pages + 1;
+
+ /*
+ * The header contains change indications for each supported feature.
+ * The size of the table header is rounded up to be a multiple of 8
+ * bytes.
+ */
+ hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8;
+
+ /*
+ * Data of each logical processor is also rounded up to be a multiple
+ * of 8 bytes.
+ */
+ hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8;
+
+ return 0;
+}
+
+void __init intel_hfi_init(void)
+{
+ struct hfi_instance *hfi_instance;
+ int i, j;
+
+ if (hfi_parse_features())
+ return;
+
+ /* There is one HFI instance per die/package. */
+ max_hfi_instances = topology_max_packages() *
+ topology_max_die_per_package();
+
+ /*
+ * This allocation may fail. CPU hotplug callbacks must check
+ * for a null pointer.
+ */
+ hfi_instances = kcalloc(max_hfi_instances, sizeof(*hfi_instances),
+ GFP_KERNEL);
+ if (!hfi_instances)
+ return;
+
+ for (i = 0; i < max_hfi_instances; i++) {
+ hfi_instance = &hfi_instances[i];
+ if (!zalloc_cpumask_var(&hfi_instance->cpus, GFP_KERNEL))
+ goto err_nomem;
+ }
+
+ hfi_updates_wq = create_singlethread_workqueue("hfi-updates");
+ if (!hfi_updates_wq)
+ goto err_nomem;
+
+ return;
+
+err_nomem:
+ for (j = 0; j < i; ++j) {
+ hfi_instance = &hfi_instances[j];
+ free_cpumask_var(hfi_instance->cpus);
+ }
+
+ kfree(hfi_instances);
+ hfi_instances = NULL;
+}
diff --git a/drivers/thermal/intel/intel_hfi.h b/drivers/thermal/intel/intel_hfi.h
new file mode 100644
index 000000000000..325aa78b745c
--- /dev/null
+++ b/drivers/thermal/intel/intel_hfi.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _INTEL_HFI_H
+#define _INTEL_HFI_H
+
+#if defined(CONFIG_INTEL_HFI_THERMAL)
+void __init intel_hfi_init(void);
+void intel_hfi_online(unsigned int cpu);
+void intel_hfi_offline(unsigned int cpu);
+void intel_hfi_process_event(__u64 pkg_therm_status_msr_val);
+#else
+static inline void intel_hfi_init(void) { }
+static inline void intel_hfi_online(unsigned int cpu) { }
+static inline void intel_hfi_offline(unsigned int cpu) { }
+static inline void intel_hfi_process_event(__u64 pkg_therm_status_msr_val) { }
+#endif /* CONFIG_INTEL_HFI_THERMAL */
+
+#endif /* _INTEL_HFI_H */
diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
index 14256421d98c..c841ab37e7c6 100644
--- a/drivers/thermal/intel/intel_powerclamp.c
+++ b/drivers/thermal/intel/intel_powerclamp.c
@@ -556,12 +556,9 @@ static void end_power_clamp(void)
* stop faster.
*/
clamping = false;
- if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) {
- for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
- pr_debug("clamping worker for cpu %d alive, destroy\n",
- i);
- stop_power_clamp_worker(i);
- }
+ for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
+ pr_debug("clamping worker for cpu %d alive, destroy\n", i);
+ stop_power_clamp_worker(i);
}
}
diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c
index dab7e8fb1059..8352083b87c7 100644
--- a/drivers/thermal/intel/therm_throt.c
+++ b/drivers/thermal/intel/therm_throt.c
@@ -32,6 +32,7 @@
#include <asm/irq.h>
#include <asm/msr.h>
+#include "intel_hfi.h"
#include "thermal_interrupt.h"
/* How long to wait between reporting thermal events */
@@ -475,6 +476,13 @@ static int thermal_throttle_online(unsigned int cpu)
INIT_DELAYED_WORK(&state->package_throttle.therm_work, throttle_active_work);
INIT_DELAYED_WORK(&state->core_throttle.therm_work, throttle_active_work);
+ /*
+ * The first CPU coming online will enable the HFI. Usually this causes
+ * hardware to issue an HFI thermal interrupt. Such interrupt will reach
+ * the CPU once we enable the thermal vector in the local APIC.
+ */
+ intel_hfi_online(cpu);
+
/* Unmask the thermal vector after the above workqueues are initialized. */
l = apic_read(APIC_LVTTHMR);
apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
@@ -492,6 +500,8 @@ static int thermal_throttle_offline(unsigned int cpu)
l = apic_read(APIC_LVTTHMR);
apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED);
+ intel_hfi_offline(cpu);
+
cancel_delayed_work_sync(&state->package_throttle.therm_work);
cancel_delayed_work_sync(&state->core_throttle.therm_work);
@@ -509,6 +519,8 @@ static __init int thermal_throttle_init_device(void)
if (!atomic_read(&therm_throt_en))
return 0;
+ intel_hfi_init();
+
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/therm:online",
thermal_throttle_online,
thermal_throttle_offline);
@@ -608,6 +620,10 @@ void intel_thermal_interrupt(void)
PACKAGE_THERM_STATUS_POWER_LIMIT,
POWER_LIMIT_EVENT,
PACKAGE_LEVEL);
+
+ if (this_cpu_has(X86_FEATURE_HFI))
+ intel_hfi_process_event(msr_val &
+ PACKAGE_THERM_STATUS_HFI_UPDATED);
}
}
@@ -717,6 +733,12 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
l | (PACKAGE_THERM_INT_LOW_ENABLE
| PACKAGE_THERM_INT_HIGH_ENABLE), h);
+
+ if (cpu_has(c, X86_FEATURE_HFI)) {
+ rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
+ wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
+ l | PACKAGE_THERM_INT_HFI_ENABLE, h);
+ }
}
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
diff --git a/drivers/thermal/qcom/lmh.c b/drivers/thermal/qcom/lmh.c
index eafa7526eb8b..c7f91cbdccc7 100644
--- a/drivers/thermal/qcom/lmh.c
+++ b/drivers/thermal/qcom/lmh.c
@@ -28,6 +28,8 @@
#define LMH_REG_DCVS_INTR_CLR 0x8
+#define LMH_ENABLE_ALGOS 1
+
struct lmh_hw_data {
void __iomem *base;
struct irq_domain *domain;
@@ -90,6 +92,7 @@ static int lmh_probe(struct platform_device *pdev)
struct device_node *cpu_node;
struct lmh_hw_data *lmh_data;
int temp_low, temp_high, temp_arm, cpu_id, ret;
+ unsigned int enable_alg;
u32 node_id;
lmh_data = devm_kzalloc(dev, sizeof(*lmh_data), GFP_KERNEL);
@@ -141,32 +144,36 @@ static int lmh_probe(struct platform_device *pdev)
if (!qcom_scm_lmh_dcvsh_available())
return -EINVAL;
- ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_CRNT, LMH_ALGO_MODE_ENABLE, 1,
- LMH_NODE_DCVS, node_id, 0);
- if (ret)
- dev_err(dev, "Error %d enabling current subfunction\n", ret);
-
- ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_REL, LMH_ALGO_MODE_ENABLE, 1,
- LMH_NODE_DCVS, node_id, 0);
- if (ret)
- dev_err(dev, "Error %d enabling reliability subfunction\n", ret);
-
- ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_BCL, LMH_ALGO_MODE_ENABLE, 1,
- LMH_NODE_DCVS, node_id, 0);
- if (ret)
- dev_err(dev, "Error %d enabling BCL subfunction\n", ret);
-
- ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_THERMAL, LMH_ALGO_MODE_ENABLE, 1,
- LMH_NODE_DCVS, node_id, 0);
- if (ret) {
- dev_err(dev, "Error %d enabling thermal subfunction\n", ret);
- return ret;
- }
-
- ret = qcom_scm_lmh_profile_change(0x1);
- if (ret) {
- dev_err(dev, "Error %d changing profile\n", ret);
- return ret;
+ enable_alg = (uintptr_t)of_device_get_match_data(dev);
+
+ if (enable_alg) {
+ ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_CRNT, LMH_ALGO_MODE_ENABLE, 1,
+ LMH_NODE_DCVS, node_id, 0);
+ if (ret)
+ dev_err(dev, "Error %d enabling current subfunction\n", ret);
+
+ ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_REL, LMH_ALGO_MODE_ENABLE, 1,
+ LMH_NODE_DCVS, node_id, 0);
+ if (ret)
+ dev_err(dev, "Error %d enabling reliability subfunction\n", ret);
+
+ ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_BCL, LMH_ALGO_MODE_ENABLE, 1,
+ LMH_NODE_DCVS, node_id, 0);
+ if (ret)
+ dev_err(dev, "Error %d enabling BCL subfunction\n", ret);
+
+ ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_THERMAL, LMH_ALGO_MODE_ENABLE, 1,
+ LMH_NODE_DCVS, node_id, 0);
+ if (ret) {
+ dev_err(dev, "Error %d enabling thermal subfunction\n", ret);
+ return ret;
+ }
+
+ ret = qcom_scm_lmh_profile_change(0x1);
+ if (ret) {
+ dev_err(dev, "Error %d changing profile\n", ret);
+ return ret;
+ }
}
/* Set default thermal trips */
@@ -213,7 +220,8 @@ static int lmh_probe(struct platform_device *pdev)
}
static const struct of_device_id lmh_table[] = {
- { .compatible = "qcom,sdm845-lmh", },
+ { .compatible = "qcom,sdm845-lmh", .data = (void *)LMH_ENABLE_ALGOS},
+ { .compatible = "qcom,sm8150-lmh", },
{}
};
MODULE_DEVICE_TABLE(of, lmh_table);
diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c
index 99a8d9f3e03c..154d3cb19c88 100644
--- a/drivers/thermal/qcom/tsens.c
+++ b/drivers/thermal/qcom/tsens.c
@@ -18,6 +18,7 @@
#include <linux/regmap.h>
#include <linux/slab.h>
#include <linux/thermal.h>
+#include "../thermal_hwmon.h"
#include "tsens.h"
/**
@@ -1060,6 +1061,10 @@ static int tsens_register(struct tsens_priv *priv)
priv->sensor[i].tzd = tzd;
if (priv->ops->enable)
priv->ops->enable(priv, i);
+
+ if (devm_thermal_add_hwmon_sysfs(tzd))
+ dev_warn(priv->dev,
+ "Failed to add hwmon sysfs attributes\n");
}
/* VER_0 require to set MIN and MAX THRESH
diff --git a/drivers/thermal/tegra/tegra-bpmp-thermal.c b/drivers/thermal/tegra/tegra-bpmp-thermal.c
index 94f1da1dcd69..5affc3d196be 100644
--- a/drivers/thermal/tegra/tegra-bpmp-thermal.c
+++ b/drivers/thermal/tegra/tegra-bpmp-thermal.c
@@ -52,6 +52,8 @@ static int tegra_bpmp_thermal_get_temp(void *data, int *out_temp)
err = tegra_bpmp_transfer(zone->tegra->bpmp, &msg);
if (err)
return err;
+ if (msg.rx.ret)
+ return -EINVAL;
*out_temp = reply.get_temp.temp;
@@ -63,6 +65,7 @@ static int tegra_bpmp_thermal_set_trips(void *data, int low, int high)
struct tegra_bpmp_thermal_zone *zone = data;
struct mrq_thermal_host_to_bpmp_request req;
struct tegra_bpmp_message msg;
+ int err;
memset(&req, 0, sizeof(req));
req.type = CMD_THERMAL_SET_TRIP;
@@ -76,7 +79,13 @@ static int tegra_bpmp_thermal_set_trips(void *data, int low, int high)
msg.tx.data = &req;
msg.tx.size = sizeof(req);
- return tegra_bpmp_transfer(zone->tegra->bpmp, &msg);
+ err = tegra_bpmp_transfer(zone->tegra->bpmp, &msg);
+ if (err)
+ return err;
+ if (msg.rx.ret)
+ return -EINVAL;
+
+ return 0;
}
static void tz_device_update_work_fn(struct work_struct *work)
@@ -140,6 +149,8 @@ static int tegra_bpmp_thermal_get_num_zones(struct tegra_bpmp *bpmp,
err = tegra_bpmp_transfer(bpmp, &msg);
if (err)
return err;
+ if (msg.rx.ret)
+ return -EINVAL;
*num_zones = reply.get_num_zones.num;
diff --git a/drivers/thermal/thermal_netlink.c b/drivers/thermal/thermal_netlink.c
index 73e68cce292e..32fea5174cc0 100644
--- a/drivers/thermal/thermal_netlink.c
+++ b/drivers/thermal/thermal_netlink.c
@@ -43,6 +43,11 @@ static const struct nla_policy thermal_genl_policy[THERMAL_GENL_ATTR_MAX + 1] =
[THERMAL_GENL_ATTR_CDEV_MAX_STATE] = { .type = NLA_U32 },
[THERMAL_GENL_ATTR_CDEV_NAME] = { .type = NLA_STRING,
.len = THERMAL_NAME_LENGTH },
+ /* CPU capabilities */
+ [THERMAL_GENL_ATTR_CPU_CAPABILITY] = { .type = NLA_NESTED },
+ [THERMAL_GENL_ATTR_CPU_CAPABILITY_ID] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE] = { .type = NLA_U32 },
+ [THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY] = { .type = NLA_U32 },
};
struct param {
@@ -58,6 +63,8 @@ struct param {
int temp;
int cdev_state;
int cdev_max_state;
+ struct thermal_genl_cpu_caps *cpu_capabilities;
+ int cpu_capabilities_count;
};
typedef int (*cb_t)(struct param *);
@@ -190,6 +197,42 @@ static int thermal_genl_event_gov_change(struct param *p)
return 0;
}
+static int thermal_genl_event_cpu_capability_change(struct param *p)
+{
+ struct thermal_genl_cpu_caps *cpu_cap = p->cpu_capabilities;
+ struct sk_buff *msg = p->msg;
+ struct nlattr *start_cap;
+ int i;
+
+ start_cap = nla_nest_start(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY);
+ if (!start_cap)
+ return -EMSGSIZE;
+
+ for (i = 0; i < p->cpu_capabilities_count; ++i) {
+ if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_ID,
+ cpu_cap->cpu))
+ goto out_cancel_nest;
+
+ if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE,
+ cpu_cap->performance))
+ goto out_cancel_nest;
+
+ if (nla_put_u32(msg, THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY,
+ cpu_cap->efficiency))
+ goto out_cancel_nest;
+
+ ++cpu_cap;
+ }
+
+ nla_nest_end(msg, start_cap);
+
+ return 0;
+out_cancel_nest:
+ nla_nest_cancel(msg, start_cap);
+
+ return -EMSGSIZE;
+}
+
int thermal_genl_event_tz_delete(struct param *p)
__attribute__((alias("thermal_genl_event_tz")));
@@ -219,6 +262,7 @@ static cb_t event_cb[] = {
[THERMAL_GENL_EVENT_CDEV_DELETE] = thermal_genl_event_cdev_delete,
[THERMAL_GENL_EVENT_CDEV_STATE_UPDATE] = thermal_genl_event_cdev_state_update,
[THERMAL_GENL_EVENT_TZ_GOV_CHANGE] = thermal_genl_event_gov_change,
+ [THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE] = thermal_genl_event_cpu_capability_change,
};
/*
@@ -356,6 +400,15 @@ int thermal_notify_tz_gov_change(int tz_id, const char *name)
return thermal_genl_send_event(THERMAL_GENL_EVENT_TZ_GOV_CHANGE, &p);
}
+int thermal_genl_cpu_capability_event(int count,
+ struct thermal_genl_cpu_caps *caps)
+{
+ struct param p = { .cpu_capabilities_count = count, .cpu_capabilities = caps };
+
+ return thermal_genl_send_event(THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE, &p);
+}
+EXPORT_SYMBOL_GPL(thermal_genl_cpu_capability_event);
+
/*************************** Command encoding ********************************/
static int __thermal_genl_cmd_tz_get_id(struct thermal_zone_device *tz,
diff --git a/drivers/thermal/thermal_netlink.h b/drivers/thermal/thermal_netlink.h
index e554f76291f4..1052f523188d 100644
--- a/drivers/thermal/thermal_netlink.h
+++ b/drivers/thermal/thermal_netlink.h
@@ -4,6 +4,12 @@
* Author: Daniel Lezcano <daniel.lezcano@linaro.org>
*/
+struct thermal_genl_cpu_caps {
+ int cpu;
+ int performance;
+ int efficiency;
+};
+
/* Netlink notification function */
#ifdef CONFIG_THERMAL_NETLINK
int __init thermal_netlink_init(void);
@@ -23,6 +29,8 @@ int thermal_notify_cdev_add(int cdev_id, const char *name, int max_state);
int thermal_notify_cdev_delete(int cdev_id);
int thermal_notify_tz_gov_change(int tz_id, const char *name);
int thermal_genl_sampling_temp(int id, int temp);
+int thermal_genl_cpu_capability_event(int count,
+ struct thermal_genl_cpu_caps *caps);
#else
static inline int thermal_netlink_init(void)
{
@@ -101,4 +109,10 @@ static inline int thermal_genl_sampling_temp(int id, int temp)
{
return 0;
}
+
+static inline int thermal_genl_cpu_capability_event(int count, struct thermal_genl_cpu_caps *caps)
+{
+ return 0;
+}
+
#endif /* CONFIG_THERMAL_NETLINK */
diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
index f84375865c97..703039d8b937 100644
--- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
+++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
@@ -21,6 +21,7 @@
#include "ti-thermal.h"
#include "ti-bandgap.h"
+#include "../thermal_hwmon.h"
/* common data structures */
struct ti_thermal_data {
@@ -106,14 +107,6 @@ static inline int __ti_thermal_get_temp(void *devdata, int *temp)
return ret;
}
-static inline int ti_thermal_get_temp(struct thermal_zone_device *thermal,
- int *temp)
-{
- struct ti_thermal_data *data = thermal->devdata;
-
- return __ti_thermal_get_temp(data, temp);
-}
-
static int __ti_thermal_get_trend(void *p, int trip, enum thermal_trend *trend)
{
struct ti_thermal_data *data = p;
@@ -189,6 +182,9 @@ int ti_thermal_expose_sensor(struct ti_bandgap *bgp, int id,
ti_bandgap_set_sensor_data(bgp, id, data);
ti_bandgap_write_update_interval(bgp, data->sensor_id, interval);
+ if (devm_thermal_add_hwmon_sysfs(data->ti_thermal))
+ dev_warn(bgp->dev, "failed to add hwmon sysfs attributes\n");
+
return 0;
}
diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h
index 9aa2fedfa309..fc78bf3aead7 100644
--- a/include/uapi/linux/thermal.h
+++ b/include/uapi/linux/thermal.h
@@ -44,7 +44,10 @@ enum thermal_genl_attr {
THERMAL_GENL_ATTR_CDEV_MAX_STATE,
THERMAL_GENL_ATTR_CDEV_NAME,
THERMAL_GENL_ATTR_GOV_NAME,
-
+ THERMAL_GENL_ATTR_CPU_CAPABILITY,
+ THERMAL_GENL_ATTR_CPU_CAPABILITY_ID,
+ THERMAL_GENL_ATTR_CPU_CAPABILITY_PERFORMANCE,
+ THERMAL_GENL_ATTR_CPU_CAPABILITY_EFFICIENCY,
__THERMAL_GENL_ATTR_MAX,
};
#define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1)
@@ -71,6 +74,7 @@ enum thermal_genl_event {
THERMAL_GENL_EVENT_CDEV_DELETE, /* Cdev unbound */
THERMAL_GENL_EVENT_CDEV_STATE_UPDATE, /* Cdev state updated */
THERMAL_GENL_EVENT_TZ_GOV_CHANGE, /* Governor policy changed */
+ THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE, /* CPU capability changed */
__THERMAL_GENL_EVENT_MAX,
};
#define THERMAL_GENL_EVENT_MAX (__THERMAL_GENL_EVENT_MAX - 1)
diff --git a/tools/power/x86/intel-speed-select/Build b/tools/power/x86/intel-speed-select/Build
index b61456d75190..81e36bd578b1 100644
--- a/tools/power/x86/intel-speed-select/Build
+++ b/tools/power/x86/intel-speed-select/Build
@@ -1 +1 @@
-intel-speed-select-y += isst-config.o isst-core.o isst-display.o
+intel-speed-select-y += isst-config.o isst-core.o isst-display.o isst-daemon.o hfi-events.o
diff --git a/tools/power/x86/intel-speed-select/Makefile b/tools/power/x86/intel-speed-select/Makefile
index 12c6939dca2a..d2fba1297d96 100644
--- a/tools/power/x86/intel-speed-select/Makefile
+++ b/tools/power/x86/intel-speed-select/Makefile
@@ -13,8 +13,8 @@ endif
# Do not use make's built-in rules
# (this improves performance and avoids hard-to-debug behaviour);
MAKEFLAGS += -r
-
-override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
+override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include -I/usr/include/libnl3
+override LDFLAGS += -lnl-genl-3 -lnl-3
ALL_TARGETS := intel-speed-select
ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
@@ -31,7 +31,11 @@ $(OUTPUT)include/linux/isst_if.h: ../../../../include/uapi/linux/isst_if.h
mkdir -p $(OUTPUT)include/linux 2>&1 || true
ln -sf $(CURDIR)/../../../../include/uapi/linux/isst_if.h $@
-prepare: $(OUTPUT)include/linux/isst_if.h
+$(OUTPUT)include/linux/thermal.h: ../../../../include/uapi/linux/thermal.h
+ mkdir -p $(OUTPUT)include/linux 2>&1 || true
+ ln -sf $(CURDIR)/../../../../include/uapi/linux/thermal.h $@
+
+prepare: $(OUTPUT)include/linux/isst_if.h $(OUTPUT)include/linux/thermal.h
ISST_IN := $(OUTPUT)intel-speed-select-in.o
diff --git a/tools/power/x86/intel-speed-select/hfi-events.c b/tools/power/x86/intel-speed-select/hfi-events.c
new file mode 100644
index 000000000000..e85676711372
--- /dev/null
+++ b/tools/power/x86/intel-speed-select/hfi-events.c
@@ -0,0 +1,309 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Speed Select -- Read HFI events for OOB
+ * Copyright (c) 2022 Intel Corporation.
+ */
+
+/*
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+
+ * WPA Supplicant - driver interaction with Linux nl80211/cfg80211
+ * Copyright (c) 2003-2008, Jouni Malinen <j@w1.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Alternatively, this software may be distributed under the terms of
+ * BSD license.
+ *
+ * Requires
+ * libnl-genl-3-dev
+ *
+ * For Fedora/CenOS
+ * dnf install libnl3-devel
+ * For Ubuntu
+ * apt install libnl-3-dev libnl-genl-3-dev
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/file.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <netlink/genl/genl.h>
+#include <netlink/genl/family.h>
+#include <netlink/genl/ctrl.h>
+
+#include <linux/thermal.h>
+#include "isst.h"
+
+struct hfi_event_data {
+ struct nl_sock *nl_handle;
+ struct nl_cb *nl_cb;
+};
+
+struct hfi_event_data drv;
+
+static int ack_handler(struct nl_msg *msg, void *arg)
+{
+ int *err = arg;
+ *err = 0;
+ return NL_STOP;
+}
+
+static int finish_handler(struct nl_msg *msg, void *arg)
+{
+ int *ret = arg;
+ *ret = 0;
+ return NL_SKIP;
+}
+
+static int error_handler(struct sockaddr_nl *nla, struct nlmsgerr *err,
+ void *arg)
+{
+ int *ret = arg;
+ *ret = err->error;
+ return NL_SKIP;
+}
+
+static int seq_check_handler(struct nl_msg *msg, void *arg)
+{
+ return NL_OK;
+}
+
+static int send_and_recv_msgs(struct hfi_event_data *drv,
+ struct nl_msg *msg,
+ int (*valid_handler)(struct nl_msg *, void *),
+ void *valid_data)
+{
+ struct nl_cb *cb;
+ int err = -ENOMEM;
+
+ cb = nl_cb_clone(drv->nl_cb);
+ if (!cb)
+ goto out;
+
+ err = nl_send_auto_complete(drv->nl_handle, msg);
+ if (err < 0)
+ goto out;
+
+ err = 1;
+
+ nl_cb_err(cb, NL_CB_CUSTOM, error_handler, &err);
+ nl_cb_set(cb, NL_CB_FINISH, NL_CB_CUSTOM, finish_handler, &err);
+ nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, ack_handler, &err);
+
+ if (valid_handler)
+ nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM,
+ valid_handler, valid_data);
+
+ while (err > 0)
+ nl_recvmsgs(drv->nl_handle, cb);
+ out:
+ nl_cb_put(cb);
+ nlmsg_free(msg);
+ return err;
+}
+
+struct family_data {
+ const char *group;
+ int id;
+};
+
+static int family_handler(struct nl_msg *msg, void *arg)
+{
+ struct family_data *res = arg;
+ struct nlattr *tb[CTRL_ATTR_MAX + 1];
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *mcgrp;
+ int i;
+
+ nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL);
+ if (!tb[CTRL_ATTR_MCAST_GROUPS])
+ return NL_SKIP;
+
+ nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], i) {
+ struct nlattr *tb2[CTRL_ATTR_MCAST_GRP_MAX + 1];
+ nla_parse(tb2, CTRL_ATTR_MCAST_GRP_MAX, nla_data(mcgrp),
+ nla_len(mcgrp), NULL);
+ if (!tb2[CTRL_ATTR_MCAST_GRP_NAME] ||
+ !tb2[CTRL_ATTR_MCAST_GRP_ID] ||
+ strncmp(nla_data(tb2[CTRL_ATTR_MCAST_GRP_NAME]),
+ res->group,
+ nla_len(tb2[CTRL_ATTR_MCAST_GRP_NAME])) != 0)
+ continue;
+ res->id = nla_get_u32(tb2[CTRL_ATTR_MCAST_GRP_ID]);
+ break;
+ };
+
+ return 0;
+}
+
+static int nl_get_multicast_id(struct hfi_event_data *drv,
+ const char *family, const char *group)
+{
+ struct nl_msg *msg;
+ int ret = -1;
+ struct family_data res = { group, -ENOENT };
+
+ msg = nlmsg_alloc();
+ if (!msg)
+ return -ENOMEM;
+ genlmsg_put(msg, 0, 0, genl_ctrl_resolve(drv->nl_handle, "nlctrl"),
+ 0, 0, CTRL_CMD_GETFAMILY, 0);
+ NLA_PUT_STRING(msg, CTRL_ATTR_FAMILY_NAME, family);
+
+ ret = send_and_recv_msgs(drv, msg, family_handler, &res);
+ msg = NULL;
+ if (ret == 0)
+ ret = res.id;
+
+nla_put_failure:
+ nlmsg_free(msg);
+ return ret;
+}
+
+struct perf_cap {
+ int cpu;
+ int perf;
+ int eff;
+};
+
+static void process_hfi_event(struct perf_cap *perf_cap)
+{
+ process_level_change(perf_cap->cpu);
+}
+
+static int handle_event(struct nl_msg *n, void *arg)
+{
+ struct nlmsghdr *nlh = nlmsg_hdr(n);
+ struct genlmsghdr *genlhdr = genlmsg_hdr(nlh);
+ struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1];
+ int ret;
+ struct perf_cap perf_cap;
+
+ ret = genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL);
+
+ debug_printf("Received event %d parse_rer:%d\n", genlhdr->cmd, ret);
+ if (genlhdr->cmd == THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE) {
+ struct nlattr *cap;
+ int j, index = 0;
+
+ debug_printf("THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE\n");
+ nla_for_each_nested(cap, attrs[THERMAL_GENL_ATTR_CPU_CAPABILITY], j) {
+ switch (index) {
+ case 0:
+ perf_cap.cpu = nla_get_u32(cap);
+ break;
+ case 1:
+ perf_cap.perf = nla_get_u32(cap);
+ break;
+ case 2:
+ perf_cap.eff = nla_get_u32(cap);
+ break;
+ default:
+ break;
+ }
+ ++index;
+ if (index == 3) {
+ index = 0;
+ process_hfi_event(&perf_cap);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int _hfi_exit;
+
+static int check_hf_suport(void)
+{
+ unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
+
+ __cpuid(6, eax, ebx, ecx, edx);
+ if (eax & BIT(19))
+ return 1;
+
+ return 0;
+}
+
+int hfi_main(void)
+{
+ struct nl_sock *sock;
+ struct nl_cb *cb;
+ int err = 0;
+ int mcast_id;
+ int no_block = 0;
+
+ if (!check_hf_suport()) {
+ fprintf(stderr, "CPU Doesn't support HFI\n");
+ return -1;
+ }
+
+ sock = nl_socket_alloc();
+ if (!sock) {
+ fprintf(stderr, "nl_socket_alloc failed\n");
+ return -1;
+ }
+
+ if (genl_connect(sock)) {
+ fprintf(stderr, "genl_connect(sk_event) failed\n");
+ goto free_sock;
+ }
+
+ drv.nl_handle = sock;
+ drv.nl_cb = cb = nl_cb_alloc(NL_CB_DEFAULT);
+ if (drv.nl_cb == NULL) {
+ printf("Failed to allocate netlink callbacks");
+ goto free_sock;
+ }
+
+ mcast_id = nl_get_multicast_id(&drv, THERMAL_GENL_FAMILY_NAME,
+ THERMAL_GENL_EVENT_GROUP_NAME);
+ if (mcast_id < 0) {
+ fprintf(stderr, "nl_get_multicast_id failed\n");
+ goto free_sock;
+ }
+
+ if (nl_socket_add_membership(sock, mcast_id)) {
+ fprintf(stderr, "nl_socket_add_membership failed");
+ goto free_sock;
+ }
+
+ nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, seq_check_handler, 0);
+ nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, handle_event, NULL);
+
+ if (no_block)
+ nl_socket_set_nonblocking(sock);
+
+ debug_printf("hfi is initialized\n");
+
+ while (!_hfi_exit && !err) {
+ err = nl_recvmsgs(sock, cb);
+ debug_printf("nl_recv_message err:%d\n", err);
+ }
+
+ return 0;
+
+ /* Netlink library doesn't have calls to dealloc cb or disconnect */
+free_sock:
+ nl_socket_free(sock);
+
+ return -1;
+}
+
+void hfi_exit(void)
+{
+ _hfi_exit = 1;
+}
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index efe72fa48224..060390e88e37 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -15,7 +15,8 @@ struct process_cmd_struct {
int arg;
};
-static const char *version_str = "v1.11";
+static const char *version_str = "v1.12";
+
static const int supported_api_ver = 1;
static struct isst_if_platform_info isst_platform_info;
static char *progname;
@@ -368,7 +369,7 @@ int get_topo_max_cpus(void)
return topo_max_cpus;
}
-static void set_cpu_online_offline(int cpu, int state)
+void set_cpu_online_offline(int cpu, int state)
{
char buffer[128];
int fd, ret;
@@ -409,12 +410,10 @@ static void force_all_cpus_online(void)
unlink("/var/run/isst_cpu_topology.dat");
}
-#define MAX_PACKAGE_COUNT 8
-#define MAX_DIE_PER_PACKAGE 2
-static void for_each_online_package_in_set(void (*callback)(int, void *, void *,
- void *, void *),
- void *arg1, void *arg2, void *arg3,
- void *arg4)
+void for_each_online_package_in_set(void (*callback)(int, void *, void *,
+ void *, void *),
+ void *arg1, void *arg2, void *arg3,
+ void *arg4)
{
int max_packages[MAX_PACKAGE_COUNT * MAX_PACKAGE_COUNT];
int pkg_index = 0, i;
@@ -2803,7 +2802,9 @@ static void usage(void)
printf("\t[-p|--pause] : Delay between two mail box commands in milliseconds\n");
printf("\t[-r|--retry] : Retry count for mail box commands on failure, default 3\n");
printf("\t[-v|--version] : Print version\n");
-
+ printf("\t[-b|--oob : Start a daemon to process HFI events for perf profile change from Out of Band agent.\n");
+ printf("\t[-n|--no-daemon : Don't run as daemon. By default --oob will turn on daemon mode\n");
+ printf("\t[-w|--delay : Delay for reading config level state change in OOB poll mode.\n");
printf("\nResult format\n");
printf("\tResult display uses a common format for each command:\n");
printf("\tResults are formatted in text/JSON with\n");
@@ -2837,6 +2838,9 @@ static void cmdline(int argc, char **argv)
int opt, force_cpus_online = 0;
int option_index = 0;
int ret;
+ int oob_mode = 0;
+ int poll_interval = -1;
+ int no_daemon = 0;
static struct option long_options[] = {
{ "all-cpus-online", no_argument, 0, 'a' },
@@ -2849,6 +2853,9 @@ static void cmdline(int argc, char **argv)
{ "out", required_argument, 0, 'o' },
{ "retry", required_argument, 0, 'r' },
{ "version", no_argument, 0, 'v' },
+ { "oob", no_argument, 0, 'b' },
+ { "no-daemon", no_argument, 0, 'n' },
+ { "poll-interval", required_argument, 0, 'w' },
{ 0, 0, 0, 0 }
};
@@ -2875,7 +2882,7 @@ static void cmdline(int argc, char **argv)
}
progname = argv[0];
- while ((opt = getopt_long_only(argc, argv, "+c:df:hio:va", long_options,
+ while ((opt = getopt_long_only(argc, argv, "+c:df:hio:vabw:n", long_options,
&option_index)) != -1) {
switch (opt) {
case 'a':
@@ -2920,12 +2927,26 @@ static void cmdline(int argc, char **argv)
case 'v':
print_version();
break;
+ case 'b':
+ oob_mode = 1;
+ break;
+ case 'n':
+ no_daemon = 1;
+ break;
+ case 'w':
+ ret = strtol(optarg, &ptr, 10);
+ if (!ret) {
+ fprintf(stderr, "Invalid poll interval count\n");
+ exit(0);
+ }
+ poll_interval = ret;
+ break;
default:
usage();
}
}
- if (optind > (argc - 2)) {
+ if (optind > (argc - 2) && !oob_mode) {
usage();
exit(0);
}
@@ -2936,6 +2957,17 @@ static void cmdline(int argc, char **argv)
set_cpu_present_cpu_mask();
set_cpu_target_cpu_mask();
+ if (oob_mode) {
+ create_cpu_map();
+ if (debug_flag)
+ fprintf(stderr, "OOB mode is enabled in debug mode\n");
+
+ ret = isst_daemon(debug_flag, poll_interval, no_daemon);
+ if (ret)
+ fprintf(stderr, "OOB mode enable failed\n");
+ goto out;
+ }
+
if (!is_clx_n_platform()) {
ret = isst_fill_platform_info();
if (ret)
diff --git a/tools/power/x86/intel-speed-select/isst-daemon.c b/tools/power/x86/intel-speed-select/isst-daemon.c
new file mode 100644
index 000000000000..dd372924bc82
--- /dev/null
+++ b/tools/power/x86/intel-speed-select/isst-daemon.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Speed Select -- Allow speed select to daemonize
+ * Copyright (c) 2022 Intel Corporation.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/file.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <time.h>
+
+#include "isst.h"
+
+static int per_package_levels_info[MAX_PACKAGE_COUNT][MAX_DIE_PER_PACKAGE];
+static time_t per_package_levels_tm[MAX_PACKAGE_COUNT][MAX_DIE_PER_PACKAGE];
+
+static void init_levels(void)
+{
+ int i, j;
+
+ for (i = 0; i < MAX_PACKAGE_COUNT; ++i)
+ for (j = 0; j < MAX_DIE_PER_PACKAGE; ++j)
+ per_package_levels_info[i][j] = -1;
+}
+
+void process_level_change(int cpu)
+{
+ struct isst_pkg_ctdp_level_info ctdp_level;
+ int pkg_id = get_physical_package_id(cpu);
+ int die_id = get_physical_die_id(cpu);
+ struct isst_pkg_ctdp pkg_dev;
+ time_t tm;
+ int ret;
+
+ if (pkg_id >= MAX_PACKAGE_COUNT || die_id > MAX_DIE_PER_PACKAGE) {
+ debug_printf("Invalid package/die info for cpu:%d\n", cpu);
+ return;
+ }
+
+ tm = time(NULL);
+ if (tm - per_package_levels_tm[pkg_id][die_id] < 2 )
+ return;
+
+ per_package_levels_tm[pkg_id][die_id] = tm;
+
+ ret = isst_get_ctdp_levels(cpu, &pkg_dev);
+ if (ret) {
+ debug_printf("Can't get tdp levels for cpu:%d\n", cpu);
+ return;
+ }
+
+ debug_printf("Get Config level %d pkg:%d die:%d current_level:%d \n", cpu,
+ pkg_id, die_id, pkg_dev.current_level);
+
+ if (pkg_dev.locked) {
+ debug_printf("config TDP s locked \n");
+ return;
+ }
+
+ if (per_package_levels_info[pkg_id][die_id] == pkg_dev.current_level)
+ return;
+
+ debug_printf("**Config level change for cpu:%d pkg:%d die:%d from %d to %d\n",
+ cpu, pkg_id, die_id, per_package_levels_info[pkg_id][die_id],
+ pkg_dev.current_level);
+
+ per_package_levels_info[pkg_id][die_id] = pkg_dev.current_level;
+
+ ctdp_level.core_cpumask_size =
+ alloc_cpu_set(&ctdp_level.core_cpumask);
+ ret = isst_get_coremask_info(cpu, pkg_dev.current_level, &ctdp_level);
+ if (ret) {
+ free_cpu_set(ctdp_level.core_cpumask);
+ debug_printf("Can't get core_mask:%d\n", cpu);
+ return;
+ }
+
+ if (ctdp_level.cpu_count) {
+ int i, max_cpus = get_topo_max_cpus();
+ for (i = 0; i < max_cpus; ++i) {
+ if (pkg_id != get_physical_package_id(i) || die_id != get_physical_die_id(i))
+ continue;
+ if (CPU_ISSET_S(i, ctdp_level.core_cpumask_size, ctdp_level.core_cpumask)) {
+ fprintf(stderr, "online cpu %d\n", i);
+ set_cpu_online_offline(i, 1);
+ } else {
+ fprintf(stderr, "offline cpu %d\n", i);
+ set_cpu_online_offline(i, 0);
+ }
+ }
+ }
+
+ free_cpu_set(ctdp_level.core_cpumask);
+}
+
+static void _poll_for_config_change(int cpu, void *arg1, void *arg2,
+ void *arg3, void *arg4)
+{
+ process_level_change(cpu);
+}
+
+static void poll_for_config_change(void)
+{
+ for_each_online_package_in_set(_poll_for_config_change, NULL, NULL,
+ NULL, NULL);
+}
+
+static int done = 0;
+static int pid_file_handle;
+
+static void signal_handler(int sig)
+{
+ switch (sig) {
+ case SIGINT:
+ case SIGTERM:
+ done = 1;
+ hfi_exit();
+ exit(0);
+ break;
+ default:
+ break;
+ }
+}
+
+static void daemonize(char *rundir, char *pidfile)
+{
+ int pid, sid, i;
+ char str[10];
+ struct sigaction sig_actions;
+ sigset_t sig_set;
+ int ret;
+
+ if (getppid() == 1)
+ return;
+
+ sigemptyset(&sig_set);
+ sigaddset(&sig_set, SIGCHLD);
+ sigaddset(&sig_set, SIGTSTP);
+ sigaddset(&sig_set, SIGTTOU);
+ sigaddset(&sig_set, SIGTTIN);
+ sigprocmask(SIG_BLOCK, &sig_set, NULL);
+
+ sig_actions.sa_handler = signal_handler;
+ sigemptyset(&sig_actions.sa_mask);
+ sig_actions.sa_flags = 0;
+
+ sigaction(SIGHUP, &sig_actions, NULL);
+ sigaction(SIGTERM, &sig_actions, NULL);
+ sigaction(SIGINT, &sig_actions, NULL);
+
+ pid = fork();
+ if (pid < 0) {
+ /* Could not fork */
+ exit(EXIT_FAILURE);
+ }
+ if (pid > 0)
+ exit(EXIT_SUCCESS);
+
+ umask(027);
+
+ sid = setsid();
+ if (sid < 0)
+ exit(EXIT_FAILURE);
+
+ /* close all descriptors */
+ for (i = getdtablesize(); i >= 0; --i)
+ close(i);
+
+ i = open("/dev/null", O_RDWR);
+ ret = dup(i);
+ if (ret == -1)
+ exit(EXIT_FAILURE);
+
+ ret = dup(i);
+ if (ret == -1)
+ exit(EXIT_FAILURE);
+
+ ret = chdir(rundir);
+ if (ret == -1)
+ exit(EXIT_FAILURE);
+
+ pid_file_handle = open(pidfile, O_RDWR | O_CREAT, 0600);
+ if (pid_file_handle == -1) {
+ /* Couldn't open lock file */
+ exit(1);
+ }
+ /* Try to lock file */
+#ifdef LOCKF_SUPPORT
+ if (lockf(pid_file_handle, F_TLOCK, 0) == -1) {
+#else
+ if (flock(pid_file_handle, LOCK_EX|LOCK_NB) < 0) {
+#endif
+ /* Couldn't get lock on lock file */
+ fprintf(stderr, "Couldn't get lock file %d\n", getpid());
+ exit(1);
+ }
+ snprintf(str, sizeof(str), "%d\n", getpid());
+ ret = write(pid_file_handle, str, strlen(str));
+ if (ret == -1)
+ exit(EXIT_FAILURE);
+
+ close(i);
+}
+
+int isst_daemon(int debug_mode, int poll_interval, int no_daemon)
+{
+ int ret;
+
+ if (!no_daemon && poll_interval < 0 && !debug_mode) {
+ fprintf(stderr, "OOB mode is enabled and will run as daemon\n");
+ daemonize((char *) "/tmp/",
+ (char *)"/tmp/hfi-events.pid");
+ } else {
+ signal(SIGINT, signal_handler);
+ }
+
+ init_levels();
+
+ if (poll_interval < 0) {
+ ret = hfi_main();
+ if (ret) {
+ fprintf(stderr, "HFI initialization failed\n");
+ }
+ fprintf(stderr, "Must specify poll-interval\n");
+ return ret;
+ }
+
+ debug_printf("Starting loop\n");
+ while (!done) {
+ sleep(poll_interval);
+ poll_for_config_change();
+ }
+
+ return 0;
+}
diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h
index 1aa15d5ea57c..0796d8c6a882 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x86/intel-speed-select/isst.h
@@ -76,6 +76,9 @@
#define DISP_FREQ_MULTIPLIER 100
+#define MAX_PACKAGE_COUNT 8
+#define MAX_DIE_PER_PACKAGE 2
+
struct isst_clos_config {
int pkg_id;
int die_id;
@@ -260,4 +263,14 @@ extern int is_skx_based_platform(void);
extern int is_spr_platform(void);
extern int is_icx_platform(void);
extern void isst_trl_display_information(int cpu, FILE *outf, unsigned long long trl);
+
+extern void set_cpu_online_offline(int cpu, int state);
+extern void for_each_online_package_in_set(void (*callback)(int, void *, void *,
+ void *, void *),
+ void *arg1, void *arg2, void *arg3,
+ void *arg4);
+extern int isst_daemon(int debug_mode, int poll_interval, int no_daemon);
+extern void process_level_change(int cpu);
+extern int hfi_main(void);
+extern void hfi_exit(void);
#endif