summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-08-14 13:12:24 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-08-14 13:12:24 -0700
commitb018fc9800557bd14a40d69501e19c340eb2c521 (patch)
tree541109645e83725699d2b091a1c6c4816fdc6649
parentc07b3682cd12a017f976ec63bbd4758dc4c5100e (diff)
parent7425ecd5e3e8c9d84f399a102282a23a90a19278 (diff)
downloadlinux-b018fc9800557bd14a40d69501e19c340eb2c521.tar.gz
linux-b018fc9800557bd14a40d69501e19c340eb2c521.tar.bz2
linux-b018fc9800557bd14a40d69501e19c340eb2c521.zip
Merge tag 'pm-4.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management updates from Rafael Wysocki: "These add a new framework for CPU idle time injection, to be used by all of the idle injection code in the kernel in the future, fix some issues and add a number of relatively small extensions in multiple places. Specifics: - Add a new framework for CPU idle time injection (Daniel Lezcano). - Add AVS support to the armada-37xx cpufreq driver (Gregory CLEMENT). - Add support for current CPU frequency reporting to the ACPI CPPC cpufreq driver (George Cherian). - Rework the cooling device registration in the imx6q/thermal driver (Bastian Stender). - Make the pcc-cpufreq driver refuse to work with dynamic scaling governors on systems with many CPUs to avoid scalability issues with it (Rafael Wysocki). - Fix the intel_pstate driver to report different maximum CPU frequencies on systems where they really are different and to ignore the turbo active ratio if hardware-managend P-states (HWP) are in use; make it use the match_string() helper (Xie Yisheng, Srinivas Pandruvada). - Fix a minor deferred probe issue in the qcom-kryo cpufreq driver (Niklas Cassel). - Add a tracepoint for the tracking of frequency limits changes (from Andriod) to the cpufreq core (Ruchi Kandoi). - Fix a circular lock dependency between CPU hotplug and sysfs locking in the cpufreq core reported by lockdep (Waiman Long). - Avoid excessive error reports on driver registration failures in the ARM cpuidle driver (Sudeep Holla). - Add a new device links flag to the driver core to make links go away automatically on supplier driver removal (Vivek Gautam). - Eliminate potential race condition between system-wide power management transitions and system shutdown (Pingfan Liu). - Add a quirk to save NVS memory on system suspend for the ASUS 1025C laptop (Willy Tarreau). - Make more systems use suspend-to-idle (instead of ACPI S3) by default (Tristian Celestin). - Get rid of stack VLA usage in the low-level hibernation code on 64-bit x86 (Kees Cook). - Fix error handling in the hibernation core and mark an expected fall-through switch in it (Chengguang Xu, Gustavo Silva). - Extend the generic power domains (genpd) framework to support attaching a device to a power domain by name (Ulf Hansson). - Fix device reference counting and user limits initialization in the devfreq core (Arvind Yadav, Matthias Kaehlcke). - Fix a few issues in the rk3399_dmc devfreq driver and improve its documentation (Enric Balletbo i Serra, Lin Huang, Nick Milner). - Drop a redundant error message from the exynos-ppmu devfreq driver (Markus Elfring)" * tag 'pm-4.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (35 commits) PM / reboot: Eliminate race between reboot and suspend PM / hibernate: Mark expected switch fall-through cpufreq: intel_pstate: Ignore turbo active ratio in HWP cpufreq: Fix a circular lock dependency problem cpu/hotplug: Add a cpus_read_trylock() function x86/power/hibernate_64: Remove VLA usage cpufreq: trace frequency limits change cpufreq: intel_pstate: Show different max frequency with turbo 3 and HWP cpufreq: pcc-cpufreq: Disable dynamic scaling on many-CPU systems cpufreq: qcom-kryo: Silently error out on EPROBE_DEFER cpufreq / CPPC: Add cpuinfo_cur_freq support for CPPC cpufreq: armada-37xx: Add AVS support dt-bindings: marvell: Add documentation for the Armada 3700 AVS binding PM / devfreq: rk3399_dmc: Fix duplicated opp table on reload. PM / devfreq: Init user limits from OPP limits, not viceversa PM / devfreq: rk3399_dmc: fix spelling mistakes. PM / devfreq: rk3399_dmc: do not print error when get supply and clk defer. dt-bindings: devfreq: rk3399_dmc: move interrupts to be optional. PM / devfreq: rk3399_dmc: remove wait for dcf irq event. dt-bindings: clock: add rk3399 DDR3 standard speed bins. ...
-rw-r--r--Documentation/devicetree/bindings/arm/marvell/armada-37xx.txt15
-rw-r--r--Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt211
-rw-r--r--Documentation/devicetree/bindings/power/power_domain.txt8
-rw-r--r--Documentation/driver-api/device_link.rst12
-rw-r--r--Documentation/power/freezing-of-tasks.txt12
-rw-r--r--Documentation/power/suspend-and-cpuhotplug.txt6
-rw-r--r--Documentation/trace/events-power.rst1
-rw-r--r--arch/x86/power/hibernate_64.c36
-rw-r--r--drivers/acpi/sleep.c30
-rw-r--r--drivers/base/core.c25
-rw-r--r--drivers/base/power/common.c17
-rw-r--r--drivers/base/power/domain.c24
-rw-r--r--drivers/cpufreq/armada-37xx-cpufreq.c163
-rw-r--r--drivers/cpufreq/cppc_cpufreq.c52
-rw-r--r--drivers/cpufreq/cpufreq.c8
-rw-r--r--drivers/cpufreq/imx6q-cpufreq.c21
-rw-r--r--drivers/cpufreq/intel_pstate.c27
-rw-r--r--drivers/cpufreq/pcc-cpufreq.c9
-rw-r--r--drivers/cpufreq/qcom-cpufreq-kryo.c5
-rw-r--r--drivers/cpuidle/cpuidle-arm.c3
-rw-r--r--drivers/devfreq/devfreq.c16
-rw-r--r--drivers/devfreq/event/exynos-ppmu.c6
-rw-r--r--drivers/devfreq/rk3399_dmc.c102
-rw-r--r--drivers/gpu/drm/tegra/dc.c2
-rw-r--r--drivers/gpu/ipu-v3/ipu-pre.c3
-rw-r--r--drivers/gpu/ipu-v3/ipu-prg.c3
-rw-r--r--drivers/powercap/Kconfig10
-rw-r--r--drivers/powercap/Makefile1
-rw-r--r--drivers/powercap/idle_inject.c356
-rw-r--r--drivers/soc/imx/gpc.c2
-rw-r--r--drivers/thermal/imx_thermal.c28
-rw-r--r--include/dt-bindings/clock/rk3399-ddr.h56
-rw-r--r--include/linux/cpu.h2
-rw-r--r--include/linux/device.h14
-rw-r--r--include/linux/idle_inject.h29
-rw-r--r--include/linux/pm_domain.h15
-rw-r--r--include/linux/suspend.h2
-rw-r--r--include/trace/events/power.h25
-rw-r--r--kernel/cpu.c6
-rw-r--r--kernel/freezer.c4
-rw-r--r--kernel/power/hibernate.c16
-rw-r--r--kernel/power/main.c12
-rw-r--r--kernel/power/suspend.c4
-rw-r--r--kernel/power/swap.c4
-rw-r--r--kernel/power/user.c4
-rw-r--r--kernel/reboot.c6
-rw-r--r--mm/page_alloc.c11
47 files changed, 1139 insertions, 285 deletions
diff --git a/Documentation/devicetree/bindings/arm/marvell/armada-37xx.txt b/Documentation/devicetree/bindings/arm/marvell/armada-37xx.txt
index 35c3c3460d17..eddde4faef01 100644
--- a/Documentation/devicetree/bindings/arm/marvell/armada-37xx.txt
+++ b/Documentation/devicetree/bindings/arm/marvell/armada-37xx.txt
@@ -33,3 +33,18 @@ nb_pm: syscon@14000 {
compatible = "marvell,armada-3700-nb-pm", "syscon";
reg = <0x14000 0x60>;
}
+
+AVS
+---
+
+For AVS an other component is needed:
+
+Required properties:
+- compatible : should contain "marvell,armada-3700-avs", "syscon";
+- reg : the register start and length for the AVS
+
+Example:
+avs: avs@11500 {
+ compatible = "marvell,armada-3700-avs", "syscon";
+ reg = <0x11500 0x40>;
+}
diff --git a/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt b/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt
index fc2bcbe26b1e..0ec68141f85a 100644
--- a/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt
+++ b/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt
@@ -1,14 +1,10 @@
-* Rockchip rk3399 DMC(Dynamic Memory Controller) device
+* Rockchip rk3399 DMC (Dynamic Memory Controller) device
Required properties:
- compatible: Must be "rockchip,rk3399-dmc".
- devfreq-events: Node to get DDR loading, Refer to
- Documentation/devicetree/bindings/devfreq/
+ Documentation/devicetree/bindings/devfreq/event/
rockchip-dfi.txt
-- interrupts: The interrupt number to the CPU. The interrupt
- specifier format depends on the interrupt controller.
- It should be DCF interrupts, when DDR dvfs finish,
- it will happen.
- clocks: Phandles for clock specified in "clock-names" property
- clock-names : The name of clock used by the DFI, must be
"pclk_ddr_mon";
@@ -17,139 +13,148 @@ Required properties:
- center-supply: DMC supply node.
- status: Marks the node enabled/disabled.
-Following properties are ddr timing:
-
-- rockchip,dram_speed_bin : Value reference include/dt-bindings/clock/ddr.h,
- it select ddr3 cl-trp-trcd type, default value
- "DDR3_DEFAULT".it must selected according to
- "Speed Bin" in ddr3 datasheet, DO NOT use
- smaller "Speed Bin" than ddr3 exactly is.
-
-- rockchip,pd_idle : Config the PD_IDLE value, defined the power-down
- idle period, memories are places into power-down
- mode if bus is idle for PD_IDLE DFI clocks.
-
-- rockchip,sr_idle : Configure the SR_IDLE value, defined the
- selfrefresh idle period, memories are places
- into self-refresh mode if bus is idle for
- SR_IDLE*1024 DFI clocks (DFI clocks freq is
- half of dram's clocks), defaule value is "0".
-
-- rockchip,sr_mc_gate_idle : Defined the self-refresh with memory and
- controller clock gating idle period, memories
- are places into self-refresh mode and memory
- controller clock arg gating if bus is idle for
- sr_mc_gate_idle*1024 DFI clocks.
-
-- rockchip,srpd_lite_idle : Defined the self-refresh power down idle
- period, memories are places into self-refresh
- power down mode if bus is idle for
- srpd_lite_idle*1024 DFI clocks. This parameter
- is for LPDDR4 only.
-
-- rockchip,standby_idle : Defined the standby idle period, memories are
- places into self-refresh than controller, pi,
- phy and dram clock will gating if bus is idle
- for standby_idle * DFI clocks.
-
-- rockchip,dram_dll_disb_freq : It's defined the DDR3 dll bypass frequency in
- MHz, when ddr freq less than DRAM_DLL_DISB_FREQ,
- ddr3 dll will bypssed note: if dll was bypassed,
- the odt also stop working.
-
-- rockchip,phy_dll_disb_freq : Defined the PHY dll bypass frequency in
- MHz (Mega Hz), when ddr freq less than
- DRAM_DLL_DISB_FREQ, phy dll will bypssed.
- note: phy dll and phy odt are independent.
-
-- rockchip,ddr3_odt_disb_freq : When dram type is DDR3, this parameter defined
- the odt disable frequency in MHz (Mega Hz),
- when ddr frequency less then ddr3_odt_disb_freq,
- the odt on dram side and controller side are
+Optional properties:
+- interrupts: The CPU interrupt number. The interrupt specifier
+ format depends on the interrupt controller.
+ It should be a DCF interrupt. When DDR DVFS finishes
+ a DCF interrupt is triggered.
+
+Following properties relate to DDR timing:
+
+- rockchip,dram_speed_bin : Value reference include/dt-bindings/clock/rk3399-ddr.h,
+ it selects the DDR3 cl-trp-trcd type. It must be
+ set according to "Speed Bin" in DDR3 datasheet,
+ DO NOT use a smaller "Speed Bin" than specified
+ for the DDR3 being used.
+
+- rockchip,pd_idle : Configure the PD_IDLE value. Defines the
+ power-down idle period in which memories are
+ placed into power-down mode if bus is idle
+ for PD_IDLE DFI clock cycles.
+
+- rockchip,sr_idle : Configure the SR_IDLE value. Defines the
+ self-refresh idle period in which memories are
+ placed into self-refresh mode if bus is idle
+ for SR_IDLE * 1024 DFI clock cycles (DFI
+ clocks freq is half of DRAM clock), default
+ value is "0".
+
+- rockchip,sr_mc_gate_idle : Defines the memory self-refresh and controller
+ clock gating idle period. Memories are placed
+ into self-refresh mode and memory controller
+ clock arg gating started if bus is idle for
+ sr_mc_gate_idle*1024 DFI clock cycles.
+
+- rockchip,srpd_lite_idle : Defines the self-refresh power down idle
+ period in which memories are placed into
+ self-refresh power down mode if bus is idle
+ for srpd_lite_idle * 1024 DFI clock cycles.
+ This parameter is for LPDDR4 only.
+
+- rockchip,standby_idle : Defines the standby idle period in which
+ memories are placed into self-refresh mode.
+ The controller, pi, PHY and DRAM clock will
+ be gated if bus is idle for standby_idle * DFI
+ clock cycles.
+
+- rockchip,dram_dll_dis_freq : Defines the DDR3 DLL bypass frequency in MHz.
+ When DDR frequency is less than DRAM_DLL_DISB_FREQ,
+ DDR3 DLL will be bypassed. Note: if DLL was bypassed,
+ the odt will also stop working.
+
+- rockchip,phy_dll_dis_freq : Defines the PHY dll bypass frequency in
+ MHz (Mega Hz). When DDR frequency is less than
+ DRAM_DLL_DISB_FREQ, PHY DLL will be bypassed.
+ Note: PHY DLL and PHY ODT are independent.
+
+- rockchip,ddr3_odt_dis_freq : When the DRAM type is DDR3, this parameter defines
+ the ODT disable frequency in MHz (Mega Hz).
+ when the DDR frequency is less then ddr3_odt_dis_freq,
+ the ODT on the DRAM side and controller side are
both disabled.
-- rockchip,ddr3_drv : When dram type is DDR3, this parameter define
- the dram side driver stength in ohm, default
+- rockchip,ddr3_drv : When the DRAM type is DDR3, this parameter defines
+ the DRAM side driver strength in ohms. Default
value is DDR3_DS_40ohm.
-- rockchip,ddr3_odt : When dram type is DDR3, this parameter define
- the dram side ODT stength in ohm, default value
+- rockchip,ddr3_odt : When the DRAM type is DDR3, this parameter defines
+ the DRAM side ODT strength in ohms. Default value
is DDR3_ODT_120ohm.
-- rockchip,phy_ddr3_ca_drv : When dram type is DDR3, this parameter define
- the phy side CA line(incluing command line,
+- rockchip,phy_ddr3_ca_drv : When the DRAM type is DDR3, this parameter defines
+ the phy side CA line (incluing command line,
address line and clock line) driver strength.
Default value is PHY_DRV_ODT_40.
-- rockchip,phy_ddr3_dq_drv : When dram type is DDR3, this parameter define
- the phy side DQ line(incluing DQS/DQ/DM line)
- driver strength. default value is PHY_DRV_ODT_40.
+- rockchip,phy_ddr3_dq_drv : When the DRAM type is DDR3, this parameter defines
+ the PHY side DQ line (including DQS/DQ/DM line)
+ driver strength. Default value is PHY_DRV_ODT_40.
-- rockchip,phy_ddr3_odt : When dram type is DDR3, this parameter define the
- phy side odt strength, default value is
+- rockchip,phy_ddr3_odt : When the DRAM type is DDR3, this parameter defines
+ the PHY side ODT strength. Default value is
PHY_DRV_ODT_240.
-- rockchip,lpddr3_odt_disb_freq : When dram type is LPDDR3, this parameter defined
- then odt disable frequency in MHz (Mega Hz),
- when ddr frequency less then ddr3_odt_disb_freq,
- the odt on dram side and controller side are
+- rockchip,lpddr3_odt_dis_freq : When the DRAM type is LPDDR3, this parameter defines
+ then ODT disable frequency in MHz (Mega Hz).
+ When DDR frequency is less then ddr3_odt_dis_freq,
+ the ODT on the DRAM side and controller side are
both disabled.
-- rockchip,lpddr3_drv : When dram type is LPDDR3, this parameter define
- the dram side driver stength in ohm, default
+- rockchip,lpddr3_drv : When the DRAM type is LPDDR3, this parameter defines
+ the DRAM side driver strength in ohms. Default
value is LP3_DS_34ohm.
-- rockchip,lpddr3_odt : When dram type is LPDDR3, this parameter define
- the dram side ODT stength in ohm, default value
+- rockchip,lpddr3_odt : When the DRAM type is LPDDR3, this parameter defines
+ the DRAM side ODT strength in ohms. Default value
is LP3_ODT_240ohm.
-- rockchip,phy_lpddr3_ca_drv : When dram type is LPDDR3, this parameter define
- the phy side CA line(incluing command line,
+- rockchip,phy_lpddr3_ca_drv : When the DRAM type is LPDDR3, this parameter defines
+ the PHY side CA line (including command line,
address line and clock line) driver strength.
- default value is PHY_DRV_ODT_40.
+ Default value is PHY_DRV_ODT_40.
-- rockchip,phy_lpddr3_dq_drv : When dram type is LPDDR3, this parameter define
- the phy side DQ line(incluing DQS/DQ/DM line)
- driver strength. default value is
+- rockchip,phy_lpddr3_dq_drv : When the DRAM type is LPDDR3, this parameter defines
+ the PHY side DQ line (including DQS/DQ/DM line)
+ driver strength. Default value is
PHY_DRV_ODT_40.
- rockchip,phy_lpddr3_odt : When dram type is LPDDR3, this parameter define
the phy side odt strength, default value is
PHY_DRV_ODT_240.
-- rockchip,lpddr4_odt_disb_freq : When dram type is LPDDR4, this parameter
- defined the odt disable frequency in
- MHz (Mega Hz), when ddr frequency less then
- ddr3_odt_disb_freq, the odt on dram side and
+- rockchip,lpddr4_odt_dis_freq : When the DRAM type is LPDDR4, this parameter
+ defines the ODT disable frequency in
+ MHz (Mega Hz). When the DDR frequency is less then
+ ddr3_odt_dis_freq, the ODT on the DRAM side and
controller side are both disabled.
-- rockchip,lpddr4_drv : When dram type is LPDDR4, this parameter define
- the dram side driver stength in ohm, default
+- rockchip,lpddr4_drv : When the DRAM type is LPDDR4, this parameter defines
+ the DRAM side driver strength in ohms. Default
value is LP4_PDDS_60ohm.
-- rockchip,lpddr4_dq_odt : When dram type is LPDDR4, this parameter define
- the dram side ODT on dqs/dq line stength in ohm,
- default value is LP4_DQ_ODT_40ohm.
+- rockchip,lpddr4_dq_odt : When the DRAM type is LPDDR4, this parameter defines
+ the DRAM side ODT on DQS/DQ line strength in ohms.
+ Default value is LP4_DQ_ODT_40ohm.
-- rockchip,lpddr4_ca_odt : When dram type is LPDDR4, this parameter define
- the dram side ODT on ca line stength in ohm,
- default value is LP4_CA_ODT_40ohm.
+- rockchip,lpddr4_ca_odt : When the DRAM type is LPDDR4, this parameter defines
+ the DRAM side ODT on CA line strength in ohms.
+ Default value is LP4_CA_ODT_40ohm.
-- rockchip,phy_lpddr4_ca_drv : When dram type is LPDDR4, this parameter define
- the phy side CA line(incluing command address
- line) driver strength. default value is
+- rockchip,phy_lpddr4_ca_drv : When the DRAM type is LPDDR4, this parameter defines
+ the PHY side CA line (including command address
+ line) driver strength. Default value is
PHY_DRV_ODT_40.
-- rockchip,phy_lpddr4_ck_cs_drv : When dram type is LPDDR4, this parameter define
- the phy side clock line and cs line driver
- strength. default value is PHY_DRV_ODT_80.
+- rockchip,phy_lpddr4_ck_cs_drv : When the DRAM type is LPDDR4, this parameter defines
+ the PHY side clock line and CS line driver
+ strength. Default value is PHY_DRV_ODT_80.
-- rockchip,phy_lpddr4_dq_drv : When dram type is LPDDR4, this parameter define
- the phy side DQ line(incluing DQS/DQ/DM line)
- driver strength. default value is PHY_DRV_ODT_80.
+- rockchip,phy_lpddr4_dq_drv : When the DRAM type is LPDDR4, this parameter defines
+ the PHY side DQ line (including DQS/DQ/DM line)
+ driver strength. Default value is PHY_DRV_ODT_80.
-- rockchip,phy_lpddr4_odt : When dram type is LPDDR4, this parameter define
- the phy side odt strength, default value is
+- rockchip,phy_lpddr4_odt : When the DRAM type is LPDDR4, this parameter defines
+ the PHY side ODT strength. Default value is
PHY_DRV_ODT_60.
Example:
diff --git a/Documentation/devicetree/bindings/power/power_domain.txt b/Documentation/devicetree/bindings/power/power_domain.txt
index 7dec508987c7..8f8b25a24b8f 100644
--- a/Documentation/devicetree/bindings/power/power_domain.txt
+++ b/Documentation/devicetree/bindings/power/power_domain.txt
@@ -114,18 +114,26 @@ Required properties:
- power-domains : A list of PM domain specifiers, as defined by bindings of
the power controller that is the PM domain provider.
+Optional properties:
+ - power-domain-names : A list of power domain name strings sorted in the same
+ order as the power-domains property. Consumers drivers will use
+ power-domain-names to match power domains with power-domains
+ specifiers.
+
Example:
leaky-device@12350000 {
compatible = "foo,i-leak-current";
reg = <0x12350000 0x1000>;
power-domains = <&power 0>;
+ power-domain-names = "io";
};
leaky-device@12351000 {
compatible = "foo,i-leak-current";
reg = <0x12351000 0x1000>;
power-domains = <&power 0>, <&power 1> ;
+ power-domain-names = "io", "clk";
};
The first example above defines a typical PM domain consumer device, which is
diff --git a/Documentation/driver-api/device_link.rst b/Documentation/driver-api/device_link.rst
index 70e328e16aad..d6763272e747 100644
--- a/Documentation/driver-api/device_link.rst
+++ b/Documentation/driver-api/device_link.rst
@@ -81,10 +81,14 @@ integration is desired.
Two other flags are specifically targeted at use cases where the device
link is added from the consumer's ``->probe`` callback: ``DL_FLAG_RPM_ACTIVE``
can be specified to runtime resume the supplier upon addition of the
-device link. ``DL_FLAG_AUTOREMOVE`` causes the device link to be automatically
-purged when the consumer fails to probe or later unbinds. This obviates
-the need to explicitly delete the link in the ``->remove`` callback or in
-the error path of the ``->probe`` callback.
+device link. ``DL_FLAG_AUTOREMOVE_CONSUMER`` causes the device link to be
+automatically purged when the consumer fails to probe or later unbinds.
+This obviates the need to explicitly delete the link in the ``->remove``
+callback or in the error path of the ``->probe`` callback.
+
+Similarly, when the device link is added from supplier's ``->probe`` callback,
+``DL_FLAG_AUTOREMOVE_SUPPLIER`` causes the device link to be automatically
+purged when the supplier fails to probe or later unbinds.
Limitations
===========
diff --git a/Documentation/power/freezing-of-tasks.txt b/Documentation/power/freezing-of-tasks.txt
index af005770e767..cd283190855a 100644
--- a/Documentation/power/freezing-of-tasks.txt
+++ b/Documentation/power/freezing-of-tasks.txt
@@ -204,26 +204,26 @@ VI. Are there any precautions to be taken to prevent freezing failures?
Yes, there are.
-First of all, grabbing the 'pm_mutex' lock to mutually exclude a piece of code
+First of all, grabbing the 'system_transition_mutex' lock to mutually exclude a piece of code
from system-wide sleep such as suspend/hibernation is not encouraged.
If possible, that piece of code must instead hook onto the suspend/hibernation
notifiers to achieve mutual exclusion. Look at the CPU-Hotplug code
(kernel/cpu.c) for an example.
-However, if that is not feasible, and grabbing 'pm_mutex' is deemed necessary,
-it is strongly discouraged to directly call mutex_[un]lock(&pm_mutex) since
+However, if that is not feasible, and grabbing 'system_transition_mutex' is deemed necessary,
+it is strongly discouraged to directly call mutex_[un]lock(&system_transition_mutex) since
that could lead to freezing failures, because if the suspend/hibernate code
-successfully acquired the 'pm_mutex' lock, and hence that other entity failed
+successfully acquired the 'system_transition_mutex' lock, and hence that other entity failed
to acquire the lock, then that task would get blocked in TASK_UNINTERRUPTIBLE
state. As a consequence, the freezer would not be able to freeze that task,
leading to freezing failure.
However, the [un]lock_system_sleep() APIs are safe to use in this scenario,
since they ask the freezer to skip freezing this task, since it is anyway
-"frozen enough" as it is blocked on 'pm_mutex', which will be released
+"frozen enough" as it is blocked on 'system_transition_mutex', which will be released
only after the entire suspend/hibernation sequence is complete.
So, to summarize, use [un]lock_system_sleep() instead of directly using
-mutex_[un]lock(&pm_mutex). That would prevent freezing failures.
+mutex_[un]lock(&system_transition_mutex). That would prevent freezing failures.
V. Miscellaneous
/sys/power/pm_freeze_timeout controls how long it will cost at most to freeze
diff --git a/Documentation/power/suspend-and-cpuhotplug.txt b/Documentation/power/suspend-and-cpuhotplug.txt
index 6f55eb960a6d..a8751b8df10e 100644
--- a/Documentation/power/suspend-and-cpuhotplug.txt
+++ b/Documentation/power/suspend-and-cpuhotplug.txt
@@ -32,7 +32,7 @@ More details follow:
sysfs file
|
v
- Acquire pm_mutex lock
+ Acquire system_transition_mutex lock
|
v
Send PM_SUSPEND_PREPARE
@@ -96,10 +96,10 @@ execution during resume):
* thaw tasks
* send PM_POST_SUSPEND notifications
-* Release pm_mutex lock.
+* Release system_transition_mutex lock.
-It is to be noted here that the pm_mutex lock is acquired at the very
+It is to be noted here that the system_transition_mutex lock is acquired at the very
beginning, when we are just starting out to suspend, and then released only
after the entire cycle is complete (i.e., suspend + resume).
diff --git a/Documentation/trace/events-power.rst b/Documentation/trace/events-power.rst
index a77daca75e30..2ef318962e29 100644
--- a/Documentation/trace/events-power.rst
+++ b/Documentation/trace/events-power.rst
@@ -27,6 +27,7 @@ cpufreq.
cpu_idle "state=%lu cpu_id=%lu"
cpu_frequency "state=%lu cpu_id=%lu"
+ cpu_frequency_limits "min=%lu max=%lu cpu_id=%lu"
A suspend event is used to indicate the system going in and out of the
suspend mode:
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 67ccf64c8bd8..f8e3b668d20b 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -233,29 +233,35 @@ struct restore_data_record {
*/
static int get_e820_md5(struct e820_table *table, void *buf)
{
- struct scatterlist sg;
- struct crypto_ahash *tfm;
+ struct crypto_shash *tfm;
+ struct shash_desc *desc;
int size;
int ret = 0;
- tfm = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
+ tfm = crypto_alloc_shash("md5", 0, 0);
if (IS_ERR(tfm))
return -ENOMEM;
- {
- AHASH_REQUEST_ON_STACK(req, tfm);
- size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry) * table->nr_entries;
- ahash_request_set_tfm(req, tfm);
- sg_init_one(&sg, (u8 *)table, size);
- ahash_request_set_callback(req, 0, NULL, NULL);
- ahash_request_set_crypt(req, &sg, buf, size);
-
- if (crypto_ahash_digest(req))
- ret = -EINVAL;
- ahash_request_zero(req);
+ desc = kmalloc(sizeof(struct shash_desc) + crypto_shash_descsize(tfm),
+ GFP_KERNEL);
+ if (!desc) {
+ ret = -ENOMEM;
+ goto free_tfm;
}
- crypto_free_ahash(tfm);
+ desc->tfm = tfm;
+ desc->flags = 0;
+
+ size = offsetof(struct e820_table, entries) +
+ sizeof(struct e820_entry) * table->nr_entries;
+
+ if (crypto_shash_digest(desc, (u8 *)table, size, buf))
+ ret = -EINVAL;
+
+ kzfree(desc);
+
+free_tfm:
+ crypto_free_shash(tfm);
return ret;
}
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 5d0486f1cfcd..754d59f95500 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -338,6 +338,14 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = {
DMI_MATCH(DMI_PRODUCT_NAME, "K54HR"),
},
},
+ {
+ .callback = init_nvs_save_s3,
+ .ident = "Asus 1025C",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "1025C"),
+ },
+ },
/*
* https://bugzilla.kernel.org/show_bug.cgi?id=189431
* Lenovo G50-45 is a platform later than 2012, but needs nvs memory
@@ -718,9 +726,6 @@ static const struct acpi_device_id lps0_device_ids[] = {
#define ACPI_LPS0_ENTRY 5
#define ACPI_LPS0_EXIT 6
-#define ACPI_LPS0_SCREEN_MASK ((1 << ACPI_LPS0_SCREEN_OFF) | (1 << ACPI_LPS0_SCREEN_ON))
-#define ACPI_LPS0_PLATFORM_MASK ((1 << ACPI_LPS0_ENTRY) | (1 << ACPI_LPS0_EXIT))
-
static acpi_handle lps0_device_handle;
static guid_t lps0_dsm_guid;
static char lps0_dsm_func_mask;
@@ -924,17 +929,14 @@ static int lps0_device_attach(struct acpi_device *adev,
if (out_obj && out_obj->type == ACPI_TYPE_BUFFER) {
char bitmask = *(char *)out_obj->buffer.pointer;
- if ((bitmask & ACPI_LPS0_PLATFORM_MASK) == ACPI_LPS0_PLATFORM_MASK ||
- (bitmask & ACPI_LPS0_SCREEN_MASK) == ACPI_LPS0_SCREEN_MASK) {
- lps0_dsm_func_mask = bitmask;
- lps0_device_handle = adev->handle;
- /*
- * Use suspend-to-idle by default if the default
- * suspend mode was not set from the command line.
- */
- if (mem_sleep_default > PM_SUSPEND_MEM)
- mem_sleep_current = PM_SUSPEND_TO_IDLE;
- }
+ lps0_dsm_func_mask = bitmask;
+ lps0_device_handle = adev->handle;
+ /*
+ * Use suspend-to-idle by default if the default
+ * suspend mode was not set from the command line.
+ */
+ if (mem_sleep_default > PM_SUSPEND_MEM)
+ mem_sleep_current = PM_SUSPEND_TO_IDLE;
acpi_handle_debug(adev->handle, "_DSM function mask: 0x%x\n",
bitmask);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 2ab316d85651..9d3c15d4dde8 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -178,10 +178,10 @@ void device_pm_move_to_tail(struct device *dev)
* of the link. If DL_FLAG_PM_RUNTIME is not set, DL_FLAG_RPM_ACTIVE will be
* ignored.
*
- * If the DL_FLAG_AUTOREMOVE is set, the link will be removed automatically
- * when the consumer device driver unbinds from it. The combination of both
- * DL_FLAG_AUTOREMOVE and DL_FLAG_STATELESS set is invalid and will cause NULL
- * to be returned.
+ * If the DL_FLAG_AUTOREMOVE_CONSUMER is set, the link will be removed
+ * automatically when the consumer device driver unbinds from it.
+ * The combination of both DL_FLAG_AUTOREMOVE_CONSUMER and DL_FLAG_STATELESS
+ * set is invalid and will cause NULL to be returned.
*
* A side effect of the link creation is re-ordering of dpm_list and the
* devices_kset list by moving the consumer device and all devices depending
@@ -198,7 +198,8 @@ struct device_link *device_link_add(struct device *consumer,
struct device_link *link;
if (!consumer || !supplier ||
- ((flags & DL_FLAG_STATELESS) && (flags & DL_FLAG_AUTOREMOVE)))
+ ((flags & DL_FLAG_STATELESS) &&
+ (flags & DL_FLAG_AUTOREMOVE_CONSUMER)))
return NULL;
device_links_write_lock();
@@ -509,7 +510,7 @@ static void __device_links_no_driver(struct device *dev)
if (link->flags & DL_FLAG_STATELESS)
continue;
- if (link->flags & DL_FLAG_AUTOREMOVE)
+ if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER)
kref_put(&link->kref, __device_link_del);
else if (link->status != DL_STATE_SUPPLIER_UNBIND)
WRITE_ONCE(link->status, DL_STATE_AVAILABLE);
@@ -545,8 +546,18 @@ void device_links_driver_cleanup(struct device *dev)
if (link->flags & DL_FLAG_STATELESS)
continue;
- WARN_ON(link->flags & DL_FLAG_AUTOREMOVE);
+ WARN_ON(link->flags & DL_FLAG_AUTOREMOVE_CONSUMER);
WARN_ON(link->status != DL_STATE_SUPPLIER_UNBIND);
+
+ /*
+ * autoremove the links between this @dev and its consumer
+ * devices that are not active, i.e. where the link state
+ * has moved to DL_STATE_SUPPLIER_UNBIND.
+ */
+ if (link->status == DL_STATE_SUPPLIER_UNBIND &&
+ link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER)
+ kref_put(&link->kref, __device_link_del);
+
WRITE_ONCE(link->status, DL_STATE_DORMANT);
}
diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c
index df41b4780b3b..b413951c6abc 100644
--- a/drivers/base/power/common.c
+++ b/drivers/base/power/common.c
@@ -153,6 +153,23 @@ struct device *dev_pm_domain_attach_by_id(struct device *dev,
EXPORT_SYMBOL_GPL(dev_pm_domain_attach_by_id);
/**
+ * dev_pm_domain_attach_by_name - Associate a device with one of its PM domains.
+ * @dev: The device used to lookup the PM domain.
+ * @name: The name of the PM domain.
+ *
+ * For a detailed function description, see dev_pm_domain_attach_by_id().
+ */
+struct device *dev_pm_domain_attach_by_name(struct device *dev,
+ char *name)
+{
+ if (dev->pm_domain)
+ return ERR_PTR(-EEXIST);
+
+ return genpd_dev_pm_attach_by_name(dev, name);
+}
+EXPORT_SYMBOL_GPL(dev_pm_domain_attach_by_name);
+
+/**
* dev_pm_domain_detach - Detach a device from its PM domain.
* @dev: Device to detach.
* @power_off: Used to indicate whether we should power off the device.
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 9e8484189034..79bdca70a81a 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -2374,6 +2374,30 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev,
}
EXPORT_SYMBOL_GPL(genpd_dev_pm_attach_by_id);
+/**
+ * genpd_dev_pm_attach_by_name - Associate a device with one of its PM domains.
+ * @dev: The device used to lookup the PM domain.
+ * @name: The name of the PM domain.
+ *
+ * Parse device's OF node to find a PM domain specifier using the
+ * power-domain-names DT property. For further description see
+ * genpd_dev_pm_attach_by_id().
+ */
+struct device *genpd_dev_pm_attach_by_name(struct device *dev, char *name)
+{
+ int index;
+
+ if (!dev->of_node)
+ return NULL;
+
+ index = of_property_match_string(dev->of_node, "power-domain-names",
+ name);
+ if (index < 0)
+ return NULL;
+
+ return genpd_dev_pm_attach_by_id(dev, index);
+}
+
static const struct of_device_id idle_state_match[] = {
{ .compatible = "domain-idle-state", },
{ }
diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c
index 739da90ff3f6..75491fc841a6 100644
--- a/drivers/cpufreq/armada-37xx-cpufreq.c
+++ b/drivers/cpufreq/armada-37xx-cpufreq.c
@@ -51,6 +51,16 @@
#define ARMADA_37XX_DVFS_LOAD_2 2
#define ARMADA_37XX_DVFS_LOAD_3 3
+/* AVS register set */
+#define ARMADA_37XX_AVS_CTL0 0x0
+#define ARMADA_37XX_AVS_ENABLE BIT(30)
+#define ARMADA_37XX_AVS_HIGH_VDD_LIMIT 16
+#define ARMADA_37XX_AVS_LOW_VDD_LIMIT 22
+#define ARMADA_37XX_AVS_VDD_MASK 0x3F
+#define ARMADA_37XX_AVS_CTL2 0x8
+#define ARMADA_37XX_AVS_LOW_VDD_EN BIT(6)
+#define ARMADA_37XX_AVS_VSET(x) (0x1C + 4 * (x))
+
/*
* On Armada 37xx the Power management manages 4 level of CPU load,
* each level can be associated with a CPU clock source, a CPU
@@ -58,6 +68,17 @@
*/
#define LOAD_LEVEL_NR 4
+#define MIN_VOLT_MV 1000
+
+/* AVS value for the corresponding voltage (in mV) */
+static int avs_map[] = {
+ 747, 758, 770, 782, 793, 805, 817, 828, 840, 852, 863, 875, 887, 898,
+ 910, 922, 933, 945, 957, 968, 980, 992, 1003, 1015, 1027, 1038, 1050,
+ 1062, 1073, 1085, 1097, 1108, 1120, 1132, 1143, 1155, 1167, 1178, 1190,
+ 1202, 1213, 1225, 1237, 1248, 1260, 1272, 1283, 1295, 1307, 1318, 1330,
+ 1342
+};
+
struct armada37xx_cpufreq_state {
struct regmap *regmap;
u32 nb_l0l1;
@@ -71,6 +92,7 @@ static struct armada37xx_cpufreq_state *armada37xx_cpufreq_state;
struct armada_37xx_dvfs {
u32 cpu_freq_max;
u8 divider[LOAD_LEVEL_NR];
+ u32 avs[LOAD_LEVEL_NR];
};
static struct armada_37xx_dvfs armada_37xx_dvfs[] = {
@@ -148,6 +170,128 @@ static void __init armada37xx_cpufreq_dvfs_setup(struct regmap *base,
clk_set_parent(clk, parent);
}
+/*
+ * Find out the armada 37x supported AVS value whose voltage value is
+ * the round-up closest to the target voltage value.
+ */
+static u32 armada_37xx_avs_val_match(int target_vm)
+{
+ u32 avs;
+
+ /* Find out the round-up closest supported voltage value */
+ for (avs = 0; avs < ARRAY_SIZE(avs_map); avs++)
+ if (avs_map[avs] >= target_vm)
+ break;
+
+ /*
+ * If all supported voltages are smaller than target one,
+ * choose the largest supported voltage
+ */
+ if (avs == ARRAY_SIZE(avs_map))
+ avs = ARRAY_SIZE(avs_map) - 1;
+
+ return avs;
+}
+
+/*
+ * For Armada 37xx soc, L0(VSET0) VDD AVS value is set to SVC revision
+ * value or a default value when SVC is not supported.
+ * - L0 can be read out from the register of AVS_CTRL_0 and L0 voltage
+ * can be got from the mapping table of avs_map.
+ * - L1 voltage should be about 100mv smaller than L0 voltage
+ * - L2 & L3 voltage should be about 150mv smaller than L0 voltage.
+ * This function calculates L1 & L2 & L3 AVS values dynamically based
+ * on L0 voltage and fill all AVS values to the AVS value table.
+ */
+static void __init armada37xx_cpufreq_avs_configure(struct regmap *base,
+ struct armada_37xx_dvfs *dvfs)
+{
+ unsigned int target_vm;
+ int load_level = 0;
+ u32 l0_vdd_min;
+
+ if (base == NULL)
+ return;
+
+ /* Get L0 VDD min value */
+ regmap_read(base, ARMADA_37XX_AVS_CTL0, &l0_vdd_min);
+ l0_vdd_min = (l0_vdd_min >> ARMADA_37XX_AVS_LOW_VDD_LIMIT) &
+ ARMADA_37XX_AVS_VDD_MASK;
+ if (l0_vdd_min >= ARRAY_SIZE(avs_map)) {
+ pr_err("L0 VDD MIN %d is not correct.\n", l0_vdd_min);
+ return;
+ }
+ dvfs->avs[0] = l0_vdd_min;
+
+ if (avs_map[l0_vdd_min] <= MIN_VOLT_MV) {
+ /*
+ * If L0 voltage is smaller than 1000mv, then all VDD sets
+ * use L0 voltage;
+ */
+ u32 avs_min = armada_37xx_avs_val_match(MIN_VOLT_MV);
+
+ for (load_level = 1; load_level < LOAD_LEVEL_NR; load_level++)
+ dvfs->avs[load_level] = avs_min;
+
+ return;
+ }
+
+ /*
+ * L1 voltage is equal to L0 voltage - 100mv and it must be
+ * larger than 1000mv
+ */
+
+ target_vm = avs_map[l0_vdd_min] - 100;
+ target_vm = target_vm > MIN_VOLT_MV ? target_vm : MIN_VOLT_MV;
+ dvfs->avs[1] = armada_37xx_avs_val_match(target_vm);
+
+ /*
+ * L2 & L3 voltage is equal to L0 voltage - 150mv and it must
+ * be larger than 1000mv
+ */
+ target_vm = avs_map[l0_vdd_min] - 150;
+ target_vm = target_vm > MIN_VOLT_MV ? target_vm : MIN_VOLT_MV;
+ dvfs->avs[2] = dvfs->avs[3] = armada_37xx_avs_val_match(target_vm);
+}
+
+static void __init armada37xx_cpufreq_avs_setup(struct regmap *base,
+ struct armada_37xx_dvfs *dvfs)
+{
+ unsigned int avs_val = 0, freq;
+ int load_level = 0;
+
+ if (base == NULL)
+ return;
+
+ /* Disable AVS before the configuration */
+ regmap_update_bits(base, ARMADA_37XX_AVS_CTL0,
+ ARMADA_37XX_AVS_ENABLE, 0);
+
+
+ /* Enable low voltage mode */
+ regmap_update_bits(base, ARMADA_37XX_AVS_CTL2,
+ ARMADA_37XX_AVS_LOW_VDD_EN,
+ ARMADA_37XX_AVS_LOW_VDD_EN);
+
+
+ for (load_level = 1; load_level < LOAD_LEVEL_NR; load_level++) {
+ freq = dvfs->cpu_freq_max / dvfs->divider[load_level];
+
+ avs_val = dvfs->avs[load_level];
+ regmap_update_bits(base, ARMADA_37XX_AVS_VSET(load_level-1),
+ ARMADA_37XX_AVS_VDD_MASK << ARMADA_37XX_AVS_HIGH_VDD_LIMIT |
+ ARMADA_37XX_AVS_VDD_MASK << ARMADA_37XX_AVS_LOW_VDD_LIMIT,
+ avs_val << ARMADA_37XX_AVS_HIGH_VDD_LIMIT |
+ avs_val << ARMADA_37XX_AVS_LOW_VDD_LIMIT);
+ }
+
+ /* Enable AVS after the configuration */
+ regmap_update_bits(base, ARMADA_37XX_AVS_CTL0,
+ ARMADA_37XX_AVS_ENABLE,
+ ARMADA_37XX_AVS_ENABLE);
+
+}
+
static void armada37xx_cpufreq_disable_dvfs(struct regmap *base)
{
unsigned int reg = ARMADA_37XX_NB_DYN_MOD,
@@ -216,7 +360,7 @@ static int __init armada37xx_cpufreq_driver_init(void)
struct platform_device *pdev;
unsigned long freq;
unsigned int cur_frequency;
- struct regmap *nb_pm_base;
+ struct regmap *nb_pm_base, *avs_base;
struct device *cpu_dev;
int load_lvl, ret;
struct clk *clk;
@@ -227,6 +371,14 @@ static int __init armada37xx_cpufreq_driver_init(void)
if (IS_ERR(nb_pm_base))
return -ENODEV;
+ avs_base =
+ syscon_regmap_lookup_by_compatible("marvell,armada-3700-avs");
+
+ /* if AVS is not present don't use it but still try to setup dvfs */
+ if (IS_ERR(avs_base)) {
+ pr_info("Syscon failed for Adapting Voltage Scaling: skip it\n");
+ avs_base = NULL;
+ }
/* Before doing any configuration on the DVFS first, disable it */
armada37xx_cpufreq_disable_dvfs(nb_pm_base);
@@ -270,16 +422,21 @@ static int __init armada37xx_cpufreq_driver_init(void)
armada37xx_cpufreq_state->regmap = nb_pm_base;
+ armada37xx_cpufreq_avs_configure(avs_base, dvfs);
+ armada37xx_cpufreq_avs_setup(avs_base, dvfs);
+
armada37xx_cpufreq_dvfs_setup(nb_pm_base, clk, dvfs->divider);
clk_put(clk);
for (load_lvl = ARMADA_37XX_DVFS_LOAD_0; load_lvl < LOAD_LEVEL_NR;
load_lvl++) {
+ unsigned long u_volt = avs_map[dvfs->avs[load_lvl]] * 1000;
freq = cur_frequency / dvfs->divider[load_lvl];
-
- ret = dev_pm_opp_add(cpu_dev, freq, 0);
+ ret = dev_pm_opp_add(cpu_dev, freq, u_volt);
if (ret)
goto remove_opp;
+
+
}
/* Now that everything is setup, enable the DVFS at hardware level */
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index a9d3eec32795..30f302149730 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -296,10 +296,62 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
return ret;
}
+static inline u64 get_delta(u64 t1, u64 t0)
+{
+ if (t1 > t0 || t0 > ~(u32)0)
+ return t1 - t0;
+
+ return (u32)t1 - (u32)t0;
+}
+
+static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu,
+ struct cppc_perf_fb_ctrs fb_ctrs_t0,
+ struct cppc_perf_fb_ctrs fb_ctrs_t1)
+{
+ u64 delta_reference, delta_delivered;
+ u64 reference_perf, delivered_perf;
+
+ reference_perf = fb_ctrs_t0.reference_perf;
+
+ delta_reference = get_delta(fb_ctrs_t1.reference,
+ fb_ctrs_t0.reference);
+ delta_delivered = get_delta(fb_ctrs_t1.delivered,
+ fb_ctrs_t0.delivered);
+
+ /* Check to avoid divide-by zero */
+ if (delta_reference || delta_delivered)
+ delivered_perf = (reference_perf * delta_delivered) /
+ delta_reference;
+ else
+ delivered_perf = cpu->perf_ctrls.desired_perf;
+
+ return cppc_cpufreq_perf_to_khz(cpu, delivered_perf);
+}
+
+static unsigned int cppc_cpufreq_get_rate(unsigned int cpunum)
+{
+ struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0};
+ struct cppc_cpudata *cpu = all_cpu_data[cpunum];
+ int ret;
+
+ ret = cppc_get_perf_ctrs(cpunum, &fb_ctrs_t0);
+ if (ret)
+ return ret;
+
+ udelay(2); /* 2usec delay between sampling */
+
+ ret = cppc_get_perf_ctrs(cpunum, &fb_ctrs_t1);
+ if (ret)
+ return ret;
+
+ return cppc_get_rate_from_fbctrs(cpu, fb_ctrs_t0, fb_ctrs_t1);
+}
+
static struct cpufreq_driver cppc_cpufreq_driver = {
.flags = CPUFREQ_CONST_LOOPS,
.verify = cppc_verify_policy,
.target = cppc_cpufreq_set_target,
+ .get = cppc_cpufreq_get_rate,
.init = cppc_cpufreq_cpu_init,
.stop_cpu = cppc_cpufreq_stop_cpu,
.name = "cppc_cpufreq",
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index b0dfd3222013..f53fb41efb7b 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -923,7 +923,12 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
struct freq_attr *fattr = to_attr(attr);
ssize_t ret = -EINVAL;
- cpus_read_lock();
+ /*
+ * cpus_read_trylock() is used here to work around a circular lock
+ * dependency problem with respect to the cpufreq_register_driver().
+ */
+ if (!cpus_read_trylock())
+ return -EBUSY;
if (cpu_online(policy->cpu)) {
down_write(&policy->rwsem);
@@ -2236,6 +2241,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
policy->min = new_policy->min;
policy->max = new_policy->max;
+ trace_cpu_frequency_limits(policy);
policy->cached_target_freq = UINT_MAX;
diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index 8b3c2a79ad6c..b2ff423ad7f8 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -9,6 +9,7 @@
#include <linux/clk.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
+#include <linux/cpu_cooling.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/of.h>
@@ -50,6 +51,7 @@ static struct clk_bulk_data clks[] = {
};
static struct device *cpu_dev;
+static struct thermal_cooling_device *cdev;
static bool free_opp;
static struct cpufreq_frequency_table *freq_table;
static unsigned int max_freq;
@@ -191,6 +193,16 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index)
return 0;
}
+static void imx6q_cpufreq_ready(struct cpufreq_policy *policy)
+{
+ cdev = of_cpufreq_cooling_register(policy);
+
+ if (!cdev)
+ dev_err(cpu_dev,
+ "running cpufreq without cooling device: %ld\n",
+ PTR_ERR(cdev));
+}
+
static int imx6q_cpufreq_init(struct cpufreq_policy *policy)
{
int ret;
@@ -202,13 +214,22 @@ static int imx6q_cpufreq_init(struct cpufreq_policy *policy)
return ret;
}
+static int imx6q_cpufreq_exit(struct cpufreq_policy *policy)
+{
+ cpufreq_cooling_unregister(cdev);
+
+ return 0;
+}
+
static struct cpufreq_driver imx6q_cpufreq_driver = {
.flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK,
.verify = cpufreq_generic_frequency_table_verify,
.target_index = imx6q_set_target,
.get = cpufreq_generic_get,
.init = imx6q_cpufreq_init,
+ .exit = imx6q_cpufreq_exit,
.name = "imx6q-cpufreq",
+ .ready = imx6q_cpufreq_ready,
.attr = cpufreq_generic_attr,
.suspend = cpufreq_generic_suspend,
};
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index d4ed0022b0dd..b6a1aadaff9f 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -670,21 +670,18 @@ static ssize_t store_energy_performance_preference(
{
struct cpudata *cpu_data = all_cpu_data[policy->cpu];
char str_preference[21];
- int ret, i = 0;
+ int ret;
ret = sscanf(buf, "%20s", str_preference);
if (ret != 1)
return -EINVAL;
- while (energy_perf_strings[i] != NULL) {
- if (!strcmp(str_preference, energy_perf_strings[i])) {
- intel_pstate_set_energy_pref_index(cpu_data, i);
- return count;
- }
- ++i;
- }
+ ret = match_string(energy_perf_strings, -1, str_preference);
+ if (ret < 0)
+ return ret;
- return -EINVAL;
+ intel_pstate_set_energy_pref_index(cpu_data, ret);
+ return count;
}
static ssize_t show_energy_performance_preference(
@@ -2011,7 +2008,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
static void intel_pstate_adjust_policy_max(struct cpufreq_policy *policy,
struct cpudata *cpu)
{
- if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate &&
+ if (!hwp_active &&
+ cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate &&
policy->max < policy->cpuinfo.max_freq &&
policy->max > cpu->pstate.max_freq) {
pr_debug("policy->max > max non turbo frequency\n");
@@ -2085,6 +2083,15 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
policy->cpuinfo.max_freq *= cpu->pstate.scaling;
+ if (hwp_active) {
+ unsigned int max_freq;
+
+ max_freq = global.turbo_disabled ?
+ cpu->pstate.max_freq : cpu->pstate.turbo_freq;
+ if (max_freq < policy->cpuinfo.max_freq)
+ policy->cpuinfo.max_freq = max_freq;
+ }
+
intel_pstate_init_acpi_perf_limits(policy);
policy->fast_switch_possible = true;
diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c
index 0c56c9759672..099a849396f6 100644
--- a/drivers/cpufreq/pcc-cpufreq.c
+++ b/drivers/cpufreq/pcc-cpufreq.c
@@ -593,6 +593,15 @@ static int __init pcc_cpufreq_init(void)
return ret;
}
+ if (num_present_cpus() > 4) {
+ pcc_cpufreq_driver.flags |= CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING;
+ pr_err("%s: Too many CPUs, dynamic performance scaling disabled\n",
+ __func__);
+ pr_err("%s: Try to enable another scaling driver through BIOS settings\n",
+ __func__);
+ pr_err("%s: and complain to the system vendor\n", __func__);
+ }
+
ret = cpufreq_register_driver(&pcc_cpufreq_driver);
return ret;
diff --git a/drivers/cpufreq/qcom-cpufreq-kryo.c b/drivers/cpufreq/qcom-cpufreq-kryo.c
index efc9a7ae4857..a1830fa25fc5 100644
--- a/drivers/cpufreq/qcom-cpufreq-kryo.c
+++ b/drivers/cpufreq/qcom-cpufreq-kryo.c
@@ -109,8 +109,9 @@ static int qcom_cpufreq_kryo_probe(struct platform_device *pdev)
speedbin_nvmem = of_nvmem_cell_get(np, NULL);
of_node_put(np);
if (IS_ERR(speedbin_nvmem)) {
- dev_err(cpu_dev, "Could not get nvmem cell: %ld\n",
- PTR_ERR(speedbin_nvmem));
+ if (PTR_ERR(speedbin_nvmem) != -EPROBE_DEFER)
+ dev_err(cpu_dev, "Could not get nvmem cell: %ld\n",
+ PTR_ERR(speedbin_nvmem));
return PTR_ERR(speedbin_nvmem);
}
diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c
index e07bc7ace774..073557f433eb 100644
--- a/drivers/cpuidle/cpuidle-arm.c
+++ b/drivers/cpuidle/cpuidle-arm.c
@@ -105,7 +105,8 @@ static int __init arm_idle_init_cpu(int cpu)
ret = cpuidle_register_driver(drv);
if (ret) {
- pr_err("Failed to register cpuidle driver\n");
+ if (ret != -EBUSY)
+ pr_err("Failed to register cpuidle driver\n");
goto out_kfree_drv;
}
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 0b5b3abe054e..4c49bb1330b5 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -604,28 +604,29 @@ struct devfreq *devfreq_add_device(struct device *dev,
mutex_lock(&devfreq->lock);
}
- devfreq->min_freq = find_available_min_freq(devfreq);
- if (!devfreq->min_freq) {
+ devfreq->scaling_min_freq = find_available_min_freq(devfreq);
+ if (!devfreq->scaling_min_freq) {
mutex_unlock(&devfreq->lock);
err = -EINVAL;
goto err_dev;
}
- devfreq->scaling_min_freq = devfreq->min_freq;
+ devfreq->min_freq = devfreq->scaling_min_freq;
- devfreq->max_freq = find_available_max_freq(devfreq);
- if (!devfreq->max_freq) {
+ devfreq->scaling_max_freq = find_available_max_freq(devfreq);
+ if (!devfreq->scaling_max_freq) {
mutex_unlock(&devfreq->lock);
err = -EINVAL;
goto err_dev;
}
- devfreq->scaling_max_freq = devfreq->max_freq;
+ devfreq->max_freq = devfreq->scaling_max_freq;
dev_set_name(&devfreq->dev, "devfreq%d",
atomic_inc_return(&devfreq_no));
err = device_register(&devfreq->dev);
if (err) {
mutex_unlock(&devfreq->lock);
- goto err_dev;
+ put_device(&devfreq->dev);
+ goto err_out;
}
devfreq->trans_table =
@@ -672,6 +673,7 @@ err_init:
mutex_unlock(&devfreq_list_lock);
device_unregister(&devfreq->dev);
+ devfreq = NULL;
err_dev:
if (devfreq)
kfree(devfreq);
diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c
index 3cd6a184fe7c..a9c64f0d3284 100644
--- a/drivers/devfreq/event/exynos-ppmu.c
+++ b/drivers/devfreq/event/exynos-ppmu.c
@@ -627,11 +627,9 @@ static int exynos_ppmu_probe(struct platform_device *pdev)
size = sizeof(struct devfreq_event_dev *) * info->num_events;
info->edev = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
- if (!info->edev) {
- dev_err(&pdev->dev,
- "failed to allocate memory devfreq-event devices\n");
+ if (!info->edev)
return -ENOMEM;
- }
+
edev = info->edev;
platform_set_drvdata(pdev, info);
diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c
index 5dfbfa3cc878..e795ad2b3f6b 100644
--- a/drivers/devfreq/rk3399_dmc.c
+++ b/drivers/devfreq/rk3399_dmc.c
@@ -68,15 +68,6 @@ struct rk3399_dmcfreq {
struct devfreq_event_dev *edev;
struct mutex lock;
struct dram_timing timing;
-
- /*
- * DDR Converser of Frequency (DCF) is used to implement DDR frequency
- * conversion without the participation of CPU, we will implement and
- * control it in arm trust firmware.
- */
- wait_queue_head_t wait_dcf_queue;
- int irq;
- int wait_dcf_flag;
struct regulator *vdd_center;
unsigned long rate, target_rate;
unsigned long volt, target_volt;
@@ -112,31 +103,22 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
err = regulator_set_voltage(dmcfreq->vdd_center, target_volt,
target_volt);
if (err) {
- dev_err(dev, "Cannot to set voltage %lu uV\n",
+ dev_err(dev, "Cannot set voltage %lu uV\n",
target_volt);
goto out;
}
}
- dmcfreq->wait_dcf_flag = 1;
err = clk_set_rate(dmcfreq->dmc_clk, target_rate);
if (err) {
- dev_err(dev, "Cannot to set frequency %lu (%d)\n",
- target_rate, err);
+ dev_err(dev, "Cannot set frequency %lu (%d)\n", target_rate,
+ err);
regulator_set_voltage(dmcfreq->vdd_center, dmcfreq->volt,
dmcfreq->volt);
goto out;
}
/*
- * Wait until bcf irq happen, it means freq scaling finish in
- * arm trust firmware, use 100ms as timeout time.
- */
- if (!wait_event_timeout(dmcfreq->wait_dcf_queue,
- !dmcfreq->wait_dcf_flag, HZ / 10))
- dev_warn(dev, "Timeout waiting for dcf interrupt\n");
-
- /*
* Check the dpll rate,
* There only two result we will get,
* 1. Ddr frequency scaling fail, we still get the old rate.
@@ -146,8 +128,8 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
/* If get the incorrect rate, set voltage to old value. */
if (dmcfreq->rate != target_rate) {
- dev_err(dev, "Get wrong ddr frequency, Request frequency %lu,\
- Current frequency %lu\n", target_rate, dmcfreq->rate);
+ dev_err(dev, "Got wrong frequency, Request %lu, Current %lu\n",
+ target_rate, dmcfreq->rate);
regulator_set_voltage(dmcfreq->vdd_center, dmcfreq->volt,
dmcfreq->volt);
goto out;
@@ -155,7 +137,7 @@ static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
err = regulator_set_voltage(dmcfreq->vdd_center, target_volt,
target_volt);
if (err)
- dev_err(dev, "Cannot to set vol %lu uV\n", target_volt);
+ dev_err(dev, "Cannot set voltage %lu uV\n", target_volt);
dmcfreq->rate = target_rate;
dmcfreq->volt = target_volt;
@@ -241,22 +223,6 @@ static __maybe_unused int rk3399_dmcfreq_resume(struct device *dev)
static SIMPLE_DEV_PM_OPS(rk3399_dmcfreq_pm, rk3399_dmcfreq_suspend,
rk3399_dmcfreq_resume);
-static irqreturn_t rk3399_dmc_irq(int irq, void *dev_id)
-{
- struct rk3399_dmcfreq *dmcfreq = dev_id;
- struct arm_smccc_res res;
-
- dmcfreq->wait_dcf_flag = 0;
- wake_up(&dmcfreq->wait_dcf_queue);
-
- /* Clear the DCF interrupt */
- arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, 0, 0,
- ROCKCHIP_SIP_CONFIG_DRAM_CLR_IRQ,
- 0, 0, 0, 0, &res);
-
- return IRQ_HANDLED;
-}
-
static int of_get_ddr_timings(struct dram_timing *timing,
struct device_node *np)
{
@@ -330,16 +296,10 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct device_node *np = pdev->dev.of_node;
struct rk3399_dmcfreq *data;
- int ret, irq, index, size;
+ int ret, index, size;
uint32_t *timing;
struct dev_pm_opp *opp;
- irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- dev_err(&pdev->dev,
- "Cannot get the dmc interrupt resource: %d\n", irq);
- return irq;
- }
data = devm_kzalloc(dev, sizeof(struct rk3399_dmcfreq), GFP_KERNEL);
if (!data)
return -ENOMEM;
@@ -348,27 +308,22 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
data->vdd_center = devm_regulator_get(dev, "center");
if (IS_ERR(data->vdd_center)) {
+ if (PTR_ERR(data->vdd_center) == -EPROBE_DEFER)
+ return -EPROBE_DEFER;
+
dev_err(dev, "Cannot get the regulator \"center\"\n");
return PTR_ERR(data->vdd_center);
}
data->dmc_clk = devm_clk_get(dev, "dmc_clk");
if (IS_ERR(data->dmc_clk)) {
+ if (PTR_ERR(data->dmc_clk) == -EPROBE_DEFER)
+ return -EPROBE_DEFER;
+
dev_err(dev, "Cannot get the clk dmc_clk\n");
return PTR_ERR(data->dmc_clk);
};
- data->irq = irq;
- ret = devm_request_irq(dev, irq, rk3399_dmc_irq, 0,
- dev_name(dev), data);
- if (ret) {
- dev_err(dev, "Failed to request dmc irq: %d\n", ret);
- return ret;
- }
-
- init_waitqueue_head(&data->wait_dcf_queue);
- data->wait_dcf_flag = 0;
-
data->edev = devfreq_event_get_edev_by_phandle(dev, 0);
if (IS_ERR(data->edev))
return -EPROBE_DEFER;
@@ -420,8 +375,10 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
data->rate = clk_get_rate(data->dmc_clk);
opp = devfreq_recommended_opp(dev, &data->rate, 0);
- if (IS_ERR(opp))
- return PTR_ERR(opp);
+ if (IS_ERR(opp)) {
+ ret = PTR_ERR(opp);
+ goto err_free_opp;
+ }
data->rate = dev_pm_opp_get_freq(opp);
data->volt = dev_pm_opp_get_voltage(opp);
@@ -433,14 +390,34 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev)
&rk3399_devfreq_dmc_profile,
DEVFREQ_GOV_SIMPLE_ONDEMAND,
&data->ondemand_data);
- if (IS_ERR(data->devfreq))
- return PTR_ERR(data->devfreq);
+ if (IS_ERR(data->devfreq)) {
+ ret = PTR_ERR(data->devfreq);
+ goto err_free_opp;
+ }
+
devm_devfreq_register_opp_notifier(dev, data->devfreq);
data->dev = dev;
platform_set_drvdata(pdev, data);
return 0;
+
+err_free_opp:
+ dev_pm_opp_of_remove_table(&pdev->dev);
+ return ret;
+}
+
+static int rk3399_dmcfreq_remove(struct platform_device *pdev)
+{
+ struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(&pdev->dev);
+
+ /*
+ * Before remove the opp table we need to unregister the opp notifier.
+ */
+ devm_devfreq_unregister_opp_notifier(dmcfreq->dev, dmcfreq->devfreq);
+ dev_pm_opp_of_remove_table(dmcfreq->dev);
+
+ return 0;
}
static const struct of_device_id rk3399dmc_devfreq_of_match[] = {
@@ -451,6 +428,7 @@ MODULE_DEVICE_TABLE(of, rk3399dmc_devfreq_of_match);
static struct platform_driver rk3399_dmcfreq_driver = {
.probe = rk3399_dmcfreq_probe,
+ .remove = rk3399_dmcfreq_remove,
.driver = {
.name = "rk3399-dmc-freq",
.pm = &rk3399_dmcfreq_pm,
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index c3afe7b2237e..965088afcfad 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -2312,7 +2312,7 @@ static int tegra_dc_couple(struct tegra_dc *dc)
* POWER_CONTROL registers during CRTC enabling.
*/
if (dc->soc->coupled_pm && dc->pipe == 1) {
- u32 flags = DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE;
+ u32 flags = DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_CONSUMER;
struct device_link *link;
struct device *partner;
diff --git a/drivers/gpu/ipu-v3/ipu-pre.c b/drivers/gpu/ipu-v3/ipu-pre.c
index 0f70e8847540..2f8db9d62551 100644
--- a/drivers/gpu/ipu-v3/ipu-pre.c
+++ b/drivers/gpu/ipu-v3/ipu-pre.c
@@ -128,7 +128,8 @@ ipu_pre_lookup_by_phandle(struct device *dev, const char *name, int index)
list_for_each_entry(pre, &ipu_pre_list, list) {
if (pre_node == pre->dev->of_node) {
mutex_unlock(&ipu_pre_list_mutex);
- device_link_add(dev, pre->dev, DL_FLAG_AUTOREMOVE);
+ device_link_add(dev, pre->dev,
+ DL_FLAG_AUTOREMOVE_CONSUMER);
of_node_put(pre_node);
return pre;
}
diff --git a/drivers/gpu/ipu-v3/ipu-prg.c b/drivers/gpu/ipu-v3/ipu-prg.c
index 83f9dd934a5d..38a3a9764e49 100644
--- a/drivers/gpu/ipu-v3/ipu-prg.c
+++ b/drivers/gpu/ipu-v3/ipu-prg.c
@@ -100,7 +100,8 @@ ipu_prg_lookup_by_phandle(struct device *dev, const char *name, int ipu_id)
list_for_each_entry(prg, &ipu_prg_list, list) {
if (prg_node == prg->dev->of_node) {
mutex_unlock(&ipu_prg_list_mutex);
- device_link_add(dev, prg->dev, DL_FLAG_AUTOREMOVE);
+ device_link_add(dev, prg->dev,
+ DL_FLAG_AUTOREMOVE_CONSUMER);
prg->id = ipu_id;
of_node_put(prg_node);
return prg;
diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig
index 85727ef6ce8e..6ac27e5908f5 100644
--- a/drivers/powercap/Kconfig
+++ b/drivers/powercap/Kconfig
@@ -29,4 +29,14 @@ config INTEL_RAPL
controller, CPU core (Power Plance 0), graphics uncore (Power Plane
1), etc.
+config IDLE_INJECT
+ bool "Idle injection framework"
+ depends on CPU_IDLE
+ default n
+ help
+ This enables support for the idle injection framework. It
+ provides a way to force idle periods on a set of specified
+ CPUs for power capping. Idle period can be injected
+ synchronously on a set of specified CPUs or alternatively
+ on a per CPU basis.
endif
diff --git a/drivers/powercap/Makefile b/drivers/powercap/Makefile
index 0a21ef31372b..1b328854b36e 100644
--- a/drivers/powercap/Makefile
+++ b/drivers/powercap/Makefile
@@ -1,2 +1,3 @@
obj-$(CONFIG_POWERCAP) += powercap_sys.o
obj-$(CONFIG_INTEL_RAPL) += intel_rapl.o
+obj-$(CONFIG_IDLE_INJECT) += idle_inject.o
diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c
new file mode 100644
index 000000000000..24ff2a068978
--- /dev/null
+++ b/drivers/powercap/idle_inject.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2018 Linaro Limited
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ * The idle injection framework provides a way to force CPUs to enter idle
+ * states for a specified fraction of time over a specified period.
+ *
+ * It relies on the smpboot kthreads feature providing common code for CPU
+ * hotplug and thread [un]parking.
+ *
+ * All of the kthreads used for idle injection are created at init time.
+ *
+ * Next, the users of the the idle injection framework provide a cpumask via
+ * its register function. The kthreads will be synchronized with respect to
+ * this cpumask.
+ *
+ * The idle + run duration is specified via separate helpers and that allows
+ * idle injection to be started.
+ *
+ * The idle injection kthreads will call play_idle() with the idle duration
+ * specified as per the above.
+ *
+ * After all of them have been woken up, a timer is set to start the next idle
+ * injection cycle.
+ *
+ * The timer interrupt handler will wake up the idle injection kthreads for
+ * all of the CPUs in the cpumask provided by the user.
+ *
+ * Idle injection is stopped synchronously and no leftover idle injection
+ * kthread activity after its completion is guaranteed.
+ *
+ * It is up to the user of this framework to provide a lock for higher-level
+ * synchronization to prevent race conditions like starting idle injection
+ * while unregistering from the framework.
+ */
+#define pr_fmt(fmt) "ii_dev: " fmt
+
+#include <linux/cpu.h>
+#include <linux/hrtimer.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smpboot.h>
+
+#include <uapi/linux/sched/types.h>
+
+/**
+ * struct idle_inject_thread - task on/off switch structure
+ * @tsk: task injecting the idle cycles
+ * @should_run: whether or not to run the task (for the smpboot kthread API)
+ */
+struct idle_inject_thread {
+ struct task_struct *tsk;
+ int should_run;
+};
+
+/**
+ * struct idle_inject_device - idle injection data
+ * @timer: idle injection period timer
+ * @idle_duration_ms: duration of CPU idle time to inject
+ * @run_duration_ms: duration of CPU run time to allow
+ * @cpumask: mask of CPUs affected by idle injection
+ */
+struct idle_inject_device {
+ struct hrtimer timer;
+ unsigned int idle_duration_ms;
+ unsigned int run_duration_ms;
+ unsigned long int cpumask[0];
+};
+
+static DEFINE_PER_CPU(struct idle_inject_thread, idle_inject_thread);
+static DEFINE_PER_CPU(struct idle_inject_device *, idle_inject_device);
+
+/**
+ * idle_inject_wakeup - Wake up idle injection threads
+ * @ii_dev: target idle injection device
+ *
+ * Every idle injection task associated with the given idle injection device
+ * and running on an online CPU will be woken up.
+ */
+static void idle_inject_wakeup(struct idle_inject_device *ii_dev)
+{
+ struct idle_inject_thread *iit;
+ unsigned int cpu;
+
+ for_each_cpu_and(cpu, to_cpumask(ii_dev->cpumask), cpu_online_mask) {
+ iit = per_cpu_ptr(&idle_inject_thread, cpu);
+ iit->should_run = 1;
+ wake_up_process(iit->tsk);
+ }
+}
+
+/**
+ * idle_inject_timer_fn - idle injection timer function
+ * @timer: idle injection hrtimer
+ *
+ * This function is called when the idle injection timer expires. It wakes up
+ * idle injection tasks associated with the timer and they, in turn, invoke
+ * play_idle() to inject a specified amount of CPU idle time.
+ *
+ * Return: HRTIMER_RESTART.
+ */
+static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer)
+{
+ unsigned int duration_ms;
+ struct idle_inject_device *ii_dev =
+ container_of(timer, struct idle_inject_device, timer);
+
+ duration_ms = READ_ONCE(ii_dev->run_duration_ms);
+ duration_ms += READ_ONCE(ii_dev->idle_duration_ms);
+
+ idle_inject_wakeup(ii_dev);
+
+ hrtimer_forward_now(timer, ms_to_ktime(duration_ms));
+
+ return HRTIMER_RESTART;
+}
+
+/**
+ * idle_inject_fn - idle injection work function
+ * @cpu: the CPU owning the task
+ *
+ * This function calls play_idle() to inject a specified amount of CPU idle
+ * time.
+ */
+static void idle_inject_fn(unsigned int cpu)
+{
+ struct idle_inject_device *ii_dev;
+ struct idle_inject_thread *iit;
+
+ ii_dev = per_cpu(idle_inject_device, cpu);
+ iit = per_cpu_ptr(&idle_inject_thread, cpu);
+
+ /*
+ * Let the smpboot main loop know that the task should not run again.
+ */
+ iit->should_run = 0;
+
+ play_idle(READ_ONCE(ii_dev->idle_duration_ms));
+}
+
+/**
+ * idle_inject_set_duration - idle and run duration update helper
+ * @run_duration_ms: CPU run time to allow in milliseconds
+ * @idle_duration_ms: CPU idle time to inject in milliseconds
+ */
+void idle_inject_set_duration(struct idle_inject_device *ii_dev,
+ unsigned int run_duration_ms,
+ unsigned int idle_duration_ms)
+{
+ if (run_duration_ms && idle_duration_ms) {
+ WRITE_ONCE(ii_dev->run_duration_ms, run_duration_ms);
+ WRITE_ONCE(ii_dev->idle_duration_ms, idle_duration_ms);
+ }
+}
+
+/**
+ * idle_inject_get_duration - idle and run duration retrieval helper
+ * @run_duration_ms: memory location to store the current CPU run time
+ * @idle_duration_ms: memory location to store the current CPU idle time
+ */
+void idle_inject_get_duration(struct idle_inject_device *ii_dev,
+ unsigned int *run_duration_ms,
+ unsigned int *idle_duration_ms)
+{
+ *run_duration_ms = READ_ONCE(ii_dev->run_duration_ms);
+ *idle_duration_ms = READ_ONCE(ii_dev->idle_duration_ms);
+}
+
+/**
+ * idle_inject_start - start idle injections
+ * @ii_dev: idle injection control device structure
+ *
+ * The function starts idle injection by first waking up all of the idle
+ * injection kthreads associated with @ii_dev to let them inject CPU idle time
+ * sets up a timer to start the next idle injection period.
+ *
+ * Return: -EINVAL if the CPU idle or CPU run time is not set or 0 on success.
+ */
+int idle_inject_start(struct idle_inject_device *ii_dev)
+{
+ unsigned int idle_duration_ms = READ_ONCE(ii_dev->idle_duration_ms);
+ unsigned int run_duration_ms = READ_ONCE(ii_dev->run_duration_ms);
+
+ if (!idle_duration_ms || !run_duration_ms)
+ return -EINVAL;
+
+ pr_debug("Starting injecting idle cycles on CPUs '%*pbl'\n",
+ cpumask_pr_args(to_cpumask(ii_dev->cpumask)));
+
+ idle_inject_wakeup(ii_dev);
+
+ hrtimer_start(&ii_dev->timer,
+ ms_to_ktime(idle_duration_ms + run_duration_ms),
+ HRTIMER_MODE_REL);
+
+ return 0;
+}
+
+/**
+ * idle_inject_stop - stops idle injections
+ * @ii_dev: idle injection control device structure
+ *
+ * The function stops idle injection and waits for the threads to finish work.
+ * If CPU idle time is being injected when this function runs, then it will
+ * wait until the end of the cycle.
+ *
+ * When it returns, there is no more idle injection kthread activity. The
+ * kthreads are scheduled out and the periodic timer is off.
+ */
+void idle_inject_stop(struct idle_inject_device *ii_dev)
+{
+ struct idle_inject_thread *iit;
+ unsigned int cpu;
+
+ pr_debug("Stopping idle injection on CPUs '%*pbl'\n",
+ cpumask_pr_args(to_cpumask(ii_dev->cpumask)));
+
+ hrtimer_cancel(&ii_dev->timer);
+
+ /*
+ * Stopping idle injection requires all of the idle injection kthreads
+ * associated with the given cpumask to be parked and stay that way, so
+ * prevent CPUs from going online at this point. Any CPUs going online
+ * after the loop below will be covered by clearing the should_run flag
+ * that will cause the smpboot main loop to schedule them out.
+ */
+ cpu_hotplug_disable();
+
+ /*
+ * Iterate over all (online + offline) CPUs here in case one of them
+ * goes offline with the should_run flag set so as to prevent its idle
+ * injection kthread from running when the CPU goes online again after
+ * the ii_dev has been freed.
+ */
+ for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) {
+ iit = per_cpu_ptr(&idle_inject_thread, cpu);
+ iit->should_run = 0;
+
+ wait_task_inactive(iit->tsk, 0);
+ }
+
+ cpu_hotplug_enable();
+}
+
+/**
+ * idle_inject_setup - prepare the current task for idle injection
+ * @cpu: not used
+ *
+ * Called once, this function is in charge of setting the current task's
+ * scheduler parameters to make it an RT task.
+ */
+static void idle_inject_setup(unsigned int cpu)
+{
+ struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO / 2 };
+
+ sched_setscheduler(current, SCHED_FIFO, &param);
+}
+
+/**
+ * idle_inject_should_run - function helper for the smpboot API
+ * @cpu: CPU the kthread is running on
+ *
+ * Return: whether or not the thread can run.
+ */
+static int idle_inject_should_run(unsigned int cpu)
+{
+ struct idle_inject_thread *iit =
+ per_cpu_ptr(&idle_inject_thread, cpu);
+
+ return iit->should_run;
+}
+
+/**
+ * idle_inject_register - initialize idle injection on a set of CPUs
+ * @cpumask: CPUs to be affected by idle injection
+ *
+ * This function creates an idle injection control device structure for the
+ * given set of CPUs and initializes the timer associated with it. It does not
+ * start any injection cycles.
+ *
+ * Return: NULL if memory allocation fails, idle injection control device
+ * pointer on success.
+ */
+struct idle_inject_device *idle_inject_register(struct cpumask *cpumask)
+{
+ struct idle_inject_device *ii_dev;
+ int cpu, cpu_rb;
+
+ ii_dev = kzalloc(sizeof(*ii_dev) + cpumask_size(), GFP_KERNEL);
+ if (!ii_dev)
+ return NULL;
+
+ cpumask_copy(to_cpumask(ii_dev->cpumask), cpumask);
+ hrtimer_init(&ii_dev->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ ii_dev->timer.function = idle_inject_timer_fn;
+
+ for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) {
+
+ if (per_cpu(idle_inject_device, cpu)) {
+ pr_err("cpu%d is already registered\n", cpu);
+ goto out_rollback;
+ }
+
+ per_cpu(idle_inject_device, cpu) = ii_dev;
+ }
+
+ return ii_dev;
+
+out_rollback:
+ for_each_cpu(cpu_rb, to_cpumask(ii_dev->cpumask)) {
+ if (cpu == cpu_rb)
+ break;
+ per_cpu(idle_inject_device, cpu_rb) = NULL;
+ }
+
+ kfree(ii_dev);
+
+ return NULL;
+}
+
+/**
+ * idle_inject_unregister - unregister idle injection control device
+ * @ii_dev: idle injection control device to unregister
+ *
+ * The function stops idle injection for the given control device,
+ * unregisters its kthreads and frees memory allocated when that device was
+ * created.
+ */
+void idle_inject_unregister(struct idle_inject_device *ii_dev)
+{
+ unsigned int cpu;
+
+ idle_inject_stop(ii_dev);
+
+ for_each_cpu(cpu, to_cpumask(ii_dev->cpumask))
+ per_cpu(idle_inject_device, cpu) = NULL;
+
+ kfree(ii_dev);
+}
+
+static struct smp_hotplug_thread idle_inject_threads = {
+ .store = &idle_inject_thread.tsk,
+ .setup = idle_inject_setup,
+ .thread_fn = idle_inject_fn,
+ .thread_comm = "idle_inject/%u",
+ .thread_should_run = idle_inject_should_run,
+};
+
+static int __init idle_inject_init(void)
+{
+ return smpboot_register_percpu_thread(&idle_inject_threads);
+}
+early_initcall(idle_inject_init);
diff --git a/drivers/soc/imx/gpc.c b/drivers/soc/imx/gpc.c
index 0097a939487f..546960a18d60 100644
--- a/drivers/soc/imx/gpc.c
+++ b/drivers/soc/imx/gpc.c
@@ -209,7 +209,7 @@ static int imx_pgc_power_domain_probe(struct platform_device *pdev)
goto genpd_err;
}
- device_link_add(dev, dev->parent, DL_FLAG_AUTOREMOVE);
+ device_link_add(dev, dev->parent, DL_FLAG_AUTOREMOVE_CONSUMER);
return 0;
diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
index 334d98be03b9..cbfcca828cd7 100644
--- a/drivers/thermal/imx_thermal.c
+++ b/drivers/thermal/imx_thermal.c
@@ -3,6 +3,7 @@
// Copyright 2013 Freescale Semiconductor, Inc.
#include <linux/clk.h>
+#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/cpu_cooling.h>
#include <linux/delay.h>
@@ -644,6 +645,27 @@ static const struct of_device_id of_imx_thermal_match[] = {
};
MODULE_DEVICE_TABLE(of, of_imx_thermal_match);
+/*
+ * Create cooling device in case no #cooling-cells property is available in
+ * CPU node
+ */
+static int imx_thermal_register_legacy_cooling(struct imx_thermal_data *data)
+{
+ struct device_node *np = of_get_cpu_node(data->policy->cpu, NULL);
+ int ret;
+
+ if (!np || !of_find_property(np, "#cooling-cells", NULL)) {
+ data->cdev = cpufreq_cooling_register(data->policy);
+ if (IS_ERR(data->cdev)) {
+ ret = PTR_ERR(data->cdev);
+ cpufreq_cpu_put(data->policy);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
static int imx_thermal_probe(struct platform_device *pdev)
{
struct imx_thermal_data *data;
@@ -724,12 +746,10 @@ static int imx_thermal_probe(struct platform_device *pdev)
return -EPROBE_DEFER;
}
- data->cdev = cpufreq_cooling_register(data->policy);
- if (IS_ERR(data->cdev)) {
- ret = PTR_ERR(data->cdev);
+ ret = imx_thermal_register_legacy_cooling(data);
+ if (ret) {
dev_err(&pdev->dev,
"failed to register cpufreq cooling device: %d\n", ret);
- cpufreq_cpu_put(data->policy);
return ret;
}
diff --git a/include/dt-bindings/clock/rk3399-ddr.h b/include/dt-bindings/clock/rk3399-ddr.h
new file mode 100644
index 000000000000..ed2280844963
--- /dev/null
+++ b/include/dt-bindings/clock/rk3399-ddr.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR MIT) */
+
+#ifndef DT_BINDINGS_DDR_H
+#define DT_BINDINGS_DDR_H
+
+/*
+ * DDR3 SDRAM Standard Speed Bins include tCK, tRCD, tRP, tRAS and tRC for
+ * each corresponding bin.
+ */
+
+/* DDR3-800 (5-5-5) */
+#define DDR3_800D 0
+/* DDR3-800 (6-6-6) */
+#define DDR3_800E 1
+/* DDR3-1066 (6-6-6) */
+#define DDR3_1066E 2
+/* DDR3-1066 (7-7-7) */
+#define DDR3_1066F 3
+/* DDR3-1066 (8-8-8) */
+#define DDR3_1066G 4
+/* DDR3-1333 (7-7-7) */
+#define DDR3_1333F 5
+/* DDR3-1333 (8-8-8) */
+#define DDR3_1333G 6
+/* DDR3-1333 (9-9-9) */
+#define DDR3_1333H 7
+/* DDR3-1333 (10-10-10) */
+#define DDR3_1333J 8
+/* DDR3-1600 (8-8-8) */
+#define DDR3_1600G 9
+/* DDR3-1600 (9-9-9) */
+#define DDR3_1600H 10
+/* DDR3-1600 (10-10-10) */
+#define DDR3_1600J 11
+/* DDR3-1600 (11-11-11) */
+#define DDR3_1600K 12
+/* DDR3-1600 (10-10-10) */
+#define DDR3_1866J 13
+/* DDR3-1866 (11-11-11) */
+#define DDR3_1866K 14
+/* DDR3-1866 (12-12-12) */
+#define DDR3_1866L 15
+/* DDR3-1866 (13-13-13) */
+#define DDR3_1866M 16
+/* DDR3-2133 (11-11-11) */
+#define DDR3_2133K 17
+/* DDR3-2133 (12-12-12) */
+#define DDR3_2133L 18
+/* DDR3-2133 (13-13-13) */
+#define DDR3_2133M 19
+/* DDR3-2133 (14-14-14) */
+#define DDR3_2133N 20
+/* DDR3 ATF default */
+#define DDR3_DEFAULT 21
+
+#endif
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 45789a892c41..218df7f4d3e1 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -105,6 +105,7 @@ extern void cpus_write_lock(void);
extern void cpus_write_unlock(void);
extern void cpus_read_lock(void);
extern void cpus_read_unlock(void);
+extern int cpus_read_trylock(void);
extern void lockdep_assert_cpus_held(void);
extern void cpu_hotplug_disable(void);
extern void cpu_hotplug_enable(void);
@@ -117,6 +118,7 @@ static inline void cpus_write_lock(void) { }
static inline void cpus_write_unlock(void) { }
static inline void cpus_read_lock(void) { }
static inline void cpus_read_unlock(void) { }
+static inline int cpus_read_trylock(void) { return true; }
static inline void lockdep_assert_cpus_held(void) { }
static inline void cpu_hotplug_disable(void) { }
static inline void cpu_hotplug_enable(void) { }
diff --git a/include/linux/device.h b/include/linux/device.h
index b42c301c0f90..eb91127cc791 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -90,7 +90,7 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
* @num_vf: Called to find out how many virtual functions a device on this
* bus supports.
* @dma_configure: Called to setup DMA configuration on a device on
- this bus.
+ * this bus.
* @pm: Power management operations of this bus, callback the specific
* device driver's pm-ops.
* @iommu_ops: IOMMU specific operations for this bus, used to attach IOMMU
@@ -784,14 +784,16 @@ enum device_link_state {
* Device link flags.
*
* STATELESS: The core won't track the presence of supplier/consumer drivers.
- * AUTOREMOVE: Remove this link automatically on consumer driver unbind.
+ * AUTOREMOVE_CONSUMER: Remove the link automatically on consumer driver unbind.
* PM_RUNTIME: If set, the runtime PM framework will use this link.
* RPM_ACTIVE: Run pm_runtime_get_sync() on the supplier during link creation.
+ * AUTOREMOVE_SUPPLIER: Remove the link automatically on supplier driver unbind.
*/
-#define DL_FLAG_STATELESS BIT(0)
-#define DL_FLAG_AUTOREMOVE BIT(1)
-#define DL_FLAG_PM_RUNTIME BIT(2)
-#define DL_FLAG_RPM_ACTIVE BIT(3)
+#define DL_FLAG_STATELESS BIT(0)
+#define DL_FLAG_AUTOREMOVE_CONSUMER BIT(1)
+#define DL_FLAG_PM_RUNTIME BIT(2)
+#define DL_FLAG_RPM_ACTIVE BIT(3)
+#define DL_FLAG_AUTOREMOVE_SUPPLIER BIT(4)
/**
* struct device_link - Device link representation.
diff --git a/include/linux/idle_inject.h b/include/linux/idle_inject.h
new file mode 100644
index 000000000000..bdc0293fb6cb
--- /dev/null
+++ b/include/linux/idle_inject.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 Linaro Ltd
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ */
+#ifndef __IDLE_INJECT_H__
+#define __IDLE_INJECT_H__
+
+/* private idle injection device structure */
+struct idle_inject_device;
+
+struct idle_inject_device *idle_inject_register(struct cpumask *cpumask);
+
+void idle_inject_unregister(struct idle_inject_device *ii_dev);
+
+int idle_inject_start(struct idle_inject_device *ii_dev);
+
+void idle_inject_stop(struct idle_inject_device *ii_dev);
+
+void idle_inject_set_duration(struct idle_inject_device *ii_dev,
+ unsigned int run_duration_ms,
+ unsigned int idle_duration_ms);
+
+void idle_inject_get_duration(struct idle_inject_device *ii_dev,
+ unsigned int *run_duration_ms,
+ unsigned int *idle_duration_ms);
+#endif /* __IDLE_INJECT_H__ */
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index cb8d84090cfb..776c546d581a 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -239,6 +239,8 @@ unsigned int of_genpd_opp_to_performance_state(struct device *dev,
int genpd_dev_pm_attach(struct device *dev);
struct device *genpd_dev_pm_attach_by_id(struct device *dev,
unsigned int index);
+struct device *genpd_dev_pm_attach_by_name(struct device *dev,
+ char *name);
#else /* !CONFIG_PM_GENERIC_DOMAINS_OF */
static inline int of_genpd_add_provider_simple(struct device_node *np,
struct generic_pm_domain *genpd)
@@ -290,6 +292,12 @@ static inline struct device *genpd_dev_pm_attach_by_id(struct device *dev,
return NULL;
}
+static inline struct device *genpd_dev_pm_attach_by_name(struct device *dev,
+ char *name)
+{
+ return NULL;
+}
+
static inline
struct generic_pm_domain *of_genpd_remove_last(struct device_node *np)
{
@@ -301,6 +309,8 @@ struct generic_pm_domain *of_genpd_remove_last(struct device_node *np)
int dev_pm_domain_attach(struct device *dev, bool power_on);
struct device *dev_pm_domain_attach_by_id(struct device *dev,
unsigned int index);
+struct device *dev_pm_domain_attach_by_name(struct device *dev,
+ char *name);
void dev_pm_domain_detach(struct device *dev, bool power_off);
void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd);
#else
@@ -313,6 +323,11 @@ static inline struct device *dev_pm_domain_attach_by_id(struct device *dev,
{
return NULL;
}
+static inline struct device *dev_pm_domain_attach_by_name(struct device *dev,
+ char *name)
+{
+ return NULL;
+}
static inline void dev_pm_domain_detach(struct device *dev, bool power_off) {}
static inline void dev_pm_domain_set(struct device *dev,
struct dev_pm_domain *pd) {}
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 440b62f7502e..5a28ac9284f0 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -414,7 +414,7 @@ static inline bool hibernation_available(void) { return false; }
#define PM_RESTORE_PREPARE 0x0005 /* Going to restore a saved image */
#define PM_POST_RESTORE 0x0006 /* Restore failed */
-extern struct mutex pm_mutex;
+extern struct mutex system_transition_mutex;
#ifdef CONFIG_PM_SLEEP
void save_processor_state(void);
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index 908977d69783..f7aece721aed 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -5,6 +5,7 @@
#if !defined(_TRACE_POWER_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_POWER_H
+#include <linux/cpufreq.h>
#include <linux/ktime.h>
#include <linux/pm_qos.h>
#include <linux/tracepoint.h>
@@ -148,6 +149,30 @@ DEFINE_EVENT(cpu, cpu_frequency,
TP_ARGS(frequency, cpu_id)
);
+TRACE_EVENT(cpu_frequency_limits,
+
+ TP_PROTO(struct cpufreq_policy *policy),
+
+ TP_ARGS(policy),
+
+ TP_STRUCT__entry(
+ __field(u32, min_freq)
+ __field(u32, max_freq)
+ __field(u32, cpu_id)
+ ),
+
+ TP_fast_assign(
+ __entry->min_freq = policy->min;
+ __entry->max_freq = policy->max;
+ __entry->cpu_id = policy->cpu;
+ ),
+
+ TP_printk("min=%lu max=%lu cpu_id=%lu",
+ (unsigned long)__entry->min_freq,
+ (unsigned long)__entry->max_freq,
+ (unsigned long)__entry->cpu_id)
+);
+
TRACE_EVENT(device_pm_callback_start,
TP_PROTO(struct device *dev, const char *pm_ops, int event),
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 099fb20cd7be..90ec528a6e32 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -291,6 +291,12 @@ void cpus_read_lock(void)
}
EXPORT_SYMBOL_GPL(cpus_read_lock);
+int cpus_read_trylock(void)
+{
+ return percpu_down_read_trylock(&cpu_hotplug_lock);
+}
+EXPORT_SYMBOL_GPL(cpus_read_trylock);
+
void cpus_read_unlock(void)
{
percpu_up_read(&cpu_hotplug_lock);
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 6f56a9e219fa..b162b74611e4 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -15,7 +15,9 @@
atomic_t system_freezing_cnt = ATOMIC_INIT(0);
EXPORT_SYMBOL(system_freezing_cnt);
-/* indicate whether PM freezing is in effect, protected by pm_mutex */
+/* indicate whether PM freezing is in effect, protected by
+ * system_transition_mutex
+ */
bool pm_freezing;
bool pm_nosig_freezing;
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 9c85c7822383..abef759de7c8 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -338,7 +338,7 @@ static int create_image(int platform_mode)
* hibernation_snapshot - Quiesce devices and create a hibernation image.
* @platform_mode: If set, use platform driver to prepare for the transition.
*
- * This routine must be called with pm_mutex held.
+ * This routine must be called with system_transition_mutex held.
*/
int hibernation_snapshot(int platform_mode)
{
@@ -500,8 +500,9 @@ static int resume_target_kernel(bool platform_mode)
* hibernation_restore - Quiesce devices and restore from a hibernation image.
* @platform_mode: If set, use platform driver to prepare for the transition.
*
- * This routine must be called with pm_mutex held. If it is successful, control
- * reappears in the restored target kernel in hibernation_snapshot().
+ * This routine must be called with system_transition_mutex held. If it is
+ * successful, control reappears in the restored target kernel in
+ * hibernation_snapshot().
*/
int hibernation_restore(int platform_mode)
{
@@ -638,6 +639,7 @@ static void power_down(void)
break;
case HIBERNATION_PLATFORM:
hibernation_platform_enter();
+ /* Fall through */
case HIBERNATION_SHUTDOWN:
if (pm_power_off)
kernel_power_off();
@@ -805,13 +807,13 @@ static int software_resume(void)
* name_to_dev_t() below takes a sysfs buffer mutex when sysfs
* is configured into the kernel. Since the regular hibernate
* trigger path is via sysfs which takes a buffer mutex before
- * calling hibernate functions (which take pm_mutex) this can
- * cause lockdep to complain about a possible ABBA deadlock
+ * calling hibernate functions (which take system_transition_mutex)
+ * this can cause lockdep to complain about a possible ABBA deadlock
* which cannot happen since we're in the boot code here and
* sysfs can't be invoked yet. Therefore, we use a subclass
* here to avoid lockdep complaining.
*/
- mutex_lock_nested(&pm_mutex, SINGLE_DEPTH_NESTING);
+ mutex_lock_nested(&system_transition_mutex, SINGLE_DEPTH_NESTING);
if (swsusp_resume_device)
goto Check_image;
@@ -899,7 +901,7 @@ static int software_resume(void)
atomic_inc(&snapshot_device_available);
/* For success case, the suspend path will release the lock */
Unlock:
- mutex_unlock(&pm_mutex);
+ mutex_unlock(&system_transition_mutex);
pm_pr_dbg("Hibernation image not present or could not be loaded.\n");
return error;
Close_Finish:
diff --git a/kernel/power/main.c b/kernel/power/main.c
index d9706da10930..35b50823d83b 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -15,17 +15,16 @@
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
+#include <linux/suspend.h>
#include "power.h"
-DEFINE_MUTEX(pm_mutex);
-
#ifdef CONFIG_PM_SLEEP
void lock_system_sleep(void)
{
current->flags |= PF_FREEZER_SKIP;
- mutex_lock(&pm_mutex);
+ mutex_lock(&system_transition_mutex);
}
EXPORT_SYMBOL_GPL(lock_system_sleep);
@@ -37,8 +36,9 @@ void unlock_system_sleep(void)
*
* Reason:
* Fundamentally, we just don't need it, because freezing condition
- * doesn't come into effect until we release the pm_mutex lock,
- * since the freezer always works with pm_mutex held.
+ * doesn't come into effect until we release the
+ * system_transition_mutex lock, since the freezer always works with
+ * system_transition_mutex held.
*
* More importantly, in the case of hibernation,
* unlock_system_sleep() gets called in snapshot_read() and
@@ -47,7 +47,7 @@ void unlock_system_sleep(void)
* enter the refrigerator, thus causing hibernation to lockup.
*/
current->flags &= ~PF_FREEZER_SKIP;
- mutex_unlock(&pm_mutex);
+ mutex_unlock(&system_transition_mutex);
}
EXPORT_SYMBOL_GPL(unlock_system_sleep);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 70178f6ffdc4..5342f6fc022e 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -556,7 +556,7 @@ static int enter_state(suspend_state_t state)
} else if (!valid_state(state)) {
return -EINVAL;
}
- if (!mutex_trylock(&pm_mutex))
+ if (!mutex_trylock(&system_transition_mutex))
return -EBUSY;
if (state == PM_SUSPEND_TO_IDLE)
@@ -590,7 +590,7 @@ static int enter_state(suspend_state_t state)
pm_pr_dbg("Finishing wakeup.\n");
suspend_finish();
Unlock:
- mutex_unlock(&pm_mutex);
+ mutex_unlock(&system_transition_mutex);
return error;
}
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index c2bcf97d24c8..d7f6c1a288d3 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -923,7 +923,7 @@ int swsusp_write(unsigned int flags)
}
memset(&snapshot, 0, sizeof(struct snapshot_handle));
error = snapshot_read_next(&snapshot);
- if (error < PAGE_SIZE) {
+ if (error < (int)PAGE_SIZE) {
if (error >= 0)
error = -EFAULT;
@@ -1483,7 +1483,7 @@ int swsusp_read(unsigned int *flags_p)
memset(&snapshot, 0, sizeof(struct snapshot_handle));
error = snapshot_write_next(&snapshot);
- if (error < PAGE_SIZE)
+ if (error < (int)PAGE_SIZE)
return error < 0 ? error : -EFAULT;
header = (struct swsusp_info *)data_of(snapshot);
error = get_swap_reader(&handle, flags_p);
diff --git a/kernel/power/user.c b/kernel/power/user.c
index abd225550271..2d8b60a3c86b 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -216,7 +216,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!mutex_trylock(&pm_mutex))
+ if (!mutex_trylock(&system_transition_mutex))
return -EBUSY;
lock_device_hotplug();
@@ -394,7 +394,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
}
unlock_device_hotplug();
- mutex_unlock(&pm_mutex);
+ mutex_unlock(&system_transition_mutex);
return error;
}
diff --git a/kernel/reboot.c b/kernel/reboot.c
index e4ced883d8de..8fb44dec9ad7 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -294,7 +294,7 @@ void kernel_power_off(void)
}
EXPORT_SYMBOL_GPL(kernel_power_off);
-static DEFINE_MUTEX(reboot_mutex);
+DEFINE_MUTEX(system_transition_mutex);
/*
* Reboot system call: for obvious reasons only root may call it,
@@ -338,7 +338,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
cmd = LINUX_REBOOT_CMD_HALT;
- mutex_lock(&reboot_mutex);
+ mutex_lock(&system_transition_mutex);
switch (cmd) {
case LINUX_REBOOT_CMD_RESTART:
kernel_restart(NULL);
@@ -389,7 +389,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
ret = -EINVAL;
break;
}
- mutex_unlock(&reboot_mutex);
+ mutex_unlock(&system_transition_mutex);
return ret;
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3222193c46c6..0922ef5d2e46 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -155,16 +155,17 @@ static inline void set_pcppage_migratetype(struct page *page, int migratetype)
* The following functions are used by the suspend/hibernate code to temporarily
* change gfp_allowed_mask in order to avoid using I/O during memory allocations
* while devices are suspended. To avoid races with the suspend/hibernate code,
- * they should always be called with pm_mutex held (gfp_allowed_mask also should
- * only be modified with pm_mutex held, unless the suspend/hibernate code is
- * guaranteed not to run in parallel with that modification).
+ * they should always be called with system_transition_mutex held
+ * (gfp_allowed_mask also should only be modified with system_transition_mutex
+ * held, unless the suspend/hibernate code is guaranteed not to run in parallel
+ * with that modification).
*/
static gfp_t saved_gfp_mask;
void pm_restore_gfp_mask(void)
{
- WARN_ON(!mutex_is_locked(&pm_mutex));
+ WARN_ON(!mutex_is_locked(&system_transition_mutex));
if (saved_gfp_mask) {
gfp_allowed_mask = saved_gfp_mask;
saved_gfp_mask = 0;
@@ -173,7 +174,7 @@ void pm_restore_gfp_mask(void)
void pm_restrict_gfp_mask(void)
{
- WARN_ON(!mutex_is_locked(&pm_mutex));
+ WARN_ON(!mutex_is_locked(&system_transition_mutex));
WARN_ON(saved_gfp_mask);
saved_gfp_mask = gfp_allowed_mask;
gfp_allowed_mask &= ~(__GFP_IO | __GFP_FS);