From 8b7912f4cb6c29a1223ca7f2472bf12c44cc285e Mon Sep 17 00:00:00 2001 From: Pavankumar Kondeti Date: Thu, 30 Sep 2021 15:39:08 +0530 Subject: opp: Fix required-opps phandle array count check The 'required-opps' property is optional. So of_count_phandle_with_args() can return -ENOENT when queried for required-opps. Handle this case. Signed-off-by: Pavankumar Kondeti Signed-off-by: Viresh Kumar --- drivers/opp/of.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 2a97c6535c4c..5437085fb380 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -170,7 +170,7 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table, } count = of_count_phandle_with_args(np, "required-opps", NULL); - if (!count) + if (count <= 0) goto put_np; required_opp_tables = kcalloc(count, sizeof(*required_opp_tables), -- cgit v1.2.3 From 3734b9f2cee01d9dde2fbbda742ba6dd6ba10a29 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 27 Sep 2021 01:40:24 +0300 Subject: opp: Change type of dev_pm_opp_attach_genpd(names) argument Elements of the 'names' array are not changed by the code, constify them for consistency. Signed-off-by: Dmitry Osipenko Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 6 +++--- include/linux/pm_opp.h | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 04b4691a8aac..3057beabd370 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -2348,12 +2348,12 @@ static void _opp_detach_genpd(struct opp_table *opp_table) * "required-opps" are added in DT. */ struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, - const char **names, struct device ***virt_devs) + const char * const *names, struct device ***virt_devs) { struct opp_table *opp_table; struct device *virt_dev; int index = 0, ret = -EINVAL; - const char **name = names; + const char * const *name = names; opp_table = _add_opp_table(dev, false); if (IS_ERR(opp_table)) @@ -2457,7 +2457,7 @@ static void devm_pm_opp_detach_genpd(void *data) * * Return: 0 on success and errorno otherwise. */ -int devm_pm_opp_attach_genpd(struct device *dev, const char **names, +int devm_pm_opp_attach_genpd(struct device *dev, const char * const *names, struct device ***virt_devs) { struct opp_table *opp_table; diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 84150a22fd7c..7f1f066fc377 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -156,9 +156,9 @@ int devm_pm_opp_set_clkname(struct device *dev, const char *name); struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table); int devm_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); -struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names, struct device ***virt_devs); +struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char * const *names, struct device ***virt_devs); void dev_pm_opp_detach_genpd(struct opp_table *opp_table); -int devm_pm_opp_attach_genpd(struct device *dev, const char **names, struct device ***virt_devs); +int devm_pm_opp_attach_genpd(struct device *dev, const char * const *names, struct device ***virt_devs); struct dev_pm_opp *dev_pm_opp_xlate_required_opp(struct opp_table *src_table, struct opp_table *dst_table, struct dev_pm_opp *src_opp); int dev_pm_opp_xlate_performance_state(struct opp_table *src_table, struct opp_table *dst_table, unsigned int pstate); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); @@ -376,7 +376,7 @@ static inline int devm_pm_opp_set_clkname(struct device *dev, const char *name) return -EOPNOTSUPP; } -static inline struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names, struct device ***virt_devs) +static inline struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char * const *names, struct device ***virt_devs) { return ERR_PTR(-EOPNOTSUPP); } @@ -384,7 +384,7 @@ static inline struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, cons static inline void dev_pm_opp_detach_genpd(struct opp_table *opp_table) {} static inline int devm_pm_opp_attach_genpd(struct device *dev, - const char **names, + const char * const *names, struct device ***virt_devs) { return -EOPNOTSUPP; -- cgit v1.2.3 From e69709f6861aaba80b4eaab6825e8c522a2afb5c Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 20 Sep 2021 20:22:46 +0300 Subject: opp: Add more resource-managed variants of dev_pm_opp_of_add_table() Add resource-managed variants of dev_pm_opp_of_add_table_indexed() and dev_pm_opp_of_add_table_noclk(), allowing drivers to remove boilerplate code. Signed-off-by: Dmitry Osipenko [ Viresh: Added underscore to devm_of_add_table_indexed() ] Signed-off-by: Viresh Kumar --- drivers/opp/of.c | 46 +++++++++++++++++++++++++++++++++++++++------- include/linux/pm_opp.h | 12 ++++++++++++ 2 files changed, 51 insertions(+), 7 deletions(-) diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 5437085fb380..fe218af735b0 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -1081,6 +1081,17 @@ static void devm_pm_opp_of_table_release(void *data) dev_pm_opp_of_remove_table(data); } +static int _devm_of_add_table_indexed(struct device *dev, int index, bool getclk) +{ + int ret; + + ret = _of_add_table_indexed(dev, index, getclk); + if (ret) + return ret; + + return devm_add_action_or_reset(dev, devm_pm_opp_of_table_release, dev); +} + /** * devm_pm_opp_of_add_table() - Initialize opp table from device tree * @dev: device pointer used to lookup OPP table. @@ -1102,13 +1113,7 @@ static void devm_pm_opp_of_table_release(void *data) */ int devm_pm_opp_of_add_table(struct device *dev) { - int ret; - - ret = dev_pm_opp_of_add_table(dev); - if (ret) - return ret; - - return devm_add_action_or_reset(dev, devm_pm_opp_of_table_release, dev); + return _devm_of_add_table_indexed(dev, 0, true); } EXPORT_SYMBOL_GPL(devm_pm_opp_of_add_table); @@ -1151,6 +1156,19 @@ int dev_pm_opp_of_add_table_indexed(struct device *dev, int index) } EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table_indexed); +/** + * devm_pm_opp_of_add_table_indexed() - Initialize indexed opp table from device tree + * @dev: device pointer used to lookup OPP table. + * @index: Index number. + * + * This is a resource-managed variant of dev_pm_opp_of_add_table_indexed(). + */ +int devm_pm_opp_of_add_table_indexed(struct device *dev, int index) +{ + return _devm_of_add_table_indexed(dev, index, true); +} +EXPORT_SYMBOL_GPL(devm_pm_opp_of_add_table_indexed); + /** * dev_pm_opp_of_add_table_noclk() - Initialize indexed opp table from device * tree without getting clk for device. @@ -1169,6 +1187,20 @@ int dev_pm_opp_of_add_table_noclk(struct device *dev, int index) } EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table_noclk); +/** + * devm_pm_opp_of_add_table_noclk() - Initialize indexed opp table from device + * tree without getting clk for device. + * @dev: device pointer used to lookup OPP table. + * @index: Index number. + * + * This is a resource-managed variant of dev_pm_opp_of_add_table_noclk(). + */ +int devm_pm_opp_of_add_table_noclk(struct device *dev, int index) +{ + return _devm_of_add_table_indexed(dev, index, false); +} +EXPORT_SYMBOL_GPL(devm_pm_opp_of_add_table_noclk); + /* CPU device specific helpers */ /** diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 7f1f066fc377..879c138c7b8e 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -439,7 +439,9 @@ static inline int dev_pm_opp_sync_regulators(struct device *dev) #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) int dev_pm_opp_of_add_table(struct device *dev); int dev_pm_opp_of_add_table_indexed(struct device *dev, int index); +int devm_pm_opp_of_add_table_indexed(struct device *dev, int index); int dev_pm_opp_of_add_table_noclk(struct device *dev, int index); +int devm_pm_opp_of_add_table_noclk(struct device *dev, int index); void dev_pm_opp_of_remove_table(struct device *dev); int devm_pm_opp_of_add_table(struct device *dev); int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask); @@ -465,11 +467,21 @@ static inline int dev_pm_opp_of_add_table_indexed(struct device *dev, int index) return -EOPNOTSUPP; } +static inline int devm_pm_opp_of_add_table_indexed(struct device *dev, int index) +{ + return -EOPNOTSUPP; +} + static inline int dev_pm_opp_of_add_table_noclk(struct device *dev, int index) { return -EOPNOTSUPP; } +static inline int devm_pm_opp_of_add_table_noclk(struct device *dev, int index) +{ + return -EOPNOTSUPP; +} + static inline void dev_pm_opp_of_remove_table(struct device *dev) { } -- cgit v1.2.3 From 1cc55204b0dbba0ca09cc14624cbbeeb2d35742f Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 20 Sep 2021 20:22:47 +0300 Subject: PM / devfreq: Add devm_devfreq_add_governor() Add resource-managed variant of devfreq_add_governor(). Signed-off-by: Dmitry Osipenko Acked-by: Chanwoo Choi Signed-off-by: Viresh Kumar --- drivers/devfreq/devfreq.c | 26 ++++++++++++++++++++++++++ drivers/devfreq/governor.h | 3 +++ 2 files changed, 29 insertions(+) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 85faa7a5c7d1..4579eefb8fe7 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -1301,6 +1301,32 @@ err_out: } EXPORT_SYMBOL(devfreq_add_governor); +static void devm_devfreq_remove_governor(void *governor) +{ + WARN_ON(devfreq_remove_governor(governor)); +} + +/** + * devm_devfreq_add_governor() - Add devfreq governor + * @dev: device which adds devfreq governor + * @governor: the devfreq governor to be added + * + * This is a resource-managed variant of devfreq_add_governor(). + */ +int devm_devfreq_add_governor(struct device *dev, + struct devfreq_governor *governor) +{ + int err; + + err = devfreq_add_governor(governor); + if (err) + return err; + + return devm_add_action_or_reset(dev, devm_devfreq_remove_governor, + governor); +} +EXPORT_SYMBOL(devm_devfreq_add_governor); + /** * devfreq_remove_governor() - Remove devfreq feature from a device. * @governor: the devfreq governor to be removed diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h index 2d69a0ce6291..002a7d67e39d 100644 --- a/drivers/devfreq/governor.h +++ b/drivers/devfreq/governor.h @@ -84,6 +84,9 @@ void devfreq_update_interval(struct devfreq *devfreq, unsigned int *delay); int devfreq_add_governor(struct devfreq_governor *governor); int devfreq_remove_governor(struct devfreq_governor *governor); +int devm_devfreq_add_governor(struct device *dev, + struct devfreq_governor *governor); + int devfreq_update_status(struct devfreq *devfreq, unsigned long freq); int devfreq_update_target(struct devfreq *devfreq, unsigned long freq); -- cgit v1.2.3 From 68b79f285540842225bda8e9ded4a9b78664ed1a Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 20 Sep 2021 20:22:48 +0300 Subject: PM / devfreq: tegra30: Use resource-managed helpers Use resource-managed API helpers to simplify driver's probe() function, making code cleaner. Signed-off-by: Dmitry Osipenko Acked-by: Chanwoo Choi Signed-off-by: Viresh Kumar --- drivers/devfreq/tegra30-devfreq.c | 107 ++++++++++++++++---------------------- 1 file changed, 46 insertions(+), 61 deletions(-) diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c index 10661eb2aed8..d83fdc2713ed 100644 --- a/drivers/devfreq/tegra30-devfreq.c +++ b/drivers/devfreq/tegra30-devfreq.c @@ -178,7 +178,6 @@ struct tegra_devfreq_soc_data { struct tegra_devfreq { struct devfreq *devfreq; - struct opp_table *opp_table; struct reset_control *reset; struct clk *clock; @@ -789,6 +788,39 @@ static struct devfreq_governor tegra_devfreq_governor = { .event_handler = tegra_governor_event_handler, }; +static void devm_tegra_devfreq_deinit_hw(void *data) +{ + struct tegra_devfreq *tegra = data; + + reset_control_reset(tegra->reset); + clk_disable_unprepare(tegra->clock); +} + +static int devm_tegra_devfreq_init_hw(struct device *dev, + struct tegra_devfreq *tegra) +{ + int err; + + err = clk_prepare_enable(tegra->clock); + if (err) { + dev_err(dev, "Failed to prepare and enable ACTMON clock\n"); + return err; + } + + err = devm_add_action_or_reset(dev, devm_tegra_devfreq_deinit_hw, + tegra); + if (err) + return err; + + err = reset_control_reset(tegra->reset); + if (err) { + dev_err(dev, "Failed to reset hardware: %d\n", err); + return err; + } + + return err; +} + static int tegra_devfreq_probe(struct platform_device *pdev) { u32 hw_version = BIT(tegra_sku_info.soc_speedo_id); @@ -842,38 +874,26 @@ static int tegra_devfreq_probe(struct platform_device *pdev) return err; } - tegra->opp_table = dev_pm_opp_set_supported_hw(&pdev->dev, - &hw_version, 1); - err = PTR_ERR_OR_ZERO(tegra->opp_table); + err = devm_pm_opp_set_supported_hw(&pdev->dev, &hw_version, 1); if (err) { dev_err(&pdev->dev, "Failed to set supported HW: %d\n", err); return err; } - err = dev_pm_opp_of_add_table_noclk(&pdev->dev, 0); + err = devm_pm_opp_of_add_table_noclk(&pdev->dev, 0); if (err) { dev_err(&pdev->dev, "Failed to add OPP table: %d\n", err); - goto put_hw; - } - - err = clk_prepare_enable(tegra->clock); - if (err) { - dev_err(&pdev->dev, - "Failed to prepare and enable ACTMON clock\n"); - goto remove_table; + return err; } - err = reset_control_reset(tegra->reset); - if (err) { - dev_err(&pdev->dev, "Failed to reset hardware: %d\n", err); - goto disable_clk; - } + err = devm_tegra_devfreq_init_hw(&pdev->dev, tegra); + if (err) + return err; rate = clk_round_rate(tegra->emc_clock, ULONG_MAX); if (rate < 0) { dev_err(&pdev->dev, "Failed to round clock rate: %ld\n", rate); - err = rate; - goto disable_clk; + return rate; } tegra->max_freq = rate / KHZ; @@ -892,52 +912,18 @@ static int tegra_devfreq_probe(struct platform_device *pdev) INIT_DELAYED_WORK(&tegra->cpufreq_update_work, tegra_actmon_delayed_update); - err = devfreq_add_governor(&tegra_devfreq_governor); + err = devm_devfreq_add_governor(&pdev->dev, &tegra_devfreq_governor); if (err) { dev_err(&pdev->dev, "Failed to add governor: %d\n", err); - goto remove_opps; + return err; } tegra_devfreq_profile.initial_freq = clk_get_rate(tegra->emc_clock); - devfreq = devfreq_add_device(&pdev->dev, &tegra_devfreq_profile, - "tegra_actmon", NULL); - if (IS_ERR(devfreq)) { - err = PTR_ERR(devfreq); - goto remove_governor; - } - - return 0; - -remove_governor: - devfreq_remove_governor(&tegra_devfreq_governor); - -remove_opps: - dev_pm_opp_remove_all_dynamic(&pdev->dev); - - reset_control_reset(tegra->reset); -disable_clk: - clk_disable_unprepare(tegra->clock); -remove_table: - dev_pm_opp_of_remove_table(&pdev->dev); -put_hw: - dev_pm_opp_put_supported_hw(tegra->opp_table); - - return err; -} - -static int tegra_devfreq_remove(struct platform_device *pdev) -{ - struct tegra_devfreq *tegra = platform_get_drvdata(pdev); - - devfreq_remove_device(tegra->devfreq); - devfreq_remove_governor(&tegra_devfreq_governor); - - reset_control_reset(tegra->reset); - clk_disable_unprepare(tegra->clock); - - dev_pm_opp_of_remove_table(&pdev->dev); - dev_pm_opp_put_supported_hw(tegra->opp_table); + devfreq = devm_devfreq_add_device(&pdev->dev, &tegra_devfreq_profile, + "tegra_actmon", NULL); + if (IS_ERR(devfreq)) + return PTR_ERR(devfreq); return 0; } @@ -967,7 +953,6 @@ MODULE_DEVICE_TABLE(of, tegra_devfreq_of_match); static struct platform_driver tegra_devfreq_driver = { .probe = tegra_devfreq_probe, - .remove = tegra_devfreq_remove, .driver = { .name = "tegra-devfreq", .of_match_table = tegra_devfreq_of_match, -- cgit v1.2.3 From 4844bdbe9166bc3d194b1d20d13dc1f01d3c1bb7 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 20 Sep 2021 20:22:49 +0300 Subject: PM / devfreq: tegra30: Check whether clk_round_rate() returns zero rate EMC clock is always-on and can't be zero. Check whether clk_round_rate() returns zero rate and error out if it does. It can return zero if clock tree isn't initialized properly. Acked-by: Chanwoo Choi Signed-off-by: Dmitry Osipenko Signed-off-by: Viresh Kumar --- drivers/devfreq/tegra30-devfreq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c index d83fdc2713ed..65ecf17a36f4 100644 --- a/drivers/devfreq/tegra30-devfreq.c +++ b/drivers/devfreq/tegra30-devfreq.c @@ -891,9 +891,9 @@ static int tegra_devfreq_probe(struct platform_device *pdev) return err; rate = clk_round_rate(tegra->emc_clock, ULONG_MAX); - if (rate < 0) { + if (rate <= 0) { dev_err(&pdev->dev, "Failed to round clock rate: %ld\n", rate); - return rate; + return rate ?: -EINVAL; } tegra->max_freq = rate / KHZ; -- cgit v1.2.3 From 27ff8187f13ecfec8a26fb1928e906f46f326cc5 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 8 Oct 2021 15:46:52 +0800 Subject: opp: Fix return in _opp_add_static_v2() Fix sparse warning: drivers/opp/of.c:924 _opp_add_static_v2() warn: passing zero to 'ERR_PTR' For duplicate OPPs 'ret' be set to zero. Fixes: deac8703da5f ("PM / OPP: _of_add_opp_table_v2(): increment count only if OPP is added") Signed-off-by: YueHaibing Signed-off-by: Viresh Kumar --- drivers/opp/of.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/opp/of.c b/drivers/opp/of.c index fe218af735b0..2f40afa4e65c 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -921,7 +921,7 @@ free_required_opps: free_opp: _opp_free(new_opp); - return ERR_PTR(ret); + return ret ? ERR_PTR(ret) : NULL; } /* Initializes OPP tables based on new bindings */ -- cgit v1.2.3 From 7ca81b690e598fdf16a6c738a466247ef9be7ac7 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 29 Oct 2021 01:15:47 +0300 Subject: dt-bindings: opp: Allow multi-worded OPP entry name Not all OPP entries fit into a single word. In particular NVIDIA Tegra OPP tables use multi-word names. Allow OPP entry to have multi-worded name separated by hyphen. This silences DT checker warnings about wrong naming scheme. Reviewed-by: David Heidelberg Signed-off-by: Dmitry Osipenko Signed-off-by: Viresh Kumar --- Documentation/devicetree/bindings/opp/opp-v2-base.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/opp/opp-v2-base.yaml b/Documentation/devicetree/bindings/opp/opp-v2-base.yaml index ae3ae4d39843..15a76bcd6d42 100644 --- a/Documentation/devicetree/bindings/opp/opp-v2-base.yaml +++ b/Documentation/devicetree/bindings/opp/opp-v2-base.yaml @@ -33,7 +33,7 @@ properties: type: boolean patternProperties: - '^opp-?[0-9]+$': + '^opp(-?[0-9]+)*$': type: object description: One or more OPP nodes describing voltage-current-frequency combinations. -- cgit v1.2.3 From a2bd7be12b9edab5736a9a95bceccfbdf7520fdd Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 29 Oct 2021 14:38:55 +0200 Subject: PM: sleep: Fix runtime PM based cpuidle support In the cpuidle-psci case, runtime PM in combination with the generic PM domain (genpd), may be used when entering/exiting a shared idlestate. More precisely, genpd relies on runtime PM to be enabled for the attached device (in this case it belongs to a CPU), to properly manage the reference counting of its PM domain. This works fine most of the time, but during system suspend in dpm_suspend_late(), the PM core disables runtime PM for all devices. Beyond this point, calls to pm_runtime_get_sync() to runtime resume a device may fail and therefore it could also mess up the reference counting in genpd. To fix this problem, let's call wake_up_all_idle_cpus() in dpm_suspend_late(), prior to disabling runtime PM. In this way a device that belongs to a CPU, becomes runtime resumed through cpuidle-psci and stays like that, because the runtime PM usage count has been bumped in device_prepare(). Diagnosed-by: Maulik Shah Suggested-by: Rafael J. Wysocki Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index ac4dde8fdb8b..2fb08d4f1aca 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1463,6 +1463,7 @@ int dpm_suspend_late(pm_message_t state) int error = 0; trace_suspend_resume(TPS("dpm_suspend_late"), state.event, true); + wake_up_all_idle_cpus(); mutex_lock(&dpm_list_mtx); pm_transition = state; async_error = 0; -- cgit v1.2.3 From dbea75fe18f60e364de6d994fc938a24ba249d81 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 3 Nov 2021 19:43:47 +0100 Subject: cpufreq: intel_pstate: Clear HWP desired on suspend/shutdown and offline Commit a365ab6b9dfb ("cpufreq: intel_pstate: Implement the ->adjust_perf() callback") caused intel_pstate to use nonzero HWP desired values in certain usage scenarios, but it did not prevent them from being leaked into the confugirations in which HWP desired is expected to be 0. The failing scenarios are switching the driver from the passive mode to the active mode and starting a new kernel via kexec() while intel_pstate is running in the passive mode. To address this issue, ensure that HWP desired will be cleared on offline and suspend/shutdown. Fixes: a365ab6b9dfb ("cpufreq: intel_pstate: Implement the ->adjust_perf() callback") Reported-by: Julia Lawall Tested-by: Julia Lawall Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 349ddbaef796..8896a44ec7b0 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1006,9 +1006,16 @@ static void intel_pstate_hwp_offline(struct cpudata *cpu) */ value &= ~GENMASK_ULL(31, 24); value |= HWP_ENERGY_PERF_PREFERENCE(cpu->epp_cached); - WRITE_ONCE(cpu->hwp_req_cached, value); } + /* + * Clear the desired perf field in the cached HWP request value to + * prevent nonzero desired values from being leaked into the active + * mode. + */ + value &= ~HWP_DESIRED_PERF(~0L); + WRITE_ONCE(cpu->hwp_req_cached, value); + value &= ~GENMASK_ULL(31, 0); min_perf = HWP_LOWEST_PERF(READ_ONCE(cpu->hwp_cap_cached)); @@ -3003,6 +3010,27 @@ static int intel_cpufreq_cpu_exit(struct cpufreq_policy *policy) return intel_pstate_cpu_exit(policy); } +static int intel_cpufreq_suspend(struct cpufreq_policy *policy) +{ + intel_pstate_suspend(policy); + + if (hwp_active) { + struct cpudata *cpu = all_cpu_data[policy->cpu]; + u64 value = READ_ONCE(cpu->hwp_req_cached); + + /* + * Clear the desired perf field in MSR_HWP_REQUEST in case + * intel_cpufreq_adjust_perf() is in use and the last value + * written by it may not be suitable. + */ + value &= ~HWP_DESIRED_PERF(~0L); + wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value); + WRITE_ONCE(cpu->hwp_req_cached, value); + } + + return 0; +} + static struct cpufreq_driver intel_cpufreq = { .flags = CPUFREQ_CONST_LOOPS, .verify = intel_cpufreq_verify_policy, @@ -3012,7 +3040,7 @@ static struct cpufreq_driver intel_cpufreq = { .exit = intel_cpufreq_cpu_exit, .offline = intel_cpufreq_cpu_offline, .online = intel_pstate_cpu_online, - .suspend = intel_pstate_suspend, + .suspend = intel_cpufreq_suspend, .resume = intel_pstate_resume, .update_limits = intel_pstate_update_limits, .name = "intel_cpufreq", -- cgit v1.2.3 From 5521055670a53bd495676d1163b40ecb0a37af9c Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 3 Nov 2021 22:19:25 -0700 Subject: cpufreq: intel_pstate: Fix unchecked MSR 0x773 access It is possible that on some platforms HWP interrupts are disabled. In that case accessing MSR 0x773 will result in warning. So check X86_FEATURE_HWP_NOTIFY feature to access MSR 0x773. The other places in code where this MSR is accessed, already checks this feature except during disable path called during cpufreq offline and suspend callbacks. Fixes: 57577c996d73 ("cpufreq: intel_pstate: Process HWP Guaranteed change notification") Reported-by: Steven Rostedt Signed-off-by: Srinivas Pandruvada Tested-by: Steven Rostedt (VMware) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 8896a44ec7b0..2bb847650b9d 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1627,6 +1627,9 @@ static void intel_pstate_disable_hwp_interrupt(struct cpudata *cpudata) { unsigned long flags; + if (!boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) + return; + /* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */ wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); -- cgit v1.2.3 From 074d0cdfbb2f5985c5748fe80f6f8a2a7db8b63f Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 4 Nov 2021 03:22:30 -0700 Subject: cpufreq: intel_pstate: Clear HWP Status during HWP Interrupt enable It is possible that some performance excursions happened before OS boot or enable HWP interrupts. So clear MSR_HWP_STATUS bits when we enable HWP interrupt. In this way a next excursion will results in a HWP interrupt. The status bits of MSR_HWP_STATUS must be cleared (0) by software so that a new status condition change will cause the hardware to set the bit again and issue the notification. Fixes: 57577c996d73 ("cpufreq: intel_pstate: Process HWP Guaranteed change notification") Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 2bb847650b9d..815df3daae9d 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1652,6 +1652,7 @@ static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata) /* wrmsrl_on_cpu has to be outside spinlock as this can result in IPC */ wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x01); + wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_STATUS, 0); } } -- cgit v1.2.3 From 2aa36604e8243698ff22bd5fef0dd0c6bb07ba92 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 4 Nov 2021 18:26:26 +0100 Subject: PM: sleep: Avoid calling put_device() under dpm_list_mtx It is generally unsafe to call put_device() with dpm_list_mtx held, because the given device's release routine may carry out an action depending on that lock which then may deadlock, so modify the system-wide suspend and resume of devices to always drop dpm_list_mtx before calling put_device() (and adjust white space somewhat while at it). For instance, this prevents the following splat from showing up in the kernel log after a system resume in certain configurations: [ 3290.969514] ====================================================== [ 3290.969517] WARNING: possible circular locking dependency detected [ 3290.969519] 5.15.0+ #2420 Tainted: G S [ 3290.969523] ------------------------------------------------------ [ 3290.969525] systemd-sleep/4553 is trying to acquire lock: [ 3290.969529] ffff888117ab1138 ((wq_completion)hci0#2){+.+.}-{0:0}, at: flush_workqueue+0x87/0x4a0 [ 3290.969554] but task is already holding lock: [ 3290.969556] ffffffff8280fca8 (dpm_list_mtx){+.+.}-{3:3}, at: dpm_resume+0x12e/0x3e0 [ 3290.969571] which lock already depends on the new lock. [ 3290.969573] the existing dependency chain (in reverse order) is: [ 3290.969575] -> #3 (dpm_list_mtx){+.+.}-{3:3}: [ 3290.969583] __mutex_lock+0x9d/0xa30 [ 3290.969591] device_pm_add+0x2e/0xe0 [ 3290.969597] device_add+0x4d5/0x8f0 [ 3290.969605] hci_conn_add_sysfs+0x43/0xb0 [bluetooth] [ 3290.969689] hci_conn_complete_evt.isra.71+0x124/0x750 [bluetooth] [ 3290.969747] hci_event_packet+0xd6c/0x28a0 [bluetooth] [ 3290.969798] hci_rx_work+0x213/0x640 [bluetooth] [ 3290.969842] process_one_work+0x2aa/0x650 [ 3290.969851] worker_thread+0x39/0x400 [ 3290.969859] kthread+0x142/0x170 [ 3290.969865] ret_from_fork+0x22/0x30 [ 3290.969872] -> #2 (&hdev->lock){+.+.}-{3:3}: [ 3290.969881] __mutex_lock+0x9d/0xa30 [ 3290.969887] hci_event_packet+0xba/0x28a0 [bluetooth] [ 3290.969935] hci_rx_work+0x213/0x640 [bluetooth] [ 3290.969978] process_one_work+0x2aa/0x650 [ 3290.969985] worker_thread+0x39/0x400 [ 3290.969993] kthread+0x142/0x170 [ 3290.969999] ret_from_fork+0x22/0x30 [ 3290.970004] -> #1 ((work_completion)(&hdev->rx_work)){+.+.}-{0:0}: [ 3290.970013] process_one_work+0x27d/0x650 [ 3290.970020] worker_thread+0x39/0x400 [ 3290.970028] kthread+0x142/0x170 [ 3290.970033] ret_from_fork+0x22/0x30 [ 3290.970038] -> #0 ((wq_completion)hci0#2){+.+.}-{0:0}: [ 3290.970047] __lock_acquire+0x15cb/0x1b50 [ 3290.970054] lock_acquire+0x26c/0x300 [ 3290.970059] flush_workqueue+0xae/0x4a0 [ 3290.970066] drain_workqueue+0xa1/0x130 [ 3290.970073] destroy_workqueue+0x34/0x1f0 [ 3290.970081] hci_release_dev+0x49/0x180 [bluetooth] [ 3290.970130] bt_host_release+0x1d/0x30 [bluetooth] [ 3290.970195] device_release+0x33/0x90 [ 3290.970201] kobject_release+0x63/0x160 [ 3290.970211] dpm_resume+0x164/0x3e0 [ 3290.970215] dpm_resume_end+0xd/0x20 [ 3290.970220] suspend_devices_and_enter+0x1a4/0xba0 [ 3290.970229] pm_suspend+0x26b/0x310 [ 3290.970236] state_store+0x42/0x90 [ 3290.970243] kernfs_fop_write_iter+0x135/0x1b0 [ 3290.970251] new_sync_write+0x125/0x1c0 [ 3290.970257] vfs_write+0x360/0x3c0 [ 3290.970263] ksys_write+0xa7/0xe0 [ 3290.970269] do_syscall_64+0x3a/0x80 [ 3290.970276] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 3290.970284] other info that might help us debug this: [ 3290.970285] Chain exists of: (wq_completion)hci0#2 --> &hdev->lock --> dpm_list_mtx [ 3290.970297] Possible unsafe locking scenario: [ 3290.970299] CPU0 CPU1 [ 3290.970300] ---- ---- [ 3290.970302] lock(dpm_list_mtx); [ 3290.970306] lock(&hdev->lock); [ 3290.970310] lock(dpm_list_mtx); [ 3290.970314] lock((wq_completion)hci0#2); [ 3290.970319] *** DEADLOCK *** [ 3290.970321] 7 locks held by systemd-sleep/4553: [ 3290.970325] #0: ffff888103bcd448 (sb_writers#4){.+.+}-{0:0}, at: ksys_write+0xa7/0xe0 [ 3290.970341] #1: ffff888115a14488 (&of->mutex){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x103/0x1b0 [ 3290.970355] #2: ffff888100f719e0 (kn->active#233){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x10c/0x1b0 [ 3290.970369] #3: ffffffff82661048 (autosleep_lock){+.+.}-{3:3}, at: state_store+0x12/0x90 [ 3290.970384] #4: ffffffff82658ac8 (system_transition_mutex){+.+.}-{3:3}, at: pm_suspend+0x9f/0x310 [ 3290.970399] #5: ffffffff827f2a48 (acpi_scan_lock){+.+.}-{3:3}, at: acpi_suspend_begin+0x4c/0x80 [ 3290.970416] #6: ffffffff8280fca8 (dpm_list_mtx){+.+.}-{3:3}, at: dpm_resume+0x12e/0x3e0 [ 3290.970428] stack backtrace: [ 3290.970431] CPU: 3 PID: 4553 Comm: systemd-sleep Tainted: G S 5.15.0+ #2420 [ 3290.970438] Hardware name: Dell Inc. XPS 13 9380/0RYJWW, BIOS 1.5.0 06/03/2019 [ 3290.970441] Call Trace: [ 3290.970446] dump_stack_lvl+0x44/0x57 [ 3290.970454] check_noncircular+0x105/0x120 [ 3290.970468] ? __lock_acquire+0x15cb/0x1b50 [ 3290.970474] __lock_acquire+0x15cb/0x1b50 [ 3290.970487] lock_acquire+0x26c/0x300 [ 3290.970493] ? flush_workqueue+0x87/0x4a0 [ 3290.970503] ? __raw_spin_lock_init+0x3b/0x60 [ 3290.970510] ? lockdep_init_map_type+0x58/0x240 [ 3290.970519] flush_workqueue+0xae/0x4a0 [ 3290.970526] ? flush_workqueue+0x87/0x4a0 [ 3290.970544] ? drain_workqueue+0xa1/0x130 [ 3290.970552] drain_workqueue+0xa1/0x130 [ 3290.970561] destroy_workqueue+0x34/0x1f0 [ 3290.970572] hci_release_dev+0x49/0x180 [bluetooth] [ 3290.970624] bt_host_release+0x1d/0x30 [bluetooth] [ 3290.970687] device_release+0x33/0x90 [ 3290.970695] kobject_release+0x63/0x160 [ 3290.970705] dpm_resume+0x164/0x3e0 [ 3290.970710] ? dpm_resume_early+0x251/0x3b0 [ 3290.970718] dpm_resume_end+0xd/0x20 [ 3290.970723] suspend_devices_and_enter+0x1a4/0xba0 [ 3290.970737] pm_suspend+0x26b/0x310 [ 3290.970746] state_store+0x42/0x90 [ 3290.970755] kernfs_fop_write_iter+0x135/0x1b0 [ 3290.970764] new_sync_write+0x125/0x1c0 [ 3290.970777] vfs_write+0x360/0x3c0 [ 3290.970785] ksys_write+0xa7/0xe0 [ 3290.970794] do_syscall_64+0x3a/0x80 [ 3290.970803] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 3290.970811] RIP: 0033:0x7f41b1328164 [ 3290.970819] Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 80 00 00 00 00 8b 05 4a d2 2c 00 48 63 ff 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 f3 c3 66 90 55 53 48 89 d5 48 89 f3 48 83 [ 3290.970824] RSP: 002b:00007ffe6ae21b28 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 3290.970831] RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007f41b1328164 [ 3290.970836] RDX: 0000000000000004 RSI: 000055965e651070 RDI: 0000000000000004 [ 3290.970839] RBP: 000055965e651070 R08: 000055965e64f390 R09: 00007f41b1e3d1c0 [ 3290.970843] R10: 000000000000000a R11: 0000000000000246 R12: 0000000000000004 [ 3290.970846] R13: 0000000000000001 R14: 000055965e64f2b0 R15: 0000000000000004 Cc: All applicable Signed-off-by: Rafael J. Wysocki --- drivers/base/power/main.c | 84 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 57 insertions(+), 27 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 2fb08d4f1aca..f4d0c555de29 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -710,6 +710,7 @@ static void dpm_noirq_resume_devices(pm_message_t state) dev = to_device(dpm_noirq_list.next); get_device(dev); list_move_tail(&dev->power.entry, &dpm_late_early_list); + mutex_unlock(&dpm_list_mtx); if (!is_async(dev)) { @@ -724,8 +725,9 @@ static void dpm_noirq_resume_devices(pm_message_t state) } } - mutex_lock(&dpm_list_mtx); put_device(dev); + + mutex_lock(&dpm_list_mtx); } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); @@ -849,6 +851,7 @@ void dpm_resume_early(pm_message_t state) dev = to_device(dpm_late_early_list.next); get_device(dev); list_move_tail(&dev->power.entry, &dpm_suspended_list); + mutex_unlock(&dpm_list_mtx); if (!is_async(dev)) { @@ -862,8 +865,10 @@ void dpm_resume_early(pm_message_t state) pm_dev_err(dev, state, " early", error); } } - mutex_lock(&dpm_list_mtx); + put_device(dev); + + mutex_lock(&dpm_list_mtx); } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); @@ -1026,7 +1031,12 @@ void dpm_resume(pm_message_t state) } if (!list_empty(&dev->power.entry)) list_move_tail(&dev->power.entry, &dpm_prepared_list); + + mutex_unlock(&dpm_list_mtx); + put_device(dev); + + mutex_lock(&dpm_list_mtx); } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); @@ -1104,14 +1114,16 @@ void dpm_complete(pm_message_t state) get_device(dev); dev->power.is_prepared = false; list_move(&dev->power.entry, &list); + mutex_unlock(&dpm_list_mtx); trace_device_pm_callback_start(dev, "", state.event); device_complete(dev, state); trace_device_pm_callback_end(dev, 0); - mutex_lock(&dpm_list_mtx); put_device(dev); + + mutex_lock(&dpm_list_mtx); } list_splice(&list, &dpm_list); mutex_unlock(&dpm_list_mtx); @@ -1296,17 +1308,21 @@ static int dpm_noirq_suspend_devices(pm_message_t state) error = device_suspend_noirq(dev); mutex_lock(&dpm_list_mtx); + if (error) { pm_dev_err(dev, state, " noirq", error); dpm_save_failed_dev(dev_name(dev)); - put_device(dev); - break; - } - if (!list_empty(&dev->power.entry)) + } else if (!list_empty(&dev->power.entry)) { list_move(&dev->power.entry, &dpm_noirq_list); + } + + mutex_unlock(&dpm_list_mtx); + put_device(dev); - if (async_error) + mutex_lock(&dpm_list_mtx); + + if (error || async_error) break; } mutex_unlock(&dpm_list_mtx); @@ -1472,23 +1488,28 @@ int dpm_suspend_late(pm_message_t state) struct device *dev = to_device(dpm_suspended_list.prev); get_device(dev); + mutex_unlock(&dpm_list_mtx); error = device_suspend_late(dev); mutex_lock(&dpm_list_mtx); + if (!list_empty(&dev->power.entry)) list_move(&dev->power.entry, &dpm_late_early_list); if (error) { pm_dev_err(dev, state, " late", error); dpm_save_failed_dev(dev_name(dev)); - put_device(dev); - break; } + + mutex_unlock(&dpm_list_mtx); + put_device(dev); - if (async_error) + mutex_lock(&dpm_list_mtx); + + if (error || async_error) break; } mutex_unlock(&dpm_list_mtx); @@ -1748,21 +1769,27 @@ int dpm_suspend(pm_message_t state) struct device *dev = to_device(dpm_prepared_list.prev); get_device(dev); + mutex_unlock(&dpm_list_mtx); error = device_suspend(dev); mutex_lock(&dpm_list_mtx); + if (error) { pm_dev_err(dev, state, "", error); dpm_save_failed_dev(dev_name(dev)); - put_device(dev); - break; - } - if (!list_empty(&dev->power.entry)) + } else if (!list_empty(&dev->power.entry)) { list_move(&dev->power.entry, &dpm_suspended_list); + } + + mutex_unlock(&dpm_list_mtx); + put_device(dev); - if (async_error) + + mutex_lock(&dpm_list_mtx); + + if (error || async_error) break; } mutex_unlock(&dpm_list_mtx); @@ -1879,6 +1906,7 @@ int dpm_prepare(pm_message_t state) struct device *dev = to_device(dpm_list.next); get_device(dev); + mutex_unlock(&dpm_list_mtx); trace_device_pm_callback_start(dev, "", state.event); @@ -1886,21 +1914,23 @@ int dpm_prepare(pm_message_t state) trace_device_pm_callback_end(dev, error); mutex_lock(&dpm_list_mtx); - if (error) { - if (error == -EAGAIN) { - put_device(dev); - error = 0; - continue; - } + + if (!error) { + dev->power.is_prepared = true; + if (!list_empty(&dev->power.entry)) + list_move_tail(&dev->power.entry, &dpm_prepared_list); + } else if (error == -EAGAIN) { + error = 0; + } else { dev_info(dev, "not prepared for power transition: code %d\n", error); - put_device(dev); - break; } - dev->power.is_prepared = true; - if (!list_empty(&dev->power.entry)) - list_move_tail(&dev->power.entry, &dpm_prepared_list); + + mutex_unlock(&dpm_list_mtx); + put_device(dev); + + mutex_lock(&dpm_list_mtx); } mutex_unlock(&dpm_list_mtx); trace_suspend_resume(TPS("dpm_prepare"), state.event, false); -- cgit v1.2.3