From 023d3779145ec6b7a0f38f19672a347b92feb74e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 31 Jan 2011 11:06:39 +0100 Subject: PM / Wakeup: Combine atomic counters to avoid reordering issues The memory barrier in wakeup_source_deactivate() is supposed to prevent the callers of pm_wakeup_pending() and pm_get_wakeup_count() from seeing the new value of events_in_progress (0, in particular) and the old value of event_count at the same time. However, if wakeup_source_deactivate() is executed by CPU0 and, for instance, pm_wakeup_pending() is executed by CPU1, where both processors can reorder operations, the memory barrier in wakeup_source_deactivate() doesn't affect CPU1 which can reorder reads. In that case CPU1 may very well decide to fetch event_count before it's modified and events_in_progress after it's been updated, so pm_wakeup_pending() may fail to detect a wakeup event. This issue can be addressed by using a single atomic variable to store both events_in_progress and event_count, so that they can be updated together in a single atomic operation. Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeup.c | 61 +++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 22 deletions(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 8ec406d8f548..e5e73b5efc80 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -24,12 +24,26 @@ */ bool events_check_enabled; -/* The counter of registered wakeup events. */ -static atomic_t event_count = ATOMIC_INIT(0); -/* A preserved old value of event_count. */ +/* + * Combined counters of registered wakeup events and wakeup events in progress. + * They need to be modified together atomically, so it's better to use one + * atomic variable to hold them both. + */ +static atomic_t combined_event_count = ATOMIC_INIT(0); + +#define IN_PROGRESS_BITS (sizeof(int) * 4) +#define MAX_IN_PROGRESS ((1 << IN_PROGRESS_BITS) - 1) + +static void split_counters(unsigned int *cnt, unsigned int *inpr) +{ + unsigned int comb = atomic_read(&combined_event_count); + + *cnt = (comb >> IN_PROGRESS_BITS); + *inpr = comb & MAX_IN_PROGRESS; +} + +/* A preserved old value of the events counter. */ static unsigned int saved_count; -/* The counter of wakeup events being processed. */ -static atomic_t events_in_progress = ATOMIC_INIT(0); static DEFINE_SPINLOCK(events_lock); @@ -307,7 +321,8 @@ static void wakeup_source_activate(struct wakeup_source *ws) ws->timer_expires = jiffies; ws->last_time = ktime_get(); - atomic_inc(&events_in_progress); + /* Increment the counter of events in progress. */ + atomic_inc(&combined_event_count); } /** @@ -394,14 +409,10 @@ static void wakeup_source_deactivate(struct wakeup_source *ws) del_timer(&ws->timer); /* - * event_count has to be incremented before events_in_progress is - * modified, so that the callers of pm_check_wakeup_events() and - * pm_save_wakeup_count() don't see the old value of event_count and - * events_in_progress equal to zero at the same time. + * Increment the counter of registered wakeup events and decrement the + * couter of wakeup events in progress simultaneously. */ - atomic_inc(&event_count); - smp_mb__before_atomic_dec(); - atomic_dec(&events_in_progress); + atomic_add(MAX_IN_PROGRESS, &combined_event_count); } /** @@ -556,8 +567,10 @@ bool pm_wakeup_pending(void) spin_lock_irqsave(&events_lock, flags); if (events_check_enabled) { - ret = ((unsigned int)atomic_read(&event_count) != saved_count) - || atomic_read(&events_in_progress); + unsigned int cnt, inpr; + + split_counters(&cnt, &inpr); + ret = (cnt != saved_count || inpr > 0); events_check_enabled = !ret; } spin_unlock_irqrestore(&events_lock, flags); @@ -579,19 +592,22 @@ bool pm_wakeup_pending(void) */ bool pm_get_wakeup_count(unsigned int *count) { - bool ret; + unsigned int cnt, inpr; if (capable(CAP_SYS_ADMIN)) events_check_enabled = false; - while (atomic_read(&events_in_progress) && !signal_pending(current)) { + for (;;) { + split_counters(&cnt, &inpr); + if (inpr == 0 || signal_pending(current)) + break; pm_wakeup_update_hit_counts(); schedule_timeout_interruptible(msecs_to_jiffies(TIMEOUT)); } - ret = !atomic_read(&events_in_progress); - *count = atomic_read(&event_count); - return ret; + split_counters(&cnt, &inpr); + *count = cnt; + return !inpr; } /** @@ -605,11 +621,12 @@ bool pm_get_wakeup_count(unsigned int *count) */ bool pm_save_wakeup_count(unsigned int count) { + unsigned int cnt, inpr; bool ret = false; spin_lock_irq(&events_lock); - if (count == (unsigned int)atomic_read(&event_count) - && !atomic_read(&events_in_progress)) { + split_counters(&cnt, &inpr); + if (cnt == count && inpr == 0) { saved_count = count; events_check_enabled = true; ret = true; -- cgit v1.2.3 From 378eef99ad45700aabfba2bd962516e5608b259a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 31 Jan 2011 11:06:50 +0100 Subject: PM / Wakeup: Make pm_save_wakeup_count() work as documented According to Documentation/ABI/testing/sysfs-power, the /sys/power/wakeup_count interface should only make the kernel react to wakeup events during suspend if the last write to it has been successful. However, if /sys/power/wakeup_count is written to two times in a row, where the first write is successful and the second is not, the kernel will still react to wakeup events during suspend due to a bug in pm_save_wakeup_count(). Fix the bug by making pm_save_wakeup_count() clear events_check_enabled unconditionally before checking if there are any new wakeup events registered since the previous read from /sys/power/wakeup_count. Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeup.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index e5e73b5efc80..07e08c3aece4 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -616,25 +616,25 @@ bool pm_get_wakeup_count(unsigned int *count) * * If @count is equal to the current number of registered wakeup events and the * current number of wakeup events being processed is zero, store @count as the - * old number of registered wakeup events to be used by pm_check_wakeup_events() - * and return true. Otherwise return false. + * old number of registered wakeup events for pm_check_wakeup_events(), enable + * wakeup events detection and return 'true'. Otherwise disable wakeup events + * detection and return 'false'. */ bool pm_save_wakeup_count(unsigned int count) { unsigned int cnt, inpr; - bool ret = false; + events_check_enabled = false; spin_lock_irq(&events_lock); split_counters(&cnt, &inpr); if (cnt == count && inpr == 0) { saved_count = count; events_check_enabled = true; - ret = true; } spin_unlock_irq(&events_lock); - if (!ret) + if (!events_check_enabled) pm_wakeup_update_hit_counts(); - return ret; + return events_check_enabled; } static struct dentry *wakeup_sources_stats_dentry; -- cgit v1.2.3 From 790c7885a4b2105e41f7a80b035fdb78e406154f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 31 Jan 2011 11:07:01 +0100 Subject: PM / Wakeup: Don't update events_check_enabled in pm_get_wakeup_count() Since pm_save_wakeup_count() has just been changed to clear events_check_enabled unconditionally before checking if there are any new wakeup events registered since the last read from /sys/power/wakeup_count, the detection of wakeup events during suspend may be disabled, after it's been enabled, by writing a "wrong" value back to /sys/power/wakeup_count. For this reason, it is not necessary to update events_check_enabled in pm_get_wakeup_count() any more. Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeup.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 07e08c3aece4..82836383997f 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -586,17 +586,14 @@ bool pm_wakeup_pending(void) * Store the number of registered wakeup events at the address in @count. Block * if the current number of wakeup events being processed is nonzero. * - * Return false if the wait for the number of wakeup events being processed to + * Return 'false' if the wait for the number of wakeup events being processed to * drop down to zero has been interrupted by a signal (and the current number - * of wakeup events being processed is still nonzero). Otherwise return true. + * of wakeup events being processed is still nonzero). Otherwise return 'true'. */ bool pm_get_wakeup_count(unsigned int *count) { unsigned int cnt, inpr; - if (capable(CAP_SYS_ADMIN)) - events_check_enabled = false; - for (;;) { split_counters(&cnt, &inpr); if (inpr == 0 || signal_pending(current)) -- cgit v1.2.3 From 0295a34d61f14522fddb26856191520d2e1d7e77 Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Mon, 31 Jan 2011 11:07:14 +0100 Subject: PM: Use appropriate printk() priority level in trace.c printk()s without a priority level default to KERN_WARNING. To reduce noise at KERN_WARNING, this patch sets the priority level appriopriately for unleveled printks()s. This should be useful to folks that look at dmesg warnings closely. Changed these messages to pr_info(). Signed-off-by: Mandeep Singh Baines Signed-off-by: Rafael J. Wysocki --- drivers/base/power/trace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c index 9f4258df4cfd..c80e138b62fe 100644 --- a/drivers/base/power/trace.c +++ b/drivers/base/power/trace.c @@ -112,7 +112,7 @@ static unsigned int read_magic_time(void) unsigned int val; get_rtc_time(&time); - printk("Time: %2d:%02d:%02d Date: %02d/%02d/%02d\n", + pr_info("Time: %2d:%02d:%02d Date: %02d/%02d/%02d\n", time.tm_hour, time.tm_min, time.tm_sec, time.tm_mon + 1, time.tm_mday, time.tm_year % 100); val = time.tm_year; /* 100 years */ @@ -179,7 +179,7 @@ static int show_file_hash(unsigned int value) unsigned int hash = hash_string(lineno, file, FILEHASH); if (hash != value) continue; - printk(" hash matches %s:%u\n", file, lineno); + pr_info(" hash matches %s:%u\n", file, lineno); match++; } return match; @@ -255,7 +255,7 @@ static int late_resume_init(void) val = val / FILEHASH; dev = val /* % DEVHASH */; - printk(" Magic number: %d:%d:%d\n", user, file, dev); + pr_info(" Magic number: %d:%d:%d\n", user, file, dev); show_file_hash(file); show_dev_hash(dev); return 0; -- cgit v1.2.3 From 4681b17154b3fd81f898802262985f662344e6ed Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 8 Feb 2011 23:25:48 +0100 Subject: USB / Hub: Do not call device_set_wakeup_capable() under spinlock A subsequent patch will modify device_set_wakeup_capable() in such a way that it will call functions which may sleep and therefore it shouldn't be called under spinlocks. In preparation to that, modify usb_set_device_state() to avoid calling device_set_wakeup_capable() under device_state_lock. Tested-by: Minchan Kim Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 0f299b7aad60..19d3435e6140 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1465,6 +1465,7 @@ void usb_set_device_state(struct usb_device *udev, enum usb_device_state new_state) { unsigned long flags; + int wakeup = -1; spin_lock_irqsave(&device_state_lock, flags); if (udev->state == USB_STATE_NOTATTACHED) @@ -1479,11 +1480,10 @@ void usb_set_device_state(struct usb_device *udev, || new_state == USB_STATE_SUSPENDED) ; /* No change to wakeup settings */ else if (new_state == USB_STATE_CONFIGURED) - device_set_wakeup_capable(&udev->dev, - (udev->actconfig->desc.bmAttributes - & USB_CONFIG_ATT_WAKEUP)); + wakeup = udev->actconfig->desc.bmAttributes + & USB_CONFIG_ATT_WAKEUP; else - device_set_wakeup_capable(&udev->dev, 0); + wakeup = 0; } if (udev->state == USB_STATE_SUSPENDED && new_state != USB_STATE_SUSPENDED) @@ -1495,6 +1495,8 @@ void usb_set_device_state(struct usb_device *udev, } else recursively_mark_NOTATTACHED(udev); spin_unlock_irqrestore(&device_state_lock, flags); + if (wakeup >= 0) + device_set_wakeup_capable(&udev->dev, wakeup); } EXPORT_SYMBOL_GPL(usb_set_device_state); -- cgit v1.2.3 From cb8f51bdadb7969139c2e39c2defd4cde98c1ea8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 8 Feb 2011 23:26:02 +0100 Subject: PM: Do not create wakeup sysfs files for devices that cannot wake up Currently, wakeup sysfs attributes are created for all devices, regardless of whether or not they are wakeup-capable. This is excessive and complicates wakeup device identification from user space (i.e. to identify wakeup-capable devices user space has to read /sys/devices/.../power/wakeup for all devices and see if they are not empty). Fix this issue by avoiding to create wakeup sysfs files for devices that cannot wake up the system from sleep states (i.e. whose power.can_wakeup flags are unset during registration) and modify device_set_wakeup_capable() so that it adds (or removes) the relevant sysfs attributes if a device's wakeup capability status is changed. Signed-off-by: Rafael J. Wysocki --- Documentation/ABI/testing/sysfs-devices-power | 20 +++---- Documentation/power/devices.txt | 20 +++---- drivers/base/power/power.h | 21 ++++---- drivers/base/power/sysfs.c | 78 +++++++++++++++++---------- drivers/base/power/wakeup.c | 29 ++++++++++ include/linux/pm_runtime.h | 6 +++ include/linux/pm_wakeup.h | 8 +-- 7 files changed, 117 insertions(+), 65 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power index 7628cd1bc36a..8ffbc25376a0 100644 --- a/Documentation/ABI/testing/sysfs-devices-power +++ b/Documentation/ABI/testing/sysfs-devices-power @@ -29,9 +29,8 @@ Description: "disabled" to it. For the devices that are not capable of generating system wakeup - events this file contains "\n". In that cases the user space - cannot modify the contents of this file and the device cannot be - enabled to wake up the system. + events this file is not present. In that case the device cannot + be enabled to wake up the system from sleep states. What: /sys/devices/.../power/control Date: January 2009 @@ -85,7 +84,7 @@ Description: The /sys/devices/.../wakeup_count attribute contains the number of signaled wakeup events associated with the device. This attribute is read-only. If the device is not enabled to wake up - the system from sleep states, this attribute is empty. + the system from sleep states, this attribute is not present. What: /sys/devices/.../power/wakeup_active_count Date: September 2010 @@ -95,7 +94,7 @@ Description: number of times the processing of wakeup events associated with the device was completed (at the kernel level). This attribute is read-only. If the device is not enabled to wake up the - system from sleep states, this attribute is empty. + system from sleep states, this attribute is not present. What: /sys/devices/.../power/wakeup_hit_count Date: September 2010 @@ -105,7 +104,8 @@ Description: number of times the processing of a wakeup event associated with the device might prevent the system from entering a sleep state. This attribute is read-only. If the device is not enabled to - wake up the system from sleep states, this attribute is empty. + wake up the system from sleep states, this attribute is not + present. What: /sys/devices/.../power/wakeup_active Date: September 2010 @@ -115,7 +115,7 @@ Description: or 0, depending on whether or not a wakeup event associated with the device is being processed (1). This attribute is read-only. If the device is not enabled to wake up the system from sleep - states, this attribute is empty. + states, this attribute is not present. What: /sys/devices/.../power/wakeup_total_time_ms Date: September 2010 @@ -125,7 +125,7 @@ Description: the total time of processing wakeup events associated with the device, in milliseconds. This attribute is read-only. If the device is not enabled to wake up the system from sleep states, - this attribute is empty. + this attribute is not present. What: /sys/devices/.../power/wakeup_max_time_ms Date: September 2010 @@ -135,7 +135,7 @@ Description: the maximum time of processing a single wakeup event associated with the device, in milliseconds. This attribute is read-only. If the device is not enabled to wake up the system from sleep - states, this attribute is empty. + states, this attribute is not present. What: /sys/devices/.../power/wakeup_last_time_ms Date: September 2010 @@ -146,7 +146,7 @@ Description: signaling the last wakeup event associated with the device, in milliseconds. This attribute is read-only. If the device is not enabled to wake up the system from sleep states, this - attribute is empty. + attribute is not present. What: /sys/devices/.../power/autosuspend_delay_ms Date: September 2010 diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index 57080cd74575..dd9b49251db3 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt @@ -159,18 +159,18 @@ matter, and the kernel is responsible for keeping track of it. By contrast, whether or not a wakeup-capable device should issue wakeup events is a policy decision, and it is managed by user space through a sysfs attribute: the power/wakeup file. User space can write the strings "enabled" or "disabled" to -set or clear the should_wakeup flag, respectively. Reads from the file will -return the corresponding string if can_wakeup is true, but if can_wakeup is -false then reads will return an empty string, to indicate that the device -doesn't support wakeup events. (But even though the file appears empty, writes -will still affect the should_wakeup flag.) +set or clear the "should_wakeup" flag, respectively. This file is only present +for wakeup-capable devices (i.e. devices whose "can_wakeup" flags are set) +and is created (or removed) by device_set_wakeup_capable(). Reads from the +file will return the corresponding string. The device_may_wakeup() routine returns true only if both flags are set. -Drivers should check this routine when putting devices in a low-power state -during a system sleep transition, to see whether or not to enable the devices' -wakeup mechanisms. However for runtime power management, wakeup events should -be enabled whenever the device and driver both support them, regardless of the -should_wakeup flag. +This information is used by subsystems, like the PCI bus type code, to see +whether or not to enable the devices' wakeup mechanisms. If device wakeup +mechanisms are enabled or disabled directly by drivers, they also should use +device_may_wakeup() to decide what to do during a system sleep transition. +However for runtime power management, wakeup events should be enabled whenever +the device and driver both support them, regardless of the should_wakeup flag. /sys/devices/.../power/control files diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index 698dde742587..f2a25f18fde7 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -58,19 +58,18 @@ static inline void device_pm_move_last(struct device *dev) {} * sysfs.c */ -extern int dpm_sysfs_add(struct device *); -extern void dpm_sysfs_remove(struct device *); -extern void rpm_sysfs_remove(struct device *); +extern int dpm_sysfs_add(struct device *dev); +extern void dpm_sysfs_remove(struct device *dev); +extern void rpm_sysfs_remove(struct device *dev); +extern int wakeup_sysfs_add(struct device *dev); +extern void wakeup_sysfs_remove(struct device *dev); #else /* CONFIG_PM */ -static inline int dpm_sysfs_add(struct device *dev) -{ - return 0; -} - -static inline void dpm_sysfs_remove(struct device *dev) -{ -} +static inline int dpm_sysfs_add(struct device *dev) { return 0; } +static inline void dpm_sysfs_remove(struct device *dev) {} +static inline void rpm_sysfs_remove(struct device *dev) {} +static inline int wakeup_sysfs_add(struct device *dev) { return 0; } +static inline void wakeup_sysfs_remove(struct device *dev) {} #endif diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 0b1e46bf3e56..fff49bee781d 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -431,26 +431,18 @@ static ssize_t async_store(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(async, 0644, async_show, async_store); #endif /* CONFIG_PM_ADVANCED_DEBUG */ -static struct attribute * power_attrs[] = { - &dev_attr_wakeup.attr, -#ifdef CONFIG_PM_SLEEP - &dev_attr_wakeup_count.attr, - &dev_attr_wakeup_active_count.attr, - &dev_attr_wakeup_hit_count.attr, - &dev_attr_wakeup_active.attr, - &dev_attr_wakeup_total_time_ms.attr, - &dev_attr_wakeup_max_time_ms.attr, - &dev_attr_wakeup_last_time_ms.attr, -#endif +static struct attribute *power_attrs[] = { #ifdef CONFIG_PM_ADVANCED_DEBUG +#ifdef CONFIG_PM_SLEEP &dev_attr_async.attr, +#endif #ifdef CONFIG_PM_RUNTIME &dev_attr_runtime_status.attr, &dev_attr_runtime_usage.attr, &dev_attr_runtime_active_kids.attr, &dev_attr_runtime_enabled.attr, #endif -#endif +#endif /* CONFIG_PM_ADVANCED_DEBUG */ NULL, }; static struct attribute_group pm_attr_group = { @@ -458,9 +450,26 @@ static struct attribute_group pm_attr_group = { .attrs = power_attrs, }; -#ifdef CONFIG_PM_RUNTIME +static struct attribute *wakeup_attrs[] = { +#ifdef CONFIG_PM_SLEEP + &dev_attr_wakeup.attr, + &dev_attr_wakeup_count.attr, + &dev_attr_wakeup_active_count.attr, + &dev_attr_wakeup_hit_count.attr, + &dev_attr_wakeup_active.attr, + &dev_attr_wakeup_total_time_ms.attr, + &dev_attr_wakeup_max_time_ms.attr, + &dev_attr_wakeup_last_time_ms.attr, +#endif + NULL, +}; +static struct attribute_group pm_wakeup_attr_group = { + .name = power_group_name, + .attrs = wakeup_attrs, +}; static struct attribute *runtime_attrs[] = { +#ifdef CONFIG_PM_RUNTIME #ifndef CONFIG_PM_ADVANCED_DEBUG &dev_attr_runtime_status.attr, #endif @@ -468,6 +477,7 @@ static struct attribute *runtime_attrs[] = { &dev_attr_runtime_suspended_time.attr, &dev_attr_runtime_active_time.attr, &dev_attr_autosuspend_delay_ms.attr, +#endif /* CONFIG_PM_RUNTIME */ NULL, }; static struct attribute_group pm_runtime_attr_group = { @@ -480,35 +490,49 @@ int dpm_sysfs_add(struct device *dev) int rc; rc = sysfs_create_group(&dev->kobj, &pm_attr_group); - if (rc == 0 && !dev->power.no_callbacks) { + if (rc) + return rc; + + if (pm_runtime_callbacks_present(dev)) { rc = sysfs_merge_group(&dev->kobj, &pm_runtime_attr_group); if (rc) - sysfs_remove_group(&dev->kobj, &pm_attr_group); + goto err_out; + } + + if (device_can_wakeup(dev)) { + rc = sysfs_merge_group(&dev->kobj, &pm_wakeup_attr_group); + if (rc) { + if (pm_runtime_callbacks_present(dev)) + sysfs_unmerge_group(&dev->kobj, + &pm_runtime_attr_group); + goto err_out; + } } + return 0; + + err_out: + sysfs_remove_group(&dev->kobj, &pm_attr_group); return rc; } -void rpm_sysfs_remove(struct device *dev) +int wakeup_sysfs_add(struct device *dev) { - sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group); + return sysfs_merge_group(&dev->kobj, &pm_wakeup_attr_group); } -void dpm_sysfs_remove(struct device *dev) +void wakeup_sysfs_remove(struct device *dev) { - rpm_sysfs_remove(dev); - sysfs_remove_group(&dev->kobj, &pm_attr_group); + sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group); } -#else /* CONFIG_PM_RUNTIME */ - -int dpm_sysfs_add(struct device * dev) +void rpm_sysfs_remove(struct device *dev) { - return sysfs_create_group(&dev->kobj, &pm_attr_group); + sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group); } -void dpm_sysfs_remove(struct device * dev) +void dpm_sysfs_remove(struct device *dev) { + rpm_sysfs_remove(dev); + sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group); sysfs_remove_group(&dev->kobj, &pm_attr_group); } - -#endif diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 82836383997f..4573c83df6dd 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -241,6 +241,35 @@ int device_wakeup_disable(struct device *dev) } EXPORT_SYMBOL_GPL(device_wakeup_disable); +/** + * device_set_wakeup_capable - Set/reset device wakeup capability flag. + * @dev: Device to handle. + * @capable: Whether or not @dev is capable of waking up the system from sleep. + * + * If @capable is set, set the @dev's power.can_wakeup flag and add its + * wakeup-related attributes to sysfs. Otherwise, unset the @dev's + * power.can_wakeup flag and remove its wakeup-related attributes from sysfs. + * + * This function may sleep and it can't be called from any context where + * sleeping is not allowed. + */ +void device_set_wakeup_capable(struct device *dev, bool capable) +{ + if (!!dev->power.can_wakeup == !!capable) + return; + + if (device_is_registered(dev)) { + if (capable) { + if (wakeup_sysfs_add(dev)) + return; + } else { + wakeup_sysfs_remove(dev); + } + } + dev->power.can_wakeup = capable; +} +EXPORT_SYMBOL_GPL(device_set_wakeup_capable); + /** * device_init_wakeup - Device wakeup initialization. * @dev: Device to handle. diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index d34f067e2a7f..8de9aa6e7def 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -87,6 +87,11 @@ static inline bool pm_runtime_enabled(struct device *dev) return !dev->power.disable_depth; } +static inline bool pm_runtime_callbacks_present(struct device *dev) +{ + return !dev->power.no_callbacks; +} + static inline void pm_runtime_mark_last_busy(struct device *dev) { ACCESS_ONCE(dev->power.last_busy) = jiffies; @@ -133,6 +138,7 @@ static inline int pm_generic_runtime_resume(struct device *dev) { return 0; } static inline void pm_runtime_no_callbacks(struct device *dev) {} static inline void pm_runtime_irq_safe(struct device *dev) {} +static inline bool pm_runtime_callbacks_present(struct device *dev) { return false; } static inline void pm_runtime_mark_last_busy(struct device *dev) {} static inline void __pm_runtime_use_autosuspend(struct device *dev, bool use) {} diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 03a67db03d01..a32da962d693 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -62,18 +62,11 @@ struct wakeup_source { * Changes to device_may_wakeup take effect on the next pm state change. */ -static inline void device_set_wakeup_capable(struct device *dev, bool capable) -{ - dev->power.can_wakeup = capable; -} - static inline bool device_can_wakeup(struct device *dev) { return dev->power.can_wakeup; } - - static inline bool device_may_wakeup(struct device *dev) { return dev->power.can_wakeup && !!dev->power.wakeup; @@ -88,6 +81,7 @@ extern struct wakeup_source *wakeup_source_register(const char *name); extern void wakeup_source_unregister(struct wakeup_source *ws); extern int device_wakeup_enable(struct device *dev); extern int device_wakeup_disable(struct device *dev); +extern void device_set_wakeup_capable(struct device *dev, bool capable); extern int device_init_wakeup(struct device *dev, bool val); extern int device_set_wakeup_enable(struct device *dev, bool enable); extern void __pm_stay_awake(struct wakeup_source *ws); -- cgit v1.2.3 From cd51e61cf4e8b220da37dc35e9c2dc2dc258b4de Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 11 Feb 2011 00:04:52 +0100 Subject: PM / ACPI: Remove references to pm_flags from bus.c If direct references to pm_flags are removed from drivers/acpi/bus.c, CONFIG_ACPI will not need to depend on CONFIG_PM any more. Make that happen. Signed-off-by: Rafael J. Wysocki Acked-by: Len Brown --- drivers/acpi/Kconfig | 1 - drivers/acpi/bus.c | 7 ++++--- include/linux/suspend.h | 6 ++++++ kernel/power/main.c | 12 +++++++++++- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 2aa042a5da6d..3a17ca5fff6f 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -7,7 +7,6 @@ menuconfig ACPI depends on !IA64_HP_SIM depends on IA64 || X86 depends on PCI - depends on PM select PNP default y help diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 7ced61f39492..973b0709972c 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "internal.h" @@ -1025,13 +1026,13 @@ static int __init acpi_init(void) if (!result) { pci_mmcfg_late_init(); - if (!(pm_flags & PM_APM)) - pm_flags |= PM_ACPI; - else { + if (pm_apm_enabled()) { printk(KERN_INFO PREFIX "APM is already active, exiting\n"); disable_acpi(); result = -ENODEV; + } else { + pm_set_acpi_flag(); } } else disable_acpi(); diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 5a89e3612875..5e364db8a56a 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -272,6 +272,9 @@ extern int unregister_pm_notifier(struct notifier_block *nb); register_pm_notifier(&fn##_nb); \ } +extern bool pm_apm_enabled(void); +extern void pm_set_acpi_flag(void); + /* drivers/base/power/wakeup.c */ extern bool events_check_enabled; @@ -292,6 +295,9 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) #define pm_notifier(fn, pri) do { (void)(fn); } while (0) +static inline bool pm_apm_enabled(void) { return false; } +static inline void pm_set_acpi_flag(void) {} + static inline bool pm_wakeup_pending(void) { return false; } #endif /* !CONFIG_PM_SLEEP */ diff --git a/kernel/power/main.c b/kernel/power/main.c index 701853042c28..b5405af48ddb 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -17,10 +17,20 @@ DEFINE_MUTEX(pm_mutex); +#ifdef CONFIG_PM_SLEEP + unsigned int pm_flags; EXPORT_SYMBOL(pm_flags); -#ifdef CONFIG_PM_SLEEP +bool pm_apm_enabled(void) +{ + return !!(pm_flags & PM_APM); +} + +void pm_set_acpi_flag(void) +{ + pm_flags |= PM_ACPI; +} /* Routines for PM-transition notifications */ -- cgit v1.2.3 From 1eb208aea3179dd2fc0cdeea45ef869d75b4fe70 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 11 Feb 2011 00:06:30 +0100 Subject: PM: Make CONFIG_PM depend on (CONFIG_PM_SLEEP || CONFIG_PM_RUNTIME) From the users' point of view CONFIG_PM is really only used for making it possible to set CONFIG_SUSPEND, CONFIG_HIBERNATION, CONFIG_PM_RUNTIME and (surprisingly enough) CONFIG_XEN_SAVE_RESTORE (CONFIG_PM_OPP also depends on CONFIG_PM, but quite artificially). However, both CONFIG_SUSPEND and CONFIG_HIBERNATION require platform support (independent of CONFIG_PM) and it is not quite obvious that CONFIG_PM has to be set for CONFIG_XEN_SAVE_RESTORE to be available. Thus, from the users' point of view, it would be more logical to automatically select CONFIG_PM if any of the above options depending on it are set. Make CONFIG_PM depend on (CONFIG_PM_SLEEP || CONFIG_PM_RUNTIME), which will cause it to be selected when any of CONFIG_SUSPEND, CONFIG_HIBERNATION, CONFIG_PM_RUNTIME, CONFIG_XEN_SAVE_RESTORE is set and will clarify its meaning. Signed-off-by: Rafael J. Wysocki --- arch/x86/xen/Kconfig | 2 +- kernel/power/Kconfig | 29 ++++++----------------------- 2 files changed, 7 insertions(+), 24 deletions(-) diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 5b54892e4bc3..2c876ed723d0 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -38,7 +38,7 @@ config XEN_MAX_DOMAIN_MEMORY config XEN_SAVE_RESTORE bool - depends on XEN && PM + depends on XEN default y config XEN_DEBUG_FS diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 265729966ece..d739cf612e50 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -1,23 +1,7 @@ config PM - bool "Power Management support" - depends on !IA64_HP_SIM - ---help--- - "Power Management" means that parts of your computer are shut - off or put into a power conserving "sleep" mode if they are not - being used. There are two competing standards for doing this: APM - and ACPI. If you want to use either one, say Y here and then also - to the requisite support below. - - Power Management is most important for battery powered laptop - computers; if you have a laptop, check out the Linux Laptop home - page on the WWW at or - Tuxmobil - Linux on Mobile Computers at - and the Battery Powered Linux mini-HOWTO, available from - . - - Note that, even if you say N here, Linux on the x86 architecture - will issue the hlt instruction if nothing is to be done, thereby - sending the processor to sleep and saving power. + bool + depends on PM_SLEEP || PM_RUNTIME + default y config PM_DEBUG bool "Power Management Debug Support" @@ -102,7 +86,7 @@ config PM_SLEEP_ADVANCED_DEBUG config SUSPEND bool "Suspend to RAM and standby" - depends on PM && ARCH_SUSPEND_POSSIBLE + depends on ARCH_SUSPEND_POSSIBLE default y ---help--- Allow the system to enter sleep states in which main memory is @@ -133,7 +117,7 @@ config SUSPEND_FREEZER config HIBERNATION bool "Hibernation (aka 'suspend to disk')" - depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE + depends on SWAP && ARCH_HIBERNATION_POSSIBLE select LZO_COMPRESS select LZO_DECOMPRESS ---help--- @@ -224,7 +208,7 @@ config APM_EMULATION config PM_RUNTIME bool "Run-time PM core functionality" - depends on PM + depends on !IA64_HP_SIM ---help--- Enable functionality allowing I/O devices to be put into energy-saving (low power) states at run time (or autosuspended) after a specified @@ -246,7 +230,6 @@ config ARCH_HAS_OPP config PM_OPP bool "Operating Performance Point (OPP) Layer library" - depends on PM depends on ARCH_HAS_OPP ---help--- SOCs have a standard set of tuples consisting of frequency and -- cgit v1.2.3 From 196ec243224bb38fc5c41d9fa4050f70708b7fb4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 11 Feb 2011 00:06:42 +0100 Subject: PM: Reorder power management Kconfig options Reorder configuration options in kernel/power/Kconfig so that the options depended on are at the top of the list. This patch doesn't introduce any functional changes. Signed-off-by: Rafael J. Wysocki --- kernel/power/Kconfig | 222 +++++++++++++++++++++++++-------------------------- 1 file changed, 111 insertions(+), 111 deletions(-) diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index d739cf612e50..1e4d9238c5da 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -1,89 +1,3 @@ -config PM - bool - depends on PM_SLEEP || PM_RUNTIME - default y - -config PM_DEBUG - bool "Power Management Debug Support" - depends on PM - ---help--- - This option enables various debugging support in the Power Management - code. This is helpful when debugging and reporting PM bugs, like - suspend support. - -config PM_ADVANCED_DEBUG - bool "Extra PM attributes in sysfs for low-level debugging/testing" - depends on PM_DEBUG - default n - ---help--- - Add extra sysfs attributes allowing one to access some Power Management - fields of device objects from user space. If you are not a kernel - developer interested in debugging/testing Power Management, say "no". - -config PM_VERBOSE - bool "Verbose Power Management debugging" - depends on PM_DEBUG - default n - ---help--- - This option enables verbose messages from the Power Management code. - -config CAN_PM_TRACE - def_bool y - depends on PM_DEBUG && PM_SLEEP && EXPERIMENTAL - -config PM_TRACE - bool - help - This enables code to save the last PM event point across - reboot. The architecture needs to support this, x86 for - example does by saving things in the RTC, see below. - - The architecture specific code must provide the extern - functions from as well as the - header with a TRACE_RESUME() macro. - - The way the information is presented is architecture- - dependent, x86 will print the information during a - late_initcall. - -config PM_TRACE_RTC - bool "Suspend/resume event tracing" - depends on CAN_PM_TRACE - depends on X86 - select PM_TRACE - default n - ---help--- - This enables some cheesy code to save the last PM event point in the - RTC across reboots, so that you can debug a machine that just hangs - during suspend (or more commonly, during resume). - - To use this debugging feature you should attempt to suspend the - machine, reboot it and then run - - dmesg -s 1000000 | grep 'hash matches' - - CAUTION: this option will cause your machine's real-time clock to be - set to an invalid time after a resume. - -config PM_SLEEP_SMP - bool - depends on SMP - depends on ARCH_SUSPEND_POSSIBLE || ARCH_HIBERNATION_POSSIBLE - depends on PM_SLEEP - select HOTPLUG - select HOTPLUG_CPU - default y - -config PM_SLEEP - bool - depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE - default y - -config PM_SLEEP_ADVANCED_DEBUG - bool - depends on PM_ADVANCED_DEBUG - default n - config SUSPEND bool "Suspend to RAM and standby" depends on ARCH_SUSPEND_POSSIBLE @@ -93,17 +7,6 @@ config SUSPEND powered and thus its contents are preserved, such as the suspend-to-RAM state (e.g. the ACPI S3 state). -config PM_TEST_SUSPEND - bool "Test suspend/resume and wakealarm during bootup" - depends on SUSPEND && PM_DEBUG && RTC_CLASS=y - ---help--- - This option will let you suspend your machine during bootup, and - make it wake up a few seconds later using an RTC wakeup alarm. - Enable this with a kernel parameter like "test_suspend=mem". - - You probably want to have your system's RTC driver statically - linked, ensuring that it's available when this test runs. - config SUSPEND_FREEZER bool "Enable freezer for suspend to RAM/standby" \ if ARCH_WANTS_FREEZER_CONTROL || BROKEN @@ -180,6 +83,117 @@ config PM_STD_PARTITION suspended image to. It will simply pick the first available swap device. +config PM_SLEEP + bool + depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE + default y + +config PM_SLEEP_SMP + bool + depends on SMP + depends on ARCH_SUSPEND_POSSIBLE || ARCH_HIBERNATION_POSSIBLE + depends on PM_SLEEP + select HOTPLUG + select HOTPLUG_CPU + default y + +config PM_RUNTIME + bool "Run-time PM core functionality" + depends on !IA64_HP_SIM + ---help--- + Enable functionality allowing I/O devices to be put into energy-saving + (low power) states at run time (or autosuspended) after a specified + period of inactivity and woken up in response to a hardware-generated + wake-up event or a driver's request. + + Hardware support is generally required for this functionality to work + and the bus type drivers of the buses the devices are on are + responsible for the actual handling of the autosuspend requests and + wake-up events. + +config PM + bool + depends on PM_SLEEP || PM_RUNTIME + default y + +config PM_DEBUG + bool "Power Management Debug Support" + depends on PM + ---help--- + This option enables various debugging support in the Power Management + code. This is helpful when debugging and reporting PM bugs, like + suspend support. + +config PM_VERBOSE + bool "Verbose Power Management debugging" + depends on PM_DEBUG + default n + ---help--- + This option enables verbose messages from the Power Management code. + +config PM_ADVANCED_DEBUG + bool "Extra PM attributes in sysfs for low-level debugging/testing" + depends on PM_DEBUG + default n + ---help--- + Add extra sysfs attributes allowing one to access some Power Management + fields of device objects from user space. If you are not a kernel + developer interested in debugging/testing Power Management, say "no". + +config PM_SLEEP_ADVANCED_DEBUG + bool + depends on PM_ADVANCED_DEBUG + default n + +config PM_TEST_SUSPEND + bool "Test suspend/resume and wakealarm during bootup" + depends on SUSPEND && PM_DEBUG && RTC_CLASS=y + ---help--- + This option will let you suspend your machine during bootup, and + make it wake up a few seconds later using an RTC wakeup alarm. + Enable this with a kernel parameter like "test_suspend=mem". + + You probably want to have your system's RTC driver statically + linked, ensuring that it's available when this test runs. + +config CAN_PM_TRACE + def_bool y + depends on PM_DEBUG && PM_SLEEP && EXPERIMENTAL + +config PM_TRACE + bool + help + This enables code to save the last PM event point across + reboot. The architecture needs to support this, x86 for + example does by saving things in the RTC, see below. + + The architecture specific code must provide the extern + functions from as well as the + header with a TRACE_RESUME() macro. + + The way the information is presented is architecture- + dependent, x86 will print the information during a + late_initcall. + +config PM_TRACE_RTC + bool "Suspend/resume event tracing" + depends on CAN_PM_TRACE + depends on X86 + select PM_TRACE + default n + ---help--- + This enables some cheesy code to save the last PM event point in the + RTC across reboots, so that you can debug a machine that just hangs + during suspend (or more commonly, during resume). + + To use this debugging feature you should attempt to suspend the + machine, reboot it and then run + + dmesg -s 1000000 | grep 'hash matches' + + CAUTION: this option will cause your machine's real-time clock to be + set to an invalid time after a resume. + config APM_EMULATION tristate "Advanced Power Management Emulation" depends on PM && SYS_SUPPORTS_APM_EMULATION @@ -206,20 +220,6 @@ config APM_EMULATION anything, try disabling/enabling this option (or disabling/enabling APM in your BIOS). -config PM_RUNTIME - bool "Run-time PM core functionality" - depends on !IA64_HP_SIM - ---help--- - Enable functionality allowing I/O devices to be put into energy-saving - (low power) states at run time (or autosuspended) after a specified - period of inactivity and woken up in response to a hardware-generated - wake-up event or a driver's request. - - Hardware support is generally required for this functionality to work - and the bus type drivers of the buses the devices are on are - responsible for the actual handling of the autosuspend requests and - wake-up events. - config PM_OPS bool depends on PM_SLEEP || PM_RUNTIME -- cgit v1.2.3 From aa33860158114d0df3c7997bc1dd41c0168e1c2a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 11 Feb 2011 00:06:54 +0100 Subject: PM: Remove CONFIG_PM_OPS After redefining CONFIG_PM to depend on (CONFIG_PM_SLEEP || CONFIG_PM_RUNTIME) the CONFIG_PM_OPS option is redundant and can be replaced with CONFIG_PM. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/sleep.c | 4 ++-- drivers/base/power/Makefile | 3 +-- drivers/net/e1000e/netdev.c | 8 ++++---- drivers/net/pch_gbe/pch_gbe_main.c | 2 +- drivers/pci/pci-driver.c | 4 ++-- drivers/scsi/Makefile | 2 +- drivers/scsi/scsi_priv.h | 2 +- drivers/scsi/scsi_sysfs.c | 2 +- drivers/usb/core/hcd-pci.c | 4 ++-- include/acpi/acpi_bus.h | 2 +- include/linux/pm.h | 2 +- kernel/power/Kconfig | 5 ----- 12 files changed, 17 insertions(+), 23 deletions(-) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index d6a8cd14de2e..8ea092fad3f6 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -585,7 +585,7 @@ int acpi_suspend(u32 acpi_state) return -EINVAL; } -#ifdef CONFIG_PM_OPS +#ifdef CONFIG_PM /** * acpi_pm_device_sleep_state - return preferred power state of ACPI device * in the system sleep state given by %acpi_target_sleep_state @@ -671,7 +671,7 @@ int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p) *d_min_p = d_min; return d_max; } -#endif /* CONFIG_PM_OPS */ +#endif /* CONFIG_PM */ #ifdef CONFIG_PM_SLEEP /** diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile index abe46edfe5b4..118c1b92a511 100644 --- a/drivers/base/power/Makefile +++ b/drivers/base/power/Makefile @@ -1,7 +1,6 @@ -obj-$(CONFIG_PM) += sysfs.o +obj-$(CONFIG_PM) += sysfs.o generic_ops.o obj-$(CONFIG_PM_SLEEP) += main.o wakeup.o obj-$(CONFIG_PM_RUNTIME) += runtime.o -obj-$(CONFIG_PM_OPS) += generic_ops.o obj-$(CONFIG_PM_TRACE_RTC) += trace.o obj-$(CONFIG_PM_OPP) += opp.o diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 2e5022849f18..6d513a383340 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -5338,7 +5338,7 @@ void e1000e_disable_aspm(struct pci_dev *pdev, u16 state) __e1000e_disable_aspm(pdev, state); } -#ifdef CONFIG_PM_OPS +#ifdef CONFIG_PM static bool e1000e_pm_ready(struct e1000_adapter *adapter) { return !!adapter->tx_ring->buffer_info; @@ -5489,7 +5489,7 @@ static int e1000_runtime_resume(struct device *dev) return __e1000_resume(pdev); } #endif /* CONFIG_PM_RUNTIME */ -#endif /* CONFIG_PM_OPS */ +#endif /* CONFIG_PM */ static void e1000_shutdown(struct pci_dev *pdev) { @@ -6196,7 +6196,7 @@ static DEFINE_PCI_DEVICE_TABLE(e1000_pci_tbl) = { }; MODULE_DEVICE_TABLE(pci, e1000_pci_tbl); -#ifdef CONFIG_PM_OPS +#ifdef CONFIG_PM static const struct dev_pm_ops e1000_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(e1000_suspend, e1000_resume) SET_RUNTIME_PM_OPS(e1000_runtime_suspend, @@ -6210,7 +6210,7 @@ static struct pci_driver e1000_driver = { .id_table = e1000_pci_tbl, .probe = e1000_probe, .remove = __devexit_p(e1000_remove), -#ifdef CONFIG_PM_OPS +#ifdef CONFIG_PM .driver.pm = &e1000_pm_ops, #endif .shutdown = e1000_shutdown, diff --git a/drivers/net/pch_gbe/pch_gbe_main.c b/drivers/net/pch_gbe/pch_gbe_main.c index b99e90aca37d..8c66e22c3a0a 100644 --- a/drivers/net/pch_gbe/pch_gbe_main.c +++ b/drivers/net/pch_gbe/pch_gbe_main.c @@ -2446,7 +2446,7 @@ static struct pci_driver pch_gbe_pcidev = { .id_table = pch_gbe_pcidev_id, .probe = pch_gbe_probe, .remove = pch_gbe_remove, -#ifdef CONFIG_PM_OPS +#ifdef CONFIG_PM .driver.pm = &pch_gbe_pm_ops, #endif .shutdown = pch_gbe_shutdown, diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 88246dd46452..d86ea8b01137 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -431,7 +431,7 @@ static void pci_device_shutdown(struct device *dev) pci_msix_shutdown(pci_dev); } -#ifdef CONFIG_PM_OPS +#ifdef CONFIG_PM /* Auxiliary functions used for system resume and run-time resume. */ @@ -1059,7 +1059,7 @@ static int pci_pm_runtime_idle(struct device *dev) #endif /* !CONFIG_PM_RUNTIME */ -#ifdef CONFIG_PM_OPS +#ifdef CONFIG_PM const struct dev_pm_ops pci_dev_pm_ops = { .prepare = pci_pm_prepare, diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 2e9a87e8e7d8..ef6de669424b 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -165,7 +165,7 @@ scsi_mod-$(CONFIG_SCSI_NETLINK) += scsi_netlink.o scsi_mod-$(CONFIG_SYSCTL) += scsi_sysctl.o scsi_mod-$(CONFIG_SCSI_PROC_FS) += scsi_proc.o scsi_mod-y += scsi_trace.o -scsi_mod-$(CONFIG_PM_OPS) += scsi_pm.o +scsi_mod-$(CONFIG_PM) += scsi_pm.o scsi_tgt-y += scsi_tgt_lib.o scsi_tgt_if.o diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index b4056d14f812..342ee1a9c41d 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -146,7 +146,7 @@ static inline void scsi_netlink_exit(void) {} #endif /* scsi_pm.c */ -#ifdef CONFIG_PM_OPS +#ifdef CONFIG_PM extern const struct dev_pm_ops scsi_bus_pm_ops; #endif #ifdef CONFIG_PM_RUNTIME diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 490ce213204e..e44ff64233fd 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -383,7 +383,7 @@ struct bus_type scsi_bus_type = { .name = "scsi", .match = scsi_bus_match, .uevent = scsi_bus_uevent, -#ifdef CONFIG_PM_OPS +#ifdef CONFIG_PM .pm = &scsi_bus_pm_ops, #endif }; diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index f71e8e307e0f..64a035ba2eab 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -335,7 +335,7 @@ void usb_hcd_pci_shutdown(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(usb_hcd_pci_shutdown); -#ifdef CONFIG_PM_OPS +#ifdef CONFIG_PM #ifdef CONFIG_PPC_PMAC static void powermac_set_asic(struct pci_dev *pci_dev, int enable) @@ -580,4 +580,4 @@ const struct dev_pm_ops usb_hcd_pci_pm_ops = { }; EXPORT_SYMBOL_GPL(usb_hcd_pci_pm_ops); -#endif /* CONFIG_PM_OPS */ +#endif /* CONFIG_PM */ diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 78ca429929f7..ff103ba96b78 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -381,7 +381,7 @@ struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle); int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state); int acpi_disable_wakeup_device_power(struct acpi_device *dev); -#ifdef CONFIG_PM_OPS +#ifdef CONFIG_PM int acpi_pm_device_sleep_state(struct device *, int *); #else static inline int acpi_pm_device_sleep_state(struct device *d, int *p) diff --git a/include/linux/pm.h b/include/linux/pm.h index 21415cc91cbb..9279175a4557 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -267,7 +267,7 @@ const struct dev_pm_ops name = { \ * callbacks provided by device drivers supporting both the system sleep PM and * runtime PM, make the pm member point to generic_subsys_pm_ops. */ -#ifdef CONFIG_PM_OPS +#ifdef CONFIG_PM extern struct dev_pm_ops generic_subsys_pm_ops; #define GENERIC_SUBSYS_PM_OPS (&generic_subsys_pm_ops) #else diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 1e4d9238c5da..0710beead05b 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -220,11 +220,6 @@ config APM_EMULATION anything, try disabling/enabling this option (or disabling/enabling APM in your BIOS). -config PM_OPS - bool - depends on PM_SLEEP || PM_RUNTIME - default y - config ARCH_HAS_OPP bool -- cgit v1.2.3 From 88a6f33e4d7143f94f8e787fa4065ac873507984 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 11 Feb 2011 20:31:11 +0100 Subject: PM: Clean up PM_TRACE dependencies and drop unnecessary Kconfig option CONFIG_PM_SLEEP_ADVANCED_DEBUG is not used any more, so drop it and CONFIG_CAN_PM_TRACE need not depend on EXPERIMENTAL, so remove that dependency. Signed-off-by: Rafael J. Wysocki --- kernel/power/Kconfig | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 0710beead05b..298bed04555e 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -140,11 +140,6 @@ config PM_ADVANCED_DEBUG fields of device objects from user space. If you are not a kernel developer interested in debugging/testing Power Management, say "no". -config PM_SLEEP_ADVANCED_DEBUG - bool - depends on PM_ADVANCED_DEBUG - default n - config PM_TEST_SUSPEND bool "Test suspend/resume and wakealarm during bootup" depends on SUSPEND && PM_DEBUG && RTC_CLASS=y @@ -158,7 +153,7 @@ config PM_TEST_SUSPEND config CAN_PM_TRACE def_bool y - depends on PM_DEBUG && PM_SLEEP && EXPERIMENTAL + depends on PM_DEBUG && PM_SLEEP config PM_TRACE bool -- cgit v1.2.3 From e8665002477f0278f84f898145b1f141ba26ee26 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 12 Feb 2011 01:42:41 +0100 Subject: PM: Allow pm_runtime_suspend() to succeed during system suspend The dpm_prepare() function increments the runtime PM reference counters of all devices to prevent pm_runtime_suspend() from executing subsystem-level callbacks. However, this was supposed to guard against a specific race condition that cannot happen, because the power management workqueue is freezable, so pm_runtime_suspend() can only be called synchronously during system suspend and we can rely on subsystems and device drivers to avoid doing that unnecessarily. Make dpm_prepare() drop the runtime PM reference to each device after making sure that runtime resume is not pending for it. Signed-off-by: Rafael J. Wysocki Acked-by: Kevin Hilman --- drivers/base/power/main.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 83404973f97a..f7a755923751 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -669,7 +669,6 @@ static void dpm_complete(pm_message_t state) mutex_unlock(&dpm_list_mtx); device_complete(dev, state); - pm_runtime_put_sync(dev); mutex_lock(&dpm_list_mtx); put_device(dev); @@ -1005,12 +1004,9 @@ static int dpm_prepare(pm_message_t state) if (pm_runtime_barrier(dev) && device_may_wakeup(dev)) pm_wakeup_event(dev, 0); - if (pm_wakeup_pending()) { - pm_runtime_put_sync(dev); - error = -EBUSY; - } else { - error = device_prepare(dev, state); - } + pm_runtime_put_sync(dev); + error = pm_wakeup_pending() ? + -EBUSY : device_prepare(dev, state); mutex_lock(&dpm_list_mtx); if (error) { -- cgit v1.2.3 From 6831c6edc7b272a08dd2a6c71bb183a48fe98ae6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 15 Feb 2011 21:22:24 +0100 Subject: PM: Drop pm_flags that is not necessary The variable pm_flags is used to prevent APM from being enabled along with ACPI, which would lead to problems. However, acpi_init() is always called before apm_init() and after acpi_init() has returned, it is known whether or not ACPI will be used. Namely, if acpi_disabled is not set after acpi_init() has returned, this means that ACPI is enabled. Thus, it is sufficient to check acpi_disabled in apm_init() to prevent APM from being enabled in parallel with ACPI. Signed-off-by: Rafael J. Wysocki Acked-by: Len Brown --- arch/x86/kernel/apm_32.c | 5 ++--- drivers/acpi/bus.c | 22 +++++----------------- include/linux/pm.h | 9 --------- include/linux/suspend.h | 6 ------ kernel/power/main.c | 13 ------------- 5 files changed, 7 insertions(+), 48 deletions(-) diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 0e4f24c2a746..15f47f741983 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -227,6 +227,7 @@ #include #include #include +#include #include #include @@ -2331,12 +2332,11 @@ static int __init apm_init(void) apm_info.disabled = 1; return -ENODEV; } - if (pm_flags & PM_ACPI) { + if (!acpi_disabled) { printk(KERN_NOTICE "apm: overridden by ACPI.\n"); apm_info.disabled = 1; return -ENODEV; } - pm_flags |= PM_APM; /* * Set up the long jump entry point to the APM BIOS, which is called @@ -2428,7 +2428,6 @@ static void __exit apm_exit(void) kthread_stop(kapmd_task); kapmd_task = NULL; } - pm_flags &= ~PM_APM; } module_init(apm_init); diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 973b0709972c..9749980ca6ca 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1007,8 +1007,7 @@ struct kobject *acpi_kobj; static int __init acpi_init(void) { - int result = 0; - + int result; if (acpi_disabled) { printk(KERN_INFO PREFIX "Interpreter disabled.\n"); @@ -1023,29 +1022,18 @@ static int __init acpi_init(void) init_acpi_device_notify(); result = acpi_bus_init(); - - if (!result) { - pci_mmcfg_late_init(); - if (pm_apm_enabled()) { - printk(KERN_INFO PREFIX - "APM is already active, exiting\n"); - disable_acpi(); - result = -ENODEV; - } else { - pm_set_acpi_flag(); - } - } else + if (result) { disable_acpi(); - - if (acpi_disabled) return result; + } + pci_mmcfg_late_init(); acpi_scan_init(); acpi_ec_init(); acpi_debugfs_init(); acpi_sleep_proc_init(); acpi_wakeup_device_init(); - return result; + return 0; } subsys_initcall(acpi_init); diff --git a/include/linux/pm.h b/include/linux/pm.h index 9279175a4557..1f79c98f1e56 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -565,15 +565,6 @@ enum dpm_order { DPM_ORDER_DEV_LAST, }; -/* - * Global Power Management flags - * Used to keep APM and ACPI from both being active - */ -extern unsigned int pm_flags; - -#define PM_APM 1 -#define PM_ACPI 2 - extern int pm_generic_suspend(struct device *dev); extern int pm_generic_resume(struct device *dev); extern int pm_generic_freeze(struct device *dev); diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 5e364db8a56a..5a89e3612875 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -272,9 +272,6 @@ extern int unregister_pm_notifier(struct notifier_block *nb); register_pm_notifier(&fn##_nb); \ } -extern bool pm_apm_enabled(void); -extern void pm_set_acpi_flag(void); - /* drivers/base/power/wakeup.c */ extern bool events_check_enabled; @@ -295,9 +292,6 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) #define pm_notifier(fn, pri) do { (void)(fn); } while (0) -static inline bool pm_apm_enabled(void) { return false; } -static inline void pm_set_acpi_flag(void) {} - static inline bool pm_wakeup_pending(void) { return false; } #endif /* !CONFIG_PM_SLEEP */ diff --git a/kernel/power/main.c b/kernel/power/main.c index b5405af48ddb..8eaba5f27b10 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -19,19 +19,6 @@ DEFINE_MUTEX(pm_mutex); #ifdef CONFIG_PM_SLEEP -unsigned int pm_flags; -EXPORT_SYMBOL(pm_flags); - -bool pm_apm_enabled(void) -{ - return !!(pm_flags & PM_APM); -} - -void pm_set_acpi_flag(void) -{ - pm_flags |= PM_ACPI; -} - /* Routines for PM-transition notifications */ static BLOCKING_NOTIFIER_HEAD(pm_chain_head); -- cgit v1.2.3 From 7538e3db6e015e890825fbd9f8659952896ddd5b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 16 Feb 2011 21:53:17 +0100 Subject: PM: Add support for device power domains The platform bus type is often used to handle Systems-on-a-Chip (SoC) where all devices are represented by objects of type struct platform_device. In those cases the same "platform" device driver may be used with multiple different system configurations, but the actions needed to put the devices it handles into a low-power state and back into the full-power state may depend on the design of the given SoC. The driver, however, cannot possibly include all the information necessary for the power management of its device on all the systems it is used with. Moreover, the device hierarchy in its current form also is not suitable for representing this kind of information. The patch below attempts to address this problem by introducing objects of type struct dev_power_domain that can be used for representing power domains within a SoC. Every struct dev_power_domain object provides a sets of device power management callbacks that can be used to perform what's needed for device power management in addition to the operations carried out by the device's driver and subsystem. Namely, if a struct dev_power_domain object is pointed to by the pwr_domain field in a struct device, the callbacks provided by its ops member will be executed in addition to the corresponding callbacks provided by the device's subsystem and driver during all power transitions. Signed-off-by: Rafael J. Wysocki Tested-and-acked-by: Kevin Hilman --- Documentation/power/devices.txt | 45 ++++++++++++++++++++++++++++++++++++++++- drivers/base/power/main.c | 37 +++++++++++++++++++++++++++++++++ drivers/base/power/runtime.c | 19 +++++++++++++++-- include/linux/device.h | 1 + include/linux/pm.h | 8 ++++++++ 5 files changed, 107 insertions(+), 3 deletions(-) diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index dd9b49251db3..df1a5cb10c42 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt @@ -1,6 +1,6 @@ Device Power Management -Copyright (c) 2010 Rafael J. Wysocki , Novell Inc. +Copyright (c) 2010-2011 Rafael J. Wysocki , Novell Inc. Copyright (c) 2010 Alan Stern @@ -507,6 +507,49 @@ routines. Nevertheless, different callback pointers are used in case there is a situation where it actually matters. +Device Power Domains +-------------------- +Sometimes devices share reference clocks or other power resources. In those +cases it generally is not possible to put devices into low-power states +individually. Instead, a set of devices sharing a power resource can be put +into a low-power state together at the same time by turning off the shared +power resource. Of course, they also need to be put into the full-power state +together, by turning the shared power resource on. A set of devices with this +property is often referred to as a power domain. + +Support for power domains is provided through the pwr_domain field of struct +device. This field is a pointer to an object of type struct dev_power_domain, +defined in include/linux/pm.h, providing a set of power management callbacks +analogous to the subsystem-level and device driver callbacks that are executed +for the given device during all power transitions, in addition to the respective +subsystem-level callbacks. Specifically, the power domain "suspend" callbacks +(i.e. ->runtime_suspend(), ->suspend(), ->freeze(), ->poweroff(), etc.) are +executed after the analogous subsystem-level callbacks, while the power domain +"resume" callbacks (i.e. ->runtime_resume(), ->resume(), ->thaw(), ->restore, +etc.) are executed before the analogous subsystem-level callbacks. Error codes +returned by the "suspend" and "resume" power domain callbacks are ignored. + +Power domain ->runtime_idle() callback is executed before the subsystem-level +->runtime_idle() callback and the result returned by it is not ignored. Namely, +if it returns error code, the subsystem-level ->runtime_idle() callback will not +be called and the helper function rpm_idle() executing it will return error +code. This mechanism is intended to help platforms where saving device state +is a time consuming operation and should only be carried out if all devices +in the power domain are idle, before turning off the shared power resource(s). +Namely, the power domain ->runtime_idle() callback may return error code until +the pm_runtime_idle() helper (or its asychronous version) has been called for +all devices in the power domain (it is recommended that the returned error code +be -EBUSY in those cases), preventing the subsystem-level ->runtime_idle() +callback from being run prematurely. + +The support for device power domains is only relevant to platforms needing to +use the same subsystem-level (e.g. platform bus type) and device driver power +management callbacks in many different power domain configurations and wanting +to avoid incorporating the support for power domains into the subsystem-level +callbacks. The other platforms need not implement it or take it into account +in any way. + + System Devices -------------- System devices (sysdevs) follow a slightly different API, which can be found in diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index f7a755923751..05b989139b54 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -423,6 +423,11 @@ static int device_resume_noirq(struct device *dev, pm_message_t state) TRACE_DEVICE(dev); TRACE_RESUME(0); + if (dev->pwr_domain) { + pm_dev_dbg(dev, state, "EARLY power domain "); + pm_noirq_op(dev, &dev->pwr_domain->ops, state); + } + if (dev->bus && dev->bus->pm) { pm_dev_dbg(dev, state, "EARLY "); error = pm_noirq_op(dev, dev->bus->pm, state); @@ -518,6 +523,11 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) dev->power.in_suspend = false; + if (dev->pwr_domain) { + pm_dev_dbg(dev, state, "power domain "); + pm_op(dev, &dev->pwr_domain->ops, state); + } + if (dev->bus) { if (dev->bus->pm) { pm_dev_dbg(dev, state, ""); @@ -629,6 +639,11 @@ static void device_complete(struct device *dev, pm_message_t state) { device_lock(dev); + if (dev->pwr_domain && dev->pwr_domain->ops.complete) { + pm_dev_dbg(dev, state, "completing power domain "); + dev->pwr_domain->ops.complete(dev); + } + if (dev->class && dev->class->pm && dev->class->pm->complete) { pm_dev_dbg(dev, state, "completing class "); dev->class->pm->complete(dev); @@ -745,6 +760,13 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state) if (dev->bus && dev->bus->pm) { pm_dev_dbg(dev, state, "LATE "); error = pm_noirq_op(dev, dev->bus->pm, state); + if (error) + goto End; + } + + if (dev->pwr_domain) { + pm_dev_dbg(dev, state, "LATE power domain "); + pm_noirq_op(dev, &dev->pwr_domain->ops, state); } End: @@ -864,6 +886,13 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) pm_dev_dbg(dev, state, "legacy "); error = legacy_suspend(dev, state, dev->bus->suspend); } + if (error) + goto End; + } + + if (dev->pwr_domain) { + pm_dev_dbg(dev, state, "power domain "); + pm_op(dev, &dev->pwr_domain->ops, state); } End: @@ -976,7 +1005,15 @@ static int device_prepare(struct device *dev, pm_message_t state) pm_dev_dbg(dev, state, "preparing class "); error = dev->class->pm->prepare(dev); suspend_report_result(dev->class->pm->prepare, error); + if (error) + goto End; + } + + if (dev->pwr_domain && dev->pwr_domain->ops.prepare) { + pm_dev_dbg(dev, state, "preparing power domain "); + dev->pwr_domain->ops.prepare(dev); } + End: device_unlock(dev); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 42615b419dfb..25edc9a3a489 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -168,6 +168,7 @@ static int rpm_check_suspend_allowed(struct device *dev) static int rpm_idle(struct device *dev, int rpmflags) { int (*callback)(struct device *); + int (*domain_callback)(struct device *); int retval; retval = rpm_check_suspend_allowed(dev); @@ -222,10 +223,19 @@ static int rpm_idle(struct device *dev, int rpmflags) else callback = NULL; - if (callback) { + if (dev->pwr_domain) + domain_callback = dev->pwr_domain->ops.runtime_idle; + else + domain_callback = NULL; + + if (callback || domain_callback) { spin_unlock_irq(&dev->power.lock); - callback(dev); + if (domain_callback) + retval = domain_callback(dev); + + if (!retval && callback) + callback(dev); spin_lock_irq(&dev->power.lock); } @@ -390,6 +400,8 @@ static int rpm_suspend(struct device *dev, int rpmflags) else pm_runtime_cancel_pending(dev); } else { + if (dev->pwr_domain) + rpm_callback(dev->pwr_domain->ops.runtime_suspend, dev); no_callback: __update_runtime_status(dev, RPM_SUSPENDED); pm_runtime_deactivate_timer(dev); @@ -569,6 +581,9 @@ static int rpm_resume(struct device *dev, int rpmflags) __update_runtime_status(dev, RPM_RESUMING); + if (dev->pwr_domain) + rpm_callback(dev->pwr_domain->ops.runtime_resume, dev); + if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume) callback = dev->bus->pm->runtime_resume; else if (dev->type && dev->type->pm && dev->type->pm->runtime_resume) diff --git a/include/linux/device.h b/include/linux/device.h index 1bf5cf0b4513..22e9a8a7e1bc 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -422,6 +422,7 @@ struct device { void *platform_data; /* Platform specific data, device core doesn't touch it */ struct dev_pm_info power; + struct dev_power_domain *pwr_domain; #ifdef CONFIG_NUMA int numa_node; /* NUMA node this device is close to */ diff --git a/include/linux/pm.h b/include/linux/pm.h index 1f79c98f1e56..6618216bb973 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -465,6 +465,14 @@ struct dev_pm_info { extern void update_pm_runtime_accounting(struct device *dev); +/* + * Power domains provide callbacks that are executed during system suspend, + * hibernation, system resume and during runtime PM transitions along with + * subsystem-level and driver-level callbacks. + */ +struct dev_power_domain { + struct dev_pm_ops ops; +}; /* * The PM_EVENT_ messages are also used by drivers implementing the legacy -- cgit v1.2.3 From cf4fb80ca3d591cae366ae8364e3c3f7a68bd249 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 18 Feb 2011 01:05:36 +0100 Subject: PM: Simplify kernel/power/Kconfig 'n' defaults are pretty pointless and actually bogus when used with prompt-less config options. The "bool"/"default y" pair with no prompt can be expressed more compactly using def_bool. [rjw: Rebased on top of earlier patches modifying this file.] Signed-off-by: Jan Beulich Signed-off-by: Rafael J. Wysocki --- kernel/power/Kconfig | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 298bed04555e..4603f08dc47b 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -84,18 +84,16 @@ config PM_STD_PARTITION device. config PM_SLEEP - bool + def_bool y depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE - default y config PM_SLEEP_SMP - bool + def_bool y depends on SMP depends on ARCH_SUSPEND_POSSIBLE || ARCH_HIBERNATION_POSSIBLE depends on PM_SLEEP select HOTPLUG select HOTPLUG_CPU - default y config PM_RUNTIME bool "Run-time PM core functionality" @@ -112,9 +110,8 @@ config PM_RUNTIME wake-up events. config PM - bool + def_bool y depends on PM_SLEEP || PM_RUNTIME - default y config PM_DEBUG bool "Power Management Debug Support" @@ -127,14 +124,12 @@ config PM_DEBUG config PM_VERBOSE bool "Verbose Power Management debugging" depends on PM_DEBUG - default n ---help--- This option enables verbose messages from the Power Management code. config PM_ADVANCED_DEBUG bool "Extra PM attributes in sysfs for low-level debugging/testing" depends on PM_DEBUG - default n ---help--- Add extra sysfs attributes allowing one to access some Power Management fields of device objects from user space. If you are not a kernel @@ -175,7 +170,6 @@ config PM_TRACE_RTC depends on CAN_PM_TRACE depends on X86 select PM_TRACE - default n ---help--- This enables some cheesy code to save the last PM event point in the RTC across reboots, so that you can debug a machine that just hangs -- cgit v1.2.3 From 9659cc0678b954f187290c6e8b247a673c5d37e1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 18 Feb 2011 23:20:21 +0100 Subject: PM: Make system-wide PM and runtime PM treat subsystems consistently The code handling system-wide power transitions (eg. suspend-to-RAM) can in theory execute callbacks provided by the device's bus type, device type and class in each phase of the power transition. In turn, the runtime PM core code only calls one of those callbacks at a time, preferring bus type callbacks to device type or class callbacks and device type callbacks to class callbacks. It seems reasonable to make them both behave in the same way in that respect. Moreover, even though a device may belong to two subsystems (eg. bus type and device class) simultaneously, in practice power management callbacks for system-wide power transitions are always provided by only one of them (ie. if the bus type callbacks are defined, the device class ones are not and vice versa). Thus it is possible to modify the code handling system-wide power transitions so that it follows the core runtime PM code (ie. treats the subsystem callbacks as mutually exclusive). On the other hand, the core runtime PM code will choose to execute, for example, a runtime suspend callback provided by the device type even if the bus type's struct dev_pm_ops object exists, but the runtime_suspend pointer in it happens to be NULL. This is confusing, because it may lead to the execution of callbacks from different subsystems during different operations (eg. the bus type suspend callback may be executed during runtime suspend of the device, while the device type callback will be executed during system suspend). Make all of the power management code treat subsystem callbacks in a consistent way, such that: (1) If the device's type is defined (eg. dev->type is not NULL) and its pm pointer is not NULL, the callbacks from dev->type->pm will be used. (2) If dev->type is NULL or dev->type->pm is NULL, but the device's class is defined (eg. dev->class is not NULL) and its pm pointer is not NULL, the callbacks from dev->class->pm will be used. (3) If dev->type is NULL or dev->type->pm is NULL and dev->class is NULL or dev->class->pm is NULL, the callbacks from dev->bus->pm will be used provided that both dev->bus and dev->bus->pm are not NULL. Signed-off-by: Rafael J. Wysocki Acked-by: Kevin Hilman Reasoning-sounds-sane-to: Grant Likely Acked-by: Greg Kroah-Hartman --- Documentation/power/devices.txt | 29 +++---- Documentation/power/runtime_pm.txt | 13 ++-- drivers/base/power/main.c | 150 ++++++++++++++++--------------------- drivers/base/power/runtime.c | 18 ++--- 4 files changed, 92 insertions(+), 118 deletions(-) diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index df1a5cb10c42..f023ba6bba62 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt @@ -249,23 +249,18 @@ various phases always run after tasks have been frozen and before they are unfrozen. Furthermore, the *_noirq phases run at a time when IRQ handlers have been disabled (except for those marked with the IRQ_WAKEUP flag). -Most phases use bus, type, and class callbacks (that is, methods defined in -dev->bus->pm, dev->type->pm, and dev->class->pm). The prepare and complete -phases are exceptions; they use only bus callbacks. When multiple callbacks -are used in a phase, they are invoked in the order: during -power-down transitions and in the opposite order during power-up transitions. -For example, during the suspend phase the PM core invokes - - dev->class->pm.suspend(dev); - dev->type->pm.suspend(dev); - dev->bus->pm.suspend(dev); - -before moving on to the next device, whereas during the resume phase the core -invokes - - dev->bus->pm.resume(dev); - dev->type->pm.resume(dev); - dev->class->pm.resume(dev); +All phases use bus, type, or class callbacks (that is, methods defined in +dev->bus->pm, dev->type->pm, or dev->class->pm). These callbacks are mutually +exclusive, so if the device type provides a struct dev_pm_ops object pointed to +by its pm field (i.e. both dev->type and dev->type->pm are defined), the +callbacks included in that object (i.e. dev->type->pm) will be used. Otherwise, +if the class provides a struct dev_pm_ops object pointed to by its pm field +(i.e. both dev->class and dev->class->pm are defined), the PM core will use the +callbacks from that object (i.e. dev->class->pm). Finally, if the pm fields of +both the device type and class objects are NULL (or those objects do not exist), +the callbacks provided by the bus (that is, the callbacks from dev->bus->pm) +will be used (this allows device types to override callbacks provided by bus +types or classes if necessary). These callbacks may in turn invoke device- or driver-specific methods stored in dev->driver->pm, but they don't have to. diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index ffe55ffa540a..654097b130b4 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -1,6 +1,6 @@ Run-time Power Management Framework for I/O Devices -(C) 2009 Rafael J. Wysocki , Novell Inc. +(C) 2009-2011 Rafael J. Wysocki , Novell Inc. (C) 2010 Alan Stern 1. Introduction @@ -44,11 +44,12 @@ struct dev_pm_ops { }; The ->runtime_suspend(), ->runtime_resume() and ->runtime_idle() callbacks are -executed by the PM core for either the bus type, or device type (if the bus -type's callback is not defined), or device class (if the bus type's and device -type's callbacks are not defined) of given device. The bus type, device type -and device class callbacks are referred to as subsystem-level callbacks in what -follows. +executed by the PM core for either the device type, or the class (if the device +type's struct dev_pm_ops object does not exist), or the bus type (if the +device type's and class' struct dev_pm_ops objects do not exist) of the given +device (this allows device types to override callbacks provided by bus types or +classes if necessary). The bus type, device type and class callbacks are +referred to as subsystem-level callbacks in what follows. By default, the callbacks are always invoked in process context with interrupts enabled. However, subsystems can use the pm_runtime_irq_safe() helper function diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 05b989139b54..052dc53eef38 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -428,26 +428,17 @@ static int device_resume_noirq(struct device *dev, pm_message_t state) pm_noirq_op(dev, &dev->pwr_domain->ops, state); } - if (dev->bus && dev->bus->pm) { - pm_dev_dbg(dev, state, "EARLY "); - error = pm_noirq_op(dev, dev->bus->pm, state); - if (error) - goto End; - } - if (dev->type && dev->type->pm) { pm_dev_dbg(dev, state, "EARLY type "); error = pm_noirq_op(dev, dev->type->pm, state); - if (error) - goto End; - } - - if (dev->class && dev->class->pm) { + } else if (dev->class && dev->class->pm) { pm_dev_dbg(dev, state, "EARLY class "); error = pm_noirq_op(dev, dev->class->pm, state); + } else if (dev->bus && dev->bus->pm) { + pm_dev_dbg(dev, state, "EARLY "); + error = pm_noirq_op(dev, dev->bus->pm, state); } -End: TRACE_RESUME(error); return error; } @@ -528,36 +519,34 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) pm_op(dev, &dev->pwr_domain->ops, state); } - if (dev->bus) { - if (dev->bus->pm) { - pm_dev_dbg(dev, state, ""); - error = pm_op(dev, dev->bus->pm, state); - } else if (dev->bus->resume) { - pm_dev_dbg(dev, state, "legacy "); - error = legacy_resume(dev, dev->bus->resume); - } - if (error) - goto End; - } - - if (dev->type) { - if (dev->type->pm) { - pm_dev_dbg(dev, state, "type "); - error = pm_op(dev, dev->type->pm, state); - } - if (error) - goto End; + if (dev->type && dev->type->pm) { + pm_dev_dbg(dev, state, "type "); + error = pm_op(dev, dev->type->pm, state); + goto End; } if (dev->class) { if (dev->class->pm) { pm_dev_dbg(dev, state, "class "); error = pm_op(dev, dev->class->pm, state); + goto End; } else if (dev->class->resume) { pm_dev_dbg(dev, state, "legacy class "); error = legacy_resume(dev, dev->class->resume); + goto End; } } + + if (dev->bus) { + if (dev->bus->pm) { + pm_dev_dbg(dev, state, ""); + error = pm_op(dev, dev->bus->pm, state); + } else if (dev->bus->resume) { + pm_dev_dbg(dev, state, "legacy "); + error = legacy_resume(dev, dev->bus->resume); + } + } + End: device_unlock(dev); complete_all(&dev->power.completion); @@ -644,19 +633,18 @@ static void device_complete(struct device *dev, pm_message_t state) dev->pwr_domain->ops.complete(dev); } - if (dev->class && dev->class->pm && dev->class->pm->complete) { - pm_dev_dbg(dev, state, "completing class "); - dev->class->pm->complete(dev); - } - - if (dev->type && dev->type->pm && dev->type->pm->complete) { + if (dev->type && dev->type->pm) { pm_dev_dbg(dev, state, "completing type "); - dev->type->pm->complete(dev); - } - - if (dev->bus && dev->bus->pm && dev->bus->pm->complete) { + if (dev->type->pm->complete) + dev->type->pm->complete(dev); + } else if (dev->class && dev->class->pm) { + pm_dev_dbg(dev, state, "completing class "); + if (dev->class->pm->complete) + dev->class->pm->complete(dev); + } else if (dev->bus && dev->bus->pm) { pm_dev_dbg(dev, state, "completing "); - dev->bus->pm->complete(dev); + if (dev->bus->pm->complete) + dev->bus->pm->complete(dev); } device_unlock(dev); @@ -741,27 +729,23 @@ static pm_message_t resume_event(pm_message_t sleep_state) */ static int device_suspend_noirq(struct device *dev, pm_message_t state) { - int error = 0; - - if (dev->class && dev->class->pm) { - pm_dev_dbg(dev, state, "LATE class "); - error = pm_noirq_op(dev, dev->class->pm, state); - if (error) - goto End; - } + int error; if (dev->type && dev->type->pm) { pm_dev_dbg(dev, state, "LATE type "); error = pm_noirq_op(dev, dev->type->pm, state); if (error) - goto End; - } - - if (dev->bus && dev->bus->pm) { + return error; + } else if (dev->class && dev->class->pm) { + pm_dev_dbg(dev, state, "LATE class "); + error = pm_noirq_op(dev, dev->class->pm, state); + if (error) + return error; + } else if (dev->bus && dev->bus->pm) { pm_dev_dbg(dev, state, "LATE "); error = pm_noirq_op(dev, dev->bus->pm, state); if (error) - goto End; + return error; } if (dev->pwr_domain) { @@ -769,8 +753,7 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state) pm_noirq_op(dev, &dev->pwr_domain->ops, state); } -End: - return error; + return 0; } /** @@ -857,25 +840,22 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) goto End; } + if (dev->type && dev->type->pm) { + pm_dev_dbg(dev, state, "type "); + error = pm_op(dev, dev->type->pm, state); + goto Domain; + } + if (dev->class) { if (dev->class->pm) { pm_dev_dbg(dev, state, "class "); error = pm_op(dev, dev->class->pm, state); + goto Domain; } else if (dev->class->suspend) { pm_dev_dbg(dev, state, "legacy class "); error = legacy_suspend(dev, state, dev->class->suspend); + goto Domain; } - if (error) - goto End; - } - - if (dev->type) { - if (dev->type->pm) { - pm_dev_dbg(dev, state, "type "); - error = pm_op(dev, dev->type->pm, state); - } - if (error) - goto End; } if (dev->bus) { @@ -886,11 +866,10 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) pm_dev_dbg(dev, state, "legacy "); error = legacy_suspend(dev, state, dev->bus->suspend); } - if (error) - goto End; } - if (dev->pwr_domain) { + Domain: + if (!error && dev->pwr_domain) { pm_dev_dbg(dev, state, "power domain "); pm_op(dev, &dev->pwr_domain->ops, state); } @@ -985,28 +964,27 @@ static int device_prepare(struct device *dev, pm_message_t state) device_lock(dev); - if (dev->bus && dev->bus->pm && dev->bus->pm->prepare) { - pm_dev_dbg(dev, state, "preparing "); - error = dev->bus->pm->prepare(dev); - suspend_report_result(dev->bus->pm->prepare, error); - if (error) - goto End; - } - - if (dev->type && dev->type->pm && dev->type->pm->prepare) { + if (dev->type && dev->type->pm) { pm_dev_dbg(dev, state, "preparing type "); - error = dev->type->pm->prepare(dev); + if (dev->type->pm->prepare) + error = dev->type->pm->prepare(dev); suspend_report_result(dev->type->pm->prepare, error); if (error) goto End; - } - - if (dev->class && dev->class->pm && dev->class->pm->prepare) { + } else if (dev->class && dev->class->pm) { pm_dev_dbg(dev, state, "preparing class "); - error = dev->class->pm->prepare(dev); + if (dev->class->pm->prepare) + error = dev->class->pm->prepare(dev); suspend_report_result(dev->class->pm->prepare, error); if (error) goto End; + } else if (dev->bus && dev->bus->pm) { + pm_dev_dbg(dev, state, "preparing "); + if (dev->bus->pm->prepare) + error = dev->bus->pm->prepare(dev); + suspend_report_result(dev->bus->pm->prepare, error); + if (error) + goto End; } if (dev->pwr_domain && dev->pwr_domain->ops.prepare) { diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 25edc9a3a489..54597c859ecb 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -214,12 +214,12 @@ static int rpm_idle(struct device *dev, int rpmflags) dev->power.idle_notification = true; - if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_idle) - callback = dev->bus->pm->runtime_idle; - else if (dev->type && dev->type->pm && dev->type->pm->runtime_idle) + if (dev->type && dev->type->pm) callback = dev->type->pm->runtime_idle; else if (dev->class && dev->class->pm) callback = dev->class->pm->runtime_idle; + else if (dev->bus && dev->bus->pm) + callback = dev->bus->pm->runtime_idle; else callback = NULL; @@ -382,12 +382,12 @@ static int rpm_suspend(struct device *dev, int rpmflags) __update_runtime_status(dev, RPM_SUSPENDING); - if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend) - callback = dev->bus->pm->runtime_suspend; - else if (dev->type && dev->type->pm && dev->type->pm->runtime_suspend) + if (dev->type && dev->type->pm) callback = dev->type->pm->runtime_suspend; else if (dev->class && dev->class->pm) callback = dev->class->pm->runtime_suspend; + else if (dev->bus && dev->bus->pm) + callback = dev->bus->pm->runtime_suspend; else callback = NULL; @@ -584,12 +584,12 @@ static int rpm_resume(struct device *dev, int rpmflags) if (dev->pwr_domain) rpm_callback(dev->pwr_domain->ops.runtime_resume, dev); - if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume) - callback = dev->bus->pm->runtime_resume; - else if (dev->type && dev->type->pm && dev->type->pm->runtime_resume) + if (dev->type && dev->type->pm) callback = dev->type->pm->runtime_resume; else if (dev->class && dev->class->pm) callback = dev->class->pm->runtime_resume; + else if (dev->bus && dev->bus->pm) + callback = dev->bus->pm->runtime_resume; else callback = NULL; -- cgit v1.2.3 From a8b7228cdce9937ebc302a28db8599035e7b3c86 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 24 Feb 2011 20:48:56 +0100 Subject: PM: Documentation/power/states.txt: fix repetition Remove repetition of "called swsusp". Signed-off-by: Alexandre Courbot Signed-off-by: Rafael J. Wysocki --- Documentation/power/states.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/power/states.txt b/Documentation/power/states.txt index 34800cc521bf..4416b28630df 100644 --- a/Documentation/power/states.txt +++ b/Documentation/power/states.txt @@ -62,12 +62,12 @@ setup via another operating system for it to use. Despite the inconvenience, this method requires minimal work by the kernel, since the firmware will also handle restoring memory contents on resume. -For suspend-to-disk, a mechanism called swsusp called 'swsusp' (Swap -Suspend) is used to write memory contents to free swap space. -swsusp has some restrictive requirements, but should work in most -cases. Some, albeit outdated, documentation can be found in -Documentation/power/swsusp.txt. Alternatively, userspace can do most -of the actual suspend to disk work, see userland-swsusp.txt. +For suspend-to-disk, a mechanism called 'swsusp' (Swap Suspend) is used +to write memory contents to free swap space. swsusp has some restrictive +requirements, but should work in most cases. Some, albeit outdated, +documentation can be found in Documentation/power/swsusp.txt. +Alternatively, userspace can do most of the actual suspend to disk work, +see userland-swsusp.txt. Once memory state is written to disk, the system may either enter a low-power state (like ACPI S4), or it may simply power down. Powering -- cgit v1.2.3 From 7ae496187876d264c712d7c102c45edb8eb41363 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Fri, 25 Feb 2011 23:46:18 +0100 Subject: PM / OPP: opp_find_freq_exact() documentation fix opp_find_freq_exact() documentation has is_available instead of available. This also fixes warning with the kernel-doc: scripts/kernel-doc drivers/base/power/opp.c >/dev/null Warning(drivers/base/power/opp.c:246): No description found for parameter 'available' Warning(drivers/base/power/opp.c:246): Excess function parameter 'is_available' description in 'opp_find_freq_exact' Signed-off-by: Nishanth Menon Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index 2bb9b4cf59d7..56a6899f5e9e 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -222,7 +222,7 @@ int opp_get_opp_count(struct device *dev) * opp_find_freq_exact() - search for an exact frequency * @dev: device for which we do this operation * @freq: frequency to search for - * @is_available: true/false - match for available opp + * @available: true/false - match for available opp * * Searches for exact match in the opp list and returns pointer to the matching * opp if found, else returns ERR_PTR in case of error and should be handled -- cgit v1.2.3 From f9b9e806ae0ede772cbb9916d9ac7354a123d044 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Mon, 28 Feb 2011 22:06:34 +0100 Subject: PM QoS: Make pm_qos settings readable I have a machine where entering deep C-states broke. pm_qos was a hot candidate, but I couldn't find any way to double check without the need of recompiling. While in this case it was a driver bug (ath9k): https://bugzilla.kernel.org/show_bug.cgi?id=27532 powertop or others may want to read out cpu_dma_latency restrictions which could be the cause of preventing a machine entering deeper C-states. Output with this patch: # default value of 2000 * USEC_PER_SEC (0x77359400) cat /dev/network_latency |hexdump 0000000 9400 7735 0000004 # value of 55 us which is the reason for not entering C2 cat /dev/cpu_dma_latency |hexdump 0000000 0037 0000 0000004 There is no reason to hide this info -> make pm_qos files readable. Signed-off-by: Thomas Renninger Signed-off-by: Rafael J. Wysocki --- kernel/pm_qos_params.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index aeaa7f846821..0da058bff8eb 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c @@ -103,11 +103,14 @@ static struct pm_qos_object *pm_qos_array[] = { static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos); +static ssize_t pm_qos_power_read(struct file *filp, char __user *buf, + size_t count, loff_t *f_pos); static int pm_qos_power_open(struct inode *inode, struct file *filp); static int pm_qos_power_release(struct inode *inode, struct file *filp); static const struct file_operations pm_qos_power_fops = { .write = pm_qos_power_write, + .read = pm_qos_power_read, .open = pm_qos_power_open, .release = pm_qos_power_release, .llseek = noop_llseek, @@ -376,6 +379,27 @@ static int pm_qos_power_release(struct inode *inode, struct file *filp) } +static ssize_t pm_qos_power_read(struct file *filp, char __user *buf, + size_t count, loff_t *f_pos) +{ + s32 value; + unsigned long flags; + struct pm_qos_object *o; + struct pm_qos_request_list *pm_qos_req = filp->private_data;; + + if (!pm_qos_req) + return -EINVAL; + if (!pm_qos_request_active(pm_qos_req)) + return -EINVAL; + + o = pm_qos_array[pm_qos_req->pm_qos_class]; + spin_lock_irqsave(&pm_qos_lock, flags); + value = pm_qos_get_value(o); + spin_unlock_irqrestore(&pm_qos_lock, flags); + + return simple_read_from_buffer(buf, count, f_pos, &value, sizeof(s32)); +} + static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos) { -- cgit v1.2.3 From 40dc166cb5dddbd36aa4ad11c03915ea538f5a61 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 15 Mar 2011 00:43:46 +0100 Subject: PM / Core: Introduce struct syscore_ops for core subsystems PM Some subsystems need to carry out suspend/resume and shutdown operations with one CPU on-line and interrupts disabled. The only way to register such operations is to define a sysdev class and a sysdev specifically for this purpose which is cumbersome and inefficient. Moreover, the arguments taken by sysdev suspend, resume and shutdown callbacks are practically never necessary. For this reason, introduce a simpler interface allowing subsystems to register operations to be executed very late during system suspend and shutdown and very early during resume in the form of strcut syscore_ops objects. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman --- drivers/base/Makefile | 2 +- drivers/base/syscore.c | 117 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/syscore_ops.h | 29 +++++++++++ kernel/power/hibernate.c | 9 ++++ kernel/power/suspend.c | 4 ++ kernel/sys.c | 4 ++ 6 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 drivers/base/syscore.c create mode 100644 include/linux/syscore_ops.h diff --git a/drivers/base/Makefile b/drivers/base/Makefile index 5f51c3b4451e..4c5701c15f53 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -1,6 +1,6 @@ # Makefile for the Linux device tree -obj-y := core.o sys.o bus.o dd.o \ +obj-y := core.o sys.o bus.o dd.o syscore.o \ driver.o class.o platform.o \ cpu.o firmware.o init.o map.o devres.o \ attribute_container.o transport_class.o diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c new file mode 100644 index 000000000000..90af2943f9e4 --- /dev/null +++ b/drivers/base/syscore.c @@ -0,0 +1,117 @@ +/* + * syscore.c - Execution of system core operations. + * + * Copyright (C) 2011 Rafael J. Wysocki , Novell Inc. + * + * This file is released under the GPLv2. + */ + +#include +#include +#include + +static LIST_HEAD(syscore_ops_list); +static DEFINE_MUTEX(syscore_ops_lock); + +/** + * register_syscore_ops - Register a set of system core operations. + * @ops: System core operations to register. + */ +void register_syscore_ops(struct syscore_ops *ops) +{ + mutex_lock(&syscore_ops_lock); + list_add_tail(&ops->node, &syscore_ops_list); + mutex_unlock(&syscore_ops_lock); +} +EXPORT_SYMBOL_GPL(register_syscore_ops); + +/** + * unregister_syscore_ops - Unregister a set of system core operations. + * @ops: System core operations to unregister. + */ +void unregister_syscore_ops(struct syscore_ops *ops) +{ + mutex_lock(&syscore_ops_lock); + list_del(&ops->node); + mutex_unlock(&syscore_ops_lock); +} +EXPORT_SYMBOL_GPL(unregister_syscore_ops); + +#ifdef CONFIG_PM_SLEEP +/** + * syscore_suspend - Execute all the registered system core suspend callbacks. + * + * This function is executed with one CPU on-line and disabled interrupts. + */ +int syscore_suspend(void) +{ + struct syscore_ops *ops; + int ret = 0; + + WARN_ONCE(!irqs_disabled(), + "Interrupts enabled before system core suspend.\n"); + + list_for_each_entry_reverse(ops, &syscore_ops_list, node) + if (ops->suspend) { + if (initcall_debug) + pr_info("PM: Calling %pF\n", ops->suspend); + ret = ops->suspend(); + if (ret) + goto err_out; + WARN_ONCE(!irqs_disabled(), + "Interrupts enabled after %pF\n", ops->suspend); + } + + return 0; + + err_out: + pr_err("PM: System core suspend callback %pF failed.\n", ops->suspend); + + list_for_each_entry_continue(ops, &syscore_ops_list, node) + if (ops->resume) + ops->resume(); + + return ret; +} + +/** + * syscore_resume - Execute all the registered system core resume callbacks. + * + * This function is executed with one CPU on-line and disabled interrupts. + */ +void syscore_resume(void) +{ + struct syscore_ops *ops; + + WARN_ONCE(!irqs_disabled(), + "Interrupts enabled before system core resume.\n"); + + list_for_each_entry(ops, &syscore_ops_list, node) + if (ops->resume) { + if (initcall_debug) + pr_info("PM: Calling %pF\n", ops->resume); + ops->resume(); + WARN_ONCE(!irqs_disabled(), + "Interrupts enabled after %pF\n", ops->resume); + } +} +#endif /* CONFIG_PM_SLEEP */ + +/** + * syscore_shutdown - Execute all the registered system core shutdown callbacks. + */ +void syscore_shutdown(void) +{ + struct syscore_ops *ops; + + mutex_lock(&syscore_ops_lock); + + list_for_each_entry_reverse(ops, &syscore_ops_list, node) + if (ops->shutdown) { + if (initcall_debug) + pr_info("PM: Calling %pF\n", ops->shutdown); + ops->shutdown(); + } + + mutex_unlock(&syscore_ops_lock); +} diff --git a/include/linux/syscore_ops.h b/include/linux/syscore_ops.h new file mode 100644 index 000000000000..27b3b0bc41a9 --- /dev/null +++ b/include/linux/syscore_ops.h @@ -0,0 +1,29 @@ +/* + * syscore_ops.h - System core operations. + * + * Copyright (C) 2011 Rafael J. Wysocki , Novell Inc. + * + * This file is released under the GPLv2. + */ + +#ifndef _LINUX_SYSCORE_OPS_H +#define _LINUX_SYSCORE_OPS_H + +#include + +struct syscore_ops { + struct list_head node; + int (*suspend)(void); + void (*resume)(void); + void (*shutdown)(void); +}; + +extern void register_syscore_ops(struct syscore_ops *ops); +extern void unregister_syscore_ops(struct syscore_ops *ops); +#ifdef CONFIG_PM_SLEEP +extern int syscore_suspend(void); +extern void syscore_resume(void); +#endif +extern void syscore_shutdown(void); + +#endif diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 1832bd264219..aeabd26e3342 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -272,6 +273,8 @@ static int create_image(int platform_mode) local_irq_disable(); error = sysdev_suspend(PMSG_FREEZE); + if (!error) + error = syscore_suspend(); if (error) { printk(KERN_ERR "PM: Some system devices failed to power down, " "aborting hibernation\n"); @@ -295,6 +298,7 @@ static int create_image(int platform_mode) } Power_up: + syscore_resume(); sysdev_resume(); /* NOTE: dpm_resume_noirq() is just a resume() for devices * that suspended with irqs off ... no overall powerup. @@ -403,6 +407,8 @@ static int resume_target_kernel(bool platform_mode) local_irq_disable(); error = sysdev_suspend(PMSG_QUIESCE); + if (!error) + error = syscore_suspend(); if (error) goto Enable_irqs; @@ -429,6 +435,7 @@ static int resume_target_kernel(bool platform_mode) restore_processor_state(); touch_softlockup_watchdog(); + syscore_resume(); sysdev_resume(); Enable_irqs: @@ -516,6 +523,7 @@ int hibernation_platform_enter(void) local_irq_disable(); sysdev_suspend(PMSG_HIBERNATE); + syscore_suspend(); if (pm_wakeup_pending()) { error = -EAGAIN; goto Power_up; @@ -526,6 +534,7 @@ int hibernation_platform_enter(void) while (1); Power_up: + syscore_resume(); sysdev_resume(); local_irq_enable(); enable_nonboot_cpus(); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index de6f86bfa303..2814c32aed51 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include "power.h" @@ -163,11 +164,14 @@ static int suspend_enter(suspend_state_t state) BUG_ON(!irqs_disabled()); error = sysdev_suspend(PMSG_SUSPEND); + if (!error) + error = syscore_suspend(); if (!error) { if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) { error = suspend_ops->enter(state); events_check_enabled = false; } + syscore_resume(); sysdev_resume(); } diff --git a/kernel/sys.c b/kernel/sys.c index 18da702ec813..1ad48b3b9068 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -298,6 +299,7 @@ void kernel_restart_prepare(char *cmd) system_state = SYSTEM_RESTART; device_shutdown(); sysdev_shutdown(); + syscore_shutdown(); } /** @@ -336,6 +338,7 @@ void kernel_halt(void) { kernel_shutdown_prepare(SYSTEM_HALT); sysdev_shutdown(); + syscore_shutdown(); printk(KERN_EMERG "System halted.\n"); kmsg_dump(KMSG_DUMP_HALT); machine_halt(); @@ -355,6 +358,7 @@ void kernel_power_off(void) pm_power_off_prepare(); disable_nonboot_cpus(); sysdev_shutdown(); + syscore_shutdown(); printk(KERN_EMERG "Power down.\n"); kmsg_dump(KMSG_DUMP_POWEROFF); machine_power_off(); -- cgit v1.2.3 From bea3864fb627d110933cfb8babe048b63c4fc76e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 15 Mar 2011 00:45:46 +0100 Subject: PM / Hibernate: Reduce autotuned default image size The hibernate image size autotuning mechanism sets the default image size to 5/2 of the total system RAM, but it is reported that on some systems device drivers allocate substantial amounts of memory during suspend and the creation of the image fails as a result (too little memory is preallocated). Modify the autotuning mechanism to use 1/3 instead of 2/5 of RAM as the default image size, which is reported to be sufficient for the affected systems. References: https://bugzilla.kernel.org/show_bug.cgi?id=30482 Reported-and-tested-by: Martin Steigerwald Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 64db648ff911..ca0aacc24874 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -42,15 +42,15 @@ static void swsusp_unset_page_forbidden(struct page *); /* * Preferred image size in bytes (tunable via /sys/power/image_size). - * When it is set to N, swsusp will do its best to ensure the image - * size will not exceed N bytes, but if that is impossible, it will - * try to create the smallest image possible. + * When it is set to N, the image creating code will do its best to + * ensure the image size will not exceed N bytes, but if that is + * impossible, it will try to create the smallest image possible. */ unsigned long image_size; void __init hibernate_image_size_init(void) { - image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE; + image_size = (totalram_pages / 3) * PAGE_SIZE; } /* List of PBEs needed for restoring the pages that were allocated before -- cgit v1.2.3