diff options
30 files changed, 1359 insertions, 320 deletions
diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power index 840f7d64d483..45000f0db4d4 100644 --- a/Documentation/ABI/testing/sysfs-devices-power +++ b/Documentation/ABI/testing/sysfs-devices-power @@ -96,16 +96,26 @@ Description: is read-only. If the device is not enabled to wake up the system from sleep states, this attribute is not present. -What: /sys/devices/.../power/wakeup_hit_count -Date: September 2010 +What: /sys/devices/.../power/wakeup_abort_count +Date: February 2012 Contact: Rafael J. Wysocki <rjw@sisk.pl> Description: - The /sys/devices/.../wakeup_hit_count attribute contains the + The /sys/devices/.../wakeup_abort_count attribute contains the number of times the processing of a wakeup event associated with - the device might prevent the system from entering a sleep state. - This attribute is read-only. If the device is not enabled to - wake up the system from sleep states, this attribute is not - present. + the device might have aborted system transition into a sleep + state in progress. This attribute is read-only. If the device + is not enabled to wake up the system from sleep states, this + attribute is not present. + +What: /sys/devices/.../power/wakeup_expire_count +Date: February 2012 +Contact: Rafael J. Wysocki <rjw@sisk.pl> +Description: + The /sys/devices/.../wakeup_expire_count attribute contains the + number of times a wakeup event associated with the device has + been reported with a timeout that expired. This attribute is + read-only. If the device is not enabled to wake up the system + from sleep states, this attribute is not present. What: /sys/devices/.../power/wakeup_active Date: September 2010 @@ -148,6 +158,17 @@ Description: not enabled to wake up the system from sleep states, this attribute is not present. +What: /sys/devices/.../power/wakeup_prevent_sleep_time_ms +Date: February 2012 +Contact: Rafael J. 
Wysocki <rjw@sisk.pl> +Description: + The /sys/devices/.../wakeup_prevent_sleep_time_ms attribute + contains the total time the device has been preventing + opportunistic transitions to sleep states from occurring. + This attribute is read-only. If the device is not enabled to + wake up the system from sleep states, this attribute is not + present. + What: /sys/devices/.../power/autosuspend_delay_ms Date: September 2010 Contact: Alan Stern <stern@rowland.harvard.edu> diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index b464d12761ba..31725ffeeb3a 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -172,3 +172,62 @@ Description: Reading from this file will display the current value, which is set to 1 MB by default. + +What: /sys/power/autosleep +Date: April 2012 +Contact: Rafael J. Wysocki <rjw@sisk.pl> +Description: + The /sys/power/autosleep file can be written one of the strings + returned by reads from /sys/power/state. If that happens, a + work item attempting to trigger a transition of the system to + the sleep state represented by that string is queued up. This + attempt will only succeed if there are no active wakeup sources + in the system at that time. After every execution, regardless + of whether or not the attempt to put the system to sleep has + succeeded, the work item requeues itself until user space + writes "off" to /sys/power/autosleep. + + Reading from this file causes the last string successfully + written to it to be returned. + +What: /sys/power/wake_lock +Date: February 2012 +Contact: Rafael J. Wysocki <rjw@sisk.pl> +Description: + The /sys/power/wake_lock file allows user space to create + wakeup source objects and activate them on demand (if one of + those wakeup sources is active, reads from the + /sys/power/wakeup_count file block or return false). 
When a + string without white space is written to /sys/power/wake_lock, + it will be assumed to represent a wakeup source name. If there + is a wakeup source object with that name, it will be activated + (unless active already). Otherwise, a new wakeup source object + will be registered, assigned the given name and activated. + If a string written to /sys/power/wake_lock contains white + space, the part of the string preceding the white space will be + regarded as a wakeup source name and handled as described above. + The other part of the string will be regarded as a timeout (in + nanoseconds) such that the wakeup source will be automatically + deactivated after it has expired. The timeout, if present, is + set regardless of the current state of the wakeup source object + in question. + + Reads from this file return a string consisting of the names of + wakeup sources created with the help of it that are active at + the moment, separated with spaces. + + +What: /sys/power/wake_unlock +Date: February 2012 +Contact: Rafael J. Wysocki <rjw@sisk.pl> +Description: + The /sys/power/wake_unlock file allows user space to deactivate + wakeup sources created with the help of /sys/power/wake_lock. + When a string is written to /sys/power/wake_unlock, it will be + assumed to represent the name of a wakeup source to deactivate. + If a wakeup source object of that name exists and is active at + the moment, it will be deactivated. + + Reads from this file return a string consisting of the names of + wakeup sources created with the help of /sys/power/wake_lock + that are inactive at the moment, separated with spaces. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 62aba89b04a2..8cb10f77c723 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2463,6 +2463,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. 
resume= [SWSUSP] Specify the partition device for software suspend + Format: + {/dev/<dev> | PARTUUID=<uuid> | <int>:<int> | <hex>} resume_offset= [SWSUSP] Specify the offset from the beginning of the partition diff --git a/Documentation/power/suspend-and-cpuhotplug.txt b/Documentation/power/suspend-and-cpuhotplug.txt index f28f9a6f0347..e13dafc8e8f1 100644 --- a/Documentation/power/suspend-and-cpuhotplug.txt +++ b/Documentation/power/suspend-and-cpuhotplug.txt @@ -29,7 +29,7 @@ More details follow: Write 'mem' to /sys/power/state - syfs file + sysfs file | v Acquire pm_mutex lock diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 73ce9fbe9839..83aa694a8efe 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -11,6 +11,7 @@ #include <linux/io.h> #include <linux/pm_runtime.h> #include <linux/pm_domain.h> +#include <linux/pm_qos.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/sched.h> @@ -38,11 +39,13 @@ ktime_t __start = ktime_get(); \ type __retval = GENPD_DEV_CALLBACK(genpd, type, callback, dev); \ s64 __elapsed = ktime_to_ns(ktime_sub(ktime_get(), __start)); \ - struct generic_pm_domain_data *__gpd_data = dev_gpd_data(dev); \ - if (__elapsed > __gpd_data->td.field) { \ - __gpd_data->td.field = __elapsed; \ + struct gpd_timing_data *__td = &dev_gpd_data(dev)->td; \ + if (!__retval && __elapsed > __td->field) { \ + __td->field = __elapsed; \ dev_warn(dev, name " latency exceeded, new value %lld ns\n", \ __elapsed); \ + genpd->max_off_time_changed = true; \ + __td->constraint_changed = true; \ } \ __retval; \ }) @@ -211,6 +214,7 @@ int __pm_genpd_poweron(struct generic_pm_domain *genpd) elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); if (elapsed_ns > genpd->power_on_latency_ns) { genpd->power_on_latency_ns = elapsed_ns; + genpd->max_off_time_changed = true; if (genpd->name) pr_warning("%s: Power-on latency exceeded, " "new value %lld ns\n", genpd->name, @@ -247,6 +251,53 @@ int 
pm_genpd_poweron(struct generic_pm_domain *genpd) #ifdef CONFIG_PM_RUNTIME +static int genpd_dev_pm_qos_notifier(struct notifier_block *nb, + unsigned long val, void *ptr) +{ + struct generic_pm_domain_data *gpd_data; + struct device *dev; + + gpd_data = container_of(nb, struct generic_pm_domain_data, nb); + + mutex_lock(&gpd_data->lock); + dev = gpd_data->base.dev; + if (!dev) { + mutex_unlock(&gpd_data->lock); + return NOTIFY_DONE; + } + mutex_unlock(&gpd_data->lock); + + for (;;) { + struct generic_pm_domain *genpd; + struct pm_domain_data *pdd; + + spin_lock_irq(&dev->power.lock); + + pdd = dev->power.subsys_data ? + dev->power.subsys_data->domain_data : NULL; + if (pdd) { + to_gpd_data(pdd)->td.constraint_changed = true; + genpd = dev_to_genpd(dev); + } else { + genpd = ERR_PTR(-ENODATA); + } + + spin_unlock_irq(&dev->power.lock); + + if (!IS_ERR(genpd)) { + mutex_lock(&genpd->lock); + genpd->max_off_time_changed = true; + mutex_unlock(&genpd->lock); + } + + dev = dev->parent; + if (!dev || dev->power.ignore_children) + break; + } + + return NOTIFY_DONE; +} + /** * __pm_genpd_save_device - Save the pre-suspend state of a device. * @pdd: Domain data of the device to save the state of. @@ -435,6 +486,7 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); if (elapsed_ns > genpd->power_off_latency_ns) { genpd->power_off_latency_ns = elapsed_ns; + genpd->max_off_time_changed = true; if (genpd->name) pr_warning("%s: Power-off latency exceeded, " "new value %lld ns\n", genpd->name, @@ -443,17 +495,6 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) } genpd->status = GPD_STATE_POWER_OFF; - genpd->power_off_time = ktime_get(); - - /* Update PM QoS information for devices in the domain. 
*/ - list_for_each_entry_reverse(pdd, &genpd->dev_list, list_node) { - struct gpd_timing_data *td = &to_gpd_data(pdd)->td; - - pm_runtime_update_max_time_suspended(pdd->dev, - td->start_latency_ns + - td->restore_state_latency_ns + - genpd->power_on_latency_ns); - } list_for_each_entry(link, &genpd->slave_links, slave_node) { genpd_sd_counter_dec(link->master); @@ -514,9 +555,6 @@ static int pm_genpd_runtime_suspend(struct device *dev) if (ret) return ret; - pm_runtime_update_max_time_suspended(dev, - dev_gpd_data(dev)->td.start_latency_ns); - /* * If power.irq_safe is set, this routine will be run with interrupts * off, so it can't use mutexes. @@ -613,6 +651,12 @@ void pm_genpd_poweroff_unused(void) #else +static inline int genpd_dev_pm_qos_notifier(struct notifier_block *nb, + unsigned long val, void *ptr) +{ + return NOTIFY_DONE; +} + static inline void genpd_power_off_work_fn(struct work_struct *work) {} #define pm_genpd_runtime_suspend NULL @@ -1209,12 +1253,15 @@ int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev)) return -EINVAL; - genpd_acquire_lock(genpd); + gpd_data = kzalloc(sizeof(*gpd_data), GFP_KERNEL); + if (!gpd_data) + return -ENOMEM; - if (genpd->status == GPD_STATE_POWER_OFF) { - ret = -EINVAL; - goto out; - } + mutex_init(&gpd_data->lock); + gpd_data->nb.notifier_call = genpd_dev_pm_qos_notifier; + dev_pm_qos_add_notifier(dev, &gpd_data->nb); + + genpd_acquire_lock(genpd); if (genpd->prepared_count > 0) { ret = -EAGAIN; @@ -1227,26 +1274,35 @@ int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, goto out; } - gpd_data = kzalloc(sizeof(*gpd_data), GFP_KERNEL); - if (!gpd_data) { - ret = -ENOMEM; - goto out; - } - genpd->device_count++; + genpd->max_off_time_changed = true; - dev->pm_domain = &genpd->domain; dev_pm_get_subsys_data(dev); + + mutex_lock(&gpd_data->lock); + spin_lock_irq(&dev->power.lock); + dev->pm_domain = &genpd->domain; 
dev->power.subsys_data->domain_data = &gpd_data->base; gpd_data->base.dev = dev; - gpd_data->need_restore = false; list_add_tail(&gpd_data->base.list_node, &genpd->dev_list); + gpd_data->need_restore = genpd->status == GPD_STATE_POWER_OFF; if (td) gpd_data->td = *td; + gpd_data->td.constraint_changed = true; + gpd_data->td.effective_constraint_ns = -1; + spin_unlock_irq(&dev->power.lock); + mutex_unlock(&gpd_data->lock); + + genpd_release_lock(genpd); + + return 0; + out: genpd_release_lock(genpd); + dev_pm_qos_remove_notifier(dev, &gpd_data->nb); + kfree(gpd_data); return ret; } @@ -1290,12 +1346,15 @@ int __pm_genpd_of_add_device(struct device_node *genpd_node, struct device *dev, int pm_genpd_remove_device(struct generic_pm_domain *genpd, struct device *dev) { + struct generic_pm_domain_data *gpd_data; struct pm_domain_data *pdd; - int ret = -EINVAL; + int ret = 0; dev_dbg(dev, "%s()\n", __func__); - if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev)) + if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev) + || IS_ERR_OR_NULL(dev->pm_domain) + || pd_to_genpd(dev->pm_domain) != genpd) return -EINVAL; genpd_acquire_lock(genpd); @@ -1305,21 +1364,27 @@ int pm_genpd_remove_device(struct generic_pm_domain *genpd, goto out; } - list_for_each_entry(pdd, &genpd->dev_list, list_node) { - if (pdd->dev != dev) - continue; + genpd->device_count--; + genpd->max_off_time_changed = true; - list_del_init(&pdd->list_node); - pdd->dev = NULL; - dev_pm_put_subsys_data(dev); - dev->pm_domain = NULL; - kfree(to_gpd_data(pdd)); + spin_lock_irq(&dev->power.lock); + dev->pm_domain = NULL; + pdd = dev->power.subsys_data->domain_data; + list_del_init(&pdd->list_node); + dev->power.subsys_data->domain_data = NULL; + spin_unlock_irq(&dev->power.lock); - genpd->device_count--; + gpd_data = to_gpd_data(pdd); + mutex_lock(&gpd_data->lock); + pdd->dev = NULL; + mutex_unlock(&gpd_data->lock); - ret = 0; - break; - } + genpd_release_lock(genpd); + + dev_pm_qos_remove_notifier(dev, &gpd_data->nb); + 
kfree(gpd_data); + dev_pm_put_subsys_data(dev); + return 0; out: genpd_release_lock(genpd); @@ -1348,6 +1413,26 @@ void pm_genpd_dev_always_on(struct device *dev, bool val) EXPORT_SYMBOL_GPL(pm_genpd_dev_always_on); /** + * pm_genpd_dev_need_restore - Set/unset the device's "need restore" flag. + * @dev: Device to set/unset the flag for. + * @val: The new value of the device's "need restore" flag. + */ +void pm_genpd_dev_need_restore(struct device *dev, bool val) +{ + struct pm_subsys_data *psd; + unsigned long flags; + + spin_lock_irqsave(&dev->power.lock, flags); + + psd = dev_to_psd(dev); + if (psd && psd->domain_data) + to_gpd_data(psd->domain_data)->need_restore = val; + + spin_unlock_irqrestore(&dev->power.lock, flags); +} +EXPORT_SYMBOL_GPL(pm_genpd_dev_need_restore); + +/** * pm_genpd_add_subdomain - Add a subdomain to an I/O PM domain. * @genpd: Master PM domain to add the subdomain to. * @subdomain: Subdomain to be added. @@ -1378,7 +1463,7 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, goto out; } - list_for_each_entry(link, &genpd->slave_links, slave_node) { + list_for_each_entry(link, &genpd->master_links, master_node) { if (link->slave == subdomain && link->master == genpd) { ret = -EINVAL; goto out; @@ -1690,6 +1775,7 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->resume_count = 0; genpd->device_count = 0; genpd->max_off_time_ns = -1; + genpd->max_off_time_changed = true; genpd->domain.ops.runtime_suspend = pm_genpd_runtime_suspend; genpd->domain.ops.runtime_resume = pm_genpd_runtime_resume; genpd->domain.ops.runtime_idle = pm_generic_runtime_idle; diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 66a265bf5867..28dee3053f1f 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -14,6 +14,31 @@ #ifdef CONFIG_PM_RUNTIME +static int dev_update_qos_constraint(struct device *dev, void *data) +{ + s64 *constraint_ns_p = data; + s32 
constraint_ns = -1; + + if (dev->power.subsys_data && dev->power.subsys_data->domain_data) + constraint_ns = dev_gpd_data(dev)->td.effective_constraint_ns; + + if (constraint_ns < 0) { + constraint_ns = dev_pm_qos_read_value(dev); + constraint_ns *= NSEC_PER_USEC; + } + if (constraint_ns == 0) + return 0; + + /* + * constraint_ns cannot be negative here, because the device has been + * suspended. + */ + if (constraint_ns < *constraint_ns_p || *constraint_ns_p == 0) + *constraint_ns_p = constraint_ns; + + return 0; +} + /** * default_stop_ok - Default PM domain governor routine for stopping devices. * @dev: Device to check. @@ -21,14 +46,52 @@ bool default_stop_ok(struct device *dev) { struct gpd_timing_data *td = &dev_gpd_data(dev)->td; + unsigned long flags; + s64 constraint_ns; dev_dbg(dev, "%s()\n", __func__); - if (dev->power.max_time_suspended_ns < 0 || td->break_even_ns == 0) - return true; + spin_lock_irqsave(&dev->power.lock, flags); + + if (!td->constraint_changed) { + bool ret = td->cached_stop_ok; - return td->stop_latency_ns + td->start_latency_ns < td->break_even_ns - && td->break_even_ns < dev->power.max_time_suspended_ns; + spin_unlock_irqrestore(&dev->power.lock, flags); + return ret; + } + td->constraint_changed = false; + td->cached_stop_ok = false; + td->effective_constraint_ns = -1; + constraint_ns = __dev_pm_qos_read_value(dev); + + spin_unlock_irqrestore(&dev->power.lock, flags); + + if (constraint_ns < 0) + return false; + + constraint_ns *= NSEC_PER_USEC; + /* + * We can walk the children without any additional locking, because + * they all have been suspended at this point and their + * effective_constraint_ns fields won't be modified in parallel with us. 
+ */ + if (!dev->power.ignore_children) + device_for_each_child(dev, &constraint_ns, + dev_update_qos_constraint); + + if (constraint_ns > 0) { + constraint_ns -= td->start_latency_ns; + if (constraint_ns == 0) + return false; + } + td->effective_constraint_ns = constraint_ns; + td->cached_stop_ok = constraint_ns > td->stop_latency_ns || + constraint_ns == 0; + /* + * The children have been suspended already, so we don't need to take + * their stop latencies into account here. + */ + return td->cached_stop_ok; } /** @@ -42,9 +105,27 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) struct generic_pm_domain *genpd = pd_to_genpd(pd); struct gpd_link *link; struct pm_domain_data *pdd; - s64 min_dev_off_time_ns; + s64 min_off_time_ns; s64 off_on_time_ns; - ktime_t time_now = ktime_get(); + + if (genpd->max_off_time_changed) { + struct gpd_link *link; + + /* + * We have to invalidate the cached results for the masters, so + * use the observation that default_power_down_ok() is not + * going to be called for any master until this instance + * returns. + */ + list_for_each_entry(link, &genpd->slave_links, slave_node) + link->master->max_off_time_changed = true; + + genpd->max_off_time_changed = false; + genpd->cached_power_down_ok = false; + genpd->max_off_time_ns = -1; + } else { + return genpd->cached_power_down_ok; + } off_on_time_ns = genpd->power_off_latency_ns + genpd->power_on_latency_ns; @@ -61,6 +142,7 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) to_gpd_data(pdd)->td.save_state_latency_ns; } + min_off_time_ns = -1; /* * Check if subdomains can be off for enough time. 
* @@ -73,8 +155,6 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) if (sd_max_off_ns < 0) continue; - sd_max_off_ns -= ktime_to_ns(ktime_sub(time_now, - sd->power_off_time)); /* * Check if the subdomain is allowed to be off long enough for * the current domain to turn off and on (that's how much time @@ -82,60 +162,64 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) */ if (sd_max_off_ns <= off_on_time_ns) return false; + + if (min_off_time_ns > sd_max_off_ns || min_off_time_ns < 0) + min_off_time_ns = sd_max_off_ns; } /* * Check if the devices in the domain can be off enough time. */ - min_dev_off_time_ns = -1; list_for_each_entry(pdd, &genpd->dev_list, list_node) { struct gpd_timing_data *td; - struct device *dev = pdd->dev; - s64 dev_off_time_ns; + s64 constraint_ns; - if (!dev->driver || dev->power.max_time_suspended_ns < 0) + if (!pdd->dev->driver) continue; + /* + * Check if the device is allowed to be off long enough for the + * domain to turn off and on (that's how much time it will + * have to wait worst case). + */ td = &to_gpd_data(pdd)->td; - dev_off_time_ns = dev->power.max_time_suspended_ns - - (td->start_latency_ns + td->restore_state_latency_ns + - ktime_to_ns(ktime_sub(time_now, - dev->power.suspend_time))); - if (dev_off_time_ns <= off_on_time_ns) - return false; - - if (min_dev_off_time_ns > dev_off_time_ns - || min_dev_off_time_ns < 0) - min_dev_off_time_ns = dev_off_time_ns; - } + constraint_ns = td->effective_constraint_ns; + /* default_stop_ok() need not be called before us. */ + if (constraint_ns < 0) { + constraint_ns = dev_pm_qos_read_value(pdd->dev); + constraint_ns *= NSEC_PER_USEC; + } + if (constraint_ns == 0) + continue; - if (min_dev_off_time_ns < 0) { /* - * There are no latency constraints, so the domain can spend - * arbitrary time in the "off" state. + * constraint_ns cannot be negative here, because the device has + * been suspended. 
*/ - genpd->max_off_time_ns = -1; - return true; + constraint_ns -= td->restore_state_latency_ns; + if (constraint_ns <= off_on_time_ns) + return false; + + if (min_off_time_ns > constraint_ns || min_off_time_ns < 0) + min_off_time_ns = constraint_ns; } + genpd->cached_power_down_ok = true; + /* - * The difference between the computed minimum delta and the time needed - * to turn the domain on is the maximum theoretical time this domain can - * spend in the "off" state. + * If the computed minimum device off time is negative, there are no + * latency constraints, so the domain can spend arbitrary time in the + * "off" state. */ - min_dev_off_time_ns -= genpd->power_on_latency_ns; + if (min_off_time_ns < 0) + return true; /* - * If the difference between the computed minimum delta and the time - * needed to turn the domain off and back on on is smaller than the - * domain's power break even time, removing power from the domain is not - * worth it. + * The difference between the computed minimum subdomain or device off + * time and the time needed to turn the domain on is the maximum + * theoretical time this domain can spend in the "off" state. 
*/ - if (genpd->break_even_ns > - min_dev_off_time_ns - genpd->power_off_latency_ns) - return false; - - genpd->max_off_time_ns = min_dev_off_time_ns; + genpd->max_off_time_ns = min_off_time_ns - genpd->power_on_latency_ns; return true; } diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index b462c0e341cb..e0fb5b0435a3 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -889,6 +889,11 @@ static int dpm_suspend_noirq(pm_message_t state) if (!list_empty(&dev->power.entry)) list_move(&dev->power.entry, &dpm_noirq_list); put_device(dev); + + if (pm_wakeup_pending()) { + error = -EBUSY; + break; + } } mutex_unlock(&dpm_list_mtx); if (error) @@ -962,6 +967,11 @@ static int dpm_suspend_late(pm_message_t state) if (!list_empty(&dev->power.entry)) list_move(&dev->power.entry, &dpm_late_early_list); put_device(dev); + + if (pm_wakeup_pending()) { + error = -EBUSY; + break; + } } mutex_unlock(&dpm_list_mtx); if (error) diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 71855570922d..fd849a2c4fa8 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -352,21 +352,26 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_remove_request); * * Will register the notifier into a notification chain that gets called * upon changes to the target value for the device. + * + * If the device's constraints object doesn't exist when this routine is called, + * it will be created (or error code will be returned if that fails). */ int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier) { - int retval = 0; + int ret = 0; mutex_lock(&dev_pm_qos_mtx); - /* Silently return if the constraints object is not present. */ - if (dev->power.constraints) - retval = blocking_notifier_chain_register( - dev->power.constraints->notifiers, - notifier); + if (!dev->power.constraints) + ret = dev->power.power_state.event != PM_EVENT_INVALID ? 
+ dev_pm_qos_constraints_allocate(dev) : -ENODEV; + + if (!ret) + ret = blocking_notifier_chain_register( + dev->power.constraints->notifiers, notifier); mutex_unlock(&dev_pm_qos_mtx); - return retval; + return ret; } EXPORT_SYMBOL_GPL(dev_pm_qos_add_notifier); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index bd0f3949bcf9..59894873a3b3 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -282,47 +282,6 @@ static int rpm_callback(int (*cb)(struct device *), struct device *dev) return retval != -EACCES ? retval : -EIO; } -struct rpm_qos_data { - ktime_t time_now; - s64 constraint_ns; -}; - -/** - * rpm_update_qos_constraint - Update a given PM QoS constraint data. - * @dev: Device whose timing data to use. - * @data: PM QoS constraint data to update. - * - * Use the suspend timing data of @dev to update PM QoS constraint data pointed - * to by @data. - */ -static int rpm_update_qos_constraint(struct device *dev, void *data) -{ - struct rpm_qos_data *qos = data; - unsigned long flags; - s64 delta_ns; - int ret = 0; - - spin_lock_irqsave(&dev->power.lock, flags); - - if (dev->power.max_time_suspended_ns < 0) - goto out; - - delta_ns = dev->power.max_time_suspended_ns - - ktime_to_ns(ktime_sub(qos->time_now, dev->power.suspend_time)); - if (delta_ns <= 0) { - ret = -EBUSY; - goto out; - } - - if (qos->constraint_ns > delta_ns || qos->constraint_ns == 0) - qos->constraint_ns = delta_ns; - - out: - spin_unlock_irqrestore(&dev->power.lock, flags); - - return ret; -} - /** * rpm_suspend - Carry out runtime suspend of given device. * @dev: Device to suspend. 
@@ -349,7 +308,6 @@ static int rpm_suspend(struct device *dev, int rpmflags) { int (*callback)(struct device *); struct device *parent = NULL; - struct rpm_qos_data qos; int retval; trace_rpm_suspend(dev, rpmflags); @@ -445,38 +403,14 @@ static int rpm_suspend(struct device *dev, int rpmflags) goto out; } - qos.constraint_ns = __dev_pm_qos_read_value(dev); - if (qos.constraint_ns < 0) { - /* Negative constraint means "never suspend". */ + if (__dev_pm_qos_read_value(dev) < 0) { + /* Negative PM QoS constraint means "never suspend". */ retval = -EPERM; goto out; } - qos.constraint_ns *= NSEC_PER_USEC; - qos.time_now = ktime_get(); __update_runtime_status(dev, RPM_SUSPENDING); - if (!dev->power.ignore_children) { - if (dev->power.irq_safe) - spin_unlock(&dev->power.lock); - else - spin_unlock_irq(&dev->power.lock); - - retval = device_for_each_child(dev, &qos, - rpm_update_qos_constraint); - - if (dev->power.irq_safe) - spin_lock(&dev->power.lock); - else - spin_lock_irq(&dev->power.lock); - - if (retval) - goto fail; - } - - dev->power.suspend_time = qos.time_now; - dev->power.max_time_suspended_ns = qos.constraint_ns ? : -1; - if (dev->pm_domain) callback = dev->pm_domain->ops.runtime_suspend; else if (dev->type && dev->type->pm) @@ -529,8 +463,6 @@ static int rpm_suspend(struct device *dev, int rpmflags) fail: __update_runtime_status(dev, RPM_ACTIVE); - dev->power.suspend_time = ktime_set(0, 0); - dev->power.max_time_suspended_ns = -1; dev->power.deferred_resume = false; wake_up_all(&dev->power.wait_queue); @@ -704,9 +636,6 @@ static int rpm_resume(struct device *dev, int rpmflags) if (dev->power.no_callbacks) goto no_callback; /* Assume success. 
*/ - dev->power.suspend_time = ktime_set(0, 0); - dev->power.max_time_suspended_ns = -1; - __update_runtime_status(dev, RPM_RESUMING); if (dev->pm_domain) @@ -1369,9 +1298,6 @@ void pm_runtime_init(struct device *dev) setup_timer(&dev->power.suspend_timer, pm_suspend_timer_fn, (unsigned long)dev); - dev->power.suspend_time = ktime_set(0, 0); - dev->power.max_time_suspended_ns = -1; - init_waitqueue_head(&dev->power.wait_queue); } @@ -1389,28 +1315,3 @@ void pm_runtime_remove(struct device *dev) if (dev->power.irq_safe && dev->parent) pm_runtime_put_sync(dev->parent); } - -/** - * pm_runtime_update_max_time_suspended - Update device's suspend time data. - * @dev: Device to handle. - * @delta_ns: Value to subtract from the device's max_time_suspended_ns field. - * - * Update the device's power.max_time_suspended_ns field by subtracting - * @delta_ns from it. The resulting value of power.max_time_suspended_ns is - * never negative. - */ -void pm_runtime_update_max_time_suspended(struct device *dev, s64 delta_ns) -{ - unsigned long flags; - - spin_lock_irqsave(&dev->power.lock, flags); - - if (delta_ns > 0 && dev->power.max_time_suspended_ns > 0) { - if (dev->power.max_time_suspended_ns > delta_ns) - dev->power.max_time_suspended_ns -= delta_ns; - else - dev->power.max_time_suspended_ns = 0; - } - - spin_unlock_irqrestore(&dev->power.lock, flags); -} diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 95c12f6cb5b9..48be2ad4dd2c 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -314,22 +314,41 @@ static ssize_t wakeup_active_count_show(struct device *dev, static DEVICE_ATTR(wakeup_active_count, 0444, wakeup_active_count_show, NULL); -static ssize_t wakeup_hit_count_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t wakeup_abort_count_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + unsigned long count = 0; + bool enabled = false; + + 
spin_lock_irq(&dev->power.lock); + if (dev->power.wakeup) { + count = dev->power.wakeup->wakeup_count; + enabled = true; + } + spin_unlock_irq(&dev->power.lock); + return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n"); +} + +static DEVICE_ATTR(wakeup_abort_count, 0444, wakeup_abort_count_show, NULL); + +static ssize_t wakeup_expire_count_show(struct device *dev, + struct device_attribute *attr, + char *buf) { unsigned long count = 0; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { - count = dev->power.wakeup->hit_count; + count = dev->power.wakeup->expire_count; enabled = true; } spin_unlock_irq(&dev->power.lock); return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n"); } -static DEVICE_ATTR(wakeup_hit_count, 0444, wakeup_hit_count_show, NULL); +static DEVICE_ATTR(wakeup_expire_count, 0444, wakeup_expire_count_show, NULL); static ssize_t wakeup_active_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -398,6 +417,27 @@ static ssize_t wakeup_last_time_show(struct device *dev, } static DEVICE_ATTR(wakeup_last_time_ms, 0444, wakeup_last_time_show, NULL); + +#ifdef CONFIG_PM_AUTOSLEEP +static ssize_t wakeup_prevent_sleep_time_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + s64 msec = 0; + bool enabled = false; + + spin_lock_irq(&dev->power.lock); + if (dev->power.wakeup) { + msec = ktime_to_ms(dev->power.wakeup->prevent_sleep_time); + enabled = true; + } + spin_unlock_irq(&dev->power.lock); + return enabled ? 
sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n"); +} + +static DEVICE_ATTR(wakeup_prevent_sleep_time_ms, 0444, + wakeup_prevent_sleep_time_show, NULL); +#endif /* CONFIG_PM_AUTOSLEEP */ #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_ADVANCED_DEBUG @@ -486,11 +526,15 @@ static struct attribute *wakeup_attrs[] = { &dev_attr_wakeup.attr, &dev_attr_wakeup_count.attr, &dev_attr_wakeup_active_count.attr, - &dev_attr_wakeup_hit_count.attr, + &dev_attr_wakeup_abort_count.attr, + &dev_attr_wakeup_expire_count.attr, &dev_attr_wakeup_active.attr, &dev_attr_wakeup_total_time_ms.attr, &dev_attr_wakeup_max_time_ms.attr, &dev_attr_wakeup_last_time_ms.attr, +#ifdef CONFIG_PM_AUTOSLEEP + &dev_attr_wakeup_prevent_sleep_time_ms.attr, +#endif #endif NULL, }; diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 2a3e581b8dcd..cbb463b3a750 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -14,16 +14,15 @@ #include <linux/suspend.h> #include <linux/seq_file.h> #include <linux/debugfs.h> +#include <trace/events/power.h> #include "power.h" -#define TIMEOUT 100 - /* * If set, the suspend/hibernate code will abort transitions to a sleep state * if wakeup events are registered during or immediately before the transition. */ -bool events_check_enabled; +bool events_check_enabled __read_mostly; /* * Combined counters of registered wakeup events and wakeup events in progress. @@ -52,6 +51,8 @@ static void pm_wakeup_timer_fn(unsigned long data); static LIST_HEAD(wakeup_sources); +static DECLARE_WAIT_QUEUE_HEAD(wakeup_count_wait_queue); + /** * wakeup_source_prepare - Prepare a new wakeup source for initialization. * @ws: Wakeup source to prepare. 
@@ -132,6 +133,7 @@ void wakeup_source_add(struct wakeup_source *ws) spin_lock_init(&ws->lock); setup_timer(&ws->timer, pm_wakeup_timer_fn, (unsigned long)ws); ws->active = false; + ws->last_time = ktime_get(); spin_lock_irq(&events_lock); list_add_rcu(&ws->entry, &wakeup_sources); @@ -374,12 +376,33 @@ EXPORT_SYMBOL_GPL(device_set_wakeup_enable); */ static void wakeup_source_activate(struct wakeup_source *ws) { + unsigned int cec; + ws->active = true; ws->active_count++; ws->last_time = ktime_get(); + if (ws->autosleep_enabled) + ws->start_prevent_time = ws->last_time; /* Increment the counter of events in progress. */ - atomic_inc(&combined_event_count); + cec = atomic_inc_return(&combined_event_count); + + trace_wakeup_source_activate(ws->name, cec); +} + +/** + * wakeup_source_report_event - Report wakeup event using the given source. + * @ws: Wakeup source to report the event for. + */ +static void wakeup_source_report_event(struct wakeup_source *ws) +{ + ws->event_count++; + /* This is racy, but the counter is approximate anyway. */ + if (events_check_enabled) + ws->wakeup_count++; + + if (!ws->active) + wakeup_source_activate(ws); } /** @@ -397,10 +420,7 @@ void __pm_stay_awake(struct wakeup_source *ws) spin_lock_irqsave(&ws->lock, flags); - ws->event_count++; - if (!ws->active) - wakeup_source_activate(ws); - + wakeup_source_report_event(ws); del_timer(&ws->timer); ws->timer_expires = 0; @@ -432,6 +452,17 @@ void pm_stay_awake(struct device *dev) } EXPORT_SYMBOL_GPL(pm_stay_awake); +#ifdef CONFIG_PM_AUTOSLEEP +static void update_prevent_sleep_time(struct wakeup_source *ws, ktime_t now) +{ + ktime_t delta = ktime_sub(now, ws->start_prevent_time); + ws->prevent_sleep_time = ktime_add(ws->prevent_sleep_time, delta); +} +#else +static inline void update_prevent_sleep_time(struct wakeup_source *ws, + ktime_t now) {} +#endif + /** * wakup_source_deactivate - Mark given wakeup source as inactive. * @ws: Wakeup source to handle. 
@@ -442,6 +473,7 @@ EXPORT_SYMBOL_GPL(pm_stay_awake); */ static void wakeup_source_deactivate(struct wakeup_source *ws) { + unsigned int cnt, inpr, cec; ktime_t duration; ktime_t now; @@ -468,14 +500,23 @@ static void wakeup_source_deactivate(struct wakeup_source *ws) if (ktime_to_ns(duration) > ktime_to_ns(ws->max_time)) ws->max_time = duration; + ws->last_time = now; del_timer(&ws->timer); ws->timer_expires = 0; + if (ws->autosleep_enabled) + update_prevent_sleep_time(ws, now); + /* * Increment the counter of registered wakeup events and decrement the * couter of wakeup events in progress simultaneously. */ - atomic_add(MAX_IN_PROGRESS, &combined_event_count); + cec = atomic_add_return(MAX_IN_PROGRESS, &combined_event_count); + trace_wakeup_source_deactivate(ws->name, cec); + + split_counters(&cnt, &inpr); + if (!inpr && waitqueue_active(&wakeup_count_wait_queue)) + wake_up(&wakeup_count_wait_queue); } /** @@ -536,8 +577,10 @@ static void pm_wakeup_timer_fn(unsigned long data) spin_lock_irqsave(&ws->lock, flags); if (ws->active && ws->timer_expires - && time_after_eq(jiffies, ws->timer_expires)) + && time_after_eq(jiffies, ws->timer_expires)) { wakeup_source_deactivate(ws); + ws->expire_count++; + } spin_unlock_irqrestore(&ws->lock, flags); } @@ -564,9 +607,7 @@ void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) spin_lock_irqsave(&ws->lock, flags); - ws->event_count++; - if (!ws->active) - wakeup_source_activate(ws); + wakeup_source_report_event(ws); if (!msec) { wakeup_source_deactivate(ws); @@ -609,24 +650,6 @@ void pm_wakeup_event(struct device *dev, unsigned int msec) EXPORT_SYMBOL_GPL(pm_wakeup_event); /** - * pm_wakeup_update_hit_counts - Update hit counts of all active wakeup sources. 
- */ -static void pm_wakeup_update_hit_counts(void) -{ - unsigned long flags; - struct wakeup_source *ws; - - rcu_read_lock(); - list_for_each_entry_rcu(ws, &wakeup_sources, entry) { - spin_lock_irqsave(&ws->lock, flags); - if (ws->active) - ws->hit_count++; - spin_unlock_irqrestore(&ws->lock, flags); - } - rcu_read_unlock(); -} - -/** * pm_wakeup_pending - Check if power transition in progress should be aborted. * * Compare the current number of registered wakeup events with its preserved @@ -648,32 +671,38 @@ bool pm_wakeup_pending(void) events_check_enabled = !ret; } spin_unlock_irqrestore(&events_lock, flags); - if (ret) - pm_wakeup_update_hit_counts(); return ret; } /** * pm_get_wakeup_count - Read the number of registered wakeup events. * @count: Address to store the value at. + * @block: Whether or not to block. * - * Store the number of registered wakeup events at the address in @count. Block - * if the current number of wakeup events being processed is nonzero. + * Store the number of registered wakeup events at the address in @count. If + * @block is set, block until the current number of wakeup events being + * processed is zero. * - * Return 'false' if the wait for the number of wakeup events being processed to - * drop down to zero has been interrupted by a signal (and the current number - * of wakeup events being processed is still nonzero). Otherwise return 'true'. + * Return 'false' if the current number of wakeup events being processed is + * nonzero. Otherwise return 'true'. 
*/ -bool pm_get_wakeup_count(unsigned int *count) +bool pm_get_wakeup_count(unsigned int *count, bool block) { unsigned int cnt, inpr; - for (;;) { - split_counters(&cnt, &inpr); - if (inpr == 0 || signal_pending(current)) - break; - pm_wakeup_update_hit_counts(); - schedule_timeout_interruptible(msecs_to_jiffies(TIMEOUT)); + if (block) { + DEFINE_WAIT(wait); + + for (;;) { + prepare_to_wait(&wakeup_count_wait_queue, &wait, + TASK_INTERRUPTIBLE); + split_counters(&cnt, &inpr); + if (inpr == 0 || signal_pending(current)) + break; + + schedule(); + } + finish_wait(&wakeup_count_wait_queue, &wait); } split_counters(&cnt, &inpr); @@ -703,11 +732,37 @@ bool pm_save_wakeup_count(unsigned int count) events_check_enabled = true; } spin_unlock_irq(&events_lock); - if (!events_check_enabled) - pm_wakeup_update_hit_counts(); return events_check_enabled; } +#ifdef CONFIG_PM_AUTOSLEEP +/** + * pm_wakep_autosleep_enabled - Modify autosleep_enabled for all wakeup sources. + * @set: Whether to set or to clear the autosleep_enabled flags.
+ */ +void pm_wakep_autosleep_enabled(bool set) +{ + struct wakeup_source *ws; + ktime_t now = ktime_get(); + + rcu_read_lock(); + list_for_each_entry_rcu(ws, &wakeup_sources, entry) { + spin_lock_irq(&ws->lock); + if (ws->autosleep_enabled != set) { + ws->autosleep_enabled = set; + if (ws->active) { + if (set) + ws->start_prevent_time = now; + else + update_prevent_sleep_time(ws, now); + } + } + spin_unlock_irq(&ws->lock); + } + rcu_read_unlock(); +} +#endif /* CONFIG_PM_AUTOSLEEP */ + static struct dentry *wakeup_sources_stats_dentry; /** @@ -723,27 +778,37 @@ static int print_wakeup_source_stats(struct seq_file *m, ktime_t max_time; unsigned long active_count; ktime_t active_time; + ktime_t prevent_sleep_time; int ret; spin_lock_irqsave(&ws->lock, flags); total_time = ws->total_time; max_time = ws->max_time; + prevent_sleep_time = ws->prevent_sleep_time; active_count = ws->active_count; if (ws->active) { - active_time = ktime_sub(ktime_get(), ws->last_time); + ktime_t now = ktime_get(); + + active_time = ktime_sub(now, ws->last_time); total_time = ktime_add(total_time, active_time); if (active_time.tv64 > max_time.tv64) max_time = active_time; + + if (ws->autosleep_enabled) + prevent_sleep_time = ktime_add(prevent_sleep_time, + ktime_sub(now, ws->start_prevent_time)); } else { active_time = ktime_set(0, 0); } - ret = seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t" - "%lld\t\t%lld\t\t%lld\t\t%lld\n", - ws->name, active_count, ws->event_count, ws->hit_count, + ret = seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t%lu\t\t" + "%lld\t\t%lld\t\t%lld\t\t%lld\t\t%lld\n", + ws->name, active_count, ws->event_count, + ws->wakeup_count, ws->expire_count, ktime_to_ms(active_time), ktime_to_ms(total_time), - ktime_to_ms(max_time), ktime_to_ms(ws->last_time)); + ktime_to_ms(max_time), ktime_to_ms(ws->last_time), + ktime_to_ms(prevent_sleep_time)); spin_unlock_irqrestore(&ws->lock, flags); @@ -758,8 +823,9 @@ static int wakeup_sources_stats_show(struct seq_file *m, void *unused) { 
struct wakeup_source *ws; - seq_puts(m, "name\t\tactive_count\tevent_count\thit_count\t" - "active_since\ttotal_time\tmax_time\tlast_change\n"); + seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t" + "expire_count\tactive_since\ttotal_time\tmax_time\t" + "last_change\tprevent_suspend_time\n"); rcu_read_lock(); list_for_each_entry_rcu(ws, &wakeup_sources, entry) diff --git a/drivers/devfreq/governor_performance.c b/drivers/devfreq/governor_performance.c index 574a06b1b1de..af75ddd4f158 100644 --- a/drivers/devfreq/governor_performance.c +++ b/drivers/devfreq/governor_performance.c @@ -10,6 +10,7 @@ */ #include <linux/devfreq.h> +#include "governor.h" static int devfreq_performance_func(struct devfreq *df, unsigned long *freq) @@ -25,8 +26,14 @@ static int devfreq_performance_func(struct devfreq *df, return 0; } +static int performance_init(struct devfreq *devfreq) +{ + return update_devfreq(devfreq); +} + const struct devfreq_governor devfreq_performance = { .name = "performance", + .init = performance_init, .get_target_freq = devfreq_performance_func, .no_central_polling = true, }; diff --git a/drivers/devfreq/governor_powersave.c b/drivers/devfreq/governor_powersave.c index d742d4a82d6a..fec0cdbd2477 100644 --- a/drivers/devfreq/governor_powersave.c +++ b/drivers/devfreq/governor_powersave.c @@ -10,6 +10,7 @@ */ #include <linux/devfreq.h> +#include "governor.h" static int devfreq_powersave_func(struct devfreq *df, unsigned long *freq) @@ -22,8 +23,14 @@ static int devfreq_powersave_func(struct devfreq *df, return 0; } +static int powersave_init(struct devfreq *devfreq) +{ + return update_devfreq(devfreq); +} + const struct devfreq_governor devfreq_powersave = { .name = "powersave", + .init = powersave_init, .get_target_freq = devfreq_powersave_func, .no_central_polling = true, }; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index c0b3c70ee87a..079d1be65ba9 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -33,6 +33,7 @@ #include <linux/bitops.h> 
#include <linux/mutex.h> #include <linux/anon_inodes.h> +#include <linux/device.h> #include <asm/uaccess.h> #include <asm/io.h> #include <asm/mman.h> @@ -87,7 +88,7 @@ */ /* Epoll private bits inside the event mask */ -#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET) +#define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET) /* Maximum number of nesting allowed inside epoll sets */ #define EP_MAX_NESTS 4 @@ -154,6 +155,9 @@ struct epitem { /* List header used to link this item to the "struct file" items list */ struct list_head fllink; + /* wakeup_source used when EPOLLWAKEUP is set */ + struct wakeup_source *ws; + /* The structure that describe the interested events and the source fd */ struct epoll_event event; }; @@ -194,6 +198,9 @@ struct eventpoll { */ struct epitem *ovflist; + /* wakeup_source used when ep_scan_ready_list is running */ + struct wakeup_source *ws; + /* The user that created the eventpoll descriptor */ struct user_struct *user; @@ -588,8 +595,10 @@ static int ep_scan_ready_list(struct eventpoll *ep, * queued into ->ovflist but the "txlist" might already * contain them, and the list_splice() below takes care of them. */ - if (!ep_is_linked(&epi->rdllink)) + if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); + } } /* * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after @@ -602,6 +611,7 @@ static int ep_scan_ready_list(struct eventpoll *ep, * Quickly re-inject items left on "txlist". 
*/ list_splice(&txlist, &ep->rdllist); + __pm_relax(ep->ws); if (!list_empty(&ep->rdllist)) { /* @@ -656,6 +666,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) list_del_init(&epi->rdllink); spin_unlock_irqrestore(&ep->lock, flags); + wakeup_source_unregister(epi->ws); + /* At this point it is safe to free the eventpoll item */ kmem_cache_free(epi_cache, epi); @@ -706,6 +718,7 @@ static void ep_free(struct eventpoll *ep) mutex_unlock(&epmutex); mutex_destroy(&ep->mtx); free_uid(ep->user); + wakeup_source_unregister(ep->ws); kfree(ep); } @@ -737,6 +750,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, * callback, but it's not actually ready, as far as * caller requested events goes. We can remove it here. */ + __pm_relax(epi->ws); list_del_init(&epi->rdllink); } } @@ -927,13 +941,23 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k if (epi->next == EP_UNACTIVE_PTR) { epi->next = ep->ovflist; ep->ovflist = epi; + if (epi->ws) { + /* + * Activate ep->ws since epi->ws may get + * deactivated at any time. 
+ */ + __pm_stay_awake(ep->ws); + } + } goto out_unlock; } /* If this file is already in the ready list we exit soon */ - if (!ep_is_linked(&epi->rdllink)) + if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); + } /* * Wake up ( if active ) both the eventpoll wait list and the ->poll() @@ -1091,6 +1115,30 @@ static int reverse_path_check(void) return error; } +static int ep_create_wakeup_source(struct epitem *epi) +{ + const char *name; + + if (!epi->ep->ws) { + epi->ep->ws = wakeup_source_register("eventpoll"); + if (!epi->ep->ws) + return -ENOMEM; + } + + name = epi->ffd.file->f_path.dentry->d_name.name; + epi->ws = wakeup_source_register(name); + if (!epi->ws) + return -ENOMEM; + + return 0; +} + +static void ep_destroy_wakeup_source(struct epitem *epi) +{ + wakeup_source_unregister(epi->ws); + epi->ws = NULL; +} + /* * Must be called with "mtx" held. */ @@ -1118,6 +1166,13 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, epi->event = *event; epi->nwait = 0; epi->next = EP_UNACTIVE_PTR; + if (epi->event.events & EPOLLWAKEUP) { + error = ep_create_wakeup_source(epi); + if (error) + goto error_create_wakeup_source; + } else { + epi->ws = NULL; + } /* Initialize the poll table using the queue callback */ epq.epi = epi; @@ -1164,6 +1219,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, /* If the file is already "ready" we drop it inside the ready list */ if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) @@ -1204,6 +1260,9 @@ error_unregister: list_del_init(&epi->rdllink); spin_unlock_irqrestore(&ep->lock, flags); + wakeup_source_unregister(epi->ws); + +error_create_wakeup_source: kmem_cache_free(epi_cache, epi); return error; @@ -1229,6 +1288,12 @@ static int ep_modify(struct eventpoll *ep, 
struct epitem *epi, struct epoll_even epi->event.events = event->events; pt._key = event->events; epi->event.data = event->data; /* protected by mtx */ + if (epi->event.events & EPOLLWAKEUP) { + if (!epi->ws) + ep_create_wakeup_source(epi); + } else if (epi->ws) { + ep_destroy_wakeup_source(epi); + } /* * Get current event bits. We can safely use the file* here because @@ -1244,6 +1309,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even spin_lock_irq(&ep->lock); if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); /* Notify waiting tasks that events are available */ if (waitqueue_active(&ep->wq)) @@ -1282,6 +1348,18 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, !list_empty(head) && eventcnt < esed->maxevents;) { epi = list_first_entry(head, struct epitem, rdllink); + /* + * Activate ep->ws before deactivating epi->ws to prevent + * triggering auto-suspend here (in case we reactivate epi->ws + * below). + * + * This could be rearranged to delay the deactivation of epi->ws + * instead, but then epi->ws would temporarily be out of sync + * with ep_is_linked(). + */ + if (epi->ws && epi->ws->active) + __pm_stay_awake(ep->ws); + __pm_relax(epi->ws); list_del_init(&epi->rdllink); pt._key = epi->event.events; @@ -1298,6 +1376,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, if (__put_user(revents, &uevent->events) || __put_user(epi->event.data, &uevent->data)) { list_add(&epi->rdllink, head); + __pm_stay_awake(epi->ws); return eventcnt ? eventcnt : -EFAULT; } eventcnt++; @@ -1317,6 +1396,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, * poll callback will queue them in ep->ovflist.
*/ list_add_tail(&epi->rdllink, &ep->rdllist); + __pm_stay_awake(epi->ws); } } } @@ -1629,6 +1709,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, if (!tfile->f_op || !tfile->f_op->poll) goto error_tgt_fput; + /* Check if EPOLLWAKEUP is allowed */ + if ((epds.events & EPOLLWAKEUP) && !capable(CAP_EPOLLWAKEUP)) + epds.events &= ~EPOLLWAKEUP; + /* * We have to check that the file structure underneath the file descriptor * the user passed to us _is_ an eventpoll file. And also we do not permit diff --git a/include/linux/capability.h b/include/linux/capability.h index 12d52dedb229..c398cff3dab7 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -360,8 +360,11 @@ struct cpu_vfs_cap_data { #define CAP_WAKE_ALARM 35 +/* Allow preventing system suspends while epoll events are pending */ -#define CAP_LAST_CAP CAP_WAKE_ALARM +#define CAP_EPOLLWAKEUP 36 + +#define CAP_LAST_CAP CAP_EPOLLWAKEUP #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 657ab55beda0..6f8be328770a 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -26,6 +26,18 @@ #define EPOLL_CTL_DEL 2 #define EPOLL_CTL_MOD 3 +/* + * Request the handling of system wakeup events so as to prevent system suspends + * from happening while those events are being processed. + * + * Assuming neither EPOLLET nor EPOLLONESHOT is set, system suspends will not be + * re-allowed until epoll_wait is called again after consuming the wakeup + * event(s). 
+ * + * Requires CAP_EPOLLWAKEUP + */ +#define EPOLLWAKEUP (1 << 29) + /* Set the One Shot behaviour for the target file descriptor */ #define EPOLLONESHOT (1 << 30) diff --git a/include/linux/pm.h b/include/linux/pm.h index 715305e05123..f067e60a3832 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -544,8 +544,6 @@ struct dev_pm_info { unsigned long active_jiffies; unsigned long suspended_jiffies; unsigned long accounting_timestamp; - ktime_t suspend_time; - s64 max_time_suspended_ns; struct dev_pm_qos_request *pq_req; #endif struct pm_subsys_data *subsys_data; /* Owned by the subsystem. */ diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 91f8286106ea..30f794eb3826 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -14,6 +14,7 @@ #include <linux/pm.h> #include <linux/err.h> #include <linux/of.h> +#include <linux/notifier.h> enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ @@ -70,9 +71,9 @@ struct generic_pm_domain { int (*power_on)(struct generic_pm_domain *domain); s64 power_on_latency_ns; struct gpd_dev_ops dev_ops; - s64 break_even_ns; /* Power break even for the entire domain. */ s64 max_off_time_ns; /* Maximum allowed "suspended" time. 
*/ - ktime_t power_off_time; + bool max_off_time_changed; + bool cached_power_down_ok; struct device_node *of_node; /* Node in device tree */ }; @@ -93,13 +94,17 @@ struct gpd_timing_data { s64 start_latency_ns; s64 save_state_latency_ns; s64 restore_state_latency_ns; - s64 break_even_ns; + s64 effective_constraint_ns; + bool constraint_changed; + bool cached_stop_ok; }; struct generic_pm_domain_data { struct pm_domain_data base; struct gpd_dev_ops ops; struct gpd_timing_data td; + struct notifier_block nb; + struct mutex lock; bool need_restore; bool always_on; }; @@ -141,6 +146,7 @@ static inline int pm_genpd_of_add_device(struct device_node *genpd_node, extern int pm_genpd_remove_device(struct generic_pm_domain *genpd, struct device *dev); extern void pm_genpd_dev_always_on(struct device *dev, bool val); +extern void pm_genpd_dev_need_restore(struct device *dev, bool val); extern int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *new_subdomain); extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, @@ -184,6 +190,7 @@ static inline int pm_genpd_remove_device(struct generic_pm_domain *genpd, return -ENOSYS; } static inline void pm_genpd_dev_always_on(struct device *dev, bool val) {} +static inline void pm_genpd_dev_need_restore(struct device *dev, bool val) {} static inline int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *new_sd) { diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 609daae7a014..f271860c78d5 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -150,9 +150,6 @@ static inline void pm_runtime_set_autosuspend_delay(struct device *dev, static inline unsigned long pm_runtime_autosuspend_expiration( struct device *dev) { return 0; } -static inline void pm_runtime_update_max_time_suspended(struct device *dev, - s64 delta_ns) {} - #endif /* !CONFIG_PM_RUNTIME */ static inline int pm_runtime_idle(struct device *dev) diff 
--git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index d9f05113e5fb..569781faa504 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -33,12 +33,15 @@ * * @total_time: Total time this wakeup source has been active. * @max_time: Maximum time this wakeup source has been continuously active. - * @last_time: Monotonic clock when the wakeup source's was activated last time. + * @last_time: Monotonic clock when the wakeup source's was touched last time. + * @prevent_sleep_time: Total time this source has been preventing autosleep. * @event_count: Number of signaled wakeup events. * @active_count: Number of times the wakeup sorce was activated. * @relax_count: Number of times the wakeup sorce was deactivated. - * @hit_count: Number of times the wakeup sorce might abort system suspend. + * @expire_count: Number of times the wakeup source's timeout has expired. + * @wakeup_count: Number of times the wakeup source might abort suspend. * @active: Status of the wakeup source. + * @has_timeout: The wakeup source has been activated with a timeout. 
*/ struct wakeup_source { const char *name; @@ -49,11 +52,15 @@ struct wakeup_source { ktime_t total_time; ktime_t max_time; ktime_t last_time; + ktime_t start_prevent_time; + ktime_t prevent_sleep_time; unsigned long event_count; unsigned long active_count; unsigned long relax_count; - unsigned long hit_count; - unsigned int active:1; + unsigned long expire_count; + unsigned long wakeup_count; + bool active:1; + bool autosleep_enabled:1; }; #ifdef CONFIG_PM_SLEEP diff --git a/include/linux/suspend.h b/include/linux/suspend.h index ac1c114c499d..cd83059fb592 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -356,8 +356,9 @@ extern int unregister_pm_notifier(struct notifier_block *nb); extern bool events_check_enabled; extern bool pm_wakeup_pending(void); -extern bool pm_get_wakeup_count(unsigned int *count); +extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); +extern void pm_wakep_autosleep_enabled(bool set); static inline void lock_system_sleep(void) { @@ -407,6 +408,17 @@ static inline void unlock_system_sleep(void) {} #endif /* !CONFIG_PM_SLEEP */ +#ifdef CONFIG_PM_AUTOSLEEP + +/* kernel/power/autosleep.c */ +void queue_up_suspend_work(void); + +#else /* !CONFIG_PM_AUTOSLEEP */ + +static inline void queue_up_suspend_work(void) {} + +#endif /* !CONFIG_PM_AUTOSLEEP */ + #ifdef CONFIG_ARCH_SAVE_PAGE_KEYS /* * The ARCH_SAVE_PAGE_KEYS functions can be used by an architecture diff --git a/include/trace/events/power.h b/include/trace/events/power.h index cae9a94f025d..0c9783841a30 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -65,6 +65,40 @@ TRACE_EVENT(machine_suspend, TP_printk("state=%lu", (unsigned long)__entry->state) ); +DECLARE_EVENT_CLASS(wakeup_source, + + TP_PROTO(const char *name, unsigned int state), + + TP_ARGS(name, state), + + TP_STRUCT__entry( + __string( name, name ) + __field( u64, state ) + ), + + TP_fast_assign( + 
__assign_str(name, name); + __entry->state = state; + ), + + TP_printk("%s state=0x%lx", __get_str(name), + (unsigned long)__entry->state) +); + +DEFINE_EVENT(wakeup_source, wakeup_source_activate, + + TP_PROTO(const char *name, unsigned int state), + + TP_ARGS(name, state) +); + +DEFINE_EVENT(wakeup_source, wakeup_source_deactivate, + + TP_PROTO(const char *name, unsigned int state), + + TP_ARGS(name, state) +); + #ifdef CONFIG_EVENT_POWER_TRACING_DEPRECATED /* diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index deb5461e3216..8f9b4eb974e0 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -103,6 +103,33 @@ config PM_SLEEP_SMP select HOTPLUG select HOTPLUG_CPU +config PM_AUTOSLEEP + bool "Opportunistic sleep" + depends on PM_SLEEP + default n + ---help--- + Allow the kernel to trigger a system transition into a global sleep + state automatically whenever there are no active wakeup sources. + +config PM_WAKELOCKS + bool "User space wakeup sources interface" + depends on PM_SLEEP + default n + ---help--- + Allow user space to create, activate and deactivate wakeup source + objects with the help of a sysfs-based interface. 
+ +config PM_WAKELOCKS_LIMIT + int "Maximum number of user space wakeup sources (0 = no limit)" + range 0 100000 + default 100 + depends on PM_WAKELOCKS + +config PM_WAKELOCKS_GC + bool "Garbage collector for user space wakeup sources" + depends on PM_WAKELOCKS + default y + config PM_RUNTIME bool "Run-time PM core functionality" depends on !IA64_HP_SIM diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 66d808ec5252..29472bff11ef 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -9,5 +9,7 @@ obj-$(CONFIG_SUSPEND) += suspend.o obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ block_io.o +obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o +obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c new file mode 100644 index 000000000000..ca304046d9e2 --- /dev/null +++ b/kernel/power/autosleep.c @@ -0,0 +1,127 @@ +/* + * kernel/power/autosleep.c + * + * Opportunistic sleep support. + * + * Copyright (C) 2012 Rafael J. Wysocki <rjw@sisk.pl> + */ + +#include <linux/device.h> +#include <linux/mutex.h> +#include <linux/pm_wakeup.h> + +#include "power.h" + +static suspend_state_t autosleep_state; +static struct workqueue_struct *autosleep_wq; +/* + * Note: it is only safe to mutex_lock(&autosleep_lock) if a wakeup_source + * is active, otherwise a deadlock with try_to_suspend() is possible. + * Alternatively mutex_lock_interruptible() can be used. This will then fail + * if an auto_sleep cycle tries to freeze processes. 
+ */ +static DEFINE_MUTEX(autosleep_lock); +static struct wakeup_source *autosleep_ws; + +static void try_to_suspend(struct work_struct *work) +{ + unsigned int initial_count, final_count; + + if (!pm_get_wakeup_count(&initial_count, true)) + goto out; + + mutex_lock(&autosleep_lock); + + if (!pm_save_wakeup_count(initial_count)) { + mutex_unlock(&autosleep_lock); + goto out; + } + + if (autosleep_state == PM_SUSPEND_ON) { + mutex_unlock(&autosleep_lock); + return; + } + if (autosleep_state >= PM_SUSPEND_MAX) + hibernate(); + else + pm_suspend(autosleep_state); + + mutex_unlock(&autosleep_lock); + + if (!pm_get_wakeup_count(&final_count, false)) + goto out; + + /* + * If the wakeup occurred for an unknown reason, wait to prevent the + * system from trying to suspend and waking up in a tight loop. + */ + if (final_count == initial_count) + schedule_timeout_uninterruptible(HZ / 2); + + out: + queue_up_suspend_work(); +} + +static DECLARE_WORK(suspend_work, try_to_suspend); + +void queue_up_suspend_work(void) +{ + if (!work_pending(&suspend_work) && autosleep_state > PM_SUSPEND_ON) + queue_work(autosleep_wq, &suspend_work); +} + +suspend_state_t pm_autosleep_state(void) +{ + return autosleep_state; +} + +int pm_autosleep_lock(void) +{ + return mutex_lock_interruptible(&autosleep_lock); +} + +void pm_autosleep_unlock(void) +{ + mutex_unlock(&autosleep_lock); +} + +int pm_autosleep_set_state(suspend_state_t state) +{ + +#ifndef CONFIG_HIBERNATION + if (state >= PM_SUSPEND_MAX) + return -EINVAL; +#endif + + __pm_stay_awake(autosleep_ws); + + mutex_lock(&autosleep_lock); + + autosleep_state = state; + + __pm_relax(autosleep_ws); + + if (state > PM_SUSPEND_ON) { + pm_wakep_autosleep_enabled(true); + queue_up_suspend_work(); + } else { + pm_wakep_autosleep_enabled(false); + } + + mutex_unlock(&autosleep_lock); + return 0; +} + +int __init pm_autosleep_init(void) +{ + autosleep_ws = wakeup_source_register("autosleep"); + if (!autosleep_ws) + return -ENOMEM; + + autosleep_wq =
alloc_ordered_workqueue("autosleep", 0); + if (autosleep_wq) + return 0; + + wakeup_source_unregister(autosleep_ws); + return -ENOMEM; +} diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index e09dfbfeecee..8b53db38a279 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -25,6 +25,8 @@ #include <linux/freezer.h> #include <linux/gfp.h> #include <linux/syscore_ops.h> +#include <linux/ctype.h> +#include <linux/genhd.h> #include <scsi/scsi_scan.h> #include "power.h" @@ -722,6 +724,17 @@ static int software_resume(void) /* Check if the device is there */ swsusp_resume_device = name_to_dev_t(resume_file); + + /* + * name_to_dev_t cannot verify the partition if resume_file is in + * integer format (e.g. major:minor). + */ + if (isdigit(resume_file[0]) && resume_wait) { + int partno; + while (!get_gendisk(swsusp_resume_device, &partno)) + msleep(10); + } + if (!swsusp_resume_device) { /* * Some device discovery might still be in progress; we need diff --git a/kernel/power/main.c b/kernel/power/main.c index 1c12581f1c62..428f8a034e96 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -269,8 +269,7 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, return (s - buf); } -static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t n) +static suspend_state_t decode_state(const char *buf, size_t n) { #ifdef CONFIG_SUSPEND suspend_state_t state = PM_SUSPEND_STANDBY; @@ -278,27 +277,48 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, #endif char *p; int len; - int error = -EINVAL; p = memchr(buf, '\n', n); len = p ? p - buf : n; - /* First, check if we are requested to hibernate */ - if (len == 4 && !strncmp(buf, "disk", len)) { - error = hibernate(); - goto Exit; - } + /* Check hibernation first.
*/ + if (len == 4 && !strncmp(buf, "disk", len)) + return PM_SUSPEND_MAX; #ifdef CONFIG_SUSPEND - for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) { - if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) { - error = pm_suspend(state); - break; - } - } + for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) + if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) + return state; #endif - Exit: + return PM_SUSPEND_ON; +} + +static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t n) +{ + suspend_state_t state; + int error; + + error = pm_autosleep_lock(); + if (error) + return error; + + if (pm_autosleep_state() > PM_SUSPEND_ON) { + error = -EBUSY; + goto out; + } + + state = decode_state(buf, n); + if (state < PM_SUSPEND_MAX) + error = pm_suspend(state); + else if (state == PM_SUSPEND_MAX) + error = hibernate(); + else + error = -EINVAL; + + out: + pm_autosleep_unlock(); return error ? error : n; } @@ -339,7 +359,8 @@ static ssize_t wakeup_count_show(struct kobject *kobj, { unsigned int val; - return pm_get_wakeup_count(&val) ? sprintf(buf, "%u\n", val) : -EINTR; + return pm_get_wakeup_count(&val, true) ? 
+ sprintf(buf, "%u\n", val) : -EINTR; } static ssize_t wakeup_count_store(struct kobject *kobj, @@ -347,15 +368,106 @@ static ssize_t wakeup_count_store(struct kobject *kobj, const char *buf, size_t n) { unsigned int val; + int error; + + error = pm_autosleep_lock(); + if (error) + return error; + + if (pm_autosleep_state() > PM_SUSPEND_ON) { + error = -EBUSY; + goto out; + } + error = -EINVAL; if (sscanf(buf, "%u", &val) == 1) { if (pm_save_wakeup_count(val)) - return n; + error = n; } - return -EINVAL; + + out: + pm_autosleep_unlock(); + return error; } power_attr(wakeup_count); + +#ifdef CONFIG_PM_AUTOSLEEP +static ssize_t autosleep_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + suspend_state_t state = pm_autosleep_state(); + + if (state == PM_SUSPEND_ON) + return sprintf(buf, "off\n"); + +#ifdef CONFIG_SUSPEND + if (state < PM_SUSPEND_MAX) + return sprintf(buf, "%s\n", valid_state(state) ? + pm_states[state] : "error"); +#endif +#ifdef CONFIG_HIBERNATION + return sprintf(buf, "disk\n"); +#else + return sprintf(buf, "error"); +#endif +} + +static ssize_t autosleep_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + suspend_state_t state = decode_state(buf, n); + int error; + + if (state == PM_SUSPEND_ON + && strcmp(buf, "off") && strcmp(buf, "off\n")) + return -EINVAL; + + error = pm_autosleep_set_state(state); + return error ? error : n; +} + +power_attr(autosleep); +#endif /* CONFIG_PM_AUTOSLEEP */ + +#ifdef CONFIG_PM_WAKELOCKS +static ssize_t wake_lock_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return pm_show_wakelocks(buf, true); +} + +static ssize_t wake_lock_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + int error = pm_wake_lock(buf); + return error ? 
error : n; +} + +power_attr(wake_lock); + +static ssize_t wake_unlock_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return pm_show_wakelocks(buf, false); +} + +static ssize_t wake_unlock_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + int error = pm_wake_unlock(buf); + return error ? error : n; +} + +power_attr(wake_unlock); + +#endif /* CONFIG_PM_WAKELOCKS */ #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_TRACE @@ -409,6 +521,13 @@ static struct attribute * g[] = { #ifdef CONFIG_PM_SLEEP &pm_async_attr.attr, &wakeup_count_attr.attr, +#ifdef CONFIG_PM_AUTOSLEEP + &autosleep_attr.attr, +#endif +#ifdef CONFIG_PM_WAKELOCKS + &wake_lock_attr.attr, + &wake_unlock_attr.attr, +#endif #ifdef CONFIG_PM_DEBUG &pm_test_attr.attr, #endif @@ -444,7 +563,10 @@ static int __init pm_init(void) power_kobj = kobject_create_and_add("power", NULL); if (!power_kobj) return -ENOMEM; - return sysfs_create_group(power_kobj, &attr_group); + error = sysfs_create_group(power_kobj, &attr_group); + if (error) + return error; + return pm_autosleep_init(); } core_initcall(pm_init); diff --git a/kernel/power/power.h b/kernel/power/power.h index 98f3622d7407..b0bd4beaebfe 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -264,3 +264,30 @@ static inline void suspend_thaw_processes(void) { } #endif + +#ifdef CONFIG_PM_AUTOSLEEP + +/* kernel/power/autosleep.c */ +extern int pm_autosleep_init(void); +extern int pm_autosleep_lock(void); +extern void pm_autosleep_unlock(void); +extern suspend_state_t pm_autosleep_state(void); +extern int pm_autosleep_set_state(suspend_state_t state); + +#else /* !CONFIG_PM_AUTOSLEEP */ + +static inline int pm_autosleep_init(void) { return 0; } +static inline int pm_autosleep_lock(void) { return 0; } +static inline void pm_autosleep_unlock(void) {} +static inline suspend_state_t pm_autosleep_state(void) { return PM_SUSPEND_ON; } + +#endif /* !CONFIG_PM_AUTOSLEEP */ + +#ifdef 
CONFIG_PM_WAKELOCKS + +/* kernel/power/wakelock.c */ +extern ssize_t pm_show_wakelocks(char *buf, bool show_active); +extern int pm_wake_lock(const char *buf); +extern int pm_wake_unlock(const char *buf); + +#endif /* CONFIG_PM_WAKELOCKS */ diff --git a/kernel/power/swap.c b/kernel/power/swap.c index eef311a58a64..11e22c068e8b 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -6,7 +6,7 @@ * * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz> * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> - * Copyright (C) 2010 Bojan Smojver <bojan@rexursive.com> + * Copyright (C) 2010-2012 Bojan Smojver <bojan@rexursive.com> * * This file is released under the GPLv2. * @@ -282,14 +282,17 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain) return -ENOSPC; if (bio_chain) { - src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); + src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN | + __GFP_NORETRY); if (src) { copy_page(src, buf); } else { ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */ if (ret) return ret; - src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); + src = (void *)__get_free_page(__GFP_WAIT | + __GFP_NOWARN | + __GFP_NORETRY); if (src) { copy_page(src, buf); } else { @@ -367,12 +370,17 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, clear_page(handle->cur); handle->cur_swap = offset; handle->k = 0; - } - if (bio_chain && low_free_pages() <= handle->reqd_free_pages) { - error = hib_wait_on_bio_chain(bio_chain); - if (error) - goto out; - handle->reqd_free_pages = reqd_free_pages(); + + if (bio_chain && low_free_pages() <= handle->reqd_free_pages) { + error = hib_wait_on_bio_chain(bio_chain); + if (error) + goto out; + /* + * Recalculate the number of required free pages, to + * make sure we never take more than half. 
+ */ + handle->reqd_free_pages = reqd_free_pages(); + } } out: return error; @@ -419,8 +427,9 @@ static int swap_writer_finish(struct swap_map_handle *handle, /* Maximum number of threads for compression/decompression. */ #define LZO_THREADS 3 -/* Maximum number of pages for read buffering. */ -#define LZO_READ_PAGES (MAP_PAGE_ENTRIES * 8) +/* Minimum/maximum number of pages for read buffering. */ +#define LZO_MIN_RD_PAGES 1024 +#define LZO_MAX_RD_PAGES 8192 /** @@ -631,12 +640,6 @@ static int save_image_lzo(struct swap_map_handle *handle, } /* - * Adjust number of free pages after all allocations have been done. - * We don't want to run out of pages when writing. - */ - handle->reqd_free_pages = reqd_free_pages(); - - /* * Start the CRC32 thread. */ init_waitqueue_head(&crc->go); @@ -657,6 +660,12 @@ static int save_image_lzo(struct swap_map_handle *handle, goto out_clean; } + /* + * Adjust the number of required free pages after all allocations have + * been done. We don't want to run out of pages when writing. + */ + handle->reqd_free_pages = reqd_free_pages(); + printk(KERN_INFO "PM: Using %u thread(s) for compression.\n" "PM: Compressing and saving image data (%u pages) ... 
", @@ -1067,7 +1076,7 @@ static int load_image_lzo(struct swap_map_handle *handle, unsigned i, thr, run_threads, nr_threads; unsigned ring = 0, pg = 0, ring_size = 0, have = 0, want, need, asked = 0; - unsigned long read_pages; + unsigned long read_pages = 0; unsigned char **page = NULL; struct dec_data *data = NULL; struct crc_data *crc = NULL; @@ -1079,7 +1088,7 @@ static int load_image_lzo(struct swap_map_handle *handle, nr_threads = num_online_cpus() - 1; nr_threads = clamp_val(nr_threads, 1, LZO_THREADS); - page = vmalloc(sizeof(*page) * LZO_READ_PAGES); + page = vmalloc(sizeof(*page) * LZO_MAX_RD_PAGES); if (!page) { printk(KERN_ERR "PM: Failed to allocate LZO page\n"); ret = -ENOMEM; @@ -1144,15 +1153,22 @@ static int load_image_lzo(struct swap_map_handle *handle, } /* - * Adjust number of pages for read buffering, in case we are short. + * Set the number of pages for read buffering. + * This is complete guesswork, because we'll only know the real + * picture once prepare_image() is called, which is much later on + * during the image load phase. We'll assume the worst case and + * say that none of the image pages are from high memory. */ - read_pages = (nr_free_pages() - snapshot_get_image_size()) >> 1; - read_pages = clamp_val(read_pages, LZO_CMP_PAGES, LZO_READ_PAGES); + if (low_free_pages() > snapshot_get_image_size()) + read_pages = (low_free_pages() - snapshot_get_image_size()) / 2; + read_pages = clamp_val(read_pages, LZO_MIN_RD_PAGES, LZO_MAX_RD_PAGES); for (i = 0; i < read_pages; i++) { page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ? __GFP_WAIT | __GFP_HIGH : - __GFP_WAIT); + __GFP_WAIT | __GFP_NOWARN | + __GFP_NORETRY); + if (!page[i]) { if (i < LZO_CMP_PAGES) { ring_size = i; diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c new file mode 100644 index 000000000000..c8fba3380076 --- /dev/null +++ b/kernel/power/wakelock.c @@ -0,0 +1,259 @@ +/* + * kernel/power/wakelock.c + * + * User space wakeup sources support. 
+ * + * Copyright (C) 2012 Rafael J. Wysocki <rjw@sisk.pl> + * + * This code is based on the analogous interface allowing user space to + * manipulate wakelocks on Android. + */ + +#include <linux/ctype.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/hrtimer.h> +#include <linux/list.h> +#include <linux/rbtree.h> +#include <linux/slab.h> + +static DEFINE_MUTEX(wakelocks_lock); + +struct wakelock { + char *name; + struct rb_node node; + struct wakeup_source ws; +#ifdef CONFIG_PM_WAKELOCKS_GC + struct list_head lru; +#endif +}; + +static struct rb_root wakelocks_tree = RB_ROOT; + +ssize_t pm_show_wakelocks(char *buf, bool show_active) +{ + struct rb_node *node; + struct wakelock *wl; + char *str = buf; + char *end = buf + PAGE_SIZE; + + mutex_lock(&wakelocks_lock); + + for (node = rb_first(&wakelocks_tree); node; node = rb_next(node)) { + wl = rb_entry(node, struct wakelock, node); + if (wl->ws.active == show_active) + str += scnprintf(str, end - str, "%s ", wl->name); + } + if (str > buf) + str--; + + str += scnprintf(str, end - str, "\n"); + + mutex_unlock(&wakelocks_lock); + return (str - buf); +} + +#if CONFIG_PM_WAKELOCKS_LIMIT > 0 +static unsigned int number_of_wakelocks; + +static inline bool wakelocks_limit_exceeded(void) +{ + return number_of_wakelocks > CONFIG_PM_WAKELOCKS_LIMIT; +} + +static inline void increment_wakelocks_number(void) +{ + number_of_wakelocks++; +} + +static inline void decrement_wakelocks_number(void) +{ + number_of_wakelocks--; +} +#else /* CONFIG_PM_WAKELOCKS_LIMIT = 0 */ +static inline bool wakelocks_limit_exceeded(void) { return false; } +static inline void increment_wakelocks_number(void) {} +static inline void decrement_wakelocks_number(void) {} +#endif /* CONFIG_PM_WAKELOCKS_LIMIT */ + +#ifdef CONFIG_PM_WAKELOCKS_GC +#define WL_GC_COUNT_MAX 100 +#define WL_GC_TIME_SEC 300 + +static LIST_HEAD(wakelocks_lru_list); +static unsigned int wakelocks_gc_count; + +static inline void wakelocks_lru_add(struct wakelock 
*wl) +{ + list_add(&wl->lru, &wakelocks_lru_list); +} + +static inline void wakelocks_lru_most_recent(struct wakelock *wl) +{ + list_move(&wl->lru, &wakelocks_lru_list); +} + +static void wakelocks_gc(void) +{ + struct wakelock *wl, *aux; + ktime_t now; + + if (++wakelocks_gc_count <= WL_GC_COUNT_MAX) + return; + + now = ktime_get(); + list_for_each_entry_safe_reverse(wl, aux, &wakelocks_lru_list, lru) { + u64 idle_time_ns; + bool active; + + spin_lock_irq(&wl->ws.lock); + idle_time_ns = ktime_to_ns(ktime_sub(now, wl->ws.last_time)); + active = wl->ws.active; + spin_unlock_irq(&wl->ws.lock); + + if (idle_time_ns < ((u64)WL_GC_TIME_SEC * NSEC_PER_SEC)) + break; + + if (!active) { + wakeup_source_remove(&wl->ws); + rb_erase(&wl->node, &wakelocks_tree); + list_del(&wl->lru); + kfree(wl->name); + kfree(wl); + decrement_wakelocks_number(); + } + } + wakelocks_gc_count = 0; +} +#else /* !CONFIG_PM_WAKELOCKS_GC */ +static inline void wakelocks_lru_add(struct wakelock *wl) {} +static inline void wakelocks_lru_most_recent(struct wakelock *wl) {} +static inline void wakelocks_gc(void) {} +#endif /* !CONFIG_PM_WAKELOCKS_GC */ + +static struct wakelock *wakelock_lookup_add(const char *name, size_t len, + bool add_if_not_found) +{ + struct rb_node **node = &wakelocks_tree.rb_node; + struct rb_node *parent = *node; + struct wakelock *wl; + + while (*node) { + int diff; + + parent = *node; + wl = rb_entry(*node, struct wakelock, node); + diff = strncmp(name, wl->name, len); + if (diff == 0) { + if (wl->name[len]) + diff = -1; + else + return wl; + } + if (diff < 0) + node = &(*node)->rb_left; + else + node = &(*node)->rb_right; + } + if (!add_if_not_found) + return ERR_PTR(-EINVAL); + + if (wakelocks_limit_exceeded()) + return ERR_PTR(-ENOSPC); + + /* Not found, we have to add a new one. 
*/ + wl = kzalloc(sizeof(*wl), GFP_KERNEL); + if (!wl) + return ERR_PTR(-ENOMEM); + + wl->name = kstrndup(name, len, GFP_KERNEL); + if (!wl->name) { + kfree(wl); + return ERR_PTR(-ENOMEM); + } + wl->ws.name = wl->name; + wakeup_source_add(&wl->ws); + rb_link_node(&wl->node, parent, node); + rb_insert_color(&wl->node, &wakelocks_tree); + wakelocks_lru_add(wl); + increment_wakelocks_number(); + return wl; +} + +int pm_wake_lock(const char *buf) +{ + const char *str = buf; + struct wakelock *wl; + u64 timeout_ns = 0; + size_t len; + int ret = 0; + + while (*str && !isspace(*str)) + str++; + + len = str - buf; + if (!len) + return -EINVAL; + + if (*str && *str != '\n') { + /* Find out if there's a valid timeout string appended. */ + ret = kstrtou64(skip_spaces(str), 10, &timeout_ns); + if (ret) + return -EINVAL; + } + + mutex_lock(&wakelocks_lock); + + wl = wakelock_lookup_add(buf, len, true); + if (IS_ERR(wl)) { + ret = PTR_ERR(wl); + goto out; + } + if (timeout_ns) { + u64 timeout_ms = timeout_ns + NSEC_PER_MSEC - 1; + + do_div(timeout_ms, NSEC_PER_MSEC); + __pm_wakeup_event(&wl->ws, timeout_ms); + } else { + __pm_stay_awake(&wl->ws); + } + + wakelocks_lru_most_recent(wl); + + out: + mutex_unlock(&wakelocks_lock); + return ret; +} + +int pm_wake_unlock(const char *buf) +{ + struct wakelock *wl; + size_t len; + int ret = 0; + + len = strlen(buf); + if (!len) + return -EINVAL; + + if (buf[len-1] == '\n') + len--; + + if (!len) + return -EINVAL; + + mutex_lock(&wakelocks_lock); + + wl = wakelock_lookup_add(buf, len, false); + if (IS_ERR(wl)) { + ret = PTR_ERR(wl); + goto out; + } + __pm_relax(&wl->ws); + + wakelocks_lru_most_recent(wl); + wakelocks_gc(); + + out: + mutex_unlock(&wakelocks_lock); + return ret; +} |