From e978aa7d7d57d04eb5f88a7507c4fb98577def77 Mon Sep 17 00:00:00 2001 From: Deepthi Dharwar Date: Fri, 28 Oct 2011 16:20:09 +0530 Subject: cpuidle: Move dev->last_residency update to driver enter routine; remove dev->last_state Cpuidle governor only suggests the state to enter using the governor->select() interface, but allows the low level driver to override the recommended state. The actual entered state may be different because of software or hardware demotion. Software demotion is done by the back-end cpuidle driver and can be accounted correctly. Current cpuidle code uses last_state field to capture the actual state entered and based on that updates the statistics for the state entered. Ideally the driver enter routine should update the counters, and it should return the state actually entered rather than the time spent there. The generic cpuidle code should simply handle where the counters live in the sysfs namespace, not updating the counters. Reference: https://lkml.org/lkml/2011/3/25/52 Signed-off-by: Deepthi Dharwar Signed-off-by: Trinabh Gupta Tested-by: Jean Pihet Reviewed-by: Kevin Hilman Acked-by: Arjan van de Ven Acked-by: Kevin Hilman Signed-off-by: Len Brown --- drivers/acpi/processor_idle.c | 75 ++++++++++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 26 deletions(-) (limited to 'drivers/acpi/processor_idle.c') diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 431ab11c8c1b..9cd08cecb347 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -741,22 +741,24 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) /** * acpi_idle_enter_c1 - enters an ACPI C1 state-type * @dev: the target CPU - * @state: the state data + * @index: index of target state * * This is equivalent to the HALT instruction. */ static int acpi_idle_enter_c1(struct cpuidle_device *dev, - struct cpuidle_state *state) + int index) { ktime_t kt1, kt2; s64 idle_time; struct acpi_processor *pr; + struct cpuidle_state *state = &dev->states[index]; struct acpi_processor_cx *cx = cpuidle_get_statedata(state); pr = __this_cpu_read(processors); + dev->last_residency = 0; if (unlikely(!pr)) - return 0; + return -EINVAL; local_irq_disable(); @@ -764,7 +766,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev, if (acpi_idle_suspend) { local_irq_enable(); cpu_relax(); - return 0; + return -EINVAL; } lapic_timer_state_broadcast(pr, cx, 1); @@ -773,37 +775,46 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev, kt2 = ktime_get_real(); idle_time = ktime_to_us(ktime_sub(kt2, kt1)); + /* Update device last_residency*/ + dev->last_residency = (int)idle_time; + local_irq_enable(); cx->usage++; lapic_timer_state_broadcast(pr, cx, 0); - return idle_time; + return index; } /** * acpi_idle_enter_simple - enters an ACPI state without BM handling * @dev: the target CPU - * @state: the state data + * @index: the index of suggested state */ static int acpi_idle_enter_simple(struct cpuidle_device *dev, - struct cpuidle_state *state) + int index) { struct acpi_processor *pr; + struct cpuidle_state *state = &dev->states[index]; struct acpi_processor_cx *cx = cpuidle_get_statedata(state); ktime_t kt1, kt2; s64 idle_time_ns; s64 idle_time; pr = __this_cpu_read(processors); + dev->last_residency = 0; if (unlikely(!pr)) - return 0; - - if (acpi_idle_suspend) - return(acpi_idle_enter_c1(dev, state)); + return -EINVAL; local_irq_disable(); + if (acpi_idle_suspend) { + local_irq_enable(); + cpu_relax(); + return -EINVAL; + } + + if (cx->entry_method != ACPI_CSTATE_FFH) { current_thread_info()->status &= ~TS_POLLING; /* @@ -815,7 +826,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, if (unlikely(need_resched())) { current_thread_info()->status |= TS_POLLING; local_irq_enable(); - return 0; + return -EINVAL; } } @@ -837,6 +848,9 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, idle_time = idle_time_ns; do_div(idle_time, NSEC_PER_USEC); + /* Update device last_residency*/ + dev->last_residency = (int)idle_time; + /* Tell the scheduler how much we idled: */ sched_clock_idle_wakeup_event(idle_time_ns); @@ -848,7 +862,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, lapic_timer_state_broadcast(pr, cx, 0); cx->time += idle_time; - return idle_time; + return index; } static int c3_cpu_count; @@ -857,14 +871,15 @@ static DEFINE_SPINLOCK(c3_lock); /** * acpi_idle_enter_bm - enters C3 with proper BM handling * @dev: the target CPU - * @state: the state data + * @index: the index of suggested state * * If BM is detected, the deepest non-C3 idle state is entered instead. */ static int acpi_idle_enter_bm(struct cpuidle_device *dev, - struct cpuidle_state *state) + int index) { struct acpi_processor *pr; + struct cpuidle_state *state = &dev->states[index]; struct acpi_processor_cx *cx = cpuidle_get_statedata(state); ktime_t kt1, kt2; s64 idle_time_ns; @@ -872,22 +887,26 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, pr = __this_cpu_read(processors); + dev->last_residency = 0; if (unlikely(!pr)) - return 0; + return -EINVAL; - if (acpi_idle_suspend) - return(acpi_idle_enter_c1(dev, state)); + + if (acpi_idle_suspend) { + cpu_relax(); + return -EINVAL; + } if (!cx->bm_sts_skip && acpi_idle_bm_check()) { - if (dev->safe_state) { - dev->last_state = dev->safe_state; - return dev->safe_state->enter(dev, dev->safe_state); + if (dev->safe_state_index >= 0) { + return dev->states[dev->safe_state_index].enter(dev, + dev->safe_state_index); } else { local_irq_disable(); acpi_safe_halt(); local_irq_enable(); - return 0; + return -EINVAL; } } @@ -904,7 +923,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, if (unlikely(need_resched())) { current_thread_info()->status |= TS_POLLING; local_irq_enable(); - return 0; + return -EINVAL; } } @@ -954,6 +973,9 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, idle_time = idle_time_ns; do_div(idle_time, NSEC_PER_USEC); + /* Update device last_residency*/ + dev->last_residency = (int)idle_time; + /* Tell the scheduler how much we idled: */ sched_clock_idle_wakeup_event(idle_time_ns); @@ -965,7 +987,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, lapic_timer_state_broadcast(pr, cx, 0); cx->time += idle_time; - return idle_time; + return index; } struct cpuidle_driver acpi_idle_driver = { @@ -992,6 +1014,7 @@ static int acpi_processor_setup_cpuidle(struct acpi_processor *pr) } dev->cpu = pr->id; + dev->safe_state_index = -1; for (i = 0; i < CPUIDLE_STATE_MAX; i++) { dev->states[i].name[0] = '\0'; dev->states[i].desc[0] = '\0'; @@ -1027,13 +1050,13 @@ static int acpi_processor_setup_cpuidle(struct acpi_processor *pr) state->flags |= CPUIDLE_FLAG_TIME_VALID; state->enter = acpi_idle_enter_c1; - dev->safe_state = state; + dev->safe_state_index = count; break; case ACPI_STATE_C2: state->flags |= CPUIDLE_FLAG_TIME_VALID; state->enter = acpi_idle_enter_simple; - dev->safe_state = state; + dev->safe_state_index = count; break; case ACPI_STATE_C3: -- cgit v1.2.3 From 4202735e8ab6ecfb0381631a0d0b58fefe0bd4e2 Mon Sep 17 00:00:00 2001 From: Deepthi Dharwar Date: Fri, 28 Oct 2011 16:20:33 +0530 Subject: cpuidle: Split cpuidle_state structure and move per-cpu statistics fields This is the first step towards global registration of cpuidle states. The statistics used primarily by the governor are per-cpu and have to be split from rest of the fields inside cpuidle_state, which would be made global i.e. single copy. The driver_data field is also per-cpu and moved. Signed-off-by: Deepthi Dharwar Signed-off-by: Trinabh Gupta Tested-by: Jean Pihet Reviewed-by: Kevin Hilman Acked-by: Arjan van de Ven Acked-by: Kevin Hilman Signed-off-by: Len Brown --- drivers/acpi/processor_idle.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'drivers/acpi/processor_idle.c') diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 9cd08cecb347..b98c75285690 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -745,14 +745,13 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) * * This is equivalent to the HALT instruction. */ -static int acpi_idle_enter_c1(struct cpuidle_device *dev, - int index) +static int acpi_idle_enter_c1(struct cpuidle_device *dev, int index) { ktime_t kt1, kt2; s64 idle_time; struct acpi_processor *pr; - struct cpuidle_state *state = &dev->states[index]; - struct acpi_processor_cx *cx = cpuidle_get_statedata(state); + struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; + struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage); pr = __this_cpu_read(processors); dev->last_residency = 0; @@ -790,12 +789,11 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev, * @dev: the target CPU * @index: the index of suggested state */ -static int acpi_idle_enter_simple(struct cpuidle_device *dev, - int index) +static int acpi_idle_enter_simple(struct cpuidle_device *dev, int index) { struct acpi_processor *pr; - struct cpuidle_state *state = &dev->states[index]; - struct acpi_processor_cx *cx = cpuidle_get_statedata(state); + struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; + struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage); ktime_t kt1, kt2; s64 idle_time_ns; s64 idle_time; @@ -875,12 +873,11 @@ static DEFINE_SPINLOCK(c3_lock); * * If BM is detected, the deepest non-C3 idle state is entered instead. */ -static int acpi_idle_enter_bm(struct cpuidle_device *dev, - int index) +static int acpi_idle_enter_bm(struct cpuidle_device *dev, int index) { struct acpi_processor *pr; - struct cpuidle_state *state = &dev->states[index]; - struct acpi_processor_cx *cx = cpuidle_get_statedata(state); + struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; + struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage); ktime_t kt1, kt2; s64 idle_time_ns; s64 idle_time; @@ -1004,6 +1001,7 @@ static int acpi_processor_setup_cpuidle(struct acpi_processor *pr) int i, count = CPUIDLE_DRIVER_STATE_START; struct acpi_processor_cx *cx; struct cpuidle_state *state; + struct cpuidle_state_usage *state_usage; struct cpuidle_device *dev = &pr->power.dev; if (!pr->flags.power_setup_done) @@ -1026,6 +1024,7 @@ static int acpi_processor_setup_cpuidle(struct acpi_processor *pr) for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) { cx = &pr->power.states[i]; state = &dev->states[count]; + state_usage = &dev->states_usage[count]; if (!cx->valid) continue; @@ -1036,7 +1035,7 @@ static int acpi_processor_setup_cpuidle(struct acpi_processor *pr) !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED)) continue; #endif - cpuidle_set_statedata(state, cx); + cpuidle_set_statedata(state_usage, cx); snprintf(state->name, CPUIDLE_NAME_LEN, "C%d", i); strncpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); -- cgit v1.2.3 From 46bcfad7a819bd17ac4e831b04405152d59784ab Mon Sep 17 00:00:00 2001 From: Deepthi Dharwar Date: Fri, 28 Oct 2011 16:20:42 +0530 Subject: cpuidle: Single/Global registration of idle states This patch makes the cpuidle_states structure global (single copy) instead of per-cpu. The statistics needed on per-cpu basis by the governor are kept per-cpu. This simplifies the cpuidle subsystem as state registration is done by single cpu only. Having single copy of cpuidle_states saves memory. Rare case of asymmetric C-states can be handled within the cpuidle driver and architectures such as POWER do not have asymmetric C-states. Having single/global registration of all the idle states, dynamic C-state transitions on x86 are handled by the boot cpu. Here, the boot cpu would disable all the devices, re-populate the states and later enable all the devices, irrespective of the cpu that would receive the notification first. Reference: https://lkml.org/lkml/2011/4/25/83 Signed-off-by: Deepthi Dharwar Signed-off-by: Trinabh Gupta Tested-by: Jean Pihet Reviewed-by: Kevin Hilman Acked-by: Arjan van de Ven Acked-by: Kevin Hilman Signed-off-by: Len Brown --- drivers/acpi/processor_idle.c | 191 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 166 insertions(+), 25 deletions(-) (limited to 'drivers/acpi/processor_idle.c') diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index b98c75285690..24fe3afa7119 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -741,11 +741,13 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) /** * acpi_idle_enter_c1 - enters an ACPI C1 state-type * @dev: the target CPU + * @drv: cpuidle driver containing cpuidle state info * @index: index of target state * * This is equivalent to the HALT instruction. */ -static int acpi_idle_enter_c1(struct cpuidle_device *dev, int index) +static int acpi_idle_enter_c1(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { ktime_t kt1, kt2; s64 idle_time; @@ -787,9 +789,11 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev, int index) /** * acpi_idle_enter_simple - enters an ACPI state without BM handling * @dev: the target CPU + * @drv: cpuidle driver with cpuidle state information * @index: the index of suggested state */ -static int acpi_idle_enter_simple(struct cpuidle_device *dev, int index) +static int acpi_idle_enter_simple(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { struct acpi_processor *pr; struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; @@ -869,11 +873,13 @@ static DEFINE_SPINLOCK(c3_lock); /** * acpi_idle_enter_bm - enters C3 with proper BM handling * @dev: the target CPU + * @drv: cpuidle driver containing state data * @index: the index of suggested state * * If BM is detected, the deepest non-C3 idle state is entered instead. */ -static int acpi_idle_enter_bm(struct cpuidle_device *dev, int index) +static int acpi_idle_enter_bm(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { struct acpi_processor *pr; struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; @@ -896,9 +902,9 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, int index) } if (!cx->bm_sts_skip && acpi_idle_bm_check()) { - if (dev->safe_state_index >= 0) { - return dev->states[dev->safe_state_index].enter(dev, - dev->safe_state_index); + if (drv->safe_state_index >= 0) { + return drv->states[drv->safe_state_index].enter(dev, + drv, drv->safe_state_index); } else { local_irq_disable(); acpi_safe_halt(); @@ -993,14 +999,15 @@ struct cpuidle_driver acpi_idle_driver = { }; /** - * acpi_processor_setup_cpuidle - prepares and configures CPUIDLE + * acpi_processor_setup_cpuidle_cx - prepares and configures CPUIDLE + * device i.e. per-cpu data + * * @pr: the ACPI processor */ -static int acpi_processor_setup_cpuidle(struct acpi_processor *pr) +static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr) { int i, count = CPUIDLE_DRIVER_STATE_START; struct acpi_processor_cx *cx; - struct cpuidle_state *state; struct cpuidle_state_usage *state_usage; struct cpuidle_device *dev = &pr->power.dev; @@ -1012,18 +1019,12 @@ static int acpi_processor_setup_cpuidle(struct acpi_processor *pr) } dev->cpu = pr->id; - dev->safe_state_index = -1; - for (i = 0; i < CPUIDLE_STATE_MAX; i++) { - dev->states[i].name[0] = '\0'; - dev->states[i].desc[0] = '\0'; - } if (max_cstate == 0) max_cstate = 1; for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) { cx = &pr->power.states[i]; - state = &dev->states[count]; state_usage = &dev->states_usage[count]; if (!cx->valid) @@ -1035,8 +1036,64 @@ static int acpi_processor_setup_cpuidle(struct acpi_processor *pr) !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED)) continue; #endif + cpuidle_set_statedata(state_usage, cx); + count++; + if (count == CPUIDLE_STATE_MAX) + break; + } + + dev->state_count = count; + + if (!count) + return -EINVAL; + + return 0; +} + +/** + * acpi_processor_setup_cpuidle states- prepares and configures cpuidle + * global state data i.e. idle routines + * + * @pr: the ACPI processor + */ +static int acpi_processor_setup_cpuidle_states(struct acpi_processor *pr) +{ + int i, count = CPUIDLE_DRIVER_STATE_START; + struct acpi_processor_cx *cx; + struct cpuidle_state *state; + struct cpuidle_driver *drv = &acpi_idle_driver; + + if (!pr->flags.power_setup_done) + return -EINVAL; + + if (pr->flags.power == 0) + return -EINVAL; + + drv->safe_state_index = -1; + for (i = 0; i < CPUIDLE_STATE_MAX; i++) { + drv->states[i].name[0] = '\0'; + drv->states[i].desc[0] = '\0'; + } + + if (max_cstate == 0) + max_cstate = 1; + + for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) { + cx = &pr->power.states[i]; + + if (!cx->valid) + continue; + +#ifdef CONFIG_HOTPLUG_CPU + if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) && + !pr->flags.has_cst && + !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED)) + continue; +#endif + + state = &drv->states[count]; snprintf(state->name, CPUIDLE_NAME_LEN, "C%d", i); strncpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); state->exit_latency = cx->latency; @@ -1049,13 +1106,13 @@ static int acpi_processor_setup_cpuidle(struct acpi_processor *pr) state->flags |= CPUIDLE_FLAG_TIME_VALID; state->enter = acpi_idle_enter_c1; - dev->safe_state_index = count; + drv->safe_state_index = count; break; case ACPI_STATE_C2: state->flags |= CPUIDLE_FLAG_TIME_VALID; state->enter = acpi_idle_enter_simple; - dev->safe_state_index = count; + drv->safe_state_index = count; break; case ACPI_STATE_C3: @@ -1071,7 +1128,7 @@ static int acpi_processor_setup_cpuidle(struct acpi_processor *pr) break; } - dev->state_count = count; + drv->state_count = count; if (!count) return -EINVAL; @@ -1079,7 +1136,7 @@ static int acpi_processor_setup_cpuidle(struct acpi_processor *pr) return 0; } -int acpi_processor_cst_has_changed(struct acpi_processor *pr) +int acpi_processor_hotplug(struct acpi_processor *pr) { int ret = 0; @@ -1100,7 +1157,7 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr) cpuidle_disable_device(&pr->power.dev); acpi_processor_get_power_info(pr); if (pr->flags.power) { - acpi_processor_setup_cpuidle(pr); + acpi_processor_setup_cpuidle_cx(pr); ret = cpuidle_enable_device(&pr->power.dev); } cpuidle_resume_and_unlock(); @@ -1108,10 +1165,72 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr) return ret; } +int acpi_processor_cst_has_changed(struct acpi_processor *pr) +{ + int cpu; + struct acpi_processor *_pr; + + if (disabled_by_idle_boot_param()) + return 0; + + if (!pr) + return -EINVAL; + + if (nocst) + return -ENODEV; + + if (!pr->flags.power_setup_done) + return -ENODEV; + + /* + * FIXME: Design the ACPI notification to make it once per + * system instead of once per-cpu. This condition is a hack + * to make the code that updates C-States be called once. + */ + + if (smp_processor_id() == 0 && + cpuidle_get_driver() == &acpi_idle_driver) { + + cpuidle_pause_and_lock(); + /* Protect against cpu-hotplug */ + get_online_cpus(); + + /* Disable all cpuidle devices */ + for_each_online_cpu(cpu) { + _pr = per_cpu(processors, cpu); + if (!_pr || !_pr->flags.power_setup_done) + continue; + cpuidle_disable_device(&_pr->power.dev); + } + + /* Populate Updated C-state information */ + acpi_processor_setup_cpuidle_states(pr); + + /* Enable all cpuidle devices */ + for_each_online_cpu(cpu) { + _pr = per_cpu(processors, cpu); + if (!_pr || !_pr->flags.power_setup_done) + continue; + acpi_processor_get_power_info(_pr); + if (_pr->flags.power) { + acpi_processor_setup_cpuidle_cx(_pr); + cpuidle_enable_device(&_pr->power.dev); + } + } + put_online_cpus(); + cpuidle_resume_and_unlock(); + } + + return 0; +} + +static int acpi_processor_registered; + int __cpuinit acpi_processor_power_init(struct acpi_processor *pr, struct acpi_device *device) { acpi_status status = 0; + int retval; static int first_run; if (disabled_by_idle_boot_param()) @@ -1148,9 +1267,26 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr, * platforms that only support C1. */ if (pr->flags.power) { - acpi_processor_setup_cpuidle(pr); - if (cpuidle_register_device(&pr->power.dev)) - return -EIO; + /* Register acpi_idle_driver if not already registered */ + if (!acpi_processor_registered) { + acpi_processor_setup_cpuidle_states(pr); + retval = cpuidle_register_driver(&acpi_idle_driver); + if (retval) + return retval; + printk(KERN_DEBUG "ACPI: %s registered with cpuidle\n", + acpi_idle_driver.name); + } + /* Register per-cpu cpuidle_device. Cpuidle driver + * must already be registered before registering device + */ + acpi_processor_setup_cpuidle_cx(pr); + retval = cpuidle_register_device(&pr->power.dev); + if (retval) { + if (acpi_processor_registered == 0) + cpuidle_unregister_driver(&acpi_idle_driver); + return retval; + } + acpi_processor_registered++; } return 0; } @@ -1161,8 +1297,13 @@ int acpi_processor_power_exit(struct acpi_processor *pr, if (disabled_by_idle_boot_param()) return 0; - cpuidle_unregister_device(&pr->power.dev); - pr->flags.power_setup_done = 0; + if (pr->flags.power) { + cpuidle_unregister_device(&pr->power.dev); + acpi_processor_registered--; + if (acpi_processor_registered == 0) + cpuidle_unregister_driver(&acpi_idle_driver); + } + pr->flags.power_setup_done = 0; return 0; } -- cgit v1.2.3