diff options
Diffstat (limited to 'drivers/cpuidle')
-rw-r--r-- | drivers/cpuidle/cpuidle-pseries.c | 77 | ||||
-rw-r--r-- | drivers/cpuidle/governors/teo.c | 48 |
2 files changed, 77 insertions, 48 deletions
diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c index a2b5c6f60cf0..7e7ab5597d7a 100644 --- a/drivers/cpuidle/cpuidle-pseries.c +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -346,11 +346,9 @@ static int pseries_cpuidle_driver_init(void) static void __init fixup_cede0_latency(void) { struct xcede_latency_payload *payload; - u64 min_latency_us; + u64 min_xcede_latency_us = UINT_MAX; int i; - min_latency_us = dedicated_states[1].exit_latency; // CEDE latency - if (parse_cede_parameters()) return; @@ -358,42 +356,45 @@ static void __init fixup_cede0_latency(void) nr_xcede_records); payload = &xcede_latency_parameter.payload; + + /* + * The CEDE idle state maps to CEDE(0). While the hypervisor + * does not advertise CEDE(0) exit latency values, it does + * advertise the latency values of the extended CEDE states. + * We use the lowest advertised exit latency value as a proxy + * for the exit latency of CEDE(0). + */ for (i = 0; i < nr_xcede_records; i++) { struct xcede_latency_record *record = &payload->records[i]; + u8 hint = record->hint; u64 latency_tb = be64_to_cpu(record->latency_ticks); u64 latency_us = DIV_ROUND_UP_ULL(tb_to_ns(latency_tb), NSEC_PER_USEC); - if (latency_us == 0) - pr_warn("cpuidle: xcede record %d has an unrealistic latency of 0us.\n", i); - - if (latency_us < min_latency_us) - min_latency_us = latency_us; - } - - /* - * By default, we assume that CEDE(0) has exit latency 10us, - * since there is no way for us to query from the platform. - * - * However, if the wakeup latency of an Extended CEDE state is - * smaller than 10us, then we can be sure that CEDE(0) - * requires no more than that. - * - * Perform the fix-up. 
- */ - if (min_latency_us < dedicated_states[1].exit_latency) { /* - * We set a minimum of 1us wakeup latency for cede0 to - * distinguish it from snooze + * We expect the exit latency of an extended CEDE + * state to be non-zero, since it takes at least + * a few nanoseconds to wakeup the idle CPU and + * dispatch the virtual processor into the Linux + * Guest. + * + * So we consider only non-zero value for performing + * the fixup of CEDE(0) latency. */ - u64 cede0_latency = 1; + if (latency_us == 0) { + pr_warn("cpuidle: Skipping xcede record %d [hint=%d]. Exit latency = 0us\n", + i, hint); + continue; + } - if (min_latency_us > cede0_latency) - cede0_latency = min_latency_us - 1; + if (latency_us < min_xcede_latency_us) + min_xcede_latency_us = latency_us; + } - dedicated_states[1].exit_latency = cede0_latency; - dedicated_states[1].target_residency = 10 * (cede0_latency); + if (min_xcede_latency_us != UINT_MAX) { + dedicated_states[1].exit_latency = min_xcede_latency_us; + dedicated_states[1].target_residency = 10 * (min_xcede_latency_us); pr_info("cpuidle: Fixed up CEDE exit latency to %llu us\n", - cede0_latency); + min_xcede_latency_us); } } @@ -402,7 +403,7 @@ static void __init fixup_cede0_latency(void) * pseries_idle_probe() * Choose state table for shared versus dedicated partition */ -static int pseries_idle_probe(void) +static int __init pseries_idle_probe(void) { if (cpuidle_disable != IDLE_NO_OVERRIDE) @@ -419,7 +420,21 @@ static int pseries_idle_probe(void) cpuidle_state_table = shared_states; max_idle_state = ARRAY_SIZE(shared_states); } else { - fixup_cede0_latency(); + /* + * Use firmware provided latency values + * starting with POWER10 platforms. In the + * case that we are running on a POWER10 + * platform but in an earlier compat mode, we + * can still use the firmware provided values. + * + * However, on platforms prior to POWER10, we + * cannot rely on the accuracy of the firmware + * provided latency values.
On such platforms, + * go with the conservative default estimate + * of 10us. + */ + if (cpu_has_feature(CPU_FTR_ARCH_31) || pvr_version_is(PVR_POWER10)) + fixup_cede0_latency(); cpuidle_state_table = dedicated_states; max_idle_state = NR_DEDICATED_STATES; } diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 7b91060e82f6..d9262db79cae 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -382,8 +382,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, alt_intercepts = 2 * idx_intercept_sum > cpu_data->total - idx_hit_sum; alt_recent = idx_recent_sum > NR_RECENT / 2; if (alt_recent || alt_intercepts) { - s64 last_enabled_span_ns = duration_ns; - int last_enabled_idx = idx; + s64 first_suitable_span_ns = duration_ns; + int first_suitable_idx = idx; /* * Look for the deepest idle state whose target residency had @@ -397,37 +397,51 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, intercept_sum = 0; recent_sum = 0; - for (i = idx - 1; i >= idx0; i--) { + for (i = idx - 1; i >= 0; i--) { struct teo_bin *bin = &cpu_data->state_bins[i]; s64 span_ns; intercept_sum += bin->intercepts; recent_sum += bin->recent; + span_ns = teo_middle_of_bin(i, drv); + + if ((!alt_recent || 2 * recent_sum > idx_recent_sum) && + (!alt_intercepts || + 2 * intercept_sum > idx_intercept_sum)) { + if (teo_time_ok(span_ns) && + !dev->states_usage[i].disable) { + idx = i; + duration_ns = span_ns; + } else { + /* + * The current state is too shallow or + * disabled, so take the first enabled + * deeper state with suitable time span. + */ + idx = first_suitable_idx; + duration_ns = first_suitable_span_ns; + } + break; + } + if (dev->states_usage[i].disable) continue; - span_ns = teo_middle_of_bin(i, drv); if (!teo_time_ok(span_ns)) { /* - * The current state is too shallow, so select - * the first enabled deeper state. 
+ * The current state is too shallow, but if an + * alternative candidate state has been found, + * it may still turn out to be a better choice. */ - duration_ns = last_enabled_span_ns; - idx = last_enabled_idx; - break; - } + if (first_suitable_idx != idx) + continue; - if ((!alt_recent || 2 * recent_sum > idx_recent_sum) && - (!alt_intercepts || - 2 * intercept_sum > idx_intercept_sum)) { - idx = i; - duration_ns = span_ns; break; } - last_enabled_span_ns = span_ns; - last_enabled_idx = i; + first_suitable_span_ns = span_ns; + first_suitable_idx = i; } } |