Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 17
-rw-r--r-- | arch/x86/kvm/pmu.c | 48
-rw-r--r-- | arch/x86/kvm/pmu.h | 3
3 files changed, 53 insertions, 15 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a565a2e70f30..562eaa938847 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -500,8 +500,23 @@ struct kvm_pmc {
 	u8 idx;
 	bool is_paused;
 	bool intr;
+	/*
+	 * Base value of the PMC counter, relative to the *consumed* count in
+	 * the associated perf_event.  This value includes counter updates from
+	 * the perf_event and emulated_count since the last time the counter
+	 * was reprogrammed, but it is *not* the current value as seen by the
+	 * guest or userspace.
+	 *
+	 * The count is relative to the associated perf_event so that KVM
+	 * doesn't need to reprogram the perf_event every time the guest writes
+	 * to the counter.
+	 */
 	u64 counter;
-	u64 prev_counter;
+	/*
+	 * PMC events triggered by KVM emulation that haven't been fully
+	 * processed, i.e. haven't undergone overflow detection.
+	 */
+	u64 emulated_counter;
 	u64 eventsel;
 	struct perf_event *perf_event;
 	struct kvm_vcpu *vcpu;
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 3725d001239d..87cc6c8809ad 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -127,9 +127,9 @@ static void kvm_perf_overflow(struct perf_event *perf_event,
 	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
 
 	/*
-	 * Ignore overflow events for counters that are scheduled to be
-	 * reprogrammed, e.g. if a PMI for the previous event races with KVM's
-	 * handling of a related guest WRMSR.
+	 * Ignore asynchronous overflow events for counters that are scheduled
+	 * to be reprogrammed, e.g. if a PMI for the previous event races with
+	 * KVM's handling of a related guest WRMSR.
 	 */
 	if (test_and_set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi))
 		return;
@@ -224,17 +224,30 @@ static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config,
 	return 0;
 }
 
-static void pmc_pause_counter(struct kvm_pmc *pmc)
+static bool pmc_pause_counter(struct kvm_pmc *pmc)
 {
 	u64 counter = pmc->counter;
-
-	if (!pmc->perf_event || pmc->is_paused)
-		return;
+	u64 prev_counter;
 
 	/* update counter, reset event value to avoid redundant accumulation */
-	counter += perf_event_pause(pmc->perf_event, true);
+	if (pmc->perf_event && !pmc->is_paused)
+		counter += perf_event_pause(pmc->perf_event, true);
+
+	/*
+	 * Snapshot the previous counter *after* accumulating state from perf.
+	 * If overflow already happened, hardware (via perf) is responsible for
+	 * generating a PMI.  KVM just needs to detect overflow on emulated
+	 * counter events that haven't yet been processed.
+	 */
+	prev_counter = counter & pmc_bitmask(pmc);
+
+	counter += pmc->emulated_counter;
 	pmc->counter = counter & pmc_bitmask(pmc);
+
+	pmc->emulated_counter = 0;
 	pmc->is_paused = true;
+
+	return pmc->counter < prev_counter;
 }
 
 static bool pmc_resume_counter(struct kvm_pmc *pmc)
@@ -289,6 +302,15 @@ static void pmc_update_sample_period(struct kvm_pmc *pmc)
 
 void pmc_write_counter(struct kvm_pmc *pmc, u64 val)
 {
+	/*
+	 * Drop any unconsumed accumulated counts, the WRMSR is a write, not a
+	 * read-modify-write.  Adjust the counter value so that its value is
+	 * relative to the current count, as reading the current count from
+	 * perf is faster than pausing and reprogramming the event in order to
+	 * reset it to '0'.  Note, this very sneakily offsets the accumulated
+	 * emulated count too, by using pmc_read_counter()!
+	 */
+	pmc->emulated_counter = 0;
 	pmc->counter += val - pmc_read_counter(pmc);
 	pmc->counter &= pmc_bitmask(pmc);
 	pmc_update_sample_period(pmc);
@@ -428,14 +450,15 @@ static void reprogram_counter(struct kvm_pmc *pmc)
 	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
 	u64 eventsel = pmc->eventsel;
 	u64 new_config = eventsel;
+	bool emulate_overflow;
 	u8 fixed_ctr_ctrl;
 
-	pmc_pause_counter(pmc);
+	emulate_overflow = pmc_pause_counter(pmc);
 
 	if (!pmc_event_is_allowed(pmc))
 		goto reprogram_complete;
 
-	if (pmc->counter < pmc->prev_counter)
+	if (emulate_overflow)
 		__kvm_perf_overflow(pmc, false);
 
 	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
@@ -475,7 +498,6 @@ static void reprogram_counter(struct kvm_pmc *pmc)
 
 reprogram_complete:
 	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
-	pmc->prev_counter = 0;
 }
 
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
@@ -701,6 +723,7 @@ static void kvm_pmu_reset(struct kvm_vcpu *vcpu)
 
 		pmc_stop_counter(pmc);
 		pmc->counter = 0;
+		pmc->emulated_counter = 0;
 
 		if (pmc_is_gp(pmc))
 			pmc->eventsel = 0;
@@ -772,8 +795,7 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
 
 static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
 {
-	pmc->prev_counter = pmc->counter;
-	pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);
+	pmc->emulated_counter++;
 	kvm_pmu_request_counter_reprogram(pmc);
 }
 
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index cae85e550f60..7caeb3d8d4fd 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -66,7 +66,8 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc)
 {
 	u64 counter, enabled, running;
 
-	counter = pmc->counter;
+	counter = pmc->counter + pmc->emulated_counter;
+
 	if (pmc->perf_event && !pmc->is_paused)
 		counter += perf_event_read_value(pmc->perf_event, &enabled,
 						 &running);
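After this change the guest-visible count is split across three quantities: pmc->counter (a base value relative to the count already consumed from the perf_event), pmc->emulated_counter (emulated events not yet checked for overflow), and whatever is still accumulating in the perf_event itself. The stand-alone C model below is only an illustration of that arithmetic, not kernel code: perf_delta stands in for the value returned by perf_event_read_value()/perf_event_pause(), the 48-bit width is an arbitrary example, and the model_* names are made up to mirror pmc_read_counter(), pmc_write_counter() and pmc_pause_counter() from the diff above.

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct model_pmc {
	uint64_t counter;           /* base value, relative to the consumed perf count */
	uint64_t emulated_counter;  /* emulated events not yet overflow-checked */
	uint64_t perf_delta;        /* unconsumed count still sitting in the perf event */
	bool     is_paused;
};

static uint64_t pmc_bitmask(void)
{
	return (1ULL << 48) - 1;    /* example width; real PMC widths vary */
}

/* Mirrors pmc_read_counter(): base + emulated + live perf count, masked. */
static uint64_t model_read(struct model_pmc *pmc)
{
	uint64_t counter = pmc->counter + pmc->emulated_counter;

	if (!pmc->is_paused)
		counter += pmc->perf_delta;

	return counter & pmc_bitmask();
}

/* Mirrors pmc_write_counter(): drop emulated counts, rebase the counter. */
static void model_write(struct model_pmc *pmc, uint64_t val)
{
	pmc->emulated_counter = 0;
	pmc->counter += val - model_read(pmc);
	pmc->counter &= pmc_bitmask();
}

/*
 * Mirrors pmc_pause_counter(): fold in the consumed perf count first, then the
 * emulated events, and report whether the emulated events wrapped the counter.
 * A wrap caused purely by hardware counts is left to perf to signal via PMI.
 */
static bool model_pause(struct model_pmc *pmc)
{
	uint64_t counter = pmc->counter;
	uint64_t prev_counter;

	if (!pmc->is_paused) {
		counter += pmc->perf_delta;
		pmc->perf_delta = 0;
	}

	prev_counter = counter & pmc_bitmask();

	counter += pmc->emulated_counter;
	pmc->counter = counter & pmc_bitmask();

	pmc->emulated_counter = 0;
	pmc->is_paused = true;

	return pmc->counter < prev_counter;     /* overflow to emulate? */
}

int main(void)
{
	struct model_pmc pmc = { .counter = 100, .perf_delta = 5 };

	pmc.emulated_counter = 2;                       /* e.g. two emulated events */
	printf("guest sees %" PRIu64 "\n", model_read(&pmc));   /* 107 */

	model_write(&pmc, pmc_bitmask());               /* guest writes the max value */
	pmc.emulated_counter = 1;                       /* one more emulated event */
	printf("overflow: %d\n", model_pause(&pmc));    /* 1: the emulated event wrapped */
	printf("guest sees %" PRIu64 "\n", model_read(&pmc));   /* 0 */

	return 0;
}

The split is what the return value of pmc_pause_counter() captures: hardware-counted overflow is perf's problem (it raises the PMI), so only increments that were accumulated in emulated_counter need the software overflow check at reprogram time, which is why kvm_pmu_incr_counter() reduces to a bare emulated_counter++ plus a reprogram request.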