From 351af0725e5222e35741011d1ea62215c1ed06db Mon Sep 17 00:00:00 2001 From: "Zhang, Yanmin" Date: Fri, 6 Aug 2010 13:39:08 +0800 Subject: perf, x86: Fix Intel-nhm PMU programming errata workaround Fix the Errata AAK100/AAP53/BD53 workaround, the officialy documented workaround we implemented in: 11164cd: perf, x86: Add Nehelem PMU programming errata workaround doesn't actually work fully and causes a stuck PMU state under load and non-functioning perf profiling. A functional workaround was found by trial & error. Affects all Nehalem-class Intel PMUs. Signed-off-by: Zhang Yanmin Signed-off-by: Peter Zijlstra LKML-Reference: <1281073148.2125.63.camel@ymzhang.sh.intel.com> Cc: Arjan van de Ven Cc: "H. Peter Anvin" Cc: # .35.x Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 81 ++++++++++++++++++++++++++-------- 1 file changed, 63 insertions(+), 18 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 214ac860ebe0..d8d86d014008 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -491,33 +491,78 @@ static void intel_pmu_enable_all(int added) * Intel Errata AAP53 (model 30) * Intel Errata BD53 (model 44) * - * These chips need to be 'reset' when adding counters by programming - * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5 - * either in sequence on the same PMC or on different PMCs. + * The official story: + * These chips need to be 'reset' when adding counters by programming the + * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either + * in sequence on the same PMC or on different PMCs. + * + * In practise it appears some of these events do in fact count, and + * we need to programm all 4 events. */ -static void intel_pmu_nhm_enable_all(int added) +static void intel_pmu_nhm_workaround(void) { - if (added) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - int i; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + static const unsigned long nhm_magic[4] = { + 0x4300B5, + 0x4300D2, + 0x4300B1, + 0x4300B1 + }; + struct perf_event *event; + int i; + + /* + * The Errata requires below steps: + * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL; + * 2) Configure 4 PERFEVTSELx with the magic events and clear + * the corresponding PMCx; + * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL; + * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL; + * 5) Clear 4 pairs of ERFEVTSELx and PMCx; + */ + + /* + * The real steps we choose are a little different from above. + * A) To reduce MSR operations, we don't run step 1) as they + * are already cleared before this function is called; + * B) Call x86_perf_event_update to save PMCx before configuring + * PERFEVTSELx with magic number; + * C) With step 5), we do clear only when the PERFEVTSELx is + * not used currently. + * D) Call x86_perf_event_set_period to restore PMCx; + */ - wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2); - wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1); - wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5); + /* We always operate 4 pairs of PERF Counters */ + for (i = 0; i < 4; i++) { + event = cpuc->events[i]; + if (event) + x86_perf_event_update(event); + } - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); + for (i = 0; i < 4; i++) { + wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]); + wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0); + } - for (i = 0; i < 3; i++) { - struct perf_event *event = cpuc->events[i]; + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); - if (!event) - continue; + for (i = 0; i < 4; i++) { + event = cpuc->events[i]; + if (event) { + x86_perf_event_set_period(event); __x86_pmu_enable_event(&event->hw, - ARCH_PERFMON_EVENTSEL_ENABLE); - } + ARCH_PERFMON_EVENTSEL_ENABLE); + } else + wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0); } +} + +static void intel_pmu_nhm_enable_all(int added) +{ + if (added) + intel_pmu_nhm_workaround(); intel_pmu_enable_all(added); } -- cgit v1.2.3 From 07a7795ca2e6e66d00b184efb46bd0e23d90d3fe Mon Sep 17 00:00:00 2001 From: Hans Rosenfeld Date: Wed, 18 Aug 2010 16:19:50 +0200 Subject: x86, cpu: Fix regression in AMD errata checking code A bug in the family-model-stepping matching code caused the presence of errata to go undetected when OSVW was not used. This causes hangs on some K8 systems because the E400 workaround is not enabled. Signed-off-by: Hans Rosenfeld LKML-Reference: <1282141190-930137-1-git-send-email-hans.rosenfeld@amd.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 60a57b13082d..ba5f62f45f01 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -669,7 +669,7 @@ bool cpu_has_amd_erratum(const int *erratum) } /* OSVW unavailable or ID unknown, match family-model-stepping range */ - ms = (cpu->x86_model << 8) | cpu->x86_mask; + ms = (cpu->x86_model << 4) | cpu->x86_mask; while ((range = *erratum++)) if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && (ms >= AMD_MODEL_RANGE_START(range)) && -- cgit v1.2.3 From 8d330919927ea31fa083b5a80084dc991da813a0 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Wed, 25 Aug 2010 21:06:32 +0000 Subject: perf, x86, Pentium4: Clear the P4_CCCR_FORCE_OVF flag If on Pentium4 CPUs the FORCE_OVF flag is set then an NMI happens on every event, which can generate a flood of NMIs. Clear it. Reported-by: Vince Weaver Signed-off-by: Lin Ming Signed-off-by: Cyrill Gorcunov Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_p4.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index febb12cea795..7e578e9cc58b 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -497,6 +497,8 @@ static int p4_hw_config(struct perf_event *event) event->hw.config |= event->attr.config & (p4_config_pack_escr(P4_ESCR_MASK_HT) | p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED)); + + event->hw.config &= ~P4_CCCR_FORCE_OVF; } rc = x86_setup_perfctr(event); -- cgit v1.2.3