summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu/mcheck/mce_64.c
diff options
context:
space:
mode:
authorAndi Kleen <andi@firstfloor.org>2009-02-12 13:49:36 +0100
committerH. Peter Anvin <hpa@zytor.com>2009-02-24 13:41:00 -0800
commit88ccbedd9ca85d1aca6a6f99df48dce87b7c02d4 (patch)
tree9951e6f3554789523006f187e69286f5ed541b50 /arch/x86/kernel/cpu/mcheck/mce_64.c
parent03195c6b40f2b4db92545921daa7c3a19b4e4c32 (diff)
downloadlinux-88ccbedd9ca85d1aca6a6f99df48dce87b7c02d4.tar.gz
linux-88ccbedd9ca85d1aca6a6f99df48dce87b7c02d4.tar.bz2
linux-88ccbedd9ca85d1aca6a6f99df48dce87b7c02d4.zip
x86, mce, cmci: add CMCI support
Impact: Major new feature. Intel CMCI (Corrected Machine Check Interrupt) is a new feature on Nehalem CPUs. It allows the CPU to trigger interrupts on corrected events, which allows faster reaction to them than with the traditional polling timer. Also use CMCI to discover shared banks. Machine check banks can be shared by CPU threads or even cores. Using the CMCI enable bit it is possible to detect the fact that another CPU already saw a specific bank. Use this to assign shared banks only to one CPU to avoid reporting duplicated events. On CPU hot unplug, bank sharing is rediscovered. This is done using a thread that cycles through all the CPUs. To avoid races between the poller and CMCI we only poll for banks that are not CMCI capable and only check CMCI owned banks on an interrupt. The shared banks ownership information is currently only used for CMCI interrupts, not polled banks. The sharing discovery code follows the algorithm recommended in the IA32 SDM Vol3a 14.5.2.1. The CMCI interrupt handler just calls the machine check poller to pick up the machine check event that caused the interrupt. I decided not to implement a separate threshold event like the AMD version has, because the threshold is always one currently and adding another event didn't seem to add any value. Some code inspired by Yunhong Jiang's Xen implementation, which was in turn inspired by an earlier CMCI implementation by me. Signed-off-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck/mce_64.c')
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c16
1 files changed, 13 insertions, 3 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index a8ff38bfa6ed..bfbd5323a635 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -166,7 +166,7 @@ static void mce_panic(char *msg, struct mce *backup, unsigned long start)
panic(msg);
}
-static int mce_available(struct cpuinfo_x86 *c)
+int mce_available(struct cpuinfo_x86 *c)
{
if (mce_dont_init)
return 0;
@@ -1060,9 +1060,12 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
static void mce_disable_cpu(void *h)
{
int i;
+ unsigned long action = *(unsigned long *)h;
if (!mce_available(&current_cpu_data))
return;
+ if (!(action & CPU_TASKS_FROZEN))
+ cmci_clear();
for (i = 0; i < banks; i++)
wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
}
@@ -1070,9 +1073,12 @@ static void mce_disable_cpu(void *h)
static void mce_reenable_cpu(void *h)
{
int i;
+ unsigned long action = *(unsigned long *)h;
if (!mce_available(&current_cpu_data))
return;
+ if (!(action & CPU_TASKS_FROZEN))
+ cmci_reenable();
for (i = 0; i < banks; i++)
wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
}
@@ -1100,13 +1106,17 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
del_timer_sync(t);
- smp_call_function_single(cpu, mce_disable_cpu, NULL, 1);
+ smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
break;
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
t->expires = round_jiffies_relative(jiffies + next_interval);
add_timer_on(t, cpu);
- smp_call_function_single(cpu, mce_reenable_cpu, NULL, 1);
+ smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
+ break;
+ case CPU_POST_DEAD:
+ /* intentionally ignoring frozen here */
+ cmci_rediscover(cpu);
break;
}
return NOTIFY_OK;