summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTony Luck <tony.luck@intel.com>2016-02-17 10:20:13 -0800
committerIngo Molnar <mingo@kernel.org>2016-02-18 09:28:47 +0100
commit0f68c088c0adb3c3bbeb487c4ebcde91fd5d34be (patch)
tree4aa087d9a71e7f27655d43d0a46555efd856ecf6
parent3a2f2ac9b96f9a9f5538396a212d3b9fb543bfc5 (diff)
downloadlinux-stable-0f68c088c0adb3c3bbeb487c4ebcde91fd5d34be.tar.gz
linux-stable-0f68c088c0adb3c3bbeb487c4ebcde91fd5d34be.tar.bz2
linux-stable-0f68c088c0adb3c3bbeb487c4ebcde91fd5d34be.zip
x86/cpufeature: Create a new synthetic cpu capability for machine check recovery
The Intel Software Developer Manual describes bit 24 in the MCG_CAP MSR: MCG_SER_P (software error recovery support present) flag, bit 24 — Indicates (when set) that the processor supports software error recovery But only some models with this capability bit set will actually generate recoverable machine checks. Check the model name and set a synthetic capability bit. Provide a command line option to set this bit anyway in case the kernel doesn't recognise the model name. Signed-off-by: Tony Luck <tony.luck@intel.com> Reviewed-by: Borislav Petkov <bp@suse.de> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/2e5bfb23c89800a036fb8a45fa97a74bb16bc362.1455732970.git.tony.luck@intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--Documentation/x86/x86_64/boot-options.txt2
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/mce.h1
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c13
4 files changed, 17 insertions, 0 deletions
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index 68ed3114c363..0965a71f9942 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -60,6 +60,8 @@ Machine check
threshold to 1. Enabling this may make memory predictive failure
analysis less effective if the bios sets thresholds for memory
errors since we will not see details for all errors.
+ mce=recovery
+ Force-enable recoverable machine check code paths
nomce (for compatibility with i386): same as mce=off
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 0ceb6adc8a48..6663fae71b12 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -106,6 +106,7 @@
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
+#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 2ea4527e462f..18d2ba9c8e44 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -113,6 +113,7 @@ struct mca_config {
bool ignore_ce;
bool disabled;
bool ser;
+ bool recovery;
bool bios_cmci_threshold;
u8 banks;
s8 bootlog;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index a006f4cd792b..b5b187c8cc07 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1576,6 +1576,17 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
if (c->x86 == 6 && c->x86_model == 45)
quirk_no_way_out = quirk_sandybridge_ifu;
+ /*
+ * MCG_CAP.MCG_SER_P is necessary but not sufficient to know
+ * whether this processor will actually generate recoverable
+ * machine checks. Check to see if this is an E7 model Xeon.
+ * We can't do a model number check because E5 and E7 use the
+ * same model number. E5 doesn't support recovery, E7 does.
+ */
+ if (mca_cfg.recovery || (mca_cfg.ser &&
+ !strncmp(c->x86_model_id,
+ "Intel(R) Xeon(R) CPU E7-", 24)))
+ set_cpu_cap(c, X86_FEATURE_MCE_RECOVERY);
}
if (cfg->monarch_timeout < 0)
cfg->monarch_timeout = 0;
@@ -2028,6 +2039,8 @@ static int __init mcheck_enable(char *str)
cfg->bootlog = (str[0] == 'b');
else if (!strcmp(str, "bios_cmci_threshold"))
cfg->bios_cmci_threshold = true;
+ else if (!strcmp(str, "recovery"))
+ cfg->recovery = true;
else if (isdigit(str[0])) {
if (get_option(&str, &cfg->tolerant) == 2)
get_option(&str, &(cfg->monarch_timeout));