summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2017-01-31 09:37:34 +0100
committerThomas Gleixner <tglx@linutronix.de>2017-01-31 21:47:58 +0100
commit0becc0ae5b42828785b589f686725ff5bc3b9b25 (patch)
treebe6d0e1f37c38ed0a7dd5da2d4b1e93f0fb43101
parent24c2503255d35c269b67162c397a1a1c1e02f6ce (diff)
downloadlinux-0becc0ae5b42828785b589f686725ff5bc3b9b25.tar.gz
linux-0becc0ae5b42828785b589f686725ff5bc3b9b25.tar.bz2
linux-0becc0ae5b42828785b589f686725ff5bc3b9b25.zip
x86/mce: Make timer handling more robust
Erik reported that on a preproduction hardware a CMCI storm triggers the BUG_ON in add_timer_on(). The reason is that the per CPU MCE timer is started by the CMCI logic before the MCE CPU hotplug callback starts the timer with add_timer_on(). So the timer is already queued which triggers the BUG. Using add_timer_on() is pretty pointless in this code because the timer is strictlty per CPU, initialized as pinned and all operations which arm the timer happen on the CPU to which the timer belongs. Simplify the whole machinery by using mod_timer() instead of add_timer_on() which avoids the problem because mod_timer() can handle already queued timers. Use __start_timer() everywhere so the earliest armed expiry time is preserved. Reported-by: Erik Veijola <erik.veijola@intel.com> Tested-by: Borislav Petkov <bp@alien8.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Borislav Petkov <bp@alien8.de> Cc: Tony Luck <tony.luck@intel.com> Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1701310936080.3457@nanos Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c31
1 files changed, 12 insertions, 19 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 00ef43233e03..537c6647d84c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1373,20 +1373,15 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
-static void __restart_timer(struct timer_list *t, unsigned long interval)
+static void __start_timer(struct timer_list *t, unsigned long interval)
{
unsigned long when = jiffies + interval;
unsigned long flags;
local_irq_save(flags);
- if (timer_pending(t)) {
- if (time_before(when, t->expires))
- mod_timer(t, when);
- } else {
- t->expires = round_jiffies(when);
- add_timer_on(t, smp_processor_id());
- }
+ if (!timer_pending(t) || time_before(when, t->expires))
+ mod_timer(t, round_jiffies(when));
local_irq_restore(flags);
}
@@ -1421,7 +1416,7 @@ static void mce_timer_fn(unsigned long data)
done:
__this_cpu_write(mce_next_interval, iv);
- __restart_timer(t, iv);
+ __start_timer(t, iv);
}
/*
@@ -1432,7 +1427,7 @@ void mce_timer_kick(unsigned long interval)
struct timer_list *t = this_cpu_ptr(&mce_timer);
unsigned long iv = __this_cpu_read(mce_next_interval);
- __restart_timer(t, interval);
+ __start_timer(t, interval);
if (interval < iv)
__this_cpu_write(mce_next_interval, interval);
@@ -1779,17 +1774,15 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
}
}
-static void mce_start_timer(unsigned int cpu, struct timer_list *t)
+static void mce_start_timer(struct timer_list *t)
{
unsigned long iv = check_interval * HZ;
if (mca_cfg.ignore_ce || !iv)
return;
- per_cpu(mce_next_interval, cpu) = iv;
-
- t->expires = round_jiffies(jiffies + iv);
- add_timer_on(t, cpu);
+ this_cpu_write(mce_next_interval, iv);
+ __start_timer(t, iv);
}
static void __mcheck_cpu_setup_timer(void)
@@ -1806,7 +1799,7 @@ static void __mcheck_cpu_init_timer(void)
unsigned int cpu = smp_processor_id();
setup_pinned_timer(t, mce_timer_fn, cpu);
- mce_start_timer(cpu, t);
+ mce_start_timer(t);
}
/* Handle unconfigured int18 (should never happen) */
@@ -2566,7 +2559,7 @@ static int mce_cpu_dead(unsigned int cpu)
static int mce_cpu_online(unsigned int cpu)
{
- struct timer_list *t = &per_cpu(mce_timer, cpu);
+ struct timer_list *t = this_cpu_ptr(&mce_timer);
int ret;
mce_device_create(cpu);
@@ -2577,13 +2570,13 @@ static int mce_cpu_online(unsigned int cpu)
return ret;
}
mce_reenable_cpu();
- mce_start_timer(cpu, t);
+ mce_start_timer(t);
return 0;
}
static int mce_cpu_pre_down(unsigned int cpu)
{
- struct timer_list *t = &per_cpu(mce_timer, cpu);
+ struct timer_list *t = this_cpu_ptr(&mce_timer);
mce_disable_cpu();
del_timer_sync(t);