From 7c4c3a0f18ba57ea2a2985034532303d2929902a Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 27 Jun 2013 11:35:44 +0100 Subject: hrtimers: Support resuming with two or more CPUs online (but stopped) hrtimers_resume() only reprograms the timers for the current CPU as it assumes that all other CPUs are offline at this point in the resume process. If other CPUs are online then their timers will not be corrected and they may fire at the wrong time. When running as a Xen guest, this assumption is not true. Non-boot CPUs are only stopped with IRQs disabled instead of offlining them. This is a performance optimization as disabling the CPUs would add an unacceptable amount of additional downtime during a live migration (> 200 ms for a 4 VCPU guest). hrtimers_resume() cannot call on_each_cpu(retrigger_next_event,...) as the other CPUs will be stopped with IRQs disabled. Instead, defer the call to the next softirq. [ tglx: Separated the xen change out ] Signed-off-by: David Vrabel Cc: Konrad Rzeszutek Wilk Cc: John Stultz Cc: Link: http://lkml.kernel.org/r/1372329348-20841-2-git-send-email-david.vrabel@citrix.com Signed-off-by: Thomas Gleixner --- kernel/hrtimer.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index fd4b13b131f8..e86827e94c9a 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -773,15 +773,24 @@ void clock_was_set(void) /* * During resume we might have to reprogram the high resolution timer - * interrupt (on the local CPU): + * interrupt on all online CPUs. However, all other CPUs will be + * stopped with IRQs interrupts disabled so the clock_was_set() call + * must be deferred to the softirq. + * + * The one-shot timer has already been programmed to fire immediately + * (see tick_resume_oneshot()) and this interrupt will trigger the + * softirq to run early enough to correctly reprogram the timers on + * all CPUs. 
*/ void hrtimers_resume(void) { + struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); + WARN_ONCE(!irqs_disabled(), KERN_INFO "hrtimers_resume() called with IRQs enabled!"); - retrigger_next_event(NULL); - timerfd_clock_was_set(); + cpu_base->clock_was_set = 1; + __raise_softirq_irqoff(HRTIMER_SOFTIRQ); } static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) -- cgit v1.2.3 From 5ec2481b7b47a4005bb446d176e5d0257400c77d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 5 Jul 2013 12:09:18 +0200 Subject: hrtimers: Move SMP function call to thread context smp_call_function_* must not be called from softirq context. But clock_was_set() which calls on_each_cpu() is called from softirq context to implement a delayed clock_was_set() for the timer interrupt handler. Though that almost never gets invoked. A recent change in the resume code uses the softirq-based delayed clock_was_set to support Xen's resume mechanism. linux-next contains a new warning which warns if smp_call_function_* is called from softirq context which gets triggered by that Xen change. Fix this by moving the delayed clock_was_set() call to a work context. Reported-and-tested-by: Artem Savkov Reported-by: Sasha Levin Cc: David Vrabel Cc: Ingo Molnar Cc: H. Peter Anvin Cc: Konrad Wilk Cc: John Stultz Cc: xen-devel@lists.xen.org Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner --- kernel/hrtimer.c | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index e86827e94c9a..b9b9420a1297 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -721,17 +721,20 @@ static int hrtimer_switch_to_hres(void) return 1; } +static void clock_was_set_work(struct work_struct *work) +{ + clock_was_set(); +} + +static DECLARE_WORK(hrtimer_work, clock_was_set_work); + /* - * Called from timekeeping code to reprogramm the hrtimer interrupt - * device.
If called from the timer interrupt context we defer it to - * softirq context. + * Called from timekeeping and resume code to reprogramm the hrtimer + * interrupt device on all cpus. */ void clock_was_set_delayed(void) { - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - - cpu_base->clock_was_set = 1; - __raise_softirq_irqoff(HRTIMER_SOFTIRQ); + schedule_work(&hrtimer_work); } #else @@ -775,12 +778,7 @@ void clock_was_set(void) * During resume we might have to reprogram the high resolution timer * interrupt on all online CPUs. However, all other CPUs will be * stopped with IRQs interrupts disabled so the clock_was_set() call - * must be deferred to the softirq. - * - * The one-shot timer has already been programmed to fire immediately - * (see tick_resume_oneshot()) and this interrupt will trigger the - * softirq to run early enough to correctly reprogram the timers on - * all CPUs. + * must be deferred. */ void hrtimers_resume(void) { @@ -789,8 +787,10 @@ void hrtimers_resume(void) WARN_ONCE(!irqs_disabled(), KERN_INFO "hrtimers_resume() called with IRQs enabled!"); - cpu_base->clock_was_set = 1; - __raise_softirq_irqoff(HRTIMER_SOFTIRQ); + /* Retrigger on the local CPU */ + retrigger_next_event(NULL); + /* And schedule a retrigger for all others */ + clock_was_set_delayed(); } static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) @@ -1441,13 +1441,6 @@ void hrtimer_peek_ahead_timers(void) static void run_hrtimer_softirq(struct softirq_action *h) { - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - - if (cpu_base->clock_was_set) { - cpu_base->clock_was_set = 0; - clock_was_set(); - } - hrtimer_peek_ahead_timers(); } -- cgit v1.2.3 From 73b0cd674ccc64c921e25bd7154f26d342116539 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 6 Jul 2013 10:34:00 +0200 Subject: hrtimer: Remove unused variable Sigh, should have noticed myself. 
Reported-by: fengguang.wu@intel.com Signed-off-by: Thomas Gleixner --- kernel/hrtimer.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index b9b9420a1297..3a951d8d5770 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -782,8 +782,6 @@ void clock_was_set(void) */ void hrtimers_resume(void) { - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - WARN_ONCE(!irqs_disabled(), KERN_INFO "hrtimers_resume() called with IRQs enabled!"); -- cgit v1.2.3