From ba1f14fbe70965ae0fb1655a5275a62723f65b77 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2013 14:26:41 +0100 Subject: sched: Remove PREEMPT_NEED_RESCHED from generic code While hunting a preemption issue with Alexander, Ben noticed that the currently generic PREEMPT_NEED_RESCHED stuff is horribly broken for load-store architectures. We currently rely on the IPI to fold TIF_NEED_RESCHED into PREEMPT_NEED_RESCHED, but when this IPI lands while we already have a load for the preempt-count but before the store, the store will erase the PREEMPT_NEED_RESCHED change. The current preempt-count only works on load-store archs because interrupts are assumed to be completely balanced wrt their preempt_count fiddling; the previous preempt_count load will match the preempt_count state after the interrupt and therefore nothing gets lost. This patch removes the PREEMPT_NEED_RESCHED usage from generic code and pushes it into x86 arch code; the generic code goes back to relying on TIF_NEED_RESCHED. Boot tested on x86_64 and compile tested on ppc64. Reported-by: Benjamin Herrenschmidt Reported-and-Tested-by: Alexander Graf Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20131128132641.GP10022@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/asm-generic/preempt.h | 35 +++++++++++------------------------ include/linux/sched.h | 2 -- 2 files changed, 11 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h index ddf2b420ac8f..1cd3f5d767a8 100644 --- a/include/asm-generic/preempt.h +++ b/include/asm-generic/preempt.h @@ -3,13 +3,11 @@ #include -/* - * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users - * that think a non-zero value indicates we cannot preempt. - */ +#define PREEMPT_ENABLED (0) + static __always_inline int preempt_count(void) { - return current_thread_info()->preempt_count & ~PREEMPT_NEED_RESCHED; + return current_thread_info()->preempt_count; } static __always_inline int *preempt_count_ptr(void) @@ -17,11 +15,6 @@ static __always_inline int *preempt_count_ptr(void) return ¤t_thread_info()->preempt_count; } -/* - * We now loose PREEMPT_NEED_RESCHED and cause an extra reschedule; however the - * alternative is loosing a reschedule. Better schedule too often -- also this - * should be a very rare operation. - */ static __always_inline void preempt_count_set(int pc) { *preempt_count_ptr() = pc; @@ -41,28 +34,17 @@ static __always_inline void preempt_count_set(int pc) task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \ } while (0) -/* - * We fold the NEED_RESCHED bit into the preempt count such that - * preempt_enable() can decrement and test for needing to reschedule with a - * single instruction. - * - * We invert the actual bit, so that when the decrement hits 0 we know we both - * need to resched (the bit is cleared) and can resched (no preempt count). - */ - static __always_inline void set_preempt_need_resched(void) { - *preempt_count_ptr() &= ~PREEMPT_NEED_RESCHED; } static __always_inline void clear_preempt_need_resched(void) { - *preempt_count_ptr() |= PREEMPT_NEED_RESCHED; } static __always_inline bool test_preempt_need_resched(void) { - return !(*preempt_count_ptr() & PREEMPT_NEED_RESCHED); + return false; } /* @@ -81,7 +63,12 @@ static __always_inline void __preempt_count_sub(int val) static __always_inline bool __preempt_count_dec_and_test(void) { - return !--*preempt_count_ptr(); + /* + * Because of load-store architectures cannot do per-cpu atomic + * operations; we cannot use PREEMPT_NEED_RESCHED because it might get + * lost. + */ + return !--*preempt_count_ptr() && tif_need_resched(); } /* @@ -89,7 +76,7 @@ static __always_inline bool __preempt_count_dec_and_test(void) */ static __always_inline bool should_resched(void) { - return unlikely(!*preempt_count_ptr()); + return unlikely(!preempt_count() && tif_need_resched()); } #ifdef CONFIG_PREEMPT diff --git a/include/linux/sched.h b/include/linux/sched.h index 768b037dfacb..96d674ba3876 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -440,8 +440,6 @@ struct task_cputime { .sum_exec_runtime = 0, \ } -#define PREEMPT_ENABLED (PREEMPT_NEED_RESCHED) - #ifdef CONFIG_PREEMPT_COUNT #define PREEMPT_DISABLED (1 + PREEMPT_ENABLED) #else -- cgit v1.2.3 From be5e610c0fd6ef772cafb9e0bd4128134804aef3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Nov 2013 18:27:06 +0100 Subject: math64: Add mul_u64_u32_shr() Introduce mul_u64_u32_shr() as proposed by Andy a while back; it allows using 64x64->128 muls on 64bit archs and recent GCC which defines __SIZEOF_INT128__ and __int128. (This new method will be used by the scheduler.) Signed-off-by: Peter Zijlstra Cc: fweisbec@gmail.com Cc: Andy Lutomirski Cc: Linus Torvalds Link: http://lkml.kernel.org/n/tip-hxjoeuzmrcaumR0uZwjpe2pv@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/math64.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include') diff --git a/include/linux/math64.h b/include/linux/math64.h index 69ed5f5e9f6e..c45c089bfdac 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -133,4 +133,34 @@ __iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) return ret; } +#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__) + +#ifndef mul_u64_u32_shr +static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) +{ + return (u64)(((unsigned __int128)a * mul) >> shift); +} +#endif /* mul_u64_u32_shr */ + +#else + +#ifndef mul_u64_u32_shr +static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) +{ + u32 ah, al; + u64 ret; + + al = a; + ah = a >> 32; + + ret = ((u64)al * mul) >> shift; + if (ah) + ret += ((u64)ah * mul) << (32 - shift); + + return ret; +} +#endif /* mul_u64_u32_shr */ + +#endif + #endif /* _LINUX_MATH64_H */ -- cgit v1.2.3 From 9dbdb155532395ba000c5d5d187658b0e17e529f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Nov 2013 18:27:06 +0100 Subject: sched/fair: Rework sched_fair time accounting Christian suffers from a bad BIOS that wrecks his i5's TSC sync. This results in him occasionally seeing time going backwards - which crashes the scheduler ... Most of our time accounting can actually handle that except the most common one; the tick time update of sched_fair. There is a further problem with that code; previously we assumed that because we get a tick every TICK_NSEC our time delta could never exceed 32bits and math was simpler. However, ever since Frederic managed to get NO_HZ_FULL merged; this is no longer the case since now a task can run for a long time indeed without getting a tick. It only takes about ~4.2 seconds to overflow our u32 in nanoseconds. This means we not only need to better deal with time going backwards; but also means we need to be able to deal with large deltas. This patch reworks the entire code and uses mul_u64_u32_shr() as proposed by Andy a long while ago. We express our virtual time scale factor in a u32 multiplier and shift right and the 32bit mul_u64_u32_shr() implementation reduces to a single 32x32->64 multiply if the time delta is still short (common case). For 64bit a 64x64->128 multiply can be used if ARCH_SUPPORTS_INT128. Reported-and-Tested-by: Christian Engelmayer Signed-off-by: Peter Zijlstra Cc: fweisbec@gmail.com Cc: Paul Turner Cc: Stanislaw Gruszka Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/20131118172706.GI3866@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 96d674ba3876..53f97eb8dbc7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -930,7 +930,8 @@ struct pipe_inode_info; struct uts_namespace; struct load_weight { - unsigned long weight, inv_weight; + unsigned long weight; + u32 inv_weight; }; struct sched_avg { -- cgit v1.2.3