From af4afb40085f04f8b2ea8d28df878f7f0be02f89 Mon Sep 17 00:00:00 2001 From: Pratyush Patel Date: Tue, 14 Jun 2016 11:00:42 +0200 Subject: alarmtimer: Fix comments describing structure fields Updated struct alarm and struct alarm_timer descriptions. Cc: Prarit Bhargava Cc: Richard Cochran Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Pratyush Patel Signed-off-by: John Stultz --- include/linux/alarmtimer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h index 52f3b7da4f2d..9d8031257a90 100644 --- a/include/linux/alarmtimer.h +++ b/include/linux/alarmtimer.h @@ -26,10 +26,10 @@ enum alarmtimer_restart { * struct alarm - Alarm timer structure * @node: timerqueue node for adding to the event list this value * also includes the expiration time. - * @period: Period for recuring alarms + * @timer: hrtimer used to schedule events while running * @function: Function pointer to be executed when the timer fires. - * @type: Alarm type (BOOTTIME/REALTIME) - * @enabled: Flag that represents if the alarm is set to fire or not + * @type: Alarm type (BOOTTIME/REALTIME). + * @state: Flag that represents if the alarm is set to fire or not. * @data: Internal data value. */ struct alarm { -- cgit v1.2.3 From e6c2682a1da36a2e79d9bab470412374434ce89e Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Wed, 8 Jun 2016 22:04:59 -0700 Subject: time: Add time64_to_tm() time_to_tm() takes time_t as an argument. time_t is not y2038 safe. Add time64_to_tm() that takes time64_t as an argument which is y2038 safe. The plan is to eventually replace all calls to time_to_tm() by time64_to_tm(). Cc: Prarit Bhargava Cc: Richard Cochran Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Deepa Dinamani Signed-off-by: John Stultz --- include/linux/time.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/time.h b/include/linux/time.h index 297f09f23896..4cea09d94208 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -205,7 +205,20 @@ struct tm { int tm_yday; }; -void time_to_tm(time_t totalsecs, int offset, struct tm *result); +void time64_to_tm(time64_t totalsecs, int offset, struct tm *result); + +/** + * time_to_tm - converts the calendar time to local broken-down time + * + * @totalsecs the number of seconds elapsed since 00:00:00 on January 1, 1970, + * Coordinated Universal Time (UTC). + * @offset offset seconds adding to totalsecs. + * @result pointer to struct tm variable to receive broken-down time + */ +static inline void time_to_tm(time_t totalsecs, int offset, struct tm *result) +{ + time64_to_tm(totalsecs, offset, result); +} /** * timespec_to_ns - Convert timespec to nanoseconds -- cgit v1.2.3 From c35d9292fee0474a1a037f75b0b85af32200c76f Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 18 Apr 2016 23:06:48 +0200 Subject: of: Add a new macro to declare_of for one parameter function returning a value The macro OF_DECLARE_1 expect a void (*func)(struct device_node *) while the OF_DECLARE_2 expect a int (*func)(struct device_node *, struct device_node *). The second one allows to pass an init function returning a value, which make possible to call the functions in the table and check the return value in order to catch at a higher level the errors and handle them from there instead of doing a panic in each driver (well at least this is the case for the clkevt). Unfortunately the OF_DECLARE_1 does not allow that and that lead to some code duplication and crappyness in the drivers. The OF_DECLARE_1 is used by all the clk drivers and the clocksource/clockevent drivers. It is not possible to do the change in one shot as we have to change all the init functions. The OF_DECLARE_2 specifies an init function prototype with two parameters with the node and its parent. The latter won't be used, ever, in the timer drivers. Introduce a OF_DECLARE_1_RET macro to be used, and hopefully we can smoothly and iteratively change the users of OF_DECLARE_1 to use the new macro instead. Signed-off-by: Daniel Lezcano Acked-by: Rob Herring --- include/linux/of.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/of.h b/include/linux/of.h index c7292e8ea080..552943d37f40 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1009,10 +1009,13 @@ static inline int of_get_available_child_count(const struct device_node *np) #endif typedef int (*of_init_fn_2)(struct device_node *, struct device_node *); +typedef int (*of_init_fn_1_ret)(struct device_node *); typedef void (*of_init_fn_1)(struct device_node *); #define OF_DECLARE_1(table, name, compat, fn) \ _OF_DECLARE(table, name, compat, fn, of_init_fn_1) +#define OF_DECLARE_1_RET(table, name, compat, fn) \ + _OF_DECLARE(table, name, compat, fn, of_init_fn_1_ret) #define OF_DECLARE_2(table, name, compat, fn) \ _OF_DECLARE(table, name, compat, fn, of_init_fn_2) -- cgit v1.2.3 From b7c4db861683af5fc50ac3cb3751cf847d765211 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 31 May 2016 16:25:59 +0200 Subject: clocksource/drivers/clksrc-probe: Introduce init functions with return code Currently, the clksrc-probe is not able to handle any error from the init functions. There are different issues with the current code: - the code is duplicated in the init functions by writing error - every driver tends to panic in its own init function - counting the number of clocksources is not reliable This patch adds another table to store the functions returning an error. The table is temporary while we convert all the drivers to return an error and will disappear. Signed-off-by: Daniel Lezcano --- include/asm-generic/vmlinux.lds.h | 2 ++ include/linux/clocksource.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 6a67ab94b553..8c6c626285c0 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -173,6 +173,7 @@ *(__##name##_of_table_end) #define CLKSRC_OF_TABLES() OF_TABLE(CONFIG_CLKSRC_OF, clksrc) +#define CLKSRC_RET_OF_TABLES() OF_TABLE(CONFIG_CLKSRC_OF, clksrc_ret) #define IRQCHIP_OF_MATCH_TABLE() OF_TABLE(CONFIG_IRQCHIP, irqchip) #define CLK_OF_TABLES() OF_TABLE(CONFIG_COMMON_CLK, clk) #define IOMMU_OF_TABLES() OF_TABLE(CONFIG_OF_IOMMU, iommu) @@ -531,6 +532,7 @@ CLK_OF_TABLES() \ RESERVEDMEM_OF_TABLES() \ CLKSRC_OF_TABLES() \ + CLKSRC_RET_OF_TABLES() \ IOMMU_OF_TABLES() \ CPU_METHOD_OF_TABLES() \ CPUIDLE_METHOD_OF_TABLES() \ diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 44a1aff22566..15c3839850f4 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -246,6 +246,9 @@ extern int clocksource_i8253_init(void); #define CLOCKSOURCE_OF_DECLARE(name, compat, fn) \ OF_DECLARE_1(clksrc, name, compat, fn) +#define CLOCKSOURCE_OF_DECLARE_RET(name, compat, fn) \ + OF_DECLARE_1_RET(clksrc_ret, name, compat, fn) + #ifdef CONFIG_CLKSRC_PROBE extern void clocksource_probe(void); #else -- cgit v1.2.3 From 2ef2538bc613af45baf9f2a032c9a8259c4c6672 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 6 Jun 2016 23:28:01 +0200 Subject: clocksource/drivers/sp804: Convert init function to return error The init functions do not return any error. They behave as the following: - panic, thus leading to a kernel crash while another timer may work and make the system boot up correctly or - print an error and let the caller unaware if the state of the system Change that by converting the init functions to return an error conforming to the CLOCKSOURCE_OF_RET prototype. Proper error handling (rollback, errno value) will be changed later case by case, thus this change just return back an error or success in the init function. Signed-off-by: Daniel Lezcano --- include/clocksource/timer-sp804.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/clocksource/timer-sp804.h b/include/clocksource/timer-sp804.h index 1f8a1caa7cb4..7654d71243dd 100644 --- a/include/clocksource/timer-sp804.h +++ b/include/clocksource/timer-sp804.h @@ -3,10 +3,10 @@ struct clk; -void __sp804_clocksource_and_sched_clock_init(void __iomem *, - const char *, struct clk *, int); -void __sp804_clockevents_init(void __iomem *, unsigned int, - struct clk *, const char *); +int __sp804_clocksource_and_sched_clock_init(void __iomem *, + const char *, struct clk *, int); +int __sp804_clockevents_init(void __iomem *, unsigned int, + struct clk *, const char *); void sp804_timer_disable(void __iomem *); static inline void sp804_clocksource_init(void __iomem *base, const char *name) -- cgit v1.2.3 From 177cf6e52b0a1a382b9892d3cc9aafd6e7c5943f Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 7 Jun 2016 00:27:44 +0200 Subject: clocksources: Switch back to the clksrc table All the clocksource drivers's init function are now converted to return an error code. CLOCKSOURCE_OF_DECLARE is no longer used as well as the clksrc-of table. Let's convert back the names: - CLOCKSOURCE_OF_DECLARE_RET => CLOCKSOURCE_OF_DECLARE - clksrc-of-ret => clksrc-of Signed-off-by: Daniel Lezcano For exynos_mct and samsung_pwm_timer: Acked-by: Krzysztof Kozlowski For arch/arc: Acked-by: Vineet Gupta For mediatek driver: Acked-by: Matthias Brugger For the Rockchip-part Acked-by: Heiko Stuebner For STi : Acked-by: Patrice Chotard For the mps2-timer.c and versatile.c changes: Acked-by: Liviu Dudau For the OXNAS part : Acked-by: Neil Armstrong For LPC32xx driver: Acked-by: Sylvain Lemieux For Broadcom Kona timer change: Acked-by: Ray Jui For Sun4i and Sun5i: Acked-by: Chen-Yu Tsai For Meson6: Acked-by: Carlo Caione For Keystone: Acked-by: Santosh Shilimkar For NPS: Acked-by: Noam Camus For bcm2835: Acked-by: Eric Anholt --- include/asm-generic/vmlinux.lds.h | 2 -- include/linux/clocksource.h | 5 +---- 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8c6c626285c0..6a67ab94b553 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -173,7 +173,6 @@ *(__##name##_of_table_end) #define CLKSRC_OF_TABLES() OF_TABLE(CONFIG_CLKSRC_OF, clksrc) -#define CLKSRC_RET_OF_TABLES() OF_TABLE(CONFIG_CLKSRC_OF, clksrc_ret) #define IRQCHIP_OF_MATCH_TABLE() OF_TABLE(CONFIG_IRQCHIP, irqchip) #define CLK_OF_TABLES() OF_TABLE(CONFIG_COMMON_CLK, clk) #define IOMMU_OF_TABLES() OF_TABLE(CONFIG_OF_IOMMU, iommu) @@ -532,7 +531,6 @@ CLK_OF_TABLES() \ RESERVEDMEM_OF_TABLES() \ CLKSRC_OF_TABLES() \ - CLKSRC_RET_OF_TABLES() \ IOMMU_OF_TABLES() \ CPU_METHOD_OF_TABLES() \ CPUIDLE_METHOD_OF_TABLES() \ diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 15c3839850f4..08398182f56e 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -244,10 +244,7 @@ extern int clocksource_mmio_init(void __iomem *, const char *, extern int clocksource_i8253_init(void); #define CLOCKSOURCE_OF_DECLARE(name, compat, fn) \ - OF_DECLARE_1(clksrc, name, compat, fn) - -#define CLOCKSOURCE_OF_DECLARE_RET(name, compat, fn) \ - OF_DECLARE_1_RET(clksrc_ret, name, compat, fn) + OF_DECLARE_1_RET(clksrc, name, compat, fn) #ifdef CONFIG_CLKSRC_PROBE extern void clocksource_probe(void); -- cgit v1.2.3 From b81ea968706bdb5c2f2af62c5700573a32ab310a Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 2 Jun 2016 22:44:49 +0200 Subject: clk: Add missing clk_get_sys() stub When compiling with the COMPILE_TEST option set, the clps711x does not compile because of the clk_get_sys() noop stub missing. Signed-off-by: Daniel Lezcano Reviewed-by: Michael Turquette --- include/linux/clk.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/clk.h b/include/linux/clk.h index 0df4a51e1a78..834179f3fa72 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -461,6 +461,10 @@ static inline struct clk *clk_get_parent(struct clk *clk) return NULL; } +static inline struct clk *clk_get_sys(const char *dev_id, const char *con_id) +{ + return NULL; +} #endif /* clk_prepare_enable helps cases using clk_enable in non-atomic context. */ -- cgit v1.2.3 From e675447bda51c1ea72d1ac9132ce3bed974f1da3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:15 +0000 Subject: timers: Make 'pinned' a timer property We want to move the timer migration logic from a 'push' to a 'pull' model. Under the current 'push' model pinned timers are handled via a runtime API variant: mod_timer_pinned(). The 'pull' model requires us to store the pinned attribute of a timer in the timer_list structure itself, as a new TIMER_PINNED bit in timer->flags. This flag must be set at initialization time and the timer APIs recognize the flag. This patch: - Implements the new flag and associated new-style initialization methods - makes mod_timer() recognize new-style pinned timers, - and adds some migration helper facility to allow step by step conversion of old-style to new-style pinned timers. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094341.049338558@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/timer.h | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index 20ac746f3eb3..046d6cf26498 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -62,7 +62,8 @@ struct timer_list { #define TIMER_MIGRATING 0x00080000 #define TIMER_BASEMASK (TIMER_CPUMASK | TIMER_MIGRATING) #define TIMER_DEFERRABLE 0x00100000 -#define TIMER_IRQSAFE 0x00200000 +#define TIMER_PINNED 0x00200000 +#define TIMER_IRQSAFE 0x00400000 #define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \ .entry = { .next = TIMER_ENTRY_STATIC }, \ @@ -78,9 +79,15 @@ struct timer_list { #define TIMER_INITIALIZER(_function, _expires, _data) \ __TIMER_INITIALIZER((_function), (_expires), (_data), 0) +#define TIMER_PINNED_INITIALIZER(_function, _expires, _data) \ + __TIMER_INITIALIZER((_function), (_expires), (_data), TIMER_PINNED) + #define TIMER_DEFERRED_INITIALIZER(_function, _expires, _data) \ __TIMER_INITIALIZER((_function), (_expires), (_data), TIMER_DEFERRABLE) +#define TIMER_PINNED_DEFERRED_INITIALIZER(_function, _expires, _data) \ + __TIMER_INITIALIZER((_function), (_expires), (_data), TIMER_DEFERRABLE | TIMER_PINNED) + #define DEFINE_TIMER(_name, _function, _expires, _data) \ struct timer_list _name = \ TIMER_INITIALIZER(_function, _expires, _data) @@ -124,8 +131,12 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, #define init_timer(timer) \ __init_timer((timer), 0) +#define init_timer_pinned(timer) \ + __init_timer((timer), TIMER_PINNED) #define init_timer_deferrable(timer) \ __init_timer((timer), TIMER_DEFERRABLE) +#define init_timer_pinned_deferrable(timer) \ + __init_timer((timer), TIMER_DEFERRABLE | TIMER_PINNED) #define init_timer_on_stack(timer) \ __init_timer_on_stack((timer), 0) @@ -145,12 +156,20 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, #define setup_timer(timer, fn, data) \ __setup_timer((timer), (fn), (data), 0) +#define setup_pinned_timer(timer, fn, data) \ + __setup_timer((timer), (fn), (data), TIMER_PINNED) #define setup_deferrable_timer(timer, fn, data) \ __setup_timer((timer), (fn), (data), TIMER_DEFERRABLE) +#define setup_pinned_deferrable_timer(timer, fn, data) \ + __setup_timer((timer), (fn), (data), TIMER_DEFERRABLE | TIMER_PINNED) #define setup_timer_on_stack(timer, fn, data) \ __setup_timer_on_stack((timer), (fn), (data), 0) +#define setup_pinned_timer_on_stack(timer, fn, data) \ + __setup_timer_on_stack((timer), (fn), (data), TIMER_PINNED) #define setup_deferrable_timer_on_stack(timer, fn, data) \ __setup_timer_on_stack((timer), (fn), (data), TIMER_DEFERRABLE) +#define setup_pinned_deferrable_timer_on_stack(timer, fn, data) \ + __setup_timer_on_stack((timer), (fn), (data), TIMER_DEFERRABLE | TIMER_PINNED) /** * timer_pending - is a timer pending? @@ -175,8 +194,8 @@ extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires); extern void set_timer_slack(struct timer_list *time, int slack_hz); -#define TIMER_NOT_PINNED 0 -#define TIMER_PINNED 1 +#define MOD_TIMER_NOT_PINNED 0 +#define MOD_TIMER_PINNED 1 /* * The jiffies value which is added to now, when there is no timer * in the timer wheel: -- cgit v1.2.3 From 177ec0a0a531695210b277d734b2f92ee5796303 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:24 +0000 Subject: timers: Remove the deprecated mod_timer_pinned() API We switched all users to initialize the timers as pinned and call mod_timer(). Remove the now unused timer API function. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094341.706205231@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/timer.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index 046d6cf26498..a8f6c70eb414 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -190,12 +190,9 @@ extern void add_timer_on(struct timer_list *timer, int cpu); extern int del_timer(struct timer_list * timer); extern int mod_timer(struct timer_list *timer, unsigned long expires); extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); -extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires); extern void set_timer_slack(struct timer_list *time, int slack_hz); -#define MOD_TIMER_NOT_PINNED 0 -#define MOD_TIMER_PINNED 1 /* * The jiffies value which is added to now, when there is no timer * in the timer wheel: -- cgit v1.2.3 From 15dba1e37b5cfd7fab9bc84e0f05f35c918f4eef Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:27 +0000 Subject: hlist: Add hlist_is_singular_node() helper Required to figure out whether the entry is the only one in the hlist. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094341.867631372@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/list.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/list.h b/include/linux/list.h index 5356f4d661a7..5183138aa932 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -678,6 +678,16 @@ static inline bool hlist_fake(struct hlist_node *h) return h->pprev == &h->next; } +/* + * Check whether the node is the only node of the head without + * accessing head: + */ +static inline bool +hlist_is_singular_node(struct hlist_node *n, struct hlist_head *h) +{ + return !n->next && n->pprev == &h->first; +} + /* * Move a list from one list head to another. Fixup the pprev * reference of the first entry if it exists. -- cgit v1.2.3 From b0d6e2dcb284f1f4dcb4b92760f49eeaf5fc0bc7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:29 +0000 Subject: timers: Reduce the CPU index space to 256k We want to store the array index in the flags space. 256k CPUs should be enough for a while. Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Chris Mason Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094342.030144293@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/timer.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index a8f6c70eb414..989f33d16ebf 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -58,12 +58,12 @@ struct timer_list { * workqueue locking issues. It's not meant for executing random crap * with interrupts disabled. Abuse is monitored! */ -#define TIMER_CPUMASK 0x0007FFFF -#define TIMER_MIGRATING 0x00080000 +#define TIMER_CPUMASK 0x0003FFFF +#define TIMER_MIGRATING 0x00040000 #define TIMER_BASEMASK (TIMER_CPUMASK | TIMER_MIGRATING) -#define TIMER_DEFERRABLE 0x00100000 -#define TIMER_PINNED 0x00200000 -#define TIMER_IRQSAFE 0x00400000 +#define TIMER_DEFERRABLE 0x00080000 +#define TIMER_PINNED 0x00100000 +#define TIMER_IRQSAFE 0x00200000 #define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \ .entry = { .next = TIMER_ENTRY_STATIC }, \ -- cgit v1.2.3 From 500462a9de657f86edaa102f8ab6bff7f7e43fc2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:30 +0000 Subject: timers: Switch to a non-cascading wheel The current timer wheel has some drawbacks: 1) Cascading: Cascading can be an unbound operation and is completely pointless in most cases because the vast majority of the timer wheel timers are canceled or rearmed before expiration. (They are used as timeout safeguards, not as real timers to measure time.) 2) No fast lookup of the next expiring timer: In NOHZ scenarios the first timer soft interrupt after a long NOHZ period must fast forward the base time to the current value of jiffies. As we have no way to find the next expiring timer fast, the code loops linearly and increments the base time one by one and checks for expired timers in each step. This causes unbound overhead spikes exactly in the moment when we should wake up as fast as possible. After a thorough analysis of real world data gathered on laptops, workstations, webservers and other machines (thanks Chris!) I came to the conclusion that the current 'classic' timer wheel implementation can be modified to address the above issues. The vast majority of timer wheel timers is canceled or rearmed before expiry. Most of them are timeouts for networking and other I/O tasks. The nature of timeouts is to catch the exception from normal operation (TCP ack timed out, disk does not respond, etc.). For these kinds of timeouts the accuracy of the timeout is not really a concern. Timeouts are very often approximate worst-case values and in case the timeout fires, we already waited for a long time and performance is down the drain already. The few timers which actually expire can be split into two categories: 1) Short expiry times which expect halfways accurate expiry 2) Long term expiry times are inaccurate today already due to the batching which is done for NOHZ automatically and also via the set_timer_slack() API. So for long term expiry timers we can avoid the cascading property and just leave them in the less granular outer wheels until expiry or cancelation. Timers which are armed with a timeout larger than the wheel capacity are no longer cascaded. We expire them with the longest possible timeout (6+ days). We have not observed such timeouts in our data collection, but at least we handle them, applying the rule of the least surprise. To avoid extending the wheel levels for HZ=1000 so we can accomodate the longest observed timeouts (5 days in the network conntrack code) we reduce the first level granularity on HZ=1000 to 4ms, which effectively is the same as the HZ=250 behaviour. From our data analysis there is nothing which relies on that 1ms granularity and as a side effect we get better batching and timer locality for the networking code as well. Contrary to the classic wheel the granularity of the next wheel is not the capacity of the first wheel. The granularities of the wheels are in the currently chosen setting 8 times the granularity of the previous wheel. So for HZ=250 we end up with the following granularity levels: Level Offset Granularity Range 0 0 4 ms 0 ms - 252 ms 1 64 32 ms 256 ms - 2044 ms (256ms - ~2s) 2 128 256 ms 2048 ms - 16380 ms (~2s - ~16s) 3 192 2048 ms (~2s) 16384 ms - 131068 ms (~16s - ~2m) 4 256 16384 ms (~16s) 131072 ms - 1048572 ms (~2m - ~17m) 5 320 131072 ms (~2m) 1048576 ms - 8388604 ms (~17m - ~2h) 6 384 1048576 ms (~17m) 8388608 ms - 67108863 ms (~2h - ~18h) 7 448 8388608 ms (~2h) 67108864 ms - 536870911 ms (~18h - ~6d) That's a worst case inaccuracy of 12.5% for the timers which are queued at the beginning of a level. So the new wheel concept addresses the old issues: 1) Cascading is avoided completely 2) By keeping the timers in the bucket until expiry/cancelation we can track the buckets which have timers enqueued in a bucket bitmap and therefore can look up the next expiring timer very fast and O(1). A further benefit of the concept is that the slack calculation which is done on every timer start is no longer necessary because the granularity levels provide natural batching already. Our extensive testing with various loads did not show any performance degradation vs. the current wheel implementation. This patch does not address the 'fast lookup' issue as we wanted to make sure that there is no regression introduced by the wheel redesign. The optimizations are in follow up patches. This patch contains fixes from Anna-Maria Gleixner and Richard Cochran. Signed-off-by: Thomas Gleixner Cc: Arjan van de Ven Cc: Chris Mason Cc: Eric Dumazet Cc: Frederic Weisbecker Cc: George Spelvin Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094342.108621834@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/timer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index 989f33d16ebf..5869ab9848fe 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -64,6 +64,8 @@ struct timer_list { #define TIMER_DEFERRABLE 0x00080000 #define TIMER_PINNED 0x00100000 #define TIMER_IRQSAFE 0x00200000 +#define TIMER_ARRAYSHIFT 22 +#define TIMER_ARRAYMASK 0xFFC00000 #define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \ .entry = { .next = TIMER_ENTRY_STATIC }, \ -- cgit v1.2.3 From 53bf837b78d155b8e1110b3c25b4d0d6391b8ff3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jul 2016 09:50:31 +0000 Subject: timers: Remove set_timer_slack() leftovers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We now have implicit batching in the timer wheel. The slack API is no longer used, so remove it. Signed-off-by: Thomas Gleixner Cc: Alan Stern Cc: Andrew F. Davis Cc: Arjan van de Ven Cc: Chris Mason Cc: David S. Miller Cc: David Woodhouse Cc: Dmitry Eremin-Solenikov Cc: Eric Dumazet Cc: Frederic Weisbecker Cc: George Spelvin Cc: Greg Kroah-Hartman Cc: Jaehoon Chung Cc: Jens Axboe Cc: John Stultz Cc: Josh Triplett Cc: Len Brown Cc: Linus Torvalds Cc: Mathias Nyman Cc: Pali Rohár Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Sebastian Reichel Cc: Ulf Hansson Cc: linux-block@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mmc@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: linux-usb@vger.kernel.org Cc: netdev@vger.kernel.org Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160704094342.189813118@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/timer.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index 5869ab9848fe..4419506b564e 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -19,7 +19,6 @@ struct timer_list { void (*function)(unsigned long); unsigned long data; u32 flags; - int slack; #ifdef CONFIG_TIMER_STATS int start_pid; @@ -73,7 +72,6 @@ struct timer_list { .expires = (_expires), \ .data = (_data), \ .flags = (_flags), \ - .slack = -1, \ __TIMER_LOCKDEP_MAP_INITIALIZER( \ __FILE__ ":" __stringify(__LINE__)) \ } @@ -193,8 +191,6 @@ extern int del_timer(struct timer_list * timer); extern int mod_timer(struct timer_list *timer, unsigned long expires); extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); -extern void set_timer_slack(struct timer_list *time, int slack_hz); - /* * The jiffies value which is added to now, when there is no timer * in the timer wheel: -- cgit v1.2.3