summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/clockchips.h9
-rw-r--r--include/linux/sched.h12
-rw-r--r--kernel/hrtimer.c51
-rw-r--r--kernel/sched.c5
-rw-r--r--kernel/time/clockevents.c12
-rw-r--r--kernel/timer.c17
6 files changed, 101 insertions, 5 deletions
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 3a1dbba4d3ae..20a100fe2b4f 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -143,3 +143,12 @@ extern void clockevents_notify(unsigned long reason, void *arg);
#endif
#endif
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+extern ktime_t clockevents_get_next_event(int cpu);
+#else
+static inline ktime_t clockevents_get_next_event(int cpu)
+{
+ return (ktime_t) { .tv64 = KTIME_MAX };
+}
+#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 618504010400..311dec123974 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -257,6 +257,7 @@ extern void task_rq_unlock_wait(struct task_struct *p);
extern cpumask_var_t nohz_cpu_mask;
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
extern int select_nohz_load_balancer(int cpu);
+extern int get_nohz_load_balancer(void);
#else
static inline int select_nohz_load_balancer(int cpu)
{
@@ -1772,6 +1773,17 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
struct file *file, void __user *buffer, size_t *length,
loff_t *ppos);
#endif
+#ifdef CONFIG_SCHED_DEBUG
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+ return sysctl_timer_migration;
+}
+#else
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+ return 1;
+}
+#endif
extern unsigned int sysctl_sched_rt_period;
extern int sysctl_sched_rt_runtime;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index c71bcd549241..b675a67c9ac3 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -43,6 +43,8 @@
#include <linux/seq_file.h>
#include <linux/err.h>
#include <linux/debugobjects.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
#include <asm/uaccess.h>
@@ -198,8 +200,19 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
{
struct hrtimer_clock_base *new_base;
struct hrtimer_cpu_base *new_cpu_base;
+ int cpu, preferred_cpu = -1;
+
+ cpu = smp_processor_id();
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+ if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
+ preferred_cpu = get_nohz_load_balancer();
+ if (preferred_cpu >= 0)
+ cpu = preferred_cpu;
+ }
+#endif
- new_cpu_base = &__get_cpu_var(hrtimer_bases);
+again:
+ new_cpu_base = &per_cpu(hrtimer_bases, cpu);
new_base = &new_cpu_base->clock_base[base->index];
if (base != new_base) {
@@ -219,6 +232,40 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
timer->base = NULL;
spin_unlock(&base->cpu_base->lock);
spin_lock(&new_base->cpu_base->lock);
+
+ /* Optimized away for NOHZ=n SMP=n */
+ if (cpu == preferred_cpu) {
+ /* Calculate clock monotonic expiry time */
+#ifdef CONFIG_HIGH_RES_TIMERS
+ ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
+ new_base->offset);
+#else
+ ktime_t expires = hrtimer_get_expires(timer);
+#endif
+
+ /*
+ * Get the next event on target cpu from the
+ * clock events layer.
+ * This covers the highres=off nohz=on case as well.
+ */
+ ktime_t next = clockevents_get_next_event(cpu);
+
+ ktime_t delta = ktime_sub(expires, next);
+
+ /*
+ * We do not migrate the timer when it is expiring
+ * before the next event on the target cpu because
+ * we cannot reprogram the target cpu hardware and
+ * we would cause it to fire late.
+ */
+ if (delta.tv64 < 0) {
+ cpu = smp_processor_id();
+ spin_unlock(&new_base->cpu_base->lock);
+ spin_lock(&base->cpu_base->lock);
+ timer->base = base;
+ goto again;
+ }
+ }
timer->base = new_base;
}
return new_base;
@@ -236,7 +283,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
return base;
}
-# define switch_hrtimer_base(t, b) (b)
+# define switch_hrtimer_base(t, b, p) (b)
#endif /* !CONFIG_SMP */
diff --git a/kernel/sched.c b/kernel/sched.c
index 7f1dd56af863..9fe3774a0fd3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4244,6 +4244,11 @@ static struct {
.load_balancer = ATOMIC_INIT(-1),
};
+int get_nohz_load_balancer(void)
+{
+ return atomic_read(&nohz.load_balancer);
+}
+
/*
* This routine will try to nominate the ilb (idle load balancing)
* owner among the cpus whose ticks are stopped. ilb owner will do the idle
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index d13be216a790..ab20ded013bd 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -18,6 +18,7 @@
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/sysdev.h>
+#include <linux/tick.h>
/* The registered clock event devices */
static LIST_HEAD(clockevent_devices);
@@ -251,4 +252,15 @@ void clockevents_notify(unsigned long reason, void *arg)
spin_unlock(&clockevents_lock);
}
EXPORT_SYMBOL_GPL(clockevents_notify);
+
+ktime_t clockevents_get_next_event(int cpu)
+{
+ struct tick_device *td;
+ struct clock_event_device *dev;
+
+ td = &per_cpu(tick_cpu_device, cpu);
+ dev = td->evtdev;
+
+ return dev->next_event;
+}
#endif
diff --git a/kernel/timer.c b/kernel/timer.c
index 3424dfd11d50..3f841db5edf9 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -37,6 +37,7 @@
#include <linux/delay.h>
#include <linux/tick.h>
#include <linux/kallsyms.h>
+#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -609,9 +610,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
{
struct tvec_base *base, *new_base;
unsigned long flags;
- int ret;
-
- ret = 0;
+ int ret = 0 , cpu;
timer_stats_timer_set_start_info(timer);
BUG_ON(!timer->function);
@@ -630,6 +629,18 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
new_base = __get_cpu_var(tvec_bases);
+ cpu = smp_processor_id();
+
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+ if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
+ int preferred_cpu = get_nohz_load_balancer();
+
+ if (preferred_cpu >= 0)
+ cpu = preferred_cpu;
+ }
+#endif
+ new_base = per_cpu(tvec_bases, cpu);
+
if (base != new_base) {
/*
* We are trying to schedule the timer on the local CPU.