summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeter Zijlstra <a.p.zijlstra@chello.nl>2008-06-27 13:41:35 +0200
committerIngo Molnar <mingo@elte.hu>2008-06-27 14:31:45 +0200
commit2398f2c6d34b43025f274fc42eaca34d23ec2320 (patch)
tree0ff3e9edf12c6b4485e4fa94f47a79b44d75376a
parentcd80917e4ff465ea77106f8e4fb631eedc4cf426 (diff)
downloadlinux-2398f2c6d34b43025f274fc42eaca34d23ec2320.tar.gz
linux-2398f2c6d34b43025f274fc42eaca34d23ec2320.tar.bz2
linux-2398f2c6d34b43025f274fc42eaca34d23ec2320.zip
sched: update shares on wakeup
We found that the affine wakeup code needs rather accurate load figures to be effective. The trouble is that updating the load figures is fairly expensive with group scheduling. Therefore ratelimit the updating. Signed-off-by: Peter Zijlstra <peterz@infradead.org> Cc: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com> Cc: Mike Galbraith <efault@gmx.de> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--include/linux/sched.h3
-rw-r--r--kernel/sched.c30
-rw-r--r--kernel/sched_features.h3
-rw-r--r--kernel/sysctl.c8
4 files changed, 42 insertions, 2 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index eaf821072dbd..835b6c6fcc56 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -783,6 +783,8 @@ struct sched_domain {
unsigned int balance_interval; /* initialise to 1. units in ms. */
unsigned int nr_balance_failed; /* initialise to 0 */
+ u64 last_update;
+
#ifdef CONFIG_SCHEDSTATS
/* load_balance() stats */
unsigned int lb_count[CPU_MAX_IDLE_TYPES];
@@ -1605,6 +1607,7 @@ extern unsigned int sysctl_sched_child_runs_first;
extern unsigned int sysctl_sched_features;
extern unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_nr_migrate;
+extern unsigned int sysctl_sched_shares_ratelimit;
int sched_nr_latency_handler(struct ctl_table *table, int write,
struct file *file, void __user *buffer, size_t *length,
diff --git a/kernel/sched.c b/kernel/sched.c
index 1cff969f6646..62db0891025a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -778,6 +778,12 @@ late_initcall(sched_init_debug);
const_debug unsigned int sysctl_sched_nr_migrate = 32;
/*
+ * ratelimit for updating the group shares.
+ * default: 0.5ms
+ */
+const_debug unsigned int sysctl_sched_shares_ratelimit = 500000;
+
+/*
* period over which we measure -rt task cpu usage in us.
* default: 1s
*/
@@ -1590,7 +1596,13 @@ tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd)
static void update_shares(struct sched_domain *sd)
{
- walk_tg_tree(tg_nop, tg_shares_up, 0, sd);
+ u64 now = cpu_clock(raw_smp_processor_id());
+ s64 elapsed = now - sd->last_update;
+
+ if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
+ sd->last_update = now;
+ walk_tg_tree(tg_nop, tg_shares_up, 0, sd);
+ }
}
static void update_shares_locked(struct rq *rq, struct sched_domain *sd)
@@ -2199,6 +2211,22 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
if (!sched_feat(SYNC_WAKEUPS))
sync = 0;
+#ifdef CONFIG_SMP
+ if (sched_feat(LB_WAKEUP_UPDATE)) {
+ struct sched_domain *sd;
+
+ this_cpu = raw_smp_processor_id();
+ cpu = task_cpu(p);
+
+ for_each_domain(this_cpu, sd) {
+ if (cpu_isset(cpu, sd->span)) {
+ update_shares(sd);
+ break;
+ }
+ }
+ }
+#endif
+
smp_wmb();
rq = task_rq_lock(p, &flags);
old_state = p->state;
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index d56e3053e746..7d616d2a2a3f 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -8,4 +8,5 @@ SCHED_FEAT(SYNC_WAKEUPS, 1)
SCHED_FEAT(HRTICK, 1)
SCHED_FEAT(DOUBLE_TICK, 0)
SCHED_FEAT(ASYM_GRAN, 1)
-SCHED_FEAT(LB_BIAS, 0) \ No newline at end of file
+SCHED_FEAT(LB_BIAS, 0)
+SCHED_FEAT(LB_WAKEUP_UPDATE, 1)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 29116652dca8..fe8cdc80ff02 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -266,6 +266,14 @@ static struct ctl_table kern_table[] = {
},
{
.ctl_name = CTL_UNNUMBERED,
+ .procname = "sched_shares_ratelimit",
+ .data = &sysctl_sched_shares_ratelimit,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
.procname = "sched_child_runs_first",
.data = &sysctl_sched_child_runs_first,
.maxlen = sizeof(unsigned int),