diff options
author | Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> | 2009-03-31 16:56:03 +0900 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-04-01 16:44:16 +0200 |
commit | c5f8d99585d7b5b7e857fabf8aefd0174903a98c (patch) | |
tree | 504127a7d4b6a0e3aee56d0822e5e8b23f8062d7 /kernel | |
parent | 13b8bd0a5713bdf05659019badd7c0407984ece1 (diff) | |
download | linux-stable-c5f8d99585d7b5b7e857fabf8aefd0174903a98c.tar.gz linux-stable-c5f8d99585d7b5b7e857fabf8aefd0174903a98c.tar.bz2 linux-stable-c5f8d99585d7b5b7e857fabf8aefd0174903a98c.zip |
posixtimers, sched: Fix posix clock monotonicity
Impact: Regression fix (against clock_gettime() backwarding bug)
This patch re-introduces a couple of functions, task_sched_runtime
and thread_group_sched_runtime, which was once removed at the
time of 2.6.28-rc1.
These functions protect the sampling of thread/process clock with
rq lock. This rq lock is required not to update rq->clock during
the sampling.
i.e.
The clock_gettime() may return
((accounted runtime before update) + (delta after update))
that is less than what it should be.
v2 -> v3:
- Rename static helper function __task_delta_exec()
to do_task_delta_exec() since -tip tree already has
a __task_delta_exec() of different version.
v1 -> v2:
- Revises comments of function and patch description.
- Add note about accuracy of thread group's runtime.
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: stable@kernel.org [2.6.28.x][2.6.29.x]
LKML-Reference: <49D1CC93.4080401@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/posix-cpu-timers.c | 7 | ||||
-rw-r--r-- | kernel/sched.c | 65 |
2 files changed, 61 insertions, 11 deletions
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index fa07da94d7be..4318c3085788 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -224,7 +224,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, cpu->cpu = virt_ticks(p); break; case CPUCLOCK_SCHED: - cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p); + cpu->sched = task_sched_runtime(p); break; } return 0; @@ -240,18 +240,19 @@ static int cpu_clock_sample_group(const clockid_t which_clock, { struct task_cputime cputime; - thread_group_cputime(p, &cputime); switch (CPUCLOCK_WHICH(which_clock)) { default: return -EINVAL; case CPUCLOCK_PROF: + thread_group_cputime(p, &cputime); cpu->cpu = cputime_add(cputime.utime, cputime.stime); break; case CPUCLOCK_VIRT: + thread_group_cputime(p, &cputime); cpu->cpu = cputime.utime; break; case CPUCLOCK_SCHED: - cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); + cpu->sched = thread_group_sched_runtime(p); break; } return 0; diff --git a/kernel/sched.c b/kernel/sched.c index cc397aae5eae..c8d7f17bd036 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4139,9 +4139,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat); EXPORT_PER_CPU_SYMBOL(kstat); /* - * Return any ns on the sched_clock that have not yet been banked in + * Return any ns on the sched_clock that have not yet been accounted in * @p in case that task is currently running. + * + * Called with task_rq_lock() held on @rq. */ +static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) +{ + u64 ns = 0; + + if (task_current(rq, p)) { + update_rq_clock(rq); + ns = rq->clock - p->se.exec_start; + if ((s64)ns < 0) + ns = 0; + } + + return ns; +} + unsigned long long task_delta_exec(struct task_struct *p) { unsigned long flags; @@ -4149,16 +4165,49 @@ unsigned long long task_delta_exec(struct task_struct *p) u64 ns = 0; rq = task_rq_lock(p, &flags); + ns = do_task_delta_exec(p, rq); + task_rq_unlock(rq, &flags); - if (task_current(rq, p)) { - u64 delta_exec; + return ns; +} - update_rq_clock(rq); - delta_exec = rq->clock - p->se.exec_start; - if ((s64)delta_exec > 0) - ns = delta_exec; - } +/* + * Return accounted runtime for the task. + * In case the task is currently running, return the runtime plus current's + * pending runtime that have not been accounted yet. + */ +unsigned long long task_sched_runtime(struct task_struct *p) +{ + unsigned long flags; + struct rq *rq; + u64 ns = 0; + + rq = task_rq_lock(p, &flags); + ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); + task_rq_unlock(rq, &flags); + + return ns; +} +/* + * Return sum_exec_runtime for the thread group. + * In case the task is currently running, return the sum plus current's + * pending runtime that have not been accounted yet. + * + * Note that the thread group might have other running tasks as well, + * so the return value not includes other pending runtime that other + * running tasks might have. + */ +unsigned long long thread_group_sched_runtime(struct task_struct *p) +{ + struct task_cputime totals; + unsigned long flags; + struct rq *rq; + u64 ns; + + rq = task_rq_lock(p, &flags); + thread_group_cputime(p, &totals); + ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq); task_rq_unlock(rq, &flags); return ns; |