From 286549dcaf4f128cb04f0ad56dfb677d7d19b500 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 21 Jan 2014 15:51:03 -0800 Subject: sched: add tracepoints related to NUMA task migration This patch adds three tracepoints o trace_sched_move_numa when a task is moved to a node o trace_sched_swap_numa when a task is swapped with another task o trace_sched_stick_numa when a numa-related migration fails The tracepoints allow the NUMA scheduler activity to be monitored and the following high-level metrics can be calculated o NUMA migrated stuck nr trace_sched_stick_numa o NUMA migrated idle nr trace_sched_move_numa o NUMA migrated swapped nr trace_sched_swap_numa o NUMA local swapped trace_sched_swap_numa src_nid == dst_nid (should never happen) o NUMA remote swapped trace_sched_swap_numa src_nid != dst_nid (should == NUMA migrated swapped) o NUMA group swapped trace_sched_swap_numa src_ngid == dst_ngid Maybe a small number of these are acceptable but a high number would be a major surprise. It would be even worse if bounces are frequent. o NUMA avg task migs. Average number of migrations for tasks o NUMA stddev task mig Self-explanatory o NUMA max task migs. Maximum number of migrations for a single task In general the intent of the tracepoints is to help diagnose problems where automatic NUMA balancing appears to be doing an excessive amount of useless work. [akpm@linux-foundation.org: remove semicolon-after-if, repair coding-style] Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel Cc: Alex Thorlton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched/core.c | 2 ++ kernel/sched/fair.c | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'kernel/sched') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 36c951b7eef8..5ae36cc11fe5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1108,6 +1108,7 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p) if (!cpumask_test_cpu(arg.src_cpu, tsk_cpus_allowed(arg.dst_task))) goto out; + trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu); ret = stop_two_cpus(arg.dst_cpu, arg.src_cpu, migrate_swap_stop, &arg); out: @@ -4603,6 +4604,7 @@ int migrate_task_to(struct task_struct *p, int target_cpu) /* TODO: This is not properly updating schedstats */ + trace_sched_move_numa(p, curr_cpu, target_cpu); return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg); } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b24b6cfde9aa..867b0a4b0893 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1250,11 +1250,15 @@ static int task_numa_migrate(struct task_struct *p) p->numa_scan_period = task_scan_min(p); if (env.best_task == NULL) { - int ret = migrate_task_to(p, env.best_cpu); + ret = migrate_task_to(p, env.best_cpu); + if (ret != 0) + trace_sched_stick_numa(p, env.src_cpu, env.best_cpu); return ret; } ret = migrate_swap(p, env.best_task); + if (ret != 0) + trace_sched_stick_numa(p, env.src_cpu, task_cpu(env.best_task)); put_task_struct(env.best_task); return ret; } -- cgit v1.2.3