sched: Make select_fallback_rq() cpuset friendly

commit 9084bb8246ea935b98320554229e2f371f7f52fa upstream Introduce cpuset_cpus_allowed_fallback() helper to fix the cpuset problems with select_fallback_rq(). It can be called from any context and can't use any cpuset locks including task_lock(). It is called when the task doesn't have online cpus in ->cpus_allowed but ttwu/etc must be able to find a suitable cpu. I am not proud of this patch. Everything which needs such a fat comment can't be good even if correct. But I'd prefer to not change the locking rules in the code I hardly understand, and in any case I believe this simple change make the code much more correct compared to deadlocks we currently have. Signed-off-by: Oleg Nesterov <oleg@redhat.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <20100315091027.GA9155@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Mike Galbraith <efault@gmx.de> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
author: Oleg Nesterov <oleg@redhat.com> 2010-03-15 10:10:27 +0100
committer: Greg Kroah-Hartman <gregkh@suse.de> 2010-09-20 13:18:08 -0700
commit: 81695bf0ee1a6b3f2a8f183273d945decc1d3f18 (patch)
tree: 23ae28a2592b20f7068d48a8b589acf39670be98 /kernel
parent: 1ccc5a299bd54b1c0f0bac6316c45b6ce31b9ac1 (diff)
download: linux-stable-81695bf0ee1a6b3f2a8f183273d945decc1d3f18.tar.gz
linux-stable-81695bf0ee1a6b3f2a8f183273d945decc1d3f18.tar.bz2
linux-stable-81695bf0ee1a6b3f2a8f183273d945decc1d3f18.zip
2 files changed, 43 insertions, 3 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 79bcd3d58082..b120fd01a353 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2151,6 +2151,48 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 	mutex_unlock(&callback_mutex);
 }
 
+int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
+{
+	const struct cpuset *cs;
+	int cpu;
+
+	rcu_read_lock();
+	cs = task_cs(tsk);
+	if (cs)
+		cpumask_copy(&tsk->cpus_allowed, cs->cpus_allowed);
+	rcu_read_unlock();
+
+	/*
+	 * We own tsk->cpus_allowed, nobody can change it under us.
+	 *
+	 * But we used cs && cs->cpus_allowed lockless and thus can
+	 * race with cgroup_attach_task() or update_cpumask() and get
+	 * the wrong tsk->cpus_allowed. However, both cases imply the
+	 * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr()
+	 * which takes task_rq_lock().
+	 *
+	 * If we are called after it dropped the lock we must see all
+	 * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary
+	 * set any mask even if it is not right from task_cs() pov,
+	 * the pending set_cpus_allowed_ptr() will fix things.
+	 */
+
+	cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask);
+	if (cpu >= nr_cpu_ids) {
+		/*
+		 * Either tsk->cpus_allowed is wrong (see above) or it
+		 * is actually empty. The latter case is only possible
+		 * if we are racing with remove_tasks_in_empty_cpuset().
+		 * Like above we can temporary set any mask and rely on
+		 * set_cpus_allowed_ptr() as synchronization point.
+		 */
+		cpumask_copy(&tsk->cpus_allowed, cpu_possible_mask);
+		cpu = cpumask_any(cpu_active_mask);
+	}
+
+	return cpu;
+}
+
 void cpuset_init_current_mems_allowed(void)
 {
 	nodes_setall(current->mems_allowed);
diff --git a/kernel/sched.c b/kernel/sched.c
index 4541bf324ac6..befe608e14be 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2353,9 +2353,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 
 	/* No more Mr. Nice Guy. */
 	if (unlikely(dest_cpu >= nr_cpu_ids)) {
-		cpumask_copy(&p->cpus_allowed, cpu_possible_mask);
-		dest_cpu = cpumask_any(cpu_active_mask);
-
+		dest_cpu = cpuset_cpus_allowed_fallback(p);
 		/*
 		 * Don't tell them about moving exiting tasks or
 		 * kernel threads (both mm NULL), since they never
author	Oleg Nesterov <oleg@redhat.com>	2010-03-15 10:10:27 +0100
committer	Greg Kroah-Hartman <gregkh@suse.de>	2010-09-20 13:18:08 -0700
commit	81695bf0ee1a6b3f2a8f183273d945decc1d3f18 (patch)
tree	23ae28a2592b20f7068d48a8b589acf39670be98 /kernel
parent	1ccc5a299bd54b1c0f0bac6316c45b6ce31b9ac1 (diff)
download	linux-stable-81695bf0ee1a6b3f2a8f183273d945decc1d3f18.tar.gz linux-stable-81695bf0ee1a6b3f2a8f183273d945decc1d3f18.tar.bz2 linux-stable-81695bf0ee1a6b3f2a8f183273d945decc1d3f18.zip