summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2013-04-09 09:33:34 +0200
committerThomas Gleixner <tglx@linutronix.de>2013-04-12 14:18:43 +0200
commitf2530dc71cf0822f90bb63ea4600caaef33a66bb (patch)
tree182473ca3a295cdbd907826aa22bae94c52fa486
parentcfb63bafdb87bbcdc5d6dbbca623d3f69475f118 (diff)
downloadlinux-f2530dc71cf0822f90bb63ea4600caaef33a66bb.tar.gz
linux-f2530dc71cf0822f90bb63ea4600caaef33a66bb.tar.bz2
linux-f2530dc71cf0822f90bb63ea4600caaef33a66bb.zip
kthread: Prevent unpark race which puts threads on the wrong cpu
The smpboot threads rely on the park/unpark mechanism which binds per cpu threads on a particular core. Though the functionality is racy: CPU0 CPU1 CPU2 unpark(T) wake_up_process(T) clear(SHOULD_PARK) T runs leave parkme() due to !SHOULD_PARK bind_to(CPU2) BUG_ON(wrong CPU) We cannot let the tasks move themself to the target CPU as one of those tasks is actually the migration thread itself, which requires that it starts running on the target cpu right away. The solution to this problem is to prevent wakeups in park mode which are not from unpark(). That way we can guarantee that the association of the task to the target cpu is working correctly. Add a new task state (TASK_PARKED) which prevents other wakeups and use this state explicitly for the unpark wakeup. Peter noticed: Also, since the task state is visible to userspace and all the parked tasks are still in the PID space, its a good hint in ps and friends that these tasks aren't really there for the moment. The migration thread has another related issue. CPU0 CPU1 Bring up CPU2 create_thread(T) park(T) wait_for_completion() parkme() complete() sched_set_stop_task() schedule(TASK_PARKED) The sched_set_stop_task() call is issued while the task is on the runqueue of CPU1 and that confuses the hell out of the stop_task class on that cpu. So we need the same synchronizaion before sched_set_stop_task(). Reported-by: Dave Jones <davej@redhat.com> Reported-and-tested-by: Dave Hansen <dave@sr71.net> Reported-and-tested-by: Borislav Petkov <bp@alien8.de> Acked-by: Peter Ziljstra <peterz@infradead.org> Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com> Cc: dhillf@gmail.com Cc: Ingo Molnar <mingo@kernel.org> Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1304091635430.21884@ionos Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r--fs/proc/array.c1
-rw-r--r--include/linux/sched.h5
-rw-r--r--include/trace/events/sched.h2
-rw-r--r--kernel/kthread.c52
-rw-r--r--kernel/smpboot.c14
5 files changed, 45 insertions, 29 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c
index f7ed9ee46eb9..cbd0f1b324b9 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -143,6 +143,7 @@ static const char * const task_state_array[] = {
"x (dead)", /* 64 */
"K (wakekill)", /* 128 */
"W (waking)", /* 256 */
+ "P (parked)", /* 512 */
};
static inline const char *get_task_state(struct task_struct *tsk)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d35d2b6ddbfb..e692a022527b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -163,9 +163,10 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
#define TASK_DEAD 64
#define TASK_WAKEKILL 128
#define TASK_WAKING 256
-#define TASK_STATE_MAX 512
+#define TASK_PARKED 512
+#define TASK_STATE_MAX 1024
-#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW"
+#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP"
extern char ___assert_task_state[1 - 2*!!(
sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 5a8671e8a67f..e5586caff67a 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -147,7 +147,7 @@ TRACE_EVENT(sched_switch,
__print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
{ 16, "Z" }, { 32, "X" }, { 64, "x" },
- { 128, "W" }) : "R",
+ { 128, "K" }, { 256, "W" }, { 512, "P" }) : "R",
__entry->prev_state & TASK_STATE_MAX ? "+" : "",
__entry->next_comm, __entry->next_pid, __entry->next_prio)
);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 691dc2ef9baf..9eb7fed0bbaa 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -124,12 +124,12 @@ void *kthread_data(struct task_struct *task)
static void __kthread_parkme(struct kthread *self)
{
- __set_current_state(TASK_INTERRUPTIBLE);
+ __set_current_state(TASK_PARKED);
while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) {
if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags))
complete(&self->parked);
schedule();
- __set_current_state(TASK_INTERRUPTIBLE);
+ __set_current_state(TASK_PARKED);
}
clear_bit(KTHREAD_IS_PARKED, &self->flags);
__set_current_state(TASK_RUNNING);
@@ -256,8 +256,13 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
}
EXPORT_SYMBOL(kthread_create_on_node);
-static void __kthread_bind(struct task_struct *p, unsigned int cpu)
+static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state)
{
+ /* Must have done schedule() in kthread() before we set_task_cpu */
+ if (!wait_task_inactive(p, state)) {
+ WARN_ON(1);
+ return;
+ }
/* It's safe because the task is inactive. */
do_set_cpus_allowed(p, cpumask_of(cpu));
p->flags |= PF_THREAD_BOUND;
@@ -274,12 +279,7 @@ static void __kthread_bind(struct task_struct *p, unsigned int cpu)
*/
void kthread_bind(struct task_struct *p, unsigned int cpu)
{
- /* Must have done schedule() in kthread() before we set_task_cpu */
- if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
- WARN_ON(1);
- return;
- }
- __kthread_bind(p, cpu);
+ __kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(kthread_bind);
@@ -324,6 +324,22 @@ static struct kthread *task_get_live_kthread(struct task_struct *k)
return NULL;
}
+static void __kthread_unpark(struct task_struct *k, struct kthread *kthread)
+{
+ clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+ /*
+ * We clear the IS_PARKED bit here as we don't wait
+ * until the task has left the park code. So if we'd
+ * park before that happens we'd see the IS_PARKED bit
+ * which might be about to be cleared.
+ */
+ if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
+ if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
+ __kthread_bind(k, kthread->cpu, TASK_PARKED);
+ wake_up_state(k, TASK_PARKED);
+ }
+}
+
/**
* kthread_unpark - unpark a thread created by kthread_create().
* @k: thread created by kthread_create().
@@ -336,20 +352,8 @@ void kthread_unpark(struct task_struct *k)
{
struct kthread *kthread = task_get_live_kthread(k);
- if (kthread) {
- clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
- /*
- * We clear the IS_PARKED bit here as we don't wait
- * until the task has left the park code. So if we'd
- * park before that happens we'd see the IS_PARKED bit
- * which might be about to be cleared.
- */
- if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
- if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
- __kthread_bind(k, kthread->cpu);
- wake_up_process(k);
- }
- }
+ if (kthread)
+ __kthread_unpark(k, kthread);
put_task_struct(k);
}
@@ -407,7 +411,7 @@ int kthread_stop(struct task_struct *k)
trace_sched_kthread_stop(k);
if (kthread) {
set_bit(KTHREAD_SHOULD_STOP, &kthread->flags);
- clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+ __kthread_unpark(k, kthread);
wake_up_process(k);
wait_for_completion(&kthread->exited);
}
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 8eaed9aa9cf0..02fc5c933673 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -185,8 +185,18 @@ __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
}
get_task_struct(tsk);
*per_cpu_ptr(ht->store, cpu) = tsk;
- if (ht->create)
- ht->create(cpu);
+ if (ht->create) {
+ /*
+ * Make sure that the task has actually scheduled out
+ * into park position, before calling the create
+ * callback. At least the migration thread callback
+ * requires that the task is off the runqueue.
+ */
+ if (!wait_task_inactive(tsk, TASK_PARKED))
+ WARN_ON(1);
+ else
+ ht->create(cpu);
+ }
return 0;
}