diff options
author | Oleg Nesterov <oleg@redhat.com> | 2012-08-26 21:12:09 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2012-09-13 16:47:33 +0200 |
commit | ac3d0da8f3290b3d394cdb7f50604424a7cd6092 (patch) | |
tree | c1a126ac8ab3d87f22baaa51fd786f3c56c22327 /kernel/task_work.c | |
parent | 15674868d6c5985466835c56dd89d39235f16302 (diff) | |
download | linux-ac3d0da8f3290b3d394cdb7f50604424a7cd6092.tar.gz linux-ac3d0da8f3290b3d394cdb7f50604424a7cd6092.tar.bz2 linux-ac3d0da8f3290b3d394cdb7f50604424a7cd6092.zip |
task_work: Make task_work_add() lockless
Change task_work's to use llist-like code to avoid pi_lock
in task_work_add(), this makes it useable under rq->lock.
task_work_cancel() and task_work_run() still use pi_lock
to synchronize with each other.
(This is in preparation for a deadlock fix.)
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20120826191209.GA4221@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/task_work.c')
-rw-r--r-- | kernel/task_work.c | 95 |
1 files changed, 48 insertions, 47 deletions
diff --git a/kernel/task_work.c b/kernel/task_work.c index d320d44903bd..f13ec0bda1d5 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -3,25 +3,18 @@ #include <linux/tracehook.h> int -task_work_add(struct task_struct *task, struct callback_head *twork, bool notify) +task_work_add(struct task_struct *task, struct callback_head *work, bool notify) { - struct callback_head *last, *first; - unsigned long flags; - + struct callback_head *head; /* * Not inserting the new work if the task has already passed * exit_task_work() is the responisbility of callers. */ - raw_spin_lock_irqsave(&task->pi_lock, flags); - last = task->task_works; - first = last ? last->next : twork; - twork->next = first; - if (last) - last->next = twork; - task->task_works = twork; - raw_spin_unlock_irqrestore(&task->pi_lock, flags); + do { + head = ACCESS_ONCE(task->task_works); + work->next = head; + } while (cmpxchg(&task->task_works, head, work) != head); - /* test_and_set_bit() implies mb(), see tracehook_notify_resume(). */ if (notify) set_notify_resume(task); return 0; @@ -30,52 +23,60 @@ task_work_add(struct task_struct *task, struct callback_head *twork, bool notify struct callback_head * task_work_cancel(struct task_struct *task, task_work_func_t func) { + struct callback_head **pprev = &task->task_works; + struct callback_head *work = NULL; unsigned long flags; - struct callback_head *last, *res = NULL; - + /* + * If cmpxchg() fails we continue without updating pprev. + * Either we raced with task_work_add() which added the + * new entry before this work, we will find it again. Or + * we raced with task_work_run(), *pprev == NULL. + */ raw_spin_lock_irqsave(&task->pi_lock, flags); - last = task->task_works; - if (last) { - struct callback_head *q = last, *p = q->next; - while (1) { - if (p->func == func) { - q->next = p->next; - if (p == last) - task->task_works = q == p ? NULL : q; - res = p; - break; - } - if (p == last) - break; - q = p; - p = q->next; - } + while ((work = ACCESS_ONCE(*pprev))) { + read_barrier_depends(); + if (work->func != func) + pprev = &work->next; + else if (cmpxchg(pprev, work, work->next) == work) + break; } raw_spin_unlock_irqrestore(&task->pi_lock, flags); - return res; + + return work; } void task_work_run(void) { struct task_struct *task = current; - struct callback_head *p, *q; + struct callback_head *work, *head, *next; - while (1) { - raw_spin_lock_irq(&task->pi_lock); - p = task->task_works; - task->task_works = NULL; - raw_spin_unlock_irq(&task->pi_lock); + for (;;) { + work = xchg(&task->task_works, NULL); + if (!work) + break; + /* + * Synchronize with task_work_cancel(). It can't remove + * the first entry == work, cmpxchg(task_works) should + * fail, but it can play with *work and other entries. + */ + raw_spin_unlock_wait(&task->pi_lock); + smp_mb(); - if (unlikely(!p)) - return; + /* Reverse the list to run the works in fifo order */ + head = NULL; + do { + next = work->next; + work->next = head; + head = work; + work = next; + } while (work); - q = p->next; /* head */ - p->next = NULL; /* cut it */ - while (q) { - p = q->next; - q->func(q); - q = p; + work = head; + do { + next = work->next; + work->func(work); + work = next; cond_resched(); - } + } while (work); } } |