author	Thomas Gleixner <tglx@linutronix.de>	2021-02-01 15:12:05 +0000
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2021-02-03 23:16:16 +0100
commit	0838b8271ad99c8b1cfea47fc9054ccbce6c0e88 (patch)
tree	01846a5466be0caac5ad518975c7616c1f03cf00
parent	8d2b511c52c4ba36da1803f4f94f6dfe9c1cbf84 (diff)
download	linux-stable-0838b8271ad99c8b1cfea47fc9054ccbce6c0e88.tar.gz
	linux-stable-0838b8271ad99c8b1cfea47fc9054ccbce6c0e88.tar.bz2
	linux-stable-0838b8271ad99c8b1cfea47fc9054ccbce6c0e88.zip
futex: Replace PF_EXITPIDONE with a state
commit 3d4775df0a89240f671861c6ab6e8d59af8e9e41 upstream.

The futex exit handling relies on PF_ flags. That's suboptimal as it requires a smp_mb() and an ugly lock/unlock of the exiting task's pi_lock in the middle of do_exit() to enforce the observability of PF_EXITING in the futex code.

Add a futex_state member to task_struct and convert the PF_EXITPIDONE logic over to the new state. The PF_EXITING dependency will be cleaned up in a later step.

This prepares for handling various futex exit issues later.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20191106224556.149449274@linutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--	include/linux/futex.h	34
-rw-r--r--	include/linux/sched.h	2
-rw-r--r--	kernel/exit.c	18
-rw-r--r--	kernel/futex.c	17
4 files changed, 45 insertions, 26 deletions
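
Below is a minimal userspace sketch of the state model this patch introduces: the exit path publishes FUTEX_STATE_DEAD with a plain store, and a waiter reads the state under the owner's pi_lock, mirroring the attach_to_pi_owner() hunk further down. The struct, the pthread mutex standing in for p->pi_lock, and main() are illustrative stand-ins, not kernel code; build with -pthread.

/*
 * Sketch only: struct task and pthread_mutex_t are stand-ins for
 * struct task_struct and the raw spinlock p->pi_lock.
 */
#include <pthread.h>
#include <stdio.h>

enum {
	FUTEX_STATE_OK,
	FUTEX_STATE_DEAD,
};

struct task {
	pthread_mutex_t pi_lock;	/* stand-in for p->pi_lock */
	unsigned int futex_state;	/* replaces the PF_EXITPIDONE flag bit */
};

/* Exit side (futex_exit_done() in the diff): lockless publish of DEAD. */
static void futex_exit_done(struct task *tsk)
{
	tsk->futex_state = FUTEX_STATE_DEAD;
}

/* Waiter side: read the state under pi_lock, as attach_to_pi_owner() does. */
static int owner_is_exiting(struct task *p)
{
	int exiting;

	pthread_mutex_lock(&p->pi_lock);
	exiting = (p->futex_state != FUTEX_STATE_OK);
	pthread_mutex_unlock(&p->pi_lock);
	return exiting;
}

int main(void)
{
	struct task t = {
		.pi_lock = PTHREAD_MUTEX_INITIALIZER,
		.futex_state = FUTEX_STATE_OK,
	};

	printf("exiting before: %d\n", owner_is_exiting(&t));	/* 0 */
	futex_exit_done(&t);
	printf("exiting after:  %d\n", owner_is_exiting(&t));	/* 1 */
	return 0;
}
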
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 63d353cedfcd..a0de6fe28e00 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -55,6 +55,11 @@ union futex_key {
#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }
#ifdef CONFIG_FUTEX
+enum {
+ FUTEX_STATE_OK,
+ FUTEX_STATE_DEAD,
+};
+
static inline void futex_init_task(struct task_struct *tsk)
{
tsk->robust_list = NULL;
@@ -63,6 +68,34 @@ static inline void futex_init_task(struct task_struct *tsk)
#endif
INIT_LIST_HEAD(&tsk->pi_state_list);
tsk->pi_state_cache = NULL;
+ tsk->futex_state = FUTEX_STATE_OK;
+}
+
+/**
+ * futex_exit_done - Sets the task's futex state to FUTEX_STATE_DEAD
+ * @tsk: task to set the state on
+ *
+ * Set the futex exit state of the task locklessly. The futex waiter code
+ * observes that state when a task is exiting and loops until the task has
+ * actually finished the futex cleanup. The worst case for this is that the
+ * waiter runs through the wait loop until the state becomes visible.
+ *
+ * This has two callers:
+ *
+ * - futex_mm_release() after the futex exit cleanup has been done
+ *
+ * - do_exit() from the recursive fault handling path.
+ *
+ * In case of a recursive fault this is best effort. Either the futex exit
+ * code has run already or not. If the OWNER_DIED bit has been set on the
+ * futex then the waiter can take it over. If not, the problem is pushed
+ * back to user space. If the futex exit code did not run yet, then an
+ * already queued waiter might block forever, but there is nothing which
+ * can be done about that.
+ */
+static inline void futex_exit_done(struct task_struct *tsk)
+{
+ tsk->futex_state = FUTEX_STATE_DEAD;
}
void futex_mm_release(struct task_struct *tsk);
@@ -72,5 +105,6 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
#else
static inline void futex_init_task(struct task_struct *tsk) { }
static inline void futex_mm_release(struct task_struct *tsk) { }
+static inline void futex_exit_done(struct task_struct *tsk) { }
#endif
#endif
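
The futex_exit_done() kernel-doc above describes a lockless store that the waiter side simply keeps re-checking until it becomes visible. Here is a rough, standalone analogy using C11 atomics; the thread functions, the release/acquire ordering choice, and the sched_yield() retry are assumptions of this sketch, not taken from the kernel. Build with -pthread.

#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

enum { FUTEX_STATE_OK, FUTEX_STATE_DEAD };

static _Atomic unsigned int futex_state = FUTEX_STATE_OK;

/* Exiting task: finish the futex cleanup, then publish DEAD without a lock. */
static void *exiting_task(void *arg)
{
	(void)arg;
	/* ... pi state cleanup would happen here ... */
	atomic_store_explicit(&futex_state, FUTEX_STATE_DEAD,
			      memory_order_release);
	return NULL;
}

/* Waiter: worst case it goes around its wait loop again until the new
 * state becomes visible. */
static void *waiter(void *arg)
{
	(void)arg;
	while (atomic_load_explicit(&futex_state, memory_order_acquire) !=
	       FUTEX_STATE_DEAD)
		sched_yield();	/* retry the wait loop */
	puts("waiter observed FUTEX_STATE_DEAD");
	return NULL;
}

int main(void)
{
	pthread_t w, e;

	pthread_create(&w, NULL, waiter, NULL);
	pthread_create(&e, NULL, exiting_task, NULL);
	pthread_join(w, NULL);
	pthread_join(e, NULL);
	return 0;
}
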
diff --git a/include/linux/sched.h b/include/linux/sched.h
index df5f53ea2f86..bdd41a0127d1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1704,6 +1704,7 @@ struct task_struct {
#endif
struct list_head pi_state_list;
struct futex_pi_state *pi_state_cache;
+ unsigned int futex_state;
#endif
#ifdef CONFIG_PERF_EVENTS
struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
@@ -2099,7 +2100,6 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
* Per process flags
*/
#define PF_EXITING 0x00000004 /* getting shut down */
-#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
diff --git a/kernel/exit.c b/kernel/exit.c
index 5c20a32c9539..274a3c3834a1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -695,16 +695,7 @@ void do_exit(long code)
*/
if (unlikely(tsk->flags & PF_EXITING)) {
pr_alert("Fixing recursive fault but reboot is needed!\n");
- /*
- * We can do this unlocked here. The futex code uses
- * this flag just to verify whether the pi state
- * cleanup has been done or not. In the worst case it
- * loops once more. We pretend that the cleanup was
- * done as there is no way to return. Either the
- * OWNER_DIED bit is set by now or we push the blocked
- * task into the wait for ever nirwana as well.
- */
- tsk->flags |= PF_EXITPIDONE;
+ futex_exit_done(tsk);
set_current_state(TASK_UNINTERRUPTIBLE);
schedule();
}
@@ -793,12 +784,7 @@ void do_exit(long code)
* Make sure we are holding no locks:
*/
debug_check_no_locks_held();
- /*
- * We can do this unlocked here. The futex code uses this flag
- * just to verify whether the pi state cleanup has been done
- * or not. In the worst case it loops once more.
- */
- tsk->flags |= PF_EXITPIDONE;
+ futex_exit_done(tsk);
if (tsk->io_context)
exit_io_context(tsk);
diff --git a/kernel/futex.c b/kernel/futex.c
index 5282b9b8d1ec..e531789aa440 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1094,19 +1094,18 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
}
/*
- * We need to look at the task state flags to figure out,
- * whether the task is exiting. To protect against the do_exit
- * change of the task flags, we do this protected by
- * p->pi_lock:
+ * We need to look at the task state to figure out whether the
+ * task is exiting. To protect against the change of the task state
+ * in futex_exit_release(), we do this protected by p->pi_lock:
*/
raw_spin_lock_irq(&p->pi_lock);
- if (unlikely(p->flags & PF_EXITING)) {
+ if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
/*
- * The task is on the way out. When PF_EXITPIDONE is
- * set, we know that the task has finished the
- * cleanup:
+ * The task is on the way out. When the futex state is
+ * FUTEX_STATE_DEAD, we know that the task has finished
+ * the cleanup:
*/
- int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
+ int ret = (p->futex_state == FUTEX_STATE_DEAD) ? -ESRCH : -EAGAIN;
raw_spin_unlock_irq(&p->pi_lock);
put_task_struct(p);
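
The hunk above ends right after the error code is chosen. What follows is a purely hypothetical illustration of how a waiter-side caller consumes the two values: -EAGAIN means the owner is exiting but has not finished its futex cleanup, so the waiter goes around the wait loop once more; -ESRCH means the cleanup is done and the owner is gone. check_exit_state() and wait_for_owner() are invented stand-ins, not kernel interfaces.

#include <errno.h>
#include <stdio.h>

enum { FUTEX_STATE_OK, FUTEX_STATE_DEAD };

/* Invented stand-in for the pi_lock-protected check in attach_to_pi_owner(). */
static int check_exit_state(unsigned int futex_state)
{
	if (futex_state == FUTEX_STATE_OK)
		return 0;
	/* DEAD: cleanup finished -> -ESRCH; otherwise still exiting -> -EAGAIN */
	return (futex_state == FUTEX_STATE_DEAD) ? -ESRCH : -EAGAIN;
}

/* Hypothetical waiter: -EAGAIN sends it around the wait loop once more. */
static int wait_for_owner(const unsigned int *futex_state)
{
	int ret;

	do {
		ret = check_exit_state(*futex_state);
		/* a real waiter would drop its locks and reschedule here */
	} while (ret == -EAGAIN);

	return ret;	/* 0: owner usable, -ESRCH: owner already gone */
}

int main(void)
{
	unsigned int state = FUTEX_STATE_DEAD;

	printf("wait_for_owner: %d (-ESRCH expected)\n", wait_for_owner(&state));
	return 0;
}
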