sched/core: Free the stack early if CONFIG_THREAD_INFO_IN_TASK

We currently keep every task's stack around until the task_struct itself is freed. This means that we keep the stack allocation alive for longer than necessary and that, under load, we free stacks in big batches whenever RCU drops the last task reference. Neither of these is good for reuse of cache-hot memory, and freeing in batches prevents us from usefully caching small numbers of vmalloced stacks. On architectures that have thread_info on the stack, we can't easily change this, but on architectures that set THREAD_INFO_IN_TASK, we can free it as soon as the task is dead. Signed-off-by: Andy Lutomirski <luto@kernel.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Jann Horn <jann@thejh.net> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/08ca06cde00ebed0046c5d26cbbf3fbb7ef5b812.1474003868.git.luto@kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
author: Andy Lutomirski <luto@kernel.org> 2016-09-15 22:45:48 -0700
committer: Ingo Molnar <mingo@kernel.org> 2016-09-16 09:18:54 +0200
commit: 68f24b08ee892d47bdef925d676e1ae1ccc316f8 (patch)
tree: eb68202da134522dd22c4bf78487ae9017df970f /kernel/fork.c
parent: aa1f1a639621672b68f654dc815a7d8298ff396f (diff)
download: linux-stable-68f24b08ee892d47bdef925d676e1ae1ccc316f8.tar.gz
linux-stable-68f24b08ee892d47bdef925d676e1ae1ccc316f8.tar.bz2
linux-stable-68f24b08ee892d47bdef925d676e1ae1ccc316f8.zip
1 files changed, 34 insertions, 1 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 0c240fd5beba..5dd0a516626d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -269,11 +269,40 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
 	}
 }
 
-void free_task(struct task_struct *tsk)
+static void release_task_stack(struct task_struct *tsk)
 {
 	account_kernel_stack(tsk, -1);
 	arch_release_thread_stack(tsk->stack);
 	free_thread_stack(tsk);
+	tsk->stack = NULL;
+#ifdef CONFIG_VMAP_STACK
+	tsk->stack_vm_area = NULL;
+#endif
+}
+
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+void put_task_stack(struct task_struct *tsk)
+{
+	if (atomic_dec_and_test(&tsk->stack_refcount))
+		release_task_stack(tsk);
+}
+#endif
+
+void free_task(struct task_struct *tsk)
+{
+#ifndef CONFIG_THREAD_INFO_IN_TASK
+	/*
+	 * The task is finally done with both the stack and thread_info,
+	 * so free both.
+	 */
+	release_task_stack(tsk);
+#else
+	/*
+	 * If the task had a separate stack allocation, it should be gone
+	 * by now.
+	 */
+	WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0);
+#endif
 	rt_mutex_debug_task_free(tsk);
 	ftrace_graph_exit_task(tsk);
 	put_seccomp_filter(tsk);
@@ -411,6 +440,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 #ifdef CONFIG_VMAP_STACK
 	tsk->stack_vm_area = stack_vm_area;
 #endif
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+	atomic_set(&tsk->stack_refcount, 1);
+#endif
 
 	if (err)
 		goto free_stack;
@@ -1771,6 +1803,7 @@ bad_fork_cleanup_count:
 	atomic_dec(&p->cred->user->processes);
 	exit_creds(p);
 bad_fork_free:
+	put_task_stack(p);
 	free_task(p);
 fork_out:
 	return ERR_PTR(retval);
author	Andy Lutomirski <luto@kernel.org>	2016-09-15 22:45:48 -0700
committer	Ingo Molnar <mingo@kernel.org>	2016-09-16 09:18:54 +0200
commit	68f24b08ee892d47bdef925d676e1ae1ccc316f8 (patch)
tree	eb68202da134522dd22c4bf78487ae9017df970f /kernel/fork.c
parent	aa1f1a639621672b68f654dc815a7d8298ff396f (diff)
download	linux-stable-68f24b08ee892d47bdef925d676e1ae1ccc316f8.tar.gz linux-stable-68f24b08ee892d47bdef925d676e1ae1ccc316f8.tar.bz2 linux-stable-68f24b08ee892d47bdef925d676e1ae1ccc316f8.zip