From 544b2c91a9f14f9565af1972203438b7f49afd48 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 14 Jun 2011 11:20:18 +0200 Subject: ptrace: implement PTRACE_LISTEN The previous patch implemented async notification for ptrace but it only worked while trace is running. This patch introduces PTRACE_LISTEN which is suggested by Oleg Nestrov. It's allowed iff tracee is in STOP trap and puts tracee into quasi-running state - tracee never really runs but wait(2) and ptrace(2) consider it to be running. While ptracer is listening, tracee is allowed to re-enter STOP to notify an async event. Listening state is cleared on the first notification. Ptracer can also clear it by issuing INTERRUPT - tracee will re-trap into STOP with listening state cleared. This allows ptracer to monitor group stop state without running tracee - use INTERRUPT to put tracee into STOP trap, issue LISTEN and then wait(2) to wait for the next group stop event. When it happens, PTRACE_GETSIGINFO provides information to determine the current state. Test program follows. #define PTRACE_SEIZE 0x4206 #define PTRACE_INTERRUPT 0x4207 #define PTRACE_LISTEN 0x4208 #define PTRACE_SEIZE_DEVEL 0x80000000 static const struct timespec ts1s = { .tv_sec = 1 }; int main(int argc, char **argv) { pid_t tracee, tracer; int i; tracee = fork(); if (!tracee) while (1) pause(); tracer = fork(); if (!tracer) { siginfo_t si; ptrace(PTRACE_SEIZE, tracee, NULL, (void *)(unsigned long)PTRACE_SEIZE_DEVEL); ptrace(PTRACE_INTERRUPT, tracee, NULL, NULL); repeat: waitid(P_PID, tracee, NULL, WSTOPPED); ptrace(PTRACE_GETSIGINFO, tracee, NULL, &si); if (!si.si_code) { printf("tracer: SIG %d\n", si.si_signo); ptrace(PTRACE_CONT, tracee, NULL, (void *)(unsigned long)si.si_signo); goto repeat; } printf("tracer: stopped=%d signo=%d\n", si.si_signo != SIGTRAP, si.si_signo); if (si.si_signo != SIGTRAP) ptrace(PTRACE_LISTEN, tracee, NULL, NULL); else ptrace(PTRACE_CONT, tracee, NULL, NULL); goto repeat; } for (i = 0; i < 3; i++) { nanosleep(&ts1s, NULL); printf("mother: SIGSTOP\n"); kill(tracee, SIGSTOP); nanosleep(&ts1s, NULL); printf("mother: SIGCONT\n"); kill(tracee, SIGCONT); } nanosleep(&ts1s, NULL); kill(tracer, SIGKILL); kill(tracee, SIGKILL); return 0; } This is identical to the program to test TRAP_NOTIFY except that tracee is PTRACE_LISTEN'd instead of PTRACE_CONT'd when group stopped. This allows ptracer to monitor when group stop ends without running tracee. # ./test-listen tracer: stopped=0 signo=5 mother: SIGSTOP tracer: SIG 19 tracer: stopped=1 signo=19 mother: SIGCONT tracer: stopped=0 signo=5 tracer: SIG 18 mother: SIGSTOP tracer: SIG 19 tracer: stopped=1 signo=19 mother: SIGCONT tracer: stopped=0 signo=5 tracer: SIG 18 mother: SIGSTOP tracer: SIG 19 tracer: stopped=1 signo=19 mother: SIGCONT tracer: stopped=0 signo=5 tracer: SIG 18 -v2: Moved JOBCTL_LISTENING check in wait_task_stopped() into task_stopped_code() as suggested by Oleg. Signed-off-by: Tejun Heo Cc: Oleg Nesterov --- kernel/exit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 20a406471525..289f59d686bf 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1368,7 +1368,8 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) static int *task_stopped_code(struct task_struct *p, bool ptrace) { if (ptrace) { - if (task_is_stopped_or_traced(p)) + if (task_is_stopped_or_traced(p) && + !(p->jobctl & JOBCTL_LISTENING)) return &p->exit_code; } else { if (p->signal->flags & SIGNAL_STOP_STOPPED) -- cgit v1.2.3 From d21142ece414ce1088cfcae760689aa60d6fee80 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 17 Jun 2011 16:50:34 +0200 Subject: ptrace: kill task_ptrace() task_ptrace(task) simply dereferences task->ptrace and isn't even used consistently only adding confusion. Kill it and directly access ->ptrace instead. This doesn't introduce any behavior change. Signed-off-by: Tejun Heo Signed-off-by: Oleg Nesterov --- kernel/exit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 289f59d686bf..e5cc05644609 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -765,7 +765,7 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p, p->exit_signal = SIGCHLD; /* If it has exited notify the new parent about this child's death. */ - if (!task_ptrace(p) && + if (!p->ptrace && p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { do_notify_parent(p, p->exit_signal); if (task_detached(p)) { @@ -795,7 +795,7 @@ static void forget_original_parent(struct task_struct *father) do { t->real_parent = reaper; if (t->parent == father) { - BUG_ON(task_ptrace(t)); + BUG_ON(t->ptrace); t->parent = t->real_parent; } if (t->pdeath_signal) @@ -1565,7 +1565,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, * Notification and reaping will be cascaded to the real * parent when the ptracer detaches. */ - if (likely(!ptrace) && unlikely(task_ptrace(p))) { + if (likely(!ptrace) && unlikely(p->ptrace)) { /* it will become visible, clear notask_error */ wo->notask_error = 0; return 0; @@ -1608,7 +1608,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, * own children, it should create a separate process which * takes the role of real parent. */ - if (likely(!ptrace) && task_ptrace(p) && + if (likely(!ptrace) && p->ptrace && same_thread_group(p->parent, p->real_parent)) return 0; -- cgit v1.2.3 From a288eecce5253cc1565d400a52b9b476a157e040 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 17 Jun 2011 16:50:37 +0200 Subject: ptrace: kill trivial tracehooks At this point, tracehooks aren't useful to mainline kernel and mostly just add an extra layer of obfuscation. Although they have comments, without actual in-kernel users, it is difficult to tell what are their assumptions and they're actually trying to achieve. To mainline kernel, they just aren't worth keeping around. This patch kills the following trivial tracehooks. * Ones testing whether task is ptraced. Replace with ->ptrace test. tracehook_expect_breakpoints() tracehook_consider_ignored_signal() tracehook_consider_fatal_signal() * ptrace_event() wrappers. Call directly. tracehook_report_exec() tracehook_report_exit() tracehook_report_vfork_done() * ptrace_release_task() wrapper. Call directly. tracehook_finish_release_task() * noop tracehook_prepare_release_task() tracehook_report_death() This doesn't introduce any behavior change. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Cc: Martin Schwidefsky Signed-off-by: Oleg Nesterov --- kernel/exit.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index e5cc05644609..d49134a7f250 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -169,7 +169,6 @@ void release_task(struct task_struct * p) struct task_struct *leader; int zap_leader; repeat: - tracehook_prepare_release_task(p); /* don't need to get the RCU readlock here - the process is dead and * can't be modifying its own credentials. But shut RCU-lockdep up */ rcu_read_lock(); @@ -179,7 +178,7 @@ repeat: proc_flush_task(p); write_lock_irq(&tasklist_lock); - tracehook_finish_release_task(p); + ptrace_release_task(p); __exit_signal(p); /* @@ -868,8 +867,6 @@ static void exit_notify(struct task_struct *tsk, int group_dead) wake_up_process(tsk->signal->group_exit_task); write_unlock_irq(&tasklist_lock); - tracehook_report_death(tsk, signal, cookie, group_dead); - /* If the process is dead, release it - nobody will wait for it */ if (signal == DEATH_REAP) release_task(tsk); @@ -924,7 +921,7 @@ NORET_TYPE void do_exit(long code) */ set_fs(USER_DS); - tracehook_report_exit(&code); + ptrace_event(PTRACE_EVENT_EXIT, code); validate_creds_for_do_exit(tsk); -- cgit v1.2.3 From 53c8f9f199b239668e6b1a907735ee323a0d1ccd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 22 Jun 2011 23:08:18 +0200 Subject: make do_notify_parent() return bool - change do_notify_parent() to return a boolean, true if the task should be reaped because its parent ignores SIGCHLD. - update the only caller which checks the returned value, exit_notify(). This temporary uglifies exit_notify() even more, will be cleanuped by the next change. Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo --- kernel/exit.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index d49134a7f250..34d135f4fccc 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -820,6 +820,7 @@ static void forget_original_parent(struct task_struct *father) static void exit_notify(struct task_struct *tsk, int group_dead) { int signal; + bool autoreap; void *cookie; /* @@ -858,9 +859,11 @@ static void exit_notify(struct task_struct *tsk, int group_dead) signal = tracehook_notify_death(tsk, &cookie, group_dead); if (signal >= 0) - signal = do_notify_parent(tsk, signal); + autoreap = do_notify_parent(tsk, signal); + else + autoreap = (signal == DEATH_REAP); - tsk->exit_state = signal == DEATH_REAP ? EXIT_DEAD : EXIT_ZOMBIE; + tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE; /* mt-exec, de_thread() is waiting for group leader */ if (unlikely(tsk->signal->notify_count < 0)) @@ -868,7 +871,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead) write_unlock_irq(&tasklist_lock); /* If the process is dead, release it - nobody will wait for it */ - if (signal == DEATH_REAP) + if (autoreap) release_task(tsk); } -- cgit v1.2.3 From 45cdf5cc0703c537194588c63d53bad1f2539d36 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 23 Jun 2011 19:06:50 +0200 Subject: kill tracehook_notify_death() Kill tracehook_notify_death(), reimplement the logic in its caller, exit_notify(). Also, change the exec_id's check to use thread_group_leader() instead of task_detached(), this is more clear. This logic only applies to the exiting leader, a sub-thread must never change its exit_signal. Note: when the traced group leader exits the exit_signal-or-SIGCHLD logic looks really strange: - we notify the tracer even if !thread_group_empty() but do_wait(WEXITED) can't work until all threads exit - if the tracer is real_parent, it is not clear why can't we use ->exit_signal event if !thread_group_empty() -v2: do not try to fix the 2nd oddity to avoid the subtle behavior change mixed with reorganization, suggested by Tejun. Signed-off-by: Oleg Nesterov Reviewed-by: Tejun Heo --- kernel/exit.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 34d135f4fccc..bb08e938ca74 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -819,9 +819,7 @@ static void forget_original_parent(struct task_struct *father) */ static void exit_notify(struct task_struct *tsk, int group_dead) { - int signal; bool autoreap; - void *cookie; /* * This does two things: @@ -852,16 +850,23 @@ static void exit_notify(struct task_struct *tsk, int group_dead) * we have changed execution domain as these two values started * the same after a fork. */ - if (tsk->exit_signal != SIGCHLD && !task_detached(tsk) && + if (thread_group_leader(tsk) && tsk->exit_signal != SIGCHLD && (tsk->parent_exec_id != tsk->real_parent->self_exec_id || tsk->self_exec_id != tsk->parent_exec_id)) tsk->exit_signal = SIGCHLD; - signal = tracehook_notify_death(tsk, &cookie, group_dead); - if (signal >= 0) - autoreap = do_notify_parent(tsk, signal); - else - autoreap = (signal == DEATH_REAP); + if (unlikely(tsk->ptrace)) { + int sig = thread_group_leader(tsk) && + thread_group_empty(tsk) && + !ptrace_reparented(tsk) ? + tsk->exit_signal : SIGCHLD; + autoreap = do_notify_parent(tsk, sig); + } else if (thread_group_leader(tsk)) { + autoreap = thread_group_empty(tsk) && + do_notify_parent(tsk, tsk->exit_signal); + } else { + autoreap = true; + } tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE; -- cgit v1.2.3 From 8677347378044ab564470bced2275520efb3670d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 22 Jun 2011 23:09:09 +0200 Subject: make do_notify_parent() __must_check, update the callers Change other callers of do_notify_parent() to check the value it returns, this makes the subsequent task_detached() unnecessary. Mark do_notify_parent() as __must_check. Use thread_group_leader() instead of !task_detached() to check if we need to notify the real parent in wait_task_zombie(). Remove the stale comment in release_task(). "just for sanity" is no longer true, we have to set EXIT_DEAD to avoid the races with do_wait(). Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo --- kernel/exit.c | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index bb08e938ca74..f68d137ffeb4 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -190,21 +190,12 @@ repeat: leader = p->group_leader; if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) { BUG_ON(task_detached(leader)); - do_notify_parent(leader, leader->exit_signal); /* * If we were the last child thread and the leader has * exited already, and the leader's parent ignores SIGCHLD, * then we are the one who should release the leader. - * - * do_notify_parent() will have marked it self-reaping in - * that case. - */ - zap_leader = task_detached(leader); - - /* - * This maintains the invariant that release_task() - * only runs on a task in EXIT_DEAD, just for sanity. */ + zap_leader = do_notify_parent(leader, leader->exit_signal); if (zap_leader) leader->exit_state = EXIT_DEAD; } @@ -766,8 +757,7 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p, /* If it has exited notify the new parent about this child's death. */ if (!p->ptrace && p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { - do_notify_parent(p, p->exit_signal); - if (task_detached(p)) { + if (do_notify_parent(p, p->exit_signal)) { p->exit_state = EXIT_DEAD; list_move_tail(&p->sibling, dead); } @@ -1351,16 +1341,13 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) /* We dropped tasklist, ptracer could die and untrace */ ptrace_unlink(p); /* - * If this is not a detached task, notify the parent. - * If it's still not detached after that, don't release - * it now. + * If this is not a sub-thread, notify the parent. + * If parent wants a zombie, don't release it now. */ - if (!task_detached(p)) { - do_notify_parent(p, p->exit_signal); - if (!task_detached(p)) { - p->exit_state = EXIT_ZOMBIE; - p = NULL; - } + if (thread_group_leader(p) && + !do_notify_parent(p, p->exit_signal)) { + p->exit_state = EXIT_ZOMBIE; + p = NULL; } write_unlock_irq(&tasklist_lock); } -- cgit v1.2.3 From 0976a03e5ce8ec346e985f21046d7a75bb7fdffd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 22 Jun 2011 23:09:39 +0200 Subject: reparent_leader: check EXIT_DEAD instead of task_detached() Change reparent_leader() to check ->exit_state instead of ->exit_signal, this matches the similar EXIT_DEAD check in wait_consider_task() and allows us to cleanup the do_notify_parent/task_detached logic. task_detached() was really needed during reparenting before 9cd80bbb "do_wait() optimization: do not place sub-threads on ->children list" to filter out the sub-threads. After this change task_detached(p) can only be true if p is the dead group_leader and its parent ignores SIGCHLD, in this case the caller of do_notify_parent() is going to reap this task and it should set EXIT_DEAD. Signed-off-by: Oleg Nesterov Reviewed-by: Tejun Heo --- kernel/exit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index f68d137ffeb4..2b1ba8048a14 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -742,7 +742,7 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p, { list_move_tail(&p->sibling, &p->real_parent->children); - if (task_detached(p)) + if (p->exit_state == EXIT_DEAD) return; /* * If this is a threaded reparent there is no need to -- cgit v1.2.3 From e550f14dc6322e794d4e70825f63c9c99177ae8b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 22 Jun 2011 23:09:54 +0200 Subject: kill task_detached() Upadate the last user of task_detached(), wait_task_zombie(), to use thread_group_leader() and kill task_detached(). Signed-off-by: Oleg Nesterov Reviewed-by: Tejun Heo --- kernel/exit.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 2b1ba8048a14..9fa99702645d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -189,7 +189,6 @@ repeat: zap_leader = 0; leader = p->group_leader; if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) { - BUG_ON(task_detached(leader)); /* * If we were the last child thread and the leader has * exited already, and the leader's parent ignores SIGCHLD, @@ -1231,9 +1230,9 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) traced = ptrace_reparented(p); /* * It can be ptraced but not reparented, check - * !task_detached() to filter out sub-threads. + * thread_group_leader() to filter out sub-threads. */ - if (likely(!traced) && likely(!task_detached(p))) { + if (likely(!traced) && thread_group_leader(p)) { struct signal_struct *psig; struct signal_struct *sig; unsigned long maxrss; -- cgit v1.2.3 From 479bf98c1c29b40d86e40a4e6e4944c2f03d9493 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 24 Jun 2011 17:34:39 +0200 Subject: ptrace: wait_consider_task: s/same_thread_group/ptrace_reparented/ wait_consider_task() checks same_thread_group(parent, real_parent), this is the open-coded ptrace_reparented(). __ptrace_detach() remains the only function which has to check this by hand, although we could reorganize the code to delay __ptrace_unlink. Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo --- kernel/exit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 9fa99702645d..b8d3b47bb881 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1599,8 +1599,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, * own children, it should create a separate process which * takes the role of real parent. */ - if (likely(!ptrace) && p->ptrace && - same_thread_group(p->parent, p->real_parent)) + if (likely(!ptrace) && p->ptrace && !ptrace_reparented(p)) return 0; /* -- cgit v1.2.3 From 961c4675c75112717705fa5c0c53cb9664051479 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 7 Jul 2011 21:33:54 +0200 Subject: has_stopped_jobs: s/task_is_stopped/SIGNAL_STOP_STOPPED/ has_stopped_jobs() naively checks task_is_stopped(group_leader). This was always wrong even without ptrace, group_leader can be dead. And given that ptrace can change the state to TRACED this is wrong even in the single-threaded case. Change the code to check SIGNAL_STOP_STOPPED and simplify the code, retval + break/continue doesn't make this trivial code more readable. We could probably add the usual "|| signal->group_stop_count" check but I don't think this makes sense, the task can start the group-stop right after the check anyway. Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo --- kernel/exit.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index b8d3b47bb881..6c7fbbe7d86f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -266,18 +266,16 @@ int is_current_pgrp_orphaned(void) return retval; } -static int has_stopped_jobs(struct pid *pgrp) +static bool has_stopped_jobs(struct pid *pgrp) { - int retval = 0; struct task_struct *p; do_each_pid_task(pgrp, PIDTYPE_PGID, p) { - if (!task_is_stopped(p)) - continue; - retval = 1; - break; + if (p->signal->flags & SIGNAL_STOP_STOPPED) + return true; } while_each_pid_task(pgrp, PIDTYPE_PGID, p); - return retval; + + return false; } /* -- cgit v1.2.3