From 4d671d922d51907bc41f1f7f2dc737c928ae78fd Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Fri, 28 Aug 2020 21:56:13 -0400 Subject: seccomp: kill process instead of thread for unknown actions Asynchronous termination of a thread outside of the userspace thread library's knowledge is an unsafe operation that leaves the process in an inconsistent, corrupt, and possibly unrecoverable state. In order to make new actions that may be added in the future safe on kernels not aware of them, change the default action from SECCOMP_RET_KILL_THREAD to SECCOMP_RET_KILL_PROCESS. Signed-off-by: Rich Felker Link: https://lore.kernel.org/r/20200829015609.GA32566@brightrain.aerifal.cx [kees: Fixed up coredump selection logic to match] Signed-off-by: Kees Cook --- kernel/seccomp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 676d4af62103..f754c1087e41 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -1020,7 +1020,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, default: seccomp_log(this_syscall, SIGSYS, action, true); /* Dump core only if this is the last remaining thread. */ - if (action == SECCOMP_RET_KILL_PROCESS || + if (action != SECCOMP_RET_KILL_THREAD || get_nr_threads(current) == 1) { kernel_siginfo_t info; @@ -1030,10 +1030,10 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, seccomp_init_siginfo(&info, this_syscall, data); do_coredump(&info); } - if (action == SECCOMP_RET_KILL_PROCESS) - do_group_exit(SIGSYS); - else + if (action == SECCOMP_RET_KILL_THREAD) do_exit(SIGSYS); + else + do_group_exit(SIGSYS); } unreachable(); -- cgit v1.2.3 From 3932fcecd96221e18100055d623b736d0ff873a4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 8 Sep 2020 12:28:27 -0700 Subject: selftests/seccomp: Add test for unknown SECCOMP_RET kill behavior While we were testing for the behavior of unknown seccomp filter return values, there was no test for how it acted in a thread group. Add a test in the thread group tests for this. Reviewed-by: Shuah Khan Acked-by: Christian Brauner Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 43 +++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 7a6d40286a42..bfb382580493 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -774,8 +774,15 @@ void *kill_thread(void *data) return (void *)SIBLING_EXIT_UNKILLED; } +enum kill_t { + KILL_THREAD, + KILL_PROCESS, + RET_UNKNOWN +}; + /* Prepare a thread that will kill itself or both of us. */ -void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process) +void kill_thread_or_group(struct __test_metadata *_metadata, + enum kill_t kill_how) { pthread_t thread; void *status; @@ -791,11 +798,12 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process) .len = (unsigned short)ARRAY_SIZE(filter_thread), .filter = filter_thread, }; + int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAAA; struct sock_filter filter_process[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS), + BPF_STMT(BPF_RET|BPF_K, kill), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; struct sock_fprog prog_process = { @@ -808,13 +816,15 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process) } ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, - kill_process ? &prog_process : &prog_thread)); + kill_how == KILL_THREAD ? &prog_thread + : &prog_process)); /* * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS * flag cannot be downgraded by a new filter. */ - ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread)); + if (kill_how == KILL_PROCESS) + ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread)); /* Start a thread that will exit immediately. */ ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false)); @@ -842,7 +852,7 @@ TEST(KILL_thread) child_pid = fork(); ASSERT_LE(0, child_pid); if (child_pid == 0) { - kill_thread_or_group(_metadata, false); + kill_thread_or_group(_metadata, KILL_THREAD); _exit(38); } @@ -861,7 +871,7 @@ TEST(KILL_process) child_pid = fork(); ASSERT_LE(0, child_pid); if (child_pid == 0) { - kill_thread_or_group(_metadata, true); + kill_thread_or_group(_metadata, KILL_PROCESS); _exit(38); } @@ -872,6 +882,27 @@ TEST(KILL_process) ASSERT_EQ(SIGSYS, WTERMSIG(status)); } +TEST(KILL_unknown) +{ + int status; + pid_t child_pid; + + child_pid = fork(); + ASSERT_LE(0, child_pid); + if (child_pid == 0) { + kill_thread_or_group(_metadata, RET_UNKNOWN); + _exit(38); + } + + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + + /* If the entire process was killed, we'll see SIGSYS. */ + EXPECT_TRUE(WIFSIGNALED(status)) { + TH_LOG("Unknown SECCOMP_RET is only killing the thread?"); + } + ASSERT_EQ(SIGSYS, WTERMSIG(status)); +} + /* TODO(wad) add 64-bit versus 32-bit arg tests. */ TEST(arg_out_of_range) { -- cgit v1.2.3 From 2d9ca267a944c2b6ed5b4d750b1cf0407b6262b4 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Mon, 24 Aug 2020 15:59:21 +0300 Subject: seccomp: Use current_pt_regs() instead of task_pt_regs(current) As described in commit a3460a59747c ("new helper: current_pt_regs()"): - arch versions are "optimized versions". - some architectures have task_pt_regs() working only for traced tasks blocked on signal delivery. current_pt_regs() needs to work for *all* processes. In preparation for adding a coccinelle rule for using current_*(), instead of raw accesses to current members, modify seccomp_do_user_notification(), __seccomp_filter(), __secure_computing() to use current_pt_regs(). Signed-off-by: Denis Efremov Link: https://lore.kernel.org/r/20200824125921.488311-1-efremov@linux.com [kees: Reworded commit log, add comment to populate_seccomp_data()] Signed-off-by: Kees Cook --- kernel/seccomp.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index f754c1087e41..ae6b40cc39f4 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -196,6 +196,10 @@ struct seccomp_filter { */ static void populate_seccomp_data(struct seccomp_data *sd) { + /* + * Instead of using current_pt_reg(), we're already doing the work + * to safely fetch "current", so just use "task" everywhere below. + */ struct task_struct *task = current; struct pt_regs *regs = task_pt_regs(task); unsigned long args[6]; @@ -910,7 +914,7 @@ out: if (flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE) return 0; - syscall_set_return_value(current, task_pt_regs(current), + syscall_set_return_value(current, current_pt_regs(), err, ret); return -1; } @@ -943,13 +947,13 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, /* Set low-order bits as an errno, capped at MAX_ERRNO. */ if (data > MAX_ERRNO) data = MAX_ERRNO; - syscall_set_return_value(current, task_pt_regs(current), + syscall_set_return_value(current, current_pt_regs(), -data, 0); goto skip; case SECCOMP_RET_TRAP: /* Show the handler the original registers. */ - syscall_rollback(current, task_pt_regs(current)); + syscall_rollback(current, current_pt_regs()); /* Let the filter pass back 16 bits of data. */ seccomp_send_sigsys(this_syscall, data); goto skip; @@ -962,7 +966,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, /* ENOSYS these calls if there is no tracer attached. */ if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) { syscall_set_return_value(current, - task_pt_regs(current), + current_pt_regs(), -ENOSYS, 0); goto skip; } @@ -982,7 +986,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, if (fatal_signal_pending(current)) goto skip; /* Check if the tracer forced the syscall to be skipped. */ - this_syscall = syscall_get_nr(current, task_pt_regs(current)); + this_syscall = syscall_get_nr(current, current_pt_regs()); if (this_syscall < 0) goto skip; @@ -1025,7 +1029,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, kernel_siginfo_t info; /* Show the original registers in the dump. */ - syscall_rollback(current, task_pt_regs(current)); + syscall_rollback(current, current_pt_regs()); /* Trigger a manual coredump since do_exit skips it. */ seccomp_init_siginfo(&info, this_syscall, data); do_coredump(&info); @@ -1060,7 +1064,7 @@ int __secure_computing(const struct seccomp_data *sd) return 0; this_syscall = sd ? sd->nr : - syscall_get_nr(current, task_pt_regs(current)); + syscall_get_nr(current, current_pt_regs()); switch (mode) { case SECCOMP_MODE_STRICT: -- cgit v1.2.3 From a23042882ff29ae2df04f46ac0d4c50f66e7e497 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Wed, 15 Apr 2020 12:15:01 +0800 Subject: selftests/seccomp: Use bitwise instead of arithmetic operator for flags This silences the following coccinelle warning: "WARNING: sum of probable bitmasks, consider |" tools/testing/selftests/seccomp/seccomp_bpf.c:3131:17-18: WARNING: sum of probable bitmasks, consider | tools/testing/selftests/seccomp/seccomp_bpf.c:3133:18-19: WARNING: sum of probable bitmasks, consider | tools/testing/selftests/seccomp/seccomp_bpf.c:3134:18-19: WARNING: sum of probable bitmasks, consider | tools/testing/selftests/seccomp/seccomp_bpf.c:3135:18-19: WARNING: sum of probable bitmasks, consider | Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace") Reported-by: Hulk Robot Signed-off-by: Zou Wei Link: https://lore.kernel.org/r/1586924101-65940-1-git-send-email-zou_wei@huawei.com Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index bfb382580493..c5002fc25b00 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -3173,11 +3173,11 @@ skip: static int user_notif_syscall(int nr, unsigned int flags) { struct sock_filter filter[] = { - BPF_STMT(BPF_LD+BPF_W+BPF_ABS, + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), - BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1), - BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF), - BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; struct sock_fprog prog = { -- cgit v1.2.3 From 05b52c6625278cc6ed1245a569167f86a971ff86 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:20 -0700 Subject: selftests/seccomp: Use __NR_mknodat instead of __NR_mknod The __NR_mknod syscall doesn't exist on arm64 (only __NR_mknodat). Switch to the modern syscall. Fixes: ad5682184a81 ("selftests/seccomp: Check for EPOLLHUP for user_notif") Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-16-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index c5002fc25b00..6ddef9fc7ea5 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -3746,7 +3746,7 @@ TEST(user_notification_filter_empty) if (pid == 0) { int listener; - listener = user_notif_syscall(__NR_mknod, SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER); if (listener < 0) _exit(EXIT_FAILURE); -- cgit v1.2.3 From a6a4d78419a04095221ec2b518edefb080218d55 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:06 -0700 Subject: selftests/seccomp: Refactor arch register macros to avoid xtensa special case To avoid an xtensa special-case, refactor all arch register macros to take the register variable instead of depending on the macro expanding as a struct member name. Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-2-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 97 +++++++++++++-------------- 1 file changed, 47 insertions(+), 50 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 6ddef9fc7ea5..a261643db4b8 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1698,64 +1698,64 @@ TEST_F(TRACE_poke, getpid_runs_normally) } #if defined(__x86_64__) -# define ARCH_REGS struct user_regs_struct -# define SYSCALL_NUM orig_rax -# define SYSCALL_RET rax +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM(_regs) (_regs).orig_rax +# define SYSCALL_RET(_regs) (_regs).rax #elif defined(__i386__) -# define ARCH_REGS struct user_regs_struct -# define SYSCALL_NUM orig_eax -# define SYSCALL_RET eax +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM(_regs) (_regs).orig_eax +# define SYSCALL_RET(_regs) (_regs).eax #elif defined(__arm__) -# define ARCH_REGS struct pt_regs -# define SYSCALL_NUM ARM_r7 -# define SYSCALL_RET ARM_r0 +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM(_regs) (_regs).ARM_r7 +# define SYSCALL_RET(_regs) (_regs).ARM_r0 #elif defined(__aarch64__) -# define ARCH_REGS struct user_pt_regs -# define SYSCALL_NUM regs[8] -# define SYSCALL_RET regs[0] +# define ARCH_REGS struct user_pt_regs +# define SYSCALL_NUM(_regs) (_regs).regs[8] +# define SYSCALL_RET(_regs) (_regs).regs[0] #elif defined(__riscv) && __riscv_xlen == 64 -# define ARCH_REGS struct user_regs_struct -# define SYSCALL_NUM a7 -# define SYSCALL_RET a0 +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM(_regs) (_regs).a7 +# define SYSCALL_RET(_regs) (_regs).a0 #elif defined(__csky__) -# define ARCH_REGS struct pt_regs -#if defined(__CSKYABIV2__) -# define SYSCALL_NUM regs[3] -#else -# define SYSCALL_NUM regs[9] -#endif -# define SYSCALL_RET a0 +# define ARCH_REGS struct pt_regs +# if defined(__CSKYABIV2__) +# define SYSCALL_NUM(_regs) (_regs).regs[3] +# else +# define SYSCALL_NUM(_regs) (_regs).regs[9] +# endif +# define SYSCALL_RET(_regs) (_regs).a0 #elif defined(__hppa__) -# define ARCH_REGS struct user_regs_struct -# define SYSCALL_NUM gr[20] -# define SYSCALL_RET gr[28] +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM(_regs) (_regs).gr[20] +# define SYSCALL_RET(_regs) (_regs).gr[28] #elif defined(__powerpc__) -# define ARCH_REGS struct pt_regs -# define SYSCALL_NUM gpr[0] -# define SYSCALL_RET gpr[3] +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM(_regs) (_regs).gpr[0] +# define SYSCALL_RET(_regs) (_regs).gpr[3] #elif defined(__s390__) -# define ARCH_REGS s390_regs -# define SYSCALL_NUM gprs[2] -# define SYSCALL_RET gprs[2] +# define ARCH_REGS s390_regs +# define SYSCALL_NUM(_regs) (_regs).gprs[2] +# define SYSCALL_RET(_regs) (_regs).gprs[2] # define SYSCALL_NUM_RET_SHARE_REG #elif defined(__mips__) -# define ARCH_REGS struct pt_regs -# define SYSCALL_NUM regs[2] -# define SYSCALL_SYSCALL_NUM regs[4] -# define SYSCALL_RET regs[2] +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM(_regs) (_regs).regs[2] +# define SYSCALL_SYSCALL_NUM regs[4] +# define SYSCALL_RET(_regs) (_regs).regs[2] # define SYSCALL_NUM_RET_SHARE_REG #elif defined(__xtensa__) -# define ARCH_REGS struct user_pt_regs -# define SYSCALL_NUM syscall +# define ARCH_REGS struct user_pt_regs +# define SYSCALL_NUM(_regs) (_regs).syscall /* * On xtensa syscall return value is in the register * a2 of the current window which is not fixed. */ -#define SYSCALL_RET(reg) a[(reg).windowbase * 4 + 2] +#define SYSCALL_RET(_regs) (_regs).a[(_regs).windowbase * 4 + 2] #elif defined(__sh__) -# define ARCH_REGS struct pt_regs -# define SYSCALL_NUM gpr[3] -# define SYSCALL_RET gpr[0] +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM(_regs) (_regs).gpr[3] +# define SYSCALL_RET(_regs) (_regs).gpr[0] #else # error "Do not know how to find your architecture's registers and syscalls" #endif @@ -1804,10 +1804,10 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) #endif #if defined(__mips__) - if (regs.SYSCALL_NUM == __NR_O32_Linux) + if (SYSCALL_NUM(regs) == __NR_O32_Linux) return regs.SYSCALL_SYSCALL_NUM; #endif - return regs.SYSCALL_NUM; + return SYSCALL_NUM(regs); } /* Architecture-specific syscall changing routine. */ @@ -1830,14 +1830,14 @@ void change_syscall(struct __test_metadata *_metadata, defined(__s390__) || defined(__hppa__) || defined(__riscv) || \ defined(__xtensa__) || defined(__csky__) || defined(__sh__) { - regs.SYSCALL_NUM = syscall; + SYSCALL_NUM(regs) = syscall; } #elif defined(__mips__) { - if (regs.SYSCALL_NUM == __NR_O32_Linux) + if (SYSCALL_NUM(regs) == __NR_O32_Linux) regs.SYSCALL_SYSCALL_NUM = syscall; else - regs.SYSCALL_NUM = syscall; + SYSCALL_NUM(regs) = syscall; } #elif defined(__arm__) @@ -1871,11 +1871,8 @@ void change_syscall(struct __test_metadata *_metadata, if (syscall == -1) #ifdef SYSCALL_NUM_RET_SHARE_REG TH_LOG("Can't modify syscall return on this architecture"); - -#elif defined(__xtensa__) - regs.SYSCALL_RET(regs) = result; #else - regs.SYSCALL_RET = result; + SYSCALL_RET(regs) = result; #endif #ifdef HAVE_GETREGS -- cgit v1.2.3 From 31c36eb87c85b6de0f341b1184f90137106a9f81 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:07 -0700 Subject: selftests/seccomp: Provide generic syscall setting macro In order to avoid "#ifdef"s in the main function bodies, create a new macro, SYSCALL_NUM_SET(), where arch-specific logic can live. Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-3-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index a261643db4b8..c44abe7c6a3c 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1760,6 +1760,17 @@ TEST_F(TRACE_poke, getpid_runs_normally) # error "Do not know how to find your architecture's registers and syscalls" #endif +/* + * Most architectures can change the syscall by just updating the + * associated register. This is the default if not defined above. + */ +#ifndef SYSCALL_NUM_SET +# define SYSCALL_NUM_SET(_regs, _nr) \ + do { \ + SYSCALL_NUM(_regs) = (_nr); \ + } while (0) +#endif + /* When the syscall return can't be changed, stub out the tests for it. */ #ifdef SYSCALL_NUM_RET_SHARE_REG # define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action) @@ -1830,14 +1841,14 @@ void change_syscall(struct __test_metadata *_metadata, defined(__s390__) || defined(__hppa__) || defined(__riscv) || \ defined(__xtensa__) || defined(__csky__) || defined(__sh__) { - SYSCALL_NUM(regs) = syscall; + SYSCALL_NUM_SET(regs, syscall); } #elif defined(__mips__) { if (SYSCALL_NUM(regs) == __NR_O32_Linux) regs.SYSCALL_SYSCALL_NUM = syscall; else - SYSCALL_NUM(regs) = syscall; + SYSCALL_NUM_SET(regs, syscall); } #elif defined(__arm__) -- cgit v1.2.3 From a084a6cba37cfa7c03e88f86ade961fb1d7c18a2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:08 -0700 Subject: selftests/seccomp: mips: Define SYSCALL_NUM_SET macro Remove the mips special-case in change_syscall(). Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-4-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index c44abe7c6a3c..dc9e3e2c3db5 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1742,6 +1742,13 @@ TEST_F(TRACE_poke, getpid_runs_normally) # define ARCH_REGS struct pt_regs # define SYSCALL_NUM(_regs) (_regs).regs[2] # define SYSCALL_SYSCALL_NUM regs[4] +# define SYSCALL_NUM_SET(_regs, _nr) \ + do { \ + if ((_regs).regs[2] == __NR_O32_Linux) \ + (_regs).regs[4] = _nr; \ + else \ + (_regs).regs[2] = _nr; \ + } while (0) # define SYSCALL_RET(_regs) (_regs).regs[2] # define SYSCALL_NUM_RET_SHARE_REG #elif defined(__xtensa__) @@ -1839,17 +1846,11 @@ void change_syscall(struct __test_metadata *_metadata, #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \ defined(__s390__) || defined(__hppa__) || defined(__riscv) || \ - defined(__xtensa__) || defined(__csky__) || defined(__sh__) + defined(__xtensa__) || defined(__csky__) || defined(__sh__) || \ + defined(__mips__) { SYSCALL_NUM_SET(regs, syscall); } -#elif defined(__mips__) - { - if (SYSCALL_NUM(regs) == __NR_O32_Linux) - regs.SYSCALL_SYSCALL_NUM = syscall; - else - SYSCALL_NUM_SET(regs, syscall); - } #elif defined(__arm__) # ifndef PTRACE_SET_SYSCALL -- cgit v1.2.3 From aa8fbb80a8034af23b08310142af1d9824d25533 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:09 -0700 Subject: selftests/seccomp: arm: Define SYSCALL_NUM_SET macro Remove the arm special-case in change_syscall(). Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-5-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index dc9e3e2c3db5..aa275ed8e183 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1708,6 +1708,11 @@ TEST_F(TRACE_poke, getpid_runs_normally) #elif defined(__arm__) # define ARCH_REGS struct pt_regs # define SYSCALL_NUM(_regs) (_regs).ARM_r7 +# ifndef PTRACE_SET_SYSCALL +# define PTRACE_SET_SYSCALL 23 +# endif +# define SYSCALL_NUM_SET(_regs, _nr) \ + EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr)) # define SYSCALL_RET(_regs) (_regs).ARM_r0 #elif defined(__aarch64__) # define ARCH_REGS struct user_pt_regs @@ -1847,20 +1852,11 @@ void change_syscall(struct __test_metadata *_metadata, #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \ defined(__s390__) || defined(__hppa__) || defined(__riscv) || \ defined(__xtensa__) || defined(__csky__) || defined(__sh__) || \ - defined(__mips__) + defined(__mips__) || defined(__arm__) { SYSCALL_NUM_SET(regs, syscall); } -#elif defined(__arm__) -# ifndef PTRACE_SET_SYSCALL -# define PTRACE_SET_SYSCALL 23 -# endif - { - ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall); - EXPECT_EQ(0, ret); - } - #elif defined(__aarch64__) # ifndef NT_ARM_SYSTEM_CALL # define NT_ARM_SYSTEM_CALL 0x404 -- cgit v1.2.3 From 0dd7d68572d9393765b57c001adc30822e3003ed Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:10 -0700 Subject: selftests/seccomp: arm64: Define SYSCALL_NUM_SET macro Remove the arm64 special-case in change_syscall(). Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-6-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index aa275ed8e183..ef7f65468069 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1717,6 +1717,18 @@ TEST_F(TRACE_poke, getpid_runs_normally) #elif defined(__aarch64__) # define ARCH_REGS struct user_pt_regs # define SYSCALL_NUM(_regs) (_regs).regs[8] +# ifndef NT_ARM_SYSTEM_CALL +# define NT_ARM_SYSTEM_CALL 0x404 +# endif +# define SYSCALL_NUM_SET(_regs, _nr) \ + do { \ + struct iovec __v; \ + typeof(_nr) __nr = (_nr); \ + __v.iov_base = &__nr; \ + __v.iov_len = sizeof(__nr); \ + EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee, \ + NT_ARM_SYSTEM_CALL, &__v)); \ + } while (0) # define SYSCALL_RET(_regs) (_regs).regs[0] #elif defined(__riscv) && __riscv_xlen == 64 # define ARCH_REGS struct user_regs_struct @@ -1852,23 +1864,10 @@ void change_syscall(struct __test_metadata *_metadata, #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \ defined(__s390__) || defined(__hppa__) || defined(__riscv) || \ defined(__xtensa__) || defined(__csky__) || defined(__sh__) || \ - defined(__mips__) || defined(__arm__) + defined(__mips__) || defined(__arm__) || defined(__aarch64__) { SYSCALL_NUM_SET(regs, syscall); } - -#elif defined(__aarch64__) -# ifndef NT_ARM_SYSTEM_CALL -# define NT_ARM_SYSTEM_CALL 0x404 -# endif - { - iov.iov_base = &syscall; - iov.iov_len = sizeof(syscall); - ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL, - &iov); - EXPECT_EQ(0, ret); - } - #else ASSERT_EQ(1, 0) { TH_LOG("How is the syscall changed on this architecture?"); -- cgit v1.2.3 From 37989de731dbea5af143806192c4cd1484990ab4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:11 -0700 Subject: selftests/seccomp: mips: Remove O32-specific macro Instead of having the mips O32 macro special-cased, pull the logic into the SYSCALL_NUM() macro. Additionally include the ABI headers, since these appear to have been missing, leaving __NR_O32_Linux undefined. Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-7-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index ef7f65468069..7976cb480912 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1756,9 +1756,19 @@ TEST_F(TRACE_poke, getpid_runs_normally) # define SYSCALL_RET(_regs) (_regs).gprs[2] # define SYSCALL_NUM_RET_SHARE_REG #elif defined(__mips__) +# include +# include +# include # define ARCH_REGS struct pt_regs -# define SYSCALL_NUM(_regs) (_regs).regs[2] -# define SYSCALL_SYSCALL_NUM regs[4] +# define SYSCALL_NUM(_regs) \ + ({ \ + typeof((_regs).regs[2]) _nr; \ + if ((_regs).regs[2] == __NR_O32_Linux) \ + _nr = (_regs).regs[4]; \ + else \ + _nr = (_regs).regs[2]; \ + _nr; \ + }) # define SYSCALL_NUM_SET(_regs, _nr) \ do { \ if ((_regs).regs[2] == __NR_O32_Linux) \ @@ -1838,10 +1848,6 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) } #endif -#if defined(__mips__) - if (SYSCALL_NUM(regs) == __NR_O32_Linux) - return regs.SYSCALL_SYSCALL_NUM; -#endif return SYSCALL_NUM(regs); } -- cgit v1.2.3 From 78f26627fd36cb74277dd562ec277aee384525a1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:12 -0700 Subject: selftests/seccomp: Remove syscall setting #ifdefs With all architectures now using the common SYSCALL_NUM_SET() macro, the arch-specific #ifdef can be removed from change_syscall() itself. Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-8-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 7976cb480912..e4b2b9468ff9 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1865,20 +1865,9 @@ void change_syscall(struct __test_metadata *_metadata, iov.iov_len = sizeof(regs); ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov); #endif - EXPECT_EQ(0, ret) {} + EXPECT_EQ(0, ret); -#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \ - defined(__s390__) || defined(__hppa__) || defined(__riscv) || \ - defined(__xtensa__) || defined(__csky__) || defined(__sh__) || \ - defined(__mips__) || defined(__arm__) || defined(__aarch64__) - { - SYSCALL_NUM_SET(regs, syscall); - } -#else - ASSERT_EQ(1, 0) { - TH_LOG("How is the syscall changed on this architecture?"); - } -#endif + SYSCALL_NUM_SET(regs, syscall); /* If syscall is skipped, change return value. */ if (syscall == -1) @@ -1888,6 +1877,7 @@ void change_syscall(struct __test_metadata *_metadata, SYSCALL_RET(regs) = result; #endif + /* Flush any register changes made. */ #ifdef HAVE_GETREGS ret = ptrace(PTRACE_SETREGS, tracee, 0, ®s); #else -- cgit v1.2.3 From fdbaa798eaf52a3f1d5d86c5bc035fe8dcc3a384 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:13 -0700 Subject: selftests/seccomp: Convert HAVE_GETREG into ARCH_GETREG/ARCH_SETREG Instead of special-casing the get/set-registers routines, move the HAVE_GETREG logic into the new ARCH_GETREG() and ARCH_SETREG() macros. Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-9-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index e4b2b9468ff9..68f1f132a517 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1821,20 +1821,21 @@ TEST_F(TRACE_poke, getpid_runs_normally) } while (0) #endif -/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for +/* + * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). */ #if defined(__x86_64__) || defined(__i386__) || defined(__mips__) -#define HAVE_GETREGS +# define ARCH_GETREGS(_regs) ptrace(PTRACE_GETREGS, tracee, 0, &(_regs)) +# define ARCH_SETREGS(_regs) ptrace(PTRACE_SETREGS, tracee, 0, &(_regs)) #endif /* Architecture-specific syscall fetching routine. */ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) { ARCH_REGS regs; -#ifdef HAVE_GETREGS - EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, ®s)) { - TH_LOG("PTRACE_GETREGS failed"); +#ifdef ARCH_GETREGS + EXPECT_EQ(0, ARCH_GETREGS(regs)) { return -1; } #else @@ -1855,17 +1856,19 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) void change_syscall(struct __test_metadata *_metadata, pid_t tracee, int syscall, int result) { - int ret; ARCH_REGS regs; -#ifdef HAVE_GETREGS - ret = ptrace(PTRACE_GETREGS, tracee, 0, ®s); +#ifdef ARCH_GETREGS + EXPECT_EQ(0, ARCH_GETREGS(regs)) { + return; + } #else + int ret; struct iovec iov; iov.iov_base = ®s; iov.iov_len = sizeof(regs); ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov); -#endif EXPECT_EQ(0, ret); +#endif SYSCALL_NUM_SET(regs, syscall); @@ -1878,14 +1881,14 @@ void change_syscall(struct __test_metadata *_metadata, #endif /* Flush any register changes made. */ -#ifdef HAVE_GETREGS - ret = ptrace(PTRACE_SETREGS, tracee, 0, ®s); +#ifdef ARCH_SETREGS + EXPECT_EQ(0, ARCH_SETREGS(regs)); #else iov.iov_base = ®s; iov.iov_len = sizeof(regs); ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov); -#endif EXPECT_EQ(0, ret); +#endif } void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, -- cgit v1.2.3 From dc2ad165f4fbef0fe1028b6b3720c5bec034874f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:14 -0700 Subject: selftests/seccomp: Convert REGSET calls into ARCH_GETREG/ARCH_SETREG Consolidate the REGSET logic into the new ARCH_GETREG() and ARCH_SETREG() macros, avoiding more #ifdef code in function bodies. Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-10-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 42 ++++++++++----------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 68f1f132a517..00056e067846 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1828,26 +1828,29 @@ TEST_F(TRACE_poke, getpid_runs_normally) #if defined(__x86_64__) || defined(__i386__) || defined(__mips__) # define ARCH_GETREGS(_regs) ptrace(PTRACE_GETREGS, tracee, 0, &(_regs)) # define ARCH_SETREGS(_regs) ptrace(PTRACE_SETREGS, tracee, 0, &(_regs)) +#else +# define ARCH_GETREGS(_regs) ({ \ + struct iovec __v; \ + __v.iov_base = &(_regs); \ + __v.iov_len = sizeof(_regs); \ + ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__v); \ + }) +# define ARCH_SETREGS(_regs) ({ \ + struct iovec __v; \ + __v.iov_base = &(_regs); \ + __v.iov_len = sizeof(_regs); \ + ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__v); \ + }) #endif /* Architecture-specific syscall fetching routine. */ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) { ARCH_REGS regs; -#ifdef ARCH_GETREGS - EXPECT_EQ(0, ARCH_GETREGS(regs)) { - return -1; - } -#else - struct iovec iov; - iov.iov_base = ®s; - iov.iov_len = sizeof(regs); - EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) { - TH_LOG("PTRACE_GETREGSET failed"); + EXPECT_EQ(0, ARCH_GETREGS(regs)) { return -1; } -#endif return SYSCALL_NUM(regs); } @@ -1857,18 +1860,10 @@ void change_syscall(struct __test_metadata *_metadata, pid_t tracee, int syscall, int result) { ARCH_REGS regs; -#ifdef ARCH_GETREGS + EXPECT_EQ(0, ARCH_GETREGS(regs)) { return; } -#else - int ret; - struct iovec iov; - iov.iov_base = ®s; - iov.iov_len = sizeof(regs); - ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov); - EXPECT_EQ(0, ret); -#endif SYSCALL_NUM_SET(regs, syscall); @@ -1881,14 +1876,7 @@ void change_syscall(struct __test_metadata *_metadata, #endif /* Flush any register changes made. */ -#ifdef ARCH_SETREGS EXPECT_EQ(0, ARCH_SETREGS(regs)); -#else - iov.iov_base = ®s; - iov.iov_len = sizeof(regs); - ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov); - EXPECT_EQ(0, ret); -#endif } void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, -- cgit v1.2.3 From e4e8e5d28d5e1dac24f775452d4cc6f49f5c069e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:15 -0700 Subject: selftests/seccomp: Avoid redundant register flushes When none of the registers have changed, don't flush them back. This can happen if the architecture uses a non-register way to change the syscall (e.g. arm64) , and a return value hasn't been written. Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-11-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 00056e067846..638cea8cb23d 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1859,11 +1859,12 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) void change_syscall(struct __test_metadata *_metadata, pid_t tracee, int syscall, int result) { - ARCH_REGS regs; + ARCH_REGS orig, regs; EXPECT_EQ(0, ARCH_GETREGS(regs)) { return; } + orig = regs; SYSCALL_NUM_SET(regs, syscall); @@ -1876,7 +1877,8 @@ void change_syscall(struct __test_metadata *_metadata, #endif /* Flush any register changes made. */ - EXPECT_EQ(0, ARCH_SETREGS(regs)); + if (memcmp(&orig, ®s, sizeof(orig)) != 0) + EXPECT_EQ(0, ARCH_SETREGS(regs)); } void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, -- cgit v1.2.3 From f04cf78bbfcd1fa5b1819613a5f354b228f36e03 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:16 -0700 Subject: selftests/seccomp: Remove SYSCALL_NUM_RET_SHARE_REG in favor of SYSCALL_RET_SET Instead of special-casing the specific case of shared registers, create a default SYSCALL_RET_SET() macro (mirroring SYSCALL_NUM_SET()), that writes to the SYSCALL_RET register. For architectures that can't set the return value (for whatever reason), they can define SYSCALL_RET_SET() without an associated SYSCALL_RET() macro. This also paves the way for architectures that need to do special things to set the return value (e.g. powerpc). Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-12-keescook@chromium.org Acked-by: Christian Brauner --- tools/testing/selftests/seccomp/seccomp_bpf.c | 33 +++++++++++++++++++-------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 638cea8cb23d..84766a001ed0 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1753,8 +1753,8 @@ TEST_F(TRACE_poke, getpid_runs_normally) #elif defined(__s390__) # define ARCH_REGS s390_regs # define SYSCALL_NUM(_regs) (_regs).gprs[2] -# define SYSCALL_RET(_regs) (_regs).gprs[2] -# define SYSCALL_NUM_RET_SHARE_REG +# define SYSCALL_RET_SET(_regs, _val) \ + TH_LOG("Can't modify syscall return on this architecture") #elif defined(__mips__) # include # include @@ -1776,8 +1776,8 @@ TEST_F(TRACE_poke, getpid_runs_normally) else \ (_regs).regs[2] = _nr; \ } while (0) -# define SYSCALL_RET(_regs) (_regs).regs[2] -# define SYSCALL_NUM_RET_SHARE_REG +# define SYSCALL_RET_SET(_regs, _val) \ + TH_LOG("Can't modify syscall return on this architecture") #elif defined(__xtensa__) # define ARCH_REGS struct user_pt_regs # define SYSCALL_NUM(_regs) (_regs).syscall @@ -1804,9 +1804,26 @@ TEST_F(TRACE_poke, getpid_runs_normally) SYSCALL_NUM(_regs) = (_nr); \ } while (0) #endif +/* + * Most architectures can change the syscall return value by just + * writing to the SYSCALL_RET register. This is the default if not + * defined above. If an architecture cannot set the return value + * (for example when the syscall and return value register is + * shared), report it with TH_LOG() in an arch-specific definition + * of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined. + */ +#if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET) +# error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch" +#endif +#ifndef SYSCALL_RET_SET +# define SYSCALL_RET_SET(_regs, _val) \ + do { \ + SYSCALL_RET(_regs) = (_val); \ + } while (0) +#endif /* When the syscall return can't be changed, stub out the tests for it. */ -#ifdef SYSCALL_NUM_RET_SHARE_REG +#ifndef SYSCALL_RET # define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action) #else # define EXPECT_SYSCALL_RETURN(val, action) \ @@ -1870,11 +1887,7 @@ void change_syscall(struct __test_metadata *_metadata, /* If syscall is skipped, change return value. */ if (syscall == -1) -#ifdef SYSCALL_NUM_RET_SHARE_REG - TH_LOG("Can't modify syscall return on this architecture"); -#else - SYSCALL_RET(regs) = result; -#endif + SYSCALL_RET_SET(regs, result); /* Flush any register changes made. */ if (memcmp(&orig, ®s, sizeof(orig)) != 0) -- cgit v1.2.3 From 46138329faeac3598f5a4dc991a174386b6de833 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:17 -0700 Subject: selftests/seccomp: powerpc: Fix seccomp return value testing On powerpc, the errno is not inverted, and depends on ccr.so being set. Add this to a powerpc definition of SYSCALL_RET_SET(). Co-developed-by: Thadeu Lima de Souza Cascardo Signed-off-by: Thadeu Lima de Souza Cascardo Link: https://lore.kernel.org/linux-kselftest/20200911181012.171027-1-cascardo@canonical.com/ Fixes: 5d83c2b37d43 ("selftests/seccomp: Add powerpc support") Signed-off-by: Kees Cook Link: https://lore.kernel.org/lkml/20200912110820.597135-13-keescook@chromium.org Reviewed-by: Michael Ellerman --- tools/testing/selftests/seccomp/seccomp_bpf.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 84766a001ed0..bc0fb463c709 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1750,6 +1750,21 @@ TEST_F(TRACE_poke, getpid_runs_normally) # define ARCH_REGS struct pt_regs # define SYSCALL_NUM(_regs) (_regs).gpr[0] # define SYSCALL_RET(_regs) (_regs).gpr[3] +# define SYSCALL_RET_SET(_regs, _val) \ + do { \ + typeof(_val) _result = (_val); \ + /* \ + * A syscall error is signaled by CR0 SO bit \ + * and the code is stored as a positive value. \ + */ \ + if (_result < 0) { \ + SYSCALL_RET(_regs) = -result; \ + (_regs).ccr |= 0x10000000; \ + } else { \ + SYSCALL_RET(_regs) = result; \ + (_regs).ccr &= ~0x10000000; \ + } \ + } while (0) #elif defined(__s390__) # define ARCH_REGS s390_regs # define SYSCALL_NUM(_regs) (_regs).gprs[2] -- cgit v1.2.3 From 71c87fbe720038007543abff7540ef0376c519f5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 19 Sep 2020 00:14:31 -0700 Subject: selftests/seccomp: Record syscall during ptrace entry In preparation for performing actions during ptrace syscall exit, save the syscall number during ptrace syscall entry. Some architectures do no have the syscall number available during ptrace syscall exit. Suggested-by: Thadeu Lima de Souza Cascardo Link: https://lore.kernel.org/linux-kselftest/20200911181012.171027-1-cascardo@canonical.com/ Acked-by: Christian Brauner Link: https://lore.kernel.org/lkml/20200921074354.6shkt2e5yhzhj3sn@wittgenstein/ Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 40 ++++++++++++++++++--------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index bc0fb463c709..c0311b4c736b 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1949,12 +1949,19 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, } +FIXTURE(TRACE_syscall) { + struct sock_fprog prog; + pid_t tracer, mytid, mypid, parent; + long syscall_nr; +}; + void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, int status, void *args) { - int ret, nr; + int ret; unsigned long msg; static bool entry; + FIXTURE_DATA(TRACE_syscall) *self = args; /* * The traditional way to tell PTRACE_SYSCALL entry/exit @@ -1968,24 +1975,31 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY : PTRACE_EVENTMSG_SYSCALL_EXIT, msg); - if (!entry) + /* + * Some architectures only support setting return values during + * syscall exit under ptrace, and on exit the syscall number may + * no longer be available. Therefore, save the initial sycall + * number here, so it can be examined during both entry and exit + * phases. + */ + if (entry) + self->syscall_nr = get_syscall(_metadata, tracee); + else return; - nr = get_syscall(_metadata, tracee); - - if (nr == __NR_getpid) + switch (self->syscall_nr) { + case __NR_getpid: change_syscall(_metadata, tracee, __NR_getppid, 0); - if (nr == __NR_gettid) + break; + case __NR_gettid: change_syscall(_metadata, tracee, -1, 45000); - if (nr == __NR_openat) + break; + case __NR_openat: change_syscall(_metadata, tracee, -1, -ESRCH); + break; + } } -FIXTURE(TRACE_syscall) { - struct sock_fprog prog; - pid_t tracer, mytid, mypid, parent; -}; - FIXTURE_VARIANT(TRACE_syscall) { /* * All of the SECCOMP_RET_TRACE behaviors can be tested with either @@ -2044,7 +2058,7 @@ FIXTURE_SETUP(TRACE_syscall) self->tracer = setup_trace_fixture(_metadata, variant->use_ptrace ? tracer_ptrace : tracer_seccomp, - NULL, variant->use_ptrace); + self, variant->use_ptrace); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret); -- cgit v1.2.3 From bef71f86b64de8b2eb5b2f26e1cbd7735e3551da Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 19 Sep 2020 00:21:34 -0700 Subject: selftests/seccomp: Allow syscall nr and ret value to be set separately In preparation for setting syscall nr and ret values separately, refactor the helpers to take a pointer to a value, so that a NULL can indicate "do not change this respective value". This is done to keep the regset read/write happening once and in one code path. Acked-by: Christian Brauner Link: https://lore.kernel.org/lkml/20200921075031.j4gruygeugkp2zwd@wittgenstein/ Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 59 +++++++++++++++++++++------ 1 file changed, 47 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index c0311b4c736b..98ce5e8a6398 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1888,27 +1888,47 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) } /* Architecture-specific syscall changing routine. */ -void change_syscall(struct __test_metadata *_metadata, - pid_t tracee, int syscall, int result) +void __change_syscall(struct __test_metadata *_metadata, + pid_t tracee, long *syscall, long *ret) { ARCH_REGS orig, regs; + /* Do not get/set registers if we have nothing to do. */ + if (!syscall && !ret) + return; + EXPECT_EQ(0, ARCH_GETREGS(regs)) { return; } orig = regs; - SYSCALL_NUM_SET(regs, syscall); + if (syscall) + SYSCALL_NUM_SET(regs, *syscall); - /* If syscall is skipped, change return value. */ - if (syscall == -1) - SYSCALL_RET_SET(regs, result); + if (ret) + SYSCALL_RET_SET(regs, *ret); /* Flush any register changes made. */ if (memcmp(&orig, ®s, sizeof(orig)) != 0) EXPECT_EQ(0, ARCH_SETREGS(regs)); } +/* Change only syscall number. */ +void change_syscall_nr(struct __test_metadata *_metadata, + pid_t tracee, long syscall) +{ + __change_syscall(_metadata, tracee, &syscall, NULL); +} + +/* Change syscall return value (and set syscall number to -1). */ +void change_syscall_ret(struct __test_metadata *_metadata, + pid_t tracee, long ret) +{ + long syscall = -1; + + __change_syscall(_metadata, tracee, &syscall, &ret); +} + void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, int status, void *args) { @@ -1924,17 +1944,17 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, case 0x1002: /* change getpid to getppid. */ EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, __NR_getppid, 0); + change_syscall_nr(_metadata, tracee, __NR_getppid); break; case 0x1003: /* skip gettid with valid return code. */ EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, -1, 45000); + change_syscall_ret(_metadata, tracee, 45000); break; case 0x1004: /* skip openat with error. */ EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, -1, -ESRCH); + change_syscall_ret(_metadata, tracee, -ESRCH); break; case 0x1005: /* do nothing (allow getppid) */ @@ -1961,6 +1981,8 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, int ret; unsigned long msg; static bool entry; + long syscall_nr_val, syscall_ret_val; + long *syscall_nr = NULL, *syscall_ret = NULL; FIXTURE_DATA(TRACE_syscall) *self = args; /* @@ -1987,17 +2009,30 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, else return; + syscall_nr = &syscall_nr_val; + syscall_ret = &syscall_ret_val; + + /* Now handle the actual rewriting cases. */ switch (self->syscall_nr) { case __NR_getpid: - change_syscall(_metadata, tracee, __NR_getppid, 0); + syscall_nr_val = __NR_getppid; + /* Never change syscall return for this case. */ + syscall_ret = NULL; break; case __NR_gettid: - change_syscall(_metadata, tracee, -1, 45000); + syscall_nr_val = -1; + syscall_ret_val = 45000; break; case __NR_openat: - change_syscall(_metadata, tracee, -1, -ESRCH); + syscall_nr_val = -1; + syscall_ret_val = -ESRCH; break; + default: + /* Unhandled, do nothing. */ + return; } + + __change_syscall(_metadata, tracee, syscall_nr, syscall_ret); } FIXTURE_VARIANT(TRACE_syscall) { -- cgit v1.2.3 From a39caac02f2f5819d39f37d7987babe19fcafe21 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 11 Sep 2020 19:49:50 -0700 Subject: selftests/seccomp: powerpc: Set syscall return during ptrace syscall exit Some archs (like powerpc) only support changing the return code during syscall exit when ptrace is used. Test entry vs exit phases for which portions of the syscall number and return values need to be set at which different phases. For non-powerpc, all changes are made during ptrace syscall entry, as before. For powerpc, the syscall number is changed at ptrace syscall entry and the syscall return value is changed on ptrace syscall exit. Reported-by: Thadeu Lima de Souza Cascardo Suggested-by: Thadeu Lima de Souza Cascardo Link: https://lore.kernel.org/linux-kselftest/20200911181012.171027-1-cascardo@canonical.com/ Fixes: 58d0a862f573 ("seccomp: add tests for ptrace hole") Acked-by: Christian Brauner Link: https://lore.kernel.org/lkml/20200921075300.7iylzof2w5vrutah@wittgenstein/ Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 98ce5e8a6398..894c2404d321 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1765,6 +1765,7 @@ TEST_F(TRACE_poke, getpid_runs_normally) (_regs).ccr &= ~0x10000000; \ } \ } while (0) +# define SYSCALL_RET_SET_ON_PTRACE_EXIT #elif defined(__s390__) # define ARCH_REGS s390_regs # define SYSCALL_NUM(_regs) (_regs).gprs[2] @@ -1853,6 +1854,18 @@ TEST_F(TRACE_poke, getpid_runs_normally) } while (0) #endif +/* + * Some architectures (e.g. powerpc) can only set syscall + * return values on syscall exit during ptrace. + */ +const bool ptrace_entry_set_syscall_nr = true; +const bool ptrace_entry_set_syscall_ret = +#ifndef SYSCALL_RET_SET_ON_PTRACE_EXIT + true; +#else + false; +#endif + /* * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). @@ -2006,11 +2019,15 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, */ if (entry) self->syscall_nr = get_syscall(_metadata, tracee); - else - return; - syscall_nr = &syscall_nr_val; - syscall_ret = &syscall_ret_val; + /* + * Depending on the architecture's syscall setting abilities, we + * pick which things to set during this phase (entry or exit). + */ + if (entry == ptrace_entry_set_syscall_nr) + syscall_nr = &syscall_nr_val; + if (entry == ptrace_entry_set_syscall_ret) + syscall_ret = &syscall_ret_val; /* Now handle the actual rewriting cases. */ switch (self->syscall_nr) { -- cgit v1.2.3 From e953aeaa913bedcdabc168276ef41c83ae75f161 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 12 Sep 2020 04:08:19 -0700 Subject: selftests/clone3: Avoid OS-defined clone_args As the UAPI headers start to appear in distros, we need to avoid outdated versions of struct clone_args to be able to test modern features, named "struct __clone_args". Additionally update the struct size macro names to match UAPI names. Acked-by: Christian Brauner Link: https://lore.kernel.org/lkml/20200921075432.u4gis3s2o5qrsb5g@wittgenstein/ Signed-off-by: Kees Cook --- tools/testing/selftests/clone3/clone3.c | 45 +++++++++------------- .../clone3/clone3_cap_checkpoint_restore.c | 4 +- .../selftests/clone3/clone3_clear_sighand.c | 2 +- tools/testing/selftests/clone3/clone3_selftests.h | 24 +++++++----- tools/testing/selftests/clone3/clone3_set_tid.c | 4 +- tools/testing/selftests/pidfd/pidfd_setns_test.c | 2 +- tools/testing/selftests/seccomp/seccomp_bpf.c | 4 +- 7 files changed, 41 insertions(+), 44 deletions(-) diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c index b7e6dec36173..42be3b925830 100644 --- a/tools/testing/selftests/clone3/clone3.c +++ b/tools/testing/selftests/clone3/clone3.c @@ -20,13 +20,6 @@ #include "../kselftest.h" #include "clone3_selftests.h" -/* - * Different sizes of struct clone_args - */ -#ifndef CLONE3_ARGS_SIZE_V0 -#define CLONE3_ARGS_SIZE_V0 64 -#endif - enum test_mode { CLONE3_ARGS_NO_TEST, CLONE3_ARGS_ALL_0, @@ -38,13 +31,13 @@ enum test_mode { static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode) { - struct clone_args args = { + struct __clone_args args = { .flags = flags, .exit_signal = SIGCHLD, }; struct clone_args_extended { - struct clone_args args; + struct __clone_args args; __aligned_u64 excess_space[2]; } args_ext; @@ -52,11 +45,11 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode) int status; memset(&args_ext, 0, sizeof(args_ext)); - if (size > sizeof(struct clone_args)) + if (size > sizeof(struct __clone_args)) args_ext.excess_space[1] = 1; if (size == 0) - size = sizeof(struct clone_args); + size = sizeof(struct __clone_args); switch (test_mode) { case CLONE3_ARGS_ALL_0: @@ -77,9 +70,9 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode) break; } - memcpy(&args_ext.args, &args, sizeof(struct clone_args)); + memcpy(&args_ext.args, &args, sizeof(struct __clone_args)); - pid = sys_clone3((struct clone_args *)&args_ext, size); + pid = sys_clone3((struct __clone_args *)&args_ext, size); if (pid < 0) { ksft_print_msg("%s - Failed to create new process\n", strerror(errno)); @@ -144,14 +137,14 @@ int main(int argc, char *argv[]) else ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n"); - /* Do a clone3() with CLONE3_ARGS_SIZE_V0. */ - test_clone3(0, CLONE3_ARGS_SIZE_V0, 0, CLONE3_ARGS_NO_TEST); + /* Do a clone3() with CLONE_ARGS_SIZE_VER0. */ + test_clone3(0, CLONE_ARGS_SIZE_VER0, 0, CLONE3_ARGS_NO_TEST); - /* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 */ - test_clone3(0, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST); + /* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 */ + test_clone3(0, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST); /* Do a clone3() with sizeof(struct clone_args) + 8 */ - test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_NO_TEST); + test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_NO_TEST); /* Do a clone3() with exit_signal having highest 32 bits non-zero */ test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG); @@ -165,31 +158,31 @@ int main(int argc, char *argv[]) /* Do a clone3() with NSIG < exit_signal < CSIG */ test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG); - test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_ALL_0); + test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_ALL_0); - test_clone3(0, sizeof(struct clone_args) + 16, -E2BIG, + test_clone3(0, sizeof(struct __clone_args) + 16, -E2BIG, CLONE3_ARGS_ALL_0); - test_clone3(0, sizeof(struct clone_args) * 2, -E2BIG, + test_clone3(0, sizeof(struct __clone_args) * 2, -E2BIG, CLONE3_ARGS_ALL_0); /* Do a clone3() with > page size */ test_clone3(0, getpagesize() + 8, -E2BIG, CLONE3_ARGS_NO_TEST); - /* Do a clone3() with CLONE3_ARGS_SIZE_V0 in a new PID NS. */ + /* Do a clone3() with CLONE_ARGS_SIZE_VER0 in a new PID NS. */ if (uid == 0) - test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0, 0, + test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0, 0, CLONE3_ARGS_NO_TEST); else ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n"); - /* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 in a new PID NS */ - test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL, + /* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 in a new PID NS */ + test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST); /* Do a clone3() with sizeof(struct clone_args) + 8 in a new PID NS */ if (uid == 0) - test_clone3(CLONE_NEWPID, sizeof(struct clone_args) + 8, 0, + test_clone3(CLONE_NEWPID, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_NO_TEST); else ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n"); diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c index 9562425aa0a9..55bd387ce7ec 100644 --- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c +++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c @@ -44,13 +44,13 @@ static int call_clone3_set_tid(struct __test_metadata *_metadata, int status; pid_t pid = -1; - struct clone_args args = { + struct __clone_args args = { .exit_signal = SIGCHLD, .set_tid = ptr_to_u64(set_tid), .set_tid_size = set_tid_size, }; - pid = sys_clone3(&args, sizeof(struct clone_args)); + pid = sys_clone3(&args, sizeof(args)); if (pid < 0) { TH_LOG("%s - Failed to create new process", strerror(errno)); return -errno; diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c index db5fc9c5edcf..47a8c0fc3676 100644 --- a/tools/testing/selftests/clone3/clone3_clear_sighand.c +++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c @@ -47,7 +47,7 @@ static void test_clone3_clear_sighand(void) { int ret; pid_t pid; - struct clone_args args = {}; + struct __clone_args args = {}; struct sigaction act; /* diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h index 91c1a78ddb39..e81ffaaee02b 100644 --- a/tools/testing/selftests/clone3/clone3_selftests.h +++ b/tools/testing/selftests/clone3/clone3_selftests.h @@ -19,13 +19,11 @@ #define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */ #endif -#ifndef CLONE_ARGS_SIZE_VER0 -#define CLONE_ARGS_SIZE_VER0 64 -#endif - #ifndef __NR_clone3 #define __NR_clone3 -1 -struct clone_args { +#endif + +struct __clone_args { __aligned_u64 flags; __aligned_u64 pidfd; __aligned_u64 child_tid; @@ -34,15 +32,21 @@ struct clone_args { __aligned_u64 stack; __aligned_u64 stack_size; __aligned_u64 tls; -#define CLONE_ARGS_SIZE_VER1 80 +#ifndef CLONE_ARGS_SIZE_VER0 +#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */ +#endif __aligned_u64 set_tid; __aligned_u64 set_tid_size; -#define CLONE_ARGS_SIZE_VER2 88 +#ifndef CLONE_ARGS_SIZE_VER1 +#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */ +#endif __aligned_u64 cgroup; +#ifndef CLONE_ARGS_SIZE_VER2 +#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */ +#endif }; -#endif /* __NR_clone3 */ -static pid_t sys_clone3(struct clone_args *args, size_t size) +static pid_t sys_clone3(struct __clone_args *args, size_t size) { fflush(stdout); fflush(stderr); @@ -52,7 +56,7 @@ static pid_t sys_clone3(struct clone_args *args, size_t size) static inline void test_clone3_supported(void) { pid_t pid; - struct clone_args args = {}; + struct __clone_args args = {}; if (__NR_clone3 < 0) ksft_exit_skip("clone3() syscall is not supported\n"); diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c index 5831c1082d6d..0229e9ebb995 100644 --- a/tools/testing/selftests/clone3/clone3_set_tid.c +++ b/tools/testing/selftests/clone3/clone3_set_tid.c @@ -46,14 +46,14 @@ static int call_clone3_set_tid(pid_t *set_tid, int status; pid_t pid = -1; - struct clone_args args = { + struct __clone_args args = { .flags = flags, .exit_signal = SIGCHLD, .set_tid = ptr_to_u64(set_tid), .set_tid_size = set_tid_size, }; - pid = sys_clone3(&args, sizeof(struct clone_args)); + pid = sys_clone3(&args, sizeof(args)); if (pid < 0) { ksft_print_msg("%s - Failed to create new process\n", strerror(errno)); diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c index 7dca1aa4672d..1f085b922c6e 100644 --- a/tools/testing/selftests/pidfd/pidfd_setns_test.c +++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c @@ -75,7 +75,7 @@ static int sys_waitid(int which, pid_t pid, int options) pid_t create_child(int *pidfd, unsigned flags) { - struct clone_args args = { + struct __clone_args args = { .flags = CLONE_PIDFD | flags, .exit_signal = SIGCHLD, .pidfd = ptr_to_u64(pidfd), diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 894c2404d321..4a180439ee9e 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -3817,7 +3817,7 @@ TEST(user_notification_filter_empty) long ret; int status; struct pollfd pollfd; - struct clone_args args = { + struct __clone_args args = { .flags = CLONE_FILES, .exit_signal = SIGCHLD, }; @@ -3871,7 +3871,7 @@ TEST(user_notification_filter_empty_threaded) long ret; int status; struct pollfd pollfd; - struct clone_args args = { + struct __clone_args args = { .flags = CLONE_FILES, .exit_signal = SIGCHLD, }; -- cgit v1.2.3 From 282a181b1a0d66de1f0894d82f395fcd478f51d1 Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Thu, 24 Sep 2020 07:44:16 -0500 Subject: seccomp: Move config option SECCOMP to arch/Kconfig In order to make adding configurable features into seccomp easier, it's better to have the options at one single location, considering especially that the bulk of seccomp code is arch-independent. An quick look also show that many SECCOMP descriptions are outdated; they talk about /proc rather than prctl. As a result of moving the config option and keeping it default on, architectures arm, arm64, csky, riscv, sh, and xtensa did not have SECCOMP on by default prior to this and SECCOMP will be default in this change. Architectures microblaze, mips, powerpc, s390, sh, and sparc have an outdated depend on PROC_FS and this dependency is removed in this change. Suggested-by: Jann Horn Link: https://lore.kernel.org/lkml/CAG48ez1YWz9cnp08UZgeieYRhHdqh-ch7aNwc4JRBnGyrmgfMg@mail.gmail.com/ Signed-off-by: YiFei Zhu [kees: added HAVE_ARCH_SECCOMP help text, tweaked wording] Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/9ede6ef35c847e58d61e476c6a39540520066613.1600951211.git.yifeifz2@illinois.edu --- arch/Kconfig | 30 ++++++++++++++++++++++++++++++ arch/arm/Kconfig | 15 +-------------- arch/arm64/Kconfig | 13 ------------- arch/csky/Kconfig | 13 ------------- arch/microblaze/Kconfig | 18 +----------------- arch/mips/Kconfig | 17 ----------------- arch/parisc/Kconfig | 16 ---------------- arch/powerpc/Kconfig | 17 ----------------- arch/riscv/Kconfig | 13 ------------- arch/s390/Kconfig | 17 ----------------- arch/sh/Kconfig | 16 ---------------- arch/sparc/Kconfig | 18 +----------------- arch/um/Kconfig | 16 ---------------- arch/x86/Kconfig | 16 ---------------- arch/xtensa/Kconfig | 14 -------------- 15 files changed, 33 insertions(+), 216 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index af14a567b493..21a3675a7a3a 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -444,10 +444,23 @@ config ARCH_WANT_OLD_COMPAT_IPC select ARCH_WANT_COMPAT_IPC_PARSE_VERSION bool +config HAVE_ARCH_SECCOMP + bool + help + An arch should select this symbol to support seccomp mode 1 (the fixed + syscall policy), and must provide an overrides for __NR_seccomp_sigreturn, + and compat syscalls if the asm-generic/seccomp.h defaults need adjustment: + - __NR_seccomp_read_32 + - __NR_seccomp_write_32 + - __NR_seccomp_exit_32 + - __NR_seccomp_sigreturn_32 + config HAVE_ARCH_SECCOMP_FILTER bool + select HAVE_ARCH_SECCOMP help An arch should select this symbol if it provides all of these things: + - all the requirements for HAVE_ARCH_SECCOMP - syscall_get_arch() - syscall_get_arguments() - syscall_rollback() @@ -458,6 +471,23 @@ config HAVE_ARCH_SECCOMP_FILTER results in the system call being skipped immediately. - seccomp syscall wired up +config SECCOMP + prompt "Enable seccomp to safely execute untrusted bytecode" + def_bool y + depends on HAVE_ARCH_SECCOMP + help + This kernel feature is useful for number crunching applications + that may need to handle untrusted bytecode during their + execution. By using pipes or other transports made available + to the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in their + own address space using seccomp. Once seccomp is enabled via + prctl(PR_SET_SECCOMP) or the seccomp() syscall, it cannot be + disabled and the task is only allowed to execute a few safe + syscalls defined by each seccomp mode. + + If unsure, say Y. + config SECCOMP_FILTER def_bool y depends on HAVE_ARCH_SECCOMP_FILTER && SECCOMP && NET diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index e00d94b16658..e26c19a16284 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -67,6 +67,7 @@ config ARM select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU select HAVE_ARCH_MMAP_RND_BITS if MMU + select HAVE_ARCH_SECCOMP select HAVE_ARCH_SECCOMP_FILTER if AEABI && !OABI_COMPAT select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK @@ -1617,20 +1618,6 @@ config UACCESS_WITH_MEMCPY However, if the CPU data cache is using a write-allocate mode, this option is unlikely to provide any performance gain. -config SECCOMP - bool - prompt "Enable seccomp to safely compute untrusted bytecode" - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via prctl(PR_SET_SECCOMP), it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - config PARAVIRT bool "Enable paravirtualization code" help diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6d232837cbee..98c4e34cbec1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1033,19 +1033,6 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK config CC_HAVE_SHADOW_CALL_STACK def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18) -config SECCOMP - bool "Enable seccomp to safely compute untrusted bytecode" - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via prctl(PR_SET_SECCOMP), it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - config PARAVIRT bool "Enable paravirtualization code" help diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 3d5afb5f5685..7f424c85772c 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -309,16 +309,3 @@ endmenu source "arch/csky/Kconfig.platforms" source "kernel/Kconfig.hz" - -config SECCOMP - bool "Enable seccomp to safely compute untrusted bytecode" - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via prctl(PR_SET_SECCOMP), it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index d262ac0c8714..37bd6a5f38fb 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -26,6 +26,7 @@ config MICROBLAZE select GENERIC_SCHED_CLOCK select HAVE_ARCH_HASH select HAVE_ARCH_KGDB + select HAVE_ARCH_SECCOMP select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE @@ -120,23 +121,6 @@ config CMDLINE_FORCE Set this to have arguments from the default kernel command string override those passed by the boot loader. -config SECCOMP - bool "Enable seccomp to safely compute untrusted bytecode" - depends on PROC_FS - default y - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via /proc//seccomp, it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - - If unsure, say Y. Only embedded should say N here. - endmenu menu "Kernel features" diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index c95fa3a2484c..5f88a8fc11fc 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -3004,23 +3004,6 @@ config PHYSICAL_START specified in the "crashkernel=YM@XM" command line boot parameter passed to the panic-ed kernel). -config SECCOMP - bool "Enable seccomp to safely compute untrusted bytecode" - depends on PROC_FS - default y - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via /proc//seccomp, it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - - If unsure, say Y. Only embedded should say N here. - config MIPS_O32_FP64_SUPPORT bool "Support for O32 binaries using 64-bit FP" if !CPU_MIPSR6 depends on 32BIT || MIPS32_O32 diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 3b0f53dd70bc..cd4afe1e7a6c 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -378,19 +378,3 @@ endmenu source "drivers/parisc/Kconfig" - -config SECCOMP - def_bool y - prompt "Enable seccomp to safely compute untrusted bytecode" - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via prctl(PR_SET_SECCOMP), it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - - If unsure, say Y. Only embedded should say N here. diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 1f48bbfb3ce9..136fe860caef 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -934,23 +934,6 @@ config ARCH_WANTS_FREEZER_CONTROL source "kernel/power/Kconfig" -config SECCOMP - bool "Enable seccomp to safely compute untrusted bytecode" - depends on PROC_FS - default y - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via /proc//seccomp, it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - - If unsure, say Y. Only embedded should say N here. - config PPC_MEM_KEYS prompt "PowerPC Memory Protection Keys" def_bool y diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index df18372861d8..c456b558fab9 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -333,19 +333,6 @@ menu "Kernel features" source "kernel/Kconfig.hz" -config SECCOMP - bool "Enable seccomp to safely compute untrusted bytecode" - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via prctl(PR_SET_SECCOMP), it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - config RISCV_SBI_V01 bool "SBI v0.1 support" default y diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 3d86e12e8e3c..7f7b40ec699e 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -791,23 +791,6 @@ config CRASH_DUMP endmenu -config SECCOMP - def_bool y - prompt "Enable seccomp to safely compute untrusted bytecode" - depends on PROC_FS - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via /proc//seccomp, it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - - If unsure, say Y. - config CCW def_bool y diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index d20927128fce..18278152c91c 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -600,22 +600,6 @@ config PHYSICAL_START where the fail safe kernel needs to run at a different address than the panic-ed kernel. -config SECCOMP - bool "Enable seccomp to safely compute untrusted bytecode" - depends on PROC_FS - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via prctl, it cannot be disabled and the task is only - allowed to execute a few safe syscalls defined by each seccomp - mode. - - If unsure, say N. - config SMP bool "Symmetric multi-processing support" depends on SYS_SUPPORTS_SMP diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index efeff2c896a5..d62ce83cf009 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -23,6 +23,7 @@ config SPARC select HAVE_OPROFILE select HAVE_ARCH_KGDB if !SMP || SPARC64 select HAVE_ARCH_TRACEHOOK + select HAVE_ARCH_SECCOMP if SPARC64 select HAVE_EXIT_THREAD select HAVE_PCI select SYSCTL_EXCEPTION_TRACE @@ -226,23 +227,6 @@ config EARLYFB help Say Y here to enable a faster early framebuffer boot console. -config SECCOMP - bool "Enable seccomp to safely compute untrusted bytecode" - depends on SPARC64 && PROC_FS - default y - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via /proc//seccomp, it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - - If unsure, say Y. Only embedded should say N here. - config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" depends on SPARC64 && SMP diff --git a/arch/um/Kconfig b/arch/um/Kconfig index eb51fec75948..d49f471b02e3 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -173,22 +173,6 @@ config PGTABLE_LEVELS default 3 if 3_LEVEL_PGTABLES default 2 -config SECCOMP - def_bool y - prompt "Enable seccomp to safely compute untrusted bytecode" - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via prctl(PR_SET_SECCOMP), it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - - If unsure, say Y. - config UML_TIME_TRAVEL_SUPPORT bool prompt "Support time-travel mode (e.g. for test execution)" diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7101ac64bb20..1ab22869a765 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1968,22 +1968,6 @@ config EFI_MIXED If unsure, say N. -config SECCOMP - def_bool y - prompt "Enable seccomp to safely compute untrusted bytecode" - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via prctl(PR_SET_SECCOMP), it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - - If unsure, say Y. Only embedded should say N here. - source "kernel/Kconfig.hz" config KEXEC diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index e997e0119c02..d8a29dc5a284 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -217,20 +217,6 @@ config HOTPLUG_CPU Say N if you want to disable CPU hotplug. -config SECCOMP - bool - prompt "Enable seccomp to safely compute untrusted bytecode" - help - This kernel feature is useful for number crunching applications - that may need to compute untrusted bytecode during their - execution. By using pipes or other transports made available to - the process as file descriptors supporting the read/write - syscalls, it's possible to isolate those applications in - their own address space using seccomp. Once seccomp is - enabled via prctl(PR_SET_SECCOMP), it cannot be disabled - and the task is only allowed to execute a few safe syscalls - defined by each seccomp mode. - config FAST_SYSCALL_XTENSA bool "Enable fast atomic syscalls" default n -- cgit v1.2.3 From dfe719fef03d752f1682fa8aeddf30ba501c8555 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 5 Oct 2020 03:44:01 +0200 Subject: seccomp: Make duplicate listener detection non-racy Currently, init_listener() tries to prevent adding a filter with SECCOMP_FILTER_FLAG_NEW_LISTENER if one of the existing filters already has a listener. However, this check happens without holding any lock that would prevent another thread from concurrently installing a new filter (potentially with a listener) on top of the ones we already have. Theoretically, this is also a data race: The plain load from current->seccomp.filter can race with concurrent writes to the same location. Fix it by moving the check into the region that holds the siglock to guard against concurrent TSYNC. (The "Fixes" tag points to the commit that introduced the theoretical data race; concurrent installation of another filter with TSYNC only became possible later, in commit 51891498f2da ("seccomp: allow TSYNC and USER_NOTIF together").) Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace") Reviewed-by: Tycho Andersen Signed-off-by: Jann Horn Signed-off-by: Kees Cook Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201005014401.490175-1-jannh@google.com --- kernel/seccomp.c | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index ae6b40cc39f4..8ad7a293255a 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -1476,13 +1476,7 @@ static const struct file_operations seccomp_notify_ops = { static struct file *init_listener(struct seccomp_filter *filter) { - struct file *ret = ERR_PTR(-EBUSY); - struct seccomp_filter *cur; - - for (cur = current->seccomp.filter; cur; cur = cur->prev) { - if (cur->notif) - goto out; - } + struct file *ret; ret = ERR_PTR(-ENOMEM); filter->notif = kzalloc(sizeof(*(filter->notif)), GFP_KERNEL); @@ -1508,6 +1502,31 @@ out: return ret; } +/* + * Does @new_child have a listener while an ancestor also has a listener? + * If so, we'll want to reject this filter. + * This only has to be tested for the current process, even in the TSYNC case, + * because TSYNC installs @child with the same parent on all threads. + * Note that @new_child is not hooked up to its parent at this point yet, so + * we use current->seccomp.filter. + */ +static bool has_duplicate_listener(struct seccomp_filter *new_child) +{ + struct seccomp_filter *cur; + + /* must be protected against concurrent TSYNC */ + lockdep_assert_held(¤t->sighand->siglock); + + if (!new_child->notif) + return false; + for (cur = current->seccomp.filter; cur; cur = cur->prev) { + if (cur->notif) + return true; + } + + return false; +} + /** * seccomp_set_mode_filter: internal function for setting seccomp filter * @flags: flags to change filter behavior @@ -1579,6 +1598,11 @@ static long seccomp_set_mode_filter(unsigned int flags, if (!seccomp_may_assign_mode(seccomp_mode)) goto out; + if (has_duplicate_listener(prepared)) { + ret = -EBUSY; + goto out; + } + ret = seccomp_attach_filter(flags, prepared); if (ret) goto out; -- cgit v1.2.3