From 028b6e8a89de9133a869bb4cd1bc72445b1ec8ca Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Sun, 14 Jul 2019 19:20:47 +0300 Subject: clone: fix CLONE_PIDFD support The introduction of clone3 syscall accidentally broke CLONE_PIDFD support in traditional clone syscall on compat x86 and those architectures that use do_fork to implement clone syscall. This bug was found by strace test suite. Link: https://strace.io/logs/strace/2019-07-12 Fixes: 7f192e3cd316 ("fork: add clone3") Bisected-and-tested-by: Anatoly Pugachev Signed-off-by: Dmitry V. Levin Link: https://lore.kernel.org/r/20190714162047.GB10389@altlinux.org Signed-off-by: Christian Brauner --- arch/x86/ia32/sys_ia32.c | 4 ++++ include/linux/sched/task.h | 1 + kernel/fork.c | 17 +++++++++++++++-- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 64a6c952091e..21790307121e 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -239,6 +239,7 @@ COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags, { struct kernel_clone_args args = { .flags = (clone_flags & ~CSIGNAL), + .pidfd = parent_tidptr, .child_tid = child_tidptr, .parent_tid = parent_tidptr, .exit_signal = (clone_flags & CSIGNAL), @@ -246,5 +247,8 @@ COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags, .tls = tls_val, }; + if (!legacy_clone_args_valid(&args)) + return -EINVAL; + return _do_fork(&args); } diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 109a0df5af39..0497091e40c1 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -89,6 +89,7 @@ extern void exit_files(struct task_struct *); extern void exit_itimers(struct signal_struct *); extern long _do_fork(struct kernel_clone_args *kargs); +extern bool legacy_clone_args_valid(const struct kernel_clone_args *kargs); extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); struct mm_struct *copy_init_mm(void); diff --git a/kernel/fork.c b/kernel/fork.c index 8f3e2d97d771..ef1e05a68827 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2406,6 +2406,16 @@ long _do_fork(struct kernel_clone_args *args) return nr; } +bool legacy_clone_args_valid(const struct kernel_clone_args *kargs) +{ + /* clone(CLONE_PIDFD) uses parent_tidptr to return a pidfd */ + if ((kargs->flags & CLONE_PIDFD) && + (kargs->flags & CLONE_PARENT_SETTID)) + return false; + + return true; +} + #ifndef CONFIG_HAVE_COPY_THREAD_TLS /* For compatibility with architectures that call do_fork directly rather than * using the syscall entry points below. */ @@ -2417,6 +2427,7 @@ long do_fork(unsigned long clone_flags, { struct kernel_clone_args args = { .flags = (clone_flags & ~CSIGNAL), + .pidfd = parent_tidptr, .child_tid = child_tidptr, .parent_tid = parent_tidptr, .exit_signal = (clone_flags & CSIGNAL), @@ -2424,6 +2435,9 @@ long do_fork(unsigned long clone_flags, .stack_size = stack_size, }; + if (!legacy_clone_args_valid(&args)) + return -EINVAL; + return _do_fork(&args); } #endif @@ -2505,8 +2519,7 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, .tls = tls, }; - /* clone(CLONE_PIDFD) uses parent_tidptr to return a pidfd */ - if ((clone_flags & CLONE_PIDFD) && (clone_flags & CLONE_PARENT_SETTID)) + if (!legacy_clone_args_valid(&args)) return -EINVAL; return _do_fork(&args); -- cgit v1.2.3 From 1a271a68e030f3e134de12087117574a883e20f0 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 14 Jul 2019 21:22:04 +0200 Subject: arch: mark syscall number 435 reserved for clone3 A while ago Arnd made it possible to give new system calls the same syscall number on all architectures (except alpha). To not break this nice new feature let's mark 435 for clone3 as reserved on all architectures that do not yet implement it. Even if an architecture does not plan to implement it this ensures that new system calls coming after clone3 will have the same number on all architectures. Signed-off-by: Christian Brauner Cc: linux-arch@vger.kernel.org Cc: linux-alpha@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-m68k@lists.linux-m68k.org Cc: linux-mips@vger.kernel.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: sparclinux@vger.kernel.org Link: https://lore.kernel.org/r/20190714192205.27190-2-christian@brauner.io Reviewed-by: Arnd Bergmann Signed-off-by: Christian Brauner --- arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/ia64/kernel/syscalls/syscall.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + 11 files changed, 11 insertions(+) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 1db9bbcfb84e..728fe028c02c 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -474,3 +474,4 @@ 542 common fsmount sys_fsmount 543 common fspick sys_fspick 544 common pidfd_open sys_pidfd_open +# 545 reserved for clone3 diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index ecc44926737b..36d5faf4c86c 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -355,3 +355,4 @@ 432 common fsmount sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open +# 435 reserved for clone3 diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 9a3eb2558568..a88a285a0e5f 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -434,3 +434,4 @@ 432 common fsmount sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open +# 435 reserved for clone3 diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 97035e19ad03..c9c879ec9b6d 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -373,3 +373,4 @@ 432 n32 fsmount sys_fsmount 433 n32 fspick sys_fspick 434 n32 pidfd_open sys_pidfd_open +# 435 reserved for clone3 diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index d7292722d3b0..bbce9159caa1 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -349,3 +349,4 @@ 432 n64 fsmount sys_fsmount 433 n64 fspick sys_fspick 434 n64 pidfd_open sys_pidfd_open +# 435 reserved for clone3 diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index dba084c92f14..9653591428ec 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -422,3 +422,4 @@ 432 o32 fsmount sys_fsmount 433 o32 fspick sys_fspick 434 o32 pidfd_open sys_pidfd_open +# 435 reserved for clone3 diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 5022b9e179c2..c7aadfef5386 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -431,3 +431,4 @@ 432 common fsmount sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open +# 435 reserved for clone3 diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index f2c3bda2d39f..3331749aab20 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -516,3 +516,4 @@ 432 common fsmount sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open +# 435 reserved for clone3 diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 6ebacfeaf853..a90d3e945445 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -437,3 +437,4 @@ 432 common fsmount sys_fsmount sys_fsmount 433 common fspick sys_fspick sys_fspick 434 common pidfd_open sys_pidfd_open sys_pidfd_open +# 435 reserved for clone3 diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 834c9c7d79fa..b5ed26c4c005 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -437,3 +437,4 @@ 432 common fsmount sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open +# 435 reserved for clone3 diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index c58e71f21129..8c8cc7537fb2 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -480,3 +480,4 @@ 432 common fsmount sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open +# 435 reserved for clone3 -- cgit v1.2.3 From 05a70a8ec287c4381ee1441ca779deab7bae124d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 14 Jul 2019 21:22:05 +0200 Subject: unistd: protect clone3 via __ARCH_WANT_SYS_CLONE3 This lets us catch new architectures that implicitly make use of clone3 without setting __ARCH_WANT_SYS_CLONE3. Failing on missing __ARCH_WANT_SYS_CLONE3 is a good indicator that they either did not really want this syscall or haven't really thought about whether it needs special treatment and just accidently included it in their entrypoints by e.g. generating their syscall table automatically via asm-generic/unistd.h This patch has been compile-tested for the h8300 architecture which is one of the architectures that does not yet implement clone3 and generates its syscall table via asm-generic/unistd.h. Signed-off-by: Christian Brauner Suggested-by: Arnd Bergmann Link: https://lore.kernel.org/r/20190714192205.27190-3-christian@brauner.io Reviewed-by: Arnd Bergmann Signed-off-by: Christian Brauner --- include/uapi/asm-generic/unistd.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 9acfff0cd153..1be0e798e362 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -846,8 +846,10 @@ __SYSCALL(__NR_fsmount, sys_fsmount) __SYSCALL(__NR_fspick, sys_fspick) #define __NR_pidfd_open 434 __SYSCALL(__NR_pidfd_open, sys_pidfd_open) +#ifdef __ARCH_WANT_SYS_CLONE3 #define __NR_clone3 435 __SYSCALL(__NR_clone3, sys_clone3) +#endif #undef __NR_syscalls #define __NR_syscalls 436 -- cgit v1.2.3 From 69b53720e92c1bdea854a2fc204477ddabfa902b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 15 Jul 2019 02:30:21 +0200 Subject: MAINTAINERS: add new entry for pidfd api Add me as a maintainer for pidfd stuff so people know who to yell at and to easily keep track of incoming changes. Signed-off-by: Christian Brauner Link: https://lore.kernel.org/r/20190715003021.25040-1-christian@brauner.io --- MAINTAINERS | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 1be025959be9..63522c0043d4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12567,6 +12567,17 @@ F: arch/arm/boot/dts/picoxcell* F: arch/arm/mach-picoxcell/ F: drivers/crypto/picoxcell* +PIDFD API +M: Christian Brauner +L: linux-kernel@vger.kernel.org +S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git +F: samples/pidfd/ +F: tools/testing/selftests/pidfd/ +K: (?i)pidfd +K: (?i)clone3 +K: \b(clone_args|kernel_clone_args)\b + PIN CONTROL SUBSYSTEM M: Linus Walleij L: linux-gpio@vger.kernel.org -- cgit v1.2.3