From e2bd64d92a10f34b425cf3884c6032588c661335 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 26 Jun 2017 13:38:04 -0500 Subject: signal/alpha: Document a conflict with SI_USER for SIGTRAP Setting si_code to __SI_FAULT results in a userspace seeing an si_code of 0. This is the same si_code as SI_USER. Posix and common sense requires that SI_USER not be a signal specific si_code. As such this use of 0 for the si_code is a pretty horribly broken ABI. Given that alpha is on it's last legs I don't know that it is worth fixing this, but it is worth documenting what is going on so that no one decides to copy this bad decision. This was introduced during the 2.5 development cycle so this mess has had a long time for people to be able to depend upon it. v2: Added FPE_FIXME for alpha as Helge Deller pointed out with his alternate patch one of the cases is SIGFPE not SIGTRAP. Cc: Helge Deller Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: linux-alpha@vger.kernel.org Acked-by: Richard Henderson History Tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git Ref: 0a635c7a84cf ("Fill in siginfo_t.") Signed-off-by: "Eric W. Biederman" --- arch/alpha/include/uapi/asm/siginfo.h | 14 ++++++++++++++ arch/alpha/kernel/traps.c | 6 +++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/arch/alpha/include/uapi/asm/siginfo.h b/arch/alpha/include/uapi/asm/siginfo.h index 9822362a8424..972f547d9e41 100644 --- a/arch/alpha/include/uapi/asm/siginfo.h +++ b/arch/alpha/include/uapi/asm/siginfo.h @@ -6,4 +6,18 @@ #include +/* + * SIGFPE si_codes + */ +#ifdef __KERNEL__ +#define FPE_FIXME (__SI_FAULT|0) /* Broken dup of SI_USER */ +#endif /* __KERNEL__ */ + +/* + * SIGTRAP si_codes + */ +#ifdef __KERNEL__ +#define TRAP_FIXME (__SI_FAULT|0) /* Broken dup of SI_USER */ +#endif /* __KERNEL__ */ + #endif diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 65bb102d985b..e94f4b73ac04 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -278,7 +278,7 @@ do_entIF(unsigned long type, struct pt_regs *regs) case 1: /* bugcheck */ info.si_signo = SIGTRAP; info.si_errno = 0; - info.si_code = __SI_FAULT; + info.si_code = TRAP_FIXME; info.si_addr = (void __user *) regs->pc; info.si_trapno = 0; send_sig_info(SIGTRAP, &info, current); @@ -318,7 +318,7 @@ do_entIF(unsigned long type, struct pt_regs *regs) break; case GEN_ROPRAND: signo = SIGFPE; - code = __SI_FAULT; + code = FPE_FIXME; break; case GEN_DECOVF: @@ -340,7 +340,7 @@ do_entIF(unsigned long type, struct pt_regs *regs) case GEN_SUBRNG7: default: signo = SIGTRAP; - code = __SI_FAULT; + code = TRAP_FIXME; break; } -- cgit v1.2.3 From 80dce5e37493029e1fb7f890b960264ba9a46fe4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 16 Jul 2017 19:35:35 -0500 Subject: signal/ia64: Document a conflict with SI_USER with SIGFPE Setting si_code to __SI_FAULT results in a userspace seeing an si_code of 0. This is the same si_code as SI_USER. Posix and common sense requires that SI_USER not be a signal specific si_code. As such this use of 0 for the si_code is a pretty horribly broken ABI. Given that ia64 is on it's last legs I don't know that it is worth fixing this, but it is worth documenting what is going on so that no one decides to copy this bad decision. This was introduced in 2.3.51 so this mess has had a long time for people to be able to start depending on it. Cc: Tony Luck Cc: Fenghua Yu Cc: linux-ia64@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- arch/ia64/include/uapi/asm/siginfo.h | 3 +++ arch/ia64/kernel/traps.c | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/ia64/include/uapi/asm/siginfo.h b/arch/ia64/include/uapi/asm/siginfo.h index 4694c64252d6..3282f8b992fc 100644 --- a/arch/ia64/include/uapi/asm/siginfo.h +++ b/arch/ia64/include/uapi/asm/siginfo.h @@ -107,6 +107,9 @@ typedef struct siginfo { /* * SIGFPE si_codes */ +#ifdef __KERNEL__ +#define FPE_FIXME (__SI_FAULT|0) /* Broken dup of SI_USER */ +#endif /* __KERNEL__ */ #define __FPE_DECOVF (__SI_FAULT|9) /* decimal overflow */ #define __FPE_DECDIV (__SI_FAULT|10) /* decimal division by zero */ #define __FPE_DECERR (__SI_FAULT|11) /* packed decimal error */ diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 7b1fe9462158..3cb17cf9b362 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -349,7 +349,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) } siginfo.si_signo = SIGFPE; siginfo.si_errno = 0; - siginfo.si_code = __SI_FAULT; /* default code */ + siginfo.si_code = FPE_FIXME; /* default code */ siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri); if (isr & 0x11) { siginfo.si_code = FPE_FLTINV; @@ -373,7 +373,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) /* raise exception */ siginfo.si_signo = SIGFPE; siginfo.si_errno = 0; - siginfo.si_code = __SI_FAULT; /* default code */ + siginfo.si_code = FPE_FIXME; /* default code */ siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri); if (isr & 0x880) { siginfo.si_code = FPE_FLTOVF; -- cgit v1.2.3 From cc9f72e474a4da365896d0e528da324f205e8b31 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 16 Jul 2017 20:04:58 -0500 Subject: signal/sparc: Document a conflict with SI_USER with SIGFPE Setting si_code to __SI_FAULT results in a userspace seeing an si_code of 0. This is the same si_code as SI_USER. Posix and common sense requires that SI_USER not be a signal specific si_code. As such this use of 0 for the si_code is a pretty horribly broken ABI. This was introduced in 2.3.41 so this mess has had a long time for people to be able to start depending on it. As this bug has existed for 17 years already I don't know if it is worth fixing. It is definitely worth documenting what is going on so that no one decides to copy this bad decision. Cc: "David S. Miller" Cc: sparclinux@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- arch/sparc/include/uapi/asm/siginfo.h | 7 +++++++ arch/sparc/kernel/traps_32.c | 2 +- arch/sparc/kernel/traps_64.c | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/sparc/include/uapi/asm/siginfo.h b/arch/sparc/include/uapi/asm/siginfo.h index 2d9b79ccaa50..da2126e0c536 100644 --- a/arch/sparc/include/uapi/asm/siginfo.h +++ b/arch/sparc/include/uapi/asm/siginfo.h @@ -16,6 +16,13 @@ #define SI_NOINFO 32767 /* no information in siginfo_t */ +/* + * SIGFPE si_codes + */ +#ifdef __KERNEL__ +#define FPE_FIXME (__SI_FAULT|0) /* Broken dup of SI_USER */ +#endif /* __KERNEL__ */ + /* * SIGEMT si_codes */ diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index 466d4aed06c7..581cf35ee7e3 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -306,7 +306,7 @@ void do_fpe_trap(struct pt_regs *regs, unsigned long pc, unsigned long npc, info.si_errno = 0; info.si_addr = (void __user *)pc; info.si_trapno = 0; - info.si_code = __SI_FAULT; + info.si_code = FPE_FIXME; if ((fsr & 0x1c000) == (1 << 14)) { if (fsr & 0x10) info.si_code = FPE_FLTINV; diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 196ee5eb4d48..e882e128faa3 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2258,7 +2258,7 @@ static void do_fpe_common(struct pt_regs *regs) info.si_errno = 0; info.si_addr = (void __user *)regs->tpc; info.si_trapno = 0; - info.si_code = __SI_FAULT; + info.si_code = FPE_FIXME; if ((fsr & 0x1c000) == (1 << 14)) { if (fsr & 0x10) info.si_code = FPE_FLTINV; -- cgit v1.2.3 From ea1b75cf9138003eee6389b70e654f5865728525 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 26 Jun 2017 16:16:17 -0500 Subject: signal/mips: Document a conflict with SI_USER with SIGFPE Setting si_code to __SI_FAULT results in a userspace seeing an si_code of 0. This is the same si_code as SI_USER. Posix and common sense requires that SI_USER not be a signal specific si_code. As such this use of 0 for the si_code is a pretty horribly broken ABI. This use of of __SI_FAULT is only a decade old. Which compared to the other pieces of kernel code that has made this mistake is almost yesterday. This is probably worth fixing but I don't know mips well enough to know what si_code to would be the proper one to use. Cc: Ralf Baechle Ref: 948a34cf3988 ("[MIPS] Maintain si_code field properly for FP exceptions") Signed-off-by: "Eric W. Biederman" --- arch/mips/include/uapi/asm/siginfo.h | 7 +++++++ arch/mips/kernel/traps.c | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/mips/include/uapi/asm/siginfo.h b/arch/mips/include/uapi/asm/siginfo.h index 8069cf766603..9becfd102132 100644 --- a/arch/mips/include/uapi/asm/siginfo.h +++ b/arch/mips/include/uapi/asm/siginfo.h @@ -123,4 +123,11 @@ typedef struct siginfo { #define SI_TIMER __SI_CODE(__SI_TIMER, -3) /* sent by timer expiration */ #define SI_MESGQ __SI_CODE(__SI_MESGQ, -4) /* sent by real time mesq state change */ +/* + * SIGFPE si_codes + */ +#ifdef __KERNEL__ +#define FPE_FIXME (__SI_FAULT|0) /* Broken dup of SI_USER */ +#endif /* __KERNEL__ */ + #endif /* _UAPI_ASM_SIGINFO_H */ diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index b68b4d0726d3..6c9cca9c5341 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -735,7 +735,7 @@ void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr, else if (fcr31 & FPU_CSR_INE_X) si.si_code = FPE_FLTRES; else - si.si_code = __SI_FAULT; + si.si_code = FPE_FIXME; force_sig_info(SIGFPE, &si, tsk); } -- cgit v1.2.3 From d12fe87e62d773e81e0cb3a123c5a480a10d7d91 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 26 Jun 2017 16:36:57 -0500 Subject: signal/testing: Don't look for __SI_FAULT in userspace Fix the debug print statements in these tests where they reference si_codes and in particular __SI_FAULT. __SI_FAULT is a kernel internal value and should never be seen by userspace. While I am in there also fix si_code_str. si_codes are an enumeration there are not a bitmap so == and not & is the apropriate operation to test for an si_code. Cc: Dave Hansen Fixes: 5f23f6d082a9 ("x86/pkeys: Add self-tests") Fixes: e754aedc26ef ("x86/mpx, selftests: Add MPX self test") Signed-off-by: "Eric W. Biederman" --- tools/testing/selftests/x86/mpx-mini-test.c | 3 +-- tools/testing/selftests/x86/protection_keys.c | 13 ++++++------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c index a8df159a8924..ec0f6b45ce8b 100644 --- a/tools/testing/selftests/x86/mpx-mini-test.c +++ b/tools/testing/selftests/x86/mpx-mini-test.c @@ -391,8 +391,7 @@ void handler(int signum, siginfo_t *si, void *vucontext) br_count++; dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count); -#define __SI_FAULT (3 << 16) -#define SEGV_BNDERR (__SI_FAULT|3) /* failed address bound checks */ +#define SEGV_BNDERR 3 /* failed address bound checks */ dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n", status, ip, br_reason); diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index 3237bc010e1c..23927845518d 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c @@ -212,19 +212,18 @@ void dump_mem(void *dumpme, int len_bytes) } } -#define __SI_FAULT (3 << 16) -#define SEGV_BNDERR (__SI_FAULT|3) /* failed address bound checks */ -#define SEGV_PKUERR (__SI_FAULT|4) +#define SEGV_BNDERR 3 /* failed address bound checks */ +#define SEGV_PKUERR 4 static char *si_code_str(int si_code) { - if (si_code & SEGV_MAPERR) + if (si_code == SEGV_MAPERR) return "SEGV_MAPERR"; - if (si_code & SEGV_ACCERR) + if (si_code == SEGV_ACCERR) return "SEGV_ACCERR"; - if (si_code & SEGV_BNDERR) + if (si_code == SEGV_BNDERR) return "SEGV_BNDERR"; - if (si_code & SEGV_PKUERR) + if (si_code == SEGV_PKUERR) return "SEGV_PKUERR"; return "UNKNOWN"; } -- cgit v1.2.3 From a2b426267c56773201f968fdb5eda6ab9ae94e34 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 29 Apr 2017 14:12:15 -0500 Subject: userns,pidns: Verify the userns for new pid namespaces It is pointless and confusing to allow a pid namespace hierarchy and the user namespace hierarchy to get out of sync. The owner of a child pid namespace should be the owner of the parent pid namespace or a descendant of the owner of the parent pid namespace. Otherwise it is possible to construct scenarios where a process has a capability over a parent pid namespace but does not have the capability over a child pid namespace. Which confusingly makes permission checks non-transitive. It requires use of setns into a pid namespace (but not into a user namespace) to create such a scenario. Add the function in_userns to help in making this determination. v2: Optimized in_userns by using level as suggested by: Kirill Tkhai Ref: 49f4d8b93ccf ("pidns: Capture the user namespace and filter ns_last_pid") Signed-off-by: "Eric W. Biederman" --- include/linux/user_namespace.h | 9 ++++++++- kernel/pid_namespace.c | 4 ++++ kernel/user_namespace.c | 20 ++++++++++++-------- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 32354b4b4b2b..4005877bb8b6 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -112,8 +112,9 @@ extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *); extern int proc_setgroups_show(struct seq_file *m, void *v); extern bool userns_may_setgroups(const struct user_namespace *ns); +extern bool in_userns(const struct user_namespace *ancestor, + const struct user_namespace *child); extern bool current_in_userns(const struct user_namespace *target_ns); - struct ns_common *ns_get_owner(struct ns_common *ns); #else @@ -144,6 +145,12 @@ static inline bool userns_may_setgroups(const struct user_namespace *ns) return true; } +static inline bool in_userns(const struct user_namespace *ancestor, + const struct user_namespace *child) +{ + return true; +} + static inline bool current_in_userns(const struct user_namespace *target_ns) { return true; diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 74a5a7255b4d..4918314893bc 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -101,6 +101,10 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns int i; int err; + err = -EINVAL; + if (!in_userns(parent_pid_ns->user_ns, user_ns)) + goto out; + err = -ENOSPC; if (level > MAX_PID_NS_LEVEL) goto out; diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 2f735cbe05e8..c490f1e4313b 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -986,17 +986,21 @@ bool userns_may_setgroups(const struct user_namespace *ns) } /* - * Returns true if @ns is the same namespace as or a descendant of - * @target_ns. + * Returns true if @child is the same namespace or a descendant of + * @ancestor. */ +bool in_userns(const struct user_namespace *ancestor, + const struct user_namespace *child) +{ + const struct user_namespace *ns; + for (ns = child; ns->level > ancestor->level; ns = ns->parent) + ; + return (ns == ancestor); +} + bool current_in_userns(const struct user_namespace *target_ns) { - struct user_namespace *ns; - for (ns = current_user_ns(); ns; ns = ns->parent) { - if (ns == target_ns) - return true; - } - return false; + return in_userns(target_ns, current_user_ns()); } static inline struct user_namespace *to_user_ns(struct ns_common *ns) -- cgit v1.2.3 From 64db4c7f4c1dde23d47b60f887000e28f82b268f Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 2 May 2017 20:11:52 +0300 Subject: security: Use user_namespace::level to avoid redundant iterations in cap_capable() When ns->level is not larger then cred->user_ns->level, then ns can't be cred->user_ns's descendant, and there is no a sense to search in parents. So, break the cycle earlier and skip needless iterations. v2: Change comment on suggested by Andy Lutomirski. Signed-off-by: Kirill Tkhai Signed-off-by: Eric W. Biederman --- security/commoncap.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/security/commoncap.c b/security/commoncap.c index 7abebd782d5e..d59320282294 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -82,8 +82,11 @@ int cap_capable(const struct cred *cred, struct user_namespace *targ_ns, if (ns == cred->user_ns) return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM; - /* Have we tried all of the parent namespaces? */ - if (ns == &init_user_ns) + /* + * If we're already at a lower level than we're looking for, + * we're done searching. + */ + if (ns->level <= cred->user_ns->level) return -EPERM; /* -- cgit v1.2.3 From 4d28df6152aa3ffd0ad0389bb1d31f5b1c1c2b1f Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Fri, 12 May 2017 17:33:36 +0300 Subject: prctl: Allow local CAP_SYS_ADMIN changing exe_file During checkpointing and restore of userspace tasks we bumped into the situation, that it's not possible to restore the tasks, which user namespace does not have uid 0 or gid 0 mapped. People create user namespace mappings like they want, and there is no a limitation on obligatory uid and gid "must be mapped". So, if there is no uid 0 or gid 0 in the mapping, it's impossible to restore mm->exe_file of the processes belonging to this user namespace. Also, there is no a workaround. It's impossible to create a temporary uid/gid mapping, because only one write to /proc/[pid]/uid_map and gid_map is allowed during a namespace lifetime. If there is an entry, then no more mapings can't be written. If there isn't an entry, we can't write there too, otherwise user task won't be able to do that in the future. The patch changes the check, and looks for CAP_SYS_ADMIN instead of zero uid and gid. This allows to restore a task independently of its user namespace mappings. Signed-off-by: Kirill Tkhai CC: Andrew Morton CC: Serge Hallyn CC: "Eric W. Biederman" CC: Oleg Nesterov CC: Michal Hocko CC: Andrei Vagin CC: Cyrill Gorcunov CC: Stanislav Kinsburskiy CC: Pavel Tikhomirov Reviewed-by: Cyrill Gorcunov Signed-off-by: Eric W. Biederman --- kernel/sys.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index 2855ee73acd0..9aebc2935013 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1896,15 +1896,11 @@ static int validate_prctl_map(struct prctl_mm_map *prctl_map) /* * Finally, make sure the caller has the rights to - * change /proc/pid/exe link: only local root should + * change /proc/pid/exe link: only local sys admin should * be allowed to. */ if (prctl_map->exe_fd != (u32)-1) { - struct user_namespace *ns = current_user_ns(); - const struct cred *cred = current_cred(); - - if (!uid_eq(cred->uid, make_kuid(ns, 0)) || - !gid_eq(cred->gid, make_kgid(ns, 0))) + if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN)) goto out; } -- cgit v1.2.3 From d08477aa975e97f1dc64c0ae59cebf98520456ce Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 29 Jun 2017 09:28:50 -0500 Subject: fcntl: Don't use ambiguous SIG_POLL si_codes We have a weird and problematic intersection of features that when they all come together result in ambiguous siginfo values, that we can not support properly. - Supporting fcntl(F_SETSIG,...) with arbitrary valid signals. - Using positive values for POLL_IN, POLL_OUT, POLL_MSG, ..., etc that imply they are signal specific si_codes and using the aforementioned arbitrary signal to deliver them. - Supporting injection of arbitrary siginfo values for debugging and checkpoint/restore. The result is that just looking at siginfo si_codes of 1 to 6 are ambigious. It could either be a signal specific si_code or it could be a generic si_code. For most of the kernel this is a non-issue but for sending signals with siginfo it is impossible to play back the kernel signals and get the same result. Strictly speaking when the si_code was changed from SI_SIGIO to POLL_IN and friends between 2.2 and 2.4 this functionality was not ambiguous, as only real time signals were supported. Before 2.4 was released the kernel began supporting siginfo with non realtime signals so they could give details of why the signal was sent. The result is that if F_SETSIG is set to one of the signals with signal specific si_codes then user space can not know why the signal was sent. I grepped through a bunch of userspace programs using debian code search to get a feel for how often people choose a signal that results in an ambiguous si_code. I only found one program doing so and it was using SIGCHLD to test the F_SETSIG functionality, and did not appear to be a real world usage. Therefore the ambiguity does not appears to be a real world problem in practice. Remove the ambiguity while introducing the smallest chance of breakage by changing the si_code to SI_SIGIO when signals with signal specific si_codes are targeted. Fixes: v2.3.40 -- Added support for queueing non-rt signals Fixes: v2.3.21 -- Changed the si_code from SI_SIGIO Signed-off-by: "Eric W. Biederman" --- fs/fcntl.c | 13 ++++++++++++- include/linux/signal.h | 8 ++++++++ include/uapi/asm-generic/siginfo.h | 4 ++-- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/fs/fcntl.c b/fs/fcntl.c index 3b01b646e528..0491da3b28c3 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -741,10 +741,21 @@ static void send_sigio_to_task(struct task_struct *p, si.si_signo = signum; si.si_errno = 0; si.si_code = reason; + /* + * Posix definies POLL_IN and friends to be signal + * specific si_codes for SIG_POLL. Linux extended + * these si_codes to other signals in a way that is + * ambiguous if other signals also have signal + * specific si_codes. In that case use SI_SIGIO instead + * to remove the ambiguity. + */ + if (sig_specific_sicodes(signum)) + si.si_code = SI_SIGIO; + /* Make sure we are called with one of the POLL_* reasons, otherwise we could leak kernel stack into userspace. */ - BUG_ON((reason & __SI_MASK) != __SI_POLL); + BUG_ON((reason < POLL_IN) || ((reason - POLL_IN) >= NSIGPOLL)); if (reason - POLL_IN >= NSIGPOLL) si.si_band = ~0L; else diff --git a/include/linux/signal.h b/include/linux/signal.h index e2678b5dbb21..c97cc20369c0 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -380,10 +380,18 @@ int unhandled_signal(struct task_struct *tsk, int sig); rt_sigmask(SIGCONT) | rt_sigmask(SIGCHLD) | \ rt_sigmask(SIGWINCH) | rt_sigmask(SIGURG) ) +#define SIG_SPECIFIC_SICODES_MASK (\ + rt_sigmask(SIGILL) | rt_sigmask(SIGFPE) | \ + rt_sigmask(SIGSEGV) | rt_sigmask(SIGBUS) | \ + rt_sigmask(SIGTRAP) | rt_sigmask(SIGCHLD) | \ + rt_sigmask(SIGPOLL) | rt_sigmask(SIGSYS) | \ + SIGEMT_MASK ) + #define sig_kernel_only(sig) siginmask(sig, SIG_KERNEL_ONLY_MASK) #define sig_kernel_coredump(sig) siginmask(sig, SIG_KERNEL_COREDUMP_MASK) #define sig_kernel_ignore(sig) siginmask(sig, SIG_KERNEL_IGNORE_MASK) #define sig_kernel_stop(sig) siginmask(sig, SIG_KERNEL_STOP_MASK) +#define sig_specific_sicodes(sig) siginmask(sig, SIG_SPECIFIC_SICODES_MASK) #define sig_fatal(t, signr) \ (!siginmask(signr, SIG_KERNEL_IGNORE_MASK|SIG_KERNEL_STOP_MASK) && \ diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index 9c4eca6b374a..9e956ea94d57 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -184,7 +184,7 @@ typedef struct siginfo { #define SI_TIMER __SI_CODE(__SI_TIMER,-2) /* sent by timer expiration */ #define SI_MESGQ __SI_CODE(__SI_MESGQ,-3) /* sent by real time mesq state change */ #define SI_ASYNCIO -4 /* sent by AIO completion */ -#define SI_SIGIO -5 /* sent by queued SIGIO */ +#define SI_SIGIO __SI_CODE(__SI_POLL,-5) /* sent by queued SIGIO */ #define SI_TKILL -6 /* sent by tkill system call */ #define SI_DETHREAD -7 /* sent by execve() killing subsidiary threads */ @@ -259,7 +259,7 @@ typedef struct siginfo { #define NSIGCHLD 6 /* - * SIGPOLL si_codes + * SIGPOLL (or any other signal without signal specific si_codes) si_codes */ #define POLL_IN (__SI_POLL|1) /* data input available */ #define POLL_OUT (__SI_POLL|2) /* output buffers available */ -- cgit v1.2.3 From cc731525f26af85a1c3537da41e0abd1d35e0bdb Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 16 Jul 2017 22:36:59 -0500 Subject: signal: Remove kernel interal si_code magic struct siginfo is a union and the kernel since 2.4 has been hiding a union tag in the high 16bits of si_code using the values: __SI_KILL __SI_TIMER __SI_POLL __SI_FAULT __SI_CHLD __SI_RT __SI_MESGQ __SI_SYS While this looks plausible on the surface, in practice this situation has not worked well. - Injected positive signals are not copied to user space properly unless they have these magic high bits set. - Injected positive signals are not reported properly by signalfd unless they have these magic high bits set. - These kernel internal values leaked to userspace via ptrace_peek_siginfo - It was possible to inject these kernel internal values and cause the the kernel to misbehave. - Kernel developers got confused and expected these kernel internal values in userspace in kernel self tests. - Kernel developers got confused and set si_code to __SI_FAULT which is SI_USER in userspace which causes userspace to think an ordinary user sent the signal and that it was not kernel generated. - The values make it impossible to reorganize the code to transform siginfo_copy_to_user into a plain copy_to_user. As si_code must be massaged before being passed to userspace. So remove these kernel internal si codes and make the kernel code simpler and more maintainable. To replace these kernel internal magic si_codes introduce the helper function siginfo_layout, that takes a signal number and an si_code and computes which union member of siginfo is being used. Have siginfo_layout return an enumeration so that gcc will have enough information to warn if a switch statement does not handle all of union members. A couple of architectures have a messed up ABI that defines signal specific duplications of SI_USER which causes more special cases in siginfo_layout than I would like. The good news is only problem architectures pay the cost. Update all of the code that used the previous magic __SI_ values to use the new SIL_ values and to call siginfo_layout to get those values. Escept where not all of the cases are handled remove the defaults in the switch statements so that if a new case is missed in the future the lack will show up at compile time. Modify the code that copies siginfo si_code to userspace to just copy the value and not cast si_code to a short first. The high bits are no longer used to hold a magic union member. Fixup the siginfo header files to stop including the __SI_ values in their constants and for the headers that were missing it to properly update the number of si_codes for each signal type. The fixes to copy_siginfo_from_user32 implementations has the interesting property that several of them perviously should never have worked as the __SI_ values they depended up where kernel internal. With that dependency gone those implementations should work much better. The idea of not passing the __SI_ values out to userspace and then not reinserting them has been tested with criu and criu worked without changes. Ref: 2.4.0-test1 Signed-off-by: "Eric W. Biederman" --- arch/alpha/include/uapi/asm/siginfo.h | 4 +- arch/arm64/kernel/signal32.c | 23 +++---- arch/blackfin/include/uapi/asm/siginfo.h | 30 +++++--- arch/frv/include/uapi/asm/siginfo.h | 2 +- arch/ia64/include/uapi/asm/siginfo.h | 20 +++--- arch/ia64/kernel/signal.c | 17 +++-- arch/mips/include/uapi/asm/siginfo.h | 6 +- arch/mips/kernel/signal32.c | 19 +++-- arch/parisc/kernel/signal32.c | 31 ++++----- arch/powerpc/kernel/signal_32.c | 20 +++--- arch/s390/kernel/compat_signal.c | 32 ++++----- arch/sparc/include/uapi/asm/siginfo.h | 4 +- arch/sparc/kernel/signal32.c | 16 ++--- arch/tile/include/uapi/asm/siginfo.h | 4 +- arch/tile/kernel/compat_signal.c | 18 +++-- arch/tile/kernel/traps.c | 2 +- arch/x86/kernel/signal_compat.c | 21 +++--- fs/signalfd.c | 22 +++--- include/linux/signal.h | 14 ++++ include/uapi/asm-generic/siginfo.h | 115 +++++++++++++------------------ kernel/exit.c | 4 +- kernel/ptrace.c | 6 +- kernel/signal.c | 72 ++++++++++++++----- 23 files changed, 257 insertions(+), 245 deletions(-) diff --git a/arch/alpha/include/uapi/asm/siginfo.h b/arch/alpha/include/uapi/asm/siginfo.h index 972f547d9e41..70494d1d8f29 100644 --- a/arch/alpha/include/uapi/asm/siginfo.h +++ b/arch/alpha/include/uapi/asm/siginfo.h @@ -10,14 +10,14 @@ * SIGFPE si_codes */ #ifdef __KERNEL__ -#define FPE_FIXME (__SI_FAULT|0) /* Broken dup of SI_USER */ +#define FPE_FIXME 0 /* Broken dup of SI_USER */ #endif /* __KERNEL__ */ /* * SIGTRAP si_codes */ #ifdef __KERNEL__ -#define TRAP_FIXME (__SI_FAULT|0) /* Broken dup of SI_USER */ +#define TRAP_FIXME 0 /* Broken dup of SI_USER */ #endif /* __KERNEL__ */ #endif diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index c747a0fc5d7d..9b95a935c21d 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -142,25 +142,25 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) */ err = __put_user(from->si_signo, &to->si_signo); err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user((short)from->si_code, &to->si_code); + err |= __put_user(from->si_code, &to->si_code); if (from->si_code < 0) err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE); - else switch (from->si_code & __SI_MASK) { - case __SI_KILL: + else switch (siginfo_layout(from->si_signo, from->si_code)) { + case SIL_KILL: err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); break; - case __SI_TIMER: + case SIL_TIMER: err |= __put_user(from->si_tid, &to->si_tid); err |= __put_user(from->si_overrun, &to->si_overrun); err |= __put_user(from->si_int, &to->si_int); break; - case __SI_POLL: + case SIL_POLL: err |= __put_user(from->si_band, &to->si_band); err |= __put_user(from->si_fd, &to->si_fd); break; - case __SI_FAULT: + case SIL_FAULT: err |= __put_user((compat_uptr_t)(unsigned long)from->si_addr, &to->si_addr); #ifdef BUS_MCEERR_AO @@ -173,29 +173,24 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb); #endif break; - case __SI_CHLD: + case SIL_CHLD: err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); err |= __put_user(from->si_status, &to->si_status); err |= __put_user(from->si_utime, &to->si_utime); err |= __put_user(from->si_stime, &to->si_stime); break; - case __SI_RT: /* This is not generated by the kernel as of now. */ - case __SI_MESGQ: /* But this is */ + case SIL_RT: err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); err |= __put_user(from->si_int, &to->si_int); break; - case __SI_SYS: + case SIL_SYS: err |= __put_user((compat_uptr_t)(unsigned long) from->si_call_addr, &to->si_call_addr); err |= __put_user(from->si_syscall, &to->si_syscall); err |= __put_user(from->si_arch, &to->si_arch); break; - default: /* this is just in case for now ... */ - err |= __put_user(from->si_pid, &to->si_pid); - err |= __put_user(from->si_uid, &to->si_uid); - break; } return err; } diff --git a/arch/blackfin/include/uapi/asm/siginfo.h b/arch/blackfin/include/uapi/asm/siginfo.h index c72f4e6e386f..79dfe3979123 100644 --- a/arch/blackfin/include/uapi/asm/siginfo.h +++ b/arch/blackfin/include/uapi/asm/siginfo.h @@ -14,28 +14,36 @@ #define si_uid16 _sifields._kill._uid -#define ILL_ILLPARAOP (__SI_FAULT|2) /* illegal opcode combine ********** */ -#define ILL_ILLEXCPT (__SI_FAULT|4) /* unrecoverable exception ********** */ -#define ILL_CPLB_VI (__SI_FAULT|9) /* D/I CPLB protect violation ******** */ -#define ILL_CPLB_MISS (__SI_FAULT|10) /* D/I CPLB miss ******** */ -#define ILL_CPLB_MULHIT (__SI_FAULT|11) /* D/I CPLB multiple hit ******** */ +#define ILL_ILLPARAOP 2 /* illegal opcode combine ********** */ +#define ILL_ILLEXCPT 4 /* unrecoverable exception ********** */ +#define ILL_CPLB_VI 9 /* D/I CPLB protect violation ******** */ +#define ILL_CPLB_MISS 10 /* D/I CPLB miss ******** */ +#define ILL_CPLB_MULHIT 11 /* D/I CPLB multiple hit ******** */ +#undef NSIGILL +#define NSIGILL 11 /* * SIGBUS si_codes */ -#define BUS_OPFETCH (__SI_FAULT|4) /* error from instruction fetch ******** */ +#define BUS_OPFETCH 4 /* error from instruction fetch ******** */ +#undef NSIGBUS +#define NSIGBUS 4 /* * SIGTRAP si_codes */ -#define TRAP_STEP (__SI_FAULT|1) /* single-step breakpoint************* */ -#define TRAP_TRACEFLOW (__SI_FAULT|2) /* trace buffer overflow ************* */ -#define TRAP_WATCHPT (__SI_FAULT|3) /* watchpoint match ************* */ -#define TRAP_ILLTRAP (__SI_FAULT|4) /* illegal trap ************* */ +#define TRAP_STEP 1 /* single-step breakpoint************* */ +#define TRAP_TRACEFLOW 2 /* trace buffer overflow ************* */ +#define TRAP_WATCHPT 3 /* watchpoint match ************* */ +#define TRAP_ILLTRAP 4 /* illegal trap ************* */ +#undef NSIGTRAP +#define NSIGTRAP 4 /* * SIGSEGV si_codes */ -#define SEGV_STACKFLOW (__SI_FAULT|3) /* stack overflow */ +#define SEGV_STACKFLOW 3 /* stack overflow */ +#undef NSIGSEGV +#define NSIGSEGV 3 #endif /* _UAPI_BFIN_SIGINFO_H */ diff --git a/arch/frv/include/uapi/asm/siginfo.h b/arch/frv/include/uapi/asm/siginfo.h index d3fd1ca45653..f55d9e0e9068 100644 --- a/arch/frv/include/uapi/asm/siginfo.h +++ b/arch/frv/include/uapi/asm/siginfo.h @@ -4,7 +4,7 @@ #include #include -#define FPE_MDAOVF (__SI_FAULT|9) /* media overflow */ +#define FPE_MDAOVF 9 /* media overflow */ #undef NSIGFPE #define NSIGFPE 9 diff --git a/arch/ia64/include/uapi/asm/siginfo.h b/arch/ia64/include/uapi/asm/siginfo.h index 3282f8b992fc..33389fc36f23 100644 --- a/arch/ia64/include/uapi/asm/siginfo.h +++ b/arch/ia64/include/uapi/asm/siginfo.h @@ -98,9 +98,9 @@ typedef struct siginfo { /* * SIGILL si_codes */ -#define ILL_BADIADDR (__SI_FAULT|9) /* unimplemented instruction address */ -#define __ILL_BREAK (__SI_FAULT|10) /* illegal break */ -#define __ILL_BNDMOD (__SI_FAULT|11) /* bundle-update (modification) in progress */ +#define ILL_BADIADDR 9 /* unimplemented instruction address */ +#define __ILL_BREAK 10 /* illegal break */ +#define __ILL_BNDMOD 11 /* bundle-update (modification) in progress */ #undef NSIGILL #define NSIGILL 11 @@ -108,20 +108,20 @@ typedef struct siginfo { * SIGFPE si_codes */ #ifdef __KERNEL__ -#define FPE_FIXME (__SI_FAULT|0) /* Broken dup of SI_USER */ +#define FPE_FIXME 0 /* Broken dup of SI_USER */ #endif /* __KERNEL__ */ -#define __FPE_DECOVF (__SI_FAULT|9) /* decimal overflow */ -#define __FPE_DECDIV (__SI_FAULT|10) /* decimal division by zero */ -#define __FPE_DECERR (__SI_FAULT|11) /* packed decimal error */ -#define __FPE_INVASC (__SI_FAULT|12) /* invalid ASCII digit */ -#define __FPE_INVDEC (__SI_FAULT|13) /* invalid decimal digit */ +#define __FPE_DECOVF 9 /* decimal overflow */ +#define __FPE_DECDIV 10 /* decimal division by zero */ +#define __FPE_DECERR 11 /* packed decimal error */ +#define __FPE_INVASC 12 /* invalid ASCII digit */ +#define __FPE_INVDEC 13 /* invalid decimal digit */ #undef NSIGFPE #define NSIGFPE 13 /* * SIGSEGV si_codes */ -#define __SEGV_PSTKOVF (__SI_FAULT|4) /* paragraph stack overflow */ +#define __SEGV_PSTKOVF 4 /* paragraph stack overflow */ #undef NSIGSEGV #define NSIGSEGV 4 diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 5db52c6813c4..6146d53b6ad7 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -124,31 +124,30 @@ copy_siginfo_to_user (siginfo_t __user *to, const siginfo_t *from) */ err = __put_user(from->si_signo, &to->si_signo); err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user((short)from->si_code, &to->si_code); - switch (from->si_code >> 16) { - case __SI_FAULT >> 16: + err |= __put_user(from->si_code, &to->si_code); + switch (siginfo_layout(from->si_signo, from->si_code)) { + case SIL_FAULT: err |= __put_user(from->si_flags, &to->si_flags); err |= __put_user(from->si_isr, &to->si_isr); - case __SI_POLL >> 16: + case SIL_POLL: err |= __put_user(from->si_addr, &to->si_addr); err |= __put_user(from->si_imm, &to->si_imm); break; - case __SI_TIMER >> 16: + case SIL_TIMER: err |= __put_user(from->si_tid, &to->si_tid); err |= __put_user(from->si_overrun, &to->si_overrun); err |= __put_user(from->si_ptr, &to->si_ptr); break; - case __SI_RT >> 16: /* Not generated by the kernel as of now. */ - case __SI_MESGQ >> 16: + case SIL_RT: err |= __put_user(from->si_uid, &to->si_uid); err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_ptr, &to->si_ptr); break; - case __SI_CHLD >> 16: + case SIL_CHLD: err |= __put_user(from->si_utime, &to->si_utime); err |= __put_user(from->si_stime, &to->si_stime); err |= __put_user(from->si_status, &to->si_status); - default: + case SIL_KILL: err |= __put_user(from->si_uid, &to->si_uid); err |= __put_user(from->si_pid, &to->si_pid); break; diff --git a/arch/mips/include/uapi/asm/siginfo.h b/arch/mips/include/uapi/asm/siginfo.h index 9becfd102132..22a86d84a504 100644 --- a/arch/mips/include/uapi/asm/siginfo.h +++ b/arch/mips/include/uapi/asm/siginfo.h @@ -120,14 +120,14 @@ typedef struct siginfo { #undef SI_TIMER #undef SI_MESGQ #define SI_ASYNCIO -2 /* sent by AIO completion */ -#define SI_TIMER __SI_CODE(__SI_TIMER, -3) /* sent by timer expiration */ -#define SI_MESGQ __SI_CODE(__SI_MESGQ, -4) /* sent by real time mesq state change */ +#define SI_TIMER -3 /* sent by timer expiration */ +#define SI_MESGQ -4 /* sent by real time mesq state change */ /* * SIGFPE si_codes */ #ifdef __KERNEL__ -#define FPE_FIXME (__SI_FAULT|0) /* Broken dup of SI_USER */ +#define FPE_FIXME 0 /* Broken dup of SI_USER */ #endif /* __KERNEL__ */ #endif /* _UAPI_ASM_SIGINFO_H */ diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c index 84165f2b31ff..cf5c7c05e5a3 100644 --- a/arch/mips/kernel/signal32.c +++ b/arch/mips/kernel/signal32.c @@ -93,38 +93,37 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) at the same time. */ err = __put_user(from->si_signo, &to->si_signo); err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user((short)from->si_code, &to->si_code); + err |= __put_user(from->si_code, &to->si_code); if (from->si_code < 0) err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE); else { - switch (from->si_code >> 16) { - case __SI_TIMER >> 16: + switch (siginfo_layout(from->si_signo, from->si_code)) { + case SIL_TIMER: err |= __put_user(from->si_tid, &to->si_tid); err |= __put_user(from->si_overrun, &to->si_overrun); err |= __put_user(from->si_int, &to->si_int); break; - case __SI_CHLD >> 16: + case SIL_CHLD: err |= __put_user(from->si_utime, &to->si_utime); err |= __put_user(from->si_stime, &to->si_stime); err |= __put_user(from->si_status, &to->si_status); - default: + case SIL_KILL: err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); break; - case __SI_FAULT >> 16: + case SIL_FAULT: err |= __put_user((unsigned long)from->si_addr, &to->si_addr); break; - case __SI_POLL >> 16: + case SIL_POLL: err |= __put_user(from->si_band, &to->si_band); err |= __put_user(from->si_fd, &to->si_fd); break; - case __SI_RT >> 16: /* This is not generated by the kernel as of now. */ - case __SI_MESGQ >> 16: + case SIL_RT: err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); err |= __put_user(from->si_int, &to->si_int); break; - case __SI_SYS >> 16: + case SIL_SYS: err |= __copy_to_user(&to->si_call_addr, &from->si_call_addr, sizeof(compat_uptr_t)); err |= __put_user(from->si_syscall, &to->si_syscall); diff --git a/arch/parisc/kernel/signal32.c b/arch/parisc/kernel/signal32.c index 70aaabb8b3cb..9e0cb6a577d6 100644 --- a/arch/parisc/kernel/signal32.c +++ b/arch/parisc/kernel/signal32.c @@ -290,25 +290,25 @@ copy_siginfo_from_user32 (siginfo_t *to, compat_siginfo_t __user *from) if (to->si_code < 0) err |= __copy_from_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE); else { - switch (to->si_code >> 16) { - case __SI_CHLD >> 16: + switch (siginfo_layout(to->si_signo, to->si_code)) { + case SIL_CHLD: err |= __get_user(to->si_utime, &from->si_utime); err |= __get_user(to->si_stime, &from->si_stime); err |= __get_user(to->si_status, &from->si_status); default: + case SIL_KILL: err |= __get_user(to->si_pid, &from->si_pid); err |= __get_user(to->si_uid, &from->si_uid); break; - case __SI_FAULT >> 16: + case SIL_FAULT: err |= __get_user(addr, &from->si_addr); to->si_addr = compat_ptr(addr); break; - case __SI_POLL >> 16: + case SIL_POLL: err |= __get_user(to->si_band, &from->si_band); err |= __get_user(to->si_fd, &from->si_fd); break; - case __SI_RT >> 16: /* This is not generated by the kernel as of now. */ - case __SI_MESGQ >> 16: + case SIL_RT: err |= __get_user(to->si_pid, &from->si_pid); err |= __get_user(to->si_uid, &from->si_uid); err |= __get_user(to->si_int, &from->si_int); @@ -337,41 +337,40 @@ copy_siginfo_to_user32 (compat_siginfo_t __user *to, const siginfo_t *from) at the same time. */ err = __put_user(from->si_signo, &to->si_signo); err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user((short)from->si_code, &to->si_code); + err |= __put_user(from->si_code, &to->si_code); if (from->si_code < 0) err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE); else { - switch (from->si_code >> 16) { - case __SI_CHLD >> 16: + switch (siginfo_layout(from->si_signo, from->si_code)) { + case SIL_CHLD: err |= __put_user(from->si_utime, &to->si_utime); err |= __put_user(from->si_stime, &to->si_stime); err |= __put_user(from->si_status, &to->si_status); - default: + case SIL_KILL: err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); break; - case __SI_FAULT >> 16: + case SIL_FAULT: addr = ptr_to_compat(from->si_addr); err |= __put_user(addr, &to->si_addr); break; - case __SI_POLL >> 16: + case SIL_POLL: err |= __put_user(from->si_band, &to->si_band); err |= __put_user(from->si_fd, &to->si_fd); break; - case __SI_TIMER >> 16: + case SIL_TIMER: err |= __put_user(from->si_tid, &to->si_tid); err |= __put_user(from->si_overrun, &to->si_overrun); val = (compat_int_t)from->si_int; err |= __put_user(val, &to->si_int); break; - case __SI_RT >> 16: /* Not generated by the kernel as of now. */ - case __SI_MESGQ >> 16: + case SIL_RT: err |= __put_user(from->si_uid, &to->si_uid); err |= __put_user(from->si_pid, &to->si_pid); val = (compat_int_t)from->si_int; err |= __put_user(val, &to->si_int); break; - case __SI_SYS >> 16: + case SIL_SYS: err |= __put_user(ptr_to_compat(from->si_call_addr), &to->si_call_addr); err |= __put_user(from->si_syscall, &to->si_syscall); err |= __put_user(from->si_arch, &to->si_arch); diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 97bb1385e771..92fb1c8dbbd8 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -913,42 +913,40 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *d, const siginfo_t *s) */ err = __put_user(s->si_signo, &d->si_signo); err |= __put_user(s->si_errno, &d->si_errno); - err |= __put_user((short)s->si_code, &d->si_code); + err |= __put_user(s->si_code, &d->si_code); if (s->si_code < 0) err |= __copy_to_user(&d->_sifields._pad, &s->_sifields._pad, SI_PAD_SIZE32); - else switch(s->si_code >> 16) { - case __SI_CHLD >> 16: + else switch(siginfo_layout(s->si_signo, s->si_code)) { + case SIL_CHLD: err |= __put_user(s->si_pid, &d->si_pid); err |= __put_user(s->si_uid, &d->si_uid); err |= __put_user(s->si_utime, &d->si_utime); err |= __put_user(s->si_stime, &d->si_stime); err |= __put_user(s->si_status, &d->si_status); break; - case __SI_FAULT >> 16: + case SIL_FAULT: err |= __put_user((unsigned int)(unsigned long)s->si_addr, &d->si_addr); break; - case __SI_POLL >> 16: + case SIL_POLL: err |= __put_user(s->si_band, &d->si_band); err |= __put_user(s->si_fd, &d->si_fd); break; - case __SI_TIMER >> 16: + case SIL_TIMER: err |= __put_user(s->si_tid, &d->si_tid); err |= __put_user(s->si_overrun, &d->si_overrun); err |= __put_user(s->si_int, &d->si_int); break; - case __SI_SYS >> 16: + case SIL_SYS: err |= __put_user(ptr_to_compat(s->si_call_addr), &d->si_call_addr); err |= __put_user(s->si_syscall, &d->si_syscall); err |= __put_user(s->si_arch, &d->si_arch); break; - case __SI_RT >> 16: /* This is not generated by the kernel as of now. */ - case __SI_MESGQ >> 16: + case SIL_RT: err |= __put_user(s->si_int, &d->si_int); /* fallthrough */ - case __SI_KILL >> 16: - default: + case SIL_KILL: err |= __put_user(s->si_pid, &d->si_pid); err |= __put_user(s->si_uid, &d->si_uid); break; diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index c620049c61f2..f549c4657376 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -75,35 +75,34 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) at the same time. */ err = __put_user(from->si_signo, &to->si_signo); err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user((short)from->si_code, &to->si_code); + err |= __put_user(from->si_code, &to->si_code); if (from->si_code < 0) err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE); else { - switch (from->si_code >> 16) { - case __SI_RT >> 16: /* This is not generated by the kernel as of now. */ - case __SI_MESGQ >> 16: + switch (siginfo_layout(from->si_signo, from->si_code)) { + case SIL_RT: err |= __put_user(from->si_int, &to->si_int); /* fallthrough */ - case __SI_KILL >> 16: + case SIL_KILL: err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); break; - case __SI_CHLD >> 16: + case SIL_CHLD: err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); err |= __put_user(from->si_utime, &to->si_utime); err |= __put_user(from->si_stime, &to->si_stime); err |= __put_user(from->si_status, &to->si_status); break; - case __SI_FAULT >> 16: + case SIL_FAULT: err |= __put_user((unsigned long) from->si_addr, &to->si_addr); break; - case __SI_POLL >> 16: + case SIL_POLL: err |= __put_user(from->si_band, &to->si_band); err |= __put_user(from->si_fd, &to->si_fd); break; - case __SI_TIMER >> 16: + case SIL_TIMER: err |= __put_user(from->si_tid, &to->si_tid); err |= __put_user(from->si_overrun, &to->si_overrun); err |= __put_user(from->si_int, &to->si_int); @@ -127,32 +126,31 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) if (to->si_code < 0) err |= __copy_from_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE); else { - switch (to->si_code >> 16) { - case __SI_RT >> 16: /* This is not generated by the kernel as of now. */ - case __SI_MESGQ >> 16: + switch (siginfo_layout(to->si_signo, to->si_code)) { + case SIL_RT: err |= __get_user(to->si_int, &from->si_int); /* fallthrough */ - case __SI_KILL >> 16: + case SIL_KILL: err |= __get_user(to->si_pid, &from->si_pid); err |= __get_user(to->si_uid, &from->si_uid); break; - case __SI_CHLD >> 16: + case SIL_CHLD: err |= __get_user(to->si_pid, &from->si_pid); err |= __get_user(to->si_uid, &from->si_uid); err |= __get_user(to->si_utime, &from->si_utime); err |= __get_user(to->si_stime, &from->si_stime); err |= __get_user(to->si_status, &from->si_status); break; - case __SI_FAULT >> 16: + case SIL_FAULT: err |= __get_user(tmp, &from->si_addr); to->si_addr = (void __force __user *) (u64) (tmp & PSW32_ADDR_INSN); break; - case __SI_POLL >> 16: + case SIL_POLL: err |= __get_user(to->si_band, &from->si_band); err |= __get_user(to->si_fd, &from->si_fd); break; - case __SI_TIMER >> 16: + case SIL_TIMER: err |= __get_user(to->si_tid, &from->si_tid); err |= __get_user(to->si_overrun, &from->si_overrun); err |= __get_user(to->si_int, &from->si_int); diff --git a/arch/sparc/include/uapi/asm/siginfo.h b/arch/sparc/include/uapi/asm/siginfo.h index da2126e0c536..157f46fe374f 100644 --- a/arch/sparc/include/uapi/asm/siginfo.h +++ b/arch/sparc/include/uapi/asm/siginfo.h @@ -20,13 +20,13 @@ * SIGFPE si_codes */ #ifdef __KERNEL__ -#define FPE_FIXME (__SI_FAULT|0) /* Broken dup of SI_USER */ +#define FPE_FIXME 0 /* Broken dup of SI_USER */ #endif /* __KERNEL__ */ /* * SIGEMT si_codes */ -#define EMT_TAGOVF (__SI_FAULT|1) /* tag overflow */ +#define EMT_TAGOVF 1 /* tag overflow */ #define NSIGEMT 1 #endif /* _UAPI__SPARC_SIGINFO_H */ diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index b4096bb665b2..0e4c08c45a37 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -85,34 +85,34 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) at the same time. */ err = __put_user(from->si_signo, &to->si_signo); err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user((short)from->si_code, &to->si_code); + err |= __put_user(from->si_code, &to->si_code); if (from->si_code < 0) err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE); else { - switch (from->si_code >> 16) { - case __SI_TIMER >> 16: + switch (siginfo_layout(from->si_signo, from->si_code)) { + case SIL_TIMER: err |= __put_user(from->si_tid, &to->si_tid); err |= __put_user(from->si_overrun, &to->si_overrun); err |= __put_user(from->si_int, &to->si_int); break; - case __SI_CHLD >> 16: + case SIL_CHLD: err |= __put_user(from->si_utime, &to->si_utime); err |= __put_user(from->si_stime, &to->si_stime); err |= __put_user(from->si_status, &to->si_status); default: + case SIL_KILL: err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); break; - case __SI_FAULT >> 16: + case SIL_FAULT: err |= __put_user(from->si_trapno, &to->si_trapno); err |= __put_user((unsigned long)from->si_addr, &to->si_addr); break; - case __SI_POLL >> 16: + case SIL_POLL: err |= __put_user(from->si_band, &to->si_band); err |= __put_user(from->si_fd, &to->si_fd); break; - case __SI_RT >> 16: /* This is not generated by the kernel as of now. */ - case __SI_MESGQ >> 16: + case SIL_RT: err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); err |= __put_user(from->si_int, &to->si_int); diff --git a/arch/tile/include/uapi/asm/siginfo.h b/arch/tile/include/uapi/asm/siginfo.h index 56d661bb010b..e83f931aa1f0 100644 --- a/arch/tile/include/uapi/asm/siginfo.h +++ b/arch/tile/include/uapi/asm/siginfo.h @@ -26,8 +26,8 @@ /* * Additional Tile-specific SIGILL si_codes */ -#define ILL_DBLFLT (__SI_FAULT|9) /* double fault */ -#define ILL_HARDWALL (__SI_FAULT|10) /* user networks hardwall violation */ +#define ILL_DBLFLT 9 /* double fault */ +#define ILL_HARDWALL 10 /* user networks hardwall violation */ #undef NSIGILL #define NSIGILL 10 diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index 0e863f1ee08c..971d87a1d8cf 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -64,7 +64,7 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *to, const siginfo_t *fr 3 ints plus the relevant union member. */ err = __put_user(from->si_signo, &to->si_signo); err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user((short)from->si_code, &to->si_code); + err |= __put_user(from->si_code, &to->si_code); if (from->si_code < 0) { err |= __put_user(from->si_pid, &to->si_pid); @@ -77,28 +77,26 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *to, const siginfo_t *fr */ err |= __put_user(from->_sifields._pad[0], &to->_sifields._pad[0]); - switch (from->si_code >> 16) { - case __SI_FAULT >> 16: + switch (siginfo_layout(from->si_signo, from->si_code)) { + case SIL_FAULT: break; - case __SI_CHLD >> 16: + case SIL_CHLD: err |= __put_user(from->si_utime, &to->si_utime); err |= __put_user(from->si_stime, &to->si_stime); err |= __put_user(from->si_status, &to->si_status); /* FALL THROUGH */ default: - case __SI_KILL >> 16: + case SIL_KILL: err |= __put_user(from->si_uid, &to->si_uid); break; - case __SI_POLL >> 16: + case SIL_POLL: err |= __put_user(from->si_fd, &to->si_fd); break; - case __SI_TIMER >> 16: + case SIL_TIMER: err |= __put_user(from->si_overrun, &to->si_overrun); err |= __put_user(from->si_int, &to->si_int); break; - /* This is not generated by the kernel as of now. */ - case __SI_RT >> 16: - case __SI_MESGQ >> 16: + case SIL_RT: err |= __put_user(from->si_uid, &to->si_uid); err |= __put_user(from->si_int, &to->si_int); break; diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index 54804866f238..9b08c6055f15 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -188,7 +188,7 @@ static int special_ill(tile_bundle_bits bundle, int *sigp, int *codep) /* Make it the requested signal. */ *sigp = sig; - *codep = code | __SI_FAULT; + *codep = code; return 1; } diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index 71beb28600d4..ab9feb5887b1 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -129,7 +129,7 @@ int __copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from, 3 ints plus the relevant union member. */ put_user_ex(from->si_signo, &to->si_signo); put_user_ex(from->si_errno, &to->si_errno); - put_user_ex((short)from->si_code, &to->si_code); + put_user_ex(from->si_code, &to->si_code); if (from->si_code < 0) { put_user_ex(from->si_pid, &to->si_pid); @@ -142,8 +142,8 @@ int __copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from, */ put_user_ex(from->_sifields._pad[0], &to->_sifields._pad[0]); - switch (from->si_code >> 16) { - case __SI_FAULT >> 16: + switch (siginfo_layout(from->si_signo, from->si_code)) { + case SIL_FAULT: if (from->si_signo == SIGBUS && (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)) @@ -160,11 +160,11 @@ int __copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from, put_user_ex(from->si_pkey, &to->si_pkey); } break; - case __SI_SYS >> 16: + case SIL_SYS: put_user_ex(from->si_syscall, &to->si_syscall); put_user_ex(from->si_arch, &to->si_arch); break; - case __SI_CHLD >> 16: + case SIL_CHLD: if (!x32_ABI) { put_user_ex(from->si_utime, &to->si_utime); put_user_ex(from->si_stime, &to->si_stime); @@ -174,21 +174,18 @@ int __copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from, } put_user_ex(from->si_status, &to->si_status); /* FALL THROUGH */ - default: - case __SI_KILL >> 16: + case SIL_KILL: put_user_ex(from->si_uid, &to->si_uid); break; - case __SI_POLL >> 16: + case SIL_POLL: put_user_ex(from->si_fd, &to->si_fd); break; - case __SI_TIMER >> 16: + case SIL_TIMER: put_user_ex(from->si_overrun, &to->si_overrun); put_user_ex(ptr_to_compat(from->si_ptr), &to->si_ptr); break; - /* This is not generated by the kernel as of now. */ - case __SI_RT >> 16: - case __SI_MESGQ >> 16: + case SIL_RT: put_user_ex(from->si_uid, &to->si_uid); put_user_ex(from->si_int, &to->si_int); break; diff --git a/fs/signalfd.c b/fs/signalfd.c index 593b022ac11b..d2c434112f42 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -95,23 +95,23 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, */ err |= __put_user(kinfo->si_signo, &uinfo->ssi_signo); err |= __put_user(kinfo->si_errno, &uinfo->ssi_errno); - err |= __put_user((short) kinfo->si_code, &uinfo->ssi_code); - switch (kinfo->si_code & __SI_MASK) { - case __SI_KILL: + err |= __put_user(kinfo->si_code, &uinfo->ssi_code); + switch (siginfo_layout(kinfo->si_signo, kinfo->si_code)) { + case SIL_KILL: err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid); err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid); break; - case __SI_TIMER: + case SIL_TIMER: err |= __put_user(kinfo->si_tid, &uinfo->ssi_tid); err |= __put_user(kinfo->si_overrun, &uinfo->ssi_overrun); err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr); err |= __put_user(kinfo->si_int, &uinfo->ssi_int); break; - case __SI_POLL: + case SIL_POLL: err |= __put_user(kinfo->si_band, &uinfo->ssi_band); err |= __put_user(kinfo->si_fd, &uinfo->ssi_fd); break; - case __SI_FAULT: + case SIL_FAULT: err |= __put_user((long) kinfo->si_addr, &uinfo->ssi_addr); #ifdef __ARCH_SI_TRAPNO err |= __put_user(kinfo->si_trapno, &uinfo->ssi_trapno); @@ -128,20 +128,14 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, &uinfo->ssi_addr_lsb); #endif break; - case __SI_CHLD: + case SIL_CHLD: err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid); err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid); err |= __put_user(kinfo->si_status, &uinfo->ssi_status); err |= __put_user(kinfo->si_utime, &uinfo->ssi_utime); err |= __put_user(kinfo->si_stime, &uinfo->ssi_stime); break; - case __SI_RT: /* This is not generated by the kernel as of now. */ - case __SI_MESGQ: /* But this is */ - err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid); - err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid); - err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr); - err |= __put_user(kinfo->si_int, &uinfo->ssi_int); - break; + case SIL_RT: default: /* * This case catches also the signals queued by sigqueue(). diff --git a/include/linux/signal.h b/include/linux/signal.h index c97cc20369c0..38564e3e54c7 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -21,6 +21,20 @@ static inline void copy_siginfo(struct siginfo *to, struct siginfo *from) int copy_siginfo_to_user(struct siginfo __user *to, const struct siginfo *from); +enum siginfo_layout { + SIL_KILL, + SIL_TIMER, + SIL_POLL, + SIL_FAULT, + SIL_CHLD, + SIL_RT, +#ifdef __ARCH_SIGSYS + SIL_SYS, +#endif +}; + +enum siginfo_layout siginfo_layout(int sig, int si_code); + /* * Define some primitives to manipulate sigset_t. */ diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index 9e956ea94d57..e5aa6794cea4 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -151,29 +151,6 @@ typedef struct siginfo { #define si_arch _sifields._sigsys._arch #endif -#ifdef __KERNEL__ -#define __SI_MASK 0xffff0000u -#define __SI_KILL (0 << 16) -#define __SI_TIMER (1 << 16) -#define __SI_POLL (2 << 16) -#define __SI_FAULT (3 << 16) -#define __SI_CHLD (4 << 16) -#define __SI_RT (5 << 16) -#define __SI_MESGQ (6 << 16) -#define __SI_SYS (7 << 16) -#define __SI_CODE(T,N) ((T) | ((N) & 0xffff)) -#else /* __KERNEL__ */ -#define __SI_KILL 0 -#define __SI_TIMER 0 -#define __SI_POLL 0 -#define __SI_FAULT 0 -#define __SI_CHLD 0 -#define __SI_RT 0 -#define __SI_MESGQ 0 -#define __SI_SYS 0 -#define __SI_CODE(T,N) (N) -#endif /* __KERNEL__ */ - /* * si_code values * Digital reserves positive values for kernel-generated signals. @@ -181,10 +158,10 @@ typedef struct siginfo { #define SI_USER 0 /* sent by kill, sigsend, raise */ #define SI_KERNEL 0x80 /* sent by the kernel from somewhere */ #define SI_QUEUE -1 /* sent by sigqueue */ -#define SI_TIMER __SI_CODE(__SI_TIMER,-2) /* sent by timer expiration */ -#define SI_MESGQ __SI_CODE(__SI_MESGQ,-3) /* sent by real time mesq state change */ +#define SI_TIMER -2 /* sent by timer expiration */ +#define SI_MESGQ -3 /* sent by real time mesq state change */ #define SI_ASYNCIO -4 /* sent by AIO completion */ -#define SI_SIGIO __SI_CODE(__SI_POLL,-5) /* sent by queued SIGIO */ +#define SI_SIGIO -5 /* sent by queued SIGIO */ #define SI_TKILL -6 /* sent by tkill system call */ #define SI_DETHREAD -7 /* sent by execve() killing subsidiary threads */ @@ -194,86 +171,86 @@ typedef struct siginfo { /* * SIGILL si_codes */ -#define ILL_ILLOPC (__SI_FAULT|1) /* illegal opcode */ -#define ILL_ILLOPN (__SI_FAULT|2) /* illegal operand */ -#define ILL_ILLADR (__SI_FAULT|3) /* illegal addressing mode */ -#define ILL_ILLTRP (__SI_FAULT|4) /* illegal trap */ -#define ILL_PRVOPC (__SI_FAULT|5) /* privileged opcode */ -#define ILL_PRVREG (__SI_FAULT|6) /* privileged register */ -#define ILL_COPROC (__SI_FAULT|7) /* coprocessor error */ -#define ILL_BADSTK (__SI_FAULT|8) /* internal stack error */ +#define ILL_ILLOPC 1 /* illegal opcode */ +#define ILL_ILLOPN 2 /* illegal operand */ +#define ILL_ILLADR 3 /* illegal addressing mode */ +#define ILL_ILLTRP 4 /* illegal trap */ +#define ILL_PRVOPC 5 /* privileged opcode */ +#define ILL_PRVREG 6 /* privileged register */ +#define ILL_COPROC 7 /* coprocessor error */ +#define ILL_BADSTK 8 /* internal stack error */ #define NSIGILL 8 /* * SIGFPE si_codes */ -#define FPE_INTDIV (__SI_FAULT|1) /* integer divide by zero */ -#define FPE_INTOVF (__SI_FAULT|2) /* integer overflow */ -#define FPE_FLTDIV (__SI_FAULT|3) /* floating point divide by zero */ -#define FPE_FLTOVF (__SI_FAULT|4) /* floating point overflow */ -#define FPE_FLTUND (__SI_FAULT|5) /* floating point underflow */ -#define FPE_FLTRES (__SI_FAULT|6) /* floating point inexact result */ -#define FPE_FLTINV (__SI_FAULT|7) /* floating point invalid operation */ -#define FPE_FLTSUB (__SI_FAULT|8) /* subscript out of range */ +#define FPE_INTDIV 1 /* integer divide by zero */ +#define FPE_INTOVF 2 /* integer overflow */ +#define FPE_FLTDIV 3 /* floating point divide by zero */ +#define FPE_FLTOVF 4 /* floating point overflow */ +#define FPE_FLTUND 5 /* floating point underflow */ +#define FPE_FLTRES 6 /* floating point inexact result */ +#define FPE_FLTINV 7 /* floating point invalid operation */ +#define FPE_FLTSUB 8 /* subscript out of range */ #define NSIGFPE 8 /* * SIGSEGV si_codes */ -#define SEGV_MAPERR (__SI_FAULT|1) /* address not mapped to object */ -#define SEGV_ACCERR (__SI_FAULT|2) /* invalid permissions for mapped object */ -#define SEGV_BNDERR (__SI_FAULT|3) /* failed address bound checks */ -#define SEGV_PKUERR (__SI_FAULT|4) /* failed protection key checks */ +#define SEGV_MAPERR 1 /* address not mapped to object */ +#define SEGV_ACCERR 2 /* invalid permissions for mapped object */ +#define SEGV_BNDERR 3 /* failed address bound checks */ +#define SEGV_PKUERR 4 /* failed protection key checks */ #define NSIGSEGV 4 /* * SIGBUS si_codes */ -#define BUS_ADRALN (__SI_FAULT|1) /* invalid address alignment */ -#define BUS_ADRERR (__SI_FAULT|2) /* non-existent physical address */ -#define BUS_OBJERR (__SI_FAULT|3) /* object specific hardware error */ +#define BUS_ADRALN 1 /* invalid address alignment */ +#define BUS_ADRERR 2 /* non-existent physical address */ +#define BUS_OBJERR 3 /* object specific hardware error */ /* hardware memory error consumed on a machine check: action required */ -#define BUS_MCEERR_AR (__SI_FAULT|4) +#define BUS_MCEERR_AR 4 /* hardware memory error detected in process but not consumed: action optional*/ -#define BUS_MCEERR_AO (__SI_FAULT|5) +#define BUS_MCEERR_AO 5 #define NSIGBUS 5 /* * SIGTRAP si_codes */ -#define TRAP_BRKPT (__SI_FAULT|1) /* process breakpoint */ -#define TRAP_TRACE (__SI_FAULT|2) /* process trace trap */ -#define TRAP_BRANCH (__SI_FAULT|3) /* process taken branch trap */ -#define TRAP_HWBKPT (__SI_FAULT|4) /* hardware breakpoint/watchpoint */ +#define TRAP_BRKPT 1 /* process breakpoint */ +#define TRAP_TRACE 2 /* process trace trap */ +#define TRAP_BRANCH 3 /* process taken branch trap */ +#define TRAP_HWBKPT 4 /* hardware breakpoint/watchpoint */ #define NSIGTRAP 4 /* * SIGCHLD si_codes */ -#define CLD_EXITED (__SI_CHLD|1) /* child has exited */ -#define CLD_KILLED (__SI_CHLD|2) /* child was killed */ -#define CLD_DUMPED (__SI_CHLD|3) /* child terminated abnormally */ -#define CLD_TRAPPED (__SI_CHLD|4) /* traced child has trapped */ -#define CLD_STOPPED (__SI_CHLD|5) /* child has stopped */ -#define CLD_CONTINUED (__SI_CHLD|6) /* stopped child has continued */ +#define CLD_EXITED 1 /* child has exited */ +#define CLD_KILLED 2 /* child was killed */ +#define CLD_DUMPED 3 /* child terminated abnormally */ +#define CLD_TRAPPED 4 /* traced child has trapped */ +#define CLD_STOPPED 5 /* child has stopped */ +#define CLD_CONTINUED 6 /* stopped child has continued */ #define NSIGCHLD 6 /* * SIGPOLL (or any other signal without signal specific si_codes) si_codes */ -#define POLL_IN (__SI_POLL|1) /* data input available */ -#define POLL_OUT (__SI_POLL|2) /* output buffers available */ -#define POLL_MSG (__SI_POLL|3) /* input message available */ -#define POLL_ERR (__SI_POLL|4) /* i/o error */ -#define POLL_PRI (__SI_POLL|5) /* high priority input available */ -#define POLL_HUP (__SI_POLL|6) /* device disconnected */ +#define POLL_IN 1 /* data input available */ +#define POLL_OUT 2 /* output buffers available */ +#define POLL_MSG 3 /* input message available */ +#define POLL_ERR 4 /* i/o error */ +#define POLL_PRI 5 /* high priority input available */ +#define POLL_HUP 6 /* device disconnected */ #define NSIGPOLL 6 /* * SIGSYS si_codes */ -#define SYS_SECCOMP (__SI_SYS|1) /* seccomp triggered */ -#define NSIGSYS 1 +#define SYS_SECCOMP 1 /* seccomp triggered */ +#define NSIGSYS 1 /* * sigevent definitions diff --git a/kernel/exit.c b/kernel/exit.c index c5548faa9f37..c8f23613df5b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1616,7 +1616,7 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, user_access_begin(); unsafe_put_user(signo, &infop->si_signo, Efault); unsafe_put_user(0, &infop->si_errno, Efault); - unsafe_put_user((short)info.cause, &infop->si_code, Efault); + unsafe_put_user(info.cause, &infop->si_code, Efault); unsafe_put_user(info.pid, &infop->si_pid, Efault); unsafe_put_user(info.uid, &infop->si_uid, Efault); unsafe_put_user(info.status, &infop->si_status, Efault); @@ -1742,7 +1742,7 @@ COMPAT_SYSCALL_DEFINE5(waitid, user_access_begin(); unsafe_put_user(signo, &infop->si_signo, Efault); unsafe_put_user(0, &infop->si_errno, Efault); - unsafe_put_user((short)info.cause, &infop->si_code, Efault); + unsafe_put_user(info.cause, &infop->si_code, Efault); unsafe_put_user(info.pid, &infop->si_pid, Efault); unsafe_put_user(info.uid, &infop->si_uid, Efault); unsafe_put_user(info.status, &infop->si_status, Efault); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 60f356d91060..84b1367935e4 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -728,8 +728,7 @@ static int ptrace_peek_siginfo(struct task_struct *child, if (unlikely(in_compat_syscall())) { compat_siginfo_t __user *uinfo = compat_ptr(data); - if (copy_siginfo_to_user32(uinfo, &info) || - __put_user(info.si_code, &uinfo->si_code)) { + if (copy_siginfo_to_user32(uinfo, &info)) { ret = -EFAULT; break; } @@ -739,8 +738,7 @@ static int ptrace_peek_siginfo(struct task_struct *child, { siginfo_t __user *uinfo = (siginfo_t __user *) data; - if (copy_siginfo_to_user(uinfo, &info) || - __put_user(info.si_code, &uinfo->si_code)) { + if (copy_siginfo_to_user(uinfo, &info)) { ret = -EFAULT; break; } diff --git a/kernel/signal.c b/kernel/signal.c index caed9133ae52..6bd53c8189f0 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2682,6 +2682,51 @@ COMPAT_SYSCALL_DEFINE2(rt_sigpending, compat_sigset_t __user *, uset, } #endif +enum siginfo_layout siginfo_layout(int sig, int si_code) +{ + enum siginfo_layout layout = SIL_KILL; + if ((si_code > SI_USER) && (si_code < SI_KERNEL)) { + static const struct { + unsigned char limit, layout; + } filter[] = { + [SIGILL] = { NSIGILL, SIL_FAULT }, + [SIGFPE] = { NSIGFPE, SIL_FAULT }, + [SIGSEGV] = { NSIGSEGV, SIL_FAULT }, + [SIGBUS] = { NSIGBUS, SIL_FAULT }, + [SIGTRAP] = { NSIGTRAP, SIL_FAULT }, +#if defined(SIGMET) && defined(NSIGEMT) + [SIGEMT] = { NSIGEMT, SIL_FAULT }, +#endif + [SIGCHLD] = { NSIGCHLD, SIL_CHLD }, + [SIGPOLL] = { NSIGPOLL, SIL_POLL }, +#ifdef __ARCH_SIGSYS + [SIGSYS] = { NSIGSYS, SIL_SYS }, +#endif + }; + if ((sig < ARRAY_SIZE(filter)) && (si_code <= filter[sig].limit)) + layout = filter[sig].layout; + else if (si_code <= NSIGPOLL) + layout = SIL_POLL; + } else { + if (si_code == SI_TIMER) + layout = SIL_TIMER; + else if (si_code == SI_SIGIO) + layout = SIL_POLL; + else if (si_code < 0) + layout = SIL_RT; + /* Tests to support buggy kernel ABIs */ +#ifdef TRAP_FIXME + if ((sig == SIGTRAP) && (si_code == TRAP_FIXME)) + layout = SIL_FAULT; +#endif +#ifdef FPE_FIXME + if ((sig == SIGFPE) && (si_code == FPE_FIXME)) + layout = SIL_FAULT; +#endif + } + return layout; +} + #ifndef HAVE_ARCH_COPY_SIGINFO_TO_USER int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from) @@ -2704,22 +2749,20 @@ int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from) */ err = __put_user(from->si_signo, &to->si_signo); err |= __put_user(from->si_errno, &to->si_errno); - err |= __put_user((short)from->si_code, &to->si_code); - switch (from->si_code & __SI_MASK) { - case __SI_KILL: + err |= __put_user(from->si_code, &to->si_code); + switch (siginfo_layout(from->si_signo, from->si_code)) { + case SIL_KILL: err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); break; - case __SI_TIMER: - err |= __put_user(from->si_tid, &to->si_tid); - err |= __put_user(from->si_overrun, &to->si_overrun); - err |= __put_user(from->si_ptr, &to->si_ptr); + case SIL_TIMER: + /* Unreached SI_TIMER is negative */ break; - case __SI_POLL: + case SIL_POLL: err |= __put_user(from->si_band, &to->si_band); err |= __put_user(from->si_fd, &to->si_fd); break; - case __SI_FAULT: + case SIL_FAULT: err |= __put_user(from->si_addr, &to->si_addr); #ifdef __ARCH_SI_TRAPNO err |= __put_user(from->si_trapno, &to->si_trapno); @@ -2744,30 +2787,25 @@ int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from) err |= __put_user(from->si_pkey, &to->si_pkey); #endif break; - case __SI_CHLD: + case SIL_CHLD: err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); err |= __put_user(from->si_status, &to->si_status); err |= __put_user(from->si_utime, &to->si_utime); err |= __put_user(from->si_stime, &to->si_stime); break; - case __SI_RT: /* This is not generated by the kernel as of now. */ - case __SI_MESGQ: /* But this is */ + case SIL_RT: err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); err |= __put_user(from->si_ptr, &to->si_ptr); break; #ifdef __ARCH_SIGSYS - case __SI_SYS: + case SIL_SYS: err |= __put_user(from->si_call_addr, &to->si_call_addr); err |= __put_user(from->si_syscall, &to->si_syscall); err |= __put_user(from->si_arch, &to->si_arch); break; #endif - default: /* this is just in case for now ... */ - err |= __put_user(from->si_pid, &to->si_pid); - err |= __put_user(from->si_uid, &to->si_uid); - break; } return err; } -- cgit v1.2.3 From 20229305afdc0e49b3af2204ab848d6bddcb9fc3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 8 Aug 2017 13:52:28 -0500 Subject: mips/signal: In force_fcr31_sig return in the impossible case In a recent discussion Maciej Rozycki reported that this case is impossible. Handle the impossible case by just returning instead of trying to handle it. This makes static analysis simpler as it means nothing needs to consider the impossible case after the return statement. As the code no longer has to deal with this case remove FPE_FIXME from the mips siginfo.h Cc: "Maciej W. Rozycki" Cc: Ralf Baechle Link: http://lkml.kernel.org/r/20170718140651.15973-4-ebiederm@xmission.com Ref: ea1b75cf9138 ("signal/mips: Document a conflict with SI_USER with SIGFPE") Signed-off-by: "Eric W. Biederman" --- arch/mips/include/uapi/asm/siginfo.h | 7 ------- arch/mips/kernel/traps.c | 2 +- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/mips/include/uapi/asm/siginfo.h b/arch/mips/include/uapi/asm/siginfo.h index 22a86d84a504..cf6113bbcb98 100644 --- a/arch/mips/include/uapi/asm/siginfo.h +++ b/arch/mips/include/uapi/asm/siginfo.h @@ -123,11 +123,4 @@ typedef struct siginfo { #define SI_TIMER -3 /* sent by timer expiration */ #define SI_MESGQ -4 /* sent by real time mesq state change */ -/* - * SIGFPE si_codes - */ -#ifdef __KERNEL__ -#define FPE_FIXME 0 /* Broken dup of SI_USER */ -#endif /* __KERNEL__ */ - #endif /* _UAPI_ASM_SIGINFO_H */ diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 6c9cca9c5341..2bf414993347 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -735,7 +735,7 @@ void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr, else if (fcr31 & FPU_CSR_INE_X) si.si_code = FPE_FLTRES; else - si.si_code = FPE_FIXME; + return; /* Broken hardware? */ force_sig_info(SIGFPE, &si, tsk); } -- cgit v1.2.3 From 8db6c34f1dbc8e06aa016a9b829b06902c3e1340 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Mon, 8 May 2017 13:11:56 -0500 Subject: Introduce v3 namespaced file capabilities Root in a non-initial user ns cannot be trusted to write a traditional security.capability xattr. If it were allowed to do so, then any unprivileged user on the host could map his own uid to root in a private namespace, write the xattr, and execute the file with privilege on the host. However supporting file capabilities in a user namespace is very desirable. Not doing so means that any programs designed to run with limited privilege must continue to support other methods of gaining and dropping privilege. For instance a program installer must detect whether file capabilities can be assigned, and assign them if so but set setuid-root otherwise. The program in turn must know how to drop partial capabilities, and do so only if setuid-root. This patch introduces v3 of the security.capability xattr. It builds a vfs_ns_cap_data struct by appending a uid_t rootid to struct vfs_cap_data. This is the absolute uid_t (that is, the uid_t in user namespace which mounted the filesystem, usually init_user_ns) of the root id in whose namespaces the file capabilities may take effect. When a task asks to write a v2 security.capability xattr, if it is privileged with respect to the userns which mounted the filesystem, then nothing should change. Otherwise, the kernel will transparently rewrite the xattr as a v3 with the appropriate rootid. This is done during the execution of setxattr() to catch user-space-initiated capability writes. Subsequently, any task executing the file which has the noted kuid as its root uid, or which is in a descendent user_ns of such a user_ns, will run the file with capabilities. Similarly when asking to read file capabilities, a v3 capability will be presented as v2 if it applies to the caller's namespace. If a task writes a v3 security.capability, then it can provide a uid for the xattr so long as the uid is valid in its own user namespace, and it is privileged with CAP_SETFCAP over its namespace. The kernel will translate that rootid to an absolute uid, and write that to disk. After this, a task in the writer's namespace will not be able to use those capabilities (unless rootid was 0), but a task in a namespace where the given uid is root will. Only a single security.capability xattr may exist at a time for a given file. A task may overwrite an existing xattr so long as it is privileged over the inode. Note this is a departure from previous semantics, which required privilege to remove a security.capability xattr. This check can be re-added if deemed useful. This allows a simple setxattr to work, allows tar/untar to work, and allows us to tar in one namespace and untar in another while preserving the capability, without risking leaking privilege into a parent namespace. Example using tar: $ cp /bin/sleep sleepx $ mkdir b1 b2 $ lxc-usernsexec -m b:0:100000:1 -m b:1:$(id -u):1 -- chown 0:0 b1 $ lxc-usernsexec -m b:0:100001:1 -m b:1:$(id -u):1 -- chown 0:0 b2 $ lxc-usernsexec -m b:0:100000:1000 -- tar --xattrs-include=security.capability --xattrs -cf b1/sleepx.tar sleepx $ lxc-usernsexec -m b:0:100001:1000 -- tar --xattrs-include=security.capability --xattrs -C b2 -xf b1/sleepx.tar $ lxc-usernsexec -m b:0:100001:1000 -- getcap b2/sleepx b2/sleepx = cap_sys_admin+ep # /opt/ltp/testcases/bin/getv3xattr b2/sleepx v3 xattr, rootid is 100001 A patch to linux-test-project adding a new set of tests for this functionality is in the nsfscaps branch at github.com/hallyn/ltp Changelog: Nov 02 2016: fix invalid check at refuse_fcap_overwrite() Nov 07 2016: convert rootid from and to fs user_ns (From ebiederm: mar 28 2017) commoncap.c: fix typos - s/v4/v3 get_vfs_caps_from_disk: clarify the fs_ns root access check nsfscaps: change the code split for cap_inode_setxattr() Apr 09 2017: don't return v3 cap for caps owned by current root. return a v2 cap for a true v2 cap in non-init ns Apr 18 2017: . Change the flow of fscap writing to support s_user_ns writing. . Remove refuse_fcap_overwrite(). The value of the previous xattr doesn't matter. Apr 24 2017: . incorporate Eric's incremental diff . move cap_convert_nscap to setxattr and simplify its usage May 8, 2017: . fix leaking dentry refcount in cap_inode_getsecurity Signed-off-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- fs/xattr.c | 6 + include/linux/capability.h | 2 + include/linux/security.h | 2 + include/uapi/linux/capability.h | 22 +++- security/commoncap.c | 270 +++++++++++++++++++++++++++++++++++++--- 5 files changed, 280 insertions(+), 22 deletions(-) diff --git a/fs/xattr.c b/fs/xattr.c index 464c94bf65f9..7b03df6b8be2 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -441,6 +441,12 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value, if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) posix_acl_fix_xattr_from_user(kvalue, size); + else if (strcmp(kname, XATTR_NAME_CAPS) == 0) { + error = cap_convert_nscap(d, &kvalue, size); + if (error < 0) + goto out; + size = error; + } } error = vfs_setxattr(d, kname, kvalue, size, flags); diff --git a/include/linux/capability.h b/include/linux/capability.h index 6ffb67e10c06..b52e278e4744 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -248,4 +248,6 @@ extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns); /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); +extern int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size); + #endif /* !_LINUX_CAPABILITY_H */ diff --git a/include/linux/security.h b/include/linux/security.h index b6ea1dc9cc9d..6fff8c924718 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -91,6 +91,8 @@ extern int cap_inode_setxattr(struct dentry *dentry, const char *name, extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); +extern int cap_inode_getsecurity(struct inode *inode, const char *name, + void **buffer, bool alloc); extern int cap_mmap_addr(unsigned long addr); extern int cap_mmap_file(struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags); diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index 6fe14d001f68..230e05d35191 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -60,9 +60,13 @@ typedef struct __user_cap_data_struct { #define VFS_CAP_U32_2 2 #define XATTR_CAPS_SZ_2 (sizeof(__le32)*(1 + 2*VFS_CAP_U32_2)) -#define XATTR_CAPS_SZ XATTR_CAPS_SZ_2 -#define VFS_CAP_U32 VFS_CAP_U32_2 -#define VFS_CAP_REVISION VFS_CAP_REVISION_2 +#define VFS_CAP_REVISION_3 0x03000000 +#define VFS_CAP_U32_3 2 +#define XATTR_CAPS_SZ_3 (sizeof(__le32)*(2 + 2*VFS_CAP_U32_3)) + +#define XATTR_CAPS_SZ XATTR_CAPS_SZ_3 +#define VFS_CAP_U32 VFS_CAP_U32_3 +#define VFS_CAP_REVISION VFS_CAP_REVISION_3 struct vfs_cap_data { __le32 magic_etc; /* Little endian */ @@ -72,6 +76,18 @@ struct vfs_cap_data { } data[VFS_CAP_U32]; }; +/* + * same as vfs_cap_data but with a rootid at the end + */ +struct vfs_ns_cap_data { + __le32 magic_etc; + struct { + __le32 permitted; /* Little endian */ + __le32 inheritable; /* Little endian */ + } data[VFS_CAP_U32]; + __le32 rootid; +}; + #ifndef __KERNEL__ /* diff --git a/security/commoncap.c b/security/commoncap.c index d59320282294..c37d27dd1e2c 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -335,6 +335,209 @@ int cap_inode_killpriv(struct dentry *dentry) return error; } +static bool rootid_owns_currentns(kuid_t kroot) +{ + struct user_namespace *ns; + + if (!uid_valid(kroot)) + return false; + + for (ns = current_user_ns(); ; ns = ns->parent) { + if (from_kuid(ns, kroot) == 0) + return true; + if (ns == &init_user_ns) + break; + } + + return false; +} + +static __u32 sansflags(__u32 m) +{ + return m & ~VFS_CAP_FLAGS_EFFECTIVE; +} + +static bool is_v2header(size_t size, __le32 magic) +{ + __u32 m = le32_to_cpu(magic); + if (size != XATTR_CAPS_SZ_2) + return false; + return sansflags(m) == VFS_CAP_REVISION_2; +} + +static bool is_v3header(size_t size, __le32 magic) +{ + __u32 m = le32_to_cpu(magic); + + if (size != XATTR_CAPS_SZ_3) + return false; + return sansflags(m) == VFS_CAP_REVISION_3; +} + +/* + * getsecurity: We are called for security.* before any attempt to read the + * xattr from the inode itself. + * + * This gives us a chance to read the on-disk value and convert it. If we + * return -EOPNOTSUPP, then vfs_getxattr() will call the i_op handler. + * + * Note we are not called by vfs_getxattr_alloc(), but that is only called + * by the integrity subsystem, which really wants the unconverted values - + * so that's good. + */ +int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, + bool alloc) +{ + int size, ret; + kuid_t kroot; + uid_t root, mappedroot; + char *tmpbuf = NULL; + struct vfs_cap_data *cap; + struct vfs_ns_cap_data *nscap; + struct dentry *dentry; + struct user_namespace *fs_ns; + + if (strcmp(name, "capability") != 0) + return -EOPNOTSUPP; + + dentry = d_find_alias(inode); + if (!dentry) + return -EINVAL; + + size = sizeof(struct vfs_ns_cap_data); + ret = (int) vfs_getxattr_alloc(dentry, XATTR_NAME_CAPS, + &tmpbuf, size, GFP_NOFS); + dput(dentry); + + if (ret < 0) + return ret; + + fs_ns = inode->i_sb->s_user_ns; + cap = (struct vfs_cap_data *) tmpbuf; + if (is_v2header((size_t) ret, cap->magic_etc)) { + /* If this is sizeof(vfs_cap_data) then we're ok with the + * on-disk value, so return that. */ + if (alloc) + *buffer = tmpbuf; + else + kfree(tmpbuf); + return ret; + } else if (!is_v3header((size_t) ret, cap->magic_etc)) { + kfree(tmpbuf); + return -EINVAL; + } + + nscap = (struct vfs_ns_cap_data *) tmpbuf; + root = le32_to_cpu(nscap->rootid); + kroot = make_kuid(fs_ns, root); + + /* If the root kuid maps to a valid uid in current ns, then return + * this as a nscap. */ + mappedroot = from_kuid(current_user_ns(), kroot); + if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) { + if (alloc) { + *buffer = tmpbuf; + nscap->rootid = cpu_to_le32(mappedroot); + } else + kfree(tmpbuf); + return size; + } + + if (!rootid_owns_currentns(kroot)) { + kfree(tmpbuf); + return -EOPNOTSUPP; + } + + /* This comes from a parent namespace. Return as a v2 capability */ + size = sizeof(struct vfs_cap_data); + if (alloc) { + *buffer = kmalloc(size, GFP_ATOMIC); + if (*buffer) { + struct vfs_cap_data *cap = *buffer; + __le32 nsmagic, magic; + magic = VFS_CAP_REVISION_2; + nsmagic = le32_to_cpu(nscap->magic_etc); + if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE) + magic |= VFS_CAP_FLAGS_EFFECTIVE; + memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32); + cap->magic_etc = cpu_to_le32(magic); + } + } + kfree(tmpbuf); + return size; +} + +static kuid_t rootid_from_xattr(const void *value, size_t size, + struct user_namespace *task_ns) +{ + const struct vfs_ns_cap_data *nscap = value; + uid_t rootid = 0; + + if (size == XATTR_CAPS_SZ_3) + rootid = le32_to_cpu(nscap->rootid); + + return make_kuid(task_ns, rootid); +} + +static bool validheader(size_t size, __le32 magic) +{ + return is_v2header(size, magic) || is_v3header(size, magic); +} + +/* + * User requested a write of security.capability. If needed, update the + * xattr to change from v2 to v3, or to fixup the v3 rootid. + * + * If all is ok, we return the new size, on error return < 0. + */ +int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size) +{ + struct vfs_ns_cap_data *nscap; + uid_t nsrootid; + const struct vfs_cap_data *cap = *ivalue; + __u32 magic, nsmagic; + struct inode *inode = d_backing_inode(dentry); + struct user_namespace *task_ns = current_user_ns(), + *fs_ns = inode->i_sb->s_user_ns; + kuid_t rootid; + size_t newsize; + + if (!*ivalue) + return -EINVAL; + if (!validheader(size, cap->magic_etc)) + return -EINVAL; + if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP)) + return -EPERM; + if (size == XATTR_CAPS_SZ_2) + if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP)) + /* user is privileged, just write the v2 */ + return size; + + rootid = rootid_from_xattr(*ivalue, size, task_ns); + if (!uid_valid(rootid)) + return -EINVAL; + + nsrootid = from_kuid(fs_ns, rootid); + if (nsrootid == -1) + return -EINVAL; + + newsize = sizeof(struct vfs_ns_cap_data); + nscap = kmalloc(newsize, GFP_ATOMIC); + if (!nscap) + return -ENOMEM; + nscap->rootid = cpu_to_le32(nsrootid); + nsmagic = VFS_CAP_REVISION_3; + magic = le32_to_cpu(cap->magic_etc); + if (magic & VFS_CAP_FLAGS_EFFECTIVE) + nsmagic |= VFS_CAP_FLAGS_EFFECTIVE; + nscap->magic_etc = cpu_to_le32(nsmagic); + memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32); + + kvfree(*ivalue); + *ivalue = nscap; + return newsize; +} + /* * Calculate the new process capability sets from the capability sets attached * to a file. @@ -388,7 +591,10 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data __u32 magic_etc; unsigned tocopy, i; int size; - struct vfs_cap_data caps; + struct vfs_ns_cap_data data, *nscaps = &data; + struct vfs_cap_data *caps = (struct vfs_cap_data *) &data; + kuid_t rootkuid; + struct user_namespace *fs_ns = inode->i_sb->s_user_ns; memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data)); @@ -396,18 +602,20 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data return -ENODATA; size = __vfs_getxattr((struct dentry *)dentry, inode, - XATTR_NAME_CAPS, &caps, XATTR_CAPS_SZ); + XATTR_NAME_CAPS, &data, XATTR_CAPS_SZ); if (size == -ENODATA || size == -EOPNOTSUPP) /* no data, that's ok */ return -ENODATA; + if (size < 0) return size; if (size < sizeof(magic_etc)) return -EINVAL; - cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps.magic_etc); + cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps->magic_etc); + rootkuid = make_kuid(fs_ns, 0); switch (magic_etc & VFS_CAP_REVISION_MASK) { case VFS_CAP_REVISION_1: if (size != XATTR_CAPS_SZ_1) @@ -419,15 +627,27 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data return -EINVAL; tocopy = VFS_CAP_U32_2; break; + case VFS_CAP_REVISION_3: + if (size != XATTR_CAPS_SZ_3) + return -EINVAL; + tocopy = VFS_CAP_U32_3; + rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid)); + break; + default: return -EINVAL; } + /* Limit the caps to the mounter of the filesystem + * or the more limited uid specified in the xattr. + */ + if (!rootid_owns_currentns(rootkuid)) + return -ENODATA; CAP_FOR_EACH_U32(i) { if (i >= tocopy) break; - cpu_caps->permitted.cap[i] = le32_to_cpu(caps.data[i].permitted); - cpu_caps->inheritable.cap[i] = le32_to_cpu(caps.data[i].inheritable); + cpu_caps->permitted.cap[i] = le32_to_cpu(caps->data[i].permitted); + cpu_caps->inheritable.cap[i] = le32_to_cpu(caps->data[i].inheritable); } cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; @@ -465,8 +685,8 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_c rc = get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps); if (rc < 0) { if (rc == -EINVAL) - printk(KERN_NOTICE "%s: get_vfs_caps_from_disk returned %d for %s\n", - __func__, rc, bprm->filename); + printk(KERN_NOTICE "Invalid argument reading file caps for %s\n", + bprm->filename); else if (rc == -ENODATA) rc = 0; goto out; @@ -663,15 +883,19 @@ int cap_bprm_secureexec(struct linux_binprm *bprm) int cap_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { - if (!strcmp(name, XATTR_NAME_CAPS)) { - if (!capable(CAP_SETFCAP)) - return -EPERM; + /* Ignore non-security xattrs */ + if (strncmp(name, XATTR_SECURITY_PREFIX, + sizeof(XATTR_SECURITY_PREFIX) - 1) != 0) + return 0; + + /* + * For XATTR_NAME_CAPS the check will be done in + * cap_convert_nscap(), called by setxattr() + */ + if (strcmp(name, XATTR_NAME_CAPS) == 0) return 0; - } - if (!strncmp(name, XATTR_SECURITY_PREFIX, - sizeof(XATTR_SECURITY_PREFIX) - 1) && - !capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN)) return -EPERM; return 0; } @@ -689,15 +913,22 @@ int cap_inode_setxattr(struct dentry *dentry, const char *name, */ int cap_inode_removexattr(struct dentry *dentry, const char *name) { - if (!strcmp(name, XATTR_NAME_CAPS)) { - if (!capable(CAP_SETFCAP)) + /* Ignore non-security xattrs */ + if (strncmp(name, XATTR_SECURITY_PREFIX, + sizeof(XATTR_SECURITY_PREFIX) - 1) != 0) + return 0; + + if (strcmp(name, XATTR_NAME_CAPS) == 0) { + /* security.capability gets namespaced */ + struct inode *inode = d_backing_inode(dentry); + if (!inode) + return -EINVAL; + if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP)) return -EPERM; return 0; } - if (!strncmp(name, XATTR_SECURITY_PREFIX, - sizeof(XATTR_SECURITY_PREFIX) - 1) && - !capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN)) return -EPERM; return 0; } @@ -1085,6 +1316,7 @@ struct security_hook_list capability_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(bprm_secureexec, cap_bprm_secureexec), LSM_HOOK_INIT(inode_need_killpriv, cap_inode_need_killpriv), LSM_HOOK_INIT(inode_killpriv, cap_inode_killpriv), + LSM_HOOK_INIT(inode_getsecurity, cap_inode_getsecurity), LSM_HOOK_INIT(mmap_addr, cap_mmap_addr), LSM_HOOK_INIT(mmap_file, cap_mmap_file), LSM_HOOK_INIT(task_fix_setuid, cap_task_fix_setuid), -- cgit v1.2.3