From d02f6b7dab8228487268298ea1f21081c0b4b3eb Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 7 Apr 2020 14:12:45 +1000 Subject: powerpc/uaccess: Evaluate macro arguments once, before user access is allowed get/put_user() can be called with nontrivial arguments. fs/proc/page.c has a good example: if (put_user(stable_page_flags(ppage), out)) { stable_page_flags() is quite a lot of code, including spin locks in the page allocator. Ensure these arguments are evaluated before user access is allowed. This improves security by reducing code with access to userspace, but it also fixes a PREEMPT bug with KUAP on powerpc/64s: stable_page_flags() is currently called with AMR set to allow writes, it ends up calling spin_unlock(), which can call preempt_schedule. But the task switch code can not be called with AMR set (it relies on interrupts saving the register), so this blows up. It's fine if the code inside allow_user_access() is preemptible, because a timer or IPI will save the AMR, but it's not okay to explicitly cause a reschedule. Fixes: de78a9c42a79 ("powerpc: Add a framework for Kernel Userspace Access Protection") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200407041245.600651-1-npiggin@gmail.com --- arch/powerpc/include/asm/uaccess.h | 49 +++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 14 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 2f500debae21..0969285996cb 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -166,13 +166,17 @@ do { \ ({ \ long __pu_err; \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ + __typeof__(*(ptr)) __pu_val = (x); \ + __typeof__(size) __pu_size = (size); \ + \ if (!is_kernel_addr((unsigned long)__pu_addr)) \ might_fault(); \ - __chk_user_ptr(ptr); \ + __chk_user_ptr(__pu_addr); \ if (do_allow) \ - __put_user_size((x), __pu_addr, (size), __pu_err); \ + __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ else \ - __put_user_size_allowed((x), __pu_addr, (size), __pu_err); \ + __put_user_size_allowed(__pu_val, __pu_addr, __pu_size, __pu_err); \ + \ __pu_err; \ }) @@ -180,9 +184,13 @@ do { \ ({ \ long __pu_err = -EFAULT; \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ + __typeof__(*(ptr)) __pu_val = (x); \ + __typeof__(size) __pu_size = (size); \ + \ might_fault(); \ - if (access_ok(__pu_addr, size)) \ - __put_user_size((x), __pu_addr, (size), __pu_err); \ + if (access_ok(__pu_addr, __pu_size)) \ + __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ + \ __pu_err; \ }) @@ -190,8 +198,12 @@ do { \ ({ \ long __pu_err; \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - __chk_user_ptr(ptr); \ - __put_user_size((x), __pu_addr, (size), __pu_err); \ + __typeof__(*(ptr)) __pu_val = (x); \ + __typeof__(size) __pu_size = (size); \ + \ + __chk_user_ptr(__pu_addr); \ + __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \ + \ __pu_err; \ }) @@ -283,15 +295,18 @@ do { \ long __gu_err; \ __long_type(*(ptr)) __gu_val; \ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ - __chk_user_ptr(ptr); \ + __typeof__(size) __gu_size = (size); \ + \ + __chk_user_ptr(__gu_addr); \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ might_fault(); \ barrier_nospec(); \ if (do_allow) \ - __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ + __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ else \ - __get_user_size_allowed(__gu_val, __gu_addr, (size), __gu_err); \ + __get_user_size_allowed(__gu_val, __gu_addr, __gu_size, __gu_err); \ (x) = (__typeof__(*(ptr)))__gu_val; \ + \ __gu_err; \ }) @@ -300,12 +315,15 @@ do { \ long __gu_err = -EFAULT; \ __long_type(*(ptr)) __gu_val = 0; \ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + __typeof__(size) __gu_size = (size); \ + \ might_fault(); \ - if (access_ok(__gu_addr, (size))) { \ + if (access_ok(__gu_addr, __gu_size)) { \ barrier_nospec(); \ - __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ + __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ } \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ + \ __gu_err; \ }) @@ -314,10 +332,13 @@ do { \ long __gu_err; \ __long_type(*(ptr)) __gu_val; \ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ - __chk_user_ptr(ptr); \ + __typeof__(size) __gu_size = (size); \ + \ + __chk_user_ptr(__gu_addr); \ barrier_nospec(); \ - __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ + __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ + \ __gu_err; \ }) -- cgit v1.2.3 From 0094368e3bb97a710ce163f9c06d290c1c308621 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 3 May 2020 00:33:16 +1000 Subject: powerpc/64s: Fix unrecoverable SLB crashes due to preemption check Hugh reported that his trusty G5 crashed after a few hours under load with an "Unrecoverable exception 380". The crash is in interrupt_return() where we check lazy_irq_pending(), which calls get_paca() and with CONFIG_DEBUG_PREEMPT=y that goes to check_preemption_disabled() via debug_smp_processor_id(). As Nick explained on the list: Problem is MSR[RI] is cleared here, ready to do the last few things for interrupt return where we're not allowed to take any other interrupts. SLB interrupts can happen just about anywhere aside from kernel text, global variables, and stack. When that hits, it appears to be unrecoverable due to RI=0. The problematic access is in preempt_count() which is: return READ_ONCE(current_thread_info()->preempt_count); Because of THREAD_INFO_IN_TASK, current_thread_info() just points to current, so the access is to somewhere in kernel memory, but not on the stack or in .data, which means it can cause an SLB miss. If we take an SLB miss with RI=0 it is fatal. The easiest solution is to add a version of lazy_irq_pending() that doesn't do the preemption check and call it from the interrupt return path. Fixes: 68b34588e202 ("powerpc/64/sycall: Implement syscall entry/exit logic in C") Reported-by: Hugh Dickins Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200502143316.929341-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/hw_irq.h | 20 +++++++++++++++++++- arch/powerpc/kernel/syscall_64.c | 6 +++--- 2 files changed, 22 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index e0e71777961f..3a0db7b0b46e 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -250,9 +250,27 @@ static inline bool arch_irqs_disabled(void) } \ } while(0) +static inline bool __lazy_irq_pending(u8 irq_happened) +{ + return !!(irq_happened & ~PACA_IRQ_HARD_DIS); +} + +/* + * Check if a lazy IRQ is pending. Should be called with IRQs hard disabled. + */ static inline bool lazy_irq_pending(void) { - return !!(get_paca()->irq_happened & ~PACA_IRQ_HARD_DIS); + return __lazy_irq_pending(get_paca()->irq_happened); +} + +/* + * Check if a lazy IRQ is pending, with no debugging checks. + * Should be called with IRQs hard disabled. + * For use in RI disabled code or other constrained situations. + */ +static inline bool lazy_irq_pending_nocheck(void) +{ + return __lazy_irq_pending(local_paca->irq_happened); } /* diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index c74295a7765b..1fe94dd9de32 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -189,7 +189,7 @@ again: /* This pattern matches prep_irq_for_idle */ __hard_EE_RI_disable(); - if (unlikely(lazy_irq_pending())) { + if (unlikely(lazy_irq_pending_nocheck())) { __hard_RI_enable(); trace_hardirqs_off(); local_paca->irq_happened |= PACA_IRQ_HARD_DIS; @@ -264,7 +264,7 @@ again: trace_hardirqs_on(); __hard_EE_RI_disable(); - if (unlikely(lazy_irq_pending())) { + if (unlikely(lazy_irq_pending_nocheck())) { __hard_RI_enable(); trace_hardirqs_off(); local_paca->irq_happened |= PACA_IRQ_HARD_DIS; @@ -334,7 +334,7 @@ again: trace_hardirqs_on(); __hard_EE_RI_disable(); - if (unlikely(lazy_irq_pending())) { + if (unlikely(lazy_irq_pending_nocheck())) { __hard_RI_enable(); irq_soft_mask_set(IRQS_ALL_DISABLED); trace_hardirqs_off(); -- cgit v1.2.3 From c0d7dcf89e5151b2259d1c2c1b922da3b881d02e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 29 Apr 2020 16:56:49 +1000 Subject: powerpc/64/kuap: Move kuap checks out of MSR[RI]=0 regions of exit code Any kind of WARN causes a program check that will crash with unrecoverable exception if it occurs when RI is clear. Fixes: 68b34588e202 ("powerpc/64/sycall: Implement syscall entry/exit logic in C") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200429065654.1677541-2-npiggin@gmail.com --- arch/powerpc/kernel/syscall_64.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index 1fe94dd9de32..7b7c89cad901 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -35,6 +35,8 @@ notrace long system_call_exception(long r3, long r4, long r5, BUG_ON(!FULL_REGS(regs)); BUG_ON(regs->softe != IRQS_ENABLED); + kuap_check_amr(); + account_cpu_user_entry(); #ifdef CONFIG_PPC_SPLPAR @@ -47,8 +49,6 @@ notrace long system_call_exception(long r3, long r4, long r5, } #endif - kuap_check_amr(); - /* * This is not required for the syscall exit path, but makes the * stack frame look nicer. If this was initialised in the first stack @@ -117,6 +117,8 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, unsigned long ti_flags; unsigned long ret = 0; + kuap_check_amr(); + regs->result = r3; /* Check whether the syscall is issued inside a restartable sequence */ @@ -204,8 +206,6 @@ again: local_paca->tm_scratch = regs->msr; #endif - kuap_check_amr(); - account_cpu_user_exit(); return ret; @@ -228,6 +228,8 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned BUG_ON(!FULL_REGS(regs)); BUG_ON(regs->softe != IRQS_ENABLED); + kuap_check_amr(); + local_irq_save(flags); again: @@ -292,8 +294,6 @@ again: local_paca->tm_scratch = regs->msr; #endif - kuap_check_amr(); - account_cpu_user_exit(); return ret; @@ -313,6 +313,8 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign BUG_ON(regs->msr & MSR_PR); BUG_ON(!FULL_REGS(regs)); + kuap_check_amr(); + if (unlikely(*ti_flagsp & _TIF_EMULATE_STACK_STORE)) { clear_bits(_TIF_EMULATE_STACK_STORE, ti_flagsp); ret = 1; -- cgit v1.2.3 From 53459dc9709db2141d784702abbd43e8fcac8e6d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 29 Apr 2020 16:56:52 +1000 Subject: powerpc/64s/kuap: Restore AMR in system reset exception The system reset interrupt handler locks AMR and exits with EXCEPTION_RESTORE_REGS without restoring AMR. Similarly to the soft-NMI handler, it needs to restore. Fixes: 890274c2dc4c ("powerpc/64s: Implement KUAP for Radix MMU") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200429065654.1677541-5-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 728ccb0f560c..b0ad930cbae5 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -971,6 +971,7 @@ EXC_COMMON_BEGIN(system_reset_common) ld r10,SOFTE(r1) stb r10,PACAIRQSOFTMASK(r13) + kuap_restore_amr r10 EXCEPTION_RESTORE_REGS RFI_TO_USER_OR_KERNEL -- cgit v1.2.3 From c44dc6323cd49d8d742c37e234b952e822c35de4 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 29 Apr 2020 16:56:53 +1000 Subject: powerpc/64s/kuap: Restore AMR in fast_interrupt_return Interrupts that use fast_interrupt_return actually do lock AMR, but they have been ones which tend to come from userspace (or kernel bugs) in radix mode. With kuap on hash, segment interrupts are taken in kernel often, which quickly breaks due to the missing restore. Fixes: 890274c2dc4c ("powerpc/64s: Implement KUAP for Radix MMU") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200429065654.1677541-6-npiggin@gmail.com --- arch/powerpc/kernel/entry_64.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 9a1e5d636dea..b3c9f15089b6 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -472,15 +472,17 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) #ifdef CONFIG_PPC_BOOK3S /* * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not - * touched, AMR not set, no exit work created, then this can be used. + * touched, no exit work created, then this can be used. */ .balign IFETCH_ALIGN_BYTES .globl fast_interrupt_return fast_interrupt_return: _ASM_NOKPROBE_SYMBOL(fast_interrupt_return) + kuap_check_amr r3, r4 ld r4,_MSR(r1) andi. r0,r4,MSR_PR bne .Lfast_user_interrupt_return + kuap_restore_amr r3 andi. r0,r4,MSR_RI li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ bne+ .Lfast_kernel_interrupt_return -- cgit v1.2.3 From fa4f3f56ccd28ac031ab275e673ed4098855fed4 Mon Sep 17 00:00:00 2001 From: Nayna Jain Date: Fri, 1 May 2020 10:16:52 -0400 Subject: powerpc/ima: Fix secure boot rules in ima arch policy To prevent verifying the kernel module appended signature twice (finit_module), once by the module_sig_check() and again by IMA, powerpc secure boot rules define an IMA architecture specific policy rule only if CONFIG_MODULE_SIG_FORCE is not enabled. This, unfortunately, does not take into account the ability of enabling "sig_enforce" on the boot command line (module.sig_enforce=1). Including the IMA module appraise rule results in failing the finit_module syscall, unless the module signing public key is loaded onto the IMA keyring. This patch fixes secure boot policy rules to be based on CONFIG_MODULE_SIG instead. Fixes: 4238fad366a6 ("powerpc/ima: Add support to initialize ima policy rules") Signed-off-by: Nayna Jain Signed-off-by: Michael Ellerman Signed-off-by: Mimi Zohar Link: https://lore.kernel.org/r/1588342612-14532-1-git-send-email-nayna@linux.ibm.com --- arch/powerpc/kernel/ima_arch.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/ima_arch.c b/arch/powerpc/kernel/ima_arch.c index e34116255ced..957abd592075 100644 --- a/arch/powerpc/kernel/ima_arch.c +++ b/arch/powerpc/kernel/ima_arch.c @@ -19,12 +19,12 @@ bool arch_ima_get_secureboot(void) * to be stored as an xattr or as an appended signature. * * To avoid duplicate signature verification as much as possible, the IMA - * policy rule for module appraisal is added only if CONFIG_MODULE_SIG_FORCE + * policy rule for module appraisal is added only if CONFIG_MODULE_SIG * is not enabled. */ static const char *const secure_rules[] = { "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", -#ifndef CONFIG_MODULE_SIG_FORCE +#ifndef CONFIG_MODULE_SIG "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", #endif NULL @@ -50,7 +50,7 @@ static const char *const secure_and_trusted_rules[] = { "measure func=KEXEC_KERNEL_CHECK template=ima-modsig", "measure func=MODULE_CHECK template=ima-modsig", "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", -#ifndef CONFIG_MODULE_SIG_FORCE +#ifndef CONFIG_MODULE_SIG "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", #endif NULL -- cgit v1.2.3 From 4833ce06e6855d526234618b746ffb71d6612c9a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 20 Apr 2020 07:47:05 +0000 Subject: powerpc/32s: Fix build failure with CONFIG_PPC_KUAP_DEBUG gpr2 is not a parametre of kuap_check(), it doesn't exist. Use gpr instead. Fixes: a68c31fc01ef ("powerpc/32s: Implement Kernel Userspace Access Protection") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/ea599546f2a7771bde551393889e44e6b2632332.1587368807.git.christophe.leroy@c-s.fr --- arch/powerpc/include/asm/book3s/32/kup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 3c0ba22dc360..db0a1c281587 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -75,7 +75,7 @@ .macro kuap_check current, gpr #ifdef CONFIG_PPC_KUAP_DEBUG - lwz \gpr2, KUAP(thread) + lwz \gpr, KUAP(thread) 999: twnei \gpr, 0 EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) #endif -- cgit v1.2.3 From e963b7a28b2bf2416304e1a15df967fcf662aff5 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 9 May 2020 09:42:14 +0000 Subject: powerpc/vdso32: Fallback on getres syscall when clock is unknown There are other clocks than the standard ones, for instance per process clocks. Therefore, being above the last standard clock doesn't mean it is a bad clock. So, fallback to syscall instead of returning -EINVAL inconditionaly. Fixes: e33ffc956b08 ("powerpc/vdso32: implement clock_getres entirely") Cc: stable@vger.kernel.org # v5.6+ Reported-by: Aurelien Jarno Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Tested-by: Aurelien Jarno Link: https://lore.kernel.org/r/7316a9e2c0c2517923eb4b0411c4a08d15e675a4.1589017281.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/vdso32/gettimeofday.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index a3951567118a..e7f8f9f1b3f4 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -218,11 +218,11 @@ V_FUNCTION_BEGIN(__kernel_clock_getres) blr /* - * invalid clock + * syscall fallback */ 99: - li r3, EINVAL - crset so + li r0,__NR_clock_getres + sc blr .cfi_endproc V_FUNCTION_END(__kernel_clock_getres) -- cgit v1.2.3 From 249c9b0cd193d983c3a0b00f3fd3b92333bfeebe Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 9 May 2020 17:05:11 +0000 Subject: powerpc/40x: Make more space for system call exception When CONFIG_VIRT_CPU_ACCOUNTING is selected, system call exception handler doesn't fit below 0xd00 and build fails. As exception 0xd00 doesn't exist and is never generated by 40x, comment it out in order to get more space for system call exception. Fixes: 9e27086292aa ("powerpc/32: Warn and return ENOSYS on syscalls from kernel") Reported-by: kbuild test robot Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/633165d72f75b4ef4c0901aebe99d3915c93e9a2.1589043863.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_40x.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 9bb663977e84..2cec543c38f0 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -344,8 +344,9 @@ _ENTRY(saved_ksp_limit) /* 0x0C00 - System Call Exception */ START_EXCEPTION(0x0C00, SystemCall) SYSCALL_ENTRY 0xc00 +/* Trap_0D is commented out to get more space for system call exception */ - EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_STD) +/* EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_STD) */ EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_STD) EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_STD) -- cgit v1.2.3 From 40bb0e904212cf7d6f041a98c58c8341b2016670 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 20 May 2020 10:23:45 +0000 Subject: Revert "powerpc/32s: reorder Linux PTE bits to better match Hash PTE bits." This reverts commit 697ece78f8f749aeea40f2711389901f0974017a. The implementation of SWAP on powerpc requires page protection bits to not be one of the least significant PTE bits. Until the SWAP implementation is changed and this requirement voids, we have to keep at least _PAGE_RW outside of the 3 last bits. For now, revert to previous PTE bits order. A further rework may come later. Fixes: 697ece78f8f7 ("powerpc/32s: reorder Linux PTE bits to better match Hash PTE bits.") Reported-by: Rui Salvaterra Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b34706f8de87f84d135abb5f3ede6b6f16fb1f41.1589969799.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/hash.h | 8 ++++---- arch/powerpc/kernel/head_32.S | 9 ++++++--- arch/powerpc/mm/book3s32/hash_low.S | 14 ++++++++------ 3 files changed, 18 insertions(+), 13 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/32/hash.h b/arch/powerpc/include/asm/book3s/32/hash.h index 34a7215ae81e..2a0a467d2985 100644 --- a/arch/powerpc/include/asm/book3s/32/hash.h +++ b/arch/powerpc/include/asm/book3s/32/hash.h @@ -17,9 +17,9 @@ * updating the accessed and modified bits in the page table tree. */ -#define _PAGE_USER 0x001 /* usermode access allowed */ -#define _PAGE_RW 0x002 /* software: user write access allowed */ -#define _PAGE_PRESENT 0x004 /* software: pte contains a translation */ +#define _PAGE_PRESENT 0x001 /* software: pte contains a translation */ +#define _PAGE_HASHPTE 0x002 /* hash_page has made an HPTE for this pte */ +#define _PAGE_USER 0x004 /* usermode access allowed */ #define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */ #define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */ #define _PAGE_NO_CACHE 0x020 /* I: cache inhibit */ @@ -27,7 +27,7 @@ #define _PAGE_DIRTY 0x080 /* C: page changed */ #define _PAGE_ACCESSED 0x100 /* R: page referenced */ #define _PAGE_EXEC 0x200 /* software: exec allowed */ -#define _PAGE_HASHPTE 0x400 /* hash_page has made an HPTE for this pte */ +#define _PAGE_RW 0x400 /* software: user write access allowed */ #define _PAGE_SPECIAL 0x800 /* software: Special page */ #ifdef CONFIG_PTE_64BIT diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index daaa153950c2..97c887950c3c 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -348,7 +348,7 @@ BEGIN_MMU_FTR_SECTION andis. r0, r5, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h #endif bne handle_page_fault_tramp_2 /* if not, try to put a PTE */ - rlwinm r3, r5, 32 - 24, 30, 30 /* DSISR_STORE -> _PAGE_RW */ + rlwinm r3, r5, 32 - 15, 21, 21 /* DSISR_STORE -> _PAGE_RW */ bl hash_page b handle_page_fault_tramp_1 FTR_SECTION_ELSE @@ -497,6 +497,7 @@ InstructionTLBMiss: andc. r1,r1,r0 /* check access & ~permission */ bne- InstructionAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ + rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ ori r1, r1, 0xe06 /* clear out reserved bits */ andc r1, r0, r1 /* PP = user? 1 : 0 */ BEGIN_FTR_SECTION @@ -564,8 +565,9 @@ DataLoadTLBMiss: * we would need to update the pte atomically with lwarx/stwcx. */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwinm r1,r0,0,30,30 /* _PAGE_RW -> PP msb */ - rlwimi r0,r0,1,30,30 /* _PAGE_USER -> PP msb */ + rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */ + rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ + rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ ori r1,r1,0xe04 /* clear out reserved bits */ andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */ BEGIN_FTR_SECTION @@ -643,6 +645,7 @@ DataStoreTLBMiss: * we would need to update the pte atomically with lwarx/stwcx. */ /* Convert linux-style PTE to low word of PPC-style PTE */ + rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ li r1,0xe06 /* clear out reserved bits & PP msb */ andc r1,r0,r1 /* PP = user? 1: 0 */ BEGIN_FTR_SECTION diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index 6d236080cb1a..877d880890fe 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -35,7 +35,7 @@ mmu_hash_lock: /* * Load a PTE into the hash table, if possible. * The address is in r4, and r3 contains an access flag: - * _PAGE_RW (0x002) if a write. + * _PAGE_RW (0x400) if a write. * r9 contains the SRR1 value, from which we use the MSR_PR bit. * SPRG_THREAD contains the physical address of the current task's thread. * @@ -69,7 +69,7 @@ _GLOBAL(hash_page) blt+ 112f /* assume user more likely */ lis r5, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r5 ,r5 ,(swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ - rlwimi r3,r9,32-14,31,31 /* MSR_PR -> _PAGE_USER */ + rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */ 112: #ifndef CONFIG_PTE_64BIT rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */ @@ -94,7 +94,7 @@ _GLOBAL(hash_page) #else rlwimi r8,r4,23,20,28 /* compute pte address */ #endif - rlwinm r0,r3,6,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ + rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE /* @@ -310,9 +310,11 @@ Hash_msk = (((1 << Hash_bits) - 1) * 64) _GLOBAL(create_hpte) /* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */ + rlwinm r8,r5,32-9,30,30 /* _PAGE_RW -> PP msb */ rlwinm r0,r5,32-6,30,30 /* _PAGE_DIRTY -> PP msb */ - and r8,r5,r0 /* writable if _RW & _DIRTY */ - rlwimi r5,r5,1,30,30 /* _PAGE_USER -> PP msb */ + and r8,r8,r0 /* writable if _RW & _DIRTY */ + rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */ + rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */ ori r8,r8,0xe04 /* clear out reserved bits */ andc r8,r5,r8 /* PP = user? (rw&dirty? 1: 3): 0 */ BEGIN_FTR_SECTION @@ -564,7 +566,7 @@ _GLOBAL(flush_hash_pages) 33: lwarx r8,0,r5 /* fetch the pte flags word */ andi. r0,r8,_PAGE_HASHPTE beq 8f /* done if HASHPTE is already clear */ - rlwinm r8,r8,0,~_PAGE_HASHPTE /* clear HASHPTE bit */ + rlwinm r8,r8,0,31,29 /* clear HASHPTE bit */ stwcx. r8,0,r5 /* update the pte */ bne- 33b -- cgit v1.2.3 From 8659a0e0efdd975c73355dbc033f79ba3b31e82c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 20 May 2020 23:36:05 +1000 Subject: powerpc/64s: Disable STRICT_KERNEL_RWX Several strange crashes have been eventually traced back to STRICT_KERNEL_RWX and its interaction with code patching. Various paths in our ftrace, kprobes and other patching code need to be hardened against patching failures, otherwise we can end up running with partially/incorrectly patched ftrace paths, kprobes or jump labels, which can then cause strange crashes. Although fixes for those are in development, they're not -rc material. There also seem to be problems with the underlying strict RWX logic, which needs further debugging. So for now disable STRICT_KERNEL_RWX on 64-bit to prevent people from enabling the option and tripping over the bugs. Fixes: 1e0fc9d1eb2b ("powerpc/Kconfig: Enable STRICT_KERNEL_RWX for some configs") Cc: stable@vger.kernel.org # v4.13+ Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200520133605.972649-1-mpe@ellerman.id.au --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 924c541a9260..d13b5328ca10 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -130,7 +130,7 @@ config PPC select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 - select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION) + select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE select ARCH_HAS_UACCESS_MCSAFE if PPC64 -- cgit v1.2.3