Diffstat (limited to 'arch/powerpc/kernel/interrupt.c')
-rw-r--r--   arch/powerpc/kernel/interrupt.c | 484
1 file changed, 311 insertions, 173 deletions
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index e0938ba298f2..21bbd615ca41 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -3,6 +3,7 @@
 #include <linux/context_tracking.h>
 #include <linux/err.h>
 #include <linux/compat.h>
+#include <linux/sched/debug.h> /* for show_regs */

 #include <asm/asm-prototypes.h>
 #include <asm/kup.h>
@@ -26,6 +27,53 @@ unsigned long global_dbcr0[NR_CPUS];

 typedef long (*syscall_fn)(long, long, long, long, long, long);

+#ifdef CONFIG_PPC_BOOK3S_64
+DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
+static inline bool exit_must_hard_disable(void)
+{
+	return static_branch_unlikely(&interrupt_exit_not_reentrant);
+}
+#else
+static inline bool exit_must_hard_disable(void)
+{
+	return true;
+}
+#endif
+
+/*
+ * local irqs must be disabled. Returns false if the caller must re-enable
+ * them, check for new work, and try again.
+ *
+ * This should be called with local irqs disabled, but if they were previously
+ * enabled when the interrupt handler returns (indicating a process-context /
+ * synchronous interrupt) then irqs_enabled should be true.
+ *
+ * restartable is true then EE/RI can be left on because interrupts are handled
+ * with a restart sequence.
+ */
+static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable)
+{
+	/* This must be done with RI=1 because tracing may touch vmaps */
+	trace_hardirqs_on();
+
+	if (exit_must_hard_disable() || !restartable)
+		__hard_EE_RI_disable();
+
+#ifdef CONFIG_PPC64
+	/* This pattern matches prep_irq_for_idle */
+	if (unlikely(lazy_irq_pending_nocheck())) {
+		if (exit_must_hard_disable() || !restartable) {
+			local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+			__hard_RI_enable();
+		}
+		trace_hardirqs_off();
+
+		return false;
+	}
+#endif
+	return true;
+}
+
 /* Has to run notrace because it is entered not completely "reconciled" */
 notrace long system_call_exception(long r3, long r4, long r5,
 				   long r6, long r7, long r8,
@@ -144,71 +192,6 @@ notrace long system_call_exception(long r3, long r4, long r5,
 	return f(r3, r4, r5, r6, r7, r8);
 }

-/*
- * local irqs must be disabled. Returns false if the caller must re-enable
- * them, check for new work, and try again.
- *
- * This should be called with local irqs disabled, but if they were previously
- * enabled when the interrupt handler returns (indicating a process-context /
- * synchronous interrupt) then irqs_enabled should be true.
- */
-static notrace __always_inline bool __prep_irq_for_enabled_exit(bool clear_ri)
-{
-	/* This must be done with RI=1 because tracing may touch vmaps */
-	trace_hardirqs_on();
-
-	/* This pattern matches prep_irq_for_idle */
-	if (clear_ri)
-		__hard_EE_RI_disable();
-	else
-		__hard_irq_disable();
-#ifdef CONFIG_PPC64
-	if (unlikely(lazy_irq_pending_nocheck())) {
-		/* Took an interrupt, may have more exit work to do. */
-		if (clear_ri)
-			__hard_RI_enable();
-		trace_hardirqs_off();
-		local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
-
-		return false;
-	}
-	local_paca->irq_happened = 0;
-	irq_soft_mask_set(IRQS_ENABLED);
-#endif
-	return true;
-}
-
-static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled)
-{
-	if (__prep_irq_for_enabled_exit(clear_ri))
-		return true;
-
-	/*
-	 * Must replay pending soft-masked interrupts now. Don't just
-	 * local_irq_enabe(); local_irq_disable(); because if we are
-	 * returning from an asynchronous interrupt here, another one
-	 * might hit after irqs are enabled, and it would exit via this
-	 * same path allowing another to fire, and so on unbounded.
-	 *
-	 * If interrupts were enabled when this interrupt exited,
-	 * indicating a process context (synchronous) interrupt,
-	 * local_irq_enable/disable can be used, which will enable
-	 * interrupts rather than keeping them masked (unclear how
-	 * much benefit this is over just replaying for all cases,
-	 * because we immediately disable again, so all we're really
-	 * doing is allowing hard interrupts to execute directly for
-	 * a very small time, rather than being masked and replayed).
-	 */
-	if (irqs_enabled) {
-		local_irq_enable();
-		local_irq_disable();
-	} else {
-		replay_soft_interrupts();
-	}
-
-	return false;
-}
-
 static notrace void booke_load_dbcr0(void)
 {
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
@@ -231,57 +214,92 @@ static notrace void booke_load_dbcr0(void)
 #endif
 }

-/*
- * This should be called after a syscall returns, with r3 the return value
- * from the syscall. If this function returns non-zero, the system call
- * exit assembly should additionally load all GPR registers and CTR and XER
- * from the interrupt frame.
- *
- * The function graph tracer can not trace the return side of this function,
- * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
- */
-notrace unsigned long syscall_exit_prepare(unsigned long r3,
-					   struct pt_regs *regs,
-					   long scv)
+static void check_return_regs_valid(struct pt_regs *regs)
 {
-	unsigned long ti_flags;
-	unsigned long ret = 0;
-	bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;
+#ifdef CONFIG_PPC_BOOK3S_64
+	unsigned long trap, srr0, srr1;
+	static bool warned;
+	u8 *validp;
+	char *h;

-	CT_WARN_ON(ct_state() == CONTEXT_USER);
+	if (trap_is_scv(regs))
+		return;

-	kuap_assert_locked();
+	trap = regs->trap;
+	// EE in HV mode sets HSRRs like 0xea0
+	if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL)
+		trap = 0xea0;
+
+	switch (trap) {
+	case 0x980:
+	case INTERRUPT_H_DATA_STORAGE:
+	case 0xe20:
+	case 0xe40:
+	case INTERRUPT_HMI:
+	case 0xe80:
+	case 0xea0:
+	case INTERRUPT_H_FAC_UNAVAIL:
+	case 0x1200:
+	case 0x1500:
+	case 0x1600:
+	case 0x1800:
+		validp = &local_paca->hsrr_valid;
+		if (!*validp)
+			return;
+
+		srr0 = mfspr(SPRN_HSRR0);
+		srr1 = mfspr(SPRN_HSRR1);
+		h = "H";
+
+		break;
+	default:
+		validp = &local_paca->srr_valid;
+		if (!*validp)
+			return;
+
+		srr0 = mfspr(SPRN_SRR0);
+		srr1 = mfspr(SPRN_SRR1);
+		h = "";
+		break;
+	}

-	regs->result = r3;
+	if (srr0 == regs->nip && srr1 == regs->msr)
+		return;

-	/* Check whether the syscall is issued inside a restartable sequence */
-	rseq_syscall(regs);
+	/*
+	 * A NMI / soft-NMI interrupt may have come in after we found
+	 * srr_valid and before the SRRs are loaded. The interrupt then
+	 * comes in and clobbers SRRs and clears srr_valid. Then we load
+	 * the SRRs here and test them above and find they don't match.
+	 *
+	 * Test validity again after that, to catch such false positives.
+	 *
+	 * This test in general will have some window for false negatives
+	 * and may not catch and fix all such cases if an NMI comes in
+	 * later and clobbers SRRs without clearing srr_valid, but hopefully
+	 * such things will get caught most of the time, statistically
+	 * enough to be able to get a warning out.
+	 */
+	barrier();

-	ti_flags = current_thread_info()->flags;
+	if (!*validp)
+		return;

-	if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
-		if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
-			r3 = -r3;
-			regs->ccr |= 0x10000000; /* Set SO bit in CR */
-		}
+	if (!warned) {
+		warned = true;
+		printk("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip);
+		printk("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr);
+		show_regs(regs);
 	}

-	if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
-		if (ti_flags & _TIF_RESTOREALL)
-			ret = _TIF_RESTOREALL;
-		else
-			regs->gpr[3] = r3;
-		clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags);
-	} else {
-		regs->gpr[3] = r3;
-	}
-
-	if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
-		do_syscall_trace_leave(regs);
-		ret |= _TIF_RESTOREALL;
-	}
+	*validp = 0; /* fixup */
+#endif
+}

-	local_irq_disable();
+static notrace unsigned long
+interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs)
+{
+	unsigned long ti_flags;

 again:
 	ti_flags = READ_ONCE(current_thread_info()->flags);
@@ -303,7 +321,7 @@ again:
 		ti_flags = READ_ONCE(current_thread_info()->flags);
 	}

-	if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
 		if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
 				unlikely((ti_flags & _TIF_RESTORE_TM))) {
 			restore_tm_state(regs);
@@ -327,10 +345,10 @@ again:
 		}
 	}

-	user_enter_irqoff();
+	check_return_regs_valid(regs);

-	/* scv need not set RI=0 because SRRs are not used */
-	if (unlikely(!__prep_irq_for_enabled_exit(is_not_scv))) {
+	user_enter_irqoff();
+	if (!prep_irq_for_enabled_exit(true)) {
 		user_exit_irqoff();
 		local_irq_enable();
 		local_irq_disable();
@@ -352,90 +370,131 @@ again:
 	return ret;
 }

-notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
+/*
+ * This should be called after a syscall returns, with r3 the return value
+ * from the syscall. If this function returns non-zero, the system call
+ * exit assembly should additionally load all GPR registers and CTR and XER
+ * from the interrupt frame.
+ *
+ * The function graph tracer can not trace the return side of this function,
+ * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
+ */
+notrace unsigned long syscall_exit_prepare(unsigned long r3,
+					   struct pt_regs *regs,
+					   long scv)
 {
 	unsigned long ti_flags;
-	unsigned long flags;
 	unsigned long ret = 0;
+	bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;

-	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
-		BUG_ON(!(regs->msr & MSR_RI));
-	BUG_ON(!(regs->msr & MSR_PR));
-	BUG_ON(arch_irq_disabled_regs(regs));
 	CT_WARN_ON(ct_state() == CONTEXT_USER);

-	/*
-	 * We don't need to restore AMR on the way back to userspace for KUAP.
-	 * AMR can only have been unlocked if we interrupted the kernel.
-	 */
 	kuap_assert_locked();

-	local_irq_save(flags);
-
-again:
-	ti_flags = READ_ONCE(current_thread_info()->flags);
-	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
-		local_irq_enable(); /* returning to user: may enable */
-		if (ti_flags & _TIF_NEED_RESCHED) {
-			schedule();
-		} else {
-			if (ti_flags & _TIF_SIGPENDING)
-				ret |= _TIF_RESTOREALL;
-			do_notify_resume(regs, ti_flags);
-		}
-		local_irq_disable();
-		ti_flags = READ_ONCE(current_thread_info()->flags);
-	}
+	regs->result = r3;

-	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
-		if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
-				unlikely((ti_flags & _TIF_RESTORE_TM))) {
-			restore_tm_state(regs);
-		} else {
-			unsigned long mathflags = MSR_FP;
+	/* Check whether the syscall is issued inside a restartable sequence */
+	rseq_syscall(regs);

-			if (cpu_has_feature(CPU_FTR_VSX))
-				mathflags |= MSR_VEC | MSR_VSX;
-			else if (cpu_has_feature(CPU_FTR_ALTIVEC))
-				mathflags |= MSR_VEC;
+	ti_flags = current_thread_info()->flags;

-			/* See above restore_math comment */
-			if ((regs->msr & mathflags) != mathflags)
-				restore_math(regs);
+	if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
+		if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
+			r3 = -r3;
+			regs->ccr |= 0x10000000; /* Set SO bit in CR */
 		}
 	}

-	user_enter_irqoff();
+	if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
+		if (ti_flags & _TIF_RESTOREALL)
+			ret = _TIF_RESTOREALL;
+		else
+			regs->gpr[3] = r3;
+		clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags);
+	} else {
+		regs->gpr[3] = r3;
+	}

-	if (unlikely(!__prep_irq_for_enabled_exit(true))) {
-		user_exit_irqoff();
-		local_irq_enable();
-		local_irq_disable();
-		goto again;
+	if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
+		do_syscall_trace_leave(regs);
+		ret |= _TIF_RESTOREALL;
 	}

-	booke_load_dbcr0();
+	local_irq_disable();
+	ret = interrupt_exit_user_prepare_main(ret, regs);

-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	local_paca->tm_scratch = regs->msr;
+#ifdef CONFIG_PPC64
+	regs->exit_result = ret;
 #endif

-	account_cpu_user_exit();
+	return ret;
+}

-	/* Restore user access locks last */
-	kuap_user_restore(regs);
-	kuep_unlock();
+#ifdef CONFIG_PPC64
+notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs)
+{
+	/*
+	 * This is called when detecting a soft-pending interrupt as well as
+	 * an alternate-return interrupt. So we can't just have the alternate
+	 * return path clear SRR1[MSR] and set PACA_IRQ_HARD_DIS (unless
+	 * the soft-pending case were to fix things up as well). RI might be
+	 * disabled, in which case it gets re-enabled by __hard_irq_disable().
+	 */
+	__hard_irq_disable();
+	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+	trace_hardirqs_off();
+	user_exit_irqoff();
+	account_cpu_user_entry();
+
+	BUG_ON(!user_mode(regs));
+
+	regs->exit_result = interrupt_exit_user_prepare_main(regs->exit_result, regs);
+
+	return regs->exit_result;
+}
+#endif
+
+notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs)
+{
+	unsigned long ret;
+
+	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
+		BUG_ON(!(regs->msr & MSR_RI));
+	BUG_ON(!(regs->msr & MSR_PR));
+	BUG_ON(arch_irq_disabled_regs(regs));
+	CT_WARN_ON(ct_state() == CONTEXT_USER);
+
+	/*
+	 * We don't need to restore AMR on the way back to userspace for KUAP.
+	 * AMR can only have been unlocked if we interrupted the kernel.
+	 */
+	kuap_assert_locked();
+
+	local_irq_disable();
+
+	ret = interrupt_exit_user_prepare_main(0, regs);
+
+#ifdef CONFIG_PPC64
+	regs->exit_result = ret;
+#endif

 	return ret;
 }

 void preempt_schedule_irq(void);

-notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr)
+notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
 {
 	unsigned long flags;
 	unsigned long ret = 0;
 	unsigned long kuap;
+	bool stack_store = current_thread_info()->flags &
+						_TIF_EMULATE_STACK_STORE;

 	if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) &&
 	    unlikely(!(regs->msr & MSR_RI)))
@@ -450,11 +509,6 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsign

 	kuap = kuap_get_and_assert_locked();

-	if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) {
-		clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags);
-		ret = 1;
-	}
-
 	local_irq_save(flags);

 	if (!arch_irq_disabled_regs(regs)) {
@@ -469,17 +523,58 @@ again:
 		}
 	}

-		if (unlikely(!prep_irq_for_enabled_exit(true, !irqs_disabled_flags(flags))))
+		check_return_regs_valid(regs);
+
+		/*
+		 * Stack store exit can't be restarted because the interrupt
+		 * stack frame might have been clobbered.
+		 */
+		if (!prep_irq_for_enabled_exit(unlikely(stack_store))) {
+			/*
+			 * Replay pending soft-masked interrupts now. Don't
+			 * just local_irq_enabe(); local_irq_disable(); because
+			 * if we are returning from an asynchronous interrupt
+			 * here, another one might hit after irqs are enabled,
+			 * and it would exit via this same path allowing
+			 * another to fire, and so on unbounded.
+			 */
+			hard_irq_disable();
+			replay_soft_interrupts();
+			/* Took an interrupt, may have more exit work to do. */
 			goto again;
-	} else {
-		/* Returning to a kernel context with local irqs disabled. */
-		__hard_EE_RI_disable();
+		}
 #ifdef CONFIG_PPC64
+		/*
+		 * An interrupt may clear MSR[EE] and set this concurrently,
+		 * but it will be marked pending and the exit will be retried.
+		 * This leaves a racy window where MSR[EE]=0 and HARD_DIS is
+		 * clear, until interrupt_exit_kernel_restart() calls
+		 * hard_irq_disable(), which will set HARD_DIS again.
+		 */
+		local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+
+	} else {
+		check_return_regs_valid(regs);
+
+		if (unlikely(stack_store))
+			__hard_EE_RI_disable();
+		/*
+		 * Returning to a kernel context with local irqs disabled.
+		 * Here, if EE was enabled in the interrupted context, enable
+		 * it on return as well. A problem exists here where a soft
+		 * masked interrupt may have cleared MSR[EE] and set HARD_DIS
+		 * here, and it will still exist on return to the caller. This
+		 * will be resolved by the masked interrupt firing again.
+		 */
 		if (regs->msr & MSR_EE)
 			local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
-#endif
+#endif /* CONFIG_PPC64 */
 	}

+	if (unlikely(stack_store)) {
+		clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags);
+		ret = 1;
+	}
+
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	local_paca->tm_scratch = regs->msr;
@@ -494,3 +589,46 @@ again:

 	return ret;
 }
+
+#ifdef CONFIG_PPC64
+notrace unsigned long interrupt_exit_user_restart(struct pt_regs *regs)
+{
+	__hard_irq_disable();
+	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+	trace_hardirqs_off();
+	user_exit_irqoff();
+	account_cpu_user_entry();
+
+	BUG_ON(!user_mode(regs));
+
+	regs->exit_result |= interrupt_exit_user_prepare(regs);
+
+	return regs->exit_result;
+}
+
+/*
+ * No real need to return a value here because the stack store case does not
+ * get restarted.
+ */
+notrace unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs)
+{
+	__hard_irq_disable();
+	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+	set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+	if (regs->softe == IRQS_ENABLED)
+		trace_hardirqs_off();
+
+	BUG_ON(user_mode(regs));
+
+	return interrupt_exit_kernel_prepare(regs);
+}
+#endif
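Note on the new exit contract: the common exit paths now call prep_irq_for_enabled_exit() with a "restartable" flag and only hard-disable EE/RI when the exit cannot be restarted (or when the interrupt_exit_not_reentrant static key forces it); if pending soft-masked work is found, the function backs out and returns false so the caller handles the work and retries. The sketch below is a plain userspace C illustration of that retry contract only, not kernel code; every name in it (hard_ee_ri_disable(), pending_irq, and so on) is a stand-in for the corresponding kernel primitive.

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the kernel state and primitives used by the real function. */
static bool not_reentrant;   /* models the interrupt_exit_not_reentrant static key */
static bool pending_irq;     /* models lazy_irq_pending_nocheck() */

static bool exit_must_hard_disable(void) { return not_reentrant; }
static void hard_ee_ri_disable(void) { puts("EE/RI cleared"); }
static void hard_ri_enable(void)     { puts("RI re-enabled"); }

/*
 * Returns false if pending (soft-masked) work was found; the caller must
 * handle it and try the exit again, mirroring the retry loops around
 * prep_irq_for_enabled_exit() in the patch.
 */
static bool prep_irq_for_enabled_exit(bool restartable)
{
	if (exit_must_hard_disable() || !restartable)
		hard_ee_ri_disable();   /* non-restartable exit: close the window now */

	if (pending_irq) {
		if (exit_must_hard_disable() || !restartable)
			hard_ri_enable(); /* back out so the retry path can run safely */
		return false;             /* caller loops: handle the work, try again */
	}
	return true;                      /* safe to return; restartable exits keep EE/RI on */
}

int main(void)
{
	pending_irq = true;
	while (!prep_irq_for_enabled_exit(true))
		pending_irq = false;      /* "replay" the pending interrupt, then retry */
	puts("exit completed");
	return 0;
}

The real function additionally records PACA_IRQ_HARD_DIS and keeps the tracing calls in order, and the kernel callers replay soft-masked interrupts (or re-enable/disable local irqs) rather than clearing a flag, but the shape of the loop is the same: restartable exits leave EE/RI on and rely on the restart sequence, while non-restartable cases such as the emulated stack store still hard-disable before returning.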