diff options
author | Nicholas Piggin <npiggin@gmail.com> | 2021-06-22 22:16:28 +1000 |
---|---|---|
committer | Michael Ellerman <mpe@ellerman.id.au> | 2021-06-25 00:06:57 +1000 |
commit | 6eaaf9de3599865ee3b339d90cb24f2153f40bf4 (patch) | |
tree | 85aaa6964cdec3b750f442d2ebe22b9bd013f4f8 /arch/powerpc/kernel/interrupt.c | |
parent | ae58b1c645895c28ca155843db6788d57ea99e11 (diff) | |
download | linux-6eaaf9de3599865ee3b339d90cb24f2153f40bf4.tar.gz linux-6eaaf9de3599865ee3b339d90cb24f2153f40bf4.tar.bz2 linux-6eaaf9de3599865ee3b339d90cb24f2153f40bf4.zip |
powerpc/64s/interrupt: Check and fix srr_valid without crashing
The PPC_RFI_SRR_DEBUG check added by patch "powerpc/64s: avoid reloading
(H)SRR registers if they are still valid" has a few deficiencies. It
does not fix the actual problem, it's not enabled by default, and it
causes a program check interrupt which can cause more difficulties.
However there are a lot of paths which may clobber SRRs or change return
regs, and it is difficult to have high confidence that all paths are covered
without wider testing.
Add a relatively low overhead always-enabled check that catches most
such cases, reports once, and fixes it so the kernel can continue.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Rebase, use switch & INT names, squash in race fix from Nick]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc/kernel/interrupt.c')
-rw-r--r-- | arch/powerpc/kernel/interrupt.c | 89 |
1 file changed, 89 insertions, 0 deletions
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index cc945b27fc01..cee12f2fd459 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -3,6 +3,7 @@ #include <linux/context_tracking.h> #include <linux/err.h> #include <linux/compat.h> +#include <linux/sched/debug.h> /* for show_regs */ #include <asm/asm-prototypes.h> #include <asm/kup.h> @@ -215,6 +216,88 @@ static notrace void booke_load_dbcr0(void) #endif } +static void check_return_regs_valid(struct pt_regs *regs) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + unsigned long trap, srr0, srr1; + static bool warned; + u8 *validp; + char *h; + + if (regs->trap == 0x3000) + return; + + trap = regs->trap; + // EE in HV mode sets HSRRs like 0xea0 + if (cpu_has_feature(CPU_FTR_HVMODE) && trap == 0x500) + trap = 0xea0; + + switch (trap) { + case 0x980: + case INTERRUPT_H_DATA_STORAGE: + case 0xe20: + case 0xe40: + case INTERRUPT_HMI: + case 0xe80: + case 0xea0: + case INTERRUPT_H_FAC_UNAVAIL: + case 0x1200: + case 0x1500: + case 0x1600: + case 0x1800: + validp = &local_paca->hsrr_valid; + if (!*validp) + return; + + srr0 = mfspr(SPRN_HSRR0); + srr1 = mfspr(SPRN_HSRR1); + h = "H"; + + break; + default: + validp = &local_paca->srr_valid; + if (!*validp) + return; + + srr0 = mfspr(SPRN_SRR0); + srr1 = mfspr(SPRN_SRR1); + h = ""; + break; + } + + if (srr0 == regs->nip && srr1 == regs->msr) + return; + + /* + * A NMI / soft-NMI interrupt may have come in after we found + * srr_valid and before the SRRs are loaded. The interrupt then + * comes in and clobbers SRRs and clears srr_valid. Then we load + * the SRRs here and test them above and find they don't match. + * + * Test validity again after that, to catch such false positives. 
+ * + * This test in general will have some window for false negatives + * and may not catch and fix all such cases if an NMI comes in + * later and clobbers SRRs without clearing srr_valid, but hopefully + * such things will get caught most of the time, statistically + * enough to be able to get a warning out. + */ + barrier(); + + if (!*validp) + return; + + if (!warned) { + warned = true; + printk("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip); + printk("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr); + show_regs(regs); + } + + *validp = 0; /* fixup */ +#endif +} + static notrace unsigned long interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs) { @@ -264,6 +347,8 @@ again: } } + check_return_regs_valid(regs); + user_enter_irqoff(); if (!prep_irq_for_enabled_exit(true)) { user_exit_irqoff(); @@ -440,6 +525,8 @@ again: } } + check_return_regs_valid(regs); + /* * Stack store exit can't be restarted because the interrupt * stack frame might have been clobbered. @@ -469,6 +556,8 @@ again: local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; } else { + check_return_regs_valid(regs); + if (unlikely(stack_store)) __hard_EE_RI_disable(); /* |