summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
authorNicholas Piggin <npiggin@gmail.com>2021-06-22 22:16:28 +1000
committerMichael Ellerman <mpe@ellerman.id.au>2021-06-25 00:06:57 +1000
commit6eaaf9de3599865ee3b339d90cb24f2153f40bf4 (patch)
tree85aaa6964cdec3b750f442d2ebe22b9bd013f4f8 /arch/powerpc/kernel
parentae58b1c645895c28ca155843db6788d57ea99e11 (diff)
downloadlinux-6eaaf9de3599865ee3b339d90cb24f2153f40bf4.tar.gz
linux-6eaaf9de3599865ee3b339d90cb24f2153f40bf4.tar.bz2
linux-6eaaf9de3599865ee3b339d90cb24f2153f40bf4.zip
powerpc/64s/interrupt: Check and fix srr_valid without crashing
The PPC_RFI_SRR_DEBUG check added by patch "powerpc/64s: avoid reloading (H)SRR registers if they are still valid" has a few deficiencies. It does not fix the actual problem, it's not enabled by default, and it causes a program check interrupt which can cause more difficulties. However there are a lot of paths which may clobber SRRs or change return regs, and difficult to have a high confidence that all paths are covered without wider testing. Add a relatively low overhead always-enabled check that catches most such cases, reports once, and fixes it so the kernel can continue. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> [mpe: Rebase, use switch & INT names, squash in race fix from Nick] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/interrupt.c89
1 files changed, 89 insertions, 0 deletions
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index cc945b27fc01..cee12f2fd459 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -3,6 +3,7 @@
#include <linux/context_tracking.h>
#include <linux/err.h>
#include <linux/compat.h>
+#include <linux/sched/debug.h> /* for show_regs */
#include <asm/asm-prototypes.h>
#include <asm/kup.h>
@@ -215,6 +216,88 @@ static notrace void booke_load_dbcr0(void)
#endif
}
+static void check_return_regs_valid(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+ unsigned long trap, srr0, srr1;
+ static bool warned;
+ u8 *validp;
+ char *h;
+
+ if (regs->trap == 0x3000)
+ return;
+
+ trap = regs->trap;
+ // EE in HV mode sets HSRRs like 0xea0
+ if (cpu_has_feature(CPU_FTR_HVMODE) && trap == 0x500)
+ trap = 0xea0;
+
+ switch (trap) {
+ case 0x980:
+ case INTERRUPT_H_DATA_STORAGE:
+ case 0xe20:
+ case 0xe40:
+ case INTERRUPT_HMI:
+ case 0xe80:
+ case 0xea0:
+ case INTERRUPT_H_FAC_UNAVAIL:
+ case 0x1200:
+ case 0x1500:
+ case 0x1600:
+ case 0x1800:
+ validp = &local_paca->hsrr_valid;
+ if (!*validp)
+ return;
+
+ srr0 = mfspr(SPRN_HSRR0);
+ srr1 = mfspr(SPRN_HSRR1);
+ h = "H";
+
+ break;
+ default:
+ validp = &local_paca->srr_valid;
+ if (!*validp)
+ return;
+
+ srr0 = mfspr(SPRN_SRR0);
+ srr1 = mfspr(SPRN_SRR1);
+ h = "";
+ break;
+ }
+
+ if (srr0 == regs->nip && srr1 == regs->msr)
+ return;
+
+ /*
+ * A NMI / soft-NMI interrupt may have come in after we found
+ * srr_valid and before the SRRs are loaded. The interrupt then
+ * comes in and clobbers SRRs and clears srr_valid. Then we load
+ * the SRRs here and test them above and find they don't match.
+ *
+ * Test validity again after that, to catch such false positives.
+ *
+ * This test in general will have some window for false negatives
+ * and may not catch and fix all such cases if an NMI comes in
+ * later and clobbers SRRs without clearing srr_valid, but hopefully
+ * such things will get caught most of the time, statistically
+ * enough to be able to get a warning out.
+ */
+ barrier();
+
+ if (!*validp)
+ return;
+
+ if (!warned) {
+ warned = true;
+ printk("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip);
+ printk("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr);
+ show_regs(regs);
+ }
+
+ *validp = 0; /* fixup */
+#endif
+}
+
static notrace unsigned long
interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs)
{
@@ -264,6 +347,8 @@ again:
}
}
+ check_return_regs_valid(regs);
+
user_enter_irqoff();
if (!prep_irq_for_enabled_exit(true)) {
user_exit_irqoff();
@@ -440,6 +525,8 @@ again:
}
}
+ check_return_regs_valid(regs);
+
/*
* Stack store exit can't be restarted because the interrupt
* stack frame might have been clobbered.
@@ -469,6 +556,8 @@ again:
local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
} else {
+ check_return_regs_valid(regs);
+
if (unlikely(stack_store))
__hard_EE_RI_disable();
/*