Diffstat (limited to 'arch/powerpc/kernel/interrupt.c'):
 arch/powerpc/kernel/interrupt.c | 393 ++++++++++++++++++++++++++++------------
 1 file changed, 261 insertions(+), 132 deletions(-)
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 05fa3ae56e25..acd6a3f5ec9d 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -26,6 +26,83 @@ unsigned long global_dbcr0[NR_CPUS];
typedef long (*syscall_fn)(long, long, long, long, long, long);
+#ifdef CONFIG_PPC_BOOK3S_64
+DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
+static inline bool exit_must_hard_disable(void)
+{
+ return static_branch_unlikely(&interrupt_exit_not_reentrant);
+}
+#elif defined(CONFIG_PPC64)
+static inline bool exit_must_hard_disable(void)
+{
+ return false;
+}
+#endif
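
Aside: exit_must_hard_disable() is keyed off a static branch, so the common (reentrant) case costs only a patched nop. The site that enables the key is not part of this hunk; a minimal sketch of what it could look like, with the hook and predicate names purely hypothetical:

	/* Hypothetical boot hook (not in this patch); the condition is a placeholder. */
	static int __init interrupt_exit_setup(void)
	{
		if (!exit_sequence_is_restartable())	/* placeholder predicate */
			static_branch_enable(&interrupt_exit_not_reentrant);
		return 0;
	}
	early_initcall(interrupt_exit_setup);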
+
+/*
+ * local irqs must be disabled. Returns false if the caller must re-enable
+ * them, check for new work, and try again.
+ */
+static notrace __always_inline bool prep_irq_for_user_exit(void)
+{
+ user_enter_irqoff();
+ /* This must be done with RI=1 because tracing may touch vmaps */
+ trace_hardirqs_on();
+
+#ifdef CONFIG_PPC32
+ __hard_EE_RI_disable();
+#else
+ if (exit_must_hard_disable())
+ __hard_EE_RI_disable();
+
+ /* This pattern matches prep_irq_for_idle */
+ if (unlikely(lazy_irq_pending_nocheck())) {
+ if (exit_must_hard_disable()) {
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ __hard_RI_enable();
+ }
+ trace_hardirqs_off();
+ user_exit_irqoff();
+
+ return false;
+ }
+#endif
+ return true;
+}
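
A false return from prep_irq_for_user_exit() means an interrupt became pending after the final check, so there may be new exit work. The callers in this patch handle that by briefly enabling interrupts (letting the pending one fire) and looping, as the later hunks show:

	again:
		/* ... process TIF work with irqs on ... */
		if (!prep_irq_for_user_exit()) {
			local_irq_enable();	/* take the pending interrupt */
			local_irq_disable();
			goto again;		/* re-check for new exit work */
		}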
+
+/*
+ * If restartable is true then EE/RI can be left on because interrupts are
+ * handled with a restart sequence.
+ */
+static notrace __always_inline bool prep_irq_for_kernel_enabled_exit(bool restartable)
+{
+ /* This must be done with RI=1 because tracing may touch vmaps */
+ trace_hardirqs_on();
+
+#ifdef CONFIG_PPC32
+ __hard_EE_RI_disable();
+#else
+ if (exit_must_hard_disable() || !restartable)
+ __hard_EE_RI_disable();
+
+ /* This pattern matches prep_irq_for_idle */
+ if (unlikely(lazy_irq_pending_nocheck())) {
+ if (exit_must_hard_disable() || !restartable) {
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ __hard_RI_enable();
+ }
+ trace_hardirqs_off();
+
+ return false;
+ }
+#endif
+ return true;
+}
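
The kernel-exit caller cannot use the enable/disable trick above: returning from an asynchronous interrupt that way could let another interrupt exit down the same path and recurse without bound. It therefore replays soft-masked interrupts directly, as the interrupt_exit_kernel_prepare() hunk below does:

	if (!prep_irq_for_kernel_enabled_exit(restartable)) {
		hard_irq_disable();
		replay_soft_interrupts();
		/* Took an interrupt, may have more exit work to do. */
		goto again;
	}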
+
/* Has to run notrace because it is entered not completely "reconciled" */
notrace long system_call_exception(long r3, long r4, long r5,
long r6, long r7, long r8,
@@ -144,71 +221,6 @@ notrace long system_call_exception(long r3, long r4, long r5,
return f(r3, r4, r5, r6, r7, r8);
}
-/*
- * local irqs must be disabled. Returns false if the caller must re-enable
- * them, check for new work, and try again.
- *
- * This should be called with local irqs disabled, but if they were previously
- * enabled when the interrupt handler returns (indicating a process-context /
- * synchronous interrupt) then irqs_enabled should be true.
- */
-static notrace __always_inline bool __prep_irq_for_enabled_exit(bool clear_ri)
-{
- /* This must be done with RI=1 because tracing may touch vmaps */
- trace_hardirqs_on();
-
- /* This pattern matches prep_irq_for_idle */
- if (clear_ri)
- __hard_EE_RI_disable();
- else
- __hard_irq_disable();
-#ifdef CONFIG_PPC64
- if (unlikely(lazy_irq_pending_nocheck())) {
- /* Took an interrupt, may have more exit work to do. */
- if (clear_ri)
- __hard_RI_enable();
- trace_hardirqs_off();
- local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
-
- return false;
- }
- local_paca->irq_happened = 0;
- irq_soft_mask_set(IRQS_ENABLED);
-#endif
- return true;
-}
-
-static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri, bool irqs_enabled)
-{
- if (__prep_irq_for_enabled_exit(clear_ri))
- return true;
-
- /*
- * Must replay pending soft-masked interrupts now. Don't just
- * local_irq_enable(); local_irq_disable(); because if we are
- * returning from an asynchronous interrupt here, another one
- * might hit after irqs are enabled, and it would exit via this
- * same path allowing another to fire, and so on unbounded.
- *
- * If interrupts were enabled when this interrupt exited,
- * indicating a process context (synchronous) interrupt,
- * local_irq_enable/disable can be used, which will enable
- * interrupts rather than keeping them masked (unclear how
- * much benefit this is over just replaying for all cases,
- * because we immediately disable again, so all we're really
- * doing is allowing hard interrupts to execute directly for
- * a very small time, rather than being masked and replayed).
- */
- if (irqs_enabled) {
- local_irq_enable();
- local_irq_disable();
- } else {
- replay_soft_interrupts();
- }
-
- return false;
-}
-
static notrace void booke_load_dbcr0(void)
{
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
@@ -231,57 +243,11 @@ static notrace void booke_load_dbcr0(void)
#endif
}
-/*
- * This should be called after a syscall returns, with r3 the return value
- * from the syscall. If this function returns non-zero, the system call
- * exit assembly should additionally load all GPR registers and CTR and XER
- * from the interrupt frame.
- *
- * The function graph tracer can not trace the return side of this function,
- * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
- */
-notrace unsigned long syscall_exit_prepare(unsigned long r3,
- struct pt_regs *regs,
- long scv)
+static notrace unsigned long syscall_exit_prepare_main(unsigned long r3,
+ struct pt_regs *regs)
{
unsigned long ti_flags;
unsigned long ret = 0;
- bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;
-
- CT_WARN_ON(ct_state() == CONTEXT_USER);
-
- kuap_assert_locked();
-
- regs->result = r3;
-
- /* Check whether the syscall is issued inside a restartable sequence */
- rseq_syscall(regs);
-
- ti_flags = current_thread_info()->flags;
-
- if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
- if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
- r3 = -r3;
- regs->ccr |= 0x10000000; /* Set SO bit in CR */
- }
- }
-
- if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
- if (ti_flags & _TIF_RESTOREALL)
- ret = _TIF_RESTOREALL;
- else
- regs->gpr[3] = r3;
- clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags);
- } else {
- regs->gpr[3] = r3;
- }
-
- if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
- do_syscall_trace_leave(regs);
- ret |= _TIF_RESTOREALL;
- }
-
- local_irq_disable();
again:
ti_flags = READ_ONCE(current_thread_info()->flags);
@@ -327,11 +293,7 @@ again:
}
}
- user_enter_irqoff();
-
- /* scv need not set RI=0 because SRRs are not used */
- if (unlikely(!__prep_irq_for_enabled_exit(is_not_scv))) {
- user_exit_irqoff();
+ if (!prep_irq_for_user_exit()) {
local_irq_enable();
local_irq_disable();
goto again;
@@ -352,6 +314,95 @@ again:
return ret;
}
+/*
+ * This should be called after a syscall returns, with r3 the return value
+ * from the syscall. If this function returns non-zero, the system call
+ * exit assembly should additionally load all GPR registers and CTR and XER
+ * from the interrupt frame.
+ *
+ * The function graph tracer can not trace the return side of this function,
+ * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
+ */
+notrace unsigned long syscall_exit_prepare(unsigned long r3,
+ struct pt_regs *regs,
+ long scv)
+{
+ unsigned long ti_flags;
+ unsigned long ret = 0;
+ bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;
+
+ CT_WARN_ON(ct_state() == CONTEXT_USER);
+
+ kuap_assert_locked();
+
+ regs->result = r3;
+
+ /* Check whether the syscall is issued inside a restartable sequence */
+ rseq_syscall(regs);
+
+ ti_flags = current_thread_info()->flags;
+
+ if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
+ if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
+ r3 = -r3;
+ regs->ccr |= 0x10000000; /* Set SO bit in CR */
+ }
+ }
+
+ if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
+ if (ti_flags & _TIF_RESTOREALL)
+ ret = _TIF_RESTOREALL;
+ else
+ regs->gpr[3] = r3;
+ clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags);
+ } else {
+ regs->gpr[3] = r3;
+ }
+
+ if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
+ do_syscall_trace_leave(regs);
+ ret |= _TIF_RESTOREALL;
+ }
+
+ local_irq_disable();
+ ret |= syscall_exit_prepare_main(r3, regs);
+
+#ifdef CONFIG_PPC64
+ regs->exit_result = ret;
+#endif
+
+ return ret;
+}
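
The r3/CR0 handling above implements the traditional sc error convention; scv returns a negative errno directly, like most other architectures. A worked example, illustrative only:

	/*
	 * A handler returns -EBADF (-9): r3 arrives as (unsigned long)-9,
	 * which is >= (unsigned long)-MAX_ERRNO, so for sc the exit path does
	 *
	 *   r3 = 9;                        (positive errno for userspace)
	 *   regs->ccr |= 0x10000000;       (CR0[SO] = "syscall failed")
	 *
	 * and the libc wrapper tests SO and stores r3 into errno.
	 * For scv, is_not_scv is false and r3 is returned as -9 unchanged.
	 */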
+
+#ifdef CONFIG_PPC64
+notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs)
+{
+ /*
+ * This is called when detecting a soft-pending interrupt as well as
+ * an alternate-return interrupt. So we can't just have the alternate
+ * return path clear SRR1[MSR] and set PACA_IRQ_HARD_DIS (unless
+ * the soft-pending case were to fix things up as well). RI might be
+ * disabled, in which case it gets re-enabled by __hard_irq_disable().
+ */
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+ trace_hardirqs_off();
+ user_exit_irqoff();
+ account_cpu_user_entry();
+
+ BUG_ON(!user_mode(regs));
+
+ regs->exit_result |= syscall_exit_prepare_main(r3, regs);
+
+ return regs->exit_result;
+}
+#endif
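
Note that syscall_exit_restart() ORs into regs->exit_result rather than assigning, so work flags recorded by the first pass survive any number of restarts. An illustrative trace:

	/*
	 * first pass:   syscall_exit_prepare() -> exit_result = _TIF_RESTOREALL
	 * interrupt hits inside the low-level exit sequence
	 * restart pass: syscall_exit_restart() -> exit_result |= 0 (no new work)
	 * The exit assembly still sees _TIF_RESTOREALL and reloads GPRs/CTR/XER.
	 */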
+
notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs)
{
unsigned long ti_flags;
@@ -405,10 +456,7 @@ again:
}
}
- user_enter_irqoff();
-
- if (unlikely(!__prep_irq_for_enabled_exit(true))) {
- user_exit_irqoff();
+ if (!prep_irq_for_user_exit()) {
local_irq_enable();
local_irq_disable();
goto again;
@@ -422,6 +470,10 @@ again:
account_cpu_user_exit();
+#ifdef CONFIG_PPC64
+ regs->exit_result = ret;
+#endif
+
/* Restore user access locks last */
kuap_user_restore(regs);
kuep_unlock();
@@ -436,6 +488,8 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
unsigned long flags;
unsigned long ret = 0;
unsigned long kuap;
+ bool stack_store = current_thread_info()->flags &
+ _TIF_EMULATE_STACK_STORE;
if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x) &&
unlikely(!(regs->msr & MSR_RI)))
@@ -450,11 +504,6 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
kuap = kuap_get_and_assert_locked();
- if (unlikely(current_thread_info()->flags & _TIF_EMULATE_STACK_STORE)) {
- clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags);
- ret = 1;
- }
-
local_irq_save(flags);
if (!arch_irq_disabled_regs(regs)) {
@@ -469,17 +518,54 @@ again:
}
}
- if (unlikely(!prep_irq_for_enabled_exit(true, !irqs_disabled_flags(flags))))
+ /*
+ * Stack store exit can't be restarted because the interrupt
+ * stack frame might have been clobbered.
+ */
+ if (!prep_irq_for_kernel_enabled_exit(unlikely(stack_store))) {
+ /*
+ * Replay pending soft-masked interrupts now. Don't
+ * just local_irq_enable(); local_irq_disable(); because
+ * if we are returning from an asynchronous interrupt
+ * here, another one might hit after irqs are enabled,
+ * and it would exit via this same path allowing
+ * another to fire, and so on unbounded.
+ */
+ hard_irq_disable();
+ replay_soft_interrupts();
+ /* Took an interrupt, may have more exit work to do. */
goto again;
- } else {
- /* Returning to a kernel context with local irqs disabled. */
- __hard_EE_RI_disable();
+ }
#ifdef CONFIG_PPC64
+ /*
+ * An interrupt may clear MSR[EE] and set this concurrently,
+ * but it will be marked pending and the exit will be retried.
+ * This leaves a racy window where MSR[EE]=0 and HARD_DIS is
+ * clear, until interrupt_exit_kernel_restart() calls
+ * hard_irq_disable(), which will set HARD_DIS again.
+ */
+ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+
+ } else {
+ if (unlikely(stack_store))
+ __hard_EE_RI_disable();
+ /*
+ * Returning to a kernel context with local irqs disabled.
+ * Here, if EE was enabled in the interrupted context, enable
+ * it on return as well. A problem exists here where a soft
+ * masked interrupt may have cleared MSR[EE] and set HARD_DIS
+ * here, and it will still exist on return to the caller. This
+ * will be resolved by the masked interrupt firing again.
+ */
if (regs->msr & MSR_EE)
local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
-#endif
+#endif /* CONFIG_PPC64 */
}
+ if (unlikely(stack_store)) {
+ clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags);
+ ret = 1;
+ }
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
local_paca->tm_scratch = regs->msr;
@@ -494,3 +580,46 @@ again:
return ret;
}
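
The stack_store case is special because fixup code may rewrite the interrupt frame itself, so that exit must not be restartable. For reference, the flag is set when a store-with-update to r1 is emulated; a sketch based on mainline's handle_stack_update() in arch/powerpc/lib/sstep.c (names may differ by version):

	if (ea == regs->gpr[1]) {
		/*
		 * An emulated store creating a new instance of r1 would
		 * clobber the interrupt frame; flag it so the exit code
		 * performs the store after the frame is unwound.
		 */
		set_thread_flag(TIF_EMULATE_STACK_STORE);
	}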
+
+#ifdef CONFIG_PPC64
+notrace unsigned long interrupt_exit_user_restart(struct pt_regs *regs)
+{
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+ trace_hardirqs_off();
+ user_exit_irqoff();
+ account_cpu_user_entry();
+
+ BUG_ON(!user_mode(regs));
+
+ regs->exit_result |= interrupt_exit_user_prepare(regs);
+
+ return regs->exit_result;
+}
+
+/*
+ * No real need to return a value here because the stack store case does not
+ * get restarted.
+ */
+notrace unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs)
+{
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+ if (regs->softe == IRQS_ENABLED)
+ trace_hardirqs_off();
+
+ BUG_ON(user_mode(regs));
+
+ return interrupt_exit_kernel_prepare(regs);
+}
+#endif
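
The *_restart() functions above are entered from the low-level exit assembly rather than called from C: if an interrupt is taken while the exit sequence is still running with EE enabled, the handler looks the interrupted NIP up in a restart table and redirects the return. A minimal sketch of that handler-side lookup, assuming a helper like the search_kernel_restart_table() added elsewhere in this series:

	if (!user_mode(regs)) {
		unsigned long rst = search_kernel_restart_table(regs->nip);
		if (rst)
			regs->nip = rst;	/* resume at the *_restart() entry */
	}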