summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/entry_64.S60
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S38
-rw-r--r--arch/powerpc/kernel/idle_book3s.S21
-rw-r--r--arch/powerpc/kernel/irq.c15
-rw-r--r--arch/powerpc/kernel/process.c9
-rw-r--r--arch/powerpc/kernel/ptrace.c13
-rw-r--r--arch/powerpc/kernel/smp.c18
-rw-r--r--arch/powerpc/kernel/watchdog.c49
8 files changed, 129 insertions, 94 deletions
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 49d8422767b4..e925c1c99c71 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -223,17 +223,27 @@ system_call_exit:
andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
bne- .Lsyscall_exit_work
- /* If MSR_FP and MSR_VEC are set in user msr, then no need to restore */
- li r7,MSR_FP
+ andi. r0,r8,MSR_FP
+ beq 2f
#ifdef CONFIG_ALTIVEC
- oris r7,r7,MSR_VEC@h
+ andis. r0,r8,MSR_VEC@h
+ bne 3f
#endif
- and r0,r8,r7
- cmpd r0,r7
- bne .Lsyscall_restore_math
-.Lsyscall_restore_math_cont:
+2: addi r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_PPC_BOOK3S
+ li r10,MSR_RI
+ mtmsrd r10,1 /* Restore RI */
+#endif
+ bl restore_math
+#ifdef CONFIG_PPC_BOOK3S
+ li r11,0
+ mtmsrd r11,1
+#endif
+ ld r8,_MSR(r1)
+ ld r3,RESULT(r1)
+ li r11,-MAX_ERRNO
- cmpld r3,r11
+3: cmpld r3,r11
ld r5,_CCR(r1)
bge- .Lsyscall_error
.Lsyscall_error_cont:
@@ -267,40 +277,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
std r5,_CCR(r1)
b .Lsyscall_error_cont
-.Lsyscall_restore_math:
- /*
- * Some initial tests from restore_math to avoid the heavyweight
- * C code entry and MSR manipulations.
- */
- LOAD_REG_IMMEDIATE(r0, MSR_TS_MASK)
- and. r0,r0,r8
- bne 1f
-
- ld r7,PACACURRENT(r13)
- lbz r0,THREAD+THREAD_LOAD_FP(r7)
-#ifdef CONFIG_ALTIVEC
- lbz r6,THREAD+THREAD_LOAD_VEC(r7)
- add r0,r0,r6
-#endif
- cmpdi r0,0
- beq .Lsyscall_restore_math_cont
-
-1: addi r3,r1,STACK_FRAME_OVERHEAD
-#ifdef CONFIG_PPC_BOOK3S
- li r10,MSR_RI
- mtmsrd r10,1 /* Restore RI */
-#endif
- bl restore_math
-#ifdef CONFIG_PPC_BOOK3S
- li r11,0
- mtmsrd r11,1
-#endif
- /* Restore volatiles, reload MSR from updated one */
- ld r8,_MSR(r1)
- ld r3,RESULT(r1)
- li r11,-MAX_ERRNO
- b .Lsyscall_restore_math_cont
-
/* Traced system call support */
.Lsyscall_dotrace:
bl save_nvgprs
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index e6d8354d79ef..f14f3c04ec7e 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -824,7 +824,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
* r3 volatile parameter and return value for status
* r4-r10 volatile input and output value
* r11 volatile hypercall number and output value
- * r12 volatile
+ * r12 volatile input and output value
* r13-r31 nonvolatile
* LR nonvolatile
* CTR volatile
@@ -834,25 +834,26 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
* Other registers nonvolatile
*
* The intersection of volatile registers that don't contain possible
- * inputs is: r12, cr0, xer, ctr. We may use these as scratch regs
- * upon entry without saving.
+ * inputs is: cr0, xer, ctr. We may use these as scratch regs upon entry
+ * without saving, though xer is not a good idea to use, as hardware may
+ * interpret some bits so it may be costly to change them.
*/
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/*
* There is a little bit of juggling to get syscall and hcall
- * working well. Save r10 in ctr to be restored in case it is a
- * hcall.
+ * working well. Save r13 in ctr to avoid using SPRG scratch
+ * register.
*
* Userspace syscalls have already saved the PPR, hcalls must save
* it before setting HMT_MEDIUM.
*/
#define SYSCALL_KVMTEST \
- mr r12,r13; \
+ mtctr r13; \
GET_PACA(r13); \
- mtctr r10; \
+ std r10,PACA_EXGEN+EX_R10(r13); \
KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \
HMT_MEDIUM; \
- mr r9,r12; \
+ mfctr r9;
#else
#define SYSCALL_KVMTEST \
@@ -935,8 +936,8 @@ EXC_VIRT_END(system_call, 0x4c00, 0x100)
* This is a hcall, so register convention is as above, with these
* differences:
* r13 = PACA
- * r12 = orig r13
- * ctr = orig r10
+ * ctr = orig r13
+ * orig r10 saved in PACA
*/
TRAMP_KVM_BEGIN(do_kvm_0xc00)
/*
@@ -944,14 +945,13 @@ TRAMP_KVM_BEGIN(do_kvm_0xc00)
* HMT_MEDIUM. That allows the KVM code to save that value into the
* guest state (it is the guest's PPR value).
*/
- OPT_GET_SPR(r0, SPRN_PPR, CPU_FTR_HAS_PPR)
+ OPT_GET_SPR(r10, SPRN_PPR, CPU_FTR_HAS_PPR)
HMT_MEDIUM
- OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r0, CPU_FTR_HAS_PPR)
+ OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r10, CPU_FTR_HAS_PPR)
mfctr r10
- SET_SCRATCH0(r12)
+ SET_SCRATCH0(r10)
std r9,PACA_EXGEN+EX_R9(r13)
mfcr r9
- std r10,PACA_EXGEN+EX_R10(r13)
KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00)
#endif
@@ -1325,10 +1325,18 @@ EXC_VIRT_NONE(0x5800, 0x100)
std r10,PACA_EXGEN+EX_R13(r13); \
EXCEPTION_PROLOG_PSERIES_1(soft_nmi_common, _H)
+/*
+ * Branch to soft_nmi_interrupt using the emergency stack. The emergency
+ * stack is one that is usable by maskable interrupts so long as MSR_EE
+ * remains off. It is used for recovery when something has corrupted the
+ * normal kernel stack, for example. The "soft NMI" must not use the process
+ * stack because we want irq disabled sections to avoid touching the stack
+ * at all (other than PMU interrupts), so use the emergency stack for this,
+ * and run it entirely with interrupts hard disabled.
+ */
EXC_COMMON_BEGIN(soft_nmi_common)
mr r10,r1
ld r1,PACAEMERGSP(r13)
- ld r1,PACA_NMI_EMERG_SP(r13)
subi r1,r1,INT_FRAME_SIZE
EXCEPTION_COMMON_NORET_STACK(PACA_EXGEN, 0x900,
system_reset, soft_nmi_interrupt,
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 5adb390e773b..e6252c5a57a4 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -30,6 +30,7 @@
* Use unused space in the interrupt stack to save and restore
* registers for winkle support.
*/
+#define _MMCR0 GPR0
#define _SDR1 GPR3
#define _PTCR GPR3
#define _RPR GPR4
@@ -272,6 +273,14 @@ power_enter_stop:
b pnv_wakeup_noloss
.Lhandle_esl_ec_set:
+ /*
+ * POWER9 DD2 can incorrectly set PMAO when waking up after a
+ * state-loss idle. Saving and restoring MMCR0 over idle is a
+ * workaround.
+ */
+ mfspr r4,SPRN_MMCR0
+ std r4,_MMCR0(r1)
+
/*
* Check if the requested state is a deep idle state.
*/
@@ -450,10 +459,20 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
pnv_restore_hyp_resource_arch300:
/*
* Workaround for POWER9, if we lost resources, the ERAT
- * might have been mixed up and needs flushing.
+ * might have been mixed up and needs flushing. We also need
+ * to reload MMCR0 (see comment above). We also need to set
+ * then clear bit 60 in MMCRA to ensure the PMU starts running.
*/
blt cr3,1f
PPC_INVALIDATE_ERAT
+ ld r1,PACAR1(r13)
+ mfspr r4,SPRN_MMCRA
+ ori r4,r4,(1 << (63-60))
+ mtspr SPRN_MMCRA,r4
+ xori r4,r4,(1 << (63-60))
+ mtspr SPRN_MMCRA,r4
+ ld r4,_MMCR0(r1)
+ mtspr SPRN_MMCR0,r4
1:
/*
* POWER ISA 3. Use PSSCR to determine if we
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 0bcec745a672..f291f7826abc 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -145,6 +145,19 @@ notrace unsigned int __check_irq_replay(void)
/* Clear bit 0 which we wouldn't clear otherwise */
local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+ if (happened & PACA_IRQ_HARD_DIS) {
+ /*
+ * We may have missed a decrementer interrupt if hard disabled.
+ * Check the decrementer register in case we had a rollover
+ * while hard disabled.
+ */
+ if (!(happened & PACA_IRQ_DEC)) {
+ if (decrementer_check_overflow()) {
+ local_paca->irq_happened |= PACA_IRQ_DEC;
+ happened |= PACA_IRQ_DEC;
+ }
+ }
+ }
/*
* Force the delivery of pending soft-disabled interrupts on PS3.
@@ -170,7 +183,7 @@ notrace unsigned int __check_irq_replay(void)
* in case we also had a rollover while hard disabled
*/
local_paca->irq_happened &= ~PACA_IRQ_DEC;
- if ((happened & PACA_IRQ_DEC) || decrementer_check_overflow())
+ if (happened & PACA_IRQ_DEC)
return 0x900;
/* Finally check if an external interrupt happened */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 9f3e2c932dcc..1f0fd361e09b 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -362,7 +362,8 @@ void enable_kernel_vsx(void)
cpumsr = msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
- if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) {
+ if (current->thread.regs &&
+ (current->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP))) {
check_if_tm_restore_required(current);
/*
* If a thread has already been reclaimed then the
@@ -386,7 +387,7 @@ void flush_vsx_to_thread(struct task_struct *tsk)
{
if (tsk->thread.regs) {
preempt_disable();
- if (tsk->thread.regs->msr & MSR_VSX) {
+ if (tsk->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP)) {
BUG_ON(tsk != current);
giveup_vsx(tsk);
}
@@ -511,10 +512,6 @@ void restore_math(struct pt_regs *regs)
{
unsigned long msr;
- /*
- * Syscall exit makes a similar initial check before branching
- * to restore_math. Keep them in synch.
- */
if (!msr_tm_active(regs->msr) &&
!current->thread.load_fp && !loadvec(current->thread))
return;
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 925a4ef90559..660ed39e9c9a 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -127,12 +127,19 @@ static void flush_tmregs_to_thread(struct task_struct *tsk)
* If task is not current, it will have been flushed already to
* it's thread_struct during __switch_to().
*
- * A reclaim flushes ALL the state.
+ * A reclaim flushes ALL the state or if not in TM save TM SPRs
+ * in the appropriate thread structures from live.
*/
- if (tsk == current && MSR_TM_SUSPENDED(mfmsr()))
- tm_reclaim_current(TM_CAUSE_SIGNAL);
+ if (tsk != current)
+ return;
+ if (MSR_TM_SUSPENDED(mfmsr())) {
+ tm_reclaim_current(TM_CAUSE_SIGNAL);
+ } else {
+ tm_enable();
+ tm_save_sprs(&(tsk->thread));
+ }
}
#else
static inline void flush_tmregs_to_thread(struct task_struct *tsk) { }
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 997c88d54acf..8d3320562c70 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -351,7 +351,7 @@ static void nmi_ipi_lock_start(unsigned long *flags)
hard_irq_disable();
while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
raw_local_irq_restore(*flags);
- cpu_relax();
+ spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
raw_local_irq_save(*flags);
hard_irq_disable();
}
@@ -360,7 +360,7 @@ static void nmi_ipi_lock_start(unsigned long *flags)
static void nmi_ipi_lock(void)
{
while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
- cpu_relax();
+ spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
}
static void nmi_ipi_unlock(void)
@@ -475,7 +475,7 @@ int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
nmi_ipi_lock_start(&flags);
while (nmi_ipi_busy_count) {
nmi_ipi_unlock_end(&flags);
- cpu_relax();
+ spin_until_cond(nmi_ipi_busy_count == 0);
nmi_ipi_lock_start(&flags);
}
@@ -1003,21 +1003,13 @@ static struct sched_domain_topology_level powerpc_topology[] = {
{ NULL, },
};
-static __init long smp_setup_cpu_workfn(void *data __always_unused)
-{
- smp_ops->setup_cpu(boot_cpuid);
- return 0;
-}
-
void __init smp_cpus_done(unsigned int max_cpus)
{
/*
- * We want the setup_cpu() here to be called on the boot CPU, but
- * init might run on any CPU, so make sure it's invoked on the boot
- * CPU.
+ * We are running pinned to the boot CPU, see rest_init().
*/
if (smp_ops && smp_ops->setup_cpu)
- work_on_cpu_safe(boot_cpuid, smp_setup_cpu_workfn, NULL);
+ smp_ops->setup_cpu(boot_cpuid);
if (smp_ops && smp_ops->bringup_done)
smp_ops->bringup_done();
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index b67f8b03a32d..34721a257a77 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -71,15 +71,20 @@ static inline void wd_smp_lock(unsigned long *flags)
* This may be called from low level interrupt handlers at some
* point in future.
*/
- local_irq_save(*flags);
- while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock)))
- cpu_relax();
+ raw_local_irq_save(*flags);
+ hard_irq_disable(); /* Make it soft-NMI safe */
+ while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) {
+ raw_local_irq_restore(*flags);
+ spin_until_cond(!test_bit(0, &__wd_smp_lock));
+ raw_local_irq_save(*flags);
+ hard_irq_disable();
+ }
}
static inline void wd_smp_unlock(unsigned long *flags)
{
clear_bit_unlock(0, &__wd_smp_lock);
- local_irq_restore(*flags);
+ raw_local_irq_restore(*flags);
}
static void wd_lockup_ipi(struct pt_regs *regs)
@@ -96,10 +101,10 @@ static void wd_lockup_ipi(struct pt_regs *regs)
nmi_panic(regs, "Hard LOCKUP");
}
-static void set_cpu_stuck(int cpu, u64 tb)
+static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
{
- cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
- cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+ cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask);
+ cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask);
if (cpumask_empty(&wd_smp_cpus_pending)) {
wd_smp_last_reset_tb = tb;
cpumask_andnot(&wd_smp_cpus_pending,
@@ -107,6 +112,10 @@ static void set_cpu_stuck(int cpu, u64 tb)
&wd_smp_cpus_stuck);
}
}
+static void set_cpu_stuck(int cpu, u64 tb)
+{
+ set_cpumask_stuck(cpumask_of(cpu), tb);
+}
static void watchdog_smp_panic(int cpu, u64 tb)
{
@@ -135,11 +144,9 @@ static void watchdog_smp_panic(int cpu, u64 tb)
}
smp_flush_nmi_ipi(1000000);
- /* Take the stuck CPU out of the watch group */
- for_each_cpu(c, &wd_smp_cpus_pending)
- set_cpu_stuck(c, tb);
+ /* Take the stuck CPUs out of the watch group */
+ set_cpumask_stuck(&wd_smp_cpus_pending, tb);
-out:
wd_smp_unlock(&flags);
printk_safe_flush();
@@ -152,6 +159,11 @@ out:
if (hardlockup_panic)
nmi_panic(NULL, "Hard LOCKUP");
+
+ return;
+
+out:
+ wd_smp_unlock(&flags);
}
static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
@@ -258,9 +270,11 @@ static void wd_timer_fn(unsigned long data)
void arch_touch_nmi_watchdog(void)
{
+ unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
int cpu = smp_processor_id();
- watchdog_timer_interrupt(cpu);
+ if (get_tb() - per_cpu(wd_timer_tb, cpu) >= ticks)
+ watchdog_timer_interrupt(cpu);
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
@@ -283,6 +297,8 @@ static void stop_watchdog_timer_on(unsigned int cpu)
static int start_wd_on_cpu(unsigned int cpu)
{
+ unsigned long flags;
+
if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
WARN_ON(1);
return 0;
@@ -297,12 +313,14 @@ static int start_wd_on_cpu(unsigned int cpu)
if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
return 0;
+ wd_smp_lock(&flags);
cpumask_set_cpu(cpu, &wd_cpus_enabled);
if (cpumask_weight(&wd_cpus_enabled) == 1) {
cpumask_set_cpu(cpu, &wd_smp_cpus_pending);
wd_smp_last_reset_tb = get_tb();
}
- smp_wmb();
+ wd_smp_unlock(&flags);
+
start_watchdog_timer_on(cpu);
return 0;
@@ -310,12 +328,17 @@ static int start_wd_on_cpu(unsigned int cpu)
static int stop_wd_on_cpu(unsigned int cpu)
{
+ unsigned long flags;
+
if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
return 0; /* Can happen in CPU unplug case */
stop_watchdog_timer_on(cpu);
+ wd_smp_lock(&flags);
cpumask_clear_cpu(cpu, &wd_cpus_enabled);
+ wd_smp_unlock(&flags);
+
wd_smp_clear_cpu_pending(cpu, get_tb());
return 0;