From 685659ee70db0bac47ffd619c726cf600e504fd7 Mon Sep 17 00:00:00 2001 From: yu liu Date: Tue, 14 Jun 2011 18:34:25 -0500 Subject: powerpc/e500: Save SPEFCSR in flush_spe_to_thread() giveup_spe() saves the SPE state which is protected by MSR[SPE]. However, modifying SPEFSCR does not trap when MSR[SPE]=0. And since SPEFSCR is already saved/restored in _switch(), not all the callers want to save SPEFSCR again. Thus, saving SPEFSCR should not belong to giveup_spe(). This patch moves SPEFSCR saving to flush_spe_to_thread(), and cleans up the caller that needs to save SPEFSCR accordingly. Signed-off-by: Liu Yu Acked-by: Kumar Gala Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kernel/head_fsl_booke.S | 2 -- arch/powerpc/kernel/process.c | 1 + arch/powerpc/kernel/traps.c | 5 +---- 3 files changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 5ecf54cfa7d4..aede4f87b682 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -792,8 +792,6 @@ _GLOBAL(giveup_spe) evmwumiaa evr6, evr6, evr6 /* evr6 <- ACC = 0 * 0 + ACC */ li r4,THREAD_ACC evstddx evr6, r4, r3 /* save off accumulator */ - mfspr r6,SPRN_SPEFSCR - stw r6,THREAD_SPEFSCR(r3) /* save spefscr register value */ beq 1f lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) lis r3,MSR_SPE@h diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 91e52df3d81d..60ac2a9251db 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -213,6 +213,7 @@ void flush_spe_to_thread(struct task_struct *tsk) #ifdef CONFIG_SMP BUG_ON(tsk != current); #endif + tsk->thread.spefscr = mfspr(SPRN_SPEFSCR); giveup_spe(tsk); } preempt_enable(); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 1a0141426cda..f19d9777d3c1 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1387,10 +1387,7 @@ void SPEFloatingPointException(struct pt_regs *regs) int code = 0; int err; - preempt_disable(); - if (regs->msr & MSR_SPE) - giveup_spe(current); - preempt_enable(); + flush_spe_to_thread(current); spefscr = current->thread.spefscr; fpexc_mode = current->thread.fpexc_mode; -- cgit v1.2.3 From c51584d52e3878aa9b2bb98cdfb87173e7acf560 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 14 Jun 2011 18:34:27 -0500 Subject: powerpc/e500: SPE register saving: take arbitrary struct offset Previously, these macros hardcoded THREAD_EVR0 as the base of the save area, relative to the base register passed. This base offset is now passed as a separate macro parameter, allowing reuse with other SPE save areas, such as used by KVM. Acked-by: Kumar Gala Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kernel/head_fsl_booke.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index aede4f87b682..fe37dd0dfd17 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -656,7 +656,7 @@ load_up_spe: cmpi 0,r4,0 beq 1f addi r4,r4,THREAD /* want THREAD of last_task_used_spe */ - SAVE_32EVRS(0,r10,r4) + SAVE_32EVRS(0,r10,r4,THREAD_EVR0) evxor evr10, evr10, evr10 /* clear out evr10 */ evmwumiaa evr10, evr10, evr10 /* evr10 <- ACC = 0 * 0 + ACC */ li r5,THREAD_ACC @@ -676,7 +676,7 @@ load_up_spe: stw r4,THREAD_USED_SPE(r5) evlddx evr4,r10,r5 evmra evr4,evr4 - REST_32EVRS(0,r10,r5) + REST_32EVRS(0,r10,r5,THREAD_EVR0) #ifndef CONFIG_SMP subi r4,r5,THREAD stw r4,last_task_used_spe@l(r3) @@ -787,7 +787,7 @@ _GLOBAL(giveup_spe) addi r3,r3,THREAD /* want THREAD of task */ lwz r5,PT_REGS(r3) cmpi 0,r5,0 - SAVE_32EVRS(0, r4, r3) + SAVE_32EVRS(0, r4, r3, THREAD_EVR0) evxor evr6, evr6, evr6 /* clear out evr6 */ evmwumiaa evr6, evr6, evr6 /* evr6 <- ACC = 0 * 0 + ACC */ li r4,THREAD_ACC -- cgit v1.2.3 From ecee273fc48f7f48f0c2f074335c43aaa790c308 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 14 Jun 2011 18:34:29 -0500 Subject: KVM: PPC: booke: use shadow_msr Keep the guest MSR and the guest-mode true MSR separate, rather than modifying the guest MSR on each guest entry to produce a true MSR. Any bits which should be modified based on guest MSR must be explicitly propagated from vcpu->arch.shared->msr to vcpu->arch.shadow_msr in kvmppc_set_msr(). While we're modifying the guest entry code, reorder a few instructions to bury some load latencies. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kernel/asm-offsets.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 36e1c8a29be8..25de8e4808a4 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -404,12 +404,12 @@ int main(void) DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared)); DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); + DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); /* book3s */ #ifdef CONFIG_PPC_BOOK3S DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip)); DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr)); - DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem)); DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter)); DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler)); -- cgit v1.2.3 From 4cd35f675ba41a99a477e28a6add4a66833325f2 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 14 Jun 2011 18:34:31 -0500 Subject: KVM: PPC: e500: Save/restore SPE state This is done lazily. The SPE save will be done only if the guest has used SPE since the last preemption or heavyweight exit. Restore will be done only on demand, when enabling MSR_SPE in the shadow MSR, in response to an SPE fault or mtmsr emulation. For SPEFSCR, Linux already switches it on context switch (non-lazily), so the only remaining bit is to save it between qemu and the guest. Signed-off-by: Liu Yu Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kernel/asm-offsets.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 25de8e4808a4..ecd2b3ad7ff6 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -497,6 +497,13 @@ int main(void) DEFINE(TLBCAM_MAS7, offsetof(struct tlbcam, MAS7)); #endif +#if defined(CONFIG_KVM) && defined(CONFIG_SPE) + DEFINE(VCPU_EVR, offsetof(struct kvm_vcpu, arch.evr[0])); + DEFINE(VCPU_ACC, offsetof(struct kvm_vcpu, arch.acc)); + DEFINE(VCPU_SPEFSCR, offsetof(struct kvm_vcpu, arch.spefscr)); + DEFINE(VCPU_HOST_SPEFSCR, offsetof(struct kvm_vcpu, arch.host_spefscr)); +#endif + #ifdef CONFIG_KVM_EXIT_TIMING DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu, arch.timing_exit.tv32.tbu)); -- cgit v1.2.3 From dd9ebf1f94354b010f2ac7a98bf69168636cb08e Mon Sep 17 00:00:00 2001 From: Liu Yu Date: Tue, 14 Jun 2011 18:35:14 -0500 Subject: KVM: PPC: e500: Add shadow PID support Dynamically assign host PIDs to guest PIDs, splitting each guest PID into multiple host (shadow) PIDs based on kernel/user and MSR[IS/DS]. Use both PID0 and PID1 so that the shadow PIDs for the right mode can be selected, that correspond both to guest TID = zero and guest TID = guest PID. This allows us to significantly reduce the frequency of needing to invalidate the entire TLB. When the guest mode or PID changes, we just update the host PID0/PID1. And since the allocation of shadow PIDs is global, multiple guests can share the TLB without conflict. Note that KVM does not yet support the guest setting PID1 or PID2 to a value other than zero. This will need to be fixed for nested KVM to work. Until then, we enforce the requirement for guest PID1/PID2 to stay zero by failing the emulation if the guest tries to set them to something else. Signed-off-by: Liu Yu Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kernel/asm-offsets.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index ecd2b3ad7ff6..faf846131f45 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -402,6 +402,7 @@ int main(void) DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); + DEFINE(VCPU_SHADOW_PID1, offsetof(struct kvm_vcpu, arch.shadow_pid1)); DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared)); DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); -- cgit v1.2.3 From b01c8b54a1a271c0fc4243845927fe1d250767a3 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:18:26 +0000 Subject: powerpc, KVM: Rework KVM checks in first-level interrupt handlers Instead of branching out-of-line with the DO_KVM macro to check if we are in a KVM guest at the time of an interrupt, this moves the KVM check inline in the first-level interrupt handlers. This speeds up the non-KVM case and makes sure that none of the interrupt handlers are missing the check. Because the first-level interrupt handlers are now larger, some things had to be move out of line in exceptions-64s.S. This all necessitated some minor changes to the interrupt entry code in KVM. This also streamlines the book3s_32 KVM test. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kernel/exceptions-64s.S | 189 +++++++++++++++++++++-------------- 1 file changed, 115 insertions(+), 74 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index a85f4874cba7..e76472cbf3b5 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -40,7 +40,6 @@ __start_interrupts: .globl system_reset_pSeries; system_reset_pSeries: HMT_MEDIUM; - DO_KVM 0x100; SET_SCRATCH0(r13) #ifdef CONFIG_PPC_P7_NAP BEGIN_FTR_SECTION @@ -65,67 +64,45 @@ BEGIN_FTR_SECTION beq . END_FTR_SECTION_IFSET(CPU_FTR_HVMODE_206) #endif /* CONFIG_PPC_P7_NAP */ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, + NOTEST, 0x100) . = 0x200 -_machine_check_pSeries: - HMT_MEDIUM - DO_KVM 0x200 - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD) +machine_check_pSeries_1: + /* This is moved out of line as it can be patched by FW, but + * some code path might still want to branch into the original + * vector + */ + b machine_check_pSeries . = 0x300 .globl data_access_pSeries data_access_pSeries: HMT_MEDIUM - DO_KVM 0x300 SET_SCRATCH0(r13) +#ifndef CONFIG_POWER4_ONLY BEGIN_FTR_SECTION - GET_PACA(r13) - std r9,PACA_EXSLB+EX_R9(r13) - std r10,PACA_EXSLB+EX_R10(r13) - mfspr r10,SPRN_DAR - mfspr r9,SPRN_DSISR - srdi r10,r10,60 - rlwimi r10,r9,16,0x20 - mfcr r9 - cmpwi r10,0x2c - beq do_stab_bolted_pSeries - ld r10,PACA_EXSLB+EX_R10(r13) - std r11,PACA_EXGEN+EX_R11(r13) - ld r11,PACA_EXSLB+EX_R9(r13) - std r12,PACA_EXGEN+EX_R12(r13) - GET_SCRATCH0(r12) - std r10,PACA_EXGEN+EX_R10(r13) - std r11,PACA_EXGEN+EX_R9(r13) - std r12,PACA_EXGEN+EX_R13(r13) - EXCEPTION_PROLOG_PSERIES_1(data_access_common, EXC_STD) -FTR_SECTION_ELSE - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD) -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_SLB) + b data_access_check_stab +data_access_not_stab: +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) +#endif + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD, + KVMTEST, 0x300) . = 0x380 .globl data_access_slb_pSeries data_access_slb_pSeries: HMT_MEDIUM - DO_KVM 0x380 SET_SCRATCH0(r13) - GET_PACA(r13) + EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x380) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_DAR - std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ - mfcr r9 #ifdef __DISABLED__ /* Keep that around for when we re-implement dynamic VSIDs */ cmpdi r3,0 bge slb_miss_user_pseries #endif /* __DISABLED__ */ - std r10,PACA_EXSLB+EX_R10(r13) - std r11,PACA_EXSLB+EX_R11(r13) - std r12,PACA_EXSLB+EX_R12(r13) - GET_SCRATCH0(r10) - std r10,PACA_EXSLB+EX_R13(r13) - mfspr r12,SPRN_SRR1 /* and SRR1 */ + mfspr r12,SPRN_SRR1 #ifndef CONFIG_RELOCATABLE b .slb_miss_realmode #else @@ -147,24 +124,16 @@ data_access_slb_pSeries: .globl instruction_access_slb_pSeries instruction_access_slb_pSeries: HMT_MEDIUM - DO_KVM 0x480 SET_SCRATCH0(r13) - GET_PACA(r13) + EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x480) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ - std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ - mfcr r9 #ifdef __DISABLED__ /* Keep that around for when we re-implement dynamic VSIDs */ cmpdi r3,0 bge slb_miss_user_pseries #endif /* __DISABLED__ */ - std r10,PACA_EXSLB+EX_R10(r13) - std r11,PACA_EXSLB+EX_R11(r13) - std r12,PACA_EXSLB+EX_R12(r13) - GET_SCRATCH0(r10) - std r10,PACA_EXSLB+EX_R13(r13) - mfspr r12,SPRN_SRR1 /* and SRR1 */ + mfspr r12,SPRN_SRR1 #ifndef CONFIG_RELOCATABLE b .slb_miss_realmode #else @@ -184,26 +153,46 @@ instruction_access_slb_pSeries: hardware_interrupt_pSeries: hardware_interrupt_hv: BEGIN_FTR_SECTION - _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD) + _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, + EXC_STD, SOFTEN_TEST) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) FTR_SECTION_ELSE - _MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV) + _MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt, + EXC_HV, SOFTEN_TEST_HV) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502) ALT_FTR_SECTION_END_IFCLR(CPU_FTR_HVMODE_206) STD_EXCEPTION_PSERIES(0x600, 0x600, alignment) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x600) + STD_EXCEPTION_PSERIES(0x700, 0x700, program_check) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x700) + STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x800) MASKABLE_EXCEPTION_PSERIES(0x900, 0x900, decrementer) - MASKABLE_EXCEPTION_HV(0x980, 0x980, decrementer) + MASKABLE_EXCEPTION_HV(0x980, 0x982, decrementer) STD_EXCEPTION_PSERIES(0xa00, 0xa00, trap_0a) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xa00) + STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xb00) . = 0xc00 .globl system_call_pSeries system_call_pSeries: HMT_MEDIUM - DO_KVM 0xc00 +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER + SET_SCRATCH0(r13) + GET_PACA(r13) + std r9,PACA_EXGEN+EX_R9(r13) + std r10,PACA_EXGEN+EX_R10(r13) + mfcr r9 + KVMTEST(0xc00) + GET_SCRATCH0(r13) +#endif BEGIN_FTR_SECTION cmpdi r0,0x1ebe beq- 1f @@ -220,6 +209,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) rfid b . /* prevent speculative execution */ + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00) + /* Fast LE/BE switch system call */ 1: mfspr r12,SPRN_SRR1 xori r12,r12,MSR_LE @@ -228,6 +219,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) b . STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xd00) /* At 0xe??? we have a bunch of hypervisor exceptions, we branch * out of line to handle them @@ -262,30 +254,93 @@ vsx_unavailable_pSeries_1: #ifdef CONFIG_CBE_RAS STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error) + KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1202) #endif /* CONFIG_CBE_RAS */ + STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint) + KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x1300) + #ifdef CONFIG_CBE_RAS STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance) + KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1602) #endif /* CONFIG_CBE_RAS */ + STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x1700) + #ifdef CONFIG_CBE_RAS STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal) + KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1802) #endif /* CONFIG_CBE_RAS */ . = 0x3000 /*** Out of line interrupts support ***/ + /* moved from 0x200 */ +machine_check_pSeries: + .globl machine_check_fwnmi +machine_check_fwnmi: + HMT_MEDIUM + SET_SCRATCH0(r13) /* save r13 */ + EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, + EXC_STD, KVMTEST, 0x200) + KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200) + +#ifndef CONFIG_POWER4_ONLY + /* moved from 0x300 */ +data_access_check_stab: + GET_PACA(r13) + std r9,PACA_EXSLB+EX_R9(r13) + std r10,PACA_EXSLB+EX_R10(r13) + mfspr r10,SPRN_DAR + mfspr r9,SPRN_DSISR + srdi r10,r10,60 + rlwimi r10,r9,16,0x20 +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER + lbz r9,PACA_KVM_SVCPU+SVCPU_IN_GUEST(r13) + rlwimi r10,r9,8,0x300 +#endif + mfcr r9 + cmpwi r10,0x2c + beq do_stab_bolted_pSeries + mtcrf 0x80,r9 + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + b data_access_not_stab +do_stab_bolted_pSeries: + std r11,PACA_EXSLB+EX_R11(r13) + std r12,PACA_EXSLB+EX_R12(r13) + GET_SCRATCH0(r10) + std r10,PACA_EXSLB+EX_R13(r13) + EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD) +#endif /* CONFIG_POWER4_ONLY */ + + KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300) + KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x400) + KVM_HANDLER(PACA_EXSLB, EXC_STD, 0x480) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x900) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x982) + + .align 7 /* moved from 0xe00 */ - STD_EXCEPTION_HV(., 0xe00, h_data_storage) - STD_EXCEPTION_HV(., 0xe20, h_instr_storage) - STD_EXCEPTION_HV(., 0xe40, emulation_assist) - STD_EXCEPTION_HV(., 0xe60, hmi_exception) /* need to flush cache ? */ + STD_EXCEPTION_HV(., 0xe02, h_data_storage) + KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0xe02) + STD_EXCEPTION_HV(., 0xe22, h_instr_storage) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe22) + STD_EXCEPTION_HV(., 0xe42, emulation_assist) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe42) + STD_EXCEPTION_HV(., 0xe62, hmi_exception) /* need to flush cache ? */ + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe62) /* moved from 0xf00 */ STD_EXCEPTION_PSERIES(., 0xf00, performance_monitor) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf00) STD_EXCEPTION_PSERIES(., 0xf20, altivec_unavailable) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf20) STD_EXCEPTION_PSERIES(., 0xf40, vsx_unavailable) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf40) /* * An interrupt came in while soft-disabled; clear EE in SRR1, @@ -317,14 +372,6 @@ masked_Hinterrupt: hrfid b . - .align 7 -do_stab_bolted_pSeries: - std r11,PACA_EXSLB+EX_R11(r13) - std r12,PACA_EXSLB+EX_R12(r13) - GET_SCRATCH0(r10) - std r10,PACA_EXSLB+EX_R13(r13) - EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD) - #ifdef CONFIG_PPC_PSERIES /* * Vectors for the FWNMI option. Share common code. @@ -334,14 +381,8 @@ do_stab_bolted_pSeries: system_reset_fwnmi: HMT_MEDIUM SET_SCRATCH0(r13) /* save r13 */ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD) - - .globl machine_check_fwnmi - .align 7 -machine_check_fwnmi: - HMT_MEDIUM - SET_SCRATCH0(r13) /* save r13 */ - EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, + NOTEST, 0x100) #endif /* CONFIG_PPC_PSERIES */ -- cgit v1.2.3 From 923c53caea446d246949c94703be83e68f251af7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:20:24 +0000 Subject: powerpc: Set up LPCR for running guest partitions In hypervisor mode, the LPCR controls several aspects of guest partitions, including virtual partition memory mode, and also controls whether the hypervisor decrementer interrupts are enabled. This sets up LPCR at boot time so that guest partitions will use a virtual real memory area (VRMA) composed of 16MB large pages, and hypervisor decrementer interrupts are disabled. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kernel/cpu_setup_power7.S | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power7.S index 4f9a93fcfe07..2ef6749688e9 100644 --- a/arch/powerpc/kernel/cpu_setup_power7.S +++ b/arch/powerpc/kernel/cpu_setup_power7.S @@ -61,19 +61,23 @@ __init_LPCR: * LPES = 0b01 (HSRR0/1 used for 0x500) * PECE = 0b111 * DPFD = 4 + * HDICE = 0 + * VC = 0b100 (VPM0=1, VPM1=0, ISL=0) + * VRMASD = 0b10000 (L=1, LP=00) * * Other bits untouched for now */ mfspr r3,SPRN_LPCR - ori r3,r3,(LPCR_LPES0|LPCR_LPES1) - xori r3,r3, LPCR_LPES0 + li r5,1 + rldimi r3,r5, LPCR_LPES_SH, 64-LPCR_LPES_SH-2 ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2) - li r5,7 - sldi r5,r5,LPCR_DPFD_SH - andc r3,r3,r5 li r5,4 - sldi r5,r5,LPCR_DPFD_SH - or r3,r3,r5 + rldimi r3,r5, LPCR_DPFD_SH, 64-LPCR_DPFD_SH-3 + clrrdi r3,r3,1 /* clear HDICE */ + li r5,4 + rldimi r3,r5, LPCR_VC_SH, 0 + li r5,0x10 + rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5 mtspr SPRN_LPCR,r3 isync blr -- cgit v1.2.3 From 3c42bf8a717cb636e0ed2ed77194669e2ac3ed56 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:20:58 +0000 Subject: KVM: PPC: Split host-state fields out of kvmppc_book3s_shadow_vcpu There are several fields in struct kvmppc_book3s_shadow_vcpu that temporarily store bits of host state while a guest is running, rather than anything relating to the particular guest or vcpu. This splits them out into a new kvmppc_host_state structure and modifies the definitions in asm-offsets.c to suit. On 32-bit, we have a kvmppc_host_state structure inside the kvmppc_book3s_shadow_vcpu since the assembly code needs to be able to get to them both with one pointer. On 64-bit they are separate fields in the PACA. This means that on 64-bit we don't need to copy the kvmppc_host_state in and out on vcpu load/unload, and in future will mean that the book3s_hv code doesn't need a shadow_vcpu struct in the PACA at all. That does mean that we have to be careful not to rely on any values persisting in the hstate field of the paca across any point where we could block or get preempted. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kernel/asm-offsets.c | 94 ++++++++++++++++++------------------ arch/powerpc/kernel/exceptions-64s.S | 2 +- 2 files changed, 48 insertions(+), 48 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index faf846131f45..dabfb7346f36 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -198,11 +198,6 @@ int main(void) DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); -#ifdef CONFIG_KVM_BOOK3S_64_HANDLER - DEFINE(PACA_KVM_SVCPU, offsetof(struct paca_struct, shadow_vcpu)); - DEFINE(SVCPU_SLB, offsetof(struct kvmppc_book3s_shadow_vcpu, slb)); - DEFINE(SVCPU_SLB_MAX, offsetof(struct kvmppc_book3s_shadow_vcpu, slb_max)); -#endif #endif /* CONFIG_PPC64 */ /* RTAS */ @@ -416,49 +411,54 @@ int main(void) DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler)); DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall)); DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); - DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - - offsetof(struct kvmppc_vcpu_book3s, vcpu)); - DEFINE(SVCPU_CR, offsetof(struct kvmppc_book3s_shadow_vcpu, cr)); - DEFINE(SVCPU_XER, offsetof(struct kvmppc_book3s_shadow_vcpu, xer)); - DEFINE(SVCPU_CTR, offsetof(struct kvmppc_book3s_shadow_vcpu, ctr)); - DEFINE(SVCPU_LR, offsetof(struct kvmppc_book3s_shadow_vcpu, lr)); - DEFINE(SVCPU_PC, offsetof(struct kvmppc_book3s_shadow_vcpu, pc)); - DEFINE(SVCPU_R0, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[0])); - DEFINE(SVCPU_R1, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[1])); - DEFINE(SVCPU_R2, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[2])); - DEFINE(SVCPU_R3, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[3])); - DEFINE(SVCPU_R4, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[4])); - DEFINE(SVCPU_R5, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[5])); - DEFINE(SVCPU_R6, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[6])); - DEFINE(SVCPU_R7, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[7])); - DEFINE(SVCPU_R8, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[8])); - DEFINE(SVCPU_R9, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[9])); - DEFINE(SVCPU_R10, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[10])); - DEFINE(SVCPU_R11, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[11])); - DEFINE(SVCPU_R12, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[12])); - DEFINE(SVCPU_R13, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[13])); - DEFINE(SVCPU_HOST_R1, offsetof(struct kvmppc_book3s_shadow_vcpu, host_r1)); - DEFINE(SVCPU_HOST_R2, offsetof(struct kvmppc_book3s_shadow_vcpu, host_r2)); - DEFINE(SVCPU_VMHANDLER, offsetof(struct kvmppc_book3s_shadow_vcpu, - vmhandler)); - DEFINE(SVCPU_SCRATCH0, offsetof(struct kvmppc_book3s_shadow_vcpu, - scratch0)); - DEFINE(SVCPU_SCRATCH1, offsetof(struct kvmppc_book3s_shadow_vcpu, - scratch1)); - DEFINE(SVCPU_IN_GUEST, offsetof(struct kvmppc_book3s_shadow_vcpu, - in_guest)); - DEFINE(SVCPU_FAULT_DSISR, offsetof(struct kvmppc_book3s_shadow_vcpu, - fault_dsisr)); - DEFINE(SVCPU_FAULT_DAR, offsetof(struct kvmppc_book3s_shadow_vcpu, - fault_dar)); - DEFINE(SVCPU_LAST_INST, offsetof(struct kvmppc_book3s_shadow_vcpu, - last_inst)); - DEFINE(SVCPU_SHADOW_SRR1, offsetof(struct kvmppc_book3s_shadow_vcpu, - shadow_srr1)); + +#ifdef CONFIG_PPC_BOOK3S_64 +# define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f)) +# define HSTATE_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, kvm_hstate.f)) +#else /* 32-bit */ +# define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct kvmppc_book3s_shadow_vcpu, f)) +# define HSTATE_FIELD(x, f) DEFINE(x, offsetof(struct kvmppc_book3s_shadow_vcpu, hstate.f)) +#endif + + SVCPU_FIELD(SVCPU_CR, cr); + SVCPU_FIELD(SVCPU_XER, xer); + SVCPU_FIELD(SVCPU_CTR, ctr); + SVCPU_FIELD(SVCPU_LR, lr); + SVCPU_FIELD(SVCPU_PC, pc); + SVCPU_FIELD(SVCPU_R0, gpr[0]); + SVCPU_FIELD(SVCPU_R1, gpr[1]); + SVCPU_FIELD(SVCPU_R2, gpr[2]); + SVCPU_FIELD(SVCPU_R3, gpr[3]); + SVCPU_FIELD(SVCPU_R4, gpr[4]); + SVCPU_FIELD(SVCPU_R5, gpr[5]); + SVCPU_FIELD(SVCPU_R6, gpr[6]); + SVCPU_FIELD(SVCPU_R7, gpr[7]); + SVCPU_FIELD(SVCPU_R8, gpr[8]); + SVCPU_FIELD(SVCPU_R9, gpr[9]); + SVCPU_FIELD(SVCPU_R10, gpr[10]); + SVCPU_FIELD(SVCPU_R11, gpr[11]); + SVCPU_FIELD(SVCPU_R12, gpr[12]); + SVCPU_FIELD(SVCPU_R13, gpr[13]); + SVCPU_FIELD(SVCPU_FAULT_DSISR, fault_dsisr); + SVCPU_FIELD(SVCPU_FAULT_DAR, fault_dar); + SVCPU_FIELD(SVCPU_LAST_INST, last_inst); + SVCPU_FIELD(SVCPU_SHADOW_SRR1, shadow_srr1); #ifdef CONFIG_PPC_BOOK3S_32 - DEFINE(SVCPU_SR, offsetof(struct kvmppc_book3s_shadow_vcpu, sr)); + SVCPU_FIELD(SVCPU_SR, sr); #endif -#else +#ifdef CONFIG_PPC64 + SVCPU_FIELD(SVCPU_SLB, slb); + SVCPU_FIELD(SVCPU_SLB_MAX, slb_max); +#endif + + HSTATE_FIELD(HSTATE_HOST_R1, host_r1); + HSTATE_FIELD(HSTATE_HOST_R2, host_r2); + HSTATE_FIELD(HSTATE_VMHANDLER, vmhandler); + HSTATE_FIELD(HSTATE_SCRATCH0, scratch0); + HSTATE_FIELD(HSTATE_SCRATCH1, scratch1); + HSTATE_FIELD(HSTATE_IN_GUEST, in_guest); + +#else /* CONFIG_PPC_BOOK3S */ DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); @@ -468,7 +468,7 @@ int main(void) DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); #endif /* CONFIG_PPC_BOOK3S */ -#endif +#endif /* CONFIG_KVM */ #ifdef CONFIG_KVM_GUEST DEFINE(KVM_MAGIC_SCRATCH1, offsetof(struct kvm_vcpu_arch_shared, diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e76472cbf3b5..6da00550afea 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -298,7 +298,7 @@ data_access_check_stab: srdi r10,r10,60 rlwimi r10,r9,16,0x20 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER - lbz r9,PACA_KVM_SVCPU+SVCPU_IN_GUEST(r13) + lbz r9,HSTATE_IN_GUEST(r13) rlwimi r10,r9,8,0x300 #endif mfcr r9 -- cgit v1.2.3 From de56a948b9182fbcf92cb8212f114de096c2d574 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:21:34 +0000 Subject: KVM: PPC: Add support for Book3S processors in hypervisor mode This adds support for KVM running on 64-bit Book 3S processors, specifically POWER7, in hypervisor mode. Using hypervisor mode means that the guest can use the processor's supervisor mode. That means that the guest can execute privileged instructions and access privileged registers itself without trapping to the host. This gives excellent performance, but does mean that KVM cannot emulate a processor architecture other than the one that the hardware implements. This code assumes that the guest is running paravirtualized using the PAPR (Power Architecture Platform Requirements) interface, which is the interface that IBM's PowerVM hypervisor uses. That means that existing Linux distributions that run on IBM pSeries machines will also run under KVM without modification. In order to communicate the PAPR hypercalls to qemu, this adds a new KVM_EXIT_PAPR_HCALL exit code to include/linux/kvm.h. Currently the choice between book3s_hv support and book3s_pr support (i.e. the existing code, which runs the guest in user mode) has to be made at kernel configuration time, so a given kernel binary can only do one or the other. This new book3s_hv code doesn't support MMIO emulation at present. Since we are running paravirtualized guests, this isn't a serious restriction. With the guest running in supervisor mode, most exceptions go straight to the guest. We will never get data or instruction storage or segment interrupts, alignment interrupts, decrementer interrupts, program interrupts, single-step interrupts, etc., coming to the hypervisor from the guest. Therefore this introduces a new KVMTEST_NONHV macro for the exception entry path so that we don't have to do the KVM test on entry to those exception handlers. We do however get hypervisor decrementer, hypervisor data storage, hypervisor instruction storage, and hypervisor emulation assist interrupts, so we have to handle those. In hypervisor mode, real-mode accesses can access all of RAM, not just a limited amount. Therefore we put all the guest state in the vcpu.arch and use the shadow_vcpu in the PACA only for temporary scratch space. We allocate the vcpu with kzalloc rather than vzalloc, and we don't use anything in the kvmppc_vcpu_book3s struct, so we don't allocate it. We don't have a shared page with the guest, but we still need a kvm_vcpu_arch_shared struct to store the values of various registers, so we include one in the vcpu_arch struct. The POWER7 processor has a restriction that all threads in a core have to be in the same partition. MMU-on kernel code counts as a partition (partition 0), so we have to do a partition switch on every entry to and exit from the guest. At present we require the host and guest to run in single-thread mode because of this hardware restriction. This code allocates a hashed page table for the guest and initializes it with HPTEs for the guest's Virtual Real Memory Area (VRMA). We require that the guest memory is allocated using 16MB huge pages, in order to simplify the low-level memory management. This also means that we can get away without tracking paging activity in the host for now, since huge pages can't be paged or swapped. This also adds a few new exports needed by the book3s_hv code. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kernel/asm-offsets.c | 79 ++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/exceptions-64s.S | 60 ++++++++++++++------------- arch/powerpc/kernel/process.c | 3 ++ arch/powerpc/kernel/setup-common.c | 3 ++ arch/powerpc/kernel/smp.c | 1 + 5 files changed, 118 insertions(+), 28 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index dabfb7346f36..936267462cae 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -187,6 +187,7 @@ int main(void) DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1)); DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int)); DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int)); + DEFINE(LPPACA_PMCINUSE, offsetof(struct lppaca, pmcregs_in_use)); DEFINE(LPPACA_DTLIDX, offsetof(struct lppaca, dtl_idx)); DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx)); #endif /* CONFIG_PPC_STD_MMU_64 */ @@ -392,6 +393,29 @@ int main(void) DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); + DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fpr)); + DEFINE(VCPU_FPSCR, offsetof(struct kvm_vcpu, arch.fpscr)); +#ifdef CONFIG_ALTIVEC + DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr)); + DEFINE(VCPU_VSCR, offsetof(struct kvm_vcpu, arch.vscr)); +#endif +#ifdef CONFIG_VSX + DEFINE(VCPU_VSRS, offsetof(struct kvm_vcpu, arch.vsr)); +#endif + DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); + DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); + DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); + DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); + DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); +#ifdef CONFIG_KVM_BOOK3S_64_HV + DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.shregs.msr)); + DEFINE(VCPU_SRR0, offsetof(struct kvm_vcpu, arch.shregs.srr0)); + DEFINE(VCPU_SRR1, offsetof(struct kvm_vcpu, arch.shregs.srr1)); + DEFINE(VCPU_SPRG0, offsetof(struct kvm_vcpu, arch.shregs.sprg0)); + DEFINE(VCPU_SPRG1, offsetof(struct kvm_vcpu, arch.shregs.sprg1)); + DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2)); + DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3)); +#endif DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); @@ -403,17 +427,60 @@ int main(void) DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); /* book3s */ +#ifdef CONFIG_KVM_BOOK3S_64_HV + DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid)); + DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1)); + DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); + DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); + DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1)); + DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock)); + DEFINE(KVM_ONLINE_CPUS, offsetof(struct kvm, online_vcpus.counter)); + DEFINE(KVM_LAST_VCPU, offsetof(struct kvm, arch.last_vcpu)); + DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); + DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); +#endif #ifdef CONFIG_PPC_BOOK3S + DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); + DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip)); DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr)); + DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr)); + DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr)); + DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr)); + DEFINE(VCPU_AMR, offsetof(struct kvm_vcpu, arch.amr)); + DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor)); + DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl)); + DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr)); DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem)); DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter)); DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler)); DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall)); DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); + DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec)); + DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires)); + DEFINE(VCPU_LPCR, offsetof(struct kvm_vcpu, arch.lpcr)); + DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); + DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); + DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); + DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max)); + DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr)); + DEFINE(VCPU_LAST_CPU, offsetof(struct kvm_vcpu, arch.last_cpu)); + DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr)); + DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar)); + DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); + DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); + DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - + offsetof(struct kvmppc_vcpu_book3s, vcpu)); + DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); + DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv)); + DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb)); #ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_KVM_BOOK3S_PR # define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f)) +#else +# define SVCPU_FIELD(x, f) +#endif # define HSTATE_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, kvm_hstate.f)) #else /* 32-bit */ # define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct kvmppc_book3s_shadow_vcpu, f)) @@ -453,11 +520,23 @@ int main(void) HSTATE_FIELD(HSTATE_HOST_R1, host_r1); HSTATE_FIELD(HSTATE_HOST_R2, host_r2); + HSTATE_FIELD(HSTATE_HOST_MSR, host_msr); HSTATE_FIELD(HSTATE_VMHANDLER, vmhandler); HSTATE_FIELD(HSTATE_SCRATCH0, scratch0); HSTATE_FIELD(HSTATE_SCRATCH1, scratch1); HSTATE_FIELD(HSTATE_IN_GUEST, in_guest); +#ifdef CONFIG_KVM_BOOK3S_64_HV + HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); + HSTATE_FIELD(HSTATE_MMCR, host_mmcr); + HSTATE_FIELD(HSTATE_PMC, host_pmc); + HSTATE_FIELD(HSTATE_PURR, host_purr); + HSTATE_FIELD(HSTATE_SPURR, host_spurr); + HSTATE_FIELD(HSTATE_DSCR, host_dscr); + HSTATE_FIELD(HSTATE_DABR, dabr); + HSTATE_FIELD(HSTATE_DECEXP, dec_expires); +#endif /* CONFIG_KVM_BOOK3S_64_HV */ + #else /* CONFIG_PPC_BOOK3S */ DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 6da00550afea..163c041cec24 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -87,14 +87,14 @@ data_access_not_stab: END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) #endif EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD, - KVMTEST, 0x300) + KVMTEST_PR, 0x300) . = 0x380 .globl data_access_slb_pSeries data_access_slb_pSeries: HMT_MEDIUM SET_SCRATCH0(r13) - EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x380) + EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_DAR #ifdef __DISABLED__ @@ -125,7 +125,7 @@ data_access_slb_pSeries: instruction_access_slb_pSeries: HMT_MEDIUM SET_SCRATCH0(r13) - EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x480) + EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ #ifdef __DISABLED__ @@ -153,32 +153,32 @@ instruction_access_slb_pSeries: hardware_interrupt_pSeries: hardware_interrupt_hv: BEGIN_FTR_SECTION - _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, - EXC_STD, SOFTEN_TEST) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) - FTR_SECTION_ELSE _MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV, SOFTEN_TEST_HV) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502) - ALT_FTR_SECTION_END_IFCLR(CPU_FTR_HVMODE_206) + FTR_SECTION_ELSE + _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, + EXC_STD, SOFTEN_TEST_PR) + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) + ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE_206) STD_EXCEPTION_PSERIES(0x600, 0x600, alignment) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x600) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x600) STD_EXCEPTION_PSERIES(0x700, 0x700, program_check) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x700) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x700) STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x800) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x800) MASKABLE_EXCEPTION_PSERIES(0x900, 0x900, decrementer) MASKABLE_EXCEPTION_HV(0x980, 0x982, decrementer) STD_EXCEPTION_PSERIES(0xa00, 0xa00, trap_0a) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xa00) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xa00) STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xb00) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xb00) . = 0xc00 .globl system_call_pSeries @@ -219,7 +219,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) b . STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xd00) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xd00) /* At 0xe??? we have a bunch of hypervisor exceptions, we branch * out of line to handle them @@ -254,23 +254,23 @@ vsx_unavailable_pSeries_1: #ifdef CONFIG_CBE_RAS STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1202) + KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_HV, 0x1202) #endif /* CONFIG_CBE_RAS */ STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x1300) + KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x1300) #ifdef CONFIG_CBE_RAS STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1602) + KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_HV, 0x1602) #endif /* CONFIG_CBE_RAS */ STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x1700) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x1700) #ifdef CONFIG_CBE_RAS STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1802) + KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_HV, 0x1802) #endif /* CONFIG_CBE_RAS */ . = 0x3000 @@ -297,7 +297,7 @@ data_access_check_stab: mfspr r9,SPRN_DSISR srdi r10,r10,60 rlwimi r10,r9,16,0x20 -#ifdef CONFIG_KVM_BOOK3S_64_HANDLER +#ifdef CONFIG_KVM_BOOK3S_PR lbz r9,HSTATE_IN_GUEST(r13) rlwimi r10,r9,8,0x300 #endif @@ -316,11 +316,11 @@ do_stab_bolted_pSeries: EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD) #endif /* CONFIG_POWER4_ONLY */ - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300) - KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x400) - KVM_HANDLER(PACA_EXSLB, EXC_STD, 0x480) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x900) + KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x300) + KVM_HANDLER_PR_SKIP(PACA_EXSLB, EXC_STD, 0x380) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x400) + KVM_HANDLER_PR(PACA_EXSLB, EXC_STD, 0x480) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x900) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x982) .align 7 @@ -336,11 +336,11 @@ do_stab_bolted_pSeries: /* moved from 0xf00 */ STD_EXCEPTION_PSERIES(., 0xf00, performance_monitor) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf00) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf00) STD_EXCEPTION_PSERIES(., 0xf20, altivec_unavailable) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf20) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20) STD_EXCEPTION_PSERIES(., 0xf40, vsx_unavailable) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf40) + KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40) /* * An interrupt came in while soft-disabled; clear EE in SRR1, @@ -417,7 +417,11 @@ slb_miss_user_pseries: /* KVM's trampoline code needs to be close to the interrupt handlers */ #ifdef CONFIG_KVM_BOOK3S_64_HANDLER +#ifdef CONFIG_KVM_BOOK3S_PR #include "../kvm/book3s_rmhandlers.S" +#else +#include "../kvm/book3s_hv_rmhandlers.S" +#endif #endif .align 7 diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 60ac2a9251db..ec2d0edeb134 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -96,6 +96,7 @@ void flush_fp_to_thread(struct task_struct *tsk) preempt_enable(); } } +EXPORT_SYMBOL_GPL(flush_fp_to_thread); void enable_kernel_fp(void) { @@ -145,6 +146,7 @@ void flush_altivec_to_thread(struct task_struct *tsk) preempt_enable(); } } +EXPORT_SYMBOL_GPL(flush_altivec_to_thread); #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX @@ -186,6 +188,7 @@ void flush_vsx_to_thread(struct task_struct *tsk) preempt_enable(); } } +EXPORT_SYMBOL_GPL(flush_vsx_to_thread); #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 79fca2651b65..22051ef04bd9 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -375,6 +375,9 @@ void __init check_for_initrd(void) int threads_per_core, threads_shift; cpumask_t threads_core_mask; +EXPORT_SYMBOL_GPL(threads_per_core); +EXPORT_SYMBOL_GPL(threads_shift); +EXPORT_SYMBOL_GPL(threads_core_mask); static void __init cpu_init_thread_core_maps(int tpc) { diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8ebc6700b98d..09a85a9045d6 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -243,6 +243,7 @@ void smp_send_reschedule(int cpu) if (likely(smp_ops)) smp_ops->message_pass(cpu, PPC_MSG_RESCHEDULE); } +EXPORT_SYMBOL_GPL(smp_send_reschedule); void arch_send_call_function_single_ipi(int cpu) { -- cgit v1.2.3 From a8606e20e41a8149456bafdf76ad29d47672027c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:22:05 +0000 Subject: KVM: PPC: Handle some PAPR hcalls in the kernel This adds the infrastructure for handling PAPR hcalls in the kernel, either early in the guest exit path while we are still in real mode, or later once the MMU has been turned back on and we are in the full kernel context. The advantage of handling hcalls in real mode if possible is that we avoid two partition switches -- and this will become more important when we support SMT4 guests, since a partition switch means we have to pull all of the threads in the core out of the guest. The disadvantage is that we can only access the kernel linear mapping, not anything vmalloced or ioremapped, since the MMU is off. This also adds code to handle the following hcalls in real mode: H_ENTER Add an HPTE to the hashed page table H_REMOVE Remove an HPTE from the hashed page table H_READ Read HPTEs from the hashed page table H_PROTECT Change the protection bits in an HPTE H_BULK_REMOVE Remove up to 4 HPTEs from the hashed page table H_SET_DABR Set the data address breakpoint register Plus code to handle the following hcalls in the kernel: H_CEDE Idle the vcpu until an interrupt or H_PROD hcall arrives H_PROD Wake up a ceded vcpu H_REGISTER_VPA Register a virtual processor area (VPA) The code that runs in real mode has to be in the base kernel, not in the module, if KVM is compiled as a module. The real-mode code can only access the kernel linear mapping, not vmalloc or ioremap space. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kernel/asm-offsets.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 936267462cae..c70d106bf1a4 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -189,6 +189,7 @@ int main(void) DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int)); DEFINE(LPPACA_PMCINUSE, offsetof(struct lppaca, pmcregs_in_use)); DEFINE(LPPACA_DTLIDX, offsetof(struct lppaca, dtl_idx)); + DEFINE(LPPACA_YIELDCOUNT, offsetof(struct lppaca, yield_count)); DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx)); #endif /* CONFIG_PPC_STD_MMU_64 */ DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); @@ -459,6 +460,7 @@ int main(void) DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec)); DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires)); DEFINE(VCPU_LPCR, offsetof(struct kvm_vcpu, arch.lpcr)); + DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa)); DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); -- cgit v1.2.3 From 371fefd6f2dc46668e00871930dde613b88d4bde Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:23:08 +0000 Subject: KVM: PPC: Allow book3s_hv guests to use SMT processor modes This lifts the restriction that book3s_hv guests can only run one hardware thread per core, and allows them to use up to 4 threads per core on POWER7. The host still has to run single-threaded. This capability is advertised to qemu through a new KVM_CAP_PPC_SMT capability. The return value of the ioctl querying this capability is the number of vcpus per virtual CPU core (vcore), currently 4. To use this, the host kernel should be booted with all threads active, and then all the secondary threads should be offlined. This will put the secondary threads into nap mode. KVM will then wake them from nap mode and use them for running guest code (while they are still offline). To wake the secondary threads, we send them an IPI using a new xics_wake_cpu() function, implemented in arch/powerpc/sysdev/xics/icp-native.c. In other words, at this stage we assume that the platform has a XICS interrupt controller and we are using icp-native.c to drive it. Since the woken thread will need to acknowledge and clear the IPI, we also export the base physical address of the XICS registers using kvmppc_set_xics_phys() for use in the low-level KVM book3s code. When a vcpu is created, it is assigned to a virtual CPU core. The vcore number is obtained by dividing the vcpu number by the number of threads per core in the host. This number is exported to userspace via the KVM_CAP_PPC_SMT capability. If qemu wishes to run the guest in single-threaded mode, it should make all vcpu numbers be multiples of the number of threads per core. We distinguish three states of a vcpu: runnable (i.e., ready to execute the guest), blocked (that is, idle), and busy in host. We currently implement a policy that the vcore can run only when all its threads are runnable or blocked. This way, if a vcpu needs to execute elsewhere in the kernel or in qemu, it can do so without being starved of CPU by the other vcpus. When a vcore starts to run, it executes in the context of one of the vcpu threads. The other vcpu threads all go to sleep and stay asleep until something happens requiring the vcpu thread to return to qemu, or to wake up to run the vcore (this can happen when another vcpu thread goes from busy in host state to blocked). It can happen that a vcpu goes from blocked to runnable state (e.g. because of an interrupt), and the vcore it belongs to is already running. In that case it can start to run immediately as long as the none of the vcpus in the vcore have started to exit the guest. We send the next free thread in the vcore an IPI to get it to start to execute the guest. It synchronizes with the other threads via the vcore->entry_exit_count field to make sure that it doesn't go into the guest if the other vcpus are exiting by the time that it is ready to actually enter the guest. Note that there is no fixed relationship between the hardware thread number and the vcpu number. Hardware threads are assigned to vcpus as they become runnable, so we will always use the lower-numbered hardware threads in preference to higher-numbered threads if not all the vcpus in the vcore are runnable, regardless of which vcpus are runnable. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kernel/asm-offsets.c | 6 ++++++ arch/powerpc/kernel/exceptions-64s.S | 31 ++++++++++++++++++++++--------- arch/powerpc/kernel/idle_power7.S | 2 -- 3 files changed, 28 insertions(+), 11 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c70d106bf1a4..d0f2387fd792 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -471,6 +471,10 @@ int main(void) DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar)); DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); + DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid)); + DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count)); + DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count)); + DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) - offsetof(struct kvmppc_vcpu_book3s, vcpu)); DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); @@ -530,6 +534,8 @@ int main(void) #ifdef CONFIG_KVM_BOOK3S_64_HV HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); + HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); + HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); HSTATE_FIELD(HSTATE_MMCR, host_mmcr); HSTATE_FIELD(HSTATE_PMC, host_pmc); HSTATE_FIELD(HSTATE_PURR, host_purr); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 163c041cec24..5bc06fdfa6c0 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -49,19 +49,32 @@ BEGIN_FTR_SECTION * state loss at this time. */ mfspr r13,SPRN_SRR1 - rlwinm r13,r13,47-31,30,31 - cmpwi cr0,r13,1 - bne 1f - b .power7_wakeup_noloss -1: cmpwi cr0,r13,2 - bne 1f - b .power7_wakeup_loss + rlwinm. r13,r13,47-31,30,31 + beq 9f + + /* waking up from powersave (nap) state */ + cmpwi cr1,r13,2 /* Total loss of HV state is fatal, we could try to use the * PIR to locate a PACA, then use an emergency stack etc... * but for now, let's just stay stuck here */ -1: cmpwi cr0,r13,3 - beq . + bgt cr1,. + GET_PACA(r13) + +#ifdef CONFIG_KVM_BOOK3S_64_HV + lbz r0,PACAPROCSTART(r13) + cmpwi r0,0x80 + bne 1f + li r0,0 + stb r0,PACAPROCSTART(r13) + b kvm_start_guest +1: +#endif + + beq cr1,2f + b .power7_wakeup_noloss +2: b .power7_wakeup_loss +9: END_FTR_SECTION_IFSET(CPU_FTR_HVMODE_206) #endif /* CONFIG_PPC_P7_NAP */ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index f8f0bc7f1d4f..3a70845a51c7 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -73,7 +73,6 @@ _GLOBAL(power7_idle) b . _GLOBAL(power7_wakeup_loss) - GET_PACA(r13) ld r1,PACAR1(r13) REST_NVGPRS(r1) REST_GPR(2, r1) @@ -87,7 +86,6 @@ _GLOBAL(power7_wakeup_loss) rfid _GLOBAL(power7_wakeup_noloss) - GET_PACA(r13) ld r1,PACAR1(r13) ld r4,_MSR(r1) ld r5,_NIP(r1) -- cgit v1.2.3 From aa04b4cc5be64b4fb9ef4e0fdf2418e2f4737fb2 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:25:44 +0000 Subject: KVM: PPC: Allocate RMAs (Real Mode Areas) at boot for use by guests This adds infrastructure which will be needed to allow book3s_hv KVM to run on older POWER processors, including PPC970, which don't support the Virtual Real Mode Area (VRMA) facility, but only the Real Mode Offset (RMO) facility. These processors require a physically contiguous, aligned area of memory for each guest. When the guest does an access in real mode (MMU off), the address is compared against a limit value, and if it is lower, the address is ORed with an offset value (from the Real Mode Offset Register (RMOR)) and the result becomes the real address for the access. The size of the RMA has to be one of a set of supported values, which usually includes 64MB, 128MB, 256MB and some larger powers of 2. Since we are unlikely to be able to allocate 64MB or more of physically contiguous memory after the kernel has been running for a while, we allocate a pool of RMAs at boot time using the bootmem allocator. The size and number of the RMAs can be set using the kvm_rma_size=xx and kvm_rma_count=xx kernel command line options. KVM exports a new capability, KVM_CAP_PPC_RMA, to signal the availability of the pool of preallocated RMAs. The capability value is 1 if the processor can use an RMA but doesn't require one (because it supports the VRMA facility), or 2 if the processor requires an RMA for each guest. This adds a new ioctl, KVM_ALLOCATE_RMA, which allocates an RMA from the pool and returns a file descriptor which can be used to map the RMA. It also returns the size of the RMA in the argument structure. Having an RMA means we will get multiple KMV_SET_USER_MEMORY_REGION ioctl calls from userspace. To cope with this, we now preallocate the kvm->arch.ram_pginfo array when the VM is created with a size sufficient for up to 64GB of guest memory. Subsequently we will get rid of this array and use memory associated with each memslot instead. This moves most of the code that translates the user addresses into host pfns (page frame numbers) out of kvmppc_prepare_vrma up one level to kvmppc_core_prepare_memory_region. Also, instead of having to look up the VMA for each page in order to check the page size, we now check that the pages we get are compound pages of 16MB. However, if we are adding memory that is mapped to an RMA, we don't bother with calling get_user_pages_fast and instead just offset from the base pfn for the RMA. Typically the RMA gets added after vcpus are created, which makes it inconvenient to have the LPCR (logical partition control register) value in the vcpu->arch struct, since the LPCR controls whether the processor uses RMA or VRMA for the guest. This moves the LPCR value into the kvm->arch struct and arranges for the MER (mediated external request) bit, which is the only bit that varies between vcpus, to be set in assembly code when going into the guest if there is a pending external interrupt request. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kernel/asm-offsets.c | 4 +++- arch/powerpc/kernel/setup_64.c | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index d0f2387fd792..f4aba938166b 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -437,6 +437,8 @@ int main(void) DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock)); DEFINE(KVM_ONLINE_CPUS, offsetof(struct kvm, online_vcpus.counter)); DEFINE(KVM_LAST_VCPU, offsetof(struct kvm, arch.last_vcpu)); + DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr)); + DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor)); DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); #endif @@ -459,7 +461,7 @@ int main(void) DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec)); DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires)); - DEFINE(VCPU_LPCR, offsetof(struct kvm_vcpu, arch.lpcr)); + DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions)); DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa)); DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index a88bf2713d41..532054f24ecb 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -63,6 +63,7 @@ #include #include #include +#include #include "setup.h" @@ -580,6 +581,8 @@ void __init setup_arch(char **cmdline_p) /* Initialize the MMU context management stuff */ mmu_context_init(); + kvm_rma_init(); + ppc64_boot_msg(0x15, "Setup Done"); } -- cgit v1.2.3 From 969391c58a4efb8411d6881179945f425ad9cbb5 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:26:11 +0000 Subject: powerpc, KVM: Split HVMODE_206 cpu feature bit into separate HV and architecture bits This replaces the single CPU_FTR_HVMODE_206 bit with two bits, one to indicate that we have a usable hypervisor mode, and another to indicate that the processor conforms to PowerISA version 2.06. We also add another bit to indicate that the processor conforms to ISA version 2.01 and set that for PPC970 and derivatives. Some PPC970 chips (specifically those in Apple machines) have a hypervisor mode in that MSR[HV] is always 1, but the hypervisor mode is not useful in the sense that there is no way to run any code in supervisor mode (HV=0 PR=0). On these processors, the LPES0 and LPES1 bits in HID4 are always 0, and we use that as a way of detecting that hypervisor mode is not useful. Where we have a feature section in assembly code around code that only applies on POWER7 in hypervisor mode, we use a construct like END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) The definition of END_FTR_SECTION_IFSET is such that the code will be enabled (not overwritten with nops) only if all bits in the provided mask are set. Note that the CPU feature check in __tlbie() only needs to check the ARCH_206 bit, not the HVMODE bit, because __tlbie() can only get called if we are running bare-metal, i.e. in hypervisor mode. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kernel/cpu_setup_power7.S | 4 ++-- arch/powerpc/kernel/cpu_setup_ppc970.S | 26 ++++++++++++++++++++++---- arch/powerpc/kernel/exceptions-64s.S | 4 ++-- arch/powerpc/kernel/paca.c | 2 +- 4 files changed, 27 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power7.S index 2ef6749688e9..76797c5105d6 100644 --- a/arch/powerpc/kernel/cpu_setup_power7.S +++ b/arch/powerpc/kernel/cpu_setup_power7.S @@ -45,12 +45,12 @@ _GLOBAL(__restore_cpu_power7) blr __init_hvmode_206: - /* Disable CPU_FTR_HVMODE_206 and exit if MSR:HV is not set */ + /* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */ mfmsr r3 rldicl. r0,r3,4,63 bnelr ld r5,CPU_SPEC_FEATURES(r4) - LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE_206) + LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE) xor r5,r5,r6 std r5,CPU_SPEC_FEATURES(r4) blr diff --git a/arch/powerpc/kernel/cpu_setup_ppc970.S b/arch/powerpc/kernel/cpu_setup_ppc970.S index 27f2507279d8..12fac8df01c5 100644 --- a/arch/powerpc/kernel/cpu_setup_ppc970.S +++ b/arch/powerpc/kernel/cpu_setup_ppc970.S @@ -76,7 +76,7 @@ _GLOBAL(__setup_cpu_ppc970) /* Do nothing if not running in HV mode */ mfmsr r0 rldicl. r0,r0,4,63 - beqlr + beq no_hv_mode mfspr r0,SPRN_HID0 li r11,5 /* clear DOZE and SLEEP */ @@ -90,7 +90,7 @@ _GLOBAL(__setup_cpu_ppc970MP) /* Do nothing if not running in HV mode */ mfmsr r0 rldicl. r0,r0,4,63 - beqlr + beq no_hv_mode mfspr r0,SPRN_HID0 li r11,0x15 /* clear DOZE and SLEEP */ @@ -109,6 +109,14 @@ load_hids: sync isync + /* Try to set LPES = 01 in HID4 */ + mfspr r0,SPRN_HID4 + clrldi r0,r0,1 /* clear LPES0 */ + ori r0,r0,HID4_LPES1 /* set LPES1 */ + sync + mtspr SPRN_HID4,r0 + isync + /* Save away cpu state */ LOAD_REG_ADDR(r5,cpu_state_storage) @@ -117,11 +125,21 @@ load_hids: std r3,CS_HID0(r5) mfspr r3,SPRN_HID1 std r3,CS_HID1(r5) - mfspr r3,SPRN_HID4 - std r3,CS_HID4(r5) + mfspr r4,SPRN_HID4 + std r4,CS_HID4(r5) mfspr r3,SPRN_HID5 std r3,CS_HID5(r5) + /* See if we successfully set LPES1 to 1; if not we are in Apple mode */ + andi. r4,r4,HID4_LPES1 + bnelr + +no_hv_mode: + /* Disable CPU_FTR_HVMODE and exit, since we don't have HV mode */ + ld r5,CPU_SPEC_FEATURES(r4) + LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE) + andc r5,r5,r6 + std r5,CPU_SPEC_FEATURES(r4) blr /* Called with no MMU context (typically MSR:IR/DR off) to diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 5bc06fdfa6c0..a5345380bef3 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -75,7 +75,7 @@ BEGIN_FTR_SECTION b .power7_wakeup_noloss 2: b .power7_wakeup_loss 9: -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE_206) +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) #endif /* CONFIG_PPC_P7_NAP */ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, NOTEST, 0x100) @@ -173,7 +173,7 @@ hardware_interrupt_hv: _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD, SOFTEN_TEST_PR) KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) - ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE_206) + ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) STD_EXCEPTION_PSERIES(0x600, 0x600, alignment) KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x600) diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index efeb88184182..0a5a899846bb 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -167,7 +167,7 @@ void setup_paca(struct paca_struct *new_paca) * if we do a GET_PACA() before the feature fixups have been * applied */ - if (cpu_has_feature(CPU_FTR_HVMODE_206)) + if (cpu_has_feature(CPU_FTR_HVMODE)) mtspr(SPRN_SPRG_HPACA, local_paca); #endif mtspr(SPRN_SPRG_PACA, local_paca); -- cgit v1.2.3 From 9e368f2915601cd5bc7f5fd638b58435b018bbd7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Jun 2011 00:40:08 +0000 Subject: KVM: PPC: book3s_hv: Add support for PPC970-family processors This adds support for running KVM guests in supervisor mode on those PPC970 processors that have a usable hypervisor mode. Unfortunately, Apple G5 machines have supervisor mode disabled (MSR[HV] is forced to 1), but the YDL PowerStation does have a usable hypervisor mode. There are several differences between the PPC970 and POWER7 in how guests are managed. These differences are accommodated using the CPU_FTR_ARCH_201 (PPC970) and CPU_FTR_ARCH_206 (POWER7) CPU feature bits. Notably, on PPC970: * The LPCR, LPID or RMOR registers don't exist, and the functions of those registers are provided by bits in HID4 and one bit in HID0. * External interrupts can be directed to the hypervisor, but unlike POWER7 they are masked by MSR[EE] in non-hypervisor modes and use SRR0/1 not HSRR0/1. * There is no virtual RMA (VRMA) mode; the guest must use an RMO (real mode offset) area. * The TLB entries are not tagged with the LPID, so it is necessary to flush the whole TLB on partition switch. Furthermore, when switching partitions we have to ensure that no other CPU is executing the tlbie or tlbsync instructions in either the old or the new partition, otherwise undefined behaviour can occur. * The PMU has 8 counters (PMC registers) rather than 6. * The DSCR, PURR, SPURR, AMR, AMOR, UAMOR registers don't exist. * The SLB has 64 entries rather than 32. * There is no mediated external interrupt facility, so if we switch to a guest that has a virtual external interrupt pending but the guest has MSR[EE] = 0, we have to arrange to have an interrupt pending for it so that we can get control back once it re-enables interrupts. We do that by sending ourselves an IPI with smp_send_reschedule after hard-disabling interrupts. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/exceptions-64s.S | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index f4aba938166b..54b935f2f5de 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -128,6 +128,7 @@ int main(void) DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page)); /* paca */ DEFINE(PACA_SIZE, sizeof(struct paca_struct)); + DEFINE(PACA_LOCK_TOKEN, offsetof(struct paca_struct, lock_token)); DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index)); DEFINE(PACAPROCSTART, offsetof(struct paca_struct, cpu_start)); DEFINE(PACAKSAVE, offsetof(struct paca_struct, kstack)); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index a5345380bef3..41b02c792aa3 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -171,7 +171,7 @@ hardware_interrupt_hv: KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502) FTR_SECTION_ELSE _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, - EXC_STD, SOFTEN_TEST_PR) + EXC_STD, SOFTEN_TEST_HV_201) KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) -- cgit v1.2.3