summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kvm
diff options
context:
space:
mode:
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2017-07-24 14:26:06 +1000
committerMichael Ellerman <mpe@ellerman.id.au>2017-07-26 16:41:52 +1000
commita25bd72badfa793ab5aeafd50dbd9db39f8c9179 (patch)
tree9945d409b81667476c7db1c8db2c263179379cc0 /arch/powerpc/kvm
parent029d9252b116fa52a95150819e62af1f6e420fe5 (diff)
downloadlinux-a25bd72badfa793ab5aeafd50dbd9db39f8c9179.tar.gz
linux-a25bd72badfa793ab5aeafd50dbd9db39f8c9179.tar.bz2
linux-a25bd72badfa793ab5aeafd50dbd9db39f8c9179.zip
powerpc/mm/radix: Workaround prefetch issue with KVM
There's a somewhat architectural issue with Radix MMU and KVM. When coming out of a guest with AIL (Alternate Interrupt Location, ie, MMU enabled), we start executing hypervisor code with the PID register still containing whatever the guest has been using. The problem is that the CPU can (and will) then start prefetching or speculatively load from whatever host context has that same PID (if any), thus bringing translations for that context into the TLB, which Linux doesn't know about. This can cause stale translations and subsequent crashes. Fixing this in a way that is neither racy nor a huge performance impact is difficult. We could just make the host invalidations always use broadcast forms but that would hurt single threaded programs for example. We chose to fix it instead by partitioning the PID space between guest and host. This is possible because today Linux only use 19 out of the 20 bits of PID space, so existing guests will work if we make the host use the top half of the 20 bits space. We additionally add support for a property to indicate to Linux the size of the PID register which will be useful if we eventually have processors with a larger PID space available. There is still an issue with malicious guests purposefully setting the PID register to a value in the hosts PID range. Hopefully future HW can prevent that, but in the meantime, we handle it with a pair of kludges: - On the way out of a guest, before we clear the current VCPU in the PACA, we check the PID and if it's outside of the permitted range we flush the TLB for that PID. - When context switching, if the mm is "new" on that CPU (the corresponding bit was set for the first time in the mm cpumask), we check if any sibling thread is in KVM (has a non-NULL VCPU pointer in the PACA). If that is the case, we also flush the PID for that CPU (core). This second part is needed to handle the case where a process is migrated (or starts a new pthread) on a sibling thread of the CPU coming out of KVM, as there's a window where stale translations can exist before we detect it and flush them out. A future optimization could be added by keeping track of whether the PID has ever been used and avoid doing that for completely fresh PIDs. We could similarily mark PIDs that have been the subject of a global invalidation as "fresh". But for now this will do. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> [mpe: Rework the asm to build with CONFIG_PPC_RADIX_MMU=n, drop unneeded include of kvm_book3s_asm.h] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S59
1 files changed, 51 insertions, 8 deletions
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index cb44065e2946..c52184a8efdf 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1443,12 +1443,14 @@ mc_cont:
ori r6,r6,1
mtspr SPRN_CTRLT,r6
4:
- /* Read the guest SLB and save it away */
+ /* Check if we are running hash or radix and store it in cr2 */
ld r5, VCPU_KVM(r9)
lbz r0, KVM_RADIX(r5)
- cmpwi r0, 0
+ cmpwi cr2,r0,0
+
+ /* Read the guest SLB and save it away */
li r5, 0
- bne 3f /* for radix, save 0 entries */
+ bne cr2, 3f /* for radix, save 0 entries */
lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
mtctr r0
li r6,0
@@ -1712,11 +1714,6 @@ BEGIN_FTR_SECTION_NESTED(96)
END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
22:
- /* Clear out SLB */
- li r5,0
- slbmte r5,r5
- slbia
- ptesync
/* Restore host values of some registers */
BEGIN_FTR_SECTION
@@ -1737,10 +1734,56 @@ BEGIN_FTR_SECTION
mtspr SPRN_PID, r7
mtspr SPRN_IAMR, r8
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+
+#ifdef CONFIG_PPC_RADIX_MMU
+ /*
+ * Are we running hash or radix ?
+ */
+ beq cr2,3f
+
+ /* Radix: Handle the case where the guest used an illegal PID */
+ LOAD_REG_ADDR(r4, mmu_base_pid)
+ lwz r3, VCPU_GUEST_PID(r9)
+ lwz r5, 0(r4)
+ cmpw cr0,r3,r5
+ blt 2f
+
+ /*
+ * Illegal PID, the HW might have prefetched and cached in the TLB
+ * some translations for the LPID 0 / guest PID combination which
+ * Linux doesn't know about, so we need to flush that PID out of
+ * the TLB. First we need to set LPIDR to 0 so tlbiel applies to
+ * the right context.
+ */
+ li r0,0
+ mtspr SPRN_LPID,r0
+ isync
+
+ /* Then do a congruence class local flush */
+ ld r6,VCPU_KVM(r9)
+ lwz r0,KVM_TLB_SETS(r6)
+ mtctr r0
+ li r7,0x400 /* IS field = 0b01 */
+ ptesync
+ sldi r0,r3,32 /* RS has PID */
+1: PPC_TLBIEL(7,0,2,1,1) /* RIC=2, PRS=1, R=1 */
+ addi r7,r7,0x1000
+ bdnz 1b
+ ptesync
+
+2: /* Flush the ERAT on radix P9 DD1 guest exit */
BEGIN_FTR_SECTION
PPC_INVALIDATE_ERAT
END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
+ b 4f
+#endif /* CONFIG_PPC_RADIX_MMU */
+ /* Hash: clear out SLB */
+3: li r5,0
+ slbmte r5,r5
+ slbia
+ ptesync
+4:
/*
* POWER7/POWER8 guest -> host partition switch code.
* We don't have to lock against tlbies but we do