summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNicholas Piggin <npiggin@gmail.com>2018-09-15 01:30:50 +1000
committerMichael Ellerman <mpe@ellerman.id.au>2018-09-19 21:59:44 +1000
commit82d8f4c22f3514eface7e082750bc917193d91f9 (patch)
treefb4623b052c28a5caf07bd2ff128d7931fe3ce30
parent5141c182d75b4004c41ac2dc5af081b457b3e8cb (diff)
downloadlinux-82d8f4c22f3514eface7e082750bc917193d91f9.tar.gz
linux-82d8f4c22f3514eface7e082750bc917193d91f9.tar.bz2
linux-82d8f4c22f3514eface7e082750bc917193d91f9.zip
powerpc/64s/hash: Use POWER9 SLBIA IH=3 variant in switch_slb
POWER9 introduces SLBIA IH=3, which invalidates all SLB entries and associated lookaside information that have a class value of 1, which Linux assigns to user addresses. This matches what switch_slb wants, and allows a simple fast implementation that avoids the slb_cache complexity. As a side-effect, the POWER5 < DD2.1 SLB invalidation workaround is also avoided on POWER9. Process context switching rate is improved about 2.2% for a small process that hits the slb cache which is the best case for the current code. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-rw-r--r--arch/powerpc/mm/slb.c85
-rw-r--r--arch/powerpc/xmon/xmon.c11
2 files changed, 56 insertions, 40 deletions
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index a8f27fee6a23..513c6596140d 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -279,7 +279,6 @@ static inline int esids_match(unsigned long addr1, unsigned long addr2)
/* Flush all user entries from the segment table of the current processor. */
void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
{
- unsigned long offset;
unsigned long pc = KSTK_EIP(tsk);
unsigned long stack = KSTK_ESP(tsk);
unsigned long exec_base;
@@ -291,45 +290,56 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
* which would update the slb_cache/slb_cache_ptr fields in the PACA.
*/
hard_irq_disable();
- offset = get_paca()->slb_cache_ptr;
- if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
- offset <= SLB_CACHE_ENTRIES) {
- unsigned long slbie_data = 0;
- int i;
-
- asm volatile("isync" : : : "memory");
- for (i = 0; i < offset; i++) {
- slbie_data = (unsigned long)get_paca()->slb_cache[i]
- << SID_SHIFT; /* EA */
- slbie_data |= user_segment_size(slbie_data)
- << SLBIE_SSIZE_SHIFT;
- slbie_data |= SLBIE_C; /* C set for user addresses */
- asm volatile("slbie %0" : : "r" (slbie_data));
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /*
+ * SLBIA IH=3 invalidates all Class=1 SLBEs and their
+ * associated lookaside structures, which matches what
+ * switch_slb wants. So ARCH_300 does not use the slb
+ * cache.
+ */
+ asm volatile("isync ; " PPC_SLBIA(3)" ; isync");
+ } else {
+ unsigned long offset = get_paca()->slb_cache_ptr;
+
+ if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
+ offset <= SLB_CACHE_ENTRIES) {
+ unsigned long slbie_data = 0;
+ int i;
+
+ asm volatile("isync" : : : "memory");
+ for (i = 0; i < offset; i++) {
+ /* EA */
+ slbie_data = (unsigned long)
+ get_paca()->slb_cache[i] << SID_SHIFT;
+ slbie_data |= user_segment_size(slbie_data)
+ << SLBIE_SSIZE_SHIFT;
+ slbie_data |= SLBIE_C; /* user slbs have C=1 */
+ asm volatile("slbie %0" : : "r" (slbie_data));
+ }
+
+ /* Workaround POWER5 < DD2.1 issue */
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1)
+ asm volatile("slbie %0" : : "r" (slbie_data));
+
+ asm volatile("isync" : : : "memory");
+ } else {
+ struct slb_shadow *p = get_slb_shadow();
+ unsigned long ksp_esid_data =
+ be64_to_cpu(p->save_area[KSTACK_INDEX].esid);
+ unsigned long ksp_vsid_data =
+ be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);
+
+ asm volatile("isync\n"
+ PPC_SLBIA(1) "\n"
+ "slbmte %0,%1\n"
+ "isync"
+ :: "r"(ksp_vsid_data),
+ "r"(ksp_esid_data));
}
- /* Workaround POWER5 < DD2.1 issue */
- if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1)
- asm volatile("slbie %0" : : "r" (slbie_data));
-
- asm volatile("isync" : : : "memory");
- } else {
- struct slb_shadow *p = get_slb_shadow();
- unsigned long ksp_esid_data =
- be64_to_cpu(p->save_area[KSTACK_INDEX].esid);
- unsigned long ksp_vsid_data =
- be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);
-
- asm volatile("isync\n"
- PPC_SLBIA(1) "\n"
- "slbmte %0,%1\n"
- "isync"
- :: "r"(ksp_vsid_data),
- "r"(ksp_esid_data));
-
- asm volatile("isync" : : : "memory");
+ get_paca()->slb_cache_ptr = 0;
}
- get_paca()->slb_cache_ptr = 0;
copy_mm_to_paca(mm);
/*
@@ -455,6 +465,9 @@ static void insert_slb_entry(unsigned long vsid, unsigned long ea,
enum slb_index index;
int slb_cache_index;
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ return; /* ISAv3.0B and later does not use slb_cache */
+
/*
* We are irq disabled, hence should be safe to access PACA.
*/
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 4264aedc7775..cd43c168dc1b 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2393,10 +2393,13 @@ static void dump_one_paca(int cpu)
}
}
DUMP(p, vmalloc_sllp, "%#-*x");
- DUMP(p, slb_cache_ptr, "%#-*x");
- for (i = 0; i < SLB_CACHE_ENTRIES; i++)
- printf(" %-*s[%d] = 0x%016x\n",
- 22, "slb_cache", i, p->slb_cache[i]);
+
+ if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
+ DUMP(p, slb_cache_ptr, "%#-*x");
+ for (i = 0; i < SLB_CACHE_ENTRIES; i++)
+ printf(" %-*s[%d] = 0x%016x\n",
+ 22, "slb_cache", i, p->slb_cache[i]);
+ }
DUMP(p, rfi_flush_fallback_area, "%-*px");
#endif