summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/sparc64/kernel/Makefile2
-rw-r--r--arch/sparc64/kernel/binfmt_aout32.c13
-rw-r--r--arch/sparc64/kernel/dtlb_backend.S170
-rw-r--r--arch/sparc64/kernel/dtlb_base.S109
-rw-r--r--arch/sparc64/kernel/dtlb_miss.S39
-rw-r--r--arch/sparc64/kernel/etrap.S2
-rw-r--r--arch/sparc64/kernel/head.S46
-rw-r--r--arch/sparc64/kernel/itlb_base.S79
-rw-r--r--arch/sparc64/kernel/itlb_miss.S39
-rw-r--r--arch/sparc64/kernel/ktlb.S263
-rw-r--r--arch/sparc64/kernel/process.c25
-rw-r--r--arch/sparc64/kernel/rtrap.S6
-rw-r--r--arch/sparc64/kernel/smp.c12
-rw-r--r--arch/sparc64/kernel/trampoline.S33
-rw-r--r--arch/sparc64/kernel/tsb.S169
-rw-r--r--arch/sparc64/kernel/ttable.S6
-rw-r--r--arch/sparc64/kernel/vmlinux.lds.S3
-rw-r--r--arch/sparc64/kernel/winfixup.S8
-rw-r--r--arch/sparc64/mm/Makefile2
-rw-r--r--arch/sparc64/mm/init.c91
-rw-r--r--arch/sparc64/mm/tlb.c61
-rw-r--r--arch/sparc64/mm/tsb.c84
-rw-r--r--arch/sparc64/mm/ultra.S58
-rw-r--r--include/asm-sparc64/mmu.h1
-rw-r--r--include/asm-sparc64/mmu_context.h46
-rw-r--r--include/asm-sparc64/pgalloc.h1
-rw-r--r--include/asm-sparc64/pgtable.h9
-rw-r--r--include/asm-sparc64/processor.h14
-rw-r--r--include/asm-sparc64/tlbflush.h25
-rw-r--r--include/asm-sparc64/tsb.h165
30 files changed, 693 insertions, 888 deletions
diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile
index 83d67eb18895..a482a9ffe5bc 100644
--- a/arch/sparc64/kernel/Makefile
+++ b/arch/sparc64/kernel/Makefile
@@ -38,5 +38,5 @@ else
CMODEL_CFLAG := -m64 -mcmodel=medlow
endif
-head.o: head.S ttable.S itlb_base.S dtlb_base.S dtlb_backend.S dtlb_prot.S \
+head.o: head.S ttable.S itlb_miss.S dtlb_miss.S ktlb.S tsb.S \
etrap.S rtrap.S winfixup.S entry.S
diff --git a/arch/sparc64/kernel/binfmt_aout32.c b/arch/sparc64/kernel/binfmt_aout32.c
index 202a80c24b6f..a57d7f2b6f13 100644
--- a/arch/sparc64/kernel/binfmt_aout32.c
+++ b/arch/sparc64/kernel/binfmt_aout32.c
@@ -31,6 +31,7 @@
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
static int load_aout32_binary(struct linux_binprm *, struct pt_regs * regs);
static int load_aout32_library(struct file*);
@@ -329,15 +330,9 @@ beyond_if:
current->mm->start_stack =
(unsigned long) create_aout32_tables((char __user *)bprm->p, bprm);
- if (!(orig_thr_flags & _TIF_32BIT)) {
- unsigned long pgd_cache = get_pgd_cache(current->mm->pgd);
-
- __asm__ __volatile__("stxa\t%0, [%1] %2\n\t"
- "membar #Sync"
- : /* no outputs */
- : "r" (pgd_cache),
- "r" (TSB_REG), "i" (ASI_DMMU));
- }
+ tsb_context_switch(__pa(current->mm->pgd),
+ current->mm->context.sparc64_tsb);
+
start_thread32(regs, ex.a_entry, current->mm->start_stack);
if (current->ptrace & PT_PTRACED)
send_sig(SIGTRAP, current, 0);
diff --git a/arch/sparc64/kernel/dtlb_backend.S b/arch/sparc64/kernel/dtlb_backend.S
deleted file mode 100644
index acc889a7f9c1..000000000000
--- a/arch/sparc64/kernel/dtlb_backend.S
+++ /dev/null
@@ -1,170 +0,0 @@
-/* $Id: dtlb_backend.S,v 1.16 2001/10/09 04:02:11 davem Exp $
- * dtlb_backend.S: Back end to DTLB miss replacement strategy.
- * This is included directly into the trap table.
- *
- * Copyright (C) 1996,1998 David S. Miller (davem@redhat.com)
- * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz)
- */
-
-#include <asm/pgtable.h>
-#include <asm/mmu.h>
-
-#define VALID_SZ_BITS (_PAGE_VALID | _PAGE_SZBITS)
-
-#define VPTE_BITS (_PAGE_CP | _PAGE_CV | _PAGE_P )
-#define VPTE_SHIFT (PAGE_SHIFT - 3)
-
-/* Ways we can get here:
- *
- * 1) Nucleus loads and stores to/from PA-->VA direct mappings at tl>1.
- * 2) Nucleus loads and stores to/from user/kernel window save areas.
- * 3) VPTE misses from dtlb_base and itlb_base.
- *
- * We need to extract out the PMD and PGDIR indexes from the
- * linear virtual page table access address. The PTE index
- * is at the bottom, but we are not concerned with it. Bits
- * 0 to 2 are clear since each PTE is 8 bytes in size. Each
- * PMD and PGDIR entry are 4 bytes in size. Thus, this
- * address looks something like:
- *
- * |---------------------------------------------------------------|
- * | ... | PGDIR index | PMD index | PTE index | |
- * |---------------------------------------------------------------|
- * 63 F E D C B A 3 2 0 <- bit nr
- *
- * The variable bits above are defined as:
- * A --> 3 + (PAGE_SHIFT - log2(8))
- * --> 3 + (PAGE_SHIFT - 3) - 1
- * (ie. this is "bit 3" + PAGE_SIZE - size of PTE entry in bits - 1)
- * B --> A + 1
- * C --> B + (PAGE_SHIFT - log2(4))
- * --> B + (PAGE_SHIFT - 2) - 1
- * (ie. this is "bit B" + PAGE_SIZE - size of PMD entry in bits - 1)
- * D --> C + 1
- * E --> D + (PAGE_SHIFT - log2(4))
- * --> D + (PAGE_SHIFT - 2) - 1
- * (ie. this is "bit D" + PAGE_SIZE - size of PGDIR entry in bits - 1)
- * F --> E + 1
- *
- * (Note how "B" always evalutes to PAGE_SHIFT, all the other constants
- * cancel out.)
- *
- * For 8K PAGE_SIZE (thus, PAGE_SHIFT of 13) the bit numbers are:
- * A --> 12
- * B --> 13
- * C --> 23
- * D --> 24
- * E --> 34
- * F --> 35
- *
- * For 64K PAGE_SIZE (thus, PAGE_SHIFT of 16) the bit numbers are:
- * A --> 15
- * B --> 16
- * C --> 29
- * D --> 30
- * E --> 43
- * F --> 44
- *
- * Because bits both above and below each PGDIR and PMD index need to
- * be masked out, and the index can be as long as 14 bits (when using a
- * 64K PAGE_SIZE, and thus a PAGE_SHIFT of 16), we need 3 instructions
- * to extract each index out.
- *
- * Shifts do not pair very well on UltraSPARC-I, II, IIi, and IIe, so
- * we try to avoid using them for the entire operation. We could setup
- * a mask anywhere from bit 31 down to bit 10 using the sethi instruction.
- *
- * We need a mask covering bits B --> C and one covering D --> E.
- * For 8K PAGE_SIZE these masks are 0x00ffe000 and 0x7ff000000.
- * For 64K PAGE_SIZE these masks are 0x3fff0000 and 0xfffc0000000.
- * The second in each set cannot be loaded with a single sethi
- * instruction, because the upper bits are past bit 32. We would
- * need to use a sethi + a shift.
- *
- * For the time being, we use 2 shifts and a simple "and" mask.
- * We shift left to clear the bits above the index, we shift down
- * to clear the bits below the index (sans the log2(4 or 8) bits)
- * and a mask to clear the log2(4 or 8) bits. We need therefore
- * define 4 shift counts, all of which are relative to PAGE_SHIFT.
- *
- * Although unsupportable for other reasons, this does mean that
- * 512K and 4MB page sizes would be generaally supported by the
- * kernel. (ELF binaries would break with > 64K PAGE_SIZE since
- * the sections are only aligned that strongly).
- *
- * The operations performed for extraction are thus:
- *
- * ((X << FOO_SHIFT_LEFT) >> FOO_SHIFT_RIGHT) & ~0x3
- *
- */
-
-#define A (3 + (PAGE_SHIFT - 3) - 1)
-#define B (A + 1)
-#define C (B + (PAGE_SHIFT - 2) - 1)
-#define D (C + 1)
-#define E (D + (PAGE_SHIFT - 2) - 1)
-#define F (E + 1)
-
-#define PMD_SHIFT_LEFT (64 - D)
-#define PMD_SHIFT_RIGHT (64 - (D - B) - 2)
-#define PGDIR_SHIFT_LEFT (64 - F)
-#define PGDIR_SHIFT_RIGHT (64 - (F - D) - 2)
-#define LOW_MASK_BITS 0x3
-
-/* TLB1 ** ICACHE line 1: tl1 DTLB and quick VPTE miss */
- ldxa [%g1 + %g1] ASI_DMMU, %g4 ! Get TAG_ACCESS
- add %g3, %g3, %g5 ! Compute VPTE base
- cmp %g4, %g5 ! VPTE miss?
- bgeu,pt %xcc, 1f ! Continue here
- andcc %g4, TAG_CONTEXT_BITS, %g5 ! tl0 miss Nucleus test
- ba,a,pt %xcc, from_tl1_trap ! Fall to tl0 miss
-1: sllx %g6, VPTE_SHIFT, %g4 ! Position TAG_ACCESS
- or %g4, %g5, %g4 ! Prepare TAG_ACCESS
-
-/* TLB1 ** ICACHE line 2: Quick VPTE miss */
- mov TSB_REG, %g1 ! Grab TSB reg
- ldxa [%g1] ASI_DMMU, %g5 ! Doing PGD caching?
- sllx %g6, PMD_SHIFT_LEFT, %g1 ! Position PMD offset
- be,pn %xcc, sparc64_vpte_nucleus ! Is it from Nucleus?
- srlx %g1, PMD_SHIFT_RIGHT, %g1 ! Mask PMD offset bits
- brnz,pt %g5, sparc64_vpte_continue ! Yep, go like smoke
- andn %g1, LOW_MASK_BITS, %g1 ! Final PMD mask
- sllx %g6, PGDIR_SHIFT_LEFT, %g5 ! Position PGD offset
-
-/* TLB1 ** ICACHE line 3: Quick VPTE miss */
- srlx %g5, PGDIR_SHIFT_RIGHT, %g5 ! Mask PGD offset bits
- andn %g5, LOW_MASK_BITS, %g5 ! Final PGD mask
- lduwa [%g7 + %g5] ASI_PHYS_USE_EC, %g5! Load PGD
- brz,pn %g5, vpte_noent ! Valid?
-sparc64_kpte_continue:
- sllx %g5, 11, %g5 ! Shift into place
-sparc64_vpte_continue:
- lduwa [%g5 + %g1] ASI_PHYS_USE_EC, %g5! Load PMD
- sllx %g5, 11, %g5 ! Shift into place
- brz,pn %g5, vpte_noent ! Valid?
-
-/* TLB1 ** ICACHE line 4: Quick VPTE miss */
- mov (VALID_SZ_BITS >> 61), %g1 ! upper vpte into %g1
- sllx %g1, 61, %g1 ! finish calc
- or %g5, VPTE_BITS, %g5 ! Prepare VPTE data
- or %g5, %g1, %g5 ! ...
- mov TLB_SFSR, %g1 ! Restore %g1 value
- stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Load VPTE into TLB
- stxa %g4, [%g1 + %g1] ASI_DMMU ! Restore previous TAG_ACCESS
- retry ! Load PTE once again
-
-#undef VALID_SZ_BITS
-#undef VPTE_SHIFT
-#undef VPTE_BITS
-#undef A
-#undef B
-#undef C
-#undef D
-#undef E
-#undef F
-#undef PMD_SHIFT_LEFT
-#undef PMD_SHIFT_RIGHT
-#undef PGDIR_SHIFT_LEFT
-#undef PGDIR_SHIFT_RIGHT
-#undef LOW_MASK_BITS
-
diff --git a/arch/sparc64/kernel/dtlb_base.S b/arch/sparc64/kernel/dtlb_base.S
deleted file mode 100644
index 6528786840c0..000000000000
--- a/arch/sparc64/kernel/dtlb_base.S
+++ /dev/null
@@ -1,109 +0,0 @@
-/* $Id: dtlb_base.S,v 1.17 2001/10/11 22:33:52 davem Exp $
- * dtlb_base.S: Front end to DTLB miss replacement strategy.
- * This is included directly into the trap table.
- *
- * Copyright (C) 1996,1998 David S. Miller (davem@redhat.com)
- * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz)
- */
-
-#include <asm/pgtable.h>
-#include <asm/mmu.h>
-
-/* %g1 TLB_SFSR (%g1 + %g1 == TLB_TAG_ACCESS)
- * %g2 (KERN_HIGHBITS | KERN_LOWBITS)
- * %g3 VPTE base (0xfffffffe00000000) Spitfire/Blackbird (44-bit VA space)
- * (0xffe0000000000000) Cheetah (64-bit VA space)
- * %g7 __pa(current->mm->pgd)
- *
- * The VPTE base value is completely magic, but note that
- * few places in the kernel other than these TLB miss
- * handlers know anything about the VPTE mechanism or
- * how it works (see VPTE_SIZE, TASK_SIZE and PTRS_PER_PGD).
- * Consider the 44-bit VADDR Ultra-I/II case as an example:
- *
- * VA[0 : (1<<43)] produce VPTE index [%g3 : 0]
- * VA[0 : -(1<<43)] produce VPTE index [%g3-(1<<(43-PAGE_SHIFT+3)) : %g3]
- *
- * For Cheetah's 64-bit VADDR space this is:
- *
- * VA[0 : (1<<63)] produce VPTE index [%g3 : 0]
- * VA[0 : -(1<<63)] produce VPTE index [%g3-(1<<(63-PAGE_SHIFT+3)) : %g3]
- *
- * If you're paying attention you'll notice that this means half of
- * the VPTE table is above %g3 and half is below, low VA addresses
- * map progressively upwards from %g3, and high VA addresses map
- * progressively upwards towards %g3. This trick was needed to make
- * the same 8 instruction handler work both for Spitfire/Blackbird's
- * peculiar VA space hole configuration and the full 64-bit VA space
- * one of Cheetah at the same time.
- */
-
-/* Ways we can get here:
- *
- * 1) Nucleus loads and stores to/from PA-->VA direct mappings.
- * 2) Nucleus loads and stores to/from vmalloc() areas.
- * 3) User loads and stores.
- * 4) User space accesses by nucleus at tl0
- */
-
-#if PAGE_SHIFT == 13
-/*
- * To compute vpte offset, we need to do ((addr >> 13) << 3),
- * which can be optimized to (addr >> 10) if bits 10/11/12 can
- * be guaranteed to be 0 ... mmu_context.h does guarantee this
- * by only using 10 bits in the hwcontext value.
- */
-#define CREATE_VPTE_OFFSET1(r1, r2) nop
-#define CREATE_VPTE_OFFSET2(r1, r2) \
- srax r1, 10, r2
-#else
-#define CREATE_VPTE_OFFSET1(r1, r2) \
- srax r1, PAGE_SHIFT, r2
-#define CREATE_VPTE_OFFSET2(r1, r2) \
- sllx r2, 3, r2
-#endif
-
-/* DTLB ** ICACHE line 1: Quick user TLB misses */
- mov TLB_SFSR, %g1
- ldxa [%g1 + %g1] ASI_DMMU, %g4 ! Get TAG_ACCESS
- andcc %g4, TAG_CONTEXT_BITS, %g0 ! From Nucleus?
-from_tl1_trap:
- rdpr %tl, %g5 ! For TL==3 test
- CREATE_VPTE_OFFSET1(%g4, %g6) ! Create VPTE offset
- be,pn %xcc, kvmap ! Yep, special processing
- CREATE_VPTE_OFFSET2(%g4, %g6) ! Create VPTE offset
- cmp %g5, 4 ! Last trap level?
-
-/* DTLB ** ICACHE line 2: User finish + quick kernel TLB misses */
- be,pn %xcc, longpath ! Yep, cannot risk VPTE miss
- nop ! delay slot
- ldxa [%g3 + %g6] ASI_S, %g5 ! Load VPTE
-1: brgez,pn %g5, longpath ! Invalid, branch out
- nop ! Delay-slot
-9: stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Reload TLB
- retry ! Trap return
- nop
-
-/* DTLB ** ICACHE line 3: winfixups+real_faults */
-longpath:
- rdpr %pstate, %g5 ! Move into alternate globals
- wrpr %g5, PSTATE_AG|PSTATE_MG, %pstate
- rdpr %tl, %g4 ! See where we came from.
- cmp %g4, 1 ! Is etrap/rtrap window fault?
- mov TLB_TAG_ACCESS, %g4 ! Prepare for fault processing
- ldxa [%g4] ASI_DMMU, %g5 ! Load faulting VA page
- be,pt %xcc, sparc64_realfault_common ! Jump to normal fault handling
- mov FAULT_CODE_DTLB, %g4 ! It was read from DTLB
-
-/* DTLB ** ICACHE line 4: Unused... */
- ba,a,pt %xcc, winfix_trampoline ! Call window fixup code
- nop
- nop
- nop
- nop
- nop
- nop
- nop
-
-#undef CREATE_VPTE_OFFSET1
-#undef CREATE_VPTE_OFFSET2
diff --git a/arch/sparc64/kernel/dtlb_miss.S b/arch/sparc64/kernel/dtlb_miss.S
new file mode 100644
index 000000000000..d0f1565cb564
--- /dev/null
+++ b/arch/sparc64/kernel/dtlb_miss.S
@@ -0,0 +1,39 @@
+/* DTLB ** ICACHE line 1: Context 0 check and TSB load */
+ ldxa [%g0] ASI_DMMU_TSB_8KB_PTR, %g1 ! Get TSB 8K pointer
+ ldxa [%g0] ASI_DMMU, %g6 ! Get TAG TARGET
+ srlx %g6, 48, %g5 ! Get context
+ brz,pn %g5, kvmap_dtlb ! Context 0 processing
+ nop ! Delay slot (fill me)
+ ldda [%g1] ASI_NUCLEUS_QUAD_LDD, %g4 ! Load TSB entry
+ nop ! Push branch to next I$ line
+ cmp %g4, %g6 ! Compare TAG
+
+/* DTLB ** ICACHE line 2: TSB compare and TLB load */
+ bne,pn %xcc, tsb_miss_dtlb ! Miss
+ mov FAULT_CODE_DTLB, %g3
+ stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Load TLB
+ retry ! Trap done
+ nop
+ nop
+ nop
+ nop
+
+/* DTLB ** ICACHE line 3: */
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+
+/* DTLB ** ICACHE line 4: */
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
diff --git a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S
index 0d8eba21111b..567dbb765c34 100644
--- a/arch/sparc64/kernel/etrap.S
+++ b/arch/sparc64/kernel/etrap.S
@@ -99,6 +99,7 @@ etrap_irq:
wrpr %g0, ETRAP_PSTATE2, %pstate
mov %l6, %g6
#ifdef CONFIG_SMP
+#error IMMU TSB usage must be fixed
mov TSB_REG, %g3
ldxa [%g3] ASI_IMMU, %g5
#endif
@@ -248,6 +249,7 @@ scetrap: rdpr %pil, %g2
mov %l6, %g6
stx %i7, [%sp + PTREGS_OFF + PT_V9_I7]
#ifdef CONFIG_SMP
+#error IMMU TSB usage must be fixed
mov TSB_REG, %g3
ldxa [%g3] ASI_IMMU, %g5
#endif
diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
index b49dcd4504b0..d00e20693be1 100644
--- a/arch/sparc64/kernel/head.S
+++ b/arch/sparc64/kernel/head.S
@@ -429,17 +429,6 @@ setup_trap_table:
*
* %g6 --> current_thread_info()
*
- * MMU Globals (PSTATE_MG):
- *
- * %g1 --> TLB_SFSR
- * %g2 --> ((_PAGE_VALID | _PAGE_SZ4MB |
- * _PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W)
- * ^ 0xfffff80000000000)
- * (this %g2 value is used for computing the PAGE_OFFSET kernel
- * TLB entries quickly, the virtual address of the fault XOR'd
- * with this %g2 value is the PTE to load into the TLB)
- * %g3 --> VPTE_BASE_CHEETAH or VPTE_BASE_SPITFIRE
- *
* Interrupt Globals (PSTATE_IG, setup by init_irqwork_curcpu()):
*
* %g6 --> __irq_work[smp_processor_id()]
@@ -450,40 +439,6 @@ setup_trap_table:
wrpr %o1, PSTATE_AG, %pstate
mov %o2, %g6
-#define KERN_HIGHBITS ((_PAGE_VALID|_PAGE_SZ4MB)^0xfffff80000000000)
-#define KERN_LOWBITS (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W)
- wrpr %o1, PSTATE_MG, %pstate
- mov TSB_REG, %g1
- stxa %g0, [%g1] ASI_DMMU
- membar #Sync
- stxa %g0, [%g1] ASI_IMMU
- membar #Sync
- mov TLB_SFSR, %g1
- sethi %uhi(KERN_HIGHBITS), %g2
- or %g2, %ulo(KERN_HIGHBITS), %g2
- sllx %g2, 32, %g2
- or %g2, KERN_LOWBITS, %g2
-
- BRANCH_IF_ANY_CHEETAH(g3,g7,8f)
- ba,pt %xcc, 9f
- nop
-
-8:
- sethi %uhi(VPTE_BASE_CHEETAH), %g3
- or %g3, %ulo(VPTE_BASE_CHEETAH), %g3
- ba,pt %xcc, 2f
- sllx %g3, 32, %g3
-
-9:
- sethi %uhi(VPTE_BASE_SPITFIRE), %g3
- or %g3, %ulo(VPTE_BASE_SPITFIRE), %g3
- sllx %g3, 32, %g3
-
-2:
- clr %g7
-#undef KERN_HIGHBITS
-#undef KERN_LOWBITS
-
/* Kill PROM timer */
sethi %hi(0x80000000), %o2
sllx %o2, 32, %o2
@@ -538,6 +493,7 @@ sparc64_boot_end:
#include "systbls.S"
#include "ktlb.S"
+#include "tsb.S"
#include "etrap.S"
#include "rtrap.S"
#include "winfixup.S"
diff --git a/arch/sparc64/kernel/itlb_base.S b/arch/sparc64/kernel/itlb_base.S
deleted file mode 100644
index 4951ff8f6877..000000000000
--- a/arch/sparc64/kernel/itlb_base.S
+++ /dev/null
@@ -1,79 +0,0 @@
-/* $Id: itlb_base.S,v 1.12 2002/02/09 19:49:30 davem Exp $
- * itlb_base.S: Front end to ITLB miss replacement strategy.
- * This is included directly into the trap table.
- *
- * Copyright (C) 1996,1998 David S. Miller (davem@redhat.com)
- * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz)
- */
-
-#if PAGE_SHIFT == 13
-/*
- * To compute vpte offset, we need to do ((addr >> 13) << 3),
- * which can be optimized to (addr >> 10) if bits 10/11/12 can
- * be guaranteed to be 0 ... mmu_context.h does guarantee this
- * by only using 10 bits in the hwcontext value.
- */
-#define CREATE_VPTE_OFFSET1(r1, r2) \
- srax r1, 10, r2
-#define CREATE_VPTE_OFFSET2(r1, r2) nop
-#else /* PAGE_SHIFT */
-#define CREATE_VPTE_OFFSET1(r1, r2) \
- srax r1, PAGE_SHIFT, r2
-#define CREATE_VPTE_OFFSET2(r1, r2) \
- sllx r2, 3, r2
-#endif /* PAGE_SHIFT */
-
-
-/* Ways we can get here:
- *
- * 1) Nucleus instruction misses from module code.
- * 2) All user instruction misses.
- *
- * All real page faults merge their code paths to the
- * sparc64_realfault_common label below.
- */
-
-/* ITLB ** ICACHE line 1: Quick user TLB misses */
- mov TLB_SFSR, %g1
- ldxa [%g1 + %g1] ASI_IMMU, %g4 ! Get TAG_ACCESS
- CREATE_VPTE_OFFSET1(%g4, %g6) ! Create VPTE offset
- CREATE_VPTE_OFFSET2(%g4, %g6) ! Create VPTE offset
- ldxa [%g3 + %g6] ASI_P, %g5 ! Load VPTE
-1: brgez,pn %g5, 3f ! Not valid, branch out
- sethi %hi(_PAGE_EXEC), %g4 ! Delay-slot
- andcc %g5, %g4, %g0 ! Executable?
-
-/* ITLB ** ICACHE line 2: Real faults */
- be,pn %xcc, 3f ! Nope, branch.
- nop ! Delay-slot
-2: stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Load PTE into TLB
- retry ! Trap return
-3: rdpr %pstate, %g4 ! Move into alt-globals
- wrpr %g4, PSTATE_AG|PSTATE_MG, %pstate
- rdpr %tpc, %g5 ! And load faulting VA
- mov FAULT_CODE_ITLB, %g4 ! It was read from ITLB
-
-/* ITLB ** ICACHE line 3: Finish faults */
-sparc64_realfault_common: ! Called by dtlb_miss
- stb %g4, [%g6 + TI_FAULT_CODE]
- stx %g5, [%g6 + TI_FAULT_ADDR]
- ba,pt %xcc, etrap ! Save state
-1: rd %pc, %g7 ! ...
- call do_sparc64_fault ! Call fault handler
- add %sp, PTREGS_OFF, %o0! Compute pt_regs arg
- ba,pt %xcc, rtrap_clr_l6 ! Restore cpu state
- nop
-
-/* ITLB ** ICACHE line 4: Window fixups */
-winfix_trampoline:
- rdpr %tpc, %g3 ! Prepare winfixup TNPC
- or %g3, 0x7c, %g3 ! Compute branch offset
- wrpr %g3, %tnpc ! Write it into TNPC
- done ! Do it to it
- nop
- nop
- nop
- nop
-
-#undef CREATE_VPTE_OFFSET1
-#undef CREATE_VPTE_OFFSET2
diff --git a/arch/sparc64/kernel/itlb_miss.S b/arch/sparc64/kernel/itlb_miss.S
new file mode 100644
index 000000000000..6b6c8fee04bd
--- /dev/null
+++ b/arch/sparc64/kernel/itlb_miss.S
@@ -0,0 +1,39 @@
+/* ITLB ** ICACHE line 1: Context 0 check and TSB load */
+ ldxa [%g0] ASI_IMMU_TSB_8KB_PTR, %g1 ! Get TSB 8K pointer
+ ldxa [%g0] ASI_IMMU, %g6 ! Get TAG TARGET
+ srlx %g6, 48, %g5 ! Get context
+ brz,pn %g5, kvmap_itlb ! Context 0 processing
+ nop ! Delay slot (fill me)
+ ldda [%g1] ASI_NUCLEUS_QUAD_LDD, %g4 ! Load TSB entry
+ cmp %g4, %g6 ! Compare TAG
+ sethi %hi(_PAGE_EXEC), %g4 ! Setup exec check
+
+/* ITLB ** ICACHE line 2: TSB compare and TLB load */
+ bne,pn %xcc, tsb_miss_itlb ! Miss
+ mov FAULT_CODE_ITLB, %g3
+ andcc %g5, %g4, %g0 ! Executable?
+ be,pn %xcc, tsb_do_fault
+ nop ! Delay slot, fill me
+ stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Load TLB
+ retry ! Trap done
+ nop
+
+/* ITLB ** ICACHE line 3: */
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+
+/* ITLB ** ICACHE line 4: */
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
diff --git a/arch/sparc64/kernel/ktlb.S b/arch/sparc64/kernel/ktlb.S
index d9244d3c9f73..2b5e71b68882 100644
--- a/arch/sparc64/kernel/ktlb.S
+++ b/arch/sparc64/kernel/ktlb.S
@@ -4,191 +4,170 @@
* Copyright (C) 1996 Eddie C. Dost (ecd@brainaid.de)
* Copyright (C) 1996 Miguel de Icaza (miguel@nuclecu.unam.mx)
* Copyright (C) 1996,98,99 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
-*/
+ */
#include <linux/config.h>
#include <asm/head.h>
#include <asm/asi.h>
#include <asm/page.h>
#include <asm/pgtable.h>
+#include <asm/tsb.h>
.text
.align 32
-/*
- * On a second level vpte miss, check whether the original fault is to the OBP
- * range (note that this is only possible for instruction miss, data misses to
- * obp range do not use vpte). If so, go back directly to the faulting address.
- * This is because we want to read the tpc, otherwise we have no way of knowing
- * the 8k aligned faulting address if we are using >8k kernel pagesize. This
- * also ensures no vpte range addresses are dropped into tlb while obp is
- * executing (see inherit_locked_prom_mappings() rant).
- */
-sparc64_vpte_nucleus:
- /* Note that kvmap below has verified that the address is
- * in the range MODULES_VADDR --> VMALLOC_END already. So
- * here we need only check if it is an OBP address or not.
- */
+ .globl kvmap_itlb
+kvmap_itlb:
+ /* g6: TAG TARGET */
+ mov TLB_TAG_ACCESS, %g4
+ ldxa [%g4] ASI_IMMU, %g4
+
+kvmap_itlb_nonlinear:
+ /* Catch kernel NULL pointer calls. */
+ sethi %hi(PAGE_SIZE), %g5
+ cmp %g4, %g5
+ bleu,pn %xcc, kvmap_dtlb_longpath
+ nop
+
+ KERN_TSB_LOOKUP_TL1(%g4, %g6, %g5, %g1, %g2, %g3, kvmap_itlb_load)
+
+kvmap_itlb_tsb_miss:
sethi %hi(LOW_OBP_ADDRESS), %g5
cmp %g4, %g5
- blu,pn %xcc, kern_vpte
+ blu,pn %xcc, kvmap_itlb_vmalloc_addr
mov 0x1, %g5
sllx %g5, 32, %g5
cmp %g4, %g5
- blu,pn %xcc, vpte_insn_obp
+ blu,pn %xcc, kvmap_itlb_obp
nop
- /* These two instructions are patched by paginig_init(). */
-kern_vpte:
- sethi %hi(swapper_pgd_zero), %g5
- lduw [%g5 + %lo(swapper_pgd_zero)], %g5
-
- /* With kernel PGD in %g5, branch back into dtlb_backend. */
- ba,pt %xcc, sparc64_kpte_continue
- andn %g1, 0x3, %g1 /* Finish PMD offset adjustment. */
-
-vpte_noent:
- /* Restore previous TAG_ACCESS, %g5 is zero, and we will
- * skip over the trap instruction so that the top level
- * TLB miss handler will thing this %g5 value is just an
- * invalid PTE, thus branching to full fault processing.
- */
- mov TLB_SFSR, %g1
- stxa %g4, [%g1 + %g1] ASI_DMMU
- done
-
-vpte_insn_obp:
- /* Behave as if we are at TL0. */
- wrpr %g0, 1, %tl
- rdpr %tpc, %g4 /* Find original faulting iaddr */
- srlx %g4, 13, %g4 /* Throw out context bits */
- sllx %g4, 13, %g4 /* g4 has vpn + ctx0 now */
-
- /* Restore previous TAG_ACCESS. */
- mov TLB_SFSR, %g1
- stxa %g4, [%g1 + %g1] ASI_IMMU
-
- sethi %hi(prom_trans), %g5
- or %g5, %lo(prom_trans), %g5
-
-1: ldx [%g5 + 0x00], %g6 ! base
- brz,a,pn %g6, longpath ! no more entries, fail
- mov TLB_SFSR, %g1 ! and restore %g1
- ldx [%g5 + 0x08], %g1 ! len
- add %g6, %g1, %g1 ! end
- cmp %g6, %g4
- bgu,pt %xcc, 2f
- cmp %g4, %g1
- bgeu,pt %xcc, 2f
- ldx [%g5 + 0x10], %g1 ! PTE
-
- /* TLB load, restore %g1, and return from trap. */
- sub %g4, %g6, %g6
- add %g1, %g6, %g5
- mov TLB_SFSR, %g1
- stxa %g5, [%g0] ASI_ITLB_DATA_IN
- retry
+kvmap_itlb_vmalloc_addr:
+ KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath)
+
+ TSB_LOCK_TAG(%g1, %g2, %g4)
+
+ /* Load and check PTE. */
+ ldxa [%g5] ASI_PHYS_USE_EC, %g5
+ brgez,a,pn %g5, kvmap_itlb_longpath
+ stx %g0, [%g1]
-2: ba,pt %xcc, 1b
- add %g5, (3 * 8), %g5 ! next entry
-
-kvmap_do_obp:
- sethi %hi(prom_trans), %g5
- or %g5, %lo(prom_trans), %g5
- srlx %g4, 13, %g4
- sllx %g4, 13, %g4
-
-1: ldx [%g5 + 0x00], %g6 ! base
- brz,a,pn %g6, longpath ! no more entries, fail
- mov TLB_SFSR, %g1 ! and restore %g1
- ldx [%g5 + 0x08], %g1 ! len
- add %g6, %g1, %g1 ! end
- cmp %g6, %g4
- bgu,pt %xcc, 2f
- cmp %g4, %g1
- bgeu,pt %xcc, 2f
- ldx [%g5 + 0x10], %g1 ! PTE
-
- /* TLB load, restore %g1, and return from trap. */
- sub %g4, %g6, %g6
- add %g1, %g6, %g5
- mov TLB_SFSR, %g1
- stxa %g5, [%g0] ASI_DTLB_DATA_IN
+ TSB_WRITE(%g1, %g5, %g6)
+
+ /* fallthrough to TLB load */
+
+kvmap_itlb_load:
+ stxa %g5, [%g0] ASI_ITLB_DATA_IN ! Reload TLB
retry
-2: ba,pt %xcc, 1b
- add %g5, (3 * 8), %g5 ! next entry
+kvmap_itlb_longpath:
+ rdpr %pstate, %g5
+ wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate
+ rdpr %tpc, %g5
+ ba,pt %xcc, sparc64_realfault_common
+ mov FAULT_CODE_ITLB, %g4
+
+kvmap_itlb_obp:
+ OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_itlb_longpath)
+
+ TSB_LOCK_TAG(%g1, %g2, %g4)
+
+ TSB_WRITE(%g1, %g5, %g6)
+
+ ba,pt %xcc, kvmap_itlb_load
+ nop
+
+kvmap_dtlb_obp:
+ OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_dtlb_longpath)
+
+ TSB_LOCK_TAG(%g1, %g2, %g4)
+
+ TSB_WRITE(%g1, %g5, %g6)
+
+ ba,pt %xcc, kvmap_dtlb_load
+ nop
-/*
- * On a first level data miss, check whether this is to the OBP range (note
- * that such accesses can be made by prom, as well as by kernel using
- * prom_getproperty on "address"), and if so, do not use vpte access ...
- * rather, use information saved during inherit_prom_mappings() using 8k
- * pagesize.
- */
.align 32
-kvmap:
- brgez,pn %g4, kvmap_nonlinear
+ .globl kvmap_dtlb
+kvmap_dtlb:
+ /* %g6: TAG TARGET */
+ mov TLB_TAG_ACCESS, %g4
+ ldxa [%g4] ASI_DMMU, %g4
+ brgez,pn %g4, kvmap_dtlb_nonlinear
nop
-#ifdef CONFIG_DEBUG_PAGEALLOC
+#define KERN_HIGHBITS ((_PAGE_VALID|_PAGE_SZ4MB)^0xfffff80000000000)
+#define KERN_LOWBITS (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W)
+
+ sethi %uhi(KERN_HIGHBITS), %g2
+ or %g2, %ulo(KERN_HIGHBITS), %g2
+ sllx %g2, 32, %g2
+ or %g2, KERN_LOWBITS, %g2
+
+#undef KERN_HIGHBITS
+#undef KERN_LOWBITS
+
.globl kvmap_linear_patch
kvmap_linear_patch:
-#endif
- ba,pt %xcc, kvmap_load
+ ba,pt %xcc, kvmap_dtlb_load
xor %g2, %g4, %g5
-#ifdef CONFIG_DEBUG_PAGEALLOC
- sethi %hi(swapper_pg_dir), %g5
- or %g5, %lo(swapper_pg_dir), %g5
- sllx %g4, 64 - (PGDIR_SHIFT + PGDIR_BITS), %g6
- srlx %g6, 64 - PAGE_SHIFT, %g6
- andn %g6, 0x3, %g6
- lduw [%g5 + %g6], %g5
- brz,pn %g5, longpath
- sllx %g4, 64 - (PMD_SHIFT + PMD_BITS), %g6
- srlx %g6, 64 - PAGE_SHIFT, %g6
- sllx %g5, 11, %g5
- andn %g6, 0x3, %g6
- lduwa [%g5 + %g6] ASI_PHYS_USE_EC, %g5
- brz,pn %g5, longpath
- sllx %g4, 64 - PMD_SHIFT, %g6
- srlx %g6, 64 - PAGE_SHIFT, %g6
- sllx %g5, 11, %g5
- andn %g6, 0x7, %g6
- ldxa [%g5 + %g6] ASI_PHYS_USE_EC, %g5
- brz,pn %g5, longpath
+kvmap_dtlb_vmalloc_addr:
+ KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath)
+
+ TSB_LOCK_TAG(%g1, %g2, %g4)
+
+ /* Load and check PTE. */
+ ldxa [%g5] ASI_PHYS_USE_EC, %g5
+ brgez,a,pn %g5, kvmap_dtlb_longpath
+ stx %g0, [%g1]
+
+ TSB_WRITE(%g1, %g5, %g6)
+
+ /* fallthrough to TLB load */
+
+kvmap_dtlb_load:
+ stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Reload TLB
+ retry
+
+kvmap_dtlb_nonlinear:
+ /* Catch kernel NULL pointer derefs. */
+ sethi %hi(PAGE_SIZE), %g5
+ cmp %g4, %g5
+ bleu,pn %xcc, kvmap_dtlb_longpath
nop
- ba,a,pt %xcc, kvmap_load
-#endif
-kvmap_nonlinear:
+ KERN_TSB_LOOKUP_TL1(%g4, %g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load)
+
+kvmap_dtlb_tsbmiss:
sethi %hi(MODULES_VADDR), %g5
cmp %g4, %g5
- blu,pn %xcc, longpath
+ blu,pn %xcc, kvmap_dtlb_longpath
mov (VMALLOC_END >> 24), %g5
sllx %g5, 24, %g5
cmp %g4, %g5
- bgeu,pn %xcc, longpath
+ bgeu,pn %xcc, kvmap_dtlb_longpath
nop
kvmap_check_obp:
sethi %hi(LOW_OBP_ADDRESS), %g5
cmp %g4, %g5
- blu,pn %xcc, kvmap_vmalloc_addr
+ blu,pn %xcc, kvmap_dtlb_vmalloc_addr
mov 0x1, %g5
sllx %g5, 32, %g5
cmp %g4, %g5
- blu,pn %xcc, kvmap_do_obp
+ blu,pn %xcc, kvmap_dtlb_obp
nop
-
-kvmap_vmalloc_addr:
- /* If we get here, a vmalloc addr was accessed, load kernel VPTE. */
- ldxa [%g3 + %g6] ASI_N, %g5
- brgez,pn %g5, longpath
+ ba,pt %xcc, kvmap_dtlb_vmalloc_addr
nop
-kvmap_load:
- /* PTE is valid, load into TLB and return from trap. */
- stxa %g5, [%g0] ASI_DTLB_DATA_IN ! Reload TLB
- retry
+kvmap_dtlb_longpath:
+ rdpr %pstate, %g5
+ wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate
+ rdpr %tl, %g4
+ cmp %g4, 1
+ mov TLB_TAG_ACCESS, %g4
+ ldxa [%g4] ASI_DMMU, %g5
+ be,pt %xcc, sparc64_realfault_common
+ mov FAULT_CODE_DTLB, %g4
+ ba,pt %xcc, winfix_trampoline
+ nop
diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c
index 059b0d025224..2784aab0d3e5 100644
--- a/arch/sparc64/kernel/process.c
+++ b/arch/sparc64/kernel/process.c
@@ -44,6 +44,7 @@
#include <asm/fpumacro.h>
#include <asm/head.h>
#include <asm/cpudata.h>
+#include <asm/mmu_context.h>
#include <asm/unistd.h>
/* #define VERBOSE_SHOWREGS */
@@ -433,30 +434,16 @@ void exit_thread(void)
void flush_thread(void)
{
struct thread_info *t = current_thread_info();
+ struct mm_struct *mm;
if (t->flags & _TIF_ABI_PENDING)
t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT);
- if (t->task->mm) {
- unsigned long pgd_cache = 0UL;
- if (test_thread_flag(TIF_32BIT)) {
- struct mm_struct *mm = t->task->mm;
- pgd_t *pgd0 = &mm->pgd[0];
- pud_t *pud0 = pud_offset(pgd0, 0);
+ mm = t->task->mm;
+ if (mm)
+ tsb_context_switch(__pa(mm->pgd),
+ mm->context.sparc64_tsb);
- if (pud_none(*pud0)) {
- pmd_t *page = pmd_alloc_one(mm, 0);
- pud_set(pud0, page);
- }
- pgd_cache = get_pgd_cache(pgd0);
- }
- __asm__ __volatile__("stxa %0, [%1] %2\n\t"
- "membar #Sync"
- : /* no outputs */
- : "r" (pgd_cache),
- "r" (TSB_REG),
- "i" (ASI_DMMU));
- }
set_thread_wsaved(0);
/* Turn off performance counters if on. */
diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S
index b80eba0081ca..213eb4a9d8a4 100644
--- a/arch/sparc64/kernel/rtrap.S
+++ b/arch/sparc64/kernel/rtrap.S
@@ -223,10 +223,14 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1
ldx [%sp + PTREGS_OFF + PT_V9_G3], %g3
ldx [%sp + PTREGS_OFF + PT_V9_G4], %g4
ldx [%sp + PTREGS_OFF + PT_V9_G5], %g5
+#ifdef CONFIG_SMP
+#error IMMU TSB usage must be fixed
mov TSB_REG, %g6
brnz,a,pn %l3, 1f
ldxa [%g6] ASI_IMMU, %g5
-1: ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6
+#endif
+1:
+ ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6
ldx [%sp + PTREGS_OFF + PT_V9_G7], %g7
wrpr %g0, RTRAP_PSTATE_AG_IRQOFF, %pstate
ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 1f7ad8a69052..d2d3369e7b5d 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -123,6 +123,7 @@ extern void inherit_locked_prom_mappings(int save_p);
static inline void cpu_setup_percpu_base(unsigned long cpu_id)
{
+#error IMMU TSB usage must be fixed
__asm__ __volatile__("mov %0, %%g5\n\t"
"stxa %0, [%1] %2\n\t"
"membar #Sync"
@@ -662,8 +663,6 @@ void smp_call_function_client(int irq, struct pt_regs *regs)
extern unsigned long xcall_flush_tlb_mm;
extern unsigned long xcall_flush_tlb_pending;
extern unsigned long xcall_flush_tlb_kernel_range;
-extern unsigned long xcall_flush_tlb_all_spitfire;
-extern unsigned long xcall_flush_tlb_all_cheetah;
extern unsigned long xcall_report_regs;
extern unsigned long xcall_receive_signal;
@@ -794,15 +793,6 @@ void smp_report_regs(void)
smp_cross_call(&xcall_report_regs, 0, 0, 0);
}
-void smp_flush_tlb_all(void)
-{
- if (tlb_type == spitfire)
- smp_cross_call(&xcall_flush_tlb_all_spitfire, 0, 0, 0);
- else
- smp_cross_call(&xcall_flush_tlb_all_cheetah, 0, 0, 0);
- __flush_tlb_all();
-}
-
/* We know that the window frames of the user have been flushed
* to the stack before we get here because all callers of us
* are flush_tlb_*() routines, and these run after flush_cache_*()
diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S
index 9478551cb020..782d8c4973e4 100644
--- a/arch/sparc64/kernel/trampoline.S
+++ b/arch/sparc64/kernel/trampoline.S
@@ -295,39 +295,6 @@ do_unlock:
wrpr %g5, %tba
mov %o2, %g6
- wrpr %o1, PSTATE_MG, %pstate
-#define KERN_HIGHBITS ((_PAGE_VALID|_PAGE_SZ4MB)^0xfffff80000000000)
-#define KERN_LOWBITS (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W)
-
- mov TSB_REG, %g1
- stxa %g0, [%g1] ASI_DMMU
- membar #Sync
- mov TLB_SFSR, %g1
- sethi %uhi(KERN_HIGHBITS), %g2
- or %g2, %ulo(KERN_HIGHBITS), %g2
- sllx %g2, 32, %g2
- or %g2, KERN_LOWBITS, %g2
-
- BRANCH_IF_ANY_CHEETAH(g3,g7,9f)
-
- ba,pt %xcc, 1f
- nop
-
-9:
- sethi %uhi(VPTE_BASE_CHEETAH), %g3
- or %g3, %ulo(VPTE_BASE_CHEETAH), %g3
- ba,pt %xcc, 2f
- sllx %g3, 32, %g3
-1:
- sethi %uhi(VPTE_BASE_SPITFIRE), %g3
- or %g3, %ulo(VPTE_BASE_SPITFIRE), %g3
- sllx %g3, 32, %g3
-
-2:
- clr %g7
-#undef KERN_HIGHBITS
-#undef KERN_LOWBITS
-
wrpr %o1, 0x0, %pstate
ldx [%g6 + TI_TASK], %g4
diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S
new file mode 100644
index 000000000000..44b9e6fed09f
--- /dev/null
+++ b/arch/sparc64/kernel/tsb.S
@@ -0,0 +1,169 @@
+/* tsb.S: Sparc64 TSB table handling.
+ *
+ * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
+ */
+
+#include <asm/tsb.h>
+
+ .text
+ .align 32
+
+ /* Invoked from TLB miss handler, we are in the
+ * MMU global registers and they are setup like
+ * this:
+ *
+ * %g1: TSB entry pointer
+ * %g2: available temporary
+ * %g3: FAULT_CODE_{D,I}TLB
+ * %g4: available temporary
+ * %g5: available temporary
+ * %g6: TAG TARGET
+ * %g7: physical address base of the linux page
+ * tables for the current address space
+ */
+ .globl tsb_miss_dtlb
+tsb_miss_dtlb:
+ mov TLB_TAG_ACCESS, %g4
+ ldxa [%g4] ASI_DMMU, %g4
+ ba,pt %xcc, tsb_miss_page_table_walk
+ nop
+
+ .globl tsb_miss_itlb
+tsb_miss_itlb:
+ mov TLB_TAG_ACCESS, %g4
+ ldxa [%g4] ASI_IMMU, %g4
+ ba,pt %xcc, tsb_miss_page_table_walk
+ nop
+
+tsb_miss_page_table_walk:
+ USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault)
+
+tsb_reload:
+ TSB_LOCK_TAG(%g1, %g2, %g4)
+
+ /* Load and check PTE. */
+ ldxa [%g5] ASI_PHYS_USE_EC, %g5
+ brgez,a,pn %g5, tsb_do_fault
+ stx %g0, [%g1]
+
+ TSB_WRITE(%g1, %g5, %g6)
+
+ /* Finally, load TLB and return from trap. */
+tsb_tlb_reload:
+ cmp %g3, FAULT_CODE_DTLB
+ bne,pn %xcc, tsb_itlb_load
+ nop
+
+tsb_dtlb_load:
+ stxa %g5, [%g0] ASI_DTLB_DATA_IN
+ retry
+
+tsb_itlb_load:
+ stxa %g5, [%g0] ASI_ITLB_DATA_IN
+ retry
+
+ /* No valid entry in the page tables, do full fault
+ * processing.
+ */
+
+ .globl tsb_do_fault
+tsb_do_fault:
+ cmp %g3, FAULT_CODE_DTLB
+ rdpr %pstate, %g5
+ bne,pn %xcc, tsb_do_itlb_fault
+ wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate
+
+tsb_do_dtlb_fault:
+ rdpr %tl, %g4
+ cmp %g4, 1
+ mov TLB_TAG_ACCESS, %g4
+ ldxa [%g4] ASI_DMMU, %g5
+ be,pt %xcc, sparc64_realfault_common
+ mov FAULT_CODE_DTLB, %g4
+ ba,pt %xcc, winfix_trampoline
+ nop
+
+tsb_do_itlb_fault:
+ rdpr %tpc, %g5
+ ba,pt %xcc, sparc64_realfault_common
+ mov FAULT_CODE_ITLB, %g4
+
+ .globl sparc64_realfault_common
+sparc64_realfault_common:
+ stb %g4, [%g6 + TI_FAULT_CODE] ! Save fault code
+ stx %g5, [%g6 + TI_FAULT_ADDR] ! Save fault address
+ ba,pt %xcc, etrap ! Save trap state
+1: rd %pc, %g7 ! ...
+ call do_sparc64_fault ! Call fault handler
+ add %sp, PTREGS_OFF, %o0 ! Compute pt_regs arg
+ ba,pt %xcc, rtrap_clr_l6 ! Restore cpu state
+ nop ! Delay slot (fill me)
+
+ .globl winfix_trampoline
+winfix_trampoline:
+ rdpr %tpc, %g3 ! Prepare winfixup TNPC
+ or %g3, 0x7c, %g3 ! Compute branch offset
+ wrpr %g3, %tnpc ! Write it into TNPC
+ done ! Trap return
+
+ /* Reload MMU related context switch state at
+ * schedule() time.
+ *
+ * %o0: page table physical address
+ * %o1: TSB address
+ */
+ .globl tsb_context_switch
+tsb_context_switch:
+ wrpr %g0, PSTATE_MG | PSTATE_RMO | PSTATE_PEF | PSTATE_PRIV, %pstate
+
+ /* Set page table base alternate global. */
+ mov %o0, %g7
+
+ /* XXX can this happen? */
+ brz,pn %o1, 9f
+ nop
+
+ /* Lock TSB into D-TLB. */
+ sethi %hi(PAGE_SIZE), %o3
+ and %o3, %o1, %o3
+ sethi %hi(TSBMAP_BASE), %o2
+ add %o2, %o3, %o2
+
+ /* XXX handle PAGE_SIZE != 8K correctly... */
+ mov TSB_REG, %g1
+ stxa %o2, [%g1] ASI_DMMU
+ membar #Sync
+
+ stxa %o2, [%g1] ASI_IMMU
+ membar #Sync
+
+#define KERN_HIGHBITS ((_PAGE_VALID|_PAGE_SZBITS)^0xfffff80000000000)
+#define KERN_LOWBITS (_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W | _PAGE_L)
+ sethi %uhi(KERN_HIGHBITS), %g2
+ or %g2, %ulo(KERN_HIGHBITS), %g2
+ sllx %g2, 32, %g2
+ or %g2, KERN_LOWBITS, %g2
+#undef KERN_HIGHBITS
+#undef KERN_LOWBITS
+
+ xor %o1, %g2, %o1
+
+ /* We use entry 61 for this locked entry. This is the spitfire
+ * TLB entry number, and luckily cheetah masks the value with
+ * 15 ending us up with entry 13 which is what we want in that
+ * case too.
+ *
+ * XXX Interactions with prom_world()...
+ */
+ mov TLB_TAG_ACCESS, %g1
+ stxa %o2, [%g1] ASI_DMMU
+ membar #Sync
+ mov (61 << 3), %g1
+ stxa %o1, [%g1] ASI_DTLB_DATA_ACCESS
+ membar #Sync
+
+9:
+ wrpr %g0, PSTATE_RMO | PSTATE_PEF | PSTATE_PRIV | PSTATE_IE, %pstate
+
+ retl
+ mov %o2, %o0
diff --git a/arch/sparc64/kernel/ttable.S b/arch/sparc64/kernel/ttable.S
index 8365bc1f81f3..56f060c8fbf0 100644
--- a/arch/sparc64/kernel/ttable.S
+++ b/arch/sparc64/kernel/ttable.S
@@ -78,9 +78,9 @@ tl0_vaw: TRAP(do_vaw)
tl0_cee: membar #Sync
TRAP_NOSAVE_7INSNS(__spitfire_cee_trap)
tl0_iamiss:
-#include "itlb_base.S"
+#include "itlb_miss.S"
tl0_damiss:
-#include "dtlb_base.S"
+#include "dtlb_miss.S"
tl0_daprot:
#include "dtlb_prot.S"
tl0_fecc: BTRAP(0x70) /* Fast-ECC on Cheetah */
@@ -241,7 +241,7 @@ tl1_cee: membar #Sync
tl1_iamiss: BTRAPTL1(0x64) BTRAPTL1(0x65) BTRAPTL1(0x66) BTRAPTL1(0x67)
tl1_damiss:
-#include "dtlb_backend.S"
+#include "dtlb_miss.S"
tl1_daprot:
#include "dtlb_prot.S"
tl1_fecc: BTRAPTL1(0x70) /* Fast-ECC on Cheetah */
diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S
index 467d13a0d5c1..f018aaf45486 100644
--- a/arch/sparc64/kernel/vmlinux.lds.S
+++ b/arch/sparc64/kernel/vmlinux.lds.S
@@ -44,6 +44,9 @@ SECTIONS
__stop___ex_table = .;
. = ALIGN(8192);
+ swapper_tsb = .;
+ . += 8192;
+ . = ALIGN(8192);
__init_begin = .;
.init.text : {
_sinittext = .;
diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S
index 39160926267b..f5d93aa99cbb 100644
--- a/arch/sparc64/kernel/winfixup.S
+++ b/arch/sparc64/kernel/winfixup.S
@@ -85,6 +85,7 @@ fill_fixup:
mov %o7, %g6
ldx [%g6 + TI_TASK], %g4
#ifdef CONFIG_SMP
+#error IMMU TSB usage must be fixed
mov TSB_REG, %g1
ldxa [%g1] ASI_IMMU, %g5
#endif
@@ -209,6 +210,7 @@ fill_fixup_mna:
mov %o7, %g6 ! Get current back.
ldx [%g6 + TI_TASK], %g4 ! Finish it.
#ifdef CONFIG_SMP
+#error IMMU TSB usage must be fixed
mov TSB_REG, %g1
ldxa [%g1] ASI_IMMU, %g5
#endif
@@ -278,11 +280,6 @@ window_mna_from_user_common:
ba,pt %xcc, rtrap
clr %l6
- /* These are only needed for 64-bit mode processes which
- * put their stack pointer into the VPTE area and there
- * happens to be a VPTE tlb entry mapped there during
- * a spill/fill trap to that stack frame.
- */
.globl winfix_dax, fill_fixup_dax, spill_fixup_dax
winfix_dax:
andn %g3, 0x7f, %g3
@@ -318,6 +315,7 @@ fill_fixup_dax:
mov %o7, %g6 ! Get current back.
ldx [%g6 + TI_TASK], %g4 ! Finish it.
#ifdef CONFIG_SMP
+#error IMMU TSB usage must be fixed
mov TSB_REG, %g1
ldxa [%g1] ASI_IMMU, %g5
#endif
diff --git a/arch/sparc64/mm/Makefile b/arch/sparc64/mm/Makefile
index 9d0960e69f48..e415bf942bcd 100644
--- a/arch/sparc64/mm/Makefile
+++ b/arch/sparc64/mm/Makefile
@@ -5,6 +5,6 @@
EXTRA_AFLAGS := -ansi
EXTRA_CFLAGS := -Werror
-obj-y := ultra.o tlb.o fault.o init.o generic.o
+obj-y := ultra.o tlb.o tsb.o fault.o init.o generic.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 1e44ee26cee8..da068f6b2595 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -408,8 +408,7 @@ unsigned long prom_virt_to_phys(unsigned long promva, int *error)
/* The obp translations are saved based on 8k pagesize, since obp can
* use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS ->
- * HI_OBP_ADDRESS range are handled in ktlb.S and do not use the vpte
- * scheme (also, see rant in inherit_locked_prom_mappings()).
+ * HI_OBP_ADDRESS range are handled in ktlb.S.
*/
static inline int in_obp_range(unsigned long vaddr)
{
@@ -539,75 +538,6 @@ static void __init inherit_prom_mappings(void)
prom_printf("done.\n");
}
-/* The OBP specifications for sun4u mark 0xfffffffc00000000 and
- * upwards as reserved for use by the firmware (I wonder if this
- * will be the same on Cheetah...). We use this virtual address
- * range for the VPTE table mappings of the nucleus so we need
- * to zap them when we enter the PROM. -DaveM
- */
-static void __flush_nucleus_vptes(void)
-{
- unsigned long prom_reserved_base = 0xfffffffc00000000UL;
- int i;
-
- /* Only DTLB must be checked for VPTE entries. */
- if (tlb_type == spitfire) {
- for (i = 0; i < 63; i++) {
- unsigned long tag;
-
- /* Spitfire Errata #32 workaround */
- /* NOTE: Always runs on spitfire, so no cheetah+
- * page size encodings.
- */
- __asm__ __volatile__("stxa %0, [%1] %2\n\t"
- "flush %%g6"
- : /* No outputs */
- : "r" (0),
- "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
-
- tag = spitfire_get_dtlb_tag(i);
- if (((tag & ~(PAGE_MASK)) == 0) &&
- ((tag & (PAGE_MASK)) >= prom_reserved_base)) {
- __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
- "membar #Sync"
- : /* no outputs */
- : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
- spitfire_put_dtlb_data(i, 0x0UL);
- }
- }
- } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
- for (i = 0; i < 512; i++) {
- unsigned long tag = cheetah_get_dtlb_tag(i, 2);
-
- if ((tag & ~PAGE_MASK) == 0 &&
- (tag & PAGE_MASK) >= prom_reserved_base) {
- __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
- "membar #Sync"
- : /* no outputs */
- : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
- cheetah_put_dtlb_data(i, 0x0UL, 2);
- }
-
- if (tlb_type != cheetah_plus)
- continue;
-
- tag = cheetah_get_dtlb_tag(i, 3);
-
- if ((tag & ~PAGE_MASK) == 0 &&
- (tag & PAGE_MASK) >= prom_reserved_base) {
- __asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
- "membar #Sync"
- : /* no outputs */
- : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
- cheetah_put_dtlb_data(i, 0x0UL, 3);
- }
- }
- } else {
- /* Implement me :-) */
- BUG();
- }
-}
-
static int prom_ditlb_set;
struct prom_tlb_entry {
int tlb_ent;
@@ -635,9 +565,6 @@ void prom_world(int enter)
: "i" (PSTATE_IE));
if (enter) {
- /* Kick out nucleus VPTEs. */
- __flush_nucleus_vptes();
-
/* Install PROM world. */
for (i = 0; i < 16; i++) {
if (prom_dtlb[i].tlb_ent != -1) {
@@ -1039,18 +966,7 @@ out:
struct pgtable_cache_struct pgt_quicklists;
#endif
-/* OK, we have to color these pages. The page tables are accessed
- * by non-Dcache enabled mapping in the VPTE area by the dtlb_backend.S
- * code, as well as by PAGE_OFFSET range direct-mapped addresses by
- * other parts of the kernel. By coloring, we make sure that the tlbmiss
- * fast handlers do not get data from old/garbage dcache lines that
- * correspond to an old/stale virtual address (user/kernel) that
- * previously mapped the pagetable page while accessing vpte range
- * addresses. The idea is that if the vpte color and PAGE_OFFSET range
- * color is the same, then when the kernel initializes the pagetable
- * using the later address range, accesses with the first address
- * range will see the newly initialized data rather than the garbage.
- */
+/* XXX We don't need to color these things in the D-cache any longer. */
#ifdef DCACHE_ALIASING_POSSIBLE
#define DC_ALIAS_SHIFT 1
#else
@@ -1419,6 +1335,9 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
kernel_map_range(phys_start, phys_end,
(enable ? PAGE_KERNEL : __pgprot(0)));
+ flush_tsb_kernel_range(PAGE_OFFSET + phys_start,
+ PAGE_OFFSET + phys_end);
+
/* we should perform an IPI and flush all tlbs,
* but that can deadlock->flush only current cpu.
*/
diff --git a/arch/sparc64/mm/tlb.c b/arch/sparc64/mm/tlb.c
index 8b104be4662b..78357cc2a0b7 100644
--- a/arch/sparc64/mm/tlb.c
+++ b/arch/sparc64/mm/tlb.c
@@ -25,6 +25,8 @@ void flush_tlb_pending(void)
struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
if (mp->tlb_nr) {
+ flush_tsb_user(mp);
+
if (CTX_VALID(mp->mm->context)) {
#ifdef CONFIG_SMP
smp_flush_tlb_pending(mp->mm, mp->tlb_nr,
@@ -89,62 +91,3 @@ no_cache_flush:
if (nr >= TLB_BATCH_NR)
flush_tlb_pending();
}
-
-void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
-{
- struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
- unsigned long nr = mp->tlb_nr;
- long s = start, e = end, vpte_base;
-
- if (mp->fullmm)
- return;
-
- /* If start is greater than end, that is a real problem. */
- BUG_ON(start > end);
-
- /* However, straddling the VA space hole is quite normal. */
- s &= PMD_MASK;
- e = (e + PMD_SIZE - 1) & PMD_MASK;
-
- vpte_base = (tlb_type == spitfire ?
- VPTE_BASE_SPITFIRE :
- VPTE_BASE_CHEETAH);
-
- if (unlikely(nr != 0 && mm != mp->mm)) {
- flush_tlb_pending();
- nr = 0;
- }
-
- if (nr == 0)
- mp->mm = mm;
-
- start = vpte_base + (s >> (PAGE_SHIFT - 3));
- end = vpte_base + (e >> (PAGE_SHIFT - 3));
-
- /* If the request straddles the VA space hole, we
- * need to swap start and end. The reason this
- * occurs is that "vpte_base" is the center of
- * the linear page table mapping area. Thus,
- * high addresses with the sign bit set map to
- * addresses below vpte_base and non-sign bit
- * addresses map to addresses above vpte_base.
- */
- if (end < start) {
- unsigned long tmp = start;
-
- start = end;
- end = tmp;
- }
-
- while (start < end) {
- mp->vaddrs[nr] = start;
- mp->tlb_nr = ++nr;
- if (nr >= TLB_BATCH_NR) {
- flush_tlb_pending();
- nr = 0;
- }
- start += PAGE_SIZE;
- }
- if (nr)
- flush_tlb_pending();
-}
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
new file mode 100644
index 000000000000..15e8af58b1d2
--- /dev/null
+++ b/arch/sparc64/mm/tsb.c
@@ -0,0 +1,84 @@
+/* arch/sparc64/mm/tsb.c
+ *
+ * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kernel.h>
+#include <asm/system.h>
+#include <asm/page.h>
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+
+#define TSB_ENTRY_ALIGNMENT 16
+
+struct tsb {
+ unsigned long tag;
+ unsigned long pte;
+} __attribute__((aligned(TSB_ENTRY_ALIGNMENT)));
+
+/* We use an 8K TSB for the whole kernel, this allows to
+ * handle about 4MB of modules and vmalloc mappings without
+ * incurring many hash conflicts.
+ */
+#define KERNEL_TSB_SIZE_BYTES 8192
+#define KERNEL_TSB_NENTRIES \
+ (KERNEL_TSB_SIZE_BYTES / sizeof(struct tsb))
+
+extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
+
+static inline unsigned long tsb_hash(unsigned long vaddr)
+{
+ vaddr >>= PAGE_SHIFT;
+ return vaddr & (KERNEL_TSB_NENTRIES - 1);
+}
+
+static inline int tag_compare(struct tsb *entry, unsigned long vaddr, unsigned long context)
+{
+ if (context == ~0UL)
+ return 1;
+
+ return (entry->tag == ((vaddr >> 22) | (context << 48)));
+}
+
+/* TSB flushes need only occur on the processor initiating the address
+ * space modification, not on each cpu the address space has run on.
+ * Only the TLB flush needs that treatment.
+ */
+
+void flush_tsb_kernel_range(unsigned long start, unsigned long end)
+{
+ unsigned long v;
+
+ for (v = start; v < end; v += PAGE_SIZE) {
+ struct tsb *ent = &swapper_tsb[tsb_hash(v)];
+
+ if (tag_compare(ent, v, 0)) {
+ ent->tag = 0UL;
+ membar_storeload_storestore();
+ }
+ }
+}
+
+void flush_tsb_user(struct mmu_gather *mp)
+{
+ struct mm_struct *mm = mp->mm;
+ struct tsb *tsb = (struct tsb *) mm->context.sparc64_tsb;
+ unsigned long ctx = ~0UL;
+ int i;
+
+ if (CTX_VALID(mm->context))
+ ctx = CTX_HWBITS(mm->context);
+
+ for (i = 0; i < mp->tlb_nr; i++) {
+ unsigned long v = mp->vaddrs[i];
+ struct tsb *ent;
+
+ v &= ~0x1UL;
+
+ ent = &tsb[tsb_hash(v)];
+ if (tag_compare(ent, v, ctx)) {
+ ent->tag = 0UL;
+ membar_storeload_storestore();
+ }
+ }
+}
diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S
index e4c9151fa116..22791f29552e 100644
--- a/arch/sparc64/mm/ultra.S
+++ b/arch/sparc64/mm/ultra.S
@@ -453,64 +453,6 @@ xcall_flush_dcache_page_spitfire: /* %g1 == physical page address
nop
nop
- .data
-
-errata32_hwbug:
- .xword 0
-
- .text
-
- /* These two are not performance critical... */
- .globl xcall_flush_tlb_all_spitfire
-xcall_flush_tlb_all_spitfire:
- /* Spitfire Errata #32 workaround. */
- sethi %hi(errata32_hwbug), %g4
- stx %g0, [%g4 + %lo(errata32_hwbug)]
-
- clr %g2
- clr %g3
-1: ldxa [%g3] ASI_DTLB_DATA_ACCESS, %g4
- and %g4, _PAGE_L, %g5
- brnz,pn %g5, 2f
- mov TLB_TAG_ACCESS, %g7
-
- stxa %g0, [%g7] ASI_DMMU
- membar #Sync
- stxa %g0, [%g3] ASI_DTLB_DATA_ACCESS
- membar #Sync
-
- /* Spitfire Errata #32 workaround. */
- sethi %hi(errata32_hwbug), %g4
- stx %g0, [%g4 + %lo(errata32_hwbug)]
-
-2: ldxa [%g3] ASI_ITLB_DATA_ACCESS, %g4
- and %g4, _PAGE_L, %g5
- brnz,pn %g5, 2f
- mov TLB_TAG_ACCESS, %g7
-
- stxa %g0, [%g7] ASI_IMMU
- membar #Sync
- stxa %g0, [%g3] ASI_ITLB_DATA_ACCESS
- membar #Sync
-
- /* Spitfire Errata #32 workaround. */
- sethi %hi(errata32_hwbug), %g4
- stx %g0, [%g4 + %lo(errata32_hwbug)]
-
-2: add %g2, 1, %g2
- cmp %g2, SPITFIRE_HIGHEST_LOCKED_TLBENT
- ble,pt %icc, 1b
- sll %g2, 3, %g3
- flush %g6
- retry
-
- .globl xcall_flush_tlb_all_cheetah
-xcall_flush_tlb_all_cheetah:
- mov 0x80, %g2
- stxa %g0, [%g2] ASI_DMMU_DEMAP
- stxa %g0, [%g2] ASI_IMMU_DEMAP
- retry
-
/* These just get rescheduled to PIL vectors. */
.globl xcall_call_function
xcall_call_function:
diff --git a/include/asm-sparc64/mmu.h b/include/asm-sparc64/mmu.h
index 8627eed6e83d..36384cf7faa6 100644
--- a/include/asm-sparc64/mmu.h
+++ b/include/asm-sparc64/mmu.h
@@ -92,6 +92,7 @@
typedef struct {
unsigned long sparc64_ctx_val;
+ unsigned long *sparc64_tsb;
} mm_context_t;
#endif /* !__ASSEMBLY__ */
diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h
index 57ee7b306189..34640a370ab4 100644
--- a/include/asm-sparc64/mmu_context.h
+++ b/include/asm-sparc64/mmu_context.h
@@ -25,7 +25,13 @@ extern void get_new_mmu_context(struct mm_struct *mm);
* This just needs to set mm->context to an invalid context.
*/
#define init_new_context(__tsk, __mm) \
- (((__mm)->context.sparc64_ctx_val = 0UL), 0)
+({ unsigned long __pg = get_zeroed_page(GFP_KERNEL); \
+ (__mm)->context.sparc64_ctx_val = 0UL; \
+ (__mm)->context.sparc64_tsb = \
+ (unsigned long *) __pg; \
+ (__pg ? 0 : -ENOMEM); \
+})
+
/* Destroy a dead context. This occurs when mmput drops the
* mm_users count to zero, the mmaps have been released, and
@@ -35,7 +41,8 @@ extern void get_new_mmu_context(struct mm_struct *mm);
* this task if valid.
*/
#define destroy_context(__mm) \
-do { spin_lock(&ctx_alloc_lock); \
+do { free_page((unsigned long)(__mm)->context.sparc64_tsb); \
+ spin_lock(&ctx_alloc_lock); \
if (CTX_VALID((__mm)->context)) { \
unsigned long nr = CTX_NRBITS((__mm)->context); \
mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63)); \
@@ -43,35 +50,7 @@ do { spin_lock(&ctx_alloc_lock); \
spin_unlock(&ctx_alloc_lock); \
} while(0)
-/* Reload the two core values used by TLB miss handler
- * processing on sparc64. They are:
- * 1) The physical address of mm->pgd, when full page
- * table walks are necessary, this is where the
- * search begins.
- * 2) A "PGD cache". For 32-bit tasks only pgd[0] is
- * ever used since that maps the entire low 4GB
- * completely. To speed up TLB miss processing we
- * make this value available to the handlers. This
- * decreases the amount of memory traffic incurred.
- */
-#define reload_tlbmiss_state(__tsk, __mm) \
-do { \
- register unsigned long paddr asm("o5"); \
- register unsigned long pgd_cache asm("o4"); \
- paddr = __pa((__mm)->pgd); \
- pgd_cache = 0UL; \
- if (task_thread_info(__tsk)->flags & _TIF_32BIT) \
- pgd_cache = get_pgd_cache((__mm)->pgd); \
- __asm__ __volatile__("wrpr %%g0, 0x494, %%pstate\n\t" \
- "mov %3, %%g4\n\t" \
- "mov %0, %%g7\n\t" \
- "stxa %1, [%%g4] %2\n\t" \
- "membar #Sync\n\t" \
- "wrpr %%g0, 0x096, %%pstate" \
- : /* no outputs */ \
- : "r" (paddr), "r" (pgd_cache),\
- "i" (ASI_DMMU), "i" (TSB_REG)); \
-} while(0)
+extern unsigned long tsb_context_switch(unsigned long pgd_pa, unsigned long *tsb);
/* Set MMU context in the actual hardware. */
#define load_secondary_context(__mm) \
@@ -101,7 +80,8 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
if (!ctx_valid || (old_mm != mm)) {
load_secondary_context(mm);
- reload_tlbmiss_state(tsk, mm);
+ tsb_context_switch(__pa(mm->pgd),
+ mm->context.sparc64_tsb);
}
/* Even if (mm == old_mm) we _must_ check
@@ -139,7 +119,7 @@ static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm
load_secondary_context(mm);
__flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT);
- reload_tlbmiss_state(current, mm);
+ tsb_context_switch(__pa(mm->pgd), mm->context.sparc64_tsb);
}
#endif /* !(__ASSEMBLY__) */
diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h
index a96067cca963..baf59c00ea47 100644
--- a/include/asm-sparc64/pgalloc.h
+++ b/include/asm-sparc64/pgalloc.h
@@ -61,6 +61,7 @@ static __inline__ void free_pgd_slow(pgd_t *pgd)
free_page((unsigned long)pgd);
}
+/* XXX This crap can die, no longer using virtual page tables... */
#ifdef DCACHE_ALIASING_POSSIBLE
#define VPTE_COLOR(address) (((address) >> (PAGE_SHIFT + 10)) & 1UL)
#define DCACHE_COLOR(address) (((address) >> PAGE_SHIFT) & 1UL)
diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h
index f0a9b44d3eb5..f3ba1e058195 100644
--- a/include/asm-sparc64/pgtable.h
+++ b/include/asm-sparc64/pgtable.h
@@ -25,7 +25,8 @@
#include <asm/const.h>
/* The kernel image occupies 0x4000000 to 0x1000000 (4MB --> 32MB).
- * The page copy blockops can use 0x2000000 to 0x10000000.
+ * The page copy blockops can use 0x2000000 to 0x4000000.
+ * The TSB is mapped in the 0x4000000 to 0x6000000 range.
* The PROM resides in an area spanning 0xf0000000 to 0x100000000.
* The vmalloc area spans 0x100000000 to 0x200000000.
* Since modules need to be in the lowest 32-bits of the address space,
@@ -34,6 +35,7 @@
* 0x400000000.
*/
#define TLBTEMP_BASE _AC(0x0000000002000000,UL)
+#define TSBMAP_BASE _AC(0x0000000004000000,UL)
#define MODULES_VADDR _AC(0x0000000010000000,UL)
#define MODULES_LEN _AC(0x00000000e0000000,UL)
#define MODULES_END _AC(0x00000000f0000000,UL)
@@ -296,11 +298,6 @@ static inline pte_t pte_modify(pte_t orig_pte, pgprot_t new_prot)
/* to find an entry in a kernel page-table-directory */
#define pgd_offset_k(address) pgd_offset(&init_mm, address)
-/* extract the pgd cache used for optimizing the tlb miss
- * slow path when executing 32-bit compat processes
- */
-#define get_pgd_cache(pgd) ((unsigned long) pgd_val(*pgd) << 11)
-
/* Find an entry in the second-level page table.. */
#define pmd_offset(pudp, address) \
((pmd_t *) pud_page(*(pudp)) + \
diff --git a/include/asm-sparc64/processor.h b/include/asm-sparc64/processor.h
index cd8d9b4c8658..b3889f3f943a 100644
--- a/include/asm-sparc64/processor.h
+++ b/include/asm-sparc64/processor.h
@@ -28,6 +28,8 @@
* User lives in his very own context, and cannot reference us. Note
* that TASK_SIZE is a misnomer, it really gives maximum user virtual
* address that the kernel will allocate out.
+ *
+ * XXX No longer using virtual page tables, kill this upper limit...
*/
#define VA_BITS 44
#ifndef __ASSEMBLY__
@@ -37,18 +39,6 @@
#endif
#define TASK_SIZE ((unsigned long)-VPTE_SIZE)
-/*
- * The vpte base must be able to hold the entire vpte, half
- * of which lives above, and half below, the base. And it
- * is placed as close to the highest address range as possible.
- */
-#define VPTE_BASE_SPITFIRE (-(VPTE_SIZE/2))
-#if 1
-#define VPTE_BASE_CHEETAH VPTE_BASE_SPITFIRE
-#else
-#define VPTE_BASE_CHEETAH 0xffe0000000000000
-#endif
-
#ifndef __ASSEMBLY__
typedef struct {
diff --git a/include/asm-sparc64/tlbflush.h b/include/asm-sparc64/tlbflush.h
index 3ef9909ac3ac..9ad5d9c51d42 100644
--- a/include/asm-sparc64/tlbflush.h
+++ b/include/asm-sparc64/tlbflush.h
@@ -5,6 +5,11 @@
#include <linux/mm.h>
#include <asm/mmu_context.h>
+/* TSB flush operations. */
+struct mmu_gather;
+extern void flush_tsb_kernel_range(unsigned long start, unsigned long end);
+extern void flush_tsb_user(struct mmu_gather *mp);
+
/* TLB flush operations. */
extern void flush_tlb_pending(void);
@@ -14,28 +19,36 @@ extern void flush_tlb_pending(void);
#define flush_tlb_page(vma,addr) flush_tlb_pending()
#define flush_tlb_mm(mm) flush_tlb_pending()
+/* Local cpu only. */
extern void __flush_tlb_all(void);
+
extern void __flush_tlb_page(unsigned long context, unsigned long page, unsigned long r);
extern void __flush_tlb_kernel_range(unsigned long start, unsigned long end);
#ifndef CONFIG_SMP
-#define flush_tlb_all() __flush_tlb_all()
#define flush_tlb_kernel_range(start,end) \
- __flush_tlb_kernel_range(start,end)
+do { flush_tsb_kernel_range(start,end); \
+ __flush_tlb_kernel_range(start,end); \
+} while (0)
#else /* CONFIG_SMP */
-extern void smp_flush_tlb_all(void);
extern void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end);
-#define flush_tlb_all() smp_flush_tlb_all()
#define flush_tlb_kernel_range(start, end) \
- smp_flush_tlb_kernel_range(start, end)
+do { flush_tsb_kernel_range(start,end); \
+ smp_flush_tlb_kernel_range(start, end); \
+} while (0)
#endif /* ! CONFIG_SMP */
-extern void flush_tlb_pgtables(struct mm_struct *, unsigned long, unsigned long);
+static inline void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+ /* We don't use virtual page tables for TLB miss processing
+ * any more. Nowadays we use the TSB.
+ */
+}
#endif /* _SPARC64_TLBFLUSH_H */
diff --git a/include/asm-sparc64/tsb.h b/include/asm-sparc64/tsb.h
new file mode 100644
index 000000000000..03d272e0e477
--- /dev/null
+++ b/include/asm-sparc64/tsb.h
@@ -0,0 +1,165 @@
+#ifndef _SPARC64_TSB_H
+#define _SPARC64_TSB_H
+
+/* The sparc64 TSB is similar to the powerpc hashtables. It's a
+ * power-of-2 sized table of TAG/PTE pairs. The cpu precomputes
+ * pointers into this table for 8K and 64K page sizes, and also a
+ * comparison TAG based upon the virtual address and context which
+ * faults.
+ *
+ * TLB miss trap handler software does the actual lookup via something
+ * of the form:
+ *
+ * ldxa [%g0] ASI_{D,I}MMU_TSB_8KB_PTR, %g1
+ * ldxa [%g0] ASI_{D,I}MMU, %g6
+ * ldda [%g1] ASI_NUCLEUS_QUAD_LDD, %g4
+ * cmp %g4, %g6
+ * bne,pn %xcc, tsb_miss_{d,i}tlb
+ * mov FAULT_CODE_{D,I}TLB, %g3
+ * stxa %g5, [%g0] ASI_{D,I}TLB_DATA_IN
+ * retry
+ *
+
+ * Each 16-byte slot of the TSB is the 8-byte tag and then the 8-byte
+ * PTE. The TAG is of the same layout as the TLB TAG TARGET mmu
+ * register which is:
+ *
+ * -------------------------------------------------
+ * | - | CONTEXT | - | VADDR bits 63:22 |
+ * -------------------------------------------------
+ * 63 61 60 48 47 42 41 0
+ *
+ * Like the powerpc hashtables we need to use locking in order to
+ * synchronize while we update the entries. PTE updates need locking
+ * as well.
+ *
+ * We need to carefully choose a lock bits for the TSB entry. We
+ * choose to use bit 47 in the tag. Also, since we never map anything
+ * at page zero in context zero, we use zero as an invalid tag entry.
+ * When the lock bit is set, this forces a tag comparison failure.
+ *
+ * Currently, we allocate an 8K TSB per-process and we use it for both
+ * I-TLB and D-TLB misses. Perhaps at some point we'll add code that
+ * monitors the number of active pages in the process as we get
+ * major/minor faults, and grow the TSB in response. The only trick
+ * in implementing that is synchronizing the freeing of the old TSB
+ * wrt. parallel TSB updates occuring on other processors. On
+ * possible solution is to use RCU for the freeing of the TSB.
+ */
+
+#define TSB_TAG_LOCK (1 << (47 - 32))
+
+#define TSB_MEMBAR membar #StoreStore
+
+#define TSB_LOCK_TAG(TSB, REG1, REG2) \
+99: lduwa [TSB] ASI_N, REG1; \
+ sethi %hi(TSB_TAG_LOCK), REG2;\
+ andcc REG1, REG2, %g0; \
+ bne,pn %icc, 99b; \
+ nop; \
+ casa [TSB] ASI_N, REG1, REG2;\
+ cmp REG1, REG2; \
+ bne,pn %icc, 99b; \
+ nop; \
+ TSB_MEMBAR
+
+#define TSB_WRITE(TSB, TTE, TAG) \
+ stx TTE, [TSB + 0x08]; \
+ TSB_MEMBAR; \
+ stx TAG, [TSB + 0x00];
+
+ /* Do a kernel page table walk. Leaves physical PTE pointer in
+ * REG1. Jumps to FAIL_LABEL on early page table walk termination.
+ * VADDR will not be clobbered, but REG2 will.
+ */
+#define KERN_PGTABLE_WALK(VADDR, REG1, REG2, FAIL_LABEL) \
+ sethi %hi(swapper_pg_dir), REG1; \
+ or REG1, %lo(swapper_pg_dir), REG1; \
+ sllx VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ andn REG2, 0x3, REG2; \
+ lduw [REG1 + REG2], REG1; \
+ brz,pn REG1, FAIL_LABEL; \
+ sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ sllx REG1, 11, REG1; \
+ andn REG2, 0x3, REG2; \
+ lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+ brz,pn REG1, FAIL_LABEL; \
+ sllx VADDR, 64 - PMD_SHIFT, REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ sllx REG1, 11, REG1; \
+ andn REG2, 0x7, REG2; \
+ add REG1, REG2, REG1;
+
+ /* Do a user page table walk in MMU globals. Leaves physical PTE
+ * pointer in REG1. Jumps to FAIL_LABEL on early page table walk
+ * termination. Physical base of page tables is in PHYS_PGD which
+ * will not be modified.
+ *
+ * VADDR will not be clobbered, but REG1 and REG2 will.
+ */
+#define USER_PGTABLE_WALK_TL1(VADDR, PHYS_PGD, REG1, REG2, FAIL_LABEL) \
+ sllx VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ andn REG2, 0x3, REG2; \
+ lduwa [PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \
+ brz,pn REG1, FAIL_LABEL; \
+ sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ sllx REG1, 11, REG1; \
+ andn REG2, 0x3, REG2; \
+ lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+ brz,pn REG1, FAIL_LABEL; \
+ sllx VADDR, 64 - PMD_SHIFT, REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ sllx REG1, 11, REG1; \
+ andn REG2, 0x7, REG2; \
+ add REG1, REG2, REG1;
+
+/* Lookup a OBP mapping on VADDR in the prom_trans[] table at TL>0.
+ * If no entry is found, FAIL_LABEL will be branched to. On success
+ * the resulting PTE value will be left in REG1. VADDR is preserved
+ * by this routine.
+ */
+#define OBP_TRANS_LOOKUP(VADDR, REG1, REG2, REG3, FAIL_LABEL) \
+ sethi %hi(prom_trans), REG1; \
+ or REG1, %lo(prom_trans), REG1; \
+97: ldx [REG1 + 0x00], REG2; \
+ brz,pn REG2, FAIL_LABEL; \
+ nop; \
+ ldx [REG1 + 0x08], REG3; \
+ add REG2, REG3, REG3; \
+ cmp REG2, VADDR; \
+ bgu,pt %xcc, 98f; \
+ cmp VADDR, REG3; \
+ bgeu,pt %xcc, 98f; \
+ ldx [REG1 + 0x10], REG3; \
+ sub VADDR, REG2, REG2; \
+ ba,pt %xcc, 99f; \
+ add REG3, REG2, REG1; \
+98: ba,pt %xcc, 97b; \
+ add REG1, (3 * 8), REG1; \
+99:
+
+ /* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL
+ * on TSB hit. REG1, REG2, REG3, and REG4 are used as temporaries
+ * and the found TTE will be left in REG1. REG3 and REG4 must
+ * be an even/odd pair of registers.
+ *
+ * VADDR and TAG will be preserved and not clobbered by this macro.
+ */
+ /* XXX non-8K base page size support... */
+#define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
+ sethi %hi(swapper_tsb), REG1; \
+ or REG1, %lo(swapper_tsb), REG1; \
+ srlx VADDR, 13, REG2; \
+ and REG2, (512 - 1), REG2; \
+ sllx REG2, 4, REG2; \
+ add REG1, REG2, REG2; \
+ ldda [REG2] ASI_NUCLEUS_QUAD_LDD, REG3; \
+ cmp REG3, TAG; \
+ be,a,pt %xcc, OK_LABEL; \
+ mov REG4, REG1;
+
+#endif /* !(_SPARC64_TSB_H) */