summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/Makefile2
-rw-r--r--arch/powerpc/mm/book3s32/mmu.c2
-rw-r--r--arch/powerpc/mm/book3s64/Makefile2
-rw-r--r--arch/powerpc/mm/book3s64/hash_utils.c11
-rw-r--r--arch/powerpc/mm/book3s64/hugetlbpage.c10
-rw-r--r--arch/powerpc/mm/book3s64/pgtable.c13
-rw-r--r--arch/powerpc/mm/book3s64/radix_pgtable.c84
-rw-r--r--arch/powerpc/mm/book3s64/slice.c20
-rw-r--r--arch/powerpc/mm/cacheflush.c2
-rw-r--r--arch/powerpc/mm/drmem.c4
-rw-r--r--arch/powerpc/mm/fault.c37
-rw-r--r--arch/powerpc/mm/hugetlbpage.c455
-rw-r--r--arch/powerpc/mm/init-common.c15
-rw-r--r--arch/powerpc/mm/kasan/8xx.c21
-rw-r--r--arch/powerpc/mm/kasan/init_book3e_64.c2
-rw-r--r--arch/powerpc/mm/kasan/init_book3s_64.c2
-rw-r--r--arch/powerpc/mm/mem.c69
-rw-r--r--arch/powerpc/mm/mmu_context.c2
-rw-r--r--arch/powerpc/mm/mmu_decl.h8
-rw-r--r--arch/powerpc/mm/nohash/40x.c161
-rw-r--r--arch/powerpc/mm/nohash/8xx.c43
-rw-r--r--arch/powerpc/mm/nohash/Makefile5
-rw-r--r--arch/powerpc/mm/nohash/book3e_pgtable.c4
-rw-r--r--arch/powerpc/mm/nohash/kaslr_booke.c2
-rw-r--r--arch/powerpc/mm/nohash/kup.c2
-rw-r--r--arch/powerpc/mm/nohash/mmu_context.c5
-rw-r--r--arch/powerpc/mm/nohash/tlb.c407
-rw-r--r--arch/powerpc/mm/nohash/tlb_64e.c314
-rw-r--r--arch/powerpc/mm/nohash/tlb_low.S27
-rw-r--r--arch/powerpc/mm/nohash/tlb_low_64e.S428
-rw-r--r--arch/powerpc/mm/numa.c14
-rw-r--r--arch/powerpc/mm/pgtable.c94
-rw-r--r--arch/powerpc/mm/pgtable_32.c2
-rw-r--r--arch/powerpc/mm/pgtable_64.c6
-rw-r--r--arch/powerpc/mm/ptdump/Makefile2
-rw-r--r--arch/powerpc/mm/ptdump/hashpagetable.c2
36 files changed, 674 insertions, 1605 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 503a6e249940..0fe2f085c05a 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -3,8 +3,6 @@
# Makefile for the linux ppc-specific parts of the memory manager.
#
-ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
-
obj-y := fault.o mem.o pgtable.o maccess.o pageattr.o \
init_$(BITS).o pgtable_$(BITS).o \
pgtable-frag.o ioremap.o ioremap_$(BITS).o \
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 100f999871bc..625fe7d08e06 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -184,7 +184,7 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
static bool is_module_segment(unsigned long addr)
{
- if (!IS_ENABLED(CONFIG_MODULES))
+ if (!IS_ENABLED(CONFIG_EXECMEM))
return false;
if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M))
return false;
diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile
index cad2abc1730f..33af5795856a 100644
--- a/arch/powerpc/mm/book3s64/Makefile
+++ b/arch/powerpc/mm/book3s64/Makefile
@@ -1,7 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-ccflags-y := $(NO_MINIMAL_TOC)
-
obj-y += mmu_context.o pgtable.o trace.o
ifdef CONFIG_PPC_64S_HASH_MMU
CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE)
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 01c3b4b65241..6727a15ab94f 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1233,10 +1233,6 @@ void __init hash__early_init_mmu(void)
__pmd_table_size = H_PMD_TABLE_SIZE;
__pud_table_size = H_PUD_TABLE_SIZE;
__pgd_table_size = H_PGD_TABLE_SIZE;
- /*
- * 4k use hugepd format, so for hash set then to
- * zero
- */
__pmd_val_bits = HASH_PMD_VAL_BITS;
__pud_val_bits = HASH_PUD_VAL_BITS;
__pgd_val_bits = HASH_PGD_VAL_BITS;
@@ -1546,6 +1542,13 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
goto bail;
}
+ if (IS_ENABLED(CONFIG_PPC_4K_PAGES) && !radix_enabled()) {
+ if (hugeshift == PMD_SHIFT && psize == MMU_PAGE_16M)
+ hugeshift = mmu_psize_defs[MMU_PAGE_16M].shift;
+ if (hugeshift == PUD_SHIFT && psize == MMU_PAGE_16G)
+ hugeshift = mmu_psize_defs[MMU_PAGE_16G].shift;
+ }
+
/*
* Add _PAGE_PRESENT to the required access perm. If there are parallel
* updates to the pte that can possibly clear _PAGE_PTE, catch that too.
diff --git a/arch/powerpc/mm/book3s64/hugetlbpage.c b/arch/powerpc/mm/book3s64/hugetlbpage.c
index 5a2e512e96db..83c3361b358b 100644
--- a/arch/powerpc/mm/book3s64/hugetlbpage.c
+++ b/arch/powerpc/mm/book3s64/hugetlbpage.c
@@ -53,6 +53,16 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
/* If PTE permissions don't match, take page fault */
if (unlikely(!check_pte_access(access, old_pte)))
return 1;
+ /*
+ * If hash-4k, hugepages use seeral contiguous PxD entries
+ * so bail out and let mm make the page young or dirty
+ */
+ if (IS_ENABLED(CONFIG_PPC_4K_PAGES)) {
+ if (!(old_pte & _PAGE_ACCESSED))
+ return 1;
+ if ((access & _PAGE_WRITE) && !(old_pte & _PAGE_DIRTY))
+ return 1;
+ }
/*
* Try to lock the PTE, add ACCESSED and DIRTY if it was
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 83823db3488b..f4d8d3c40e5c 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -170,6 +170,7 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
{
unsigned long old_pmd;
+ VM_WARN_ON_ONCE(!pmd_present(*pmdp));
old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, _PAGE_INVALID);
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
return __pmd(old_pmd);
@@ -460,18 +461,6 @@ static inline void pgtable_free(void *table, int index)
case PUD_INDEX:
__pud_free(table);
break;
-#if defined(CONFIG_PPC_4K_PAGES) && defined(CONFIG_HUGETLB_PAGE)
- /* 16M hugepd directory at pud level */
- case HTLB_16M_INDEX:
- BUILD_BUG_ON(H_16M_CACHE_INDEX <= 0);
- kmem_cache_free(PGT_CACHE(H_16M_CACHE_INDEX), table);
- break;
- /* 16G hugepd directory at the pgd level */
- case HTLB_16G_INDEX:
- BUILD_BUG_ON(H_16G_CACHE_INDEX <= 0);
- kmem_cache_free(PGT_CACHE(H_16G_CACHE_INDEX), table);
- break;
-#endif
/* We don't free pgd table via RCU callback */
default:
BUG();
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 15e88f1439ec..b0d927009af8 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -17,6 +17,7 @@
#include <linux/hugetlb.h>
#include <linux/string_helpers.h>
#include <linux/memory.h>
+#include <linux/kfence.h>
#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
@@ -31,6 +32,7 @@
#include <asm/uaccess.h>
#include <asm/ultravisor.h>
#include <asm/set_memory.h>
+#include <asm/kfence.h>
#include <trace/events/thp.h>
@@ -293,7 +295,8 @@ static unsigned long next_boundary(unsigned long addr, unsigned long end)
static int __meminit create_physical_mapping(unsigned long start,
unsigned long end,
- int nid, pgprot_t _prot)
+ int nid, pgprot_t _prot,
+ unsigned long mapping_sz_limit)
{
unsigned long vaddr, addr, mapping_size = 0;
bool prev_exec, exec = false;
@@ -301,7 +304,10 @@ static int __meminit create_physical_mapping(unsigned long start,
int psize;
unsigned long max_mapping_size = memory_block_size;
- if (debug_pagealloc_enabled_or_kfence())
+ if (mapping_sz_limit < max_mapping_size)
+ max_mapping_size = mapping_sz_limit;
+
+ if (debug_pagealloc_enabled())
max_mapping_size = PAGE_SIZE;
start = ALIGN(start, PAGE_SIZE);
@@ -356,8 +362,74 @@ static int __meminit create_physical_mapping(unsigned long start,
return 0;
}
+#ifdef CONFIG_KFENCE
+static bool __ro_after_init kfence_early_init = !!CONFIG_KFENCE_SAMPLE_INTERVAL;
+
+static int __init parse_kfence_early_init(char *arg)
+{
+ int val;
+
+ if (get_option(&arg, &val))
+ kfence_early_init = !!val;
+ return 0;
+}
+early_param("kfence.sample_interval", parse_kfence_early_init);
+
+static inline phys_addr_t alloc_kfence_pool(void)
+{
+ phys_addr_t kfence_pool;
+
+ /*
+ * TODO: Support to enable KFENCE after bootup depends on the ability to
+ * split page table mappings. As such support is not currently
+ * implemented for radix pagetables, support enabling KFENCE
+ * only at system startup for now.
+ *
+ * After support for splitting mappings is available on radix,
+ * alloc_kfence_pool() & map_kfence_pool() can be dropped and
+ * mapping for __kfence_pool memory can be
+ * split during arch_kfence_init_pool().
+ */
+ if (!kfence_early_init)
+ goto no_kfence;
+
+ kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
+ if (!kfence_pool)
+ goto no_kfence;
+
+ memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
+ return kfence_pool;
+
+no_kfence:
+ disable_kfence();
+ return 0;
+}
+
+static inline void map_kfence_pool(phys_addr_t kfence_pool)
+{
+ if (!kfence_pool)
+ return;
+
+ if (create_physical_mapping(kfence_pool, kfence_pool + KFENCE_POOL_SIZE,
+ -1, PAGE_KERNEL, PAGE_SIZE))
+ goto err;
+
+ memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
+ __kfence_pool = __va(kfence_pool);
+ return;
+
+err:
+ memblock_phys_free(kfence_pool, KFENCE_POOL_SIZE);
+ disable_kfence();
+}
+#else
+static inline phys_addr_t alloc_kfence_pool(void) { return 0; }
+static inline void map_kfence_pool(phys_addr_t kfence_pool) { }
+#endif
+
static void __init radix_init_pgtable(void)
{
+ phys_addr_t kfence_pool;
unsigned long rts_field;
phys_addr_t start, end;
u64 i;
@@ -365,6 +437,8 @@ static void __init radix_init_pgtable(void)
/* We don't support slb for radix */
slb_set_size(0);
+ kfence_pool = alloc_kfence_pool();
+
/*
* Create the linear mapping
*/
@@ -381,9 +455,11 @@ static void __init radix_init_pgtable(void)
}
WARN_ON(create_physical_mapping(start, end,
- -1, PAGE_KERNEL));
+ -1, PAGE_KERNEL, ~0UL));
}
+ map_kfence_pool(kfence_pool);
+
if (!cpu_has_feature(CPU_FTR_HVMODE) &&
cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
/*
@@ -875,7 +951,7 @@ int __meminit radix__create_section_mapping(unsigned long start,
}
return create_physical_mapping(__pa(start), __pa(end),
- nid, prot);
+ nid, prot, ~0UL);
}
int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
diff --git a/arch/powerpc/mm/book3s64/slice.c b/arch/powerpc/mm/book3s64/slice.c
index c0b58afb9a47..ef3ce37f1bb3 100644
--- a/arch/powerpc/mm/book3s64/slice.c
+++ b/arch/powerpc/mm/book3s64/slice.c
@@ -282,12 +282,10 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
{
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
unsigned long found, next_end;
- struct vm_unmapped_area_info info;
-
- info.flags = 0;
- info.length = len;
- info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
- info.align_offset = 0;
+ struct vm_unmapped_area_info info = {
+ .length = len,
+ .align_mask = PAGE_MASK & ((1ul << pshift) - 1),
+ };
/*
* Check till the allow max value for this mmap request
*/
@@ -326,13 +324,13 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
{
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
unsigned long found, prev;
- struct vm_unmapped_area_info info;
+ struct vm_unmapped_area_info info = {
+ .flags = VM_UNMAPPED_AREA_TOPDOWN,
+ .length = len,
+ .align_mask = PAGE_MASK & ((1ul << pshift) - 1),
+ };
unsigned long min_addr = max(PAGE_SIZE, mmap_min_addr);
- info.flags = VM_UNMAPPED_AREA_TOPDOWN;
- info.length = len;
- info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
- info.align_offset = 0;
/*
* If we are trying to allocate above DEFAULT_MAP_WINDOW
* Add the different to the mmap_base.
diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c
index 15189592da09..7186516eca52 100644
--- a/arch/powerpc/mm/cacheflush.c
+++ b/arch/powerpc/mm/cacheflush.c
@@ -78,7 +78,7 @@ EXPORT_SYMBOL(flush_icache_range);
#ifdef CONFIG_HIGHMEM
/**
- * flush_dcache_icache_phys() - Flush a page by it's physical address
+ * flush_dcache_icache_phys() - Flush a page by its physical address
* @physaddr: the physical address of the page
*/
static void flush_dcache_icache_phys(unsigned long physaddr)
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index c110ab8fa8a3..8dd7b340d51f 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -491,10 +491,8 @@ static int __init drmem_init(void)
const __be32 *prop;
dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
- if (!dn) {
- pr_info("No dynamic reconfiguration memory found\n");
+ if (!dn)
return 0;
- }
if (init_drmem_lmb_size(dn)) {
of_node_put(dn);
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 53335ae21a40..81c77ddce2e3 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -71,23 +71,26 @@ static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long add
return __bad_area_nosemaphore(regs, address, SEGV_MAPERR);
}
-static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code)
+static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code,
+ struct mm_struct *mm, struct vm_area_struct *vma)
{
- struct mm_struct *mm = current->mm;
/*
* Something tried to access memory that isn't in our memory map..
* Fix it, but check if it's kernel or user first..
*/
- mmap_read_unlock(mm);
+ if (mm)
+ mmap_read_unlock(mm);
+ else
+ vma_end_read(vma);
return __bad_area_nosemaphore(regs, address, si_code);
}
static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address,
+ struct mm_struct *mm,
struct vm_area_struct *vma)
{
- struct mm_struct *mm = current->mm;
int pkey;
/*
@@ -109,7 +112,10 @@ static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address,
*/
pkey = vma_pkey(vma);
- mmap_read_unlock(mm);
+ if (mm)
+ mmap_read_unlock(mm);
+ else
+ vma_end_read(vma);
/*
* If we are in kernel mode, bail out with a SEGV, this will
@@ -124,9 +130,10 @@ static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address,
return 0;
}
-static noinline int bad_access(struct pt_regs *regs, unsigned long address)
+static noinline int bad_access(struct pt_regs *regs, unsigned long address,
+ struct mm_struct *mm, struct vm_area_struct *vma)
{
- return __bad_area(regs, address, SEGV_ACCERR);
+ return __bad_area(regs, address, SEGV_ACCERR, mm, vma);
}
static int do_sigbus(struct pt_regs *regs, unsigned long address,
@@ -361,13 +368,13 @@ static void sanity_check_fault(bool is_write, bool is_user,
* Define the correct "is_write" bit in error_code based
* on the processor family
*/
-#if (defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+#ifdef CONFIG_BOOKE
#define page_fault_is_write(__err) ((__err) & ESR_DST)
#else
#define page_fault_is_write(__err) ((__err) & DSISR_ISSTORE)
#endif
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
+#ifdef CONFIG_BOOKE
#define page_fault_is_bad(__err) (0)
#elif defined(CONFIG_PPC_8xx)
#define page_fault_is_bad(__err) ((__err) & DSISR_NOEXEC_OR_G)
@@ -479,13 +486,13 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
if (unlikely(access_pkey_error(is_write, is_exec,
(error_code & DSISR_KEYFAULT), vma))) {
- vma_end_read(vma);
- goto lock_mmap;
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ return bad_access_pkey(regs, address, NULL, vma);
}
if (unlikely(access_error(is_write, is_exec, vma))) {
- vma_end_read(vma);
- goto lock_mmap;
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ return bad_access(regs, address, NULL, vma);
}
fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
@@ -521,10 +528,10 @@ retry:
if (unlikely(access_pkey_error(is_write, is_exec,
(error_code & DSISR_KEYFAULT), vma)))
- return bad_access_pkey(regs, address, vma);
+ return bad_access_pkey(regs, address, mm, vma);
if (unlikely(access_error(is_write, is_exec, vma)))
- return bad_access(regs, address);
+ return bad_access(regs, address, mm, vma);
/*
* If for any reason at all we couldn't handle the fault,
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 594a4b7b2ca2..6b043180220a 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -28,8 +28,6 @@
bool hugetlb_disabled = false;
-#define hugepd_none(hpd) (hpd_val(hpd) == 0)
-
#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_basic_t)) - \
__builtin_ffs(sizeof(void *)))
@@ -42,156 +40,43 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long s
return __find_linux_pte(mm->pgd, addr, NULL, NULL);
}
-static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
- unsigned long address, unsigned int pdshift,
- unsigned int pshift, spinlock_t *ptl)
+pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, unsigned long sz)
{
- struct kmem_cache *cachep;
- pte_t *new;
- int i;
- int num_hugepd;
-
- if (pshift >= pdshift) {
- cachep = PGT_CACHE(PTE_T_ORDER);
- num_hugepd = 1 << (pshift - pdshift);
- } else {
- cachep = PGT_CACHE(pdshift - pshift);
- num_hugepd = 1;
- }
-
- if (!cachep) {
- WARN_ONCE(1, "No page table cache created for hugetlb tables");
- return -ENOMEM;
- }
-
- new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL));
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
- BUG_ON(pshift > HUGEPD_SHIFT_MASK);
- BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
+ addr &= ~(sz - 1);
- if (!new)
- return -ENOMEM;
+ p4d = p4d_offset(pgd_offset(mm, addr), addr);
+ if (!mm_pud_folded(mm) && sz >= P4D_SIZE)
+ return (pte_t *)p4d;
- /*
- * Make sure other cpus find the hugepd set only after a
- * properly initialized page table is visible to them.
- * For more details look for comment in __pte_alloc().
- */
- smp_wmb();
+ pud = pud_alloc(mm, p4d, addr);
+ if (!pud)
+ return NULL;
+ if (!mm_pmd_folded(mm) && sz >= PUD_SIZE)
+ return (pte_t *)pud;
- spin_lock(ptl);
- /*
- * We have multiple higher-level entries that point to the same
- * actual pte location. Fill in each as we go and backtrack on error.
- * We need all of these so the DTLB pgtable walk code can find the
- * right higher-level entry without knowing if it's a hugepage or not.
- */
- for (i = 0; i < num_hugepd; i++, hpdp++) {
- if (unlikely(!hugepd_none(*hpdp)))
- break;
- hugepd_populate(hpdp, new, pshift);
- }
- /* If we bailed from the for loop early, an error occurred, clean up */
- if (i < num_hugepd) {
- for (i = i - 1 ; i >= 0; i--, hpdp--)
- *hpdp = __hugepd(0);
- kmem_cache_free(cachep, new);
- } else {
- kmemleak_ignore(new);
- }
- spin_unlock(ptl);
- return 0;
-}
+ pmd = pmd_alloc(mm, pud, addr);
+ if (!pmd)
+ return NULL;
-/*
- * At this point we do the placement change only for BOOK3S 64. This would
- * possibly work on other subarchs.
- */
-pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, unsigned long sz)
-{
- pgd_t *pg;
- p4d_t *p4;
- pud_t *pu;
- pmd_t *pm;
- hugepd_t *hpdp = NULL;
- unsigned pshift = __ffs(sz);
- unsigned pdshift = PGDIR_SHIFT;
- spinlock_t *ptl;
-
- addr &= ~(sz-1);
- pg = pgd_offset(mm, addr);
- p4 = p4d_offset(pg, addr);
+ if (sz >= PMD_SIZE) {
+ /* On 8xx, all hugepages are handled as contiguous PTEs */
+ if (IS_ENABLED(CONFIG_PPC_8xx)) {
+ int i;
-#ifdef CONFIG_PPC_BOOK3S_64
- if (pshift == PGDIR_SHIFT)
- /* 16GB huge page */
- return (pte_t *) p4;
- else if (pshift > PUD_SHIFT) {
- /*
- * We need to use hugepd table
- */
- ptl = &mm->page_table_lock;
- hpdp = (hugepd_t *)p4;
- } else {
- pdshift = PUD_SHIFT;
- pu = pud_alloc(mm, p4, addr);
- if (!pu)
- return NULL;
- if (pshift == PUD_SHIFT)
- return (pte_t *)pu;
- else if (pshift > PMD_SHIFT) {
- ptl = pud_lockptr(mm, pu);
- hpdp = (hugepd_t *)pu;
- } else {
- pdshift = PMD_SHIFT;
- pm = pmd_alloc(mm, pu, addr);
- if (!pm)
- return NULL;
- if (pshift == PMD_SHIFT)
- /* 16MB hugepage */
- return (pte_t *)pm;
- else {
- ptl = pmd_lockptr(mm, pm);
- hpdp = (hugepd_t *)pm;
+ for (i = 0; i < sz / PMD_SIZE; i++) {
+ if (!pte_alloc_huge(mm, pmd + i, addr))
+ return NULL;
}
}
+ return (pte_t *)pmd;
}
-#else
- if (pshift >= PGDIR_SHIFT) {
- ptl = &mm->page_table_lock;
- hpdp = (hugepd_t *)p4;
- } else {
- pdshift = PUD_SHIFT;
- pu = pud_alloc(mm, p4, addr);
- if (!pu)
- return NULL;
- if (pshift >= PUD_SHIFT) {
- ptl = pud_lockptr(mm, pu);
- hpdp = (hugepd_t *)pu;
- } else {
- pdshift = PMD_SHIFT;
- pm = pmd_alloc(mm, pu, addr);
- if (!pm)
- return NULL;
- ptl = pmd_lockptr(mm, pm);
- hpdp = (hugepd_t *)pm;
- }
- }
-#endif
- if (!hpdp)
- return NULL;
-
- if (IS_ENABLED(CONFIG_PPC_8xx) && pshift < PMD_SHIFT)
- return pte_alloc_huge(mm, (pmd_t *)hpdp, addr);
-
- BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
- if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr,
- pdshift, pshift, ptl))
- return NULL;
-
- return hugepte_offset(*hpdp, addr, pdshift);
+ return pte_alloc_huge(mm, pmd, addr);
}
#ifdef CONFIG_PPC_BOOK3S_64
@@ -248,264 +133,6 @@ int __init alloc_bootmem_huge_page(struct hstate *h, int nid)
return __alloc_bootmem_huge_page(h, nid);
}
-#ifndef CONFIG_PPC_BOOK3S_64
-#define HUGEPD_FREELIST_SIZE \
- ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))
-
-struct hugepd_freelist {
- struct rcu_head rcu;
- unsigned int index;
- void *ptes[];
-};
-
-static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur);
-
-static void hugepd_free_rcu_callback(struct rcu_head *head)
-{
- struct hugepd_freelist *batch =
- container_of(head, struct hugepd_freelist, rcu);
- unsigned int i;
-
- for (i = 0; i < batch->index; i++)
- kmem_cache_free(PGT_CACHE(PTE_T_ORDER), batch->ptes[i]);
-
- free_page((unsigned long)batch);
-}
-
-static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
-{
- struct hugepd_freelist **batchp;
-
- batchp = &get_cpu_var(hugepd_freelist_cur);
-
- if (atomic_read(&tlb->mm->mm_users) < 2 ||
- mm_is_thread_local(tlb->mm)) {
- kmem_cache_free(PGT_CACHE(PTE_T_ORDER), hugepte);
- put_cpu_var(hugepd_freelist_cur);
- return;
- }
-
- if (*batchp == NULL) {
- *batchp = (struct hugepd_freelist *)__get_free_page(GFP_ATOMIC);
- (*batchp)->index = 0;
- }
-
- (*batchp)->ptes[(*batchp)->index++] = hugepte;
- if ((*batchp)->index == HUGEPD_FREELIST_SIZE) {
- call_rcu(&(*batchp)->rcu, hugepd_free_rcu_callback);
- *batchp = NULL;
- }
- put_cpu_var(hugepd_freelist_cur);
-}
-#else
-static inline void hugepd_free(struct mmu_gather *tlb, void *hugepte) {}
-#endif
-
-/* Return true when the entry to be freed maps more than the area being freed */
-static bool range_is_outside_limits(unsigned long start, unsigned long end,
- unsigned long floor, unsigned long ceiling,
- unsigned long mask)
-{
- if ((start & mask) < floor)
- return true;
- if (ceiling) {
- ceiling &= mask;
- if (!ceiling)
- return true;
- }
- return end - 1 > ceiling - 1;
-}
-
-static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
- unsigned long start, unsigned long end,
- unsigned long floor, unsigned long ceiling)
-{
- pte_t *hugepte = hugepd_page(*hpdp);
- int i;
-
- unsigned long pdmask = ~((1UL << pdshift) - 1);
- unsigned int num_hugepd = 1;
- unsigned int shift = hugepd_shift(*hpdp);
-
- /* Note: On fsl the hpdp may be the first of several */
- if (shift > pdshift)
- num_hugepd = 1 << (shift - pdshift);
-
- if (range_is_outside_limits(start, end, floor, ceiling, pdmask))
- return;
-
- for (i = 0; i < num_hugepd; i++, hpdp++)
- *hpdp = __hugepd(0);
-
- if (shift >= pdshift)
- hugepd_free(tlb, hugepte);
- else
- pgtable_free_tlb(tlb, hugepte,
- get_hugepd_cache_index(pdshift - shift));
-}
-
-static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
- unsigned long addr, unsigned long end,
- unsigned long floor, unsigned long ceiling)
-{
- pgtable_t token = pmd_pgtable(*pmd);
-
- if (range_is_outside_limits(addr, end, floor, ceiling, PMD_MASK))
- return;
-
- pmd_clear(pmd);
- pte_free_tlb(tlb, token, addr);
- mm_dec_nr_ptes(tlb->mm);
-}
-
-static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
- unsigned long addr, unsigned long end,
- unsigned long floor, unsigned long ceiling)
-{
- pmd_t *pmd;
- unsigned long next;
- unsigned long start;
-
- start = addr;
- do {
- unsigned long more;
-
- pmd = pmd_offset(pud, addr);
- next = pmd_addr_end(addr, end);
- if (!is_hugepd(__hugepd(pmd_val(*pmd)))) {
- if (pmd_none_or_clear_bad(pmd))
- continue;
-
- /*
- * if it is not hugepd pointer, we should already find
- * it cleared.
- */
- WARN_ON(!IS_ENABLED(CONFIG_PPC_8xx));
-
- hugetlb_free_pte_range(tlb, pmd, addr, end, floor, ceiling);
-
- continue;
- }
- /*
- * Increment next by the size of the huge mapping since
- * there may be more than one entry at this level for a
- * single hugepage, but all of them point to
- * the same kmem cache that holds the hugepte.
- */
- more = addr + (1UL << hugepd_shift(*(hugepd_t *)pmd));
- if (more > next)
- next = more;
-
- free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT,
- addr, next, floor, ceiling);
- } while (addr = next, addr != end);
-
- if (range_is_outside_limits(start, end, floor, ceiling, PUD_MASK))
- return;
-
- pmd = pmd_offset(pud, start & PUD_MASK);
- pud_clear(pud);
- pmd_free_tlb(tlb, pmd, start & PUD_MASK);
- mm_dec_nr_pmds(tlb->mm);
-}
-
-static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
- unsigned long addr, unsigned long end,
- unsigned long floor, unsigned long ceiling)
-{
- pud_t *pud;
- unsigned long next;
- unsigned long start;
-
- start = addr;
- do {
- pud = pud_offset(p4d, addr);
- next = pud_addr_end(addr, end);
- if (!is_hugepd(__hugepd(pud_val(*pud)))) {
- if (pud_none_or_clear_bad(pud))
- continue;
- hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
- ceiling);
- } else {
- unsigned long more;
- /*
- * Increment next by the size of the huge mapping since
- * there may be more than one entry at this level for a
- * single hugepage, but all of them point to
- * the same kmem cache that holds the hugepte.
- */
- more = addr + (1UL << hugepd_shift(*(hugepd_t *)pud));
- if (more > next)
- next = more;
-
- free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT,
- addr, next, floor, ceiling);
- }
- } while (addr = next, addr != end);
-
- if (range_is_outside_limits(start, end, floor, ceiling, PGDIR_MASK))
- return;
-
- pud = pud_offset(p4d, start & PGDIR_MASK);
- p4d_clear(p4d);
- pud_free_tlb(tlb, pud, start & PGDIR_MASK);
- mm_dec_nr_puds(tlb->mm);
-}
-
-/*
- * This function frees user-level page tables of a process.
- */
-void hugetlb_free_pgd_range(struct mmu_gather *tlb,
- unsigned long addr, unsigned long end,
- unsigned long floor, unsigned long ceiling)
-{
- pgd_t *pgd;
- p4d_t *p4d;
- unsigned long next;
-
- /*
- * Because there are a number of different possible pagetable
- * layouts for hugepage ranges, we limit knowledge of how
- * things should be laid out to the allocation path
- * (huge_pte_alloc(), above). Everything else works out the
- * structure as it goes from information in the hugepd
- * pointers. That means that we can't here use the
- * optimization used in the normal page free_pgd_range(), of
- * checking whether we're actually covering a large enough
- * range to have to do anything at the top level of the walk
- * instead of at the bottom.
- *
- * To make sense of this, you should probably go read the big
- * block comment at the top of the normal free_pgd_range(),
- * too.
- */
-
- do {
- next = pgd_addr_end(addr, end);
- pgd = pgd_offset(tlb->mm, addr);
- p4d = p4d_offset(pgd, addr);
- if (!is_hugepd(__hugepd(pgd_val(*pgd)))) {
- if (p4d_none_or_clear_bad(p4d))
- continue;
- hugetlb_free_pud_range(tlb, p4d, addr, next, floor, ceiling);
- } else {
- unsigned long more;
- /*
- * Increment next by the size of the huge mapping since
- * there may be more than one entry at the pgd level
- * for a single hugepage, but all of them point to the
- * same kmem cache that holds the hugepte.
- */
- more = addr + (1UL << hugepd_shift(*(hugepd_t *)pgd));
- if (more > next)
- next = more;
-
- free_hugepd_range(tlb, (hugepd_t *)p4d, PGDIR_SHIFT,
- addr, next, floor, ceiling);
- }
- } while (addr = next, addr != end);
-}
-
bool __init arch_hugetlb_valid_size(unsigned long size)
{
int shift = __ffs(size);
@@ -552,44 +179,14 @@ static int __init hugetlbpage_init(void)
for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
unsigned shift;
- unsigned pdshift;
if (!mmu_psize_defs[psize].shift)
continue;
shift = mmu_psize_to_shift(psize);
-#ifdef CONFIG_PPC_BOOK3S_64
- if (shift > PGDIR_SHIFT)
- continue;
- else if (shift > PUD_SHIFT)
- pdshift = PGDIR_SHIFT;
- else if (shift > PMD_SHIFT)
- pdshift = PUD_SHIFT;
- else
- pdshift = PMD_SHIFT;
-#else
- if (shift < PUD_SHIFT)
- pdshift = PMD_SHIFT;
- else if (shift < PGDIR_SHIFT)
- pdshift = PUD_SHIFT;
- else
- pdshift = PGDIR_SHIFT;
-#endif
-
if (add_huge_page_size(1ULL << shift) < 0)
continue;
- /*
- * if we have pdshift and shift value same, we don't
- * use pgt cache for hugepd.
- */
- if (pdshift > shift) {
- if (!IS_ENABLED(CONFIG_PPC_8xx))
- pgtable_cache_add(pdshift - shift);
- } else if (IS_ENABLED(CONFIG_PPC_E500) ||
- IS_ENABLED(CONFIG_PPC_8xx)) {
- pgtable_cache_add(PTE_T_ORDER);
- }
configured = true;
}
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index d3a7726ecf51..2978fcbe307e 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -31,6 +31,9 @@ EXPORT_SYMBOL_GPL(kernstart_virt_addr);
bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP);
bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP);
+#ifdef CONFIG_KFENCE
+bool __ro_after_init kfence_disabled;
+#endif
static int __init parse_nosmep(char *p)
{
@@ -70,7 +73,7 @@ void setup_kup(void)
#define CTOR(shift) static void ctor_##shift(void *addr) \
{ \
- memset(addr, 0, sizeof(void *) << (shift)); \
+ memset(addr, 0, sizeof(pgd_t) << (shift)); \
}
CTOR(0); CTOR(1); CTOR(2); CTOR(3); CTOR(4); CTOR(5); CTOR(6); CTOR(7);
@@ -114,18 +117,14 @@ EXPORT_SYMBOL_GPL(pgtable_cache); /* used by kvm_hv module */
void pgtable_cache_add(unsigned int shift)
{
char *name;
- unsigned long table_size = sizeof(void *) << shift;
+ unsigned long table_size = sizeof(pgd_t) << shift;
unsigned long align = table_size;
/* When batching pgtable pointers for RCU freeing, we store
* the index size in the low bits. Table alignment must be
* big enough to fit it.
- *
- * Likewise, hugeapge pagetable pointers contain a (different)
- * shift value in the low bits. All tables must be aligned so
- * as to leave enough 0 bits in the address to contain it. */
- unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1,
- HUGEPD_SHIFT_MASK + 1);
+ */
+ unsigned long minalign = MAX_PGTABLE_INDEX_SIZE + 1;
struct kmem_cache *new = NULL;
/* It would be nice if this was a BUILD_BUG_ON(), but at the
diff --git a/arch/powerpc/mm/kasan/8xx.c b/arch/powerpc/mm/kasan/8xx.c
index 2784224054f8..989d6cdf4141 100644
--- a/arch/powerpc/mm/kasan/8xx.c
+++ b/arch/powerpc/mm/kasan/8xx.c
@@ -6,28 +6,33 @@
#include <linux/memblock.h>
#include <linux/hugetlb.h>
+#include <asm/pgalloc.h>
+
static int __init
kasan_init_shadow_8M(unsigned long k_start, unsigned long k_end, void *block)
{
pmd_t *pmd = pmd_off_k(k_start);
unsigned long k_cur, k_next;
- for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd += 2, block += SZ_8M) {
- pte_basic_t *new;
+ for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++, block += SZ_4M) {
+ pte_t *ptep;
+ int i;
k_next = pgd_addr_end(k_cur, k_end);
- k_next = pgd_addr_end(k_next, k_end);
if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte)
continue;
- new = memblock_alloc(sizeof(pte_basic_t), SZ_4K);
- if (!new)
+ ptep = memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE);
+ if (!ptep)
return -ENOMEM;
- *new = pte_val(pte_mkhuge(pfn_pte(PHYS_PFN(__pa(block)), PAGE_KERNEL)));
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ pte_t pte = pte_mkhuge(pfn_pte(PHYS_PFN(__pa(block + i * PAGE_SIZE)), PAGE_KERNEL));
- hugepd_populate_kernel((hugepd_t *)pmd, (pte_t *)new, PAGE_SHIFT_8M);
- hugepd_populate_kernel((hugepd_t *)pmd + 1, (pte_t *)new, PAGE_SHIFT_8M);
+ __set_pte_at(&init_mm, k_cur, ptep + i, pte, 1);
+ }
+ pmd_populate_kernel(&init_mm, pmd, ptep);
+ *pmd = __pmd(pmd_val(*pmd) | _PMD_PAGE_8M);
}
return 0;
}
diff --git a/arch/powerpc/mm/kasan/init_book3e_64.c b/arch/powerpc/mm/kasan/init_book3e_64.c
index 11519e88dc6b..43c03b84ff32 100644
--- a/arch/powerpc/mm/kasan/init_book3e_64.c
+++ b/arch/powerpc/mm/kasan/init_book3e_64.c
@@ -112,7 +112,7 @@ void __init kasan_init(void)
pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL_RO);
for_each_mem_range(i, &start, &end)
- kasan_init_phys_region((void *)start, (void *)end);
+ kasan_init_phys_region(phys_to_virt(start), phys_to_virt(end));
if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
kasan_remove_zero_shadow((void *)VMALLOC_START, VMALLOC_SIZE);
diff --git a/arch/powerpc/mm/kasan/init_book3s_64.c b/arch/powerpc/mm/kasan/init_book3s_64.c
index 9300d641cf9a..3fb5ce4f48f4 100644
--- a/arch/powerpc/mm/kasan/init_book3s_64.c
+++ b/arch/powerpc/mm/kasan/init_book3s_64.c
@@ -62,7 +62,7 @@ void __init kasan_init(void)
}
for_each_mem_range(i, &start, &end)
- kasan_init_phys_region((void *)start, (void *)end);
+ kasan_init_phys_region(phys_to_virt(start), phys_to_virt(end));
for (i = 0; i < PTRS_PER_PTE; i++)
__set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page,
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 3a440004b97d..da21cb018984 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -16,6 +16,8 @@
#include <linux/highmem.h>
#include <linux/suspend.h>
#include <linux/dma-direct.h>
+#include <linux/execmem.h>
+#include <linux/vmalloc.h>
#include <asm/swiotlb.h>
#include <asm/machdep.h>
@@ -30,7 +32,7 @@
#include <mm/mmu_decl.h>
-unsigned long long memory_limit;
+unsigned long long memory_limit __initdata;
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);
@@ -288,8 +290,6 @@ void __init mem_init(void)
swiotlb_init(ppc_swiotlb_enable, ppc_swiotlb_flags);
#endif
- high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
-
kasan_late_init();
memblock_free_all();
@@ -406,3 +406,66 @@ int devmem_is_allowed(unsigned long pfn)
* the EHEA driver. Drop this when drivers/net/ethernet/ibm/ehea is removed.
*/
EXPORT_SYMBOL_GPL(walk_system_ram_range);
+
+#ifdef CONFIG_EXECMEM
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+ pgprot_t kprobes_prot = strict_module_rwx_enabled() ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
+ pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC;
+ unsigned long fallback_start = 0, fallback_end = 0;
+ unsigned long start, end;
+
+ /*
+ * BOOK3S_32 and 8xx define MODULES_VADDR for text allocations and
+ * allow allocating data in the entire vmalloc space
+ */
+#ifdef MODULES_VADDR
+ unsigned long limit = (unsigned long)_etext - SZ_32M;
+
+ BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR);
+
+ /* First try within 32M limit from _etext to avoid branch trampolines */
+ if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit) {
+ start = limit;
+ fallback_start = MODULES_VADDR;
+ fallback_end = MODULES_END;
+ } else {
+ start = MODULES_VADDR;
+ }
+
+ end = MODULES_END;
+#else
+ start = VMALLOC_START;
+ end = VMALLOC_END;
+#endif
+
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .start = start,
+ .end = end,
+ .pgprot = prot,
+ .alignment = 1,
+ .fallback_start = fallback_start,
+ .fallback_end = fallback_end,
+ },
+ [EXECMEM_KPROBES] = {
+ .start = VMALLOC_START,
+ .end = VMALLOC_END,
+ .pgprot = kprobes_prot,
+ .alignment = 1,
+ },
+ [EXECMEM_MODULE_DATA] = {
+ .start = VMALLOC_START,
+ .end = VMALLOC_END,
+ .pgprot = PAGE_KERNEL,
+ .alignment = 1,
+ },
+ },
+ };
+
+ return &execmem_info;
+}
+#endif /* CONFIG_EXECMEM */
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index b24c19078eb1..3e3af29b4523 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -21,7 +21,7 @@ static inline void switch_mm_pgdir(struct task_struct *tsk,
#ifdef CONFIG_PPC_BOOK3S_32
tsk->thread.sr0 = mm->context.sr0;
#endif
-#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP)
+#if defined(CONFIG_BOOKE) && defined(CONFIG_PPC_KUAP)
tsk->thread.pid = mm->context.id;
#endif
}
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 6949c2c937e7..b2d1eea09761 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -20,9 +20,9 @@
#include <asm/trace.h>
/*
- * On 40x and 8xx, we directly inline tlbia and tlbivax
+ * On 8xx, we directly inline tlbia
*/
-#if defined(CONFIG_40x) || defined(CONFIG_PPC_8xx)
+#ifdef CONFIG_PPC_8xx
static inline void _tlbil_all(void)
{
asm volatile ("sync; tlbia; isync" : : : "memory");
@@ -35,7 +35,7 @@ static inline void _tlbil_pid(unsigned int pid)
}
#define _tlbil_pid_noind(pid) _tlbil_pid(pid)
-#else /* CONFIG_40x || CONFIG_PPC_8xx */
+#else /* CONFIG_PPC_8xx */
extern void _tlbil_all(void);
extern void _tlbil_pid(unsigned int pid);
#ifdef CONFIG_PPC_BOOK3E_64
@@ -43,7 +43,7 @@ extern void _tlbil_pid_noind(unsigned int pid);
#else
#define _tlbil_pid_noind(pid) _tlbil_pid(pid)
#endif
-#endif /* !(CONFIG_40x || CONFIG_PPC_8xx) */
+#endif /* !CONFIG_PPC_8xx */
/*
* On 8xx, we directly inline tlbie, on others, it's extern
diff --git a/arch/powerpc/mm/nohash/40x.c b/arch/powerpc/mm/nohash/40x.c
deleted file mode 100644
index e835e80c09db..000000000000
--- a/arch/powerpc/mm/nohash/40x.c
+++ /dev/null
@@ -1,161 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * This file contains the routines for initializing the MMU
- * on the 4xx series of chips.
- * -- paulus
- *
- * Derived from arch/ppc/mm/init.c:
- * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
- * and Cort Dougan (PReP) (cort@cs.nmt.edu)
- * Copyright (C) 1996 Paul Mackerras
- *
- * Derived from "arch/i386/mm/init.c"
- * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
- */
-
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/stddef.h>
-#include <linux/vmalloc.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/highmem.h>
-#include <linux/memblock.h>
-
-#include <asm/io.h>
-#include <asm/mmu_context.h>
-#include <asm/mmu.h>
-#include <linux/uaccess.h>
-#include <asm/smp.h>
-#include <asm/bootx.h>
-#include <asm/machdep.h>
-#include <asm/setup.h>
-
-#include <mm/mmu_decl.h>
-
-/*
- * MMU_init_hw does the chip-specific initialization of the MMU hardware.
- */
-void __init MMU_init_hw(void)
-{
- int i;
- unsigned long zpr;
-
- /*
- * The Zone Protection Register (ZPR) defines how protection will
- * be applied to every page which is a member of a given zone.
- * The zone index bits (of ZSEL) in the PTE are used for software
- * indicators. We use the 4 upper bits of virtual address to select
- * the zone. We set all zones above TASK_SIZE to zero, allowing
- * only kernel access as indicated in the PTE. For zones below
- * TASK_SIZE, we set a 01 binary (a value of 10 will not work)
- * to allow user access as indicated in the PTE. This also allows
- * kernel access as indicated in the PTE.
- */
-
- for (i = 0, zpr = 0; i < TASK_SIZE >> 28; i++)
- zpr |= 1 << (30 - i * 2);
-
- mtspr(SPRN_ZPR, zpr);
-
- flush_instruction_cache();
-
- /*
- * Set up the real-mode cache parameters for the exception vector
- * handlers (which are run in real-mode).
- */
-
- mtspr(SPRN_DCWR, 0x00000000); /* All caching is write-back */
-
- /*
- * Cache instruction and data space where the exception
- * vectors and the kernel live in real-mode.
- */
-
- mtspr(SPRN_DCCR, 0xFFFF0000); /* 2GByte of data space at 0x0. */
- mtspr(SPRN_ICCR, 0xFFFF0000); /* 2GByte of instr. space at 0x0. */
-}
-
-#define LARGE_PAGE_SIZE_16M (1<<24)
-#define LARGE_PAGE_SIZE_4M (1<<22)
-
-unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
-{
- unsigned long v, s, mapped;
- phys_addr_t p;
-
- v = KERNELBASE;
- p = 0;
- s = total_lowmem;
-
- if (IS_ENABLED(CONFIG_KFENCE))
- return 0;
-
- if (debug_pagealloc_enabled())
- return 0;
-
- if (strict_kernel_rwx_enabled())
- return 0;
-
- while (s >= LARGE_PAGE_SIZE_16M) {
- pmd_t *pmdp;
- unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_RW;
-
- pmdp = pmd_off_k(v);
- *pmdp++ = __pmd(val);
- *pmdp++ = __pmd(val);
- *pmdp++ = __pmd(val);
- *pmdp++ = __pmd(val);
-
- v += LARGE_PAGE_SIZE_16M;
- p += LARGE_PAGE_SIZE_16M;
- s -= LARGE_PAGE_SIZE_16M;
- }
-
- while (s >= LARGE_PAGE_SIZE_4M) {
- pmd_t *pmdp;
- unsigned long val = p | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_RW;
-
- pmdp = pmd_off_k(v);
- *pmdp = __pmd(val);
-
- v += LARGE_PAGE_SIZE_4M;
- p += LARGE_PAGE_SIZE_4M;
- s -= LARGE_PAGE_SIZE_4M;
- }
-
- mapped = total_lowmem - s;
-
- /* If the size of RAM is not an exact power of two, we may not
- * have covered RAM in its entirety with 16 and 4 MiB
- * pages. Consequently, restrict the top end of RAM currently
- * allocable so that calls to the MEMBLOCK to allocate PTEs for "tail"
- * coverage with normal-sized pages (or other reasons) do not
- * attempt to allocate outside the allowed range.
- */
- memblock_set_current_limit(mapped);
-
- return mapped;
-}
-
-void setup_initial_memory_limit(phys_addr_t first_memblock_base,
- phys_addr_t first_memblock_size)
-{
- /* We don't currently support the first MEMBLOCK not mapping 0
- * physical on those processors
- */
- BUG_ON(first_memblock_base != 0);
-
- /* 40x can only access 16MB at the moment (see head_40x.S) */
- memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000));
-}
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 43d4842bb1c7..388bba0ab3e7 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -11,6 +11,7 @@
#include <linux/hugetlb.h>
#include <asm/fixmap.h>
+#include <asm/pgalloc.h>
#include <mm/mmu_decl.h>
@@ -48,20 +49,6 @@ unsigned long p_block_mapped(phys_addr_t pa)
return 0;
}
-static pte_t __init *early_hugepd_alloc_kernel(hugepd_t *pmdp, unsigned long va)
-{
- if (hpd_val(*pmdp) == 0) {
- pte_t *ptep = memblock_alloc(sizeof(pte_basic_t), SZ_4K);
-
- if (!ptep)
- return NULL;
-
- hugepd_populate_kernel((hugepd_t *)pmdp, ptep, PAGE_SHIFT_8M);
- hugepd_populate_kernel((hugepd_t *)pmdp + 1, ptep, PAGE_SHIFT_8M);
- }
- return hugepte_offset(*(hugepd_t *)pmdp, va, PGDIR_SHIFT);
-}
-
static int __ref __early_map_kernel_hugepage(unsigned long va, phys_addr_t pa,
pgprot_t prot, int psize, bool new)
{
@@ -75,26 +62,36 @@ static int __ref __early_map_kernel_hugepage(unsigned long va, phys_addr_t pa,
if (WARN_ON(slab_is_available()))
return -EINVAL;
- if (psize == MMU_PAGE_512K)
+ if (psize == MMU_PAGE_512K) {
ptep = early_pte_alloc_kernel(pmdp, va);
- else
- ptep = early_hugepd_alloc_kernel((hugepd_t *)pmdp, va);
+ /* The PTE should never be already present */
+ if (WARN_ON(pte_present(*ptep) && pgprot_val(prot)))
+ return -EINVAL;
+ } else {
+ if (WARN_ON(!pmd_none(*pmdp) || !pmd_none(*(pmdp + 1))))
+ return -EINVAL;
+
+ ptep = early_alloc_pgtable(PTE_FRAG_SIZE);
+ pmd_populate_kernel(&init_mm, pmdp, ptep);
+
+ ptep = early_alloc_pgtable(PTE_FRAG_SIZE);
+ pmd_populate_kernel(&init_mm, pmdp + 1, ptep);
+
+ ptep = (pte_t *)pmdp;
+ }
} else {
if (psize == MMU_PAGE_512K)
ptep = pte_offset_kernel(pmdp, va);
else
- ptep = hugepte_offset(*(hugepd_t *)pmdp, va, PGDIR_SHIFT);
+ ptep = (pte_t *)pmdp;
}
if (WARN_ON(!ptep))
return -ENOMEM;
- /* The PTE should never be already present */
- if (new && WARN_ON(pte_present(*ptep) && pgprot_val(prot)))
- return -EINVAL;
-
set_huge_pte_at(&init_mm, va, ptep,
- pte_mkhuge(pfn_pte(pa >> PAGE_SHIFT, prot)), psize);
+ pte_mkhuge(pfn_pte(pa >> PAGE_SHIFT, prot)),
+ 1UL << mmu_psize_to_shift(psize));
return 0;
}
diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile
index f3894e79d5f7..cf60c776c883 100644
--- a/arch/powerpc/mm/nohash/Makefile
+++ b/arch/powerpc/mm/nohash/Makefile
@@ -1,10 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
-ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
-
obj-y += mmu_context.o tlb.o tlb_low.o kup.o
-obj-$(CONFIG_PPC_BOOK3E_64) += tlb_low_64e.o book3e_pgtable.o
-obj-$(CONFIG_40x) += 40x.o
+obj-$(CONFIG_PPC_BOOK3E_64) += tlb_64e.o tlb_low_64e.o book3e_pgtable.o
obj-$(CONFIG_44x) += 44x.o
obj-$(CONFIG_PPC_8xx) += 8xx.o
obj-$(CONFIG_PPC_E500) += e500.o
diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c
index 1c5e4ecbebeb..ad2a7c26f2a0 100644
--- a/arch/powerpc/mm/nohash/book3e_pgtable.c
+++ b/arch/powerpc/mm/nohash/book3e_pgtable.c
@@ -29,10 +29,10 @@ int __meminit vmemmap_create_mapping(unsigned long start,
_PAGE_KERNEL_RW;
/* PTEs only contain page size encodings up to 32M */
- BUG_ON(mmu_psize_defs[mmu_vmemmap_psize].enc > 0xf);
+ BUG_ON(mmu_psize_defs[mmu_vmemmap_psize].shift - 10 > 0xf);
/* Encode the size in the PTE */
- flags |= mmu_psize_defs[mmu_vmemmap_psize].enc << 8;
+ flags |= (mmu_psize_defs[mmu_vmemmap_psize].shift - 10) << 8;
/* For each PTE for that area, map things. Note that we don't
* increment phys because all PTEs are of the large size and
diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c
index cdff129abb14..5c8d1bb98b3e 100644
--- a/arch/powerpc/mm/nohash/kaslr_booke.c
+++ b/arch/powerpc/mm/nohash/kaslr_booke.c
@@ -376,7 +376,7 @@ notrace void __init kaslr_early_init(void *dt_ptr, phys_addr_t size)
create_kaslr_tlb_entry(1, tlb_virt, tlb_phys);
}
- /* Copy the kernel to it's new location and run */
+ /* Copy the kernel to its new location and run */
memcpy((void *)kernstart_virt_addr, (void *)_stext, kernel_sz);
flush_icache_range(kernstart_virt_addr, kernstart_virt_addr + kernel_sz);
diff --git a/arch/powerpc/mm/nohash/kup.c b/arch/powerpc/mm/nohash/kup.c
index e1f7de2e54ec..c20c4f357fbf 100644
--- a/arch/powerpc/mm/nohash/kup.c
+++ b/arch/powerpc/mm/nohash/kup.c
@@ -15,8 +15,6 @@
void setup_kuap(bool disabled)
{
if (disabled) {
- if (IS_ENABLED(CONFIG_40x))
- disable_kuep = true;
if (smp_processor_id() == boot_cpuid)
cur_cpu_spec->mmu_features &= ~MMU_FTR_KUAP;
return;
diff --git a/arch/powerpc/mm/nohash/mmu_context.c b/arch/powerpc/mm/nohash/mmu_context.c
index ccd5819b1bd9..0b181da40ddb 100644
--- a/arch/powerpc/mm/nohash/mmu_context.c
+++ b/arch/powerpc/mm/nohash/mmu_context.c
@@ -219,9 +219,6 @@ static void set_context(unsigned long id, pgd_t *pgd)
/* sync */
mb();
} else if (kuap_is_disabled()) {
- if (IS_ENABLED(CONFIG_40x))
- mb(); /* sync */
-
mtspr(SPRN_PID, id);
isync();
}
@@ -306,7 +303,7 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
if (IS_ENABLED(CONFIG_BDI_SWITCH))
abatron_pteptrs[1] = next->pgd;
set_context(id, next->pgd);
-#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP)
+#if defined(CONFIG_BOOKE) && defined(CONFIG_PPC_KUAP)
tsk->thread.pid = id;
#endif
raw_spin_unlock(&context_lock);
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
index 5ffa0af4328a..b653a7be4cb1 100644
--- a/arch/powerpc/mm/nohash/tlb.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -53,37 +53,30 @@
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
[MMU_PAGE_4K] = {
.shift = 12,
- .enc = BOOK3E_PAGESZ_4K,
},
[MMU_PAGE_2M] = {
.shift = 21,
- .enc = BOOK3E_PAGESZ_2M,
},
[MMU_PAGE_4M] = {
.shift = 22,
- .enc = BOOK3E_PAGESZ_4M,
},
[MMU_PAGE_16M] = {
.shift = 24,
- .enc = BOOK3E_PAGESZ_16M,
},
[MMU_PAGE_64M] = {
.shift = 26,
- .enc = BOOK3E_PAGESZ_64M,
},
[MMU_PAGE_256M] = {
.shift = 28,
- .enc = BOOK3E_PAGESZ_256M,
},
[MMU_PAGE_1G] = {
.shift = 30,
- .enc = BOOK3E_PAGESZ_1GB,
},
};
static inline int mmu_get_tsize(int psize)
{
- return mmu_psize_defs[psize].enc;
+ return mmu_psize_defs[psize].shift - 10;
}
#else
static inline int mmu_get_tsize(int psize)
@@ -110,28 +103,6 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
};
#endif
-/* The variables below are currently only used on 64-bit Book3E
- * though this will probably be made common with other nohash
- * implementations at some point
- */
-#ifdef CONFIG_PPC64
-
-int mmu_pte_psize; /* Page size used for PTE pages */
-int mmu_vmemmap_psize; /* Page size used for the virtual mem map */
-int book3e_htw_mode; /* HW tablewalk? Value is PPC_HTW_* */
-unsigned long linear_map_top; /* Top of linear mapping */
-
-
-/*
- * Number of bytes to add to SPRN_SPRG_TLB_EXFRAME on crit/mcheck/debug
- * exceptions. This is used for bolted and e6500 TLB miss handlers which
- * do not modify this SPRG in the TLB miss code; for other TLB miss handlers,
- * this is set to zero.
- */
-int extlb_level_exc;
-
-#endif /* CONFIG_PPC64 */
-
#ifdef CONFIG_PPC_E500
/* next_tlbcam_idx is used to round-robin tlbcam entry assignment */
DEFINE_PER_CPU(int, next_tlbcam_idx);
@@ -358,381 +329,7 @@ void tlb_flush(struct mmu_gather *tlb)
flush_tlb_mm(tlb->mm);
}
-/*
- * Below are functions specific to the 64-bit variant of Book3E though that
- * may change in the future
- */
-
-#ifdef CONFIG_PPC64
-
-/*
- * Handling of virtual linear page tables or indirect TLB entries
- * flushing when PTE pages are freed
- */
-void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
-{
- int tsize = mmu_psize_defs[mmu_pte_psize].enc;
-
- if (book3e_htw_mode != PPC_HTW_NONE) {
- unsigned long start = address & PMD_MASK;
- unsigned long end = address + PMD_SIZE;
- unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift;
-
- /* This isn't the most optimal, ideally we would factor out the
- * while preempt & CPU mask mucking around, or even the IPI but
- * it will do for now
- */
- while (start < end) {
- __flush_tlb_page(tlb->mm, start, tsize, 1);
- start += size;
- }
- } else {
- unsigned long rmask = 0xf000000000000000ul;
- unsigned long rid = (address & rmask) | 0x1000000000000000ul;
- unsigned long vpte = address & ~rmask;
-
- vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful;
- vpte |= rid;
- __flush_tlb_page(tlb->mm, vpte, tsize, 0);
- }
-}
-
-static void __init setup_page_sizes(void)
-{
- unsigned int tlb0cfg;
- unsigned int tlb0ps;
- unsigned int eptcfg;
- int i, psize;
-
-#ifdef CONFIG_PPC_E500
- unsigned int mmucfg = mfspr(SPRN_MMUCFG);
- int fsl_mmu = mmu_has_feature(MMU_FTR_TYPE_FSL_E);
-
- if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) {
- unsigned int tlb1cfg = mfspr(SPRN_TLB1CFG);
- unsigned int min_pg, max_pg;
-
- min_pg = (tlb1cfg & TLBnCFG_MINSIZE) >> TLBnCFG_MINSIZE_SHIFT;
- max_pg = (tlb1cfg & TLBnCFG_MAXSIZE) >> TLBnCFG_MAXSIZE_SHIFT;
-
- for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
- struct mmu_psize_def *def;
- unsigned int shift;
-
- def = &mmu_psize_defs[psize];
- shift = def->shift;
-
- if (shift == 0 || shift & 1)
- continue;
-
- /* adjust to be in terms of 4^shift Kb */
- shift = (shift - 10) >> 1;
-
- if ((shift >= min_pg) && (shift <= max_pg))
- def->flags |= MMU_PAGE_SIZE_DIRECT;
- }
-
- goto out;
- }
-
- if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2) {
- u32 tlb1cfg, tlb1ps;
-
- tlb0cfg = mfspr(SPRN_TLB0CFG);
- tlb1cfg = mfspr(SPRN_TLB1CFG);
- tlb1ps = mfspr(SPRN_TLB1PS);
- eptcfg = mfspr(SPRN_EPTCFG);
-
- if ((tlb1cfg & TLBnCFG_IND) && (tlb0cfg & TLBnCFG_PT))
- book3e_htw_mode = PPC_HTW_E6500;
-
- /*
- * We expect 4K subpage size and unrestricted indirect size.
- * The lack of a restriction on indirect size is a Freescale
- * extension, indicated by PSn = 0 but SPSn != 0.
- */
- if (eptcfg != 2)
- book3e_htw_mode = PPC_HTW_NONE;
-
- for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
- struct mmu_psize_def *def = &mmu_psize_defs[psize];
-
- if (!def->shift)
- continue;
-
- if (tlb1ps & (1U << (def->shift - 10))) {
- def->flags |= MMU_PAGE_SIZE_DIRECT;
-
- if (book3e_htw_mode && psize == MMU_PAGE_2M)
- def->flags |= MMU_PAGE_SIZE_INDIRECT;
- }
- }
-
- goto out;
- }
-#endif
-
- tlb0cfg = mfspr(SPRN_TLB0CFG);
- tlb0ps = mfspr(SPRN_TLB0PS);
- eptcfg = mfspr(SPRN_EPTCFG);
-
- /* Look for supported direct sizes */
- for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
- struct mmu_psize_def *def = &mmu_psize_defs[psize];
-
- if (tlb0ps & (1U << (def->shift - 10)))
- def->flags |= MMU_PAGE_SIZE_DIRECT;
- }
-
- /* Indirect page sizes supported ? */
- if ((tlb0cfg & TLBnCFG_IND) == 0 ||
- (tlb0cfg & TLBnCFG_PT) == 0)
- goto out;
-
- book3e_htw_mode = PPC_HTW_IBM;
-
- /* Now, we only deal with one IND page size for each
- * direct size. Hopefully all implementations today are
- * unambiguous, but we might want to be careful in the
- * future.
- */
- for (i = 0; i < 3; i++) {
- unsigned int ps, sps;
-
- sps = eptcfg & 0x1f;
- eptcfg >>= 5;
- ps = eptcfg & 0x1f;
- eptcfg >>= 5;
- if (!ps || !sps)
- continue;
- for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
- struct mmu_psize_def *def = &mmu_psize_defs[psize];
-
- if (ps == (def->shift - 10))
- def->flags |= MMU_PAGE_SIZE_INDIRECT;
- if (sps == (def->shift - 10))
- def->ind = ps + 10;
- }
- }
-
-out:
- /* Cleanup array and print summary */
- pr_info("MMU: Supported page sizes\n");
- for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
- struct mmu_psize_def *def = &mmu_psize_defs[psize];
- const char *__page_type_names[] = {
- "unsupported",
- "direct",
- "indirect",
- "direct & indirect"
- };
- if (def->flags == 0) {
- def->shift = 0;
- continue;
- }
- pr_info(" %8ld KB as %s\n", 1ul << (def->shift - 10),
- __page_type_names[def->flags & 0x3]);
- }
-}
-
-static void __init setup_mmu_htw(void)
-{
- /*
- * If we want to use HW tablewalk, enable it by patching the TLB miss
- * handlers to branch to the one dedicated to it.
- */
-
- switch (book3e_htw_mode) {
- case PPC_HTW_IBM:
- patch_exception(0x1c0, exc_data_tlb_miss_htw_book3e);
- patch_exception(0x1e0, exc_instruction_tlb_miss_htw_book3e);
- break;
-#ifdef CONFIG_PPC_E500
- case PPC_HTW_E6500:
- extlb_level_exc = EX_TLB_SIZE;
- patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e);
- patch_exception(0x1e0, exc_instruction_tlb_miss_e6500_book3e);
- break;
-#endif
- }
- pr_info("MMU: Book3E HW tablewalk %s\n",
- book3e_htw_mode != PPC_HTW_NONE ? "enabled" : "not supported");
-}
-
-/*
- * Early initialization of the MMU TLB code
- */
-static void early_init_this_mmu(void)
-{
- unsigned int mas4;
-
- /* Set MAS4 based on page table setting */
-
- mas4 = 0x4 << MAS4_WIMGED_SHIFT;
- switch (book3e_htw_mode) {
- case PPC_HTW_E6500:
- mas4 |= MAS4_INDD;
- mas4 |= BOOK3E_PAGESZ_2M << MAS4_TSIZED_SHIFT;
- mas4 |= MAS4_TLBSELD(1);
- mmu_pte_psize = MMU_PAGE_2M;
- break;
-
- case PPC_HTW_IBM:
- mas4 |= MAS4_INDD;
- mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT;
- mmu_pte_psize = MMU_PAGE_1M;
- break;
-
- case PPC_HTW_NONE:
- mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT;
- mmu_pte_psize = mmu_virtual_psize;
- break;
- }
- mtspr(SPRN_MAS4, mas4);
-
-#ifdef CONFIG_PPC_E500
- if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
- unsigned int num_cams;
- bool map = true;
-
- /* use a quarter of the TLBCAM for bolted linear map */
- num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
-
- /*
- * Only do the mapping once per core, or else the
- * transient mapping would cause problems.
- */
-#ifdef CONFIG_SMP
- if (hweight32(get_tensr()) > 1)
- map = false;
-#endif
-
- if (map)
- linear_map_top = map_mem_in_cams(linear_map_top,
- num_cams, false, true);
- }
-#endif
-
- /* A sync won't hurt us after mucking around with
- * the MMU configuration
- */
- mb();
-}
-
-static void __init early_init_mmu_global(void)
-{
- /* XXX This should be decided at runtime based on supported
- * page sizes in the TLB, but for now let's assume 16M is
- * always there and a good fit (which it probably is)
- *
- * Freescale booke only supports 4K pages in TLB0, so use that.
- */
- if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
- mmu_vmemmap_psize = MMU_PAGE_4K;
- else
- mmu_vmemmap_psize = MMU_PAGE_16M;
-
- /* XXX This code only checks for TLB 0 capabilities and doesn't
- * check what page size combos are supported by the HW. It
- * also doesn't handle the case where a separate array holds
- * the IND entries from the array loaded by the PT.
- */
- /* Look for supported page sizes */
- setup_page_sizes();
-
- /* Look for HW tablewalk support */
- setup_mmu_htw();
-
-#ifdef CONFIG_PPC_E500
- if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
- if (book3e_htw_mode == PPC_HTW_NONE) {
- extlb_level_exc = EX_TLB_SIZE;
- patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
- patch_exception(0x1e0,
- exc_instruction_tlb_miss_bolted_book3e);
- }
- }
-#endif
-
- /* Set the global containing the top of the linear mapping
- * for use by the TLB miss code
- */
- linear_map_top = memblock_end_of_DRAM();
-
- ioremap_bot = IOREMAP_BASE;
-}
-
-static void __init early_mmu_set_memory_limit(void)
-{
-#ifdef CONFIG_PPC_E500
- if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
- /*
- * Limit memory so we dont have linear faults.
- * Unlike memblock_set_current_limit, which limits
- * memory available during early boot, this permanently
- * reduces the memory available to Linux. We need to
- * do this because highmem is not supported on 64-bit.
- */
- memblock_enforce_memory_limit(linear_map_top);
- }
-#endif
-
- memblock_set_current_limit(linear_map_top);
-}
-
-/* boot cpu only */
-void __init early_init_mmu(void)
-{
- early_init_mmu_global();
- early_init_this_mmu();
- early_mmu_set_memory_limit();
-}
-
-void early_init_mmu_secondary(void)
-{
- early_init_this_mmu();
-}
-
-void setup_initial_memory_limit(phys_addr_t first_memblock_base,
- phys_addr_t first_memblock_size)
-{
- /* On non-FSL Embedded 64-bit, we adjust the RMA size to match
- * the bolted TLB entry. We know for now that only 1G
- * entries are supported though that may eventually
- * change.
- *
- * on FSL Embedded 64-bit, usually all RAM is bolted, but with
- * unusual memory sizes it's possible for some RAM to not be mapped
- * (such RAM is not used at all by Linux, since we don't support
- * highmem on 64-bit). We limit ppc64_rma_size to what would be
- * mappable if this memblock is the only one. Additional memblocks
- * can only increase, not decrease, the amount that ends up getting
- * mapped. We still limit max to 1G even if we'll eventually map
- * more. This is due to what the early init code is set up to do.
- *
- * We crop it to the size of the first MEMBLOCK to
- * avoid going over total available memory just in case...
- */
-#ifdef CONFIG_PPC_E500
- if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
- unsigned long linear_sz;
- unsigned int num_cams;
-
- /* use a quarter of the TLBCAM for bolted linear map */
- num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
-
- linear_sz = map_mem_in_cams(first_memblock_size, num_cams,
- true, true);
-
- ppc64_rma_size = min_t(u64, linear_sz, 0x40000000);
- } else
-#endif
- ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
-
- /* Finally limit subsequent allocations */
- memblock_set_current_limit(first_memblock_base + ppc64_rma_size);
-}
-#else /* ! CONFIG_PPC64 */
+#ifndef CONFIG_PPC64
void __init early_init_mmu(void)
{
unsigned long root = of_get_flat_dt_root();
diff --git a/arch/powerpc/mm/nohash/tlb_64e.c b/arch/powerpc/mm/nohash/tlb_64e.c
new file mode 100644
index 000000000000..113edf76d3ce
--- /dev/null
+++ b/arch/powerpc/mm/nohash/tlb_64e.c
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2008,2009 Ben Herrenschmidt <benh@kernel.crashing.org>
+ * IBM Corp.
+ *
+ * Derived from arch/ppc/mm/init.c:
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/pagemap.h>
+#include <linux/memblock.h>
+
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+#include <asm/code-patching.h>
+#include <asm/cputhreads.h>
+
+#include <mm/mmu_decl.h>
+
+/* The variables below are currently only used on 64-bit Book3E
+ * though this will probably be made common with other nohash
+ * implementations at some point
+ */
+int mmu_pte_psize; /* Page size used for PTE pages */
+int mmu_vmemmap_psize; /* Page size used for the virtual mem map */
+int book3e_htw_mode; /* HW tablewalk? Value is PPC_HTW_* */
+unsigned long linear_map_top; /* Top of linear mapping */
+
+
+/*
+ * Number of bytes to add to SPRN_SPRG_TLB_EXFRAME on crit/mcheck/debug
+ * exceptions. This is used for bolted and e6500 TLB miss handlers which
+ * do not modify this SPRG in the TLB miss code; for other TLB miss handlers,
+ * this is set to zero.
+ */
+int extlb_level_exc;
+
+/*
+ * Handling of virtual linear page tables or indirect TLB entries
+ * flushing when PTE pages are freed
+ */
+void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
+{
+ int tsize = mmu_psize_defs[mmu_pte_psize].shift - 10;
+
+ if (book3e_htw_mode != PPC_HTW_NONE) {
+ unsigned long start = address & PMD_MASK;
+ unsigned long end = address + PMD_SIZE;
+ unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift;
+
+ /* This isn't the most optimal, ideally we would factor out the
+ * while preempt & CPU mask mucking around, or even the IPI but
+ * it will do for now
+ */
+ while (start < end) {
+ __flush_tlb_page(tlb->mm, start, tsize, 1);
+ start += size;
+ }
+ } else {
+ unsigned long rmask = 0xf000000000000000ul;
+ unsigned long rid = (address & rmask) | 0x1000000000000000ul;
+ unsigned long vpte = address & ~rmask;
+
+ vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful;
+ vpte |= rid;
+ __flush_tlb_page(tlb->mm, vpte, tsize, 0);
+ }
+}
+
+static void __init setup_page_sizes(void)
+{
+ unsigned int tlb0cfg;
+ unsigned int eptcfg;
+ int psize;
+
+ unsigned int mmucfg = mfspr(SPRN_MMUCFG);
+
+ if ((mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) {
+ unsigned int tlb1cfg = mfspr(SPRN_TLB1CFG);
+ unsigned int min_pg, max_pg;
+
+ min_pg = (tlb1cfg & TLBnCFG_MINSIZE) >> TLBnCFG_MINSIZE_SHIFT;
+ max_pg = (tlb1cfg & TLBnCFG_MAXSIZE) >> TLBnCFG_MAXSIZE_SHIFT;
+
+ for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+ struct mmu_psize_def *def;
+ unsigned int shift;
+
+ def = &mmu_psize_defs[psize];
+ shift = def->shift;
+
+ if (shift == 0 || shift & 1)
+ continue;
+
+ /* adjust to be in terms of 4^shift Kb */
+ shift = (shift - 10) >> 1;
+
+ if ((shift >= min_pg) && (shift <= max_pg))
+ def->flags |= MMU_PAGE_SIZE_DIRECT;
+ }
+
+ goto out;
+ }
+
+ if ((mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2) {
+ u32 tlb1cfg, tlb1ps;
+
+ tlb0cfg = mfspr(SPRN_TLB0CFG);
+ tlb1cfg = mfspr(SPRN_TLB1CFG);
+ tlb1ps = mfspr(SPRN_TLB1PS);
+ eptcfg = mfspr(SPRN_EPTCFG);
+
+ if ((tlb1cfg & TLBnCFG_IND) && (tlb0cfg & TLBnCFG_PT))
+ book3e_htw_mode = PPC_HTW_E6500;
+
+ /*
+ * We expect 4K subpage size and unrestricted indirect size.
+ * The lack of a restriction on indirect size is a Freescale
+ * extension, indicated by PSn = 0 but SPSn != 0.
+ */
+ if (eptcfg != 2)
+ book3e_htw_mode = PPC_HTW_NONE;
+
+ for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+ struct mmu_psize_def *def = &mmu_psize_defs[psize];
+
+ if (!def->shift)
+ continue;
+
+ if (tlb1ps & (1U << (def->shift - 10))) {
+ def->flags |= MMU_PAGE_SIZE_DIRECT;
+
+ if (book3e_htw_mode && psize == MMU_PAGE_2M)
+ def->flags |= MMU_PAGE_SIZE_INDIRECT;
+ }
+ }
+
+ goto out;
+ }
+out:
+ /* Cleanup array and print summary */
+ pr_info("MMU: Supported page sizes\n");
+ for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+ struct mmu_psize_def *def = &mmu_psize_defs[psize];
+ const char *__page_type_names[] = {
+ "unsupported",
+ "direct",
+ "indirect",
+ "direct & indirect"
+ };
+ if (def->flags == 0) {
+ def->shift = 0;
+ continue;
+ }
+ pr_info(" %8ld KB as %s\n", 1ul << (def->shift - 10),
+ __page_type_names[def->flags & 0x3]);
+ }
+}
+
+/*
+ * Early initialization of the MMU TLB code
+ */
+static void early_init_this_mmu(void)
+{
+ unsigned int mas4;
+
+ /* Set MAS4 based on page table setting */
+
+ mas4 = 0x4 << MAS4_WIMGED_SHIFT;
+ switch (book3e_htw_mode) {
+ case PPC_HTW_E6500:
+ mas4 |= MAS4_INDD;
+ mas4 |= BOOK3E_PAGESZ_2M << MAS4_TSIZED_SHIFT;
+ mas4 |= MAS4_TLBSELD(1);
+ mmu_pte_psize = MMU_PAGE_2M;
+ break;
+
+ case PPC_HTW_NONE:
+ mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT;
+ mmu_pte_psize = mmu_virtual_psize;
+ break;
+ }
+ mtspr(SPRN_MAS4, mas4);
+
+ unsigned int num_cams;
+ bool map = true;
+
+ /* use a quarter of the TLBCAM for bolted linear map */
+ num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
+
+ /*
+ * Only do the mapping once per core, or else the
+ * transient mapping would cause problems.
+ */
+#ifdef CONFIG_SMP
+ if (hweight32(get_tensr()) > 1)
+ map = false;
+#endif
+
+ if (map)
+ linear_map_top = map_mem_in_cams(linear_map_top,
+ num_cams, false, true);
+
+ /* A sync won't hurt us after mucking around with
+ * the MMU configuration
+ */
+ mb();
+}
+
+static void __init early_init_mmu_global(void)
+{
+ /*
+ * Freescale booke only supports 4K pages in TLB0, so use that.
+ */
+ mmu_vmemmap_psize = MMU_PAGE_4K;
+
+ /* XXX This code only checks for TLB 0 capabilities and doesn't
+ * check what page size combos are supported by the HW. It
+ * also doesn't handle the case where a separate array holds
+ * the IND entries from the array loaded by the PT.
+ */
+ /* Look for supported page sizes */
+ setup_page_sizes();
+
+ /*
+ * If we want to use HW tablewalk, enable it by patching the TLB miss
+ * handlers to branch to the one dedicated to it.
+ */
+ extlb_level_exc = EX_TLB_SIZE;
+ switch (book3e_htw_mode) {
+ case PPC_HTW_E6500:
+ patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e);
+ patch_exception(0x1e0, exc_instruction_tlb_miss_e6500_book3e);
+ break;
+ }
+
+ pr_info("MMU: Book3E HW tablewalk %s\n",
+ book3e_htw_mode != PPC_HTW_NONE ? "enabled" : "not supported");
+
+ /* Set the global containing the top of the linear mapping
+ * for use by the TLB miss code
+ */
+ linear_map_top = memblock_end_of_DRAM();
+
+ ioremap_bot = IOREMAP_BASE;
+}
+
+static void __init early_mmu_set_memory_limit(void)
+{
+ /*
+ * Limit memory so we dont have linear faults.
+ * Unlike memblock_set_current_limit, which limits
+ * memory available during early boot, this permanently
+ * reduces the memory available to Linux. We need to
+ * do this because highmem is not supported on 64-bit.
+ */
+ memblock_enforce_memory_limit(linear_map_top);
+
+ memblock_set_current_limit(linear_map_top);
+}
+
+/* boot cpu only */
+void __init early_init_mmu(void)
+{
+ early_init_mmu_global();
+ early_init_this_mmu();
+ early_mmu_set_memory_limit();
+}
+
+void early_init_mmu_secondary(void)
+{
+ early_init_this_mmu();
+}
+
+void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+ phys_addr_t first_memblock_size)
+{
+ /*
+ * On FSL Embedded 64-bit, usually all RAM is bolted, but with
+ * unusual memory sizes it's possible for some RAM to not be mapped
+ * (such RAM is not used at all by Linux, since we don't support
+ * highmem on 64-bit). We limit ppc64_rma_size to what would be
+ * mappable if this memblock is the only one. Additional memblocks
+ * can only increase, not decrease, the amount that ends up getting
+ * mapped. We still limit max to 1G even if we'll eventually map
+ * more. This is due to what the early init code is set up to do.
+ *
+ * We crop it to the size of the first MEMBLOCK to
+ * avoid going over total available memory just in case...
+ */
+ unsigned long linear_sz;
+ unsigned int num_cams;
+
+ /* use a quarter of the TLBCAM for bolted linear map */
+ num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
+
+ linear_sz = map_mem_in_cams(first_memblock_size, num_cams, true, true);
+ ppc64_rma_size = min_t(u64, linear_sz, 0x40000000);
+
+ /* Finally limit subsequent allocations */
+ memblock_set_current_limit(first_memblock_base + ppc64_rma_size);
+}
diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S
index e1199608ff4d..c4d296e73731 100644
--- a/arch/powerpc/mm/nohash/tlb_low.S
+++ b/arch/powerpc/mm/nohash/tlb_low.S
@@ -32,32 +32,7 @@
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
-#if defined(CONFIG_40x)
-
-/*
- * 40x implementation needs only tlbil_va
- */
-_GLOBAL(__tlbil_va)
- /* We run the search with interrupts disabled because we have to change
- * the PID and I don't want to preempt when that happens.
- */
- mfmsr r5
- mfspr r6,SPRN_PID
- wrteei 0
- mtspr SPRN_PID,r4
- tlbsx. r3, 0, r3
- mtspr SPRN_PID,r6
- wrtee r5
- bne 1f
- sync
- /* There are only 64 TLB entries, so r3 < 64, which means bit 25 is
- * clear. Since 25 is the V bit in the TLB_TAG, loading this value
- * will invalidate the TLB entry. */
- tlbwe r3, r3, TLB_TAG
- isync
-1: blr
-
-#elif defined(CONFIG_PPC_8xx)
+#if defined(CONFIG_PPC_8xx)
/*
* Nothing to do for 8xx, everything is inline
diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S
index 7e0b8fe1c279..de568297d5c5 100644
--- a/arch/powerpc/mm/nohash/tlb_low_64e.S
+++ b/arch/powerpc/mm/nohash/tlb_low_64e.S
@@ -450,11 +450,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT)
tlb_miss_huge_e6500:
beq tlb_miss_fault_e6500
- li r10,1
- andi. r15,r14,HUGEPD_SHIFT_MASK@l /* r15 = psize */
- rldimi r14,r10,63,0 /* Set PD_HUGE */
- xor r14,r14,r15 /* Clear size bits */
- ldx r14,0,r14
+ rlwinm r15,r14,32-_PAGE_PSIZE_SHIFT,0x1e
/*
* Now we build the MAS for a huge page.
@@ -465,7 +461,6 @@ tlb_miss_huge_e6500:
* MAS 2,3+7: Needs to be redone similar to non-tablewalk handler
*/
- subi r15,r15,10 /* Convert psize to tsize */
mfspr r10,SPRN_MAS1
rlwinm r10,r10,0,~MAS1_IND
rlwimi r10,r15,MAS1_TSIZE_SHIFT,MAS1_TSIZE_MASK
@@ -511,232 +506,6 @@ itlb_miss_fault_e6500:
tlb_epilog_bolted
b exc_instruction_storage_book3e
-/**********************************************************************
- * *
- * TLB miss handling for Book3E with TLB reservation and HES support *
- * *
- **********************************************************************/
-
-
-/* Data TLB miss */
- START_EXCEPTION(data_tlb_miss)
- TLB_MISS_PROLOG
-
- /* Now we handle the fault proper. We only save DEAR in normal
- * fault case since that's the only interesting values here.
- * We could probably also optimize by not saving SRR0/1 in the
- * linear mapping case but I'll leave that for later
- */
- mfspr r14,SPRN_ESR
- mfspr r16,SPRN_DEAR /* get faulting address */
- srdi r15,r16,44 /* get region */
- xoris r15,r15,0xc
- cmpldi cr0,r15,0 /* linear mapping ? */
- beq tlb_load_linear /* yes -> go to linear map load */
- cmpldi cr1,r15,1 /* vmalloc mapping ? */
-
- /* The page tables are mapped virtually linear. At this point, though,
- * we don't know whether we are trying to fault in a first level
- * virtual address or a virtual page table address. We can get that
- * from bit 0x1 of the region ID which we have set for a page table
- */
- andis. r10,r15,0x1
- bne- virt_page_table_tlb_miss
-
- std r14,EX_TLB_ESR(r12); /* save ESR */
- std r16,EX_TLB_DEAR(r12); /* save DEAR */
-
- /* We need _PAGE_PRESENT and _PAGE_ACCESSED set */
- li r11,_PAGE_PRESENT
- oris r11,r11,_PAGE_ACCESSED@h
-
- /* We do the user/kernel test for the PID here along with the RW test
- */
- srdi. r15,r16,60 /* Check for user region */
-
- /* We pre-test some combination of permissions to avoid double
- * faults:
- *
- * We move the ESR:ST bit into the position of _PAGE_BAP_SW in the PTE
- * ESR_ST is 0x00800000
- * _PAGE_BAP_SW is 0x00000010
- * So the shift is >> 19. This tests for supervisor writeability.
- * If the page happens to be supervisor writeable and not user
- * writeable, we will take a new fault later, but that should be
- * a rare enough case.
- *
- * We also move ESR_ST in _PAGE_DIRTY position
- * _PAGE_DIRTY is 0x00001000 so the shift is >> 11
- *
- * MAS1 is preset for all we need except for TID that needs to
- * be cleared for kernel translations
- */
- rlwimi r11,r14,32-19,27,27
- rlwimi r11,r14,32-16,19,19
- beq normal_tlb_miss_user
- /* XXX replace the RMW cycles with immediate loads + writes */
-1: mfspr r10,SPRN_MAS1
- rlwinm r10,r10,0,16,1 /* Clear TID */
- mtspr SPRN_MAS1,r10
- beq+ cr1,normal_tlb_miss
-
- /* We got a crappy address, just fault with whatever DEAR and ESR
- * are here
- */
- TLB_MISS_EPILOG_ERROR
- b exc_data_storage_book3e
-
-/* Instruction TLB miss */
- START_EXCEPTION(instruction_tlb_miss)
- TLB_MISS_PROLOG
-
- /* If we take a recursive fault, the second level handler may need
- * to know whether we are handling a data or instruction fault in
- * order to get to the right store fault handler. We provide that
- * info by writing a crazy value in ESR in our exception frame
- */
- li r14,-1 /* store to exception frame is done later */
-
- /* Now we handle the fault proper. We only save DEAR in the non
- * linear mapping case since we know the linear mapping case will
- * not re-enter. We could indeed optimize and also not save SRR0/1
- * in the linear mapping case but I'll leave that for later
- *
- * Faulting address is SRR0 which is already in r16
- */
- srdi r15,r16,44 /* get region */
- xoris r15,r15,0xc
- cmpldi cr0,r15,0 /* linear mapping ? */
- beq tlb_load_linear /* yes -> go to linear map load */
- cmpldi cr1,r15,1 /* vmalloc mapping ? */
-
- /* We do the user/kernel test for the PID here along with the RW test
- */
- li r11,_PAGE_PRESENT|_PAGE_BAP_UX /* Base perm */
- oris r11,r11,_PAGE_ACCESSED@h
-
- srdi. r15,r16,60 /* Check for user region */
- std r14,EX_TLB_ESR(r12) /* write crazy -1 to frame */
- beq normal_tlb_miss_user
-
- li r11,_PAGE_PRESENT|_PAGE_BAP_SX /* Base perm */
- oris r11,r11,_PAGE_ACCESSED@h
- /* XXX replace the RMW cycles with immediate loads + writes */
- mfspr r10,SPRN_MAS1
- rlwinm r10,r10,0,16,1 /* Clear TID */
- mtspr SPRN_MAS1,r10
- beq+ cr1,normal_tlb_miss
-
- /* We got a crappy address, just fault */
- TLB_MISS_EPILOG_ERROR
- b exc_instruction_storage_book3e
-
-/*
- * This is the guts of the first-level TLB miss handler for direct
- * misses. We are entered with:
- *
- * r16 = faulting address
- * r15 = region ID
- * r14 = crap (free to use)
- * r13 = PACA
- * r12 = TLB exception frame in PACA
- * r11 = PTE permission mask
- * r10 = crap (free to use)
- */
-normal_tlb_miss_user:
-#ifdef CONFIG_PPC_KUAP
- mfspr r14,SPRN_MAS1
- rlwinm. r14,r14,0,0x3fff0000
- beq- normal_tlb_miss_access_fault /* KUAP fault */
-#endif
-normal_tlb_miss:
- /* So we first construct the page table address. We do that by
- * shifting the bottom of the address (not the region ID) by
- * PAGE_SHIFT-3, clearing the bottom 3 bits (get a PTE ptr) and
- * or'ing the fourth high bit.
- *
- * NOTE: For 64K pages, we do things slightly differently in
- * order to handle the weird page table format used by linux
- */
- srdi r15,r16,44
- oris r10,r15,0x1
- rldicl r14,r16,64-(PAGE_SHIFT-3),PAGE_SHIFT-3+4
- sldi r15,r10,44
- clrrdi r14,r14,19
- or r10,r15,r14
-
- ld r14,0(r10)
-
-finish_normal_tlb_miss:
- /* Check if required permissions are met */
- andc. r15,r11,r14
- bne- normal_tlb_miss_access_fault
-
- /* Now we build the MAS:
- *
- * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG
- * MAS 1 : Almost fully setup
- * - PID already updated by caller if necessary
- * - TSIZE need change if !base page size, not
- * yet implemented for now
- * MAS 2 : Defaults not useful, need to be redone
- * MAS 3+7 : Needs to be done
- *
- * TODO: mix up code below for better scheduling
- */
- clrrdi r10,r16,12 /* Clear low crap in EA */
- rlwimi r10,r14,32-19,27,31 /* Insert WIMGE */
- mtspr SPRN_MAS2,r10
-
- /* Check page size, if not standard, update MAS1 */
- rldicl r10,r14,64-8,64-8
- cmpldi cr0,r10,BOOK3E_PAGESZ_4K
- beq- 1f
- mfspr r11,SPRN_MAS1
- rlwimi r11,r14,31,21,24
- rlwinm r11,r11,0,21,19
- mtspr SPRN_MAS1,r11
-1:
- /* Move RPN in position */
- rldicr r11,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT
- clrldi r15,r11,12 /* Clear crap at the top */
- rlwimi r15,r14,32-8,22,25 /* Move in U bits */
- rlwimi r15,r14,32-2,26,31 /* Move in BAP bits */
-
- /* Mask out SW and UW if !DIRTY (XXX optimize this !) */
- andi. r11,r14,_PAGE_DIRTY
- bne 1f
- li r11,MAS3_SW|MAS3_UW
- andc r15,r15,r11
-1:
- srdi r16,r15,32
- mtspr SPRN_MAS3,r15
- mtspr SPRN_MAS7,r16
-
- tlbwe
-
-normal_tlb_miss_done:
- /* We don't bother with restoring DEAR or ESR since we know we are
- * level 0 and just going back to userland. They are only needed
- * if you are going to take an access fault
- */
- TLB_MISS_EPILOG_SUCCESS
- rfi
-
-normal_tlb_miss_access_fault:
- /* We need to check if it was an instruction miss */
- andi. r10,r11,_PAGE_BAP_UX
- bne 1f
- ld r14,EX_TLB_DEAR(r12)
- ld r15,EX_TLB_ESR(r12)
- mtspr SPRN_DEAR,r14
- mtspr SPRN_ESR,r15
- TLB_MISS_EPILOG_ERROR
- b exc_data_storage_book3e
-1: TLB_MISS_EPILOG_ERROR
- b exc_instruction_storage_book3e
-
-
/*
* This is the guts of the second-level TLB miss handler for direct
* misses. We are entered with:
@@ -893,201 +662,6 @@ virt_page_table_tlb_miss_whacko_fault:
TLB_MISS_EPILOG_ERROR
b exc_data_storage_book3e
-
-/**************************************************************
- * *
- * TLB miss handling for Book3E with hw page table support *
- * *
- **************************************************************/
-
-
-/* Data TLB miss */
- START_EXCEPTION(data_tlb_miss_htw)
- TLB_MISS_PROLOG
-
- /* Now we handle the fault proper. We only save DEAR in normal
- * fault case since that's the only interesting values here.
- * We could probably also optimize by not saving SRR0/1 in the
- * linear mapping case but I'll leave that for later
- */
- mfspr r14,SPRN_ESR
- mfspr r16,SPRN_DEAR /* get faulting address */
- srdi r11,r16,44 /* get region */
- xoris r11,r11,0xc
- cmpldi cr0,r11,0 /* linear mapping ? */
- beq tlb_load_linear /* yes -> go to linear map load */
- cmpldi cr1,r11,1 /* vmalloc mapping ? */
-
- /* We do the user/kernel test for the PID here along with the RW test
- */
- srdi. r11,r16,60 /* Check for user region */
- ld r15,PACAPGD(r13) /* Load user pgdir */
- beq htw_tlb_miss
-
- /* XXX replace the RMW cycles with immediate loads + writes */
-1: mfspr r10,SPRN_MAS1
- rlwinm r10,r10,0,16,1 /* Clear TID */
- mtspr SPRN_MAS1,r10
- ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */
- beq+ cr1,htw_tlb_miss
-
- /* We got a crappy address, just fault with whatever DEAR and ESR
- * are here
- */
- TLB_MISS_EPILOG_ERROR
- b exc_data_storage_book3e
-
-/* Instruction TLB miss */
- START_EXCEPTION(instruction_tlb_miss_htw)
- TLB_MISS_PROLOG
-
- /* If we take a recursive fault, the second level handler may need
- * to know whether we are handling a data or instruction fault in
- * order to get to the right store fault handler. We provide that
- * info by keeping a crazy value for ESR in r14
- */
- li r14,-1 /* store to exception frame is done later */
-
- /* Now we handle the fault proper. We only save DEAR in the non
- * linear mapping case since we know the linear mapping case will
- * not re-enter. We could indeed optimize and also not save SRR0/1
- * in the linear mapping case but I'll leave that for later
- *
- * Faulting address is SRR0 which is already in r16
- */
- srdi r11,r16,44 /* get region */
- xoris r11,r11,0xc
- cmpldi cr0,r11,0 /* linear mapping ? */
- beq tlb_load_linear /* yes -> go to linear map load */
- cmpldi cr1,r11,1 /* vmalloc mapping ? */
-
- /* We do the user/kernel test for the PID here along with the RW test
- */
- srdi. r11,r16,60 /* Check for user region */
- ld r15,PACAPGD(r13) /* Load user pgdir */
- beq htw_tlb_miss
-
- /* XXX replace the RMW cycles with immediate loads + writes */
-1: mfspr r10,SPRN_MAS1
- rlwinm r10,r10,0,16,1 /* Clear TID */
- mtspr SPRN_MAS1,r10
- ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */
- beq+ htw_tlb_miss
-
- /* We got a crappy address, just fault */
- TLB_MISS_EPILOG_ERROR
- b exc_instruction_storage_book3e
-
-
-/*
- * This is the guts of the second-level TLB miss handler for direct
- * misses. We are entered with:
- *
- * r16 = virtual page table faulting address
- * r15 = PGD pointer
- * r14 = ESR
- * r13 = PACA
- * r12 = TLB exception frame in PACA
- * r11 = crap (free to use)
- * r10 = crap (free to use)
- *
- * It can be re-entered by the linear mapping miss handler. However, to
- * avoid too much complication, it will save/restore things for us
- */
-htw_tlb_miss:
-#ifdef CONFIG_PPC_KUAP
- mfspr r10,SPRN_MAS1
- rlwinm. r10,r10,0,0x3fff0000
- beq- htw_tlb_miss_fault /* KUAP fault */
-#endif
- /* Search if we already have a TLB entry for that virtual address, and
- * if we do, bail out.
- *
- * MAS1:IND should be already set based on MAS4
- */
- PPC_TLBSRX_DOT(0,R16)
- beq htw_tlb_miss_done
-
- /* Now, we need to walk the page tables. First check if we are in
- * range.
- */
- rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
- bne- htw_tlb_miss_fault
-
- /* Get the PGD pointer */
- cmpldi cr0,r15,0
- beq- htw_tlb_miss_fault
-
- /* Get to PGD entry */
- rldicl r11,r16,64-(PGDIR_SHIFT-3),64-PGD_INDEX_SIZE-3
- clrrdi r10,r11,3
- ldx r15,r10,r15
- cmpdi cr0,r15,0
- bge htw_tlb_miss_fault
-
- /* Get to PUD entry */
- rldicl r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3
- clrrdi r10,r11,3
- ldx r15,r10,r15
- cmpdi cr0,r15,0
- bge htw_tlb_miss_fault
-
- /* Get to PMD entry */
- rldicl r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3
- clrrdi r10,r11,3
- ldx r15,r10,r15
- cmpdi cr0,r15,0
- bge htw_tlb_miss_fault
-
- /* Ok, we're all right, we can now create an indirect entry for
- * a 1M or 256M page.
- *
- * The last trick is now that because we use "half" pages for
- * the HTW (1M IND is 2K and 256M IND is 32K) we need to account
- * for an added LSB bit to the RPN. For 64K pages, there is no
- * problem as we already use 32K arrays (half PTE pages), but for
- * 4K page we need to extract a bit from the virtual address and
- * insert it into the "PA52" bit of the RPN.
- */
- rlwimi r15,r16,32-9,20,20
- /* Now we build the MAS:
- *
- * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG
- * MAS 1 : Almost fully setup
- * - PID already updated by caller if necessary
- * - TSIZE for now is base ind page size always
- * MAS 2 : Use defaults
- * MAS 3+7 : Needs to be done
- */
- ori r10,r15,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT)
-
- srdi r16,r10,32
- mtspr SPRN_MAS3,r10
- mtspr SPRN_MAS7,r16
-
- tlbwe
-
-htw_tlb_miss_done:
- /* We don't bother with restoring DEAR or ESR since we know we are
- * level 0 and just going back to userland. They are only needed
- * if you are going to take an access fault
- */
- TLB_MISS_EPILOG_SUCCESS
- rfi
-
-htw_tlb_miss_fault:
- /* We need to check if it was an instruction miss. We know this
- * though because r14 would contain -1
- */
- cmpdi cr0,r14,-1
- beq 1f
- mtspr SPRN_DEAR,r16
- mtspr SPRN_ESR,r14
- TLB_MISS_EPILOG_ERROR
- b exc_data_storage_book3e
-1: TLB_MISS_EPILOG_ERROR
- b exc_instruction_storage_book3e
-
/*
* This is the guts of "any" level TLB miss handler for kernel linear
* mapping misses. We are entered with:
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index a490724e84ad..aa89899f0c1a 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -896,7 +896,7 @@ static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
static int __init parse_numa_properties(void)
{
- struct device_node *memory;
+ struct device_node *memory, *pci;
int default_nid = 0;
unsigned long i;
const __be32 *associativity;
@@ -1010,6 +1010,18 @@ new_range:
goto new_range;
}
+ for_each_node_by_name(pci, "pci") {
+ int nid = NUMA_NO_NODE;
+
+ associativity = of_get_associativity(pci);
+ if (associativity) {
+ nid = associativity_to_nid(associativity);
+ initialize_form1_numa_distance(associativity);
+ }
+ if (likely(nid >= 0) && !node_online(nid))
+ node_set_online(nid);
+ }
+
/*
* Now do the same thing for each MEMBLOCK listed in the
* ibm,dynamic-memory property in the
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 9e7ba9c3851f..ab0656115424 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -297,11 +297,8 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
}
#if defined(CONFIG_PPC_8xx)
-void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
- pte_t pte, unsigned long sz)
+static void __set_huge_pte_at(pmd_t *pmd, pte_t *ptep, pte_basic_t val)
{
- pmd_t *pmd = pmd_off(mm, addr);
- pte_basic_t val;
pte_basic_t *entry = (pte_basic_t *)ptep;
int num, i;
@@ -311,15 +308,60 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
*/
VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
- pte = set_pte_filter(pte, addr);
-
- val = pte_val(pte);
-
num = number_of_cells_per_pte(pmd, val, 1);
for (i = 0; i < num; i++, entry++, val += SZ_4K)
*entry = val;
}
+
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+ pte_t pte, unsigned long sz)
+{
+ pmd_t *pmdp = pmd_off(mm, addr);
+
+ pte = set_pte_filter(pte, addr);
+
+ if (sz == SZ_8M) { /* Flag both PMD entries as 8M and fill both page tables */
+ *pmdp = __pmd(pmd_val(*pmdp) | _PMD_PAGE_8M);
+ *(pmdp + 1) = __pmd(pmd_val(*(pmdp + 1)) | _PMD_PAGE_8M);
+
+ __set_huge_pte_at(pmdp, pte_offset_kernel(pmdp, 0), pte_val(pte));
+ __set_huge_pte_at(pmdp, pte_offset_kernel(pmdp + 1, 0), pte_val(pte) + SZ_4M);
+ } else {
+ __set_huge_pte_at(pmdp, ptep, pte_val(pte));
+ }
+}
+#else
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+ pte_t pte, unsigned long sz)
+{
+ unsigned long pdsize;
+ int i;
+
+ pte = set_pte_filter(pte, addr);
+
+ /*
+ * Make sure hardware valid bit is not set. We don't do
+ * tlb flush for this update.
+ */
+ VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
+
+ if (sz < PMD_SIZE)
+ pdsize = PAGE_SIZE;
+ else if (sz < PUD_SIZE)
+ pdsize = PMD_SIZE;
+ else if (sz < P4D_SIZE)
+ pdsize = PUD_SIZE;
+ else if (sz < PGDIR_SIZE)
+ pdsize = P4D_SIZE;
+ else
+ pdsize = PGDIR_SIZE;
+
+ for (i = 0; i < sz / pdsize; i++, ptep++, addr += pdsize) {
+ __set_pte_at(mm, addr, ptep, pte, 0);
+ pte = __pte(pte_val(pte) + ((unsigned long long)pdsize / PAGE_SIZE << PFN_PTE_SHIFT));
+ }
+}
#endif
#endif /* CONFIG_HUGETLB_PAGE */
@@ -367,11 +409,10 @@ unsigned long vmalloc_to_phys(void *va)
EXPORT_SYMBOL_GPL(vmalloc_to_phys);
/*
- * We have 4 cases for pgds and pmds:
+ * We have 3 cases for pgds and pmds:
* (1) invalid (all zeroes)
* (2) pointer to next table, as normal; bottom 6 bits == 0
* (3) leaf pte for huge page _PAGE_PTE set
- * (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table
*
* So long as we atomically load page table pointers we are safe against teardown,
* we can follow the address down to the page and take a ref on it.
@@ -382,11 +423,12 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
bool *is_thp, unsigned *hpage_shift)
{
pgd_t *pgdp;
+#ifdef CONFIG_PPC64
p4d_t p4d, *p4dp;
pud_t pud, *pudp;
+#endif
pmd_t pmd, *pmdp;
pte_t *ret_pte;
- hugepd_t *hpdp = NULL;
unsigned pdshift;
if (hpage_shift)
@@ -401,8 +443,12 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
* page fault or a page unmap. The return pte_t * is still not
* stable. So should be checked there for above conditions.
* Top level is an exception because it is folded into p4d.
+ *
+ * On PPC32, P4D/PUD/PMD are folded into PGD so go straight to
+ * PMD level.
*/
pgdp = pgdir + pgd_index(ea);
+#ifdef CONFIG_PPC64
p4dp = p4d_offset(pgdp, ea);
p4d = READ_ONCE(*p4dp);
pdshift = P4D_SHIFT;
@@ -415,11 +461,6 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
goto out;
}
- if (is_hugepd(__hugepd(p4d_val(p4d)))) {
- hpdp = (hugepd_t *)&p4d;
- goto out_huge;
- }
-
/*
* Even if we end up with an unmap, the pgtable will not
* be freed, because we do an rcu free and here we are
@@ -437,13 +478,11 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
goto out;
}
- if (is_hugepd(__hugepd(pud_val(pud)))) {
- hpdp = (hugepd_t *)&pud;
- goto out_huge;
- }
-
- pdshift = PMD_SHIFT;
pmdp = pmd_offset(&pud, ea);
+#else
+ pmdp = pmd_offset(pud_offset(p4d_offset(pgdp, ea), ea), ea);
+#endif
+ pdshift = PMD_SHIFT;
pmd = READ_ONCE(*pmdp);
/*
@@ -476,19 +515,8 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
goto out;
}
- if (is_hugepd(__hugepd(pmd_val(pmd)))) {
- hpdp = (hugepd_t *)&pmd;
- goto out_huge;
- }
-
return pte_offset_kernel(&pmd, ea);
-out_huge:
- if (!hpdp)
- return NULL;
-
- ret_pte = hugepte_offset(*hpdp, ea, pdshift);
- pdshift = hugepd_shift(*hpdp);
out:
if (hpage_shift)
*hpage_shift = pdshift;
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index cfd622ebf774..787b22206386 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -48,7 +48,7 @@ notrace void __init early_ioremap_init(void)
early_ioremap_setup();
}
-static void __init *early_alloc_pgtable(unsigned long size)
+void __init *early_alloc_pgtable(unsigned long size)
{
void *ptr = memblock_alloc(size, size);
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 9b99113cb51a..6621cfc3baf8 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -102,7 +102,7 @@ struct page *p4d_page(p4d_t p4d)
{
if (p4d_leaf(p4d)) {
if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
- VM_WARN_ON(!p4d_huge(p4d));
+ VM_WARN_ON(!p4d_leaf(p4d));
return pte_page(p4d_pte(p4d));
}
return virt_to_page(p4d_pgtable(p4d));
@@ -113,7 +113,7 @@ struct page *pud_page(pud_t pud)
{
if (pud_leaf(pud)) {
if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
- VM_WARN_ON(!pud_huge(pud));
+ VM_WARN_ON(!pud_leaf(pud));
return pte_page(pud_pte(pud));
}
return virt_to_page(pud_pgtable(pud));
@@ -132,7 +132,7 @@ struct page *pmd_page(pmd_t pmd)
* enabled so these checks can't be used.
*/
if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
- VM_WARN_ON(!(pmd_leaf(pmd) || pmd_huge(pmd)));
+ VM_WARN_ON(!pmd_leaf(pmd));
return pte_page(pmd_pte(pmd));
}
return virt_to_page(pmd_page_vaddr(pmd));
diff --git a/arch/powerpc/mm/ptdump/Makefile b/arch/powerpc/mm/ptdump/Makefile
index dc896d2874f3..0f7a050f327e 100644
--- a/arch/powerpc/mm/ptdump/Makefile
+++ b/arch/powerpc/mm/ptdump/Makefile
@@ -2,7 +2,7 @@
obj-y += ptdump.o
-obj-$(CONFIG_4xx) += shared.o
+obj-$(CONFIG_44x) += shared.o
obj-$(CONFIG_PPC_8xx) += 8xx.o
obj-$(CONFIG_PPC_E500) += shared.o
obj-$(CONFIG_PPC_BOOK3S_32) += shared.o
diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c
index 9a601587836b..a6baa6166d94 100644
--- a/arch/powerpc/mm/ptdump/hashpagetable.c
+++ b/arch/powerpc/mm/ptdump/hashpagetable.c
@@ -491,7 +491,7 @@ static void walk_vmemmap(struct pg_state *st)
* Traverse the vmemmaped memory and dump pages that are in the hash
* pagetable.
*/
- while (ptr->list) {
+ while (ptr) {
hpte_find(st, ptr->virt_addr, mmu_vmemmap_psize);
ptr = ptr->list;
}