diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-10 05:29:27 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-10 05:29:27 -0700 |
commit | b970afcfcabd63cd3832e95db096439c177c3592 (patch) | |
tree | b63e662c780e02617916f4c0269e2adddc67f5a0 /arch/powerpc/include/asm | |
parent | 8ea5b2abd07e2280a332bd9c1a7f4dd15b9b6c13 (diff) | |
parent | 8150a153c013aa2dd1ffae43370b89ac1347a7fb (diff) | |
download | linux-stable-b970afcfcabd63cd3832e95db096439c177c3592.tar.gz linux-stable-b970afcfcabd63cd3832e95db096439c177c3592.tar.bz2 linux-stable-b970afcfcabd63cd3832e95db096439c177c3592.zip |
Merge tag 'powerpc-5.2-1' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
"Slightly delayed due to the issue with printk() calling
probe_kernel_read() interacting with our new user access prevention
stuff, but all fixed now.
The only out-of-area changes are the addition of a cpuhp_state, small
additions to Documentation and MAINTAINERS updates.
Highlights:
- Support for Kernel Userspace Access/Execution Prevention (like
SMAP/SMEP/PAN/PXN) on some 64-bit and 32-bit CPUs. This prevents
the kernel from accidentally accessing userspace outside
copy_to/from_user(), or ever executing userspace.
- KASAN support on 32-bit.
- Rework of where we map the kernel, vmalloc, etc. on 64-bit hash to
use the same address ranges we use with the Radix MMU.
- A rewrite into C of large parts of our idle handling code for
64-bit Book3S (ie. power8 & power9).
- A fast path entry for syscalls on 32-bit CPUs, for a 12-17% speedup
in the null_syscall benchmark.
- On 64-bit bare metal we have support for recovering from errors
with the time base (our clocksource), however if that fails
currently we hang in __delay() and never crash. We now have support
for detecting that case and short circuiting __delay() so we at
least panic() and reboot.
- Add support for optionally enabling the DAWR on Power9, which had
to be disabled by default due to a hardware erratum. This has the
effect of enabling hardware breakpoints for GDB, the downside is a
badly behaved program could crash the machine by pointing the DAWR
at cache inhibited memory. This is opt-in obviously.
- xmon, our crash handler, gets support for a read only mode where
operations that could change memory or otherwise disturb the system
are disabled.
Plus many clean-ups, reworks and minor fixes etc.
Thanks to: Christophe Leroy, Akshay Adiga, Alastair D'Silva, Alexey
Kardashevskiy, Andrew Donnellan, Aneesh Kumar K.V, Anju T Sudhakar,
Anton Blanchard, Ben Hutchings, Bo YU, Breno Leitao, Cédric Le Goater,
Christopher M. Riedl, Christoph Hellwig, Colin Ian King, David Gibson,
Ganesh Goudar, Gautham R. Shenoy, George Spelvin, Greg Kroah-Hartman,
Greg Kurz, Horia Geantă, Jagadeesh Pagadala, Joel Stanley, Joe
Perches, Julia Lawall, Laurentiu Tudor, Laurent Vivier, Lukas Bulwahn,
Madhavan Srinivasan, Mahesh Salgaonkar, Mathieu Malaterre, Michael
Neuling, Mukesh Ojha, Nathan Fontenot, Nathan Lynch, Nicholas Piggin,
Nick Desaulniers, Oliver O'Halloran, Peng Hao, Qian Cai, Ravi
Bangoria, Rick Lindsley, Russell Currey, Sachin Sant, Stewart Smith,
Sukadev Bhattiprolu, Thomas Huth, Tobin C. Harding, Tyrel Datwyler,
Valentin Schneider, Wei Yongjun, Wen Yang, YueHaibing"
* tag 'powerpc-5.2-1' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (205 commits)
powerpc/64s: Use early_mmu_has_feature() in set_kuap()
powerpc/book3s/64: check for NULL pointer in pgd_alloc()
powerpc/mm: Fix hugetlb page initialization
ocxl: Fix return value check in afu_ioctl()
powerpc/mm: fix section mismatch for setup_kup()
powerpc/mm: fix redundant inclusion of pgtable-frag.o in Makefile
powerpc/mm: Fix makefile for KASAN
powerpc/kasan: add missing/lost Makefile
selftests/powerpc: Add a signal fuzzer selftest
powerpc/booke64: set RI in default MSR
ocxl: Provide global MMIO accessors for external drivers
ocxl: move event_fd handling to frontend
ocxl: afu_irq only deals with IRQ IDs, not offsets
ocxl: Allow external drivers to use OpenCAPI contexts
ocxl: Create a clear delineation between ocxl backend & frontend
ocxl: Don't pass pci_dev around
ocxl: Split pci.c
ocxl: Remove some unused exported symbols
ocxl: Remove superfluous 'extern' from headers
ocxl: read_pasid never returns an error, so make it void
...
Diffstat (limited to 'arch/powerpc/include/asm')
68 files changed, 1425 insertions, 806 deletions
diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h new file mode 100644 index 000000000000..677e9babef80 --- /dev/null +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_BOOK3S_32_KUP_H +#define _ASM_POWERPC_BOOK3S_32_KUP_H + +#include <asm/book3s/32/mmu-hash.h> + +#ifdef __ASSEMBLY__ + +.macro kuep_update_sr gpr1, gpr2 /* NEVER use r0 as gpr2 due to addis */ +101: mtsrin \gpr1, \gpr2 + addi \gpr1, \gpr1, 0x111 /* next VSID */ + rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* clear VSID overflow */ + addis \gpr2, \gpr2, 0x1000 /* address of next segment */ + bdnz 101b + isync +.endm + +.macro kuep_lock gpr1, gpr2 +#ifdef CONFIG_PPC_KUEP + li \gpr1, NUM_USER_SEGMENTS + li \gpr2, 0 + mtctr \gpr1 + mfsrin \gpr1, \gpr2 + oris \gpr1, \gpr1, SR_NX@h /* set Nx */ + kuep_update_sr \gpr1, \gpr2 +#endif +.endm + +.macro kuep_unlock gpr1, gpr2 +#ifdef CONFIG_PPC_KUEP + li \gpr1, NUM_USER_SEGMENTS + li \gpr2, 0 + mtctr \gpr1 + mfsrin \gpr1, \gpr2 + rlwinm \gpr1, \gpr1, 0, ~SR_NX /* Clear Nx */ + kuep_update_sr \gpr1, \gpr2 +#endif +.endm + +#ifdef CONFIG_PPC_KUAP + +.macro kuap_update_sr gpr1, gpr2, gpr3 /* NEVER use r0 as gpr2 due to addis */ +101: mtsrin \gpr1, \gpr2 + addi \gpr1, \gpr1, 0x111 /* next VSID */ + rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* clear VSID overflow */ + addis \gpr2, \gpr2, 0x1000 /* address of next segment */ + cmplw \gpr2, \gpr3 + blt- 101b + isync +.endm + +.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3 + lwz \gpr2, KUAP(\thread) + rlwinm. \gpr3, \gpr2, 28, 0xf0000000 + stw \gpr2, STACK_REGS_KUAP(\sp) + beq+ 102f + li \gpr1, 0 + stw \gpr1, KUAP(\thread) + mfsrin \gpr1, \gpr2 + oris \gpr1, \gpr1, SR_KS@h /* set Ks */ + kuap_update_sr \gpr1, \gpr2, \gpr3 +102: +.endm + +.macro kuap_restore sp, current, gpr1, gpr2, gpr3 + lwz \gpr2, STACK_REGS_KUAP(\sp) + rlwinm. \gpr3, \gpr2, 28, 0xf0000000 + stw \gpr2, THREAD + KUAP(\current) + beq+ 102f + mfsrin \gpr1, \gpr2 + rlwinm \gpr1, \gpr1, 0, ~SR_KS /* Clear Ks */ + kuap_update_sr \gpr1, \gpr2, \gpr3 +102: +.endm + +.macro kuap_check current, gpr +#ifdef CONFIG_PPC_KUAP_DEBUG + lwz \gpr2, KUAP(thread) +999: twnei \gpr, 0 + EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) +#endif +.endm + +#endif /* CONFIG_PPC_KUAP */ + +#else /* !__ASSEMBLY__ */ + +#ifdef CONFIG_PPC_KUAP + +#include <linux/sched.h> + +static inline void kuap_update_sr(u32 sr, u32 addr, u32 end) +{ + barrier(); /* make sure thread.kuap is updated before playing with SRs */ + while (addr < end) { + mtsrin(sr, addr); + sr += 0x111; /* next VSID */ + sr &= 0xf0ffffff; /* clear VSID overflow */ + addr += 0x10000000; /* address of next segment */ + } + isync(); /* Context sync required after mtsrin() */ +} + +static inline void allow_user_access(void __user *to, const void __user *from, u32 size) +{ + u32 addr, end; + + if (__builtin_constant_p(to) && to == NULL) + return; + + addr = (__force u32)to; + + if (!addr || addr >= TASK_SIZE || !size) + return; + + end = min(addr + size, TASK_SIZE); + current->thread.kuap = (addr & 0xf0000000) | ((((end - 1) >> 28) + 1) & 0xf); + kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end); /* Clear Ks */ +} + +static inline void prevent_user_access(void __user *to, const void __user *from, u32 size) +{ + u32 addr = (__force u32)to; + u32 end = min(addr + size, TASK_SIZE); + + if (!addr || addr >= TASK_SIZE || !size) + return; + + current->thread.kuap = 0; + kuap_update_sr(mfsrin(addr) | SR_KS, addr, end); /* set Ks */ +} + +static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +{ + if (!is_write) + return false; + + return WARN(!regs->kuap, "Bug: write fault blocked by segment registers !"); +} + +#endif /* CONFIG_PPC_KUAP */ + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_BOOK3S_32_KUP_H */ diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index 5cb588395fdc..2e277ca0170f 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -10,8 +10,6 @@ * BATs */ -#include <asm/page.h> - /* Block size masks */ #define BL_128K 0x000 #define BL_256K 0x001 @@ -49,8 +47,6 @@ struct ppc_bat { u32 batu; u32 batl; }; - -typedef pte_t *pgtable_t; #endif /* !__ASSEMBLY__ */ /* @@ -63,6 +59,11 @@ typedef pte_t *pgtable_t; #define PP_RWRW 2 /* Supervisor read/write, User read/write */ #define PP_RXRX 3 /* Supervisor read, User read */ +/* Values for Segment Registers */ +#define SR_NX 0x10000000 /* No Execute */ +#define SR_KP 0x20000000 /* User key */ +#define SR_KS 0x40000000 /* Supervisor key */ + #ifndef __ASSEMBLY__ /* diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h index 3633502e102c..998317702630 100644 --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h @@ -5,28 +5,6 @@ #include <linux/threads.h> #include <linux/slab.h> -/* - * Functions that deal with pagetables that could be at any level of - * the table need to be passed an "index_size" so they know how to - * handle allocation. For PTE pages (which are linked to a struct - * page for now, and drawn from the main get_free_pages() pool), the - * allocation size will be (2^index_size * sizeof(pointer)) and - * allocations are drawn from the kmem_cache in PGT_CACHE(index_size). - * - * The maximum index size needs to be big enough to allow any - * pagetable sizes we need, but small enough to fit in the low bits of - * any page table pointer. In other words all pagetables, even tiny - * ones, must be aligned to allow at least enough low 0 bits to - * contain this value. This value is also used as a mask, so it must - * be one less than a power of two. - */ -#define MAX_PGTABLE_INDEX_SIZE 0xf - -extern void __bad_pte(pmd_t *pmd); - -extern struct kmem_cache *pgtable_cache[]; -#define PGT_CACHE(shift) pgtable_cache[shift] - static inline pgd_t *pgd_alloc(struct mm_struct *mm) { return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), @@ -59,24 +37,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, *pmdp = __pmd(__pa(pte_page) | _PMD_PRESENT); } -#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd)) - -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm); -extern pgtable_t pte_alloc_one(struct mm_struct *mm); -void pte_frag_destroy(void *pte_frag); -pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel); -void pte_fragment_free(unsigned long *table, int kernel); - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - pte_fragment_free((unsigned long *)pte, 1); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) -{ - pte_fragment_free((unsigned long *)ptepage, 0); -} - static inline void pgtable_free(void *table, unsigned index_size) { if (!index_size) { @@ -87,7 +47,6 @@ static inline void pgtable_free(void *table, unsigned index_size) } } -#define check_pgt_cache() do { } while (0) #define get_hugepd_cache_index(x) (x) #ifdef CONFIG_SMP diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index aa8406b8f7ba..838de59f6754 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -134,15 +134,24 @@ static inline bool pte_user(pte_t pte) #define PGDIR_MASK (~(PGDIR_SIZE-1)) #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) + +#ifndef __ASSEMBLY__ + +int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); + +#endif /* !__ASSEMBLY__ */ + /* * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary * value (for now) on others, from where we can start layout kernel * virtual space that goes below PKMAP and FIXMAP */ +#include <asm/fixmap.h> + #ifdef CONFIG_HIGHMEM #define KVIRT_TOP PKMAP_BASE #else -#define KVIRT_TOP (0xfe000000UL) /* for now, could be FIXMAP_BASE ? */ +#define KVIRT_TOP FIXADDR_START #endif /* @@ -373,8 +382,6 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); - /* Generic accessors to PTE bits */ static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);} static inline int pte_read(pte_t pte) { return 1; } diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index cf5ba5254299..8fd8599c9395 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -2,10 +2,10 @@ #ifndef _ASM_POWERPC_BOOK3S_64_HASH_4K_H #define _ASM_POWERPC_BOOK3S_64_HASH_4K_H -#define H_PTE_INDEX_SIZE 9 -#define H_PMD_INDEX_SIZE 7 -#define H_PUD_INDEX_SIZE 9 -#define H_PGD_INDEX_SIZE 9 +#define H_PTE_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 4KB = 2MB +#define H_PMD_INDEX_SIZE 7 // size: 8B << 7 = 1KB, maps: 2^7 x 2MB = 256MB +#define H_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 256MB = 128GB +#define H_PGD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 128GB = 64TB /* * Each context is 512TB. But on 4k we restrict our max TASK size to 64TB @@ -13,6 +13,21 @@ */ #define MAX_EA_BITS_PER_CONTEXT 46 +#define REGION_SHIFT (MAX_EA_BITS_PER_CONTEXT - 2) + +/* + * Our page table limit us to 64TB. Hence for the kernel mapping, + * each MAP area is limited to 16 TB. + * The four map areas are: linear mapping, vmap, IO and vmemmap + */ +#define H_KERN_MAP_SIZE (ASM_CONST(1) << REGION_SHIFT) + +/* + * Define the address range of the kernel non-linear virtual area + * 16TB + */ +#define H_KERN_VIRT_START ASM_CONST(0xc000100000000000) + #ifndef __ASSEMBLY__ #define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE) #define H_PMD_TABLE_SIZE (sizeof(pmd_t) << H_PMD_INDEX_SIZE) diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index f82ee8a3b561..d1d9177d9ebd 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -2,16 +2,29 @@ #ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H -#define H_PTE_INDEX_SIZE 8 -#define H_PMD_INDEX_SIZE 10 -#define H_PUD_INDEX_SIZE 10 -#define H_PGD_INDEX_SIZE 8 +#define H_PTE_INDEX_SIZE 8 // size: 8B << 8 = 2KB, maps 2^8 x 64KB = 16MB +#define H_PMD_INDEX_SIZE 10 // size: 8B << 10 = 8KB, maps 2^10 x 16MB = 16GB +#define H_PUD_INDEX_SIZE 10 // size: 8B << 10 = 8KB, maps 2^10 x 16GB = 16TB +#define H_PGD_INDEX_SIZE 8 // size: 8B << 8 = 2KB, maps 2^8 x 16TB = 4PB + /* * Each context is 512TB size. SLB miss for first context/default context * is handled in the hotpath. */ #define MAX_EA_BITS_PER_CONTEXT 49 +#define REGION_SHIFT MAX_EA_BITS_PER_CONTEXT + +/* + * We use one context for each MAP area. + */ +#define H_KERN_MAP_SIZE (1UL << MAX_EA_BITS_PER_CONTEXT) + +/* + * Define the address range of the kernel non-linear virtual area + * 2PB + */ +#define H_KERN_VIRT_START ASM_CONST(0xc008000000000000) /* * 64k aligned address free up few of the lower bits of RPN for us diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 54b7af6cd27f..1d1183048cfd 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -29,6 +29,10 @@ #define H_PGTABLE_EADDR_SIZE (H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + \ H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT) #define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE) +/* + * Top 2 bits are ignored in page table walk. + */ +#define EA_MASK (~(0xcUL << 60)) /* * We store the slot details in the second half of page table. @@ -42,59 +46,63 @@ #endif /* - * Define the address range of the kernel non-linear virtual area. In contrast - * to the linear mapping, this is managed using the kernel page tables and then - * inserted into the hash page table to actually take effect, similarly to user - * mappings. + * +------------------------------+ + * | | + * | | + * | | + * +------------------------------+ Kernel virtual map end (0xc00e000000000000) + * | | + * | | + * | 512TB/16TB of vmemmap | + * | | + * | | + * +------------------------------+ Kernel vmemmap start + * | | + * | 512TB/16TB of IO map | + * | | + * +------------------------------+ Kernel IO map start + * | | + * | 512TB/16TB of vmap | + * | | + * +------------------------------+ Kernel virt start (0xc008000000000000) + * | | + * | | + * | | + * +------------------------------+ Kernel linear (0xc.....) */ -#define H_KERN_VIRT_START ASM_CONST(0xD000000000000000) -/* - * Allow virtual mapping of one context size. - * 512TB for 64K page size - * 64TB for 4K page size - */ -#define H_KERN_VIRT_SIZE (1UL << MAX_EA_BITS_PER_CONTEXT) +#define H_VMALLOC_START H_KERN_VIRT_START +#define H_VMALLOC_SIZE H_KERN_MAP_SIZE +#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE) -/* - * 8TB IO mapping size - */ -#define H_KERN_IO_SIZE ASM_CONST(0x80000000000) /* 8T */ +#define H_KERN_IO_START H_VMALLOC_END +#define H_KERN_IO_SIZE H_KERN_MAP_SIZE +#define H_KERN_IO_END (H_KERN_IO_START + H_KERN_IO_SIZE) -/* - * The vmalloc space starts at the beginning of the kernel non-linear virtual - * region, and occupies 504T (64K) or 56T (4K) - */ -#define H_VMALLOC_START H_KERN_VIRT_START -#define H_VMALLOC_SIZE (H_KERN_VIRT_SIZE - H_KERN_IO_SIZE) -#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE) +#define H_VMEMMAP_START H_KERN_IO_END +#define H_VMEMMAP_SIZE H_KERN_MAP_SIZE +#define H_VMEMMAP_END (H_VMEMMAP_START + H_VMEMMAP_SIZE) -#define H_KERN_IO_START H_VMALLOC_END +#define NON_LINEAR_REGION_ID(ea) ((((unsigned long)ea - H_KERN_VIRT_START) >> REGION_SHIFT) + 2) /* * Region IDs */ -#define REGION_SHIFT 60UL -#define REGION_MASK (0xfUL << REGION_SHIFT) -#define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) - -#define VMALLOC_REGION_ID (REGION_ID(H_VMALLOC_START)) -#define KERNEL_REGION_ID (REGION_ID(PAGE_OFFSET)) -#define VMEMMAP_REGION_ID (0xfUL) /* Server only */ -#define USER_REGION_ID (0UL) +#define USER_REGION_ID 0 +#define LINEAR_MAP_REGION_ID 1 +#define VMALLOC_REGION_ID NON_LINEAR_REGION_ID(H_VMALLOC_START) +#define IO_REGION_ID NON_LINEAR_REGION_ID(H_KERN_IO_START) +#define VMEMMAP_REGION_ID NON_LINEAR_REGION_ID(H_VMEMMAP_START) /* * Defines the address of the vmemap area, in its own region on * hash table CPUs. */ -#define H_VMEMMAP_BASE (VMEMMAP_REGION_ID << REGION_SHIFT) - #ifdef CONFIG_PPC_MM_SLICES #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN #endif /* CONFIG_PPC_MM_SLICES */ - /* PTEIDX nibble */ #define _PTEIDX_SECONDARY 0x8 #define _PTEIDX_GROUP_IX 0x7 @@ -103,6 +111,25 @@ #define H_PUD_BAD_BITS (PMD_TABLE_SIZE-1) #ifndef __ASSEMBLY__ +static inline int get_region_id(unsigned long ea) +{ + int region_id; + int id = (ea >> 60UL); + + if (id == 0) + return USER_REGION_ID; + + if (ea < H_KERN_VIRT_START) + return LINEAR_MAP_REGION_ID; + + VM_BUG_ON(id != 0xc); + BUILD_BUG_ON(NON_LINEAR_REGION_ID(H_VMALLOC_START) != 2); + + region_id = NON_LINEAR_REGION_ID(ea); + VM_BUG_ON(region_id > VMEMMAP_REGION_ID); + return region_id; +} + #define hash__pmd_bad(pmd) (pmd_val(pmd) & H_PMD_BAD_BITS) #define hash__pud_bad(pud) (pud_val(pud) & H_PUD_BAD_BITS) static inline int hash__pgd_bad(pgd_t pgd) diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h index ec2a55a553c7..56140d19c85f 100644 --- a/arch/powerpc/include/asm/book3s/64/hugetlb.h +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -62,4 +62,76 @@ extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t new_pte); +/* + * This should work for other subarchs too. But right now we use the + * new format only for 64bit book3s + */ +static inline pte_t *hugepd_page(hugepd_t hpd) +{ + BUG_ON(!hugepd_ok(hpd)); + /* + * We have only four bits to encode, MMU page size + */ + BUILD_BUG_ON((MMU_PAGE_COUNT - 1) > 0xf); + return __va(hpd_val(hpd) & HUGEPD_ADDR_MASK); +} + +static inline unsigned int hugepd_mmu_psize(hugepd_t hpd) +{ + return (hpd_val(hpd) & HUGEPD_SHIFT_MASK) >> 2; +} + +static inline unsigned int hugepd_shift(hugepd_t hpd) +{ + return mmu_psize_to_shift(hugepd_mmu_psize(hpd)); +} +static inline void flush_hugetlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) +{ + if (radix_enabled()) + return radix__flush_hugetlb_page(vma, vmaddr); +} + +static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, + unsigned int pdshift) +{ + unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd); + + return hugepd_page(hpd) + idx; +} + +static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift) +{ + *hpdp = __hugepd(__pa(new) | HUGEPD_VAL_BITS | (shift_to_mmu_psize(pshift) << 2)); +} + +void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); + +static inline int check_and_get_huge_psize(int shift) +{ + int mmu_psize; + + if (shift > SLICE_HIGH_SHIFT) + return -EINVAL; + + mmu_psize = shift_to_mmu_psize(shift); + + /* + * We need to make sure that for different page sizes reported by + * firmware we only add hugetlb support for page sizes that can be + * supported by linux page table layout. + * For now we have + * Radix: 2M and 1G + * Hash: 16M and 16G + */ + if (radix_enabled()) { + if (mmu_psize != MMU_PAGE_2M && mmu_psize != MMU_PAGE_1G) + return -EINVAL; + } else { + if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G) + return -EINVAL; + } + return mmu_psize; +} + #endif diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h new file mode 100644 index 000000000000..f254de956d6a --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H +#define _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H + +#include <linux/const.h> + +#define AMR_KUAP_BLOCK_READ UL(0x4000000000000000) +#define AMR_KUAP_BLOCK_WRITE UL(0x8000000000000000) +#define AMR_KUAP_BLOCKED (AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE) +#define AMR_KUAP_SHIFT 62 + +#ifdef __ASSEMBLY__ + +.macro kuap_restore_amr gpr +#ifdef CONFIG_PPC_KUAP + BEGIN_MMU_FTR_SECTION_NESTED(67) + ld \gpr, STACK_REGS_KUAP(r1) + mtspr SPRN_AMR, \gpr + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) +#endif +.endm + +.macro kuap_check_amr gpr1, gpr2 +#ifdef CONFIG_PPC_KUAP_DEBUG + BEGIN_MMU_FTR_SECTION_NESTED(67) + mfspr \gpr1, SPRN_AMR + li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT) + sldi \gpr2, \gpr2, AMR_KUAP_SHIFT +999: tdne \gpr1, \gpr2 + EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) +#endif +.endm + +.macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr +#ifdef CONFIG_PPC_KUAP + BEGIN_MMU_FTR_SECTION_NESTED(67) + .ifnb \msr_pr_cr + bne \msr_pr_cr, 99f + .endif + mfspr \gpr1, SPRN_AMR + std \gpr1, STACK_REGS_KUAP(r1) + li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT) + sldi \gpr2, \gpr2, AMR_KUAP_SHIFT + cmpd \use_cr, \gpr1, \gpr2 + beq \use_cr, 99f + // We don't isync here because we very recently entered via rfid + mtspr SPRN_AMR, \gpr2 + isync +99: + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) +#endif +.endm + +#else /* !__ASSEMBLY__ */ + +#ifdef CONFIG_PPC_KUAP + +#include <asm/reg.h> + +/* + * We support individually allowing read or write, but we don't support nesting + * because that would require an expensive read/modify write of the AMR. + */ + +static inline void set_kuap(unsigned long value) +{ + if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP)) + return; + + /* + * ISA v3.0B says we need a CSI (Context Synchronising Instruction) both + * before and after the move to AMR. See table 6 on page 1134. + */ + isync(); + mtspr(SPRN_AMR, value); + isync(); +} + +static inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size) +{ + // This is written so we can resolve to a single case at build time + if (__builtin_constant_p(to) && to == NULL) + set_kuap(AMR_KUAP_BLOCK_WRITE); + else if (__builtin_constant_p(from) && from == NULL) + set_kuap(AMR_KUAP_BLOCK_READ); + else + set_kuap(0); +} + +static inline void prevent_user_access(void __user *to, const void __user *from, + unsigned long size) +{ + set_kuap(AMR_KUAP_BLOCKED); +} + +static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +{ + return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && + (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), + "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read"); +} +#endif /* CONFIG_PPC_KUAP */ + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */ diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index a28a28079edb..1e4705516a54 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -588,7 +588,8 @@ extern void slb_set_size(u16 size); #endif #define MAX_VMALLOC_CTX_CNT 1 -#define MAX_MEMMAP_CTX_CNT 1 +#define MAX_IO_CTX_CNT 1 +#define MAX_VMEMMAP_CTX_CNT 1 /* * 256MB segment @@ -601,13 +602,10 @@ extern void slb_set_size(u16 size); * would give a protovsid of 0x1fffffffff. That will result in a VSID 0 * because of the modulo operation in vsid scramble. * - * We add one extra context to MIN_USER_CONTEXT so that we can map kernel - * context easily. The +1 is to map the unused 0xe region mapping. */ #define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 2) #define MIN_USER_CONTEXT (MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \ - MAX_MEMMAP_CTX_CNT + 2) - + MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT) /* * For platforms that support on 65bit VA we limit the context bits */ @@ -657,8 +655,8 @@ extern void slb_set_size(u16 size); /* 4 bits per slice and we have one slice per 1TB */ #define SLICE_ARRAY_SIZE (H_PGTABLE_RANGE >> 41) -#define TASK_SLICE_ARRAY_SZ(x) ((x)->context.slb_addr_limit >> 41) - +#define LOW_SLICE_ARRAY_SZ (BITS_PER_LONG / BITS_PER_BYTE) +#define TASK_SLICE_ARRAY_SZ(x) ((x)->hash_context->slb_addr_limit >> 41) #ifndef __ASSEMBLY__ #ifdef CONFIG_PPC_SUBPAGE_PROT @@ -687,12 +685,41 @@ struct subpage_prot_table { #define SBP_L3_SHIFT (SBP_L2_SHIFT + SBP_L2_BITS) extern void subpage_prot_free(struct mm_struct *mm); -extern void subpage_prot_init_new_context(struct mm_struct *mm); #else static inline void subpage_prot_free(struct mm_struct *mm) {} -static inline void subpage_prot_init_new_context(struct mm_struct *mm) { } #endif /* CONFIG_PPC_SUBPAGE_PROT */ +/* + * One bit per slice. We have lower slices which cover 256MB segments + * upto 4G range. That gets us 16 low slices. For the rest we track slices + * in 1TB size. + */ +struct slice_mask { + u64 low_slices; + DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH); +}; + +struct hash_mm_context { + u16 user_psize; /* page size index */ + + /* SLB page size encodings*/ + unsigned char low_slices_psize[LOW_SLICE_ARRAY_SZ]; + unsigned char high_slices_psize[SLICE_ARRAY_SIZE]; + unsigned long slb_addr_limit; +#ifdef CONFIG_PPC_64K_PAGES + struct slice_mask mask_64k; +#endif + struct slice_mask mask_4k; +#ifdef CONFIG_HUGETLB_PAGE + struct slice_mask mask_16m; + struct slice_mask mask_16g; +#endif + +#ifdef CONFIG_PPC_SUBPAGE_PROT + struct subpage_prot_table *spt; +#endif /* CONFIG_PPC_SUBPAGE_PROT */ +}; + #if 0 /* * The code below is equivalent to this function for arguments @@ -747,7 +774,7 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea, /* * Bad address. We return VSID 0 for that */ - if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE) + if ((ea & EA_MASK) >= H_PGTABLE_RANGE) return 0; if (!mmu_has_feature(MMU_FTR_68_BIT_VA)) @@ -774,28 +801,29 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea, * 0x00002 - [ 0xc002000000000000 - 0xc003ffffffffffff] * 0x00003 - [ 0xc004000000000000 - 0xc005ffffffffffff] * 0x00004 - [ 0xc006000000000000 - 0xc007ffffffffffff] - - * 0x00005 - [ 0xd000000000000000 - 0xd001ffffffffffff ] - * 0x00006 - Not used - Can map 0xe000000000000000 range. - * 0x00007 - [ 0xf000000000000000 - 0xf001ffffffffffff ] * - * So we can compute the context from the region (top nibble) by - * subtracting 11, or 0xc - 1. + * vmap, IO, vmemap + * + * 0x00005 - [ 0xc008000000000000 - 0xc009ffffffffffff] + * 0x00006 - [ 0xc00a000000000000 - 0xc00bffffffffffff] + * 0x00007 - [ 0xc00c000000000000 - 0xc00dffffffffffff] + * */ static inline unsigned long get_kernel_context(unsigned long ea) { - unsigned long region_id = REGION_ID(ea); + unsigned long region_id = get_region_id(ea); unsigned long ctx; /* - * For linear mapping we do support multiple context + * Depending on Kernel config, kernel region can have one context + * or more. */ - if (region_id == KERNEL_REGION_ID) { + if (region_id == LINEAR_MAP_REGION_ID) { /* * We already verified ea to be not beyond the addr limit. */ - ctx = 1 + ((ea & ~REGION_MASK) >> MAX_EA_BITS_PER_CONTEXT); + ctx = 1 + ((ea & EA_MASK) >> MAX_EA_BITS_PER_CONTEXT); } else - ctx = (region_id - 0xc) + MAX_KERNEL_CTX_CNT; + ctx = region_id + MAX_KERNEL_CTX_CNT - 1; return ctx; } diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 1ceee000c18d..74d24201fc4f 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -25,15 +25,22 @@ struct mmu_psize_def { }; }; extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; +#endif /* __ASSEMBLY__ */ /* - * For BOOK3s 64 with 4k and 64K linux page size - * we want to use pointers, because the page table - * actually store pfn + * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS + * if we increase SECTIONS_WIDTH we will not store node details in page->flags and + * page_to_nid does a page->section->node lookup + * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce + * memory requirements with large number of sections. + * 51 bits is the max physical real address on POWER9 */ -typedef pte_t *pgtable_t; - -#endif /* __ASSEMBLY__ */ +#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \ + defined(CONFIG_PPC_64K_PAGES) +#define MAX_PHYSMEM_BITS 51 +#else +#define MAX_PHYSMEM_BITS 46 +#endif /* 64-bit classic hash table MMU */ #include <asm/book3s/64/mmu-hash.h> @@ -89,16 +96,6 @@ struct spinlock; /* Maximum possible number of NPUs in a system. */ #define NV_MAX_NPUS 8 -/* - * One bit per slice. We have lower slices which cover 256MB segments - * upto 4G range. That gets us 16 low slices. For the rest we track slices - * in 1TB size. - */ -struct slice_mask { - u64 low_slices; - DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH); -}; - typedef struct { union { /* @@ -112,7 +109,6 @@ typedef struct { mm_context_id_t id; mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE]; }; - u16 user_psize; /* page size index */ /* Number of bits in the mm_cpumask */ atomic_t active_cpus; @@ -122,27 +118,9 @@ typedef struct { /* NPU NMMU context */ struct npu_context *npu_context; + struct hash_mm_context *hash_context; -#ifdef CONFIG_PPC_MM_SLICES - /* SLB page size encodings*/ - unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE]; - unsigned char high_slices_psize[SLICE_ARRAY_SIZE]; - unsigned long slb_addr_limit; -# ifdef CONFIG_PPC_64K_PAGES - struct slice_mask mask_64k; -# endif - struct slice_mask mask_4k; -# ifdef CONFIG_HUGETLB_PAGE - struct slice_mask mask_16m; - struct slice_mask mask_16g; -# endif -#else - u16 sllp; /* SLB page size encoding */ -#endif unsigned long vdso_base; -#ifdef CONFIG_PPC_SUBPAGE_PROT - struct subpage_prot_table spt; -#endif /* CONFIG_PPC_SUBPAGE_PROT */ /* * pagetable fragment support */ @@ -163,6 +141,60 @@ typedef struct { #endif } mm_context_t; +static inline u16 mm_ctx_user_psize(mm_context_t *ctx) +{ + return ctx->hash_context->user_psize; +} + +static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize) +{ + ctx->hash_context->user_psize = user_psize; +} + +static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx) +{ + return ctx->hash_context->low_slices_psize; +} + +static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx) +{ + return ctx->hash_context->high_slices_psize; +} + +static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx) +{ + return ctx->hash_context->slb_addr_limit; +} + +static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit) +{ + ctx->hash_context->slb_addr_limit = limit; +} + +static inline struct slice_mask *slice_mask_for_size(mm_context_t *ctx, int psize) +{ +#ifdef CONFIG_PPC_64K_PAGES + if (psize == MMU_PAGE_64K) + return &ctx->hash_context->mask_64k; +#endif +#ifdef CONFIG_HUGETLB_PAGE + if (psize == MMU_PAGE_16M) + return &ctx->hash_context->mask_16m; + if (psize == MMU_PAGE_16G) + return &ctx->hash_context->mask_16g; +#endif + BUG_ON(psize != MMU_PAGE_4K); + + return &ctx->hash_context->mask_4k; +} + +#ifdef CONFIG_PPC_SUBPAGE_PROT +static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx) +{ + return ctx->hash_context->spt; +} +#endif + /* * The current system page and segment sizes */ diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 138bc2ecc0c4..d45e4449619f 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -19,29 +19,7 @@ struct vmemmap_backing { }; extern struct vmemmap_backing *vmemmap_list; -/* - * Functions that deal with pagetables that could be at any level of - * the table need to be passed an "index_size" so they know how to - * handle allocation. For PTE pages (which are linked to a struct - * page for now, and drawn from the main get_free_pages() pool), the - * allocation size will be (2^index_size * sizeof(pointer)) and - * allocations are drawn from the kmem_cache in PGT_CACHE(index_size). - * - * The maximum index size needs to be big enough to allow any - * pagetable sizes we need, but small enough to fit in the low bits of - * any page table pointer. In other words all pagetables, even tiny - * ones, must be aligned to allow at least enough low 0 bits to - * contain this value. This value is also used as a mask, so it must - * be one less than a power of two. - */ -#define MAX_PGTABLE_INDEX_SIZE 0xf - -extern struct kmem_cache *pgtable_cache[]; -#define PGT_CACHE(shift) pgtable_cache[shift] - -extern pte_t *pte_fragment_alloc(struct mm_struct *, int); extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long); -extern void pte_fragment_free(unsigned long *, int); extern void pmd_fragment_free(unsigned long *); extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift); #ifdef CONFIG_SMP @@ -81,6 +59,9 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), pgtable_gfp_flags(mm, GFP_KERNEL)); + if (unlikely(!pgd)) + return pgd; + /* * Don't scan the PGD for pointers, it contains references to PUDs but * those references are not full pointers and so can't be recognised by @@ -185,31 +166,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, *pmd = __pmd(__pgtable_ptr_val(pte_page) | PMD_VAL_BITS); } -static inline pgtable_t pmd_pgtable(pmd_t pmd) -{ - return (pgtable_t)pmd_page_vaddr(pmd); -} - -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - return (pte_t *)pte_fragment_alloc(mm, 1); -} - -static inline pgtable_t pte_alloc_one(struct mm_struct *mm) -{ - return (pgtable_t)pte_fragment_alloc(mm, 0); -} - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - pte_fragment_free((unsigned long *)pte, 1); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) -{ - pte_fragment_free((unsigned long *)ptepage, 0); -} - static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { @@ -221,8 +177,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, pgtable_free_tlb(tlb, table, PTE_INDEX); } -#define check_pgt_cache() do { } while (0) - extern atomic_long_t direct_pages_count[MMU_PAGE_COUNT]; static inline void update_page_count(int psize, long count) { diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 581f91be9dd4..7dede2e34b70 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -277,9 +277,11 @@ extern unsigned long __vmalloc_end; extern unsigned long __kernel_virt_start; extern unsigned long __kernel_virt_size; extern unsigned long __kernel_io_start; +extern unsigned long __kernel_io_end; #define KERN_VIRT_START __kernel_virt_start -#define KERN_VIRT_SIZE __kernel_virt_size #define KERN_IO_START __kernel_io_start +#define KERN_IO_END __kernel_io_end + extern struct page *vmemmap; extern unsigned long ioremap_bot; extern unsigned long pci_io_base; @@ -296,8 +298,7 @@ extern unsigned long pci_io_base; #include <asm/barrier.h> /* - * The second half of the kernel virtual space is used for IO mappings, - * it's itself carved into the PIO region (ISA and PHB IO space) and + * IO space itself carved into the PIO region (ISA and PHB IO space) and * the ioremap space * * ISA_IO_BASE = KERN_IO_START, 64K reserved area @@ -310,7 +311,7 @@ extern unsigned long pci_io_base; #define PHB_IO_BASE (ISA_IO_END) #define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE) #define IOREMAP_BASE (PHB_IO_END) -#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE) +#define IOREMAP_END (KERN_IO_END) /* Advertise special mapping type for AGP */ #define HAVE_PAGE_AGP @@ -992,7 +993,8 @@ extern struct page *pgd_page(pgd_t pgd); (((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr)) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) -#define pte_unmap(pte) do { } while(0) + +static inline void pte_unmap(pte_t *pte) { } /* to find an entry in a kernel page-table-directory */ /* This now only contains the vmalloc pages */ diff --git a/arch/powerpc/include/asm/book3s/64/radix-4k.h b/arch/powerpc/include/asm/book3s/64/radix-4k.h index 863c3e8286fb..d5f5ab73dc7f 100644 --- a/arch/powerpc/include/asm/book3s/64/radix-4k.h +++ b/arch/powerpc/include/asm/book3s/64/radix-4k.h @@ -5,10 +5,11 @@ /* * For 4K page size supported index is 13/9/9/9 */ -#define RADIX_PTE_INDEX_SIZE 9 /* 2MB huge page */ -#define RADIX_PMD_INDEX_SIZE 9 /* 1G huge page */ -#define RADIX_PUD_INDEX_SIZE 9 -#define RADIX_PGD_INDEX_SIZE 13 +#define RADIX_PTE_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 4K = 2MB +#define RADIX_PMD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 2MB = 1GB +#define RADIX_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 1GB = 512GB +#define RADIX_PGD_INDEX_SIZE 13 // size: 8B << 13 = 64KB, maps 2^13 x 512GB = 4PB + /* * One fragment per per page */ diff --git a/arch/powerpc/include/asm/book3s/64/radix-64k.h b/arch/powerpc/include/asm/book3s/64/radix-64k.h index ccb78ca9d0c5..54e33828b0fb 100644 --- a/arch/powerpc/include/asm/book3s/64/radix-64k.h +++ b/arch/powerpc/include/asm/book3s/64/radix-64k.h @@ -5,10 +5,10 @@ /* * For 64K page size supported index is 13/9/9/5 */ -#define RADIX_PTE_INDEX_SIZE 5 /* 2MB huge page */ -#define RADIX_PMD_INDEX_SIZE 9 /* 1G huge page */ -#define RADIX_PUD_INDEX_SIZE 9 -#define RADIX_PGD_INDEX_SIZE 13 +#define RADIX_PTE_INDEX_SIZE 5 // size: 8B << 5 = 256B, maps 2^5 x 64K = 2MB +#define RADIX_PMD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 2MB = 1GB +#define RADIX_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 1GB = 512GB +#define RADIX_PGD_INDEX_SIZE 13 // size: 8B << 13 = 64KB, maps 2^13 x 512GB = 4PB /* * We use a 256 byte PTE page fragment in radix diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 5ab134eeed20..574eca33f893 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -72,19 +72,17 @@ * | | * | | * | | - * +------------------------------+ Kernel IO map end (0xc010000000000000) + * +------------------------------+ Kernel vmemmap end (0xc010000000000000) * | | + * | 512TB | * | | - * | 1/2 of virtual map | + * +------------------------------+ Kernel IO map end/vmemap start * | | + * | 512TB | * | | - * +------------------------------+ Kernel IO map start + * +------------------------------+ Kernel vmap end/ IO map start * | | - * | 1/4 of virtual map | - * | | - * +------------------------------+ Kernel vmemap start - * | | - * | 1/4 of virtual map | + * | 512TB | * | | * +------------------------------+ Kernel virt start (0xc008000000000000) * | | @@ -93,24 +91,24 @@ * +------------------------------+ Kernel linear (0xc.....) */ -#define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000) -#define RADIX_KERN_VIRT_SIZE ASM_CONST(0x0008000000000000) - +#define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000) /* - * The vmalloc space starts at the beginning of that region, and - * occupies a quarter of it on radix config. - * (we keep a quarter for the virtual memmap) + * 49 = MAX_EA_BITS_PER_CONTEXT (hash specific). To make sure we pick + * the same value as hash. */ +#define RADIX_KERN_MAP_SIZE (1UL << 49) + #define RADIX_VMALLOC_START RADIX_KERN_VIRT_START -#define RADIX_VMALLOC_SIZE (RADIX_KERN_VIRT_SIZE >> 2) +#define RADIX_VMALLOC_SIZE RADIX_KERN_MAP_SIZE #define RADIX_VMALLOC_END (RADIX_VMALLOC_START + RADIX_VMALLOC_SIZE) -/* - * Defines the address of the vmemap area, in its own region on - * hash table CPUs. - */ -#define RADIX_VMEMMAP_BASE (RADIX_VMALLOC_END) -#define RADIX_KERN_IO_START (RADIX_KERN_VIRT_START + (RADIX_KERN_VIRT_SIZE >> 1)) +#define RADIX_KERN_IO_START RADIX_VMALLOC_END +#define RADIX_KERN_IO_SIZE RADIX_KERN_MAP_SIZE +#define RADIX_KERN_IO_END (RADIX_KERN_IO_START + RADIX_KERN_IO_SIZE) + +#define RADIX_VMEMMAP_START RADIX_KERN_IO_END +#define RADIX_VMEMMAP_SIZE RADIX_KERN_MAP_SIZE +#define RADIX_VMEMMAP_END (RADIX_VMEMMAP_START + RADIX_VMEMMAP_SIZE) #ifndef __ASSEMBLY__ #define RADIX_PTE_TABLE_SIZE (sizeof(pte_t) << RADIX_PTE_INDEX_SIZE) diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h index db0dedab65ee..f0d3194ba41b 100644 --- a/arch/powerpc/include/asm/book3s/64/slice.h +++ b/arch/powerpc/include/asm/book3s/64/slice.h @@ -2,8 +2,6 @@ #ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H #define _ASM_POWERPC_BOOK3S_64_SLICE_H -#ifdef CONFIG_PPC_MM_SLICES - #define SLICE_LOW_SHIFT 28 #define SLICE_LOW_TOP (0x100000000ul) #define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT) @@ -13,15 +11,6 @@ #define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT) #define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT) -#else /* CONFIG_PPC_MM_SLICES */ - -#define get_slice_psize(mm, addr) ((mm)->context.user_psize) -#define slice_set_user_psize(mm, psize) \ -do { \ - (mm)->context.user_psize = (psize); \ - (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \ -} while (0) - -#endif /* CONFIG_PPC_MM_SLICES */ +#define SLB_ADDR_LIMIT_DEFAULT DEFAULT_MAP_WINDOW_USER64 #endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */ diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index 43e5f31fe64d..9844b3ded187 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -27,10 +27,11 @@ * the THREAD_WINKLE_BITS are set, which indicate which threads have not * yet woken from the winkle state. */ -#define PNV_CORE_IDLE_LOCK_BIT 0x10000000 +#define NR_PNV_CORE_IDLE_LOCK_BIT 28 +#define PNV_CORE_IDLE_LOCK_BIT (1ULL << NR_PNV_CORE_IDLE_LOCK_BIT) +#define PNV_CORE_IDLE_WINKLE_COUNT_SHIFT 16 #define PNV_CORE_IDLE_WINKLE_COUNT 0x00010000 -#define PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT 0x00080000 #define PNV_CORE_IDLE_WINKLE_COUNT_BITS 0x000F0000 #define PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT 8 #define PNV_CORE_IDLE_THREAD_WINKLE_BITS 0x0000FF00 @@ -68,16 +69,6 @@ #define ERR_DEEP_STATE_ESL_MISMATCH -2 #ifndef __ASSEMBLY__ -/* Additional SPRs that need to be saved/restored during stop */ -struct stop_sprs { - u64 pid; - u64 ldbar; - u64 fscr; - u64 hfscr; - u64 mmcr1; - u64 mmcr2; - u64 mmcra; -}; #define PNV_IDLE_NAME_LEN 16 struct pnv_idle_states_t { @@ -92,10 +83,6 @@ struct pnv_idle_states_t { extern struct pnv_idle_states_t *pnv_idle_states; extern int nr_pnv_idle_states; -extern u32 pnv_fastsleep_workaround_at_entry[]; -extern u32 pnv_fastsleep_workaround_at_exit[]; - -extern u64 pnv_first_deep_stop_state; unsigned long pnv_cpu_offline(unsigned int cpu); int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags); diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 7c1d8e74b25d..7f3279b014db 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -17,6 +17,9 @@ struct drmem_lmb { u32 drc_index; u32 aa_index; u32 flags; +#ifdef CONFIG_MEMORY_HOTPLUG + int nid; +#endif }; struct drmem_lmb_info { @@ -104,4 +107,22 @@ static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb) lmb->aa_index = 0xffffffff; } +#ifdef CONFIG_MEMORY_HOTPLUG +static inline void lmb_set_nid(struct drmem_lmb *lmb) +{ + lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr); +} +static inline void lmb_clear_nid(struct drmem_lmb *lmb) +{ + lmb->nid = -1; +} +#else +static inline void lmb_set_nid(struct drmem_lmb *lmb) +{ +} +static inline void lmb_clear_nid(struct drmem_lmb *lmb) +{ +} +#endif + #endif /* _ASM_POWERPC_LMB_H */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 937bb630093f..bef4e05a6823 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -497,6 +497,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) RESTORE_CTR(r1, area); \ b bad_stack; \ 3: EXCEPTION_PROLOG_COMMON_1(); \ + kuap_save_amr_and_lock r9, r10, cr1, cr0; \ beq 4f; /* if from kernel mode */ \ ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \ SAVE_PPR(area, r9); \ @@ -691,6 +692,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CTRL) */ #define EXCEPTION_COMMON_NORET_STACK(area, trap, label, hdlr, additions) \ EXCEPTION_PROLOG_COMMON_1(); \ + kuap_save_amr_and_lock r9, r10, cr1; \ EXCEPTION_PROLOG_COMMON_2(area); \ EXCEPTION_PROLOG_COMMON_3(trap); \ /* Volatile regs are potentially clobbered here */ \ diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index 188776befaf9..e2099c0a15c3 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -219,5 +219,6 @@ extern void fadump_cleanup(void); static inline int is_fadump_active(void) { return 0; } static inline int should_fadump_crash(void) { return 0; } static inline void crash_fadump(struct pt_regs *regs, const char *str) { } +static inline void fadump_cleanup(void) { } #endif #endif diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 40a6c9261a6b..f6fc31f8baff 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -100,6 +100,9 @@ label##5: \ #define END_MMU_FTR_SECTION(msk, val) \ END_MMU_FTR_SECTION_NESTED(msk, val, 97) +#define END_MMU_FTR_SECTION_NESTED_IFSET(msk, label) \ + END_MMU_FTR_SECTION_NESTED((msk), (msk), label) + #define END_MMU_FTR_SECTION_IFSET(msk) END_MMU_FTR_SECTION((msk), (msk)) #define END_MMU_FTR_SECTION_IFCLR(msk) END_MMU_FTR_SECTION((msk), 0) diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index b9fbed84ddca..0cfc365d814b 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -22,7 +22,12 @@ #include <asm/kmap_types.h> #endif +#ifdef CONFIG_KASAN +#include <asm/kasan.h> +#define FIXADDR_TOP (KASAN_SHADOW_START - PAGE_SIZE) +#else #define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE)) +#endif /* * Here we define all the compile-time 'special' virtual diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index 88b38b37c21b..3a6aa57b9d90 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -35,6 +35,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, { int oldval = 0, ret; + allow_write_to_user(uaddr, sizeof(*uaddr)); pagefault_disable(); switch (op) { @@ -62,6 +63,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, if (!ret) *oval = oldval; + prevent_write_to_user(uaddr, sizeof(*uaddr)); return ret; } @@ -75,6 +77,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(uaddr, sizeof(u32))) return -EFAULT; + allow_write_to_user(uaddr, sizeof(*uaddr)); __asm__ __volatile__ ( PPC_ATOMIC_ENTRY_BARRIER "1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\ @@ -95,6 +98,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "cc", "memory"); *uval = prev; + prevent_write_to_user(uaddr, sizeof(*uaddr)); return ret; } diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 8d40565ad0c3..20a101046cff 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -6,82 +6,16 @@ #include <asm/page.h> #ifdef CONFIG_PPC_BOOK3S_64 - #include <asm/book3s/64/hugetlb.h> -/* - * This should work for other subarchs too. But right now we use the - * new format only for 64bit book3s - */ -static inline pte_t *hugepd_page(hugepd_t hpd) -{ - BUG_ON(!hugepd_ok(hpd)); - /* - * We have only four bits to encode, MMU page size - */ - BUILD_BUG_ON((MMU_PAGE_COUNT - 1) > 0xf); - return __va(hpd_val(hpd) & HUGEPD_ADDR_MASK); -} - -static inline unsigned int hugepd_mmu_psize(hugepd_t hpd) -{ - return (hpd_val(hpd) & HUGEPD_SHIFT_MASK) >> 2; -} - -static inline unsigned int hugepd_shift(hugepd_t hpd) -{ - return mmu_psize_to_shift(hugepd_mmu_psize(hpd)); -} -static inline void flush_hugetlb_page(struct vm_area_struct *vma, - unsigned long vmaddr) -{ - if (radix_enabled()) - return radix__flush_hugetlb_page(vma, vmaddr); -} - -#else - -static inline pte_t *hugepd_page(hugepd_t hpd) -{ - BUG_ON(!hugepd_ok(hpd)); -#ifdef CONFIG_PPC_8xx - return (pte_t *)__va(hpd_val(hpd) & ~HUGEPD_SHIFT_MASK); -#else - return (pte_t *)((hpd_val(hpd) & - ~HUGEPD_SHIFT_MASK) | PD_HUGE); -#endif -} - -static inline unsigned int hugepd_shift(hugepd_t hpd) -{ -#ifdef CONFIG_PPC_8xx - return ((hpd_val(hpd) & _PMD_PAGE_MASK) >> 1) + 17; -#else - return hpd_val(hpd) & HUGEPD_SHIFT_MASK; -#endif -} - +#elif defined(CONFIG_PPC_FSL_BOOK3E) +#include <asm/nohash/hugetlb-book3e.h> +#elif defined(CONFIG_PPC_8xx) +#include <asm/nohash/32/hugetlb-8xx.h> #endif /* CONFIG_PPC_BOOK3S_64 */ +extern bool hugetlb_disabled; -static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, - unsigned pdshift) -{ - /* - * On FSL BookE, we have multiple higher-level table entries that - * point to the same hugepte. Just use the first one since they're all - * identical. So for that case, idx=0. - */ - unsigned long idx = 0; - - pte_t *dir = hugepd_page(hpd); -#ifdef CONFIG_PPC_8xx - idx = (addr & ((1UL << pdshift) - 1)) >> PAGE_SHIFT; -#elif !defined(CONFIG_PPC_FSL_BOOK3E) - idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd); -#endif - - return dir + idx; -} +void hugetlbpage_init_default(void); void flush_dcache_icache_hugepage(struct page *page); @@ -99,15 +33,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte); -#ifdef CONFIG_PPC_8xx -static inline void flush_hugetlb_page(struct vm_area_struct *vma, - unsigned long vmaddr) -{ - flush_tlb_page(vma, vmaddr); -} -#else -void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -#endif #define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index ece4dc89c90b..0fe8c1e46bbc 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -90,10 +90,18 @@ static inline void hw_breakpoint_disable(void) extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs); int hw_breakpoint_handler(struct die_args *args); +extern int set_dawr(struct arch_hw_breakpoint *brk); +extern bool dawr_force_enable; +static inline bool dawr_enabled(void) +{ + return dawr_force_enable; +} + #else /* CONFIG_HAVE_HW_BREAKPOINT */ static inline void hw_breakpoint_disable(void) { } static inline void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) { } +static inline bool dawr_enabled(void) { return false; } #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif /* __KERNEL__ */ #endif /* _PPC_BOOK3S_64_HW_BREAKPOINT_H */ diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h index 69f516ecb2fd..7c2ef0e42661 100644 --- a/arch/powerpc/include/asm/imc-pmu.h +++ b/arch/powerpc/include/asm/imc-pmu.h @@ -33,6 +33,7 @@ */ #define THREAD_IMC_LDBAR_MASK 0x0003ffffffffe000ULL #define THREAD_IMC_ENABLE 0x8000000000000000ULL +#define TRACE_IMC_ENABLE 0x4000000000000000ULL /* * For debugfs interface for imc-mode and imc-command @@ -59,6 +60,34 @@ struct imc_events { char *scale; }; +/* + * Trace IMC hardware updates a 64bytes record on + * Core Performance Monitoring Counter (CPMC) + * overflow. Here is the layout for the trace imc record + * + * DW 0 : Timebase + * DW 1 : Program Counter + * DW 2 : PIDR information + * DW 3 : CPMC1 + * DW 4 : CPMC2 + * DW 5 : CPMC3 + * Dw 6 : CPMC4 + * DW 7 : Timebase + * ..... + * + * The following is the data structure to hold trace imc data. + */ +struct trace_imc_data { + u64 tb1; + u64 ip; + u64 val; + u64 cpmc1; + u64 cpmc2; + u64 cpmc3; + u64 cpmc4; + u64 tb2; +}; + /* Event attribute array index */ #define IMC_FORMAT_ATTR 0 #define IMC_EVENT_ATTR 1 @@ -69,6 +98,13 @@ struct imc_events { #define IMC_EVENT_OFFSET_MASK 0xffffffffULL /* + * Macro to mask bits 0:21 of first double word(which is the timebase) to + * compare with 8th double word (timebase) of trace imc record data. + */ +#define IMC_TRACE_RECORD_TB1_MASK 0x3ffffffffffULL + + +/* * Device tree parser code detects IMC pmu support and * registers new IMC pmus. This structure will hold the * pmu functions, events, counter memory information @@ -113,6 +149,7 @@ struct imc_pmu_ref { enum { IMC_TYPE_THREAD = 0x1, + IMC_TYPE_TRACE = 0x2, IMC_TYPE_CORE = 0x4, IMC_TYPE_CHIP = 0x10, }; @@ -123,6 +160,8 @@ enum { #define IMC_DOMAIN_NEST 1 #define IMC_DOMAIN_CORE 2 #define IMC_DOMAIN_THREAD 3 +/* For trace-imc the domain is still thread but it operates in trace-mode */ +#define IMC_DOMAIN_TRACE 4 extern int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id); diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h new file mode 100644 index 000000000000..296e51c2f066 --- /dev/null +++ b/arch/powerpc/include/asm/kasan.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_KASAN_H +#define __ASM_KASAN_H + +#ifdef CONFIG_KASAN +#define _GLOBAL_KASAN(fn) _GLOBAL(__##fn) +#define _GLOBAL_TOC_KASAN(fn) _GLOBAL_TOC(__##fn) +#define EXPORT_SYMBOL_KASAN(fn) EXPORT_SYMBOL(__##fn) +#else +#define _GLOBAL_KASAN(fn) _GLOBAL(fn) +#define _GLOBAL_TOC_KASAN(fn) _GLOBAL_TOC(fn) +#define EXPORT_SYMBOL_KASAN(fn) +#endif + +#ifndef __ASSEMBLY__ + +#include <asm/page.h> + +#define KASAN_SHADOW_SCALE_SHIFT 3 + +#define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \ + (PAGE_OFFSET >> KASAN_SHADOW_SCALE_SHIFT)) + +#define KASAN_SHADOW_OFFSET ASM_CONST(CONFIG_KASAN_SHADOW_OFFSET) + +#define KASAN_SHADOW_END 0UL + +#define KASAN_SHADOW_SIZE (KASAN_SHADOW_END - KASAN_SHADOW_START) + +#ifdef CONFIG_KASAN +void kasan_early_init(void); +void kasan_mmu_init(void); +void kasan_init(void); +#else +static inline void kasan_init(void) { } +static inline void kasan_mmu_init(void) { } +#endif + +#endif /* __ASSEMBLY */ +#endif diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h new file mode 100644 index 000000000000..5b5e39643a27 --- /dev/null +++ b/arch/powerpc/include/asm/kup.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_KUP_H_ +#define _ASM_POWERPC_KUP_H_ + +#ifdef CONFIG_PPC64 +#include <asm/book3s/64/kup-radix.h> +#endif +#ifdef CONFIG_PPC_8xx +#include <asm/nohash/32/kup-8xx.h> +#endif +#ifdef CONFIG_PPC_BOOK3S_32 +#include <asm/book3s/32/kup.h> +#endif + +#ifdef __ASSEMBLY__ +#ifndef CONFIG_PPC_KUAP +.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3 +.endm + +.macro kuap_restore sp, current, gpr1, gpr2, gpr3 +.endm + +.macro kuap_check current, gpr +.endm + +#endif + +#else /* !__ASSEMBLY__ */ + +#include <asm/pgtable.h> + +void setup_kup(void); + +#ifdef CONFIG_PPC_KUEP +void setup_kuep(bool disabled); +#else +static inline void setup_kuep(bool disabled) { } +#endif /* CONFIG_PPC_KUEP */ + +#ifdef CONFIG_PPC_KUAP +void setup_kuap(bool disabled); +#else +static inline void setup_kuap(bool disabled) { } +static inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size) { } +static inline void prevent_user_access(void __user *to, const void __user *from, + unsigned long size) { } +static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) { return false; } +#endif /* CONFIG_PPC_KUAP */ + +static inline void allow_read_from_user(const void __user *from, unsigned long size) +{ + allow_user_access(NULL, from, size); +} + +static inline void allow_write_to_user(void __user *to, unsigned long size) +{ + allow_user_access(to, NULL, size); +} + +static inline void prevent_read_from_user(const void __user *from, unsigned long size) +{ + prevent_user_access(NULL, from, size); +} + +static inline void prevent_write_to_user(void __user *to, unsigned long size) +{ + prevent_user_access(to, NULL, size); +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_KUP_H_ */ diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 17996bc9382b..23247a132ce8 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -31,7 +31,7 @@ enum MCE_Version { enum MCE_Severity { MCE_SEV_NO_ERROR = 0, MCE_SEV_WARNING = 1, - MCE_SEV_ERROR_SYNC = 2, + MCE_SEV_SEVERE = 2, MCE_SEV_FATAL = 3, }; @@ -56,6 +56,14 @@ enum MCE_ErrorType { MCE_ERROR_TYPE_LINK = 7, }; +enum MCE_ErrorClass { + MCE_ECLASS_UNKNOWN = 0, + MCE_ECLASS_HARDWARE, + MCE_ECLASS_HARD_INDETERMINATE, + MCE_ECLASS_SOFTWARE, + MCE_ECLASS_SOFT_INDETERMINATE, +}; + enum MCE_UeErrorType { MCE_UE_ERROR_INDETERMINATE = 0, MCE_UE_ERROR_IFETCH = 1, @@ -110,73 +118,75 @@ enum MCE_LinkErrorType { }; struct machine_check_event { - enum MCE_Version version:8; /* 0x00 */ - uint8_t in_use; /* 0x01 */ - enum MCE_Severity severity:8; /* 0x02 */ - enum MCE_Initiator initiator:8; /* 0x03 */ - enum MCE_ErrorType error_type:8; /* 0x04 */ - enum MCE_Disposition disposition:8; /* 0x05 */ - uint8_t reserved_1[2]; /* 0x06 */ - uint64_t gpr3; /* 0x08 */ - uint64_t srr0; /* 0x10 */ - uint64_t srr1; /* 0x18 */ - union { /* 0x20 */ + enum MCE_Version version:8; + u8 in_use; + enum MCE_Severity severity:8; + enum MCE_Initiator initiator:8; + enum MCE_ErrorType error_type:8; + enum MCE_ErrorClass error_class:8; + enum MCE_Disposition disposition:8; + bool sync_error; + u16 cpu; + u64 gpr3; + u64 srr0; + u64 srr1; + union { struct { enum MCE_UeErrorType ue_error_type:8; - uint8_t effective_address_provided; - uint8_t physical_address_provided; - uint8_t reserved_1[5]; - uint64_t effective_address; - uint64_t physical_address; - uint8_t reserved_2[8]; + u8 effective_address_provided; + u8 physical_address_provided; + u8 reserved_1[5]; + u64 effective_address; + u64 physical_address; + u8 reserved_2[8]; } ue_error; struct { enum MCE_SlbErrorType slb_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; + u8 effective_address_provided; + u8 reserved_1[6]; + u64 effective_address; + u8 reserved_2[16]; } slb_error; struct { enum MCE_EratErrorType erat_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; + u8 effective_address_provided; + u8 reserved_1[6]; + u64 effective_address; + u8 reserved_2[16]; } erat_error; struct { enum MCE_TlbErrorType tlb_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; + u8 effective_address_provided; + u8 reserved_1[6]; + u64 effective_address; + u8 reserved_2[16]; } tlb_error; struct { enum MCE_UserErrorType user_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; + u8 effective_address_provided; + u8 reserved_1[6]; + u64 effective_address; + u8 reserved_2[16]; } user_error; struct { enum MCE_RaErrorType ra_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; + u8 effective_address_provided; + u8 reserved_1[6]; + u64 effective_address; + u8 reserved_2[16]; } ra_error; struct { enum MCE_LinkErrorType link_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; + u8 effective_address_provided; + u8 reserved_1[6]; + u64 effective_address; + u8 reserved_2[16]; } link_error; } u; }; @@ -194,6 +204,8 @@ struct mce_error_info { } u; enum MCE_Severity severity:8; enum MCE_Initiator initiator:8; + enum MCE_ErrorClass error_class:8; + bool sync_error; }; #define MAX_MC_EVT 100 @@ -210,6 +222,7 @@ extern void release_mce_event(void); extern void machine_check_queue_event(void); extern void machine_check_print_event_info(struct machine_check_event *evt, bool user_mode, bool in_guest); +unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr); #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 8ddd4a91bdc1..ba94ce8c22d7 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -107,6 +107,11 @@ */ #define MMU_FTR_1T_SEGMENT ASM_CONST(0x40000000) +/* + * Supports KUAP (key 0 controlling userspace addresses) on radix + */ +#define MMU_FTR_RADIX_KUAP ASM_CONST(0x80000000) + /* MMU feature bit sets for various CPUs */ #define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \ MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2 @@ -124,6 +129,9 @@ #ifndef __ASSEMBLY__ #include <linux/bug.h> #include <asm/cputable.h> +#include <asm/page.h> + +typedef pte_t *pgtable_t; #ifdef CONFIG_PPC_FSL_BOOK3E #include <asm/percpu.h> @@ -164,7 +172,10 @@ enum { #endif #ifdef CONFIG_PPC_RADIX_MMU MMU_FTR_TYPE_RADIX | -#endif +#ifdef CONFIG_PPC_KUAP + MMU_FTR_RADIX_KUAP | +#endif /* CONFIG_PPC_KUAP */ +#endif /* CONFIG_PPC_RADIX_MMU */ 0, }; @@ -341,21 +352,6 @@ static inline bool strict_kernel_rwx_enabled(void) */ #define MMU_PAGE_COUNT 16 -/* - * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS - * if we increase SECTIONS_WIDTH we will not store node details in page->flags and - * page_to_nid does a page->section->node lookup - * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce - * memory requirements with large number of sections. - * 51 bits is the max physical real address on POWER9 - */ -#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \ - defined (CONFIG_PPC_64K_PAGES) -#define MAX_PHYSMEM_BITS 51 -#elif defined(CONFIG_PPC64) -#define MAX_PHYSMEM_BITS 46 -#endif - #ifdef CONFIG_PPC_BOOK3S_64 #include <asm/book3s/64/mmu.h> #else /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 6ee8195a2ffb..611204e588b9 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -52,6 +52,7 @@ static inline bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa, { return false; } +static inline void mm_iommu_init(struct mm_struct *mm) { } #endif extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm); extern void set_context(unsigned long id, pgd_t *pgd); @@ -228,13 +229,7 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, #endif } -#ifdef CONFIG_PPC_BOOK3E_64 -static inline void arch_exit_mmap(struct mm_struct *mm) -{ -} -#else extern void arch_exit_mmap(struct mm_struct *mm); -#endif static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma, diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h new file mode 100644 index 000000000000..a46616937d20 --- /dev/null +++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H +#define _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H + +#define PAGE_SHIFT_8M 23 + +static inline pte_t *hugepd_page(hugepd_t hpd) +{ + BUG_ON(!hugepd_ok(hpd)); + + return (pte_t *)__va(hpd_val(hpd) & ~HUGEPD_SHIFT_MASK); +} + +static inline unsigned int hugepd_shift(hugepd_t hpd) +{ + return ((hpd_val(hpd) & _PMD_PAGE_MASK) >> 1) + 17; +} + +static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, + unsigned int pdshift) +{ + unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> PAGE_SHIFT; + + return hugepd_page(hpd) + idx; +} + +static inline void flush_hugetlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) +{ + flush_tlb_page(vma, vmaddr); +} + +static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift) +{ + *hpdp = __hugepd(__pa(new) | _PMD_USER | _PMD_PRESENT | + (pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M : _PMD_PAGE_512K)); +} + +static inline int check_and_get_huge_psize(int shift) +{ + return shift_to_mmu_psize(shift); +} + +#endif /* _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H */ diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h new file mode 100644 index 000000000000..1c3133b5f86a --- /dev/null +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_KUP_8XX_H_ +#define _ASM_POWERPC_KUP_8XX_H_ + +#include <asm/bug.h> + +#ifdef CONFIG_PPC_KUAP + +#ifdef __ASSEMBLY__ + +.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3 + lis \gpr2, MD_APG_KUAP@h /* only APG0 and APG1 are used */ + mfspr \gpr1, SPRN_MD_AP + mtspr SPRN_MD_AP, \gpr2 + stw \gpr1, STACK_REGS_KUAP(\sp) +.endm + +.macro kuap_restore sp, current, gpr1, gpr2, gpr3 + lwz \gpr1, STACK_REGS_KUAP(\sp) + mtspr SPRN_MD_AP, \gpr1 +.endm + +.macro kuap_check current, gpr +#ifdef CONFIG_PPC_KUAP_DEBUG + mfspr \gpr, SPRN_MD_AP + rlwinm \gpr, \gpr, 16, 0xffff +999: twnei \gpr, MD_APG_KUAP@h + EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) +#endif +.endm + +#else /* !__ASSEMBLY__ */ + +#include <asm/reg.h> + +static inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size) +{ + mtspr(SPRN_MD_AP, MD_APG_INIT); +} + +static inline void prevent_user_access(void __user *to, const void __user *from, + unsigned long size) +{ + mtspr(SPRN_MD_AP, MD_APG_KUAP); +} + +static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +{ + return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000), + "Bug: fault blocked by AP register !"); +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* CONFIG_PPC_KUAP */ + +#endif /* _ASM_POWERPC_KUP_8XX_H_ */ diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 0a1a3fc54e54..76af5b0cb16e 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -35,11 +35,18 @@ * Then we use the APG to say whether accesses are according to Page rules or * "all Supervisor" rules (Access to all) * Therefore, we define 2 APG groups. lsb is _PMD_USER - * 0 => No user => 01 (all accesses performed according to page definition) + * 0 => Kernel => 01 (all accesses performed according to page definition) * 1 => User => 00 (all accesses performed as supervisor iaw page definition) - * We define all 16 groups so that all other bits of APG can take any value + * 2-16 => NA => 11 (all accesses performed as user iaw page definition) + */ +#define MI_APG_INIT 0x4fffffff + +/* + * 0 => Kernel => 01 (all accesses performed according to page definition) + * 1 => User => 10 (all accesses performed according to swaped page definition) + * 2-16 => NA => 11 (all accesses performed as user iaw page definition) */ -#define MI_APG_INIT 0x44444444 +#define MI_APG_KUEP 0x6fffffff /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MI_RPN is written, bits in @@ -108,11 +115,18 @@ * Then we use the APG to say whether accesses are according to Page rules or * "all Supervisor" rules (Access to all) * Therefore, we define 2 APG groups. lsb is _PMD_USER - * 0 => No user => 01 (all accesses performed according to page definition) + * 0 => Kernel => 01 (all accesses performed according to page definition) * 1 => User => 00 (all accesses performed as supervisor iaw page definition) - * We define all 16 groups so that all other bits of APG can take any value + * 2-16 => NA => 11 (all accesses performed as user iaw page definition) + */ +#define MD_APG_INIT 0x4fffffff + +/* + * 0 => No user => 01 (all accesses performed according to page definition) + * 1 => User => 10 (all accesses performed according to swaped page definition) + * 2-16 => NA => 11 (all accesses performed as user iaw page definition) */ -#define MD_APG_INIT 0x44444444 +#define MD_APG_KUAP 0x6fffffff /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MD_RPN is written, bits in @@ -167,9 +181,26 @@ #ifdef CONFIG_PPC_MM_SLICES #include <asm/nohash/32/slice.h> #define SLICE_ARRAY_SIZE (1 << (32 - SLICE_LOW_SHIFT - 1)) +#define LOW_SLICE_ARRAY_SZ SLICE_ARRAY_SIZE #endif +#if defined(CONFIG_PPC_4K_PAGES) +#define mmu_virtual_psize MMU_PAGE_4K +#elif defined(CONFIG_PPC_16K_PAGES) +#define mmu_virtual_psize MMU_PAGE_16K +#define PTE_FRAG_NR 4 +#define PTE_FRAG_SIZE_SHIFT 12 +#define PTE_FRAG_SIZE (1UL << 12) +#else +#error "Unsupported PAGE_SIZE" +#endif + +#define mmu_linear_psize MMU_PAGE_8M + #ifndef __ASSEMBLY__ + +#include <linux/mmdebug.h> + struct slice_mask { u64 low_slices; DECLARE_BITMAP(high_slices, 0); @@ -185,14 +216,56 @@ typedef struct { unsigned char high_slices_psize[0]; unsigned long slb_addr_limit; struct slice_mask mask_base_psize; /* 4k or 16k */ -# ifdef CONFIG_HUGETLB_PAGE struct slice_mask mask_512k; struct slice_mask mask_8m; -# endif #endif void *pte_frag; } mm_context_t; +#ifdef CONFIG_PPC_MM_SLICES +static inline u16 mm_ctx_user_psize(mm_context_t *ctx) +{ + return ctx->user_psize; +} + +static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize) +{ + ctx->user_psize = user_psize; +} + +static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx) +{ + return ctx->low_slices_psize; +} + +static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx) +{ + return ctx->high_slices_psize; +} + +static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx) +{ + return ctx->slb_addr_limit; +} + +static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit) +{ + ctx->slb_addr_limit = limit; +} + +static inline struct slice_mask *slice_mask_for_size(mm_context_t *ctx, int psize) +{ + if (psize == MMU_PAGE_512K) + return &ctx->mask_512k; + if (psize == MMU_PAGE_8M) + return &ctx->mask_8m; + + BUG_ON(psize != mmu_virtual_psize); + + return &ctx->mask_base_psize; +} +#endif /* CONFIG_PPC_MM_SLICE */ + #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000) #define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE)) @@ -242,17 +315,4 @@ extern s32 patch__itlbmiss_perf, patch__dtlbmiss_perf; #endif /* !__ASSEMBLY__ */ -#if defined(CONFIG_PPC_4K_PAGES) -#define mmu_virtual_psize MMU_PAGE_4K -#elif defined(CONFIG_PPC_16K_PAGES) -#define mmu_virtual_psize MMU_PAGE_16K -#define PTE_FRAG_NR 4 -#define PTE_FRAG_SIZE_SHIFT 12 -#define PTE_FRAG_SIZE (1UL << 12) -#else -#error "Unsupported PAGE_SIZE" -#endif - -#define mmu_linear_psize MMU_PAGE_8M - #endif /* _ASM_POWERPC_MMU_8XX_H_ */ diff --git a/arch/powerpc/include/asm/nohash/32/mmu.h b/arch/powerpc/include/asm/nohash/32/mmu.h deleted file mode 100644 index 7d94a36d57d2..000000000000 --- a/arch/powerpc/include/asm/nohash/32/mmu.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_NOHASH_32_MMU_H_ -#define _ASM_POWERPC_NOHASH_32_MMU_H_ - -#include <asm/page.h> - -#if defined(CONFIG_40x) -/* 40x-style software loaded TLB */ -#include <asm/nohash/32/mmu-40x.h> -#elif defined(CONFIG_44x) -/* 44x-style software loaded TLB */ -#include <asm/nohash/32/mmu-44x.h> -#elif defined(CONFIG_PPC_BOOK3E_MMU) -/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */ -#include <asm/nohash/mmu-book3e.h> -#elif defined (CONFIG_PPC_8xx) -/* Motorola/Freescale 8xx software loaded TLB */ -#include <asm/nohash/32/mmu-8xx.h> -#endif - -#ifndef __ASSEMBLY__ -typedef pte_t *pgtable_t; -#endif - -#endif /* _ASM_POWERPC_NOHASH_32_MMU_H_ */ diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h index bd186e85b4f7..11eac371e7e0 100644 --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h @@ -6,39 +6,6 @@ #include <linux/slab.h> /* - * Functions that deal with pagetables that could be at any level of - * the table need to be passed an "index_size" so they know how to - * handle allocation. For PTE pages (which are linked to a struct - * page for now, and drawn from the main get_free_pages() pool), the - * allocation size will be (2^index_size * sizeof(pointer)) and - * allocations are drawn from the kmem_cache in PGT_CACHE(index_size). - * - * The maximum index size needs to be big enough to allow any - * pagetable sizes we need, but small enough to fit in the low bits of - * any page table pointer. In other words all pagetables, even tiny - * ones, must be aligned to allow at least enough low 0 bits to - * contain this value. This value is also used as a mask, so it must - * be one less than a power of two. - */ -#define MAX_PGTABLE_INDEX_SIZE 0xf - -extern void __bad_pte(pmd_t *pmd); - -extern struct kmem_cache *pgtable_cache[]; -#define PGT_CACHE(shift) pgtable_cache[shift] - -static inline pgd_t *pgd_alloc(struct mm_struct *mm) -{ - return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), - pgtable_gfp_flags(mm, GFP_KERNEL)); -} - -static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ - kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); -} - -/* * We don't have any real pmd's, and this code never triggers because * the pgd will always be present.. */ @@ -47,96 +14,22 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) #define __pmd_free_tlb(tlb,x,a) do { } while (0) /* #define pgd_populate(mm, pmd, pte) BUG() */ -#ifndef CONFIG_BOOKE - -static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, - pte_t *pte) -{ - *pmdp = __pmd(__pa(pte) | _PMD_PRESENT); -} - -static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, - pgtable_t pte_page) -{ - *pmdp = __pmd(__pa(pte_page) | _PMD_USER | _PMD_PRESENT); -} - -#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd)) -#else - static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *pte) { - *pmdp = __pmd((unsigned long)pte | _PMD_PRESENT); + if (IS_ENABLED(CONFIG_BOOKE)) + *pmdp = __pmd((unsigned long)pte | _PMD_PRESENT); + else + *pmdp = __pmd(__pa(pte) | _PMD_PRESENT); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pte_page) { - *pmdp = __pmd((unsigned long)pte_page | _PMD_PRESENT); + if (IS_ENABLED(CONFIG_BOOKE)) + *pmdp = __pmd((unsigned long)pte_page | _PMD_PRESENT); + else + *pmdp = __pmd(__pa(pte_page) | _PMD_USER | _PMD_PRESENT); } -#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd)) -#endif - -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm); -extern pgtable_t pte_alloc_one(struct mm_struct *mm); -void pte_frag_destroy(void *pte_frag); -pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel); -void pte_fragment_free(unsigned long *table, int kernel); - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - pte_fragment_free((unsigned long *)pte, 1); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) -{ - pte_fragment_free((unsigned long *)ptepage, 0); -} - -static inline void pgtable_free(void *table, unsigned index_size) -{ - if (!index_size) { - pte_fragment_free((unsigned long *)table, 0); - } else { - BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE); - kmem_cache_free(PGT_CACHE(index_size), table); - } -} - -#define check_pgt_cache() do { } while (0) -#define get_hugepd_cache_index(x) (x) - -#ifdef CONFIG_SMP -static inline void pgtable_free_tlb(struct mmu_gather *tlb, - void *table, int shift) -{ - unsigned long pgf = (unsigned long)table; - BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); - pgf |= shift; - tlb_remove_table(tlb, (void *)pgf); -} - -static inline void __tlb_remove_table(void *_table) -{ - void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE); - unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE; - - pgtable_free(table, shift); -} -#else -static inline void pgtable_free_tlb(struct mmu_gather *tlb, - void *table, int shift) -{ - pgtable_free(table, shift); -} -#endif - -static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, - unsigned long address) -{ - tlb_flush_pgtable(tlb, address); - pgtable_free_tlb(tlb, table, 0); -} #endif /* _ASM_POWERPC_PGALLOC_32_H */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index bed433358260..0284f8f5305f 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -64,15 +64,24 @@ extern int icache_44x_need_flush; #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) +#ifndef __ASSEMBLY__ + +int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); + +#endif /* !__ASSEMBLY__ */ + + /* * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary * value (for now) on others, from where we can start layout kernel * virtual space that goes below PKMAP and FIXMAP */ +#include <asm/fixmap.h> + #ifdef CONFIG_HIGHMEM #define KVIRT_TOP PKMAP_BASE #else -#define KVIRT_TOP (0xfe000000UL) /* for now, could be FIXMAP_BASE ? */ +#define KVIRT_TOP FIXADDR_START #endif /* @@ -379,8 +388,6 @@ static inline int pte_young(pte_t pte) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); - #endif /* !__ASSEMBLY__ */ #endif /* __ASM_POWERPC_NOHASH_32_PGTABLE_H */ diff --git a/arch/powerpc/include/asm/nohash/32/slice.h b/arch/powerpc/include/asm/nohash/32/slice.h index 777d62e40ac0..39eb0154ae2d 100644 --- a/arch/powerpc/include/asm/nohash/32/slice.h +++ b/arch/powerpc/include/asm/nohash/32/slice.h @@ -13,6 +13,8 @@ #define SLICE_NUM_HIGH 0ul #define GET_HIGH_SLICE_INDEX(addr) (addr & 0) +#define SLB_ADDR_LIMIT_DEFAULT DEFAULT_MAP_WINDOW + #endif /* CONFIG_PPC_MM_SLICES */ #endif /* _ASM_POWERPC_NOHASH_32_SLICE_H */ diff --git a/arch/powerpc/include/asm/nohash/64/mmu.h b/arch/powerpc/include/asm/nohash/64/mmu.h deleted file mode 100644 index e6585480dfc4..000000000000 --- a/arch/powerpc/include/asm/nohash/64/mmu.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_NOHASH_64_MMU_H_ -#define _ASM_POWERPC_NOHASH_64_MMU_H_ - -/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */ -#include <asm/nohash/mmu-book3e.h> - -#ifndef __ASSEMBLY__ -typedef struct page *pgtable_t; -#endif - -#endif /* _ASM_POWERPC_NOHASH_64_MMU_H_ */ diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index 66d086f85bd5..62321cd12da9 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -18,37 +18,6 @@ struct vmemmap_backing { }; extern struct vmemmap_backing *vmemmap_list; -/* - * Functions that deal with pagetables that could be at any level of - * the table need to be passed an "index_size" so they know how to - * handle allocation. For PTE pages (which are linked to a struct - * page for now, and drawn from the main get_free_pages() pool), the - * allocation size will be (2^index_size * sizeof(pointer)) and - * allocations are drawn from the kmem_cache in PGT_CACHE(index_size). - * - * The maximum index size needs to be big enough to allow any - * pagetable sizes we need, but small enough to fit in the low bits of - * any page table pointer. In other words all pagetables, even tiny - * ones, must be aligned to allow at least enough low 0 bits to - * contain this value. This value is also used as a mask, so it must - * be one less than a power of two. - */ -#define MAX_PGTABLE_INDEX_SIZE 0xf - -extern struct kmem_cache *pgtable_cache[]; -#define PGT_CACHE(shift) pgtable_cache[shift] - -static inline pgd_t *pgd_alloc(struct mm_struct *mm) -{ - return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), - pgtable_gfp_flags(mm, GFP_KERNEL)); -} - -static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ - kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); -} - #define pgd_populate(MM, PGD, PUD) pgd_set(PGD, (unsigned long)PUD) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) @@ -76,11 +45,9 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_page) { - pmd_set(pmd, (unsigned long)page_address(pte_page)); + pmd_set(pmd, (unsigned long)pte_page); } -#define pmd_pgtable(pmd) pmd_page(pmd) - static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), @@ -92,91 +59,9 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd); } - -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); -} - -static inline pgtable_t pte_alloc_one(struct mm_struct *mm) -{ - struct page *page; - pte_t *pte; - - pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT); - if (!pte) - return NULL; - page = virt_to_page(pte); - if (!pgtable_page_ctor(page)) { - __free_page(page); - return NULL; - } - return page; -} - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_page((unsigned long)pte); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) -{ - pgtable_page_dtor(ptepage); - __free_page(ptepage); -} - -static inline void pgtable_free(void *table, int shift) -{ - if (!shift) { - pgtable_page_dtor(virt_to_page(table)); - free_page((unsigned long)table); - } else { - BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); - kmem_cache_free(PGT_CACHE(shift), table); - } -} - -#define get_hugepd_cache_index(x) (x) -#ifdef CONFIG_SMP -static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) -{ - unsigned long pgf = (unsigned long)table; - - BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); - pgf |= shift; - tlb_remove_table(tlb, (void *)pgf); -} - -static inline void __tlb_remove_table(void *_table) -{ - void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE); - unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE; - - pgtable_free(table, shift); -} - -#else -static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) -{ - pgtable_free(table, shift); -} -#endif - -static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, - unsigned long address) -{ - tlb_flush_pgtable(tlb, address); - pgtable_free_tlb(tlb, page_address(table), 0); -} - #define __pmd_free_tlb(tlb, pmd, addr) \ pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX) -#ifndef CONFIG_PPC_64K_PAGES #define __pud_free_tlb(tlb, pud, addr) \ pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE) -#endif /* CONFIG_PPC_64K_PAGES */ - -#define check_pgt_cache() do { } while (0) - #endif /* _ASM_POWERPC_PGALLOC_64_H */ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index e77ed9761632..b9f66cf15c31 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -10,10 +10,6 @@ #include <asm/barrier.h> #include <asm/asm-const.h> -#ifdef CONFIG_PPC_64K_PAGES -#error "Page size not supported" -#endif - #define FIRST_USER_ADDRESS 0UL /* @@ -23,11 +19,7 @@ PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT) #define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE) -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define PMD_CACHE_INDEX (PMD_INDEX_SIZE + 1) -#else #define PMD_CACHE_INDEX PMD_INDEX_SIZE -#endif #define PUD_CACHE_INDEX PUD_INDEX_SIZE /* @@ -73,7 +65,6 @@ #define VMALLOC_REGION_ID (REGION_ID(VMALLOC_START)) #define KERNEL_REGION_ID (REGION_ID(PAGE_OFFSET)) -#define VMEMMAP_REGION_ID (0xfUL) /* Server only */ #define USER_REGION_ID (0UL) /* @@ -205,7 +196,8 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val) (((pte_t *) pmd_page_vaddr(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) -#define pte_unmap(pte) do { } while(0) + +static inline void pte_unmap(pte_t *pte) { } /* to find an entry in a kernel page-table-directory */ /* This now only contains the vmalloc pages */ diff --git a/arch/powerpc/include/asm/nohash/64/slice.h b/arch/powerpc/include/asm/nohash/64/slice.h deleted file mode 100644 index ad0d6e3cc1c5..000000000000 --- a/arch/powerpc/include/asm/nohash/64/slice.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_NOHASH_64_SLICE_H -#define _ASM_POWERPC_NOHASH_64_SLICE_H - -#ifdef CONFIG_PPC_64K_PAGES -#define get_slice_psize(mm, addr) MMU_PAGE_64K -#else /* CONFIG_PPC_64K_PAGES */ -#define get_slice_psize(mm, addr) MMU_PAGE_4K -#endif /* !CONFIG_PPC_64K_PAGES */ -#define slice_set_user_psize(mm, psize) do { BUG(); } while (0) - -#endif /* _ASM_POWERPC_NOHASH_64_SLICE_H */ diff --git a/arch/powerpc/include/asm/nohash/hugetlb-book3e.h b/arch/powerpc/include/asm/nohash/hugetlb-book3e.h new file mode 100644 index 000000000000..ecd8694cb229 --- /dev/null +++ b/arch/powerpc/include/asm/nohash/hugetlb-book3e.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H +#define _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H + +static inline pte_t *hugepd_page(hugepd_t hpd) +{ + if (WARN_ON(!hugepd_ok(hpd))) + return NULL; + + return (pte_t *)((hpd_val(hpd) & ~HUGEPD_SHIFT_MASK) | PD_HUGE); +} + +static inline unsigned int hugepd_shift(hugepd_t hpd) +{ + return hpd_val(hpd) & HUGEPD_SHIFT_MASK; +} + +static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, + unsigned int pdshift) +{ + /* + * On FSL BookE, we have multiple higher-level table entries that + * point to the same hugepte. Just use the first one since they're all + * identical. So for that case, idx=0. + */ + return hugepd_page(hpd); +} + +void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); + +static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift) +{ + /* We use the old format for PPC_FSL_BOOK3E */ + *hpdp = __hugepd(((unsigned long)new & ~PD_HUGE) | pshift); +} + +static inline int check_and_get_huge_psize(int shift) +{ + if (shift & 1) /* Not a power of 4 */ + return -EINVAL; + + return shift_to_mmu_psize(shift); +} + +#endif /* _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H */ diff --git a/arch/powerpc/include/asm/nohash/mmu-book3e.h b/arch/powerpc/include/asm/nohash/mmu-book3e.h index e20072972e35..4c9777d256fb 100644 --- a/arch/powerpc/include/asm/nohash/mmu-book3e.h +++ b/arch/powerpc/include/asm/nohash/mmu-book3e.h @@ -306,6 +306,8 @@ extern int book3e_htw_mode; #define mmu_cleanup_all NULL +#define MAX_PHYSMEM_BITS 44 + #endif #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/nohash/mmu.h b/arch/powerpc/include/asm/nohash/mmu.h index a037cb1efb57..edc793e5f08f 100644 --- a/arch/powerpc/include/asm/nohash/mmu.h +++ b/arch/powerpc/include/asm/nohash/mmu.h @@ -2,10 +2,18 @@ #ifndef _ASM_POWERPC_NOHASH_MMU_H_ #define _ASM_POWERPC_NOHASH_MMU_H_ -#ifdef CONFIG_PPC64 -#include <asm/nohash/64/mmu.h> -#else -#include <asm/nohash/32/mmu.h> +#if defined(CONFIG_40x) +/* 40x-style software loaded TLB */ +#include <asm/nohash/32/mmu-40x.h> +#elif defined(CONFIG_44x) +/* 44x-style software loaded TLB */ +#include <asm/nohash/32/mmu-44x.h> +#elif defined(CONFIG_PPC_BOOK3E_MMU) +/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */ +#include <asm/nohash/mmu-book3e.h> +#elif defined (CONFIG_PPC_8xx) +/* Motorola/Freescale 8xx software loaded TLB */ +#include <asm/nohash/32/mmu-8xx.h> #endif #endif /* _ASM_POWERPC_NOHASH_MMU_H_ */ diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h index 0634f2949438..332b13b4ecdb 100644 --- a/arch/powerpc/include/asm/nohash/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/pgalloc.h @@ -3,6 +3,7 @@ #define _ASM_POWERPC_NOHASH_PGALLOC_H #include <linux/mm.h> +#include <linux/slab.h> extern void tlb_remove_table(struct mmu_gather *tlb, void *table); #ifdef CONFIG_PPC64 @@ -16,9 +17,64 @@ static inline void tlb_flush_pgtable(struct mmu_gather *tlb, } #endif /* !CONFIG_PPC_BOOK3E */ +static inline pgd_t *pgd_alloc(struct mm_struct *mm) +{ + return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), + pgtable_gfp_flags(mm, GFP_KERNEL)); +} + +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); +} + #ifdef CONFIG_PPC64 #include <asm/nohash/64/pgalloc.h> #else #include <asm/nohash/32/pgalloc.h> #endif + +static inline void pgtable_free(void *table, int shift) +{ + if (!shift) { + pte_fragment_free((unsigned long *)table, 0); + } else { + BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); + kmem_cache_free(PGT_CACHE(shift), table); + } +} + +#define get_hugepd_cache_index(x) (x) + +#ifdef CONFIG_SMP +static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) +{ + unsigned long pgf = (unsigned long)table; + + BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); + pgf |= shift; + tlb_remove_table(tlb, (void *)pgf); +} + +static inline void __tlb_remove_table(void *_table) +{ + void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE); + unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE; + + pgtable_free(table, shift); +} + +#else +static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) +{ + pgtable_free(table, shift); +} +#endif + +static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, + unsigned long address) +{ + tlb_flush_pgtable(tlb, address); + pgtable_free_tlb(tlb, table, 0); +} #endif /* _ASM_POWERPC_NOHASH_PGALLOC_H */ diff --git a/arch/powerpc/include/asm/nohash/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-book3e.h index dd40d200f274..813918f40765 100644 --- a/arch/powerpc/include/asm/nohash/pte-book3e.h +++ b/arch/powerpc/include/asm/nohash/pte-book3e.h @@ -60,13 +60,8 @@ #define _PAGE_SPECIAL _PAGE_SW0 /* Base page size */ -#ifdef CONFIG_PPC_64K_PAGES -#define _PAGE_PSIZE _PAGE_PSIZE_64K -#define PTE_RPN_SHIFT (28) -#else #define _PAGE_PSIZE _PAGE_PSIZE_4K #define PTE_RPN_SHIFT (24) -#endif #define PTE_WIMGE_SHIFT (19) #define PTE_BAP_SHIFT (2) diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 870fb7b239ea..e1577cfa7186 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -186,8 +186,8 @@ #define OPAL_XIVE_FREE_IRQ 140 #define OPAL_XIVE_SYNC 141 #define OPAL_XIVE_DUMP 142 -#define OPAL_XIVE_RESERVED3 143 -#define OPAL_XIVE_RESERVED4 144 +#define OPAL_XIVE_GET_QUEUE_STATE 143 +#define OPAL_XIVE_SET_QUEUE_STATE 144 #define OPAL_SIGNAL_SYSTEM_RESET 145 #define OPAL_NPU_INIT_CONTEXT 146 #define OPAL_NPU_DESTROY_CONTEXT 147 @@ -209,8 +209,10 @@ #define OPAL_SENSOR_GROUP_ENABLE 163 #define OPAL_PCI_GET_PBCQ_TUNNEL_BAR 164 #define OPAL_PCI_SET_PBCQ_TUNNEL_BAR 165 +#define OPAL_HANDLE_HMI2 166 #define OPAL_NX_COPROC_INIT 167 -#define OPAL_LAST 167 +#define OPAL_XIVE_GET_VP_STATE 170 +#define OPAL_LAST 170 #define QUIESCE_HOLD 1 /* Spin all calls at entry */ #define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */ @@ -634,6 +636,15 @@ struct OpalHMIEvent { } u; }; +/* OPAL_HANDLE_HMI2 out_flags */ +enum { + OPAL_HMI_FLAGS_TB_RESYNC = (1ull << 0), /* Timebase has been resynced */ + OPAL_HMI_FLAGS_DEC_LOST = (1ull << 1), /* DEC lost, needs to be reprogrammed */ + OPAL_HMI_FLAGS_HDEC_LOST = (1ull << 2), /* HDEC lost, needs to be reprogrammed */ + OPAL_HMI_FLAGS_TOD_TB_FAIL = (1ull << 3), /* TOD/TB recovery failed. */ + OPAL_HMI_FLAGS_NEW_EVENT = (1ull << 63), /* An event has been created */ +}; + enum { OPAL_P7IOC_DIAG_TYPE_NONE = 0, OPAL_P7IOC_DIAG_TYPE_RGC = 1, @@ -1118,6 +1129,7 @@ enum { enum { OPAL_IMC_COUNTERS_NEST = 1, OPAL_IMC_COUNTERS_CORE = 2, + OPAL_IMC_COUNTERS_TRACE = 3, }; diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index a55b01c90bb1..4cc37e708bc7 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -203,6 +203,7 @@ int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer, int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data); int64_t opal_sensor_read_u64(u32 sensor_hndl, int token, __be64 *sensor_data); int64_t opal_handle_hmi(void); +int64_t opal_handle_hmi2(__be64 *out_flags); int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end); int64_t opal_unregister_dump_region(uint32_t id); int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val); @@ -279,6 +280,13 @@ int64_t opal_xive_allocate_irq(uint32_t chip_id); int64_t opal_xive_free_irq(uint32_t girq); int64_t opal_xive_sync(uint32_t type, uint32_t id); int64_t opal_xive_dump(uint32_t type, uint32_t id); +int64_t opal_xive_get_queue_state(uint64_t vp, uint32_t prio, + __be32 *out_qtoggle, + __be32 *out_qindex); +int64_t opal_xive_set_queue_state(uint64_t vp, uint32_t prio, + uint32_t qtoggle, + uint32_t qindex); +int64_t opal_xive_get_vp_state(uint64_t vp, __be64 *out_w01); int64_t opal_pci_set_p2p(uint64_t phb_init, uint64_t phb_target, uint64_t desc, uint16_t pe_number); @@ -352,6 +360,7 @@ int opal_power_control_init(void); extern int opal_machine_check(struct pt_regs *regs); extern bool opal_mce_check_early_recovery(struct pt_regs *regs); extern int opal_hmi_exception_early(struct pt_regs *regs); +extern int opal_hmi_exception_early2(struct pt_regs *regs); extern int opal_handle_hmi_exception(struct pt_regs *regs); extern void opal_shutdown(void); diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 134e912d403f..62f27e0aef7c 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -174,7 +174,6 @@ struct paca_struct { u8 irq_soft_mask; /* mask for irq soft masking */ u8 irq_happened; /* irq happened while soft-disabled */ u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ - u8 nap_state_lost; /* NV GPR values lost in power7_idle */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE u8 pmcregs_in_use; /* pseries puts this in lppaca */ #endif @@ -184,23 +183,28 @@ struct paca_struct { #endif #ifdef CONFIG_PPC_POWERNV - /* Per-core mask tracking idle threads and a lock bit-[L][TTTTTTTT] */ - u32 *core_idle_state_ptr; - u8 thread_idle_state; /* PNV_THREAD_RUNNING/NAP/SLEEP */ - /* Mask to indicate thread id in core */ - u8 thread_mask; - /* Mask to denote subcore sibling threads */ - u8 subcore_sibling_mask; - /* Flag to request this thread not to stop */ - atomic_t dont_stop; - /* The PSSCR value that the kernel requested before going to stop */ - u64 requested_psscr; - - /* - * Save area for additional SPRs that need to be - * saved/restored during cpuidle stop. - */ - struct stop_sprs stop_sprs; + /* PowerNV idle fields */ + /* PNV_CORE_IDLE_* bits, all siblings work on thread 0 paca */ + unsigned long idle_state; + union { + /* P7/P8 specific fields */ + struct { + /* PNV_THREAD_RUNNING/NAP/SLEEP */ + u8 thread_idle_state; + /* Mask to denote subcore sibling threads */ + u8 subcore_sibling_mask; + }; + + /* P9 specific fields */ + struct { +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* The PSSCR value that the kernel requested before going to stop */ + u64 requested_psscr; + /* Flag to request this thread not to stop */ + atomic_t dont_stop; +#endif + }; + }; #endif #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index ed870468ef6f..dbc8c0679480 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -28,11 +28,15 @@ #define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) #ifndef __ASSEMBLY__ -#ifdef CONFIG_HUGETLB_PAGE -extern bool hugetlb_disabled; -extern unsigned int HPAGE_SHIFT; -#else +#ifndef CONFIG_HUGETLB_PAGE #define HPAGE_SHIFT PAGE_SHIFT +#elif defined(CONFIG_PPC_BOOK3S_64) +extern unsigned int hpage_shift; +#define HPAGE_SHIFT hpage_shift +#elif defined(CONFIG_PPC_8xx) +#define HPAGE_SHIFT 19 /* 512k pages */ +#elif defined(CONFIG_PPC_FSL_BOOK3E) +#define HPAGE_SHIFT 22 /* 4M pages */ #endif #define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) @@ -132,18 +136,7 @@ static inline bool pfn_valid(unsigned long pfn) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) -#ifdef CONFIG_PPC_BOOK3S_64 -/* - * On hash the vmalloc and other regions alias to the kernel region when passed - * through __pa(), which virt_to_pfn() uses. That means virt_addr_valid() can - * return true for some vmalloc addresses, which is incorrect. So explicitly - * check that the address is in the kernel region. - */ -#define virt_addr_valid(kaddr) (REGION_ID(kaddr) == KERNEL_REGION_ID && \ - pfn_valid(virt_to_pfn(kaddr))) -#else #define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) -#endif /* * On Book-E parts we need __va to parse the device tree and we can't diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h index e11f03007b57..2b2c60a1a66d 100644 --- a/arch/powerpc/include/asm/pgalloc.h +++ b/arch/powerpc/include/asm/pgalloc.h @@ -20,10 +20,61 @@ static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp) #define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO) +pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel); + +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) +{ + return (pte_t *)pte_fragment_alloc(mm, 1); +} + +static inline pgtable_t pte_alloc_one(struct mm_struct *mm) +{ + return (pgtable_t)pte_fragment_alloc(mm, 0); +} + +void pte_frag_destroy(void *pte_frag); +void pte_fragment_free(unsigned long *table, int kernel); + +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) +{ + pte_fragment_free((unsigned long *)pte, 1); +} + +static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) +{ + pte_fragment_free((unsigned long *)ptepage, 0); +} + +/* + * Functions that deal with pagetables that could be at any level of + * the table need to be passed an "index_size" so they know how to + * handle allocation. For PTE pages, the allocation size will be + * (2^index_size * sizeof(pointer)) and allocations are drawn from + * the kmem_cache in PGT_CACHE(index_size). + * + * The maximum index size needs to be big enough to allow any + * pagetable sizes we need, but small enough to fit in the low bits of + * any page table pointer. In other words all pagetables, even tiny + * ones, must be aligned to allow at least enough low 0 bits to + * contain this value. This value is also used as a mask, so it must + * be one less than a power of two. + */ +#define MAX_PGTABLE_INDEX_SIZE 0xf + +extern struct kmem_cache *pgtable_cache[]; +#define PGT_CACHE(shift) pgtable_cache[shift] + +static inline void check_pgt_cache(void) { } + #ifdef CONFIG_PPC_BOOK3S #include <asm/book3s/pgalloc.h> #else #include <asm/nohash/pgalloc.h> #endif +static inline pgtable_t pmd_pgtable(pmd_t pmd) +{ + return (pgtable_t)pmd_page_vaddr(pmd); +} + #endif /* _ASM_POWERPC_PGALLOC_H */ diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h index a89c67b62680..b169bbf95fcb 100644 --- a/arch/powerpc/include/asm/pgtable-be-types.h +++ b/arch/powerpc/include/asm/pgtable-be-types.h @@ -33,11 +33,7 @@ static inline __be64 pmd_raw(pmd_t x) return x.pmd; } -/* - * 64 bit hash always use 4 level table. Everybody else use 4 level - * only for 4K page size. - */ -#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES) +/* 64 bit always use 4 level table. */ typedef struct { __be64 pud; } pud_t; #define __pud(x) ((pud_t) { cpu_to_be64(x) }) #define __pud_raw(x) ((pud_t) { (x) }) @@ -51,7 +47,6 @@ static inline __be64 pud_raw(pud_t x) return x.pud; } -#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */ #endif /* CONFIG_PPC64 */ /* PGD level */ @@ -77,7 +72,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; * With hash config 64k pages additionally define a bigger "real PTE" type that * gathers the "second half" part of the PTE for pseudo 64k pages */ -#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_BOOK3S_64) +#ifdef CONFIG_PPC_64K_PAGES typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; #else typedef struct { pte_t pte; } real_pte_t; diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h index 3b0edf041b2e..d11b4c61d686 100644 --- a/arch/powerpc/include/asm/pgtable-types.h +++ b/arch/powerpc/include/asm/pgtable-types.h @@ -23,18 +23,13 @@ static inline unsigned long pmd_val(pmd_t x) return x.pmd; } -/* - * 64 bit hash always use 4 level table. Everybody else use 4 level - * only for 4K page size. - */ -#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES) +/* 64 bit always use 4 level table. */ typedef struct { unsigned long pud; } pud_t; #define __pud(x) ((pud_t) { (x) }) static inline unsigned long pud_val(pud_t x) { return x.pud; } -#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */ #endif /* CONFIG_PPC64 */ /* PGD level */ @@ -54,7 +49,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; * With hash config 64k pages additionally define a bigger "real PTE" type that * gathers the "second half" part of the PTE for pseudo 64k pages */ -#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_BOOK3S_64) +#ifdef CONFIG_PPC_64K_PAGES typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; #else typedef struct { pte_t pte; } real_pte_t; diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 505550fb2935..3f53be60fb01 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -89,9 +89,6 @@ extern void paging_init(void); */ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *); -extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, - unsigned long end, int write, - struct page **pages, int *nr); #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_large(pmd) 0 #endif @@ -108,6 +105,12 @@ void mark_initmem_nx(void); static inline void mark_initmem_nx(void) { } #endif +#ifdef CONFIG_PPC_DEBUG_WX +void ptdump_check_wx(void); +#else +static inline void ptdump_check_wx(void) { } +#endif + /* * When used, PTE_FRAG_NR is defined in subarch pgtable.h * so we are sure it is included when arriving here. diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 3351bcf42f2d..706ac5df546f 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -164,6 +164,9 @@ struct thread_struct { unsigned long rtas_sp; /* stack pointer for when in RTAS */ #endif #endif +#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) + unsigned long kuap; /* opened segments for user access */ +#endif /* Debug Registers */ struct debug_reg debug; struct thread_fp_state fp_state; @@ -411,14 +414,17 @@ static inline unsigned long get_clean_sp(unsigned long sp, int is_32) } #endif +/* asm stubs */ +extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val); +extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val); +extern unsigned long isa206_idle_insn_mayloss(unsigned long type); + extern unsigned long cpuidle_disable; enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF}; extern int powersave_nap; /* set if nap mode can be used in idle loop */ -extern unsigned long power7_idle_insn(unsigned long type); /* PNV_THREAD_NAP/etc*/ + extern void power7_idle_type(unsigned long type); -extern unsigned long power9_idle_stop(unsigned long psscr_val); -extern unsigned long power9_offline_stop(unsigned long psscr_val); extern void power9_idle_type(unsigned long stop_psscr_val, unsigned long stop_psscr_mask); diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 64271e562fed..6f047730e642 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -52,10 +52,17 @@ struct pt_regs }; }; + union { + struct { #ifdef CONFIG_PPC64 - unsigned long ppr; - unsigned long __pad; /* Maintain 16 byte interrupt stack alignment */ + unsigned long ppr; +#endif +#ifdef CONFIG_PPC_KUAP + unsigned long kuap; #endif + }; + unsigned long __pad[2]; /* Maintain 16 byte interrupt stack alignment */ + }; }; #endif diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c5b2aff0ce8e..10caa145f98b 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -168,6 +168,7 @@ #define PSSCR_ESL 0x00200000 /* Enable State Loss */ #define PSSCR_SD 0x00400000 /* Status Disable */ #define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */ +#define PSSCR_PLS_SHIFT 60 #define PSSCR_GUEST_VIS 0xf0000000000003ffUL /* Guest-visible PSSCR fields */ #define PSSCR_FAKE_SUSPEND 0x00000400 /* Fake-suspend bit (P9 DD2.2) */ #define PSSCR_FAKE_SUSPEND_LG 10 /* Fake-suspend bit position */ @@ -758,10 +759,9 @@ #define SRR1_WAKERESET 0x00100000 /* System reset */ #define SRR1_WAKEHDBELL 0x000c0000 /* Hypervisor doorbell on P8 */ #define SRR1_WAKESTATE 0x00030000 /* Powersave exit mask [46:47] */ -#define SRR1_WS_DEEPEST 0x00030000 /* Some resources not maintained, - * may not be recoverable */ -#define SRR1_WS_DEEPER 0x00020000 /* Some resources not maintained */ -#define SRR1_WS_DEEP 0x00010000 /* All resources maintained */ +#define SRR1_WS_HVLOSS 0x00030000 /* HV resources not maintained */ +#define SRR1_WS_GPRLOSS 0x00020000 /* GPRs not maintained */ +#define SRR1_WS_NOLOSS 0x00010000 /* All resources maintained */ #define SRR1_PROGTM 0x00200000 /* TM Bad Thing */ #define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */ #define SRR1_PROGILL 0x00080000 /* Illegal instruction */ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index eb2a33d5df26..e382bd6ede84 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -41,7 +41,7 @@ #if defined(CONFIG_PPC_BOOK3E_64) #define MSR_64BIT MSR_CM -#define MSR_ (MSR_ME | MSR_CE) +#define MSR_ (MSR_ME | MSR_RI | MSR_CE) #define MSR_KERNEL (MSR_ | MSR_64BIT) #define MSR_USER32 (MSR_ | MSR_PR | MSR_EE) #define MSR_USER64 (MSR_USER32 | MSR_64BIT) diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h index 44816cbc4198..c6f466f4c241 100644 --- a/arch/powerpc/include/asm/slice.h +++ b/arch/powerpc/include/asm/slice.h @@ -4,9 +4,7 @@ #ifdef CONFIG_PPC_BOOK3S_64 #include <asm/book3s/64/slice.h> -#elif defined(CONFIG_PPC64) -#include <asm/nohash/64/slice.h> -#elif defined(CONFIG_PPC_MMU_NOHASH) +#elif defined(CONFIG_PPC_MMU_NOHASH_32) #include <asm/nohash/32/slice.h> #endif @@ -38,6 +36,11 @@ void slice_setup_new_exec(void); static inline void slice_init_new_context_exec(struct mm_struct *mm) {} +static inline unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) +{ + return 0; +} + #endif /* CONFIG_PPC_MM_SLICES */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h index 68da49320592..3192d454a733 100644 --- a/arch/powerpc/include/asm/sparsemem.h +++ b/arch/powerpc/include/asm/sparsemem.h @@ -17,9 +17,9 @@ extern int create_section_mapping(unsigned long start, unsigned long end, int ni extern int remove_section_mapping(unsigned long start, unsigned long end); #ifdef CONFIG_PPC_BOOK3S_64 -extern void resize_hpt_for_hotplug(unsigned long new_mem_size); +extern int resize_hpt_for_hotplug(unsigned long new_mem_size); #else -static inline void resize_hpt_for_hotplug(unsigned long new_mem_size) { } +static inline int resize_hpt_for_hotplug(unsigned long new_mem_size) { return 0; } #endif #ifdef CONFIG_NUMA diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h index 1647de15a31e..9bf6dffb4090 100644 --- a/arch/powerpc/include/asm/string.h +++ b/arch/powerpc/include/asm/string.h @@ -4,14 +4,17 @@ #ifdef __KERNEL__ +#ifndef CONFIG_KASAN #define __HAVE_ARCH_STRNCPY #define __HAVE_ARCH_STRNCMP +#define __HAVE_ARCH_MEMCHR +#define __HAVE_ARCH_MEMCMP +#define __HAVE_ARCH_MEMSET16 +#endif + #define __HAVE_ARCH_MEMSET #define __HAVE_ARCH_MEMCPY #define __HAVE_ARCH_MEMMOVE -#define __HAVE_ARCH_MEMCMP -#define __HAVE_ARCH_MEMCHR -#define __HAVE_ARCH_MEMSET16 #define __HAVE_ARCH_MEMCPY_FLUSHCACHE extern char * strcpy(char *,const char *); @@ -27,7 +30,27 @@ extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); extern void * memcpy_flushcache(void *,const void *,__kernel_size_t); +void *__memset(void *s, int c, __kernel_size_t count); +void *__memcpy(void *to, const void *from, __kernel_size_t n); +void *__memmove(void *to, const void *from, __kernel_size_t n); + +#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) +/* + * For files that are not instrumented (e.g. mm/slub.c) we + * should use not instrumented version of mem* functions. + */ +#define memcpy(dst, src, len) __memcpy(dst, src, len) +#define memmove(dst, src, len) __memmove(dst, src, len) +#define memset(s, c, n) __memset(s, c, n) + +#ifndef __NO_FORTIFY +#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */ +#endif + +#endif + #ifdef CONFIG_PPC64 +#ifndef CONFIG_KASAN #define __HAVE_ARCH_MEMSET32 #define __HAVE_ARCH_MEMSET64 @@ -49,8 +72,11 @@ static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n) { return __memset64(p, v, n * 8); } +#endif #else +#ifndef CONFIG_KASAN #define __HAVE_ARCH_STRLEN +#endif extern void *memset16(uint16_t *, uint16_t, __kernel_size_t); #endif diff --git a/arch/powerpc/include/asm/task_size_64.h b/arch/powerpc/include/asm/task_size_64.h index eab4779f6b84..c993482237ed 100644 --- a/arch/powerpc/include/asm/task_size_64.h +++ b/arch/powerpc/include/asm/task_size_64.h @@ -20,7 +20,7 @@ /* * For now 512TB is only supported with book3s and 64K linux page size. */ -#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_64K_PAGES) +#ifdef CONFIG_PPC_64K_PAGES /* * Max value currently used: */ diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 54bf7e68a7e1..57e968413d1e 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -36,6 +36,8 @@ extern unsigned long ppc_proc_freq; extern unsigned long ppc_tb_freq; #define DEFAULT_TB_FREQ 125000000UL +extern bool tb_invalid; + struct div_result { u64 result_high; u64 result_low; diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h index 58ef8c43a89d..08cd60cd70b7 100644 --- a/arch/powerpc/include/asm/trace.h +++ b/arch/powerpc/include/asm/trace.h @@ -54,6 +54,22 @@ DEFINE_EVENT(ppc64_interrupt_class, timer_interrupt_exit, TP_ARGS(regs) ); +#ifdef CONFIG_PPC_DOORBELL +DEFINE_EVENT(ppc64_interrupt_class, doorbell_entry, + + TP_PROTO(struct pt_regs *regs), + + TP_ARGS(regs) +); + +DEFINE_EVENT(ppc64_interrupt_class, doorbell_exit, + + TP_PROTO(struct pt_regs *regs), + + TP_ARGS(regs) +); +#endif + #ifdef CONFIG_PPC_PSERIES extern int hcall_tracepoint_regfunc(void); extern void hcall_tracepoint_unregfunc(void); diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 4d6d905e9138..76f34346b642 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -6,6 +6,7 @@ #include <asm/processor.h> #include <asm/page.h> #include <asm/extable.h> +#include <asm/kup.h> /* * The fs value determines whether argument validity checking should be @@ -140,6 +141,7 @@ extern long __put_user_bad(void); #define __put_user_size(x, ptr, size, retval) \ do { \ retval = 0; \ + allow_write_to_user(ptr, size); \ switch (size) { \ case 1: __put_user_asm(x, ptr, retval, "stb"); break; \ case 2: __put_user_asm(x, ptr, retval, "sth"); break; \ @@ -147,6 +149,7 @@ do { \ case 8: __put_user_asm2(x, ptr, retval); break; \ default: __put_user_bad(); \ } \ + prevent_write_to_user(ptr, size); \ } while (0) #define __put_user_nocheck(x, ptr, size) \ @@ -239,6 +242,7 @@ do { \ __chk_user_ptr(ptr); \ if (size > sizeof(x)) \ (x) = __get_user_bad(); \ + allow_read_from_user(ptr, size); \ switch (size) { \ case 1: __get_user_asm(x, ptr, retval, "lbz"); break; \ case 2: __get_user_asm(x, ptr, retval, "lhz"); break; \ @@ -246,6 +250,7 @@ do { \ case 8: __get_user_asm2(x, ptr, retval); break; \ default: (x) = __get_user_bad(); \ } \ + prevent_read_from_user(ptr, size); \ } while (0) /* @@ -305,15 +310,21 @@ extern unsigned long __copy_tofrom_user(void __user *to, static inline unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) { - return __copy_tofrom_user(to, from, n); + unsigned long ret; + + allow_user_access(to, from, n); + ret = __copy_tofrom_user(to, from, n); + prevent_user_access(to, from, n); + return ret; } #endif /* __powerpc64__ */ static inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { + unsigned long ret; if (__builtin_constant_p(n) && (n <= 8)) { - unsigned long ret = 1; + ret = 1; switch (n) { case 1: @@ -338,14 +349,18 @@ static inline unsigned long raw_copy_from_user(void *to, } barrier_nospec(); - return __copy_tofrom_user((__force void __user *)to, from, n); + allow_read_from_user(from, n); + ret = __copy_tofrom_user((__force void __user *)to, from, n); + prevent_read_from_user(from, n); + return ret; } static inline unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) { + unsigned long ret; if (__builtin_constant_p(n) && (n <= 8)) { - unsigned long ret = 1; + ret = 1; switch (n) { case 1: @@ -365,17 +380,24 @@ static inline unsigned long raw_copy_to_user(void __user *to, return 0; } - return __copy_tofrom_user(to, (__force const void __user *)from, n); + allow_write_to_user(to, n); + ret = __copy_tofrom_user(to, (__force const void __user *)from, n); + prevent_write_to_user(to, n); + return ret; } extern unsigned long __clear_user(void __user *addr, unsigned long size); static inline unsigned long clear_user(void __user *addr, unsigned long size) { + unsigned long ret = size; might_fault(); - if (likely(access_ok(addr, size))) - return __clear_user(addr, size); - return size; + if (likely(access_ok(addr, size))) { + allow_write_to_user(addr, size); + ret = __clear_user(addr, size); + prevent_write_to_user(addr, size); + } + return ret; } extern long strncpy_from_user(char *dst, const char __user *src, long count); diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 3c704f5dd3ae..b579a943407b 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -109,12 +109,26 @@ extern int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio); extern void xive_native_sync_source(u32 hw_irq); +extern void xive_native_sync_queue(u32 hw_irq); extern bool is_xive_irq(struct irq_chip *chip); extern int xive_native_enable_vp(u32 vp_id, bool single_escalation); extern int xive_native_disable_vp(u32 vp_id); extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id); extern bool xive_native_has_single_escalation(void); +extern int xive_native_get_queue_info(u32 vp_id, uint32_t prio, + u64 *out_qpage, + u64 *out_qsize, + u64 *out_qeoi_page, + u32 *out_escalate_irq, + u64 *out_qflags); + +extern int xive_native_get_queue_state(u32 vp_id, uint32_t prio, u32 *qtoggle, + u32 *qindex); +extern int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle, + u32 qindex); +extern int xive_native_get_vp_state(u32 vp_id, u64 *out_state); + #else static inline bool xive_enabled(void) { return false; } |