author     Michael Ellerman <mpe@ellerman.id.au>  2020-05-26 22:54:27 +1000
committer  Michael Ellerman <mpe@ellerman.id.au>  2020-05-26 22:54:27 +1000
commit     bb5f33c069402035a3d6a2091ee68cac6999d774 (patch)
tree       51d34ca45536df30bb4fe84aa03d8d666cefea1c /arch/powerpc/include/asm/book3s/32
parent     82a1b8ed5604cccf30b6ff03bcd61640cd26369b (diff)
parent     7974c4732642f710b5111165ae1f7f7fed822282 (diff)
Merge "Use hugepages to map kernel mem on 8xx" into next
Merge Christophe's large series to use huge pages for the linear mapping on 8xx.

From his cover letter:

The main purpose of this big series is to:
- reorganise huge page handling to avoid using mm_slices.
- use huge pages to map kernel memory on the 8xx.

The 8xx supports 4 page sizes: 4k, 16k, 512k and 8M. It uses two-level page
tables, the PGD having 1024 entries, each entry covering a 4M address space.
Each page table then has 1024 entries.

At the moment, page sizes are managed in PGD entries, implying the use of
mm_slices because several page sizes can't be mixed in one page table.

The first purpose of this series is to reorganise things so that standard
page tables can also handle 512k pages. This is done by adding a new
_PAGE_HUGE flag which is copied into the Level 1 entry in the TLB miss
handler. That done, there are two kinds of PGD entries:
- PGD entries pointing to regular page tables, handling 4k/16k and 512k pages
- PGD entries pointing to hugepd tables, handling 8M pages

There is no need to mix 8M pages with other sizes, because an 8M page spans
more than a single PGD entry covers.

Then comes the second purpose of this series. At the moment, the 8xx has
special handling in the TLB miss handlers in order to transparently map the
kernel linear address space and the IMMR using huge pages, by building the
TLB entries in assembly at the time of the exception. As mm_slices is only
for user space pages, and because it would anyway not be convenient to slice
the kernel address space, it was not possible to use huge pages for the
kernel address space. After step one of the series, using huge pages there
becomes much more flexible. This series drops all assembly 'just in time'
handling of huge pages and uses huge pages in page tables instead.

Once the above is done, the icing on the cake follows:
- Use huge pages for KASAN shadow mapping
- Allow pinned TLBs with strict kernel rwx
- Allow pinned TLBs with debug pagealloc

Last but not least, those 8xx modifications allow the following improvements
on book3s/32:
- Mapping the KASAN shadow with BATs
- Allowing BATs with debug pagealloc

All this considerably simplifies the TLB miss handlers and the associated
initialisation. The overhead of reading page tables is negligible compared
to the reduction of the miss handlers.

While touching pte_update(), some cleanup was done there too.

Tested widely on 8xx and 832x. Boot tested on QEMU MAC99.
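The pte_update() rework in the diff below folds the 32-bit and 64-bit PTE
variants into a single helper that atomically clears the bits in clr, sets
the bits in set, and returns the old PTE value; the new prototype also takes
mm, addr and a huge argument, which every caller shown here passes as 0. As a
rough illustration of that clear/set pattern only, here is a minimal
user-space C sketch using a GCC/Clang compare-and-swap loop in place of the
kernel's lwarx/stwcx. sequence. The pte_update_sketch() helper, the flag
values and the pte_basic_t stand-in below are illustrative, not the kernel's
definitions.

    #include <stdio.h>
    #include <stdint.h>

    typedef uint32_t pte_basic_t;   /* illustrative stand-in, not the kernel type */

    /*
     * Atomically do: *p = (*p & ~clr) | set; return the previous value.
     * Models the andc/or performed inside the lwarx/stwcx. loop of pte_update().
     */
    static pte_basic_t pte_update_sketch(pte_basic_t *p, pte_basic_t clr, pte_basic_t set)
    {
            pte_basic_t old = __atomic_load_n(p, __ATOMIC_RELAXED);
            pte_basic_t new;

            do {
                    new = (old & ~clr) | set;
            } while (!__atomic_compare_exchange_n(p, &old, new, 1 /* weak */,
                                                  __ATOMIC_RELAXED, __ATOMIC_RELAXED));
            return old;
    }

    #define PAGE_RW       0x001u   /* made-up bit values, not the real PTE layout */
    #define PAGE_ACCESSED 0x002u

    int main(void)
    {
            pte_basic_t pte = PAGE_RW | PAGE_ACCESSED;

            /* ptep_set_wrprotect() style call: clear RW, set nothing */
            pte_basic_t old = pte_update_sketch(&pte, PAGE_RW, 0);

            printf("old=%#x new=%#x\n", (unsigned)old, (unsigned)pte);
            return 0;
    }

The kernel version does the same read-modify-write with lwarx/stwcx. so it
can also read the high word of a 64-bit PTE under the same reservation, which
is why the CONFIG_PTE_64BIT branch in the hunk below passes p + 4 and loads
the upper half with an extra lwz.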
Diffstat (limited to 'arch/powerpc/include/asm/book3s/32')
-rw-r--r--  arch/powerpc/include/asm/book3s/32/pgtable.h  78
1 file changed, 31 insertions(+), 47 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 0d4bccb4b9f2..8a091d125f2d 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -218,7 +218,7 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
*/
#define pte_clear(mm, addr, ptep) \
- do { pte_update(ptep, ~_PAGE_HASHPTE, 0); } while (0)
+ do { pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0); } while (0)
#define pmd_none(pmd) (!pmd_val(pmd))
#define pmd_bad(pmd) (pmd_val(pmd) & _PMD_BAD)
@@ -253,84 +253,68 @@ extern void flush_hash_entry(struct mm_struct *mm, pte_t *ptep,
* and the PTE may be either 32 or 64 bit wide. In the later case,
* when using atomic updates, only the low part of the PTE is
* accessed atomically.
- *
- * In addition, on 44x, we also maintain a global flag indicating
- * that an executable user mapping was modified, which is needed
- * to properly flush the virtually tagged instruction cache of
- * those implementations.
*/
-#ifndef CONFIG_PTE_64BIT
-static inline unsigned long pte_update(pte_t *p,
- unsigned long clr,
- unsigned long set)
+static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p,
+ unsigned long clr, unsigned long set, int huge)
{
- unsigned long old, tmp;
-
- __asm__ __volatile__("\
-1: lwarx %0,0,%3\n\
- andc %1,%0,%4\n\
- or %1,%1,%5\n"
-" stwcx. %1,0,%3\n\
- bne- 1b"
- : "=&r" (old), "=&r" (tmp), "=m" (*p)
- : "r" (p), "r" (clr), "r" (set), "m" (*p)
- : "cc" );
-
- return old;
-}
-#else /* CONFIG_PTE_64BIT */
-static inline unsigned long long pte_update(pte_t *p,
- unsigned long clr,
- unsigned long set)
-{
- unsigned long long old;
+ pte_basic_t old;
unsigned long tmp;
- __asm__ __volatile__("\
-1: lwarx %L0,0,%4\n\
- lwzx %0,0,%3\n\
- andc %1,%L0,%5\n\
- or %1,%1,%6\n"
-" stwcx. %1,0,%4\n\
- bne- 1b"
+ __asm__ __volatile__(
+#ifndef CONFIG_PTE_64BIT
+"1: lwarx %0, 0, %3\n"
+" andc %1, %0, %4\n"
+#else
+"1: lwarx %L0, 0, %3\n"
+" lwz %0, -4(%3)\n"
+" andc %1, %L0, %4\n"
+#endif
+" or %1, %1, %5\n"
+" stwcx. %1, 0, %3\n"
+" bne- 1b"
: "=&r" (old), "=&r" (tmp), "=m" (*p)
- : "r" (p), "r" ((unsigned long)(p) + 4), "r" (clr), "r" (set), "m" (*p)
+#ifndef CONFIG_PTE_64BIT
+ : "r" (p),
+#else
+ : "b" ((unsigned long)(p) + 4),
+#endif
+ "r" (clr), "r" (set), "m" (*p)
: "cc" );
return old;
}
-#endif /* CONFIG_PTE_64BIT */
/*
* 2.6 calls this without flushing the TLB entry; this is wrong
* for our hash-based implementation, we fix that up here.
*/
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static inline int __ptep_test_and_clear_young(unsigned int context, unsigned long addr, pte_t *ptep)
+static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
{
unsigned long old;
- old = pte_update(ptep, _PAGE_ACCESSED, 0);
+ old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
if (old & _PAGE_HASHPTE) {
unsigned long ptephys = __pa(ptep) & PAGE_MASK;
- flush_hash_pages(context, addr, ptephys, 1);
+ flush_hash_pages(mm->context.id, addr, ptephys, 1);
}
return (old & _PAGE_ACCESSED) != 0;
}
#define ptep_test_and_clear_young(__vma, __addr, __ptep) \
- __ptep_test_and_clear_young((__vma)->vm_mm->context.id, __addr, __ptep)
+ __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep)
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
- return __pte(pte_update(ptep, ~_PAGE_HASHPTE, 0));
+ return __pte(pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0));
}
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
- pte_update(ptep, _PAGE_RW, 0);
+ pte_update(mm, addr, ptep, _PAGE_RW, 0, 0);
}
static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
@@ -341,7 +325,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long set = pte_val(entry) &
(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
- pte_update(ptep, 0, set);
+ pte_update(vma->vm_mm, address, ptep, 0, set, 0);
flush_tlb_page(vma, address);
}
@@ -539,7 +523,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
| (pte_val(pte) & ~_PAGE_HASHPTE));
else
- pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte));
+ pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, pte_val(pte), 0);
#elif defined(CONFIG_PTE_64BIT)
/* Second case is 32-bit with 64-bit PTE. In this case, we