sparc64: Support transparent huge pages.

This is relatively easy since PMD's now cover exactly 4MB of memory. Our PMD entries are 32-bits each, so we use a special encoding. The lowest bit, PMD_ISHUGE, determines the interpretation. This is possible because sparc64's page tables are purely software entities so we can use whatever encoding scheme we want. We just have to make the TLB miss assembler page table walkers aware of the layout. set_pmd_at() works much like set_pte_at() but it has to operate in two page from a table of non-huge PTEs, so we have to queue up TLB flushes based upon what mappings are valid in the PTE table. In the second regime we are going from huge-page to non-huge-page, and in that case we need only queue up a single TLB flush to push out the huge page mapping. We still have 5 bits remaining in the huge PMD encoding so we can very likely support any new pieces of THP state tracking that might get added in the future. With lots of help from Johannes Weiner. Signed-off-by: David S. Miller <davem@davemloft.net> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: David Miller <davem@davemloft.net> 2012-10-08 16:34:29 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-10-09 16:23:06 +0900
commit: 9e695d2ecc8451cc2c1603d60b5c8e7f5581923a (patch)
tree: 77528ae73fe70d1bae3ced18a50e59fd81d2372c /arch/sparc/mm/tlb.c
parent: f5c8ad47284ca01dafc37da5a72bb9644174d387 (diff)
download: linux-9e695d2ecc8451cc2c1603d60b5c8e7f5581923a.tar.gz
linux-9e695d2ecc8451cc2c1603d60b5c8e7f5581923a.tar.bz2
linux-9e695d2ecc8451cc2c1603d60b5c8e7f5581923a.zip
1 files changed, 102 insertions, 16 deletions
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index b1f279cd00bf..3e8fec391fe0 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -43,16 +43,37 @@ void flush_tlb_pending(void)
 	put_cpu_var(tlb_batch);
 }
 
-void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
-		   pte_t *ptep, pte_t orig, int fullmm)
+static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr,
+			      bool exec)
 {
 	struct tlb_batch *tb = &get_cpu_var(tlb_batch);
 	unsigned long nr;
 
 	vaddr &= PAGE_MASK;
-	if (pte_exec(orig))
+	if (exec)
 		vaddr |= 0x1UL;
 
+	nr = tb->tlb_nr;
+
+	if (unlikely(nr != 0 && mm != tb->mm)) {
+		flush_tlb_pending();
+		nr = 0;
+	}
+
+	if (nr == 0)
+		tb->mm = mm;
+
+	tb->vaddrs[nr] = vaddr;
+	tb->tlb_nr = ++nr;
+	if (nr >= TLB_BATCH_NR)
+		flush_tlb_pending();
+
+	put_cpu_var(tlb_batch);
+}
+
+void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
+		   pte_t *ptep, pte_t orig, int fullmm)
+{
 	if (tlb_type != hypervisor &&
 	    pte_dirty(orig)) {
 		unsigned long paddr, pfn = pte_pfn(orig);
@@ -77,26 +98,91 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
 	}
 
 no_cache_flush:
+	if (!fullmm)
+		tlb_batch_add_one(mm, vaddr, pte_exec(orig));
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr,
+			       pmd_t pmd, bool exec)
+{
+	unsigned long end;
+	pte_t *pte;
+
+	pte = pte_offset_map(&pmd, vaddr);
+	end = vaddr + HPAGE_SIZE;
+	while (vaddr < end) {
+		if (pte_val(*pte) & _PAGE_VALID)
+			tlb_batch_add_one(mm, vaddr, exec);
+		pte++;
+		vaddr += PAGE_SIZE;
+	}
+	pte_unmap(pte);
+}
 
-	if (fullmm) {
-		put_cpu_var(tlb_batch);
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd)
+{
+	pmd_t orig = *pmdp;
+
+	*pmdp = pmd;
+
+	if (mm == &init_mm)
 		return;
+
+	if ((pmd_val(pmd) ^ pmd_val(orig)) & PMD_ISHUGE) {
+		if (pmd_val(pmd) & PMD_ISHUGE)
+			mm->context.huge_pte_count++;
+		else
+			mm->context.huge_pte_count--;
+		if (mm->context.huge_pte_count == 1)
+			hugetlb_setup(mm);
 	}
 
-	nr = tb->tlb_nr;
+	if (!pmd_none(orig)) {
+		bool exec = ((pmd_val(orig) & PMD_HUGE_EXEC) != 0);
 
-	if (unlikely(nr != 0 && mm != tb->mm)) {
-		flush_tlb_pending();
-		nr = 0;
+		addr &= HPAGE_MASK;
+		if (pmd_val(orig) & PMD_ISHUGE)
+			tlb_batch_add_one(mm, addr, exec);
+		else
+			tlb_batch_pmd_scan(mm, addr, orig, exec);
 	}
+}
 
-	if (nr == 0)
-		tb->mm = mm;
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable)
+{
+	struct list_head *lh = (struct list_head *) pgtable;
 
-	tb->vaddrs[nr] = vaddr;
-	tb->tlb_nr = ++nr;
-	if (nr >= TLB_BATCH_NR)
-		flush_tlb_pending();
+	assert_spin_locked(&mm->page_table_lock);
 
-	put_cpu_var(tlb_batch);
+	/* FIFO */
+	if (!mm->pmd_huge_pte)
+		INIT_LIST_HEAD(lh);
+	else
+		list_add(lh, (struct list_head *) mm->pmd_huge_pte);
+	mm->pmd_huge_pte = pgtable;
+}
+
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm)
+{
+	struct list_head *lh;
+	pgtable_t pgtable;
+
+	assert_spin_locked(&mm->page_table_lock);
+
+	/* FIFO */
+	pgtable = mm->pmd_huge_pte;
+	lh = (struct list_head *) pgtable;
+	if (list_empty(lh))
+		mm->pmd_huge_pte = NULL;
+	else {
+		mm->pmd_huge_pte = (pgtable_t) lh->next;
+		list_del(lh);
+	}
+	pte_val(pgtable[0]) = 0;
+	pte_val(pgtable[1]) = 0;
+
+	return pgtable;
 }
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
author	David Miller <davem@davemloft.net>	2012-10-08 16:34:29 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-10-09 16:23:06 +0900
commit	9e695d2ecc8451cc2c1603d60b5c8e7f5581923a (patch)
tree	77528ae73fe70d1bae3ced18a50e59fd81d2372c /arch/sparc/mm/tlb.c
parent	f5c8ad47284ca01dafc37da5a72bb9644174d387 (diff)
download	linux-9e695d2ecc8451cc2c1603d60b5c8e7f5581923a.tar.gz linux-9e695d2ecc8451cc2c1603d60b5c8e7f5581923a.tar.bz2 linux-9e695d2ecc8451cc2c1603d60b5c8e7f5581923a.zip