summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/hugetlbpage.c3
-rw-r--r--arch/powerpc/mm/mmu_context_iommu.c37
-rw-r--r--arch/powerpc/mm/pgtable-book3s64.c12
-rw-r--r--arch/powerpc/mm/subpage-prot.c4
-rw-r--r--arch/powerpc/mm/tlb-radix.c98
5 files changed, 126 insertions, 28 deletions
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 7c5f479c5c00..8a9a49c13865 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -337,7 +337,8 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
if (shift >= pdshift)
hugepd_free(tlb, hugepte);
else
- pgtable_free_tlb(tlb, hugepte, pdshift - shift);
+ pgtable_free_tlb(tlb, hugepte,
+ get_hugepd_cache_index(pdshift - shift));
}
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index abb43646927a..a4ca57612558 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -19,6 +19,7 @@
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <asm/mmu_context.h>
+#include <asm/pte-walk.h>
static DEFINE_MUTEX(mem_list_mutex);
@@ -27,6 +28,7 @@ struct mm_iommu_table_group_mem_t {
struct rcu_head rcu;
unsigned long used;
atomic64_t mapped;
+ unsigned int pageshift;
u64 ua; /* userspace address */
u64 entries; /* number of entries in hpas[] */
u64 *hpas; /* vmalloc'ed */
@@ -125,6 +127,8 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
{
struct mm_iommu_table_group_mem_t *mem;
long i, j, ret = 0, locked_entries = 0;
+ unsigned int pageshift;
+ unsigned long flags;
struct page *page = NULL;
mutex_lock(&mem_list_mutex);
@@ -159,6 +163,12 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
goto unlock_exit;
}
+ /*
+ * As a starting point for the maximum page size calculation
+ * we use @ua and @entries natural alignment to allow IOMMU pages
+ * smaller than huge pages but still bigger than PAGE_SIZE.
+ */
+ mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT));
mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
if (!mem->hpas) {
kfree(mem);
@@ -199,6 +209,23 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
}
}
populate:
+ pageshift = PAGE_SHIFT;
+ if (PageCompound(page)) {
+ pte_t *pte;
+ struct page *head = compound_head(page);
+ unsigned int compshift = compound_order(head);
+
+ local_irq_save(flags); /* disables as well */
+ pte = find_linux_pte(mm->pgd, ua, NULL, &pageshift);
+ local_irq_restore(flags);
+
+ /* Double check it is still the same pinned page */
+ if (pte && pte_page(*pte) == head &&
+ pageshift == compshift)
+ pageshift = max_t(unsigned int, pageshift,
+ PAGE_SHIFT);
+ }
+ mem->pageshift = min(mem->pageshift, pageshift);
mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
}
@@ -349,7 +376,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
EXPORT_SYMBOL_GPL(mm_iommu_find);
long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
- unsigned long ua, unsigned long *hpa)
+ unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
const long entry = (ua - mem->ua) >> PAGE_SHIFT;
u64 *va = &mem->hpas[entry];
@@ -357,6 +384,9 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
if (entry >= mem->entries)
return -EFAULT;
+ if (pageshift > mem->pageshift)
+ return -EFAULT;
+
*hpa = *va | (ua & ~PAGE_MASK);
return 0;
@@ -364,7 +394,7 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
- unsigned long ua, unsigned long *hpa)
+ unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
const long entry = (ua - mem->ua) >> PAGE_SHIFT;
void *va = &mem->hpas[entry];
@@ -373,6 +403,9 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
if (entry >= mem->entries)
return -EFAULT;
+ if (pageshift > mem->pageshift)
+ return -EFAULT;
+
pa = (void *) vmalloc_to_phys(va);
if (!pa)
return -EFAULT;
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index c1f4ca45c93a..4afbfbb64bfd 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -409,6 +409,18 @@ static inline void pgtable_free(void *table, int index)
case PUD_INDEX:
kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), table);
break;
+#if defined(CONFIG_PPC_4K_PAGES) && defined(CONFIG_HUGETLB_PAGE)
+ /* 16M hugepd directory at pud level */
+ case HTLB_16M_INDEX:
+ BUILD_BUG_ON(H_16M_CACHE_INDEX <= 0);
+ kmem_cache_free(PGT_CACHE(H_16M_CACHE_INDEX), table);
+ break;
+ /* 16G hugepd directory at the pgd level */
+ case HTLB_16G_INDEX:
+ BUILD_BUG_ON(H_16G_CACHE_INDEX <= 0);
+ kmem_cache_free(PGT_CACHE(H_16G_CACHE_INDEX), table);
+ break;
+#endif
/* We don't free pgd table via RCU callback */
default:
BUG();
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
index 75cb646a79c3..9d16ee251fc0 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/subpage-prot.c
@@ -186,9 +186,6 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
* in a 2-bit field won't allow writes to a page that is otherwise
* write-protected.
*/
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wpragmas"
-#pragma GCC diagnostic ignored "-Wattribute-alias"
SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
unsigned long, len, u32 __user *, map)
{
@@ -272,4 +269,3 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
up_write(&mm->mmap_sem);
return err;
}
-#pragma GCC diagnostic pop
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 67a6e86d3e7e..1135b43a597c 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -689,22 +689,17 @@ EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
-void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end)
+static inline void __radix__flush_tlb_range(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ bool flush_all_sizes)
{
- struct mm_struct *mm = vma->vm_mm;
unsigned long pid;
unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
unsigned long page_size = 1UL << page_shift;
unsigned long nr_pages = (end - start) >> page_shift;
bool local, full;
-#ifdef CONFIG_HUGETLB_PAGE
- if (is_vm_hugetlb_page(vma))
- return radix__flush_hugetlb_tlb_range(vma, start, end);
-#endif
-
pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
return;
@@ -738,37 +733,64 @@ is_local:
_tlbie_pid(pid, RIC_FLUSH_TLB);
}
} else {
- bool hflush = false;
+ bool hflush = flush_all_sizes;
+ bool gflush = flush_all_sizes;
unsigned long hstart, hend;
+ unsigned long gstart, gend;
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- hstart = (start + HPAGE_PMD_SIZE - 1) >> HPAGE_PMD_SHIFT;
- hend = end >> HPAGE_PMD_SHIFT;
- if (hstart < hend) {
- hstart <<= HPAGE_PMD_SHIFT;
- hend <<= HPAGE_PMD_SHIFT;
+ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
hflush = true;
+
+ if (hflush) {
+ hstart = (start + PMD_SIZE - 1) & PMD_MASK;
+ hend = end & PMD_MASK;
+ if (hstart == hend)
+ hflush = false;
+ }
+
+ if (gflush) {
+ gstart = (start + PUD_SIZE - 1) & PUD_MASK;
+ gend = end & PUD_MASK;
+ if (gstart == gend)
+ gflush = false;
}
-#endif
asm volatile("ptesync": : :"memory");
if (local) {
__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
if (hflush)
__tlbiel_va_range(hstart, hend, pid,
- HPAGE_PMD_SIZE, MMU_PAGE_2M);
+ PMD_SIZE, MMU_PAGE_2M);
+ if (gflush)
+ __tlbiel_va_range(gstart, gend, pid,
+ PUD_SIZE, MMU_PAGE_1G);
asm volatile("ptesync": : :"memory");
} else {
__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
if (hflush)
__tlbie_va_range(hstart, hend, pid,
- HPAGE_PMD_SIZE, MMU_PAGE_2M);
+ PMD_SIZE, MMU_PAGE_2M);
+ if (gflush)
+ __tlbie_va_range(gstart, gend, pid,
+ PUD_SIZE, MMU_PAGE_1G);
fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
}
preempt_enable();
}
+
+void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+
+{
+#ifdef CONFIG_HUGETLB_PAGE
+ if (is_vm_hugetlb_page(vma))
+ return radix__flush_hugetlb_tlb_range(vma, start, end);
+#endif
+
+ __radix__flush_tlb_range(vma->vm_mm, start, end, false);
+}
EXPORT_SYMBOL(radix__flush_tlb_range);
static int radix_get_mmu_psize(int page_size)
@@ -837,6 +859,8 @@ void radix__tlb_flush(struct mmu_gather *tlb)
int psize = 0;
struct mm_struct *mm = tlb->mm;
int page_size = tlb->page_size;
+ unsigned long start = tlb->start;
+ unsigned long end = tlb->end;
/*
* if page size is not something we understand, do a full mm flush
@@ -847,15 +871,45 @@ void radix__tlb_flush(struct mmu_gather *tlb)
*/
if (tlb->fullmm) {
__flush_all_mm(mm, true);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
+ } else if (mm_tlb_flush_nested(mm)) {
+ /*
+ * If there is a concurrent invalidation that is clearing ptes,
+ * then it's possible this invalidation will miss one of those
+ * cleared ptes and miss flushing the TLB. If this invalidate
+ * returns before the other one flushes TLBs, that can result
+ * in it returning while there are still valid TLBs inside the
+ * range to be invalidated.
+ *
+ * See mm/memory.c:tlb_finish_mmu() for more details.
+ *
+ * The solution to this is to ensure the entire range is always
+ * flushed here. The problem for powerpc is that the flushes
+ * are page size specific, so this "forced flush" would not
+ * do the right thing if there is a mix of page sizes in
+ * the range to be invalidated. So use __flush_tlb_range
+ * which invalidates all possible page sizes in the range.
+ *
+ * PWC flush is probably not required because the core code
+ * shouldn't free page tables in this path, but accounting
+ * for the possibility makes us a bit more robust.
+ *
+ * need_flush_all is an uncommon case because page table
+ * teardown should be done with exclusive locks held (but
+ * after locks are dropped another invalidate could come
+ * in), it could be optimized further if necessary.
+ */
+ if (!tlb->need_flush_all)
+ __radix__flush_tlb_range(mm, start, end, true);
+ else
+ radix__flush_all_mm(mm);
+#endif
} else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
if (!tlb->need_flush_all)
radix__flush_tlb_mm(mm);
else
radix__flush_all_mm(mm);
} else {
- unsigned long start = tlb->start;
- unsigned long end = tlb->end;
-
if (!tlb->need_flush_all)
radix__flush_tlb_range_psize(mm, start, end, psize);
else
@@ -1043,6 +1097,8 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
if (sib == cpu)
continue;
+ if (!cpu_possible(sib))
+ continue;
if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
flush = true;
}