From 94171b19c3f1f4d9d4c0e3aaa1aa161def1ec7ea Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 27 Jul 2017 11:54:53 +0530 Subject: powerpc/mm: Rename find_linux_pte_or_hugepte() Add newer helpers to make the function usage simpler. It is always recommended to use find_current_mm_pte() for walking the page table. If we cannot use find_current_mm_pte(), it should be documented why the said usage of __find_linux_pte() is safe against a parallel THP split. For now we have KVM code using __find_linux_pte(). This is because kvm code ends up calling __find_linux_pte() in real mode with MSR_EE=0 but with PACA soft_enabled = 1. We may want to fix that later and make sure we keep the MSR_EE and PACA soft_enabled in sync. When we do that we can switch kvm to use find_linux_pte(). Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 5 +++-- arch/powerpc/kvm/book3s_64_mmu_radix.c | 28 ++++++++++++++-------------- arch/powerpc/kvm/book3s_64_vio_hv.c | 12 +++++++++++- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 18 +++++++++--------- arch/powerpc/kvm/e500_mmu_host.c | 3 ++- 5 files changed, 39 insertions(+), 27 deletions(-) (limited to 'arch/powerpc/kvm') diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 8cb0190e2a73..4b219db39c47 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "trace_hv.h" @@ -597,8 +598,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, * hugepage split and collapse. */ local_irq_save(flags); - ptep = find_linux_pte_or_hugepte(current->mm->pgd, - hva, NULL, NULL); + ptep = find_current_mm_pte(current->mm->pgd, + hva, NULL, NULL); if (ptep) { pte = kvmppc_read_update_linux_pte(ptep, 1); if (__pte_write(pte)) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index f6b3e67c5762..7d719c8aa0bb 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -17,6 +17,7 @@ #include #include #include +#include /* * Supported radix tree geometry. @@ -359,8 +360,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, if (writing) pgflags |= _PAGE_DIRTY; local_irq_save(flags); - ptep = __find_linux_pte_or_hugepte(current->mm->pgd, hva, - NULL, NULL); + ptep = find_current_mm_pte(current->mm->pgd, hva, NULL, NULL); if (ptep) { pte = READ_ONCE(*ptep); if (pte_present(pte) && @@ -374,8 +374,12 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, spin_unlock(&kvm->mmu_lock); return RESUME_GUEST; } - ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, - gpa, NULL, &shift); + /* + * We are walking the secondary page table here. We can do this + * without disabling irq. + */ + ptep = __find_linux_pte(kvm->arch.pgtable, + gpa, NULL, &shift); if (ptep && pte_present(*ptep)) { kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, gpa, shift); @@ -427,8 +431,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, pgflags |= _PAGE_WRITE; } else { local_irq_save(flags); - ptep = __find_linux_pte_or_hugepte(current->mm->pgd, - hva, NULL, NULL); + ptep = find_current_mm_pte(current->mm->pgd, + hva, NULL, NULL); if (ptep && pte_write(*ptep) && pte_dirty(*ptep)) pgflags |= _PAGE_WRITE; local_irq_restore(flags); @@ -499,8 +503,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned int shift; unsigned long old; - ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, - NULL, &shift); + ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); if (ptep && pte_present(*ptep)) { old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0, gpa, shift); @@ -525,8 +528,7 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned int shift; int ref = 0; - ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, - NULL, &shift); + ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); if (ptep && pte_present(*ptep) && pte_young(*ptep)) { kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0, gpa, shift); @@ -545,8 +547,7 @@ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned int shift; int ref = 0; - ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, - NULL, &shift); + ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); if (ptep && pte_present(*ptep) && pte_young(*ptep)) ref = 1; return ref; @@ -562,8 +563,7 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm, unsigned int shift; int ret = 0; - ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa, - NULL, &shift); + ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) { ret = 1; if (shift) diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 3adfd2f5301c..c32e9bfe75b1 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -39,6 +39,7 @@ #include #include #include +#include #ifdef CONFIG_BUG @@ -353,7 +354,16 @@ static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, pte_t *ptep, pte; unsigned shift = 0; - ptep = __find_linux_pte_or_hugepte(vcpu->arch.pgdir, ua, NULL, &shift); + /* + * Called in real mode with MSR_EE = 0. We are safe here. + * It is ok to do the lookup with arch.pgdir here, because + * we are doing this on secondary cpus and current task there + * is not the hypervisor. Also this is safe against THP in the + * host, because an IPI to primary thread will wait for the secondary + * to exit which will agains result in the below page table walk + * to finish. + */ + ptep = __find_linux_pte(vcpu->arch.pgdir, ua, NULL, &shift); if (!ptep || !pte_present(*ptep)) return -ENXIO; pte = *ptep; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 584c74c8119f..fedb0139524c 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -22,6 +22,7 @@ #include #include #include +#include /* Translate address of a vmalloc'd thing to a linear map address */ static void *real_vmalloc_addr(void *x) @@ -31,9 +32,9 @@ static void *real_vmalloc_addr(void *x) /* * assume we don't have huge pages in vmalloc space... * So don't worry about THP collapse/split. Called - * Only in realmode, hence won't need irq_save/restore. + * Only in realmode with MSR_EE = 0, hence won't need irq_save/restore. */ - p = __find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL, NULL); + p = find_init_mm_pte(addr, NULL); if (!p || !pte_present(*p)) return NULL; addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK); @@ -230,14 +231,13 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, * If we had a page table table change after lookup, we would * retry via mmu_notifier_retry. */ - if (realmode) - ptep = __find_linux_pte_or_hugepte(pgdir, hva, NULL, - &hpage_shift); - else { + if (!realmode) local_irq_save(irq_flags); - ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL, - &hpage_shift); - } + /* + * If called in real mode we have MSR_EE = 0. Otherwise + * we disable irq above. + */ + ptep = __find_linux_pte(pgdir, hva, NULL, &hpage_shift); if (ptep) { pte_t pte; unsigned int host_pte_size; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 77fd043b3ecc..c6c734424c70 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "e500.h" #include "timing.h" @@ -476,7 +477,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, * can't run hence pfn won't change. */ local_irq_save(flags); - ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL, NULL); + ptep = find_linux_pte(pgdir, hva, NULL, NULL); if (ptep) { pte_t pte = READ_ONCE(*ptep); -- cgit v1.2.3