From d2db7773ba864df6b4e19643dfc54838550d8049 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 26 Sep 2018 17:32:37 +0100 Subject: kvm: arm/arm64: Fix stage2_flush_memslot for 4 level page table So far we have only supported 3 level page table with fixed IPA of 40bits, where PUD is folded. With 4 level page tables, we need to check if the PUD entry is valid or not. Fix stage2_flush_memslot() to do this check, before walking down the table. Acked-by: Christoffer Dall Acked-by: Marc Zyngier Reviewed-by: Eric Auger Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index ed162a6c57c5..ee7ce8fa4a12 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -412,7 +412,8 @@ static void stage2_flush_memslot(struct kvm *kvm, pgd = kvm->arch.pgd + stage2_pgd_index(addr); do { next = stage2_pgd_addr_end(addr, end); - stage2_flush_puds(kvm, pgd, addr, next); + if (!stage2_pgd_none(*pgd)) + stage2_flush_puds(kvm, pgd, addr, next); } while (pgd++, addr = next, addr != end); } -- cgit v1.2.3 From 7788a28062aca211979ecd2c334e06e2ef5281cc Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 26 Sep 2018 17:32:38 +0100 Subject: kvm: arm/arm64: Remove spurious WARN_ON On a 4-level page table pgd entry can be empty, unlike a 3-level page table. Remove the spurious WARN_ON() in stage_get_pud(). Acked-by: Christoffer Dall Acked-by: Marc Zyngier Reviewed-by: Eric Auger Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index ee7ce8fa4a12..4a285d760ce0 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1005,7 +1005,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache pud_t *pud; pgd = kvm->arch.pgd + stage2_pgd_index(addr); - if (WARN_ON(stage2_pgd_none(*pgd))) { + if (stage2_pgd_none(*pgd)) { if (!cache) return NULL; pud = mmu_memory_cache_alloc(cache); -- cgit v1.2.3 From 5b6c6742b5350a6fb5c631fb99a6bc046a62739c Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 26 Sep 2018 17:32:42 +0100 Subject: kvm: arm/arm64: Allow arch specific configurations for VM Allow the arch backends to perform VM specific initialisation. This will be later used to handle IPA size configuration and per-VM VTCR configuration on arm64. Cc: Marc Zyngier Cc: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index c92053bc3f96..327d0fd28380 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -120,8 +120,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int ret, cpu; - if (type) - return -EINVAL; + ret = kvm_arm_config_vm(kvm, type); + if (ret) + return ret; kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran)); if (!kvm->arch.last_vcpu_ran) -- cgit v1.2.3 From e55cac5bf2a9cc86b57a9533d6b9e5005bc19b5c Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 26 Sep 2018 17:32:44 +0100 Subject: kvm: arm/arm64: Prepare for VM specific stage2 translations Right now the stage2 page table for a VM is hard coded, assuming an IPA of 40bits. As we are about to add support for per VM IPA, prepare the stage2 page table helpers to accept the kvm instance to make the right decision for the VM. No functional changes. Adds stage2_pgd_size(kvm) to replace S2_PGD_SIZE. Also, moves some of the definitions in arm32 to align with the arm64. Also drop the _AC() specifier constants wherever possible. Cc: Christoffer Dall Acked-by: Marc Zyngier Reviewed-by: Eric Auger Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 2 +- virt/kvm/arm/mmu.c | 119 ++++++++++++++++++------------------ virt/kvm/arm/vgic/vgic-kvm-device.c | 2 +- 3 files changed, 63 insertions(+), 60 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 327d0fd28380..43e716bc3f08 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -545,7 +545,7 @@ static void update_vttbr(struct kvm *kvm) /* update vttbr to be used with the new vmid */ pgd_phys = virt_to_phys(kvm->arch.pgd); - BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); + BUG_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm)); vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid; diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 4a285d760ce0..7e477b3cae5b 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -45,7 +45,6 @@ static phys_addr_t hyp_idmap_vector; static unsigned long io_map_base; -#define S2_PGD_SIZE (PTRS_PER_S2_PGD * sizeof(pgd_t)) #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) #define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0) @@ -150,20 +149,20 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) { - pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL); - stage2_pgd_clear(pgd); + pud_t *pud_table __maybe_unused = stage2_pud_offset(kvm, pgd, 0UL); + stage2_pgd_clear(kvm, pgd); kvm_tlb_flush_vmid_ipa(kvm, addr); - stage2_pud_free(pud_table); + stage2_pud_free(kvm, pud_table); put_page(virt_to_page(pgd)); } static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) { - pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(pud, 0); - VM_BUG_ON(stage2_pud_huge(*pud)); - stage2_pud_clear(pud); + pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(kvm, pud, 0); + VM_BUG_ON(stage2_pud_huge(kvm, *pud)); + stage2_pud_clear(kvm, pud); kvm_tlb_flush_vmid_ipa(kvm, addr); - stage2_pmd_free(pmd_table); + stage2_pmd_free(kvm, pmd_table); put_page(virt_to_page(pud)); } @@ -252,7 +251,7 @@ static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, } } while (pte++, addr += PAGE_SIZE, addr != end); - if (stage2_pte_table_empty(start_pte)) + if (stage2_pte_table_empty(kvm, start_pte)) clear_stage2_pmd_entry(kvm, pmd, start_addr); } @@ -262,9 +261,9 @@ static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, phys_addr_t next, start_addr = addr; pmd_t *pmd, *start_pmd; - start_pmd = pmd = stage2_pmd_offset(pud, addr); + start_pmd = pmd = stage2_pmd_offset(kvm, pud, addr); do { - next = stage2_pmd_addr_end(addr, end); + next = stage2_pmd_addr_end(kvm, addr, end); if (!pmd_none(*pmd)) { if (pmd_thp_or_huge(*pmd)) { pmd_t old_pmd = *pmd; @@ -281,7 +280,7 @@ static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, } } while (pmd++, addr = next, addr != end); - if (stage2_pmd_table_empty(start_pmd)) + if (stage2_pmd_table_empty(kvm, start_pmd)) clear_stage2_pud_entry(kvm, pud, start_addr); } @@ -291,14 +290,14 @@ static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd, phys_addr_t next, start_addr = addr; pud_t *pud, *start_pud; - start_pud = pud = stage2_pud_offset(pgd, addr); + start_pud = pud = stage2_pud_offset(kvm, pgd, addr); do { - next = stage2_pud_addr_end(addr, end); - if (!stage2_pud_none(*pud)) { - if (stage2_pud_huge(*pud)) { + next = stage2_pud_addr_end(kvm, addr, end); + if (!stage2_pud_none(kvm, *pud)) { + if (stage2_pud_huge(kvm, *pud)) { pud_t old_pud = *pud; - stage2_pud_clear(pud); + stage2_pud_clear(kvm, pud); kvm_tlb_flush_vmid_ipa(kvm, addr); kvm_flush_dcache_pud(old_pud); put_page(virt_to_page(pud)); @@ -308,7 +307,7 @@ static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd, } } while (pud++, addr = next, addr != end); - if (stage2_pud_table_empty(start_pud)) + if (stage2_pud_table_empty(kvm, start_pud)) clear_stage2_pgd_entry(kvm, pgd, start_addr); } @@ -332,7 +331,7 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) assert_spin_locked(&kvm->mmu_lock); WARN_ON(size & ~PAGE_MASK); - pgd = kvm->arch.pgd + stage2_pgd_index(addr); + pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); do { /* * Make sure the page table is still active, as another thread @@ -341,8 +340,8 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) */ if (!READ_ONCE(kvm->arch.pgd)) break; - next = stage2_pgd_addr_end(addr, end); - if (!stage2_pgd_none(*pgd)) + next = stage2_pgd_addr_end(kvm, addr, end); + if (!stage2_pgd_none(kvm, *pgd)) unmap_stage2_puds(kvm, pgd, addr, next); /* * If the range is too large, release the kvm->mmu_lock @@ -371,9 +370,9 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, pmd_t *pmd; phys_addr_t next; - pmd = stage2_pmd_offset(pud, addr); + pmd = stage2_pmd_offset(kvm, pud, addr); do { - next = stage2_pmd_addr_end(addr, end); + next = stage2_pmd_addr_end(kvm, addr, end); if (!pmd_none(*pmd)) { if (pmd_thp_or_huge(*pmd)) kvm_flush_dcache_pmd(*pmd); @@ -389,11 +388,11 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd, pud_t *pud; phys_addr_t next; - pud = stage2_pud_offset(pgd, addr); + pud = stage2_pud_offset(kvm, pgd, addr); do { - next = stage2_pud_addr_end(addr, end); - if (!stage2_pud_none(*pud)) { - if (stage2_pud_huge(*pud)) + next = stage2_pud_addr_end(kvm, addr, end); + if (!stage2_pud_none(kvm, *pud)) { + if (stage2_pud_huge(kvm, *pud)) kvm_flush_dcache_pud(*pud); else stage2_flush_pmds(kvm, pud, addr, next); @@ -409,10 +408,10 @@ static void stage2_flush_memslot(struct kvm *kvm, phys_addr_t next; pgd_t *pgd; - pgd = kvm->arch.pgd + stage2_pgd_index(addr); + pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); do { - next = stage2_pgd_addr_end(addr, end); - if (!stage2_pgd_none(*pgd)) + next = stage2_pgd_addr_end(kvm, addr, end); + if (!stage2_pgd_none(kvm, *pgd)) stage2_flush_puds(kvm, pgd, addr, next); } while (pgd++, addr = next, addr != end); } @@ -898,7 +897,7 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) } /* Allocate the HW PGD, making sure that each page gets its own refcount */ - pgd = alloc_pages_exact(S2_PGD_SIZE, GFP_KERNEL | __GFP_ZERO); + pgd = alloc_pages_exact(stage2_pgd_size(kvm), GFP_KERNEL | __GFP_ZERO); if (!pgd) return -ENOMEM; @@ -987,7 +986,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm) spin_lock(&kvm->mmu_lock); if (kvm->arch.pgd) { - unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); + unmap_stage2_range(kvm, 0, kvm_phys_size(kvm)); pgd = READ_ONCE(kvm->arch.pgd); kvm->arch.pgd = NULL; } @@ -995,7 +994,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm) /* Free the HW pgd, one page at a time */ if (pgd) - free_pages_exact(pgd, S2_PGD_SIZE); + free_pages_exact(pgd, stage2_pgd_size(kvm)); } static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, @@ -1004,16 +1003,16 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache pgd_t *pgd; pud_t *pud; - pgd = kvm->arch.pgd + stage2_pgd_index(addr); - if (stage2_pgd_none(*pgd)) { + pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + if (stage2_pgd_none(kvm, *pgd)) { if (!cache) return NULL; pud = mmu_memory_cache_alloc(cache); - stage2_pgd_populate(pgd, pud); + stage2_pgd_populate(kvm, pgd, pud); get_page(virt_to_page(pgd)); } - return stage2_pud_offset(pgd, addr); + return stage2_pud_offset(kvm, pgd, addr); } static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, @@ -1026,15 +1025,15 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache if (!pud) return NULL; - if (stage2_pud_none(*pud)) { + if (stage2_pud_none(kvm, *pud)) { if (!cache) return NULL; pmd = mmu_memory_cache_alloc(cache); - stage2_pud_populate(pud, pmd); + stage2_pud_populate(kvm, pud, pmd); get_page(virt_to_page(pud)); } - return stage2_pmd_offset(pud, addr); + return stage2_pmd_offset(kvm, pud, addr); } static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache @@ -1208,8 +1207,9 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, if (writable) pte = kvm_s2pte_mkwrite(pte); - ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES, - KVM_NR_MEM_OBJS); + ret = mmu_topup_memory_cache(&cache, + kvm_mmu_cache_min_pages(kvm), + KVM_NR_MEM_OBJS); if (ret) goto out; spin_lock(&kvm->mmu_lock); @@ -1297,19 +1297,21 @@ static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) /** * stage2_wp_pmds - write protect PUD range + * kvm: kvm instance for the VM * @pud: pointer to pud entry * @addr: range start address * @end: range end address */ -static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end) +static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud, + phys_addr_t addr, phys_addr_t end) { pmd_t *pmd; phys_addr_t next; - pmd = stage2_pmd_offset(pud, addr); + pmd = stage2_pmd_offset(kvm, pud, addr); do { - next = stage2_pmd_addr_end(addr, end); + next = stage2_pmd_addr_end(kvm, addr, end); if (!pmd_none(*pmd)) { if (pmd_thp_or_huge(*pmd)) { if (!kvm_s2pmd_readonly(pmd)) @@ -1329,18 +1331,19 @@ static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end) * * Process PUD entries, for a huge PUD we cause a panic. */ -static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end) +static void stage2_wp_puds(struct kvm *kvm, pgd_t *pgd, + phys_addr_t addr, phys_addr_t end) { pud_t *pud; phys_addr_t next; - pud = stage2_pud_offset(pgd, addr); + pud = stage2_pud_offset(kvm, pgd, addr); do { - next = stage2_pud_addr_end(addr, end); - if (!stage2_pud_none(*pud)) { + next = stage2_pud_addr_end(kvm, addr, end); + if (!stage2_pud_none(kvm, *pud)) { /* TODO:PUD not supported, revisit later if supported */ - BUG_ON(stage2_pud_huge(*pud)); - stage2_wp_pmds(pud, addr, next); + BUG_ON(stage2_pud_huge(kvm, *pud)); + stage2_wp_pmds(kvm, pud, addr, next); } } while (pud++, addr = next, addr != end); } @@ -1356,7 +1359,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) pgd_t *pgd; phys_addr_t next; - pgd = kvm->arch.pgd + stage2_pgd_index(addr); + pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); do { /* * Release kvm_mmu_lock periodically if the memory region is @@ -1370,9 +1373,9 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) cond_resched_lock(&kvm->mmu_lock); if (!READ_ONCE(kvm->arch.pgd)) break; - next = stage2_pgd_addr_end(addr, end); - if (stage2_pgd_present(*pgd)) - stage2_wp_puds(pgd, addr, next); + next = stage2_pgd_addr_end(kvm, addr, end); + if (stage2_pgd_present(kvm, *pgd)) + stage2_wp_puds(kvm, pgd, addr, next); } while (pgd++, addr = next, addr != end); } @@ -1521,7 +1524,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, up_read(¤t->mm->mmap_sem); /* We need minimum second+third level pages */ - ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES, + ret = mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(kvm), KVM_NR_MEM_OBJS); if (ret) return ret; @@ -1764,7 +1767,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) } /* Userspace should not be able to register out-of-bounds IPAs */ - VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE); + VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm)); if (fault_status == FSC_ACCESS) { handle_access_fault(vcpu, fault_ipa); @@ -2063,7 +2066,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, * space addressable by the KVM guest IPA space. */ if (memslot->base_gfn + memslot->npages >= - (KVM_PHYS_SIZE >> PAGE_SHIFT)) + (kvm_phys_size(kvm) >> PAGE_SHIFT)) return -EFAULT; down_read(¤t->mm->mmap_sem); diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c index 6ada2432e37c..114dce9f4bf5 100644 --- a/virt/kvm/arm/vgic/vgic-kvm-device.c +++ b/virt/kvm/arm/vgic/vgic-kvm-device.c @@ -25,7 +25,7 @@ int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, phys_addr_t addr, phys_addr_t alignment) { - if (addr & ~KVM_PHYS_MASK) + if (addr & ~kvm_phys_mask(kvm)) return -E2BIG; if (!IS_ALIGNED(addr, alignment)) -- cgit v1.2.3 From 8ad50c8985d805923f52a80698010a0a5123c07d Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 26 Sep 2018 17:32:50 +0100 Subject: vgic: Add support for 52bit guest physical address Add support for handling 52bit guest physical address to the VGIC layer. So far we have limited the guest physical address to 48bits, by explicitly masking the upper bits. This patch removes the restriction. We do not have to check if the host supports 52bit as the gpa is always validated during an access. (e.g, kvm_{read/write}_guest, kvm_is_visible_gfn()). Also, the ITS table save-restore is also not affected with the enhancement. The DTE entries already store the bits[51:8] of the ITT_addr (with a 256byte alignment). Cc: Marc Zyngier Cc: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Kristina Martsenko [ Macro clean ups, fix PROPBASER and PENDBASER accesses ] Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-its.c | 36 ++++++++++-------------------------- virt/kvm/arm/vgic/vgic-mmio-v3.c | 2 -- 2 files changed, 10 insertions(+), 28 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 12502251727e..eb2a390a6c86 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -241,13 +241,6 @@ static struct its_ite *find_ite(struct vgic_its *its, u32 device_id, list_for_each_entry(dev, &(its)->device_list, dev_list) \ list_for_each_entry(ite, &(dev)->itt_head, ite_list) -/* - * We only implement 48 bits of PA at the moment, although the ITS - * supports more. Let's be restrictive here. - */ -#define BASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 16)) -#define CBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 12)) - #define GIC_LPI_OFFSET 8192 #define VITS_TYPER_IDBITS 16 @@ -759,6 +752,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, { int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; u64 indirect_ptr, type = GITS_BASER_TYPE(baser); + phys_addr_t base = GITS_BASER_ADDR_48_to_52(baser); int esz = GITS_BASER_ENTRY_SIZE(baser); int index; gfn_t gfn; @@ -783,7 +777,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, if (id >= (l1_tbl_size / esz)) return false; - addr = BASER_ADDRESS(baser) + id * esz; + addr = base + id * esz; gfn = addr >> PAGE_SHIFT; if (eaddr) @@ -798,7 +792,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, /* Each 1st level entry is represented by a 64-bit value. */ if (kvm_read_guest_lock(its->dev->kvm, - BASER_ADDRESS(baser) + index * sizeof(indirect_ptr), + base + index * sizeof(indirect_ptr), &indirect_ptr, sizeof(indirect_ptr))) return false; @@ -808,11 +802,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, if (!(indirect_ptr & BIT_ULL(63))) return false; - /* - * Mask the guest physical address and calculate the frame number. - * Any address beyond our supported 48 bits of PA will be caught - * by the actual check in the final step. - */ + /* Mask the guest physical address and calculate the frame number. */ indirect_ptr &= GENMASK_ULL(51, 16); /* Find the address of the actual entry */ @@ -1304,9 +1294,6 @@ static u64 vgic_sanitise_its_baser(u64 reg) GITS_BASER_OUTER_CACHEABILITY_SHIFT, vgic_sanitise_outer_cacheability); - /* Bits 15:12 contain bits 51:48 of the PA, which we don't support. */ - reg &= ~GENMASK_ULL(15, 12); - /* We support only one (ITS) page size: 64K */ reg = (reg & ~GITS_BASER_PAGE_SIZE_MASK) | GITS_BASER_PAGE_SIZE_64K; @@ -1325,11 +1312,8 @@ static u64 vgic_sanitise_its_cbaser(u64 reg) GITS_CBASER_OUTER_CACHEABILITY_SHIFT, vgic_sanitise_outer_cacheability); - /* - * Sanitise the physical address to be 64k aligned. - * Also limit the physical addresses to 48 bits. - */ - reg &= ~(GENMASK_ULL(51, 48) | GENMASK_ULL(15, 12)); + /* Sanitise the physical address to be 64k aligned. */ + reg &= ~GENMASK_ULL(15, 12); return reg; } @@ -1375,7 +1359,7 @@ static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its) if (!its->enabled) return; - cbaser = CBASER_ADDRESS(its->cbaser); + cbaser = GITS_CBASER_ADDRESS(its->cbaser); while (its->cwriter != its->creadr) { int ret = kvm_read_guest_lock(kvm, cbaser + its->creadr, @@ -2233,7 +2217,7 @@ static int vgic_its_restore_device_tables(struct vgic_its *its) if (!(baser & GITS_BASER_VALID)) return 0; - l1_gpa = BASER_ADDRESS(baser); + l1_gpa = GITS_BASER_ADDR_48_to_52(baser); if (baser & GITS_BASER_INDIRECT) { l1_esz = GITS_LVL1_ENTRY_SIZE; @@ -2305,7 +2289,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its) { const struct vgic_its_abi *abi = vgic_its_get_abi(its); u64 baser = its->baser_coll_table; - gpa_t gpa = BASER_ADDRESS(baser); + gpa_t gpa = GITS_BASER_ADDR_48_to_52(baser); struct its_collection *collection; u64 val; size_t max_size, filled = 0; @@ -2354,7 +2338,7 @@ static int vgic_its_restore_collection_table(struct vgic_its *its) if (!(baser & GITS_BASER_VALID)) return 0; - gpa = BASER_ADDRESS(baser); + gpa = GITS_BASER_ADDR_48_to_52(baser); max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index a2a175b08b17..b3d1f0985117 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -364,7 +364,6 @@ static u64 vgic_sanitise_pendbaser(u64 reg) vgic_sanitise_outer_cacheability); reg &= ~PENDBASER_RES0_MASK; - reg &= ~GENMASK_ULL(51, 48); return reg; } @@ -382,7 +381,6 @@ static u64 vgic_sanitise_propbaser(u64 reg) vgic_sanitise_outer_cacheability); reg &= ~PROPBASER_RES0_MASK; - reg &= ~GENMASK_ULL(51, 48); return reg; } -- cgit v1.2.3 From 0f62f0e95be29200ab2ab98ca870e22c9b148dfa Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 26 Sep 2018 17:32:52 +0100 Subject: kvm: arm64: Set a limit on the IPA size So far we have restricted the IPA size of the VM to the default value (40bits). Now that we can manage the IPA size per VM and support dynamic stage2 page tables, we can allow VMs to have larger IPA. This patch introduces a the maximum IPA size supported on the host. This is decided by the following factors : 1) Maximum PARange supported by the CPUs - This can be inferred from the system wide safe value. 2) Maximum PA size supported by the host kernel (48 vs 52) 3) Number of levels in the host page table (as we base our stage2 tables on the host table helpers). Since the stage2 page table code is dependent on the stage1 page table, we always ensure that : Number of Levels at Stage1 >= Number of Levels at Stage2 So we limit the IPA to make sure that the above condition is satisfied. This will affect the following combinations of VA_BITS and IPA for different page sizes. Host configuration | Unsupported IPA ranges 39bit VA, 4K | [44, 48] 36bit VA, 16K | [41, 48] 42bit VA, 64K | [47, 52] Supporting the above combinations need independent stage2 page table manipulation code, which would need substantial changes. We could purse the solution independently and switch the page table code once we have it ready. Cc: Catalin Marinas Cc: Marc Zyngier Cc: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 43e716bc3f08..631f9a3ad99a 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1413,6 +1413,8 @@ static int init_common_resources(void) kvm_vmid_bits = kvm_get_vmid_bits(); kvm_info("%d-bit VMID\n", kvm_vmid_bits); + kvm_set_ipa_limit(); + return 0; } -- cgit v1.2.3 From bca607ebc76af9540e4aad5b2241a7323354be43 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 1 Oct 2018 13:40:36 +0100 Subject: KVM: arm/arm64: Rename kvm_arm_config_vm to kvm_arm_setup_stage2 VM tends to be a very overloaded term in KVM, so let's keep it to describe the virtual machine. For the virtual memory setup, let's use the "stage2" suffix. Reviewed-by: Eric Auger Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 631f9a3ad99a..91c464c9cd21 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -120,7 +120,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int ret, cpu; - ret = kvm_arm_config_vm(kvm, type); + ret = kvm_arm_setup_stage2(kvm, type); if (ret) return ret; -- cgit v1.2.3 From 9d47bb0d9ea8528373b4c6f9bca6c7f402900297 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 1 Oct 2018 13:41:32 +0100 Subject: KVM: arm64: Drop __cpu_init_stage2 on the VHE path __cpu_init_stage2 doesn't do anything anymore on arm64, and is totally non-sensical if running VHE (as VHE is 64bit only). Reviewed-by: Eric Auger Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 91c464c9cd21..4ce99bb223bc 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1310,16 +1310,10 @@ static void cpu_hyp_reinit(void) { cpu_hyp_reset(); - if (is_kernel_in_hyp_mode()) { - /* - * __cpu_init_stage2() is safe to call even if the PM - * event was cancelled before the CPU was reset. - */ - __cpu_init_stage2(); + if (is_kernel_in_hyp_mode()) kvm_timer_init_vhe(); - } else { + else cpu_init_hyp_mode(NULL); - } if (vgic_present) kvm_vgic_init_cpu_hardware(); -- cgit v1.2.3 From fd2ef358282c849c193aa36dadbf4f07f7dcd29b Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Mon, 1 Oct 2018 16:54:35 +0100 Subject: KVM: arm/arm64: Ensure only THP is candidate for adjustment PageTransCompoundMap() returns true for hugetlbfs and THP hugepages. This behaviour incorrectly leads to stage 2 faults for unsupported hugepage sizes (e.g., 64K hugepage with 4K pages) to be treated as THP faults. Tighten the check to filter out hugetlbfs pages. This also leads to consistently mapping all unsupported hugepage sizes as PTE level entries at stage 2. Signed-off-by: Punit Agrawal Reviewed-by: Suzuki Poulose Cc: Christoffer Dall Cc: Marc Zyngier Cc: stable@vger.kernel.org # v4.13+ Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 7e477b3cae5b..c23a1b323aad 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1231,8 +1231,14 @@ static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap) { kvm_pfn_t pfn = *pfnp; gfn_t gfn = *ipap >> PAGE_SHIFT; + struct page *page = pfn_to_page(pfn); - if (PageTransCompoundMap(pfn_to_page(pfn))) { + /* + * PageTransCompoungMap() returns true for THP and + * hugetlbfs. Make sure the adjustment is done only for THP + * pages. + */ + if (!PageHuge(page) && PageTransCompoundMap(page)) { unsigned long mask; /* * The address we faulted on is backed by a transparent huge -- cgit v1.2.3 From da5a3ce66b8bb51b0ea8a89f42aac153903f90fb Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 17 Oct 2018 17:42:10 +0100 Subject: KVM: arm64: Fix caching of host MDCR_EL2 value At boot time, KVM stashes the host MDCR_EL2 value, but only does this when the kernel is not running in hyp mode (i.e. is non-VHE). In these cases, the stashed value of MDCR_EL2.HPMN happens to be zero, which can lead to CONSTRAINED UNPREDICTABLE behaviour. Since we use this value to derive the MDCR_EL2 value when switching to/from a guest, after a guest have been run, the performance counters do not behave as expected. This has been observed to result in accesses via PMXEVTYPER_EL0 and PMXEVCNTR_EL0 not affecting the relevant counters, resulting in events not being counted. In these cases, only the fixed-purpose cycle counter appears to work as expected. Fix this by always stashing the host MDCR_EL2 value, regardless of VHE. Cc: Christopher Dall Cc: James Morse Cc: Will Deacon Cc: stable@vger.kernel.org Fixes: 1e947bad0b63b351 ("arm64: KVM: Skip HYP setup when already running in HYP") Tested-by: Robin Murphy Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 4ce99bb223bc..4c5ff06b18f9 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1296,8 +1296,6 @@ static void cpu_init_hyp_mode(void *dummy) __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); __cpu_init_stage2(); - - kvm_arm_init_debug(); } static void cpu_hyp_reset(void) @@ -1315,6 +1313,8 @@ static void cpu_hyp_reinit(void) else cpu_init_hyp_mode(NULL); + kvm_arm_init_debug(); + if (vgic_present) kvm_vgic_init_cpu_hardware(); } -- cgit v1.2.3 From 375bdd3b5d4f7cf146f0df1488b4671b141dd799 Mon Sep 17 00:00:00 2001 From: Dongjiu Geng Date: Sat, 13 Oct 2018 00:12:48 +0800 Subject: arm/arm64: KVM: Rename function kvm_arch_dev_ioctl_check_extension() Rename kvm_arch_dev_ioctl_check_extension() to kvm_arch_vm_ioctl_check_extension(), because it does not have any relationship with device. Renaming this function can make code readable. Cc: James Morse Reviewed-by: Suzuki K Poulose Signed-off-by: Dongjiu Geng Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 4c5ff06b18f9..7c9d7b51ce89 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -241,7 +241,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; default: - r = kvm_arch_dev_ioctl_check_extension(kvm, ext); + r = kvm_arch_vm_ioctl_check_extension(kvm, ext); break; } return r; -- cgit v1.2.3 From 58bf437ff64eac8aca606e42d7e4623e40b61fa1 Mon Sep 17 00:00:00 2001 From: Dongjiu Geng Date: Sat, 13 Oct 2018 00:12:49 +0800 Subject: arm/arm64: KVM: Enable 32 bits kvm vcpu events support The commit 539aee0edb9f ("KVM: arm64: Share the parts of get/set events useful to 32bit") shares the get/set events helper for arm64 and arm32, but forgot to share the cap extension code. User space will check whether KVM supports vcpu events by checking the KVM_CAP_VCPU_EVENTS extension Acked-by: James Morse Reviewed-by : Suzuki K Poulose Signed-off-by: Dongjiu Geng Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 7c9d7b51ce89..11b98b2b0486 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -213,6 +213,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_READONLY_MEM: case KVM_CAP_MP_STATE: case KVM_CAP_IMMEDIATE_EXIT: + case KVM_CAP_VCPU_EVENTS: r = 1; break; case KVM_CAP_ARM_SET_DEVICE_ADDR: -- cgit v1.2.3