From 8383741ab2e773a992f1f0f8acdca5e7a4687c49 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 27 Oct 2021 11:18:00 +0100 Subject: KVM: arm64: Get rid of host SVE tracking/saving The SVE host tracking in KVM is pretty involved. It relies on a set of flags tracking the ownership of the SVE register, as well as that of the EL0 access. It is also pretty scary: __hyp_sve_save_host() computes a thread_struct pointer and obtains a sve_state which gets directly accessed without further ado, even on nVHE. How can this even work? The answer to that is that it doesn't, and that this is mostly dead code. Closer examination shows that on executing a syscall, userspace loses its SVE state entirely. This is part of the ABI. Another thing to notice is that although the kernel provides helpers such as kernel_neon_begin()/end(), they only deal with the FP/NEON state, and not SVE. Given that you can only execute a guest as the result of a syscall, and that the kernel cannot use SVE by itself, it becomes pretty obvious that there is never any host SVE state to save, and that this code is only there to increase confusion. Get rid of the TIF_SVE tracking and host save infrastructure altogether. Reviewed-by: Mark Brown Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/include/hyp/switch.h | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) (limited to 'arch/arm64/kvm/hyp') diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 7a0af1d39303..e65c2956b881 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -143,16 +143,6 @@ static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault); } -static inline void __hyp_sve_save_host(struct kvm_vcpu *vcpu) -{ - struct thread_struct *thread; - - thread = container_of(vcpu->arch.host_fpsimd_state, struct thread_struct, - uw.fpsimd_state); - - __sve_save_state(sve_pffr(thread), &vcpu->arch.host_fpsimd_state->fpsr); -} - static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu) { sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2); @@ -169,21 +159,14 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu) */ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) { - bool sve_guest, sve_host; + bool sve_guest; u8 esr_ec; u64 reg; if (!system_supports_fpsimd()) return false; - if (system_supports_sve()) { - sve_guest = vcpu_has_sve(vcpu); - sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE; - } else { - sve_guest = false; - sve_host = false; - } - + sve_guest = vcpu_has_sve(vcpu); esr_ec = kvm_vcpu_trap_get_class(vcpu); /* Don't handle SVE traps for non-SVE vcpus here: */ @@ -207,11 +190,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) isb(); if (vcpu->arch.flags & KVM_ARM64_FP_HOST) { - if (sve_host) - __hyp_sve_save_host(vcpu); - else - __fpsimd_save_state(vcpu->arch.host_fpsimd_state); - + __fpsimd_save_state(vcpu->arch.host_fpsimd_state); vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; } -- cgit v1.2.3 From e66425fc9ba33e9716d6e7c6bc78bb62f981d4df Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 22 Nov 2021 15:59:23 +0000 Subject: KVM: arm64: Remove unused __sve_save_state Now that we don't have any users left for __sve_save_state, remove it altogether. Should we ever need to save the SVE state from the hypervisor again, we can always re-introduce it. Suggested-by: Zenghui Yu Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/fpsimd.S | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/arm64/kvm/hyp') diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S index e950875e31ce..61e6f3ba7b7d 100644 --- a/arch/arm64/kvm/hyp/fpsimd.S +++ b/arch/arm64/kvm/hyp/fpsimd.S @@ -25,9 +25,3 @@ SYM_FUNC_START(__sve_restore_state) sve_load 0, x1, x2, 3 ret SYM_FUNC_END(__sve_restore_state) - -SYM_FUNC_START(__sve_save_state) - mov x2, #1 - sve_save 0, x1, x2, 3 - ret -SYM_FUNC_END(__sve_save_state) -- cgit v1.2.3 From af9a0e21d817f40595aa629de32d3bd96582abef Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 21 Oct 2021 14:10:35 +0100 Subject: KVM: arm64: Introduce flag shadowing TIF_FOREIGN_FPSTATE We currently have to maintain a mapping the thread_info structure at EL2 in order to be able to check the TIF_FOREIGN_FPSTATE flag. In order to eventually get rid of this, start with a vcpu flag that shadows the thread flag on each entry into the hypervisor. Reviewed-by: Mark Brown Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/include/hyp/switch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/kvm/hyp') diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index e65c2956b881..a243a2a82131 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -49,7 +49,7 @@ static inline bool update_fp_enabled(struct kvm_vcpu *vcpu) * trap the accesses. */ if (!system_supports_fpsimd() || - vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) + vcpu->arch.flags & KVM_ARM64_FP_FOREIGN_FPSTATE) vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | KVM_ARM64_FP_HOST); -- cgit v1.2.3 From bee14bca735a6f897a6ec3e42f3e5d2d8966e87e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 21 Oct 2021 14:18:00 +0100 Subject: KVM: arm64: Stop mapping current thread_info at EL2 Now that we can track an equivalent of TIF_FOREIGN_FPSTATE, drop the mapping of current's thread_info at EL2. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/include/hyp/switch.h | 1 - arch/arm64/kvm/hyp/nvhe/switch.c | 1 - arch/arm64/kvm/hyp/vhe/switch.c | 1 - 3 files changed, 3 deletions(-) (limited to 'arch/arm64/kvm/hyp') diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index a243a2a82131..11e8580f2fdc 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -29,7 +29,6 @@ #include #include #include -#include struct kvm_exception_table_entry { int insn, fixup; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index c0e3fed26d93..329c706af39f 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 5a2cb5d9bc4b..1d162b9c78bf 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -24,7 +24,6 @@ #include #include #include -#include /* VHE specific context */ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data); -- cgit v1.2.3 From ed4ed15d571065eb66ea718d7f6050553586417d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 2 Dec 2021 17:10:47 +0000 Subject: KVM: arm64: Generate hyp_constants.h for the host In order to avoid exposing hypervisor (EL2) data structures directly to the host, generate hyp_constants.h to provide constants such as structure sizes to the host without dragging in the definitions themselves. Signed-off-by: Will Deacon Tested-by: Fuad Tabba Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211202171048.26924-3-will@kernel.org --- arch/arm64/kvm/hyp/hyp-constants.c | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 arch/arm64/kvm/hyp/hyp-constants.c (limited to 'arch/arm64/kvm/hyp') diff --git a/arch/arm64/kvm/hyp/hyp-constants.c b/arch/arm64/kvm/hyp/hyp-constants.c new file mode 100644 index 000000000000..b3742a6691e8 --- /dev/null +++ b/arch/arm64/kvm/hyp/hyp-constants.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +int main(void) +{ + DEFINE(STRUCT_HYP_PAGE_SIZE, sizeof(struct hyp_page)); + return 0; +} -- cgit v1.2.3 From 9429f4b0412d05243237c7695c59d0a7b1174492 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 2 Dec 2021 17:10:48 +0000 Subject: KVM: arm64: Move host EL1 code out of hyp/ directory kvm/hyp/reserved_mem.c contains host code executing at EL1 and is not linked into the hypervisor object. Move the file into kvm/pkvm.c and rework the headers so that the definitions shared between the host and the hypervisor live in asm/kvm_pkvm.h. Signed-off-by: Will Deacon Tested-by: Fuad Tabba Reviewed-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211202171048.26924-4-will@kernel.org --- arch/arm64/kvm/hyp/Makefile | 2 +- arch/arm64/kvm/hyp/include/nvhe/mm.h | 57 ------------------ arch/arm64/kvm/hyp/nvhe/mem_protect.c | 1 + arch/arm64/kvm/hyp/nvhe/mm.c | 1 + arch/arm64/kvm/hyp/nvhe/setup.c | 1 + arch/arm64/kvm/hyp/reserved_mem.c | 109 ---------------------------------- 6 files changed, 4 insertions(+), 167 deletions(-) delete mode 100644 arch/arm64/kvm/hyp/reserved_mem.c (limited to 'arch/arm64/kvm/hyp') diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index b726332eec49..687598e41b21 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -10,4 +10,4 @@ subdir-ccflags-y := -I$(incdir) \ -DDISABLE_BRANCH_PROFILING \ $(DISABLE_STACKLEAK_PLUGIN) -obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o reserved_mem.o +obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index c9a8f535212e..ef6a58a04235 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -10,9 +10,6 @@ #include #include -#define HYP_MEMBLOCK_REGIONS 128 -extern struct memblock_region kvm_nvhe_sym(hyp_memory)[]; -extern unsigned int kvm_nvhe_sym(hyp_memblock_nr); extern struct kvm_pgtable pkvm_pgtable; extern hyp_spinlock_t pkvm_pgd_lock; extern struct hyp_pool hpool; @@ -39,58 +36,4 @@ static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size, *end = ALIGN(*end, PAGE_SIZE); } -static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages) -{ - unsigned long total = 0, i; - - /* Provision the worst case scenario */ - for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) { - nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE); - total += nr_pages; - } - - return total; -} - -static inline unsigned long __hyp_pgtable_total_pages(void) -{ - unsigned long res = 0, i; - - /* Cover all of memory with page-granularity */ - for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) { - struct memblock_region *reg = &kvm_nvhe_sym(hyp_memory)[i]; - res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT); - } - - return res; -} - -static inline unsigned long hyp_s1_pgtable_pages(void) -{ - unsigned long res; - - res = __hyp_pgtable_total_pages(); - - /* Allow 1 GiB for private mappings */ - res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT); - - return res; -} - -static inline unsigned long host_s2_pgtable_pages(void) -{ - unsigned long res; - - /* - * Include an extra 16 pages to safely upper-bound the worst case of - * concatenated pgds. - */ - res = __hyp_pgtable_total_pages() + 16; - - /* Allow 1 GiB for MMIO mappings */ - res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT); - - return res; -} - #endif /* __KVM_HYP_MM_H */ diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index c1a90dd022b8..92262e89672d 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index 2fabeceb889a..9e0ff5a700dd 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 578f71798c2e..51e68a040d8a 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/kvm/hyp/reserved_mem.c b/arch/arm64/kvm/hyp/reserved_mem.c deleted file mode 100644 index 578670e3f608..000000000000 --- a/arch/arm64/kvm/hyp/reserved_mem.c +++ /dev/null @@ -1,109 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2020 - Google LLC - * Author: Quentin Perret - */ - -#include -#include -#include - -#include - -#include -#include - -static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory); -static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr); - -phys_addr_t hyp_mem_base; -phys_addr_t hyp_mem_size; - -static int cmp_hyp_memblock(const void *p1, const void *p2) -{ - const struct memblock_region *r1 = p1; - const struct memblock_region *r2 = p2; - - return r1->base < r2->base ? -1 : (r1->base > r2->base); -} - -static void __init sort_memblock_regions(void) -{ - sort(hyp_memory, - *hyp_memblock_nr_ptr, - sizeof(struct memblock_region), - cmp_hyp_memblock, - NULL); -} - -static int __init register_memblock_regions(void) -{ - struct memblock_region *reg; - - for_each_mem_region(reg) { - if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS) - return -ENOMEM; - - hyp_memory[*hyp_memblock_nr_ptr] = *reg; - (*hyp_memblock_nr_ptr)++; - } - sort_memblock_regions(); - - return 0; -} - -void __init kvm_hyp_reserve(void) -{ - u64 nr_pages, prev, hyp_mem_pages = 0; - int ret; - - if (!is_hyp_mode_available() || is_kernel_in_hyp_mode()) - return; - - if (kvm_get_mode() != KVM_MODE_PROTECTED) - return; - - ret = register_memblock_regions(); - if (ret) { - *hyp_memblock_nr_ptr = 0; - kvm_err("Failed to register hyp memblocks: %d\n", ret); - return; - } - - hyp_mem_pages += hyp_s1_pgtable_pages(); - hyp_mem_pages += host_s2_pgtable_pages(); - - /* - * The hyp_vmemmap needs to be backed by pages, but these pages - * themselves need to be present in the vmemmap, so compute the number - * of pages needed by looking for a fixed point. - */ - nr_pages = 0; - do { - prev = nr_pages; - nr_pages = hyp_mem_pages + prev; - nr_pages = DIV_ROUND_UP(nr_pages * sizeof(struct hyp_page), PAGE_SIZE); - nr_pages += __hyp_pgtable_max_pages(nr_pages); - } while (nr_pages != prev); - hyp_mem_pages += nr_pages; - - /* - * Try to allocate a PMD-aligned region to reduce TLB pressure once - * this is unmapped from the host stage-2, and fallback to PAGE_SIZE. - */ - hyp_mem_size = hyp_mem_pages << PAGE_SHIFT; - hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE), - PMD_SIZE); - if (!hyp_mem_base) - hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE); - else - hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE); - - if (!hyp_mem_base) { - kvm_err("Failed to reserve hyp memory\n"); - return; - } - - kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20, - hyp_mem_base); -} -- cgit v1.2.3 From 34b43a8849229e8363c19236ecdf463b7a89d085 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Wed, 8 Dec 2021 15:22:54 +0000 Subject: KVM: arm64: pkvm: Fix hyp_pool max order The EL2 page allocator in protected mode maintains a per-pool max order value to optimize allocations when the memory region it covers is small. However, the max order value is currently under-estimated whenever the number of pages in the region is a power of two. Fix the estimation. Signed-off-by: Quentin Perret Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211208152300.2478542-2-qperret@google.com --- arch/arm64/kvm/hyp/nvhe/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/kvm/hyp') diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index 0bd7701ad1df..543cad6c376a 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -241,7 +241,7 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, int i; hyp_spin_lock_init(&pool->lock); - pool->max_order = min(MAX_ORDER, get_order(nr_pages << PAGE_SHIFT)); + pool->max_order = min(MAX_ORDER, get_order((nr_pages + 1) << PAGE_SHIFT)); for (i = 0; i < pool->max_order; i++) INIT_LIST_HEAD(&pool->free_area[i]); pool->range_start = phys; -- cgit v1.2.3 From 53a563b01fa2ae2376a0b7d547f26a0ae9c78b5c Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Wed, 8 Dec 2021 15:22:56 +0000 Subject: KVM: arm64: Make the hyp memory pool static The hyp memory pool struct is sized to fit exactly the needs of the hypervisor stage-1 page-table allocator, so it is important it is not used for anything else. As it is currently used only from setup.c, reduce its visibility by marking it static. Signed-off-by: Quentin Perret Reviewed-by: Andrew Walbran Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211208152300.2478542-4-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mm.h | 1 - arch/arm64/kvm/hyp/nvhe/setup.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/arm64/kvm/hyp') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index c9a8f535212e..75f58b783fd7 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -15,7 +15,6 @@ extern struct memblock_region kvm_nvhe_sym(hyp_memory)[]; extern unsigned int kvm_nvhe_sym(hyp_memblock_nr); extern struct kvm_pgtable pkvm_pgtable; extern hyp_spinlock_t pkvm_pgd_lock; -extern struct hyp_pool hpool; extern u64 __io_map_base; int hyp_create_idmap(u32 hyp_va_bits); diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 578f71798c2e..e35709367598 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -17,7 +17,6 @@ #include #include -struct hyp_pool hpool; unsigned long hyp_nr_cpus; #define hyp_percpu_size ((unsigned long)__per_cpu_end - \ @@ -27,6 +26,7 @@ static void *vmemmap_base; static void *hyp_pgt_base; static void *host_s2_pgt_base; static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops; +static struct hyp_pool hpool; static int divide_memory_pool(void *virt, unsigned long size) { -- cgit v1.2.3 From 473a3efbafaa9ffd06c8b8f653f24c97b5ac3ff0 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Wed, 8 Dec 2021 15:22:57 +0000 Subject: KVM: arm64: Make __io_map_base static The __io_map_base variable is used at EL2 to track the end of the hypervisor's "private" VA range in nVHE protected mode. However it doesn't need to be used outside of mm.c, so let's make it static to keep all the hyp VA allocation logic in one place. Signed-off-by: Quentin Perret Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211208152300.2478542-5-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mm.h | 1 - arch/arm64/kvm/hyp/nvhe/mm.c | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kvm/hyp') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index 75f58b783fd7..5ed5ba637d00 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -15,7 +15,6 @@ extern struct memblock_region kvm_nvhe_sym(hyp_memory)[]; extern unsigned int kvm_nvhe_sym(hyp_memblock_nr); extern struct kvm_pgtable pkvm_pgtable; extern hyp_spinlock_t pkvm_pgd_lock; -extern u64 __io_map_base; int hyp_create_idmap(u32 hyp_va_bits); int hyp_map_vectors(void); diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index 2fabeceb889a..e78fd8e1beef 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -18,11 +18,12 @@ struct kvm_pgtable pkvm_pgtable; hyp_spinlock_t pkvm_pgd_lock; -u64 __io_map_base; struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS]; unsigned int hyp_memblock_nr; +static u64 __io_map_base; + static int __pkvm_create_mappings(unsigned long start, unsigned long size, unsigned long phys, enum kvm_pgtable_prot prot) { -- cgit v1.2.3 From 1fac3cfb9cc60d71b66ee5127b2bc5b5f9f79df8 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Wed, 15 Dec 2021 16:12:18 +0000 Subject: KVM: arm64: Provide {get,put}_page() stubs for early hyp allocator In nVHE protected mode, the EL2 code uses a temporary allocator during boot while re-creating its stage-1 page-table. Unfortunately, the hyp_vmmemap is not ready to use at this stage, so refcounting pages is not possible. That is not currently a problem because hyp stage-1 mappings are never removed, which implies refcounting of page-table pages is unnecessary. In preparation for allowing hypervisor stage-1 mappings to be removed, provide stub implementations for {get,put}_page() in the early allocator. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211215161232.1480836-2-qperret@google.com --- arch/arm64/kvm/hyp/nvhe/early_alloc.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/arm64/kvm/hyp') diff --git a/arch/arm64/kvm/hyp/nvhe/early_alloc.c b/arch/arm64/kvm/hyp/nvhe/early_alloc.c index 1306c430ab87..00de04153cc6 100644 --- a/arch/arm64/kvm/hyp/nvhe/early_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/early_alloc.c @@ -43,6 +43,9 @@ void *hyp_early_alloc_page(void *arg) return hyp_early_alloc_contig(1); } +static void hyp_early_alloc_get_page(void *addr) { } +static void hyp_early_alloc_put_page(void *addr) { } + void hyp_early_alloc_init(void *virt, unsigned long size) { base = cur = (unsigned long)virt; @@ -51,4 +54,6 @@ void hyp_early_alloc_init(void *virt, unsigned long size) hyp_early_alloc_mm_ops.zalloc_page = hyp_early_alloc_page; hyp_early_alloc_mm_ops.phys_to_virt = hyp_phys_to_virt; hyp_early_alloc_mm_ops.virt_to_phys = hyp_virt_to_phys; + hyp_early_alloc_mm_ops.get_page = hyp_early_alloc_get_page; + hyp_early_alloc_mm_ops.put_page = hyp_early_alloc_put_page; } -- cgit v1.2.3 From 2ea2ff91e82293909d4879b0b4c6c94b02d52b7e Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Wed, 15 Dec 2021 16:12:19 +0000 Subject: KVM: arm64: Refcount hyp stage-1 pgtable pages To prepare the ground for allowing hyp stage-1 mappings to be removed at run-time, update the KVM page-table code to maintain a correct refcount using the ->{get,put}_page() function callbacks. Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211215161232.1480836-3-qperret@google.com --- arch/arm64/kvm/hyp/pgtable.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) (limited to 'arch/arm64/kvm/hyp') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index f8ceebe4982e..e50e9158fc56 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -383,21 +383,6 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte) return prot; } -static bool hyp_pte_needs_update(kvm_pte_t old, kvm_pte_t new) -{ - /* - * Tolerate KVM recreating the exact same mapping, or changing software - * bits if the existing mapping was valid. - */ - if (old == new) - return false; - - if (!kvm_pte_valid(old)) - return true; - - return !WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW); -} - static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct hyp_map_data *data) { @@ -407,11 +392,16 @@ static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level, if (!kvm_block_mapping_supported(addr, end, phys, level)) return false; + data->phys += granule; new = kvm_init_valid_leaf_pte(phys, data->attr, level); - if (hyp_pte_needs_update(old, new)) - smp_store_release(ptep, new); + if (old == new) + return true; + if (!kvm_pte_valid(old)) + data->mm_ops->get_page(ptep); + else if (WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) + return false; - data->phys += granule; + smp_store_release(ptep, new); return true; } @@ -433,6 +423,7 @@ static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, return -ENOMEM; kvm_set_table_pte(ptep, childp, mm_ops); + mm_ops->get_page(ptep); return 0; } @@ -482,8 +473,16 @@ static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, enum kvm_pgtable_walk_flags flag, void * const arg) { struct kvm_pgtable_mm_ops *mm_ops = arg; + kvm_pte_t pte = *ptep; + + if (!kvm_pte_valid(pte)) + return 0; + + mm_ops->put_page(ptep); + + if (kvm_pte_table(pte, level)) + mm_ops->put_page(kvm_pte_follow(pte, mm_ops)); - mm_ops->put_page((void *)kvm_pte_follow(*ptep, mm_ops)); return 0; } @@ -491,7 +490,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) { struct kvm_pgtable_walker walker = { .cb = hyp_free_walker, - .flags = KVM_PGTABLE_WALK_TABLE_POST, + .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, .arg = pgt->mm_ops, }; -- cgit v1.2.3 From d6b4bd3f4897f3b60ac9e8c9e2f0300e739b3392 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Wed, 15 Dec 2021 16:12:20 +0000 Subject: KVM: arm64: Fixup hyp stage-1 refcount In nVHE-protected mode, the hyp stage-1 page-table refcount is broken due to the lack of refcount support in the early allocator. Fix-up the refcount in the finalize walker, once the 'hyp_vmemmap' is up and running. Acked-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211215161232.1480836-4-qperret@google.com --- arch/arm64/kvm/hyp/nvhe/setup.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'arch/arm64/kvm/hyp') diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 578f71798c2e..875b5174342f 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -165,6 +165,7 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, enum kvm_pgtable_walk_flags flag, void * const arg) { + struct kvm_pgtable_mm_ops *mm_ops = arg; enum kvm_pgtable_prot prot; enum pkvm_page_state state; kvm_pte_t pte = *ptep; @@ -173,6 +174,15 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, if (!kvm_pte_valid(pte)) return 0; + /* + * Fix-up the refcount for the page-table pages as the early allocator + * was unable to access the hyp_vmemmap and so the buddy allocator has + * initialised the refcount to '1'. + */ + mm_ops->get_page(ptep); + if (flag != KVM_PGTABLE_WALK_LEAF) + return 0; + if (level != (KVM_PGTABLE_MAX_LEVELS - 1)) return -EINVAL; @@ -205,7 +215,8 @@ static int finalize_host_mappings(void) { struct kvm_pgtable_walker walker = { .cb = finalize_host_mappings_walker, - .flags = KVM_PGTABLE_WALK_LEAF, + .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, + .arg = pkvm_pgtable.mm_ops, }; int i, ret; @@ -240,10 +251,6 @@ void __noreturn __pkvm_init_finalise(void) if (ret) goto out; - ret = finalize_host_mappings(); - if (ret) - goto out; - pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) { .zalloc_page = hyp_zalloc_hyp_page, .phys_to_virt = hyp_phys_to_virt, @@ -253,6 +260,10 @@ void __noreturn __pkvm_init_finalise(void) }; pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops; + ret = finalize_host_mappings(); + if (ret) + goto out; + out: /* * We tail-called to here from handle___pkvm_init() and will not return, -- cgit v1.2.3 From 34ec7cbf1ee0c45e66a0c24311bcd5b83b7109f5 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 15 Dec 2021 16:12:21 +0000 Subject: KVM: arm64: Hook up ->page_count() for hypervisor stage-1 page-table kvm_pgtable_hyp_unmap() relies on the ->page_count() function callback being provided by the memory-management operations for the page-table. Wire up this callback for the hypervisor stage-1 page-table. Signed-off-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211215161232.1480836-5-qperret@google.com --- arch/arm64/kvm/hyp/nvhe/setup.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arm64/kvm/hyp') diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 875b5174342f..855a19056627 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -257,6 +257,7 @@ void __noreturn __pkvm_init_finalise(void) .virt_to_phys = hyp_virt_to_phys, .get_page = hpool_get_page, .put_page = hpool_put_page, + .page_count = hyp_page_count, }; pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops; -- cgit v1.2.3 From 82bb02445de57bb3072052705f6f5dea9465592e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 15 Dec 2021 16:12:22 +0000 Subject: KVM: arm64: Implement kvm_pgtable_hyp_unmap() at EL2 Implement kvm_pgtable_hyp_unmap() which can be used to remove hypervisor stage-1 mappings at EL2. Signed-off-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211215161232.1480836-6-qperret@google.com --- arch/arm64/kvm/hyp/pgtable.c | 63 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) (limited to 'arch/arm64/kvm/hyp') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index e50e9158fc56..adc73f8cd24f 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -451,6 +451,69 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, return ret; } +struct hyp_unmap_data { + u64 unmapped; + struct kvm_pgtable_mm_ops *mm_ops; +}; + +static int hyp_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, void * const arg) +{ + kvm_pte_t pte = *ptep, *childp = NULL; + u64 granule = kvm_granule_size(level); + struct hyp_unmap_data *data = arg; + struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; + + if (!kvm_pte_valid(pte)) + return -EINVAL; + + if (kvm_pte_table(pte, level)) { + childp = kvm_pte_follow(pte, mm_ops); + + if (mm_ops->page_count(childp) != 1) + return 0; + + kvm_clear_pte(ptep); + dsb(ishst); + __tlbi_level(vae2is, __TLBI_VADDR(addr, 0), level); + } else { + if (end - addr < granule) + return -EINVAL; + + kvm_clear_pte(ptep); + dsb(ishst); + __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level); + data->unmapped += granule; + } + + dsb(ish); + isb(); + mm_ops->put_page(ptep); + + if (childp) + mm_ops->put_page(childp); + + return 0; +} + +u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) +{ + struct hyp_unmap_data unmap_data = { + .mm_ops = pgt->mm_ops, + }; + struct kvm_pgtable_walker walker = { + .cb = hyp_unmap_walker, + .arg = &unmap_data, + .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, + }; + + if (!pgt->mm_ops->page_count) + return 0; + + kvm_pgtable_walk(pgt, addr, size, &walker); + return unmap_data.unmapped; +} + int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, struct kvm_pgtable_mm_ops *mm_ops) { -- cgit v1.2.3 From 3d467f7b8c0a179a10aa4e9f17cd2d3c3b7e5403 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 15 Dec 2021 16:12:25 +0000 Subject: KVM: arm64: Extend pkvm_page_state enumeration to handle absent pages Explicitly name the combination of SW0 | SW1 as reserved in the pte and introduce a new PKVM_NOPAGE meta-state which, although not directly stored in the software bits of the pte, can be used to represent an entry for which there is no underlying page. This is distinct from an invalid pte, as stage-2 identity mappings for the host are created lazily and so an invalid pte there is the same as a valid mapping for the purposes of ownership information. This state will be used for permission checking during page transitions in later patches. Reviewed-by: Andrew Walbran Signed-off-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211215161232.1480836-9-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/arm64/kvm/hyp') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index b58c910babaf..56445586c755 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -24,6 +24,11 @@ enum pkvm_page_state { PKVM_PAGE_OWNED = 0ULL, PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0, PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1, + __PKVM_PAGE_RESERVED = KVM_PGTABLE_PROT_SW0 | + KVM_PGTABLE_PROT_SW1, + + /* Meta-states which aren't encoded directly in the PTE's SW bits */ + PKVM_NOPAGE, }; #define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1) -- cgit v1.2.3 From 61d99e33e757a21b47b8b130e49dcbdfaa5d2b1c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 15 Dec 2021 16:12:26 +0000 Subject: KVM: arm64: Introduce wrappers for host and hyp spin lock accessors In preparation for adding additional locked sections for manipulating page-tables at EL2, introduce some simple wrappers around the host and hypervisor locks so that it's a bit easier to read and bit more difficult to take the wrong lock (or even take them in the wrong order). Signed-off-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211215161232.1480836-10-qperret@google.com --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) (limited to 'arch/arm64/kvm/hyp') diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index c1a90dd022b8..757dfefe3aeb 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -27,6 +27,26 @@ static struct hyp_pool host_s2_pool; const u8 pkvm_hyp_id = 1; +static void host_lock_component(void) +{ + hyp_spin_lock(&host_kvm.lock); +} + +static void host_unlock_component(void) +{ + hyp_spin_unlock(&host_kvm.lock); +} + +static void hyp_lock_component(void) +{ + hyp_spin_lock(&pkvm_pgd_lock); +} + +static void hyp_unlock_component(void) +{ + hyp_spin_unlock(&pkvm_pgd_lock); +} + static void *host_s2_zalloc_pages_exact(size_t size) { void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size)); @@ -338,14 +358,14 @@ static int host_stage2_idmap(u64 addr) prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT; - hyp_spin_lock(&host_kvm.lock); + host_lock_component(); ret = host_stage2_adjust_range(addr, &range); if (ret) goto unlock; ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot); unlock: - hyp_spin_unlock(&host_kvm.lock); + host_unlock_component(); return ret; } @@ -369,8 +389,8 @@ int __pkvm_host_share_hyp(u64 pfn) if (!addr_is_memory(addr)) return -EINVAL; - hyp_spin_lock(&host_kvm.lock); - hyp_spin_lock(&pkvm_pgd_lock); + host_lock_component(); + hyp_lock_component(); ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, NULL); if (ret) @@ -432,8 +452,8 @@ map_shared: BUG_ON(ret); unlock: - hyp_spin_unlock(&pkvm_pgd_lock); - hyp_spin_unlock(&host_kvm.lock); + hyp_unlock_component(); + host_unlock_component(); return ret; } -- cgit v1.2.3 From e82edcc75c4e2389a3d7223c4ef1737bd9a07e5d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 15 Dec 2021 16:12:27 +0000 Subject: KVM: arm64: Implement do_share() helper for sharing memory By default, protected KVM isolates memory pages so that they are accessible only to their owner: be it the host kernel, the hypervisor at EL2 or (in future) the guest. Establishing shared-memory regions between these components therefore involves a transition for each page so that the owner can share memory with a borrower under a certain set of permissions. Introduce a do_share() helper for safely sharing a memory region between two components. Currently, only host-to-hyp sharing is implemented, but the code is easily extended to handle other combinations and the permission checks for each component are reusable. Reviewed-by: Andrew Walbran Signed-off-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211215161232.1480836-11-qperret@google.com --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 237 ++++++++++++++++++++++++++++++++++ 1 file changed, 237 insertions(+) (limited to 'arch/arm64/kvm/hyp') diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 757dfefe3aeb..e612fd9d8975 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -471,3 +471,240 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) ret = host_stage2_idmap(addr); BUG_ON(ret && ret != -EAGAIN); } + +/* This corresponds to locking order */ +enum pkvm_component_id { + PKVM_ID_HOST, + PKVM_ID_HYP, +}; + +struct pkvm_mem_transition { + u64 nr_pages; + + struct { + enum pkvm_component_id id; + /* Address in the initiator's address space */ + u64 addr; + + union { + struct { + /* Address in the completer's address space */ + u64 completer_addr; + } host; + }; + } initiator; + + struct { + enum pkvm_component_id id; + } completer; +}; + +struct pkvm_mem_share { + const struct pkvm_mem_transition tx; + const enum kvm_pgtable_prot completer_prot; +}; + +struct check_walk_data { + enum pkvm_page_state desired; + enum pkvm_page_state (*get_page_state)(kvm_pte_t pte); +}; + +static int __check_page_state_visitor(u64 addr, u64 end, u32 level, + kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, + void * const arg) +{ + struct check_walk_data *d = arg; + kvm_pte_t pte = *ptep; + + if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte))) + return -EINVAL; + + return d->get_page_state(pte) == d->desired ? 0 : -EPERM; +} + +static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size, + struct check_walk_data *data) +{ + struct kvm_pgtable_walker walker = { + .cb = __check_page_state_visitor, + .arg = data, + .flags = KVM_PGTABLE_WALK_LEAF, + }; + + return kvm_pgtable_walk(pgt, addr, size, &walker); +} + +static enum pkvm_page_state host_get_page_state(kvm_pte_t pte) +{ + if (!kvm_pte_valid(pte) && pte) + return PKVM_NOPAGE; + + return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)); +} + +static int __host_check_page_state_range(u64 addr, u64 size, + enum pkvm_page_state state) +{ + struct check_walk_data d = { + .desired = state, + .get_page_state = host_get_page_state, + }; + + hyp_assert_lock_held(&host_kvm.lock); + return check_page_state_range(&host_kvm.pgt, addr, size, &d); +} + +static int __host_set_page_state_range(u64 addr, u64 size, + enum pkvm_page_state state) +{ + enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state); + + return host_stage2_idmap_locked(addr, size, prot); +} + +static int host_request_owned_transition(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + u64 addr = tx->initiator.addr; + + *completer_addr = tx->initiator.host.completer_addr; + return __host_check_page_state_range(addr, size, PKVM_PAGE_OWNED); +} + +static int host_initiate_share(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + u64 addr = tx->initiator.addr; + + *completer_addr = tx->initiator.host.completer_addr; + return __host_set_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED); +} + +static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte) +{ + if (!kvm_pte_valid(pte)) + return PKVM_NOPAGE; + + return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)); +} + +static int __hyp_check_page_state_range(u64 addr, u64 size, + enum pkvm_page_state state) +{ + struct check_walk_data d = { + .desired = state, + .get_page_state = hyp_get_page_state, + }; + + hyp_assert_lock_held(&pkvm_pgd_lock); + return check_page_state_range(&pkvm_pgtable, addr, size, &d); +} + +static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx) +{ + return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) || + tx->initiator.id != PKVM_ID_HOST); +} + +static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx, + enum kvm_pgtable_prot perms) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + + if (perms != PAGE_HYP) + return -EPERM; + + if (__hyp_ack_skip_pgtable_check(tx)) + return 0; + + return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE); +} + +static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx, + enum kvm_pgtable_prot perms) +{ + void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE); + enum kvm_pgtable_prot prot; + + prot = pkvm_mkstate(perms, PKVM_PAGE_SHARED_BORROWED); + return pkvm_create_mappings_locked(start, end, prot); +} + +static int check_share(struct pkvm_mem_share *share) +{ + const struct pkvm_mem_transition *tx = &share->tx; + u64 completer_addr; + int ret; + + switch (tx->initiator.id) { + case PKVM_ID_HOST: + ret = host_request_owned_transition(&completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + if (ret) + return ret; + + switch (tx->completer.id) { + case PKVM_ID_HYP: + ret = hyp_ack_share(completer_addr, tx, share->completer_prot); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static int __do_share(struct pkvm_mem_share *share) +{ + const struct pkvm_mem_transition *tx = &share->tx; + u64 completer_addr; + int ret; + + switch (tx->initiator.id) { + case PKVM_ID_HOST: + ret = host_initiate_share(&completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + if (ret) + return ret; + + switch (tx->completer.id) { + case PKVM_ID_HYP: + ret = hyp_complete_share(completer_addr, tx, share->completer_prot); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +/* + * do_share(): + * + * The page owner grants access to another component with a given set + * of permissions. + * + * Initiator: OWNED => SHARED_OWNED + * Completer: NOPAGE => SHARED_BORROWED + */ +static int do_share(struct pkvm_mem_share *share) +{ + int ret; + + ret = check_share(share); + if (ret) + return ret; + + return WARN_ON(__do_share(share)); +} -- cgit v1.2.3 From 1ee32109fd78720259f7431740897d37ebcd84f6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 15 Dec 2021 16:12:28 +0000 Subject: KVM: arm64: Implement __pkvm_host_share_hyp() using do_share() __pkvm_host_share_hyp() shares memory between the host and the hypervisor so implement it as an invocation of the new do_share() mechanism. Note that double-sharing is no longer permitted (as this allows us to reduce the number of page-table walks significantly), but is thankfully no longer relied upon by the host. Signed-off-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211215161232.1480836-12-qperret@google.com --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 121 ++++++++++------------------------ 1 file changed, 33 insertions(+), 88 deletions(-) (limited to 'arch/arm64/kvm/hyp') diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index e612fd9d8975..492b9930609d 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -370,94 +370,6 @@ unlock: return ret; } -static inline bool check_prot(enum kvm_pgtable_prot prot, - enum kvm_pgtable_prot required, - enum kvm_pgtable_prot denied) -{ - return (prot & (required | denied)) == required; -} - -int __pkvm_host_share_hyp(u64 pfn) -{ - phys_addr_t addr = hyp_pfn_to_phys(pfn); - enum kvm_pgtable_prot prot, cur; - void *virt = __hyp_va(addr); - enum pkvm_page_state state; - kvm_pte_t pte; - int ret; - - if (!addr_is_memory(addr)) - return -EINVAL; - - host_lock_component(); - hyp_lock_component(); - - ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, NULL); - if (ret) - goto unlock; - if (!pte) - goto map_shared; - - /* - * Check attributes in the host stage-2 PTE. We need the page to be: - * - mapped RWX as we're sharing memory; - * - not borrowed, as that implies absence of ownership. - * Otherwise, we can't let it got through - */ - cur = kvm_pgtable_stage2_pte_prot(pte); - prot = pkvm_mkstate(0, PKVM_PAGE_SHARED_BORROWED); - if (!check_prot(cur, PKVM_HOST_MEM_PROT, prot)) { - ret = -EPERM; - goto unlock; - } - - state = pkvm_getstate(cur); - if (state == PKVM_PAGE_OWNED) - goto map_shared; - - /* - * Tolerate double-sharing the same page, but this requires - * cross-checking the hypervisor stage-1. - */ - if (state != PKVM_PAGE_SHARED_OWNED) { - ret = -EPERM; - goto unlock; - } - - ret = kvm_pgtable_get_leaf(&pkvm_pgtable, (u64)virt, &pte, NULL); - if (ret) - goto unlock; - - /* - * If the page has been shared with the hypervisor, it must be - * already mapped as SHARED_BORROWED in its stage-1. - */ - cur = kvm_pgtable_hyp_pte_prot(pte); - prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED); - if (!check_prot(cur, prot, ~prot)) - ret = -EPERM; - goto unlock; - -map_shared: - /* - * If the page is not yet shared, adjust mappings in both page-tables - * while both locks are held. - */ - prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED); - ret = pkvm_create_mappings_locked(virt, virt + PAGE_SIZE, prot); - BUG_ON(ret); - - prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED); - ret = host_stage2_idmap_locked(addr, PAGE_SIZE, prot); - BUG_ON(ret); - -unlock: - hyp_unlock_component(); - host_unlock_component(); - - return ret; -} - void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) { struct kvm_vcpu_fault_info fault; @@ -708,3 +620,36 @@ static int do_share(struct pkvm_mem_share *share) return WARN_ON(__do_share(share)); } + +int __pkvm_host_share_hyp(u64 pfn) +{ + int ret; + u64 host_addr = hyp_pfn_to_phys(pfn); + u64 hyp_addr = (u64)__hyp_va(host_addr); + struct pkvm_mem_share share = { + .tx = { + .nr_pages = 1, + .initiator = { + .id = PKVM_ID_HOST, + .addr = host_addr, + .host = { + .completer_addr = hyp_addr, + }, + }, + .completer = { + .id = PKVM_ID_HYP, + }, + }, + .completer_prot = PAGE_HYP, + }; + + host_lock_component(); + hyp_lock_component(); + + ret = do_share(&share); + + hyp_unlock_component(); + host_unlock_component(); + + return ret; +} -- cgit v1.2.3 From 376a240f037959c2b9a2486e53bcd8d388cbec17 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 15 Dec 2021 16:12:29 +0000 Subject: KVM: arm64: Implement do_unshare() helper for unsharing memory Tearing down a previously shared memory region results in the borrower losing access to the underlying pages and returning them to the "owned" state in the owner. Implement a do_unshare() helper, along the same lines as do_share(), to provide this functionality for the host-to-hyp case. Reviewed-by: Andrew Walbran Signed-off-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211215161232.1480836-13-qperret@google.com --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 115 ++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) (limited to 'arch/arm64/kvm/hyp') diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 492b9930609d..06973a93db00 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -485,6 +485,16 @@ static int host_request_owned_transition(u64 *completer_addr, return __host_check_page_state_range(addr, size, PKVM_PAGE_OWNED); } +static int host_request_unshare(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + u64 addr = tx->initiator.addr; + + *completer_addr = tx->initiator.host.completer_addr; + return __host_check_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED); +} + static int host_initiate_share(u64 *completer_addr, const struct pkvm_mem_transition *tx) { @@ -495,6 +505,16 @@ static int host_initiate_share(u64 *completer_addr, return __host_set_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED); } +static int host_initiate_unshare(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + u64 addr = tx->initiator.addr; + + *completer_addr = tx->initiator.host.completer_addr; + return __host_set_page_state_range(addr, size, PKVM_PAGE_OWNED); +} + static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte) { if (!kvm_pte_valid(pte)) @@ -535,6 +555,17 @@ static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx, return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE); } +static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + + if (__hyp_ack_skip_pgtable_check(tx)) + return 0; + + return __hyp_check_page_state_range(addr, size, + PKVM_PAGE_SHARED_BORROWED); +} + static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx, enum kvm_pgtable_prot perms) { @@ -545,6 +576,14 @@ static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx, return pkvm_create_mappings_locked(start, end, prot); } +static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, addr, size); + + return (ret != size) ? -EFAULT : 0; +} + static int check_share(struct pkvm_mem_share *share) { const struct pkvm_mem_transition *tx = &share->tx; @@ -621,6 +660,82 @@ static int do_share(struct pkvm_mem_share *share) return WARN_ON(__do_share(share)); } +static int check_unshare(struct pkvm_mem_share *share) +{ + const struct pkvm_mem_transition *tx = &share->tx; + u64 completer_addr; + int ret; + + switch (tx->initiator.id) { + case PKVM_ID_HOST: + ret = host_request_unshare(&completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + if (ret) + return ret; + + switch (tx->completer.id) { + case PKVM_ID_HYP: + ret = hyp_ack_unshare(completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static int __do_unshare(struct pkvm_mem_share *share) +{ + const struct pkvm_mem_transition *tx = &share->tx; + u64 completer_addr; + int ret; + + switch (tx->initiator.id) { + case PKVM_ID_HOST: + ret = host_initiate_unshare(&completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + if (ret) + return ret; + + switch (tx->completer.id) { + case PKVM_ID_HYP: + ret = hyp_complete_unshare(completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +/* + * do_unshare(): + * + * The page owner revokes access from another component for a range of + * pages which were previously shared using do_share(). + * + * Initiator: SHARED_OWNED => OWNED + * Completer: SHARED_BORROWED => NOPAGE + */ +static int do_unshare(struct pkvm_mem_share *share) +{ + int ret; + + ret = check_unshare(share); + if (ret) + return ret; + + return WARN_ON(__do_unshare(share)); +} + int __pkvm_host_share_hyp(u64 pfn) { int ret; -- cgit v1.2.3 From b8cc6eb5bded7078f796b2ebf548f79850281eb6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 15 Dec 2021 16:12:30 +0000 Subject: KVM: arm64: Expose unshare hypercall to the host Introduce an unshare hypercall which can be used to unmap memory from the hypervisor stage-1 in nVHE protected mode. This will be useful to update the EL2 ownership state of pages during guest teardown, and avoids keeping dangling mappings to unreferenced portions of memory. Signed-off-by: Will Deacon Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211215161232.1480836-14-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 1 + arch/arm64/kvm/hyp/nvhe/hyp-main.c | 8 +++++++ arch/arm64/kvm/hyp/nvhe/mem_protect.c | 33 +++++++++++++++++++++++++++ 3 files changed, 42 insertions(+) (limited to 'arch/arm64/kvm/hyp') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 56445586c755..80e99836eac7 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -55,6 +55,7 @@ extern const u8 pkvm_hyp_id; int __pkvm_prot_finalize(void); int __pkvm_host_share_hyp(u64 pfn); +int __pkvm_host_unshare_hyp(u64 pfn); bool addr_is_memory(phys_addr_t phys); int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index b096bf009144..5e2197db0d32 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -147,6 +147,13 @@ static void handle___pkvm_host_share_hyp(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = __pkvm_host_share_hyp(pfn); } +static void handle___pkvm_host_unshare_hyp(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(u64, pfn, host_ctxt, 1); + + cpu_reg(host_ctxt, 1) = __pkvm_host_unshare_hyp(pfn); +} + static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(phys_addr_t, phys, host_ctxt, 1); @@ -184,6 +191,7 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__pkvm_prot_finalize), HANDLE_FUNC(__pkvm_host_share_hyp), + HANDLE_FUNC(__pkvm_host_unshare_hyp), HANDLE_FUNC(__kvm_adjust_pc), HANDLE_FUNC(__kvm_vcpu_run), HANDLE_FUNC(__kvm_flush_vm_context), diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 06973a93db00..33c105ddb6b7 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -768,3 +768,36 @@ int __pkvm_host_share_hyp(u64 pfn) return ret; } + +int __pkvm_host_unshare_hyp(u64 pfn) +{ + int ret; + u64 host_addr = hyp_pfn_to_phys(pfn); + u64 hyp_addr = (u64)__hyp_va(host_addr); + struct pkvm_mem_share share = { + .tx = { + .nr_pages = 1, + .initiator = { + .id = PKVM_ID_HOST, + .addr = host_addr, + .host = { + .completer_addr = hyp_addr, + }, + }, + .completer = { + .id = PKVM_ID_HYP, + }, + }, + .completer_prot = PAGE_HYP, + }; + + host_lock_component(); + hyp_lock_component(); + + ret = do_unshare(&share); + + hyp_unlock_component(); + host_unlock_component(); + + return ret; +} -- cgit v1.2.3 From 9d8604b28575ccab3afd8d6f56cab9a6c0d281ef Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 29 Nov 2021 20:00:45 +0000 Subject: KVM: arm64: Rework kvm_pgtable initialisation Ganapatrao reported that the kvm_pgtable->mmu pointer is more or less hardcoded to the main S2 mmu structure, while the nested code needs it to point to other instances (as we have one instance per nested context). Rework the initialisation of the kvm_pgtable structure so that this assumtion doesn't hold true anymore. This requires some minor changes to the order in which things are initialised (the mmu->arch pointer being the critical one). Reported-by: Ganapatrao Kulkarni Reviewed-by: Ganapatrao Kulkarni Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211129200150.351436-5-maz@kernel.org --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 4 ++-- arch/arm64/kvm/hyp/pgtable.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/arm64/kvm/hyp') diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index c1a90dd022b8..7d7b7037dc68 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -103,19 +103,19 @@ int kvm_host_prepare_stage2(void *pgt_pool_base) prepare_host_vtcr(); hyp_spin_lock_init(&host_kvm.lock); + mmu->arch = &host_kvm.arch; ret = prepare_s2_pool(pgt_pool_base); if (ret) return ret; - ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, &host_kvm.arch, + ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, mmu, &host_kvm.mm_ops, KVM_HOST_S2_FLAGS, host_stage2_force_pte_cb); if (ret) return ret; mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd); - mmu->arch = &host_kvm.arch; mmu->pgt = &host_kvm.pgt; WRITE_ONCE(mmu->vmid.vmid_gen, 0); WRITE_ONCE(mmu->vmid.vmid, 0); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index f8ceebe4982e..8cdbc43fa651 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -1116,13 +1116,13 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) } -int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch, +int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, struct kvm_pgtable_mm_ops *mm_ops, enum kvm_pgtable_stage2_flags flags, kvm_pgtable_force_pte_cb_t force_pte_cb) { size_t pgd_sz; - u64 vtcr = arch->vtcr; + u64 vtcr = mmu->arch->vtcr; u32 ia_bits = VTCR_EL2_IPA(vtcr); u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; @@ -1135,7 +1135,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch, pgt->ia_bits = ia_bits; pgt->start_level = start_level; pgt->mm_ops = mm_ops; - pgt->mmu = &arch->mmu; + pgt->mmu = mmu; pgt->flags = flags; pgt->force_pte_cb = force_pte_cb; -- cgit v1.2.3