From e9ada6c208c15c907afe5afb1aa82e23e81eb8ba Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sat, 28 May 2022 12:38:13 +0100
Subject: KVM: arm64: Drop FP_FOREIGN_STATE from the hypervisor code

The vcpu KVM_ARM64_FP_FOREIGN_FPSTATE flag tracks the thread's own
TIF_FOREIGN_FPSTATE so that we can evaluate, just before running the
vcpu, whether the FP regs contain something that is owned by the vcpu
or not, by updating the rest of the FP flags.

We do this in the hypervisor code in order to make sure we're in a
context where we are not interruptible. But we already have a hook in
the run loop to generate this flag. We may as well update the FP flags
directly and save the pointless flag tracking.

Whilst we're at it, rename update_fp_enabled() to guest_owns_fp_regs()
to indicate what the leftover of this helper actually does.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Reiji Watanabe <reijiw@google.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
---
 arch/arm64/kvm/fpsimd.c                 | 17 ++++++++++-------
 arch/arm64/kvm/hyp/include/hyp/switch.h | 16 ++--------------
 arch/arm64/kvm/hyp/nvhe/switch.c        |  2 +-
 arch/arm64/kvm/hyp/vhe/switch.c         |  2 +-
 4 files changed, 14 insertions(+), 23 deletions(-)

(limited to 'arch/arm64/kvm')

diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 6012b08ecb14..edbc0183c89b 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -107,16 +107,19 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 }
 
 /*
- * Called just before entering the guest once we are no longer
- * preemptable. Syncs the host's TIF_FOREIGN_FPSTATE with the KVM
- * mirror of the flag used by the hypervisor.
+ * Called just before entering the guest once we are no longer preemptable
+ * and interrupts are disabled. If we have managed to run anything using
+ * FP while we were preemptible (such as off the back of an interrupt),
+ * then neither the host nor the guest own the FP hardware (and it was the
+ * responsibility of the code that used FP to save the existing state).
+ *
+ * Note that not supporting FP is basically the same thing as far as the
+ * hypervisor is concerned (nothing to save).
  */
 void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu)
 {
-	if (test_thread_flag(TIF_FOREIGN_FPSTATE))
-		vcpu->arch.flags |= KVM_ARM64_FP_FOREIGN_FPSTATE;
-	else
-		vcpu->arch.flags &= ~KVM_ARM64_FP_FOREIGN_FPSTATE;
+	if (!system_supports_fpsimd() || test_thread_flag(TIF_FOREIGN_FPSTATE))
+		vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | KVM_ARM64_FP_HOST);
 }
 
 /*
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 37d9f211c200..e54320384943 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -37,21 +37,9 @@ struct kvm_exception_table_entry {
 extern struct kvm_exception_table_entry __start___kvm_ex_table;
 extern struct kvm_exception_table_entry __stop___kvm_ex_table;
 
-/* Check whether the FP regs were dirtied while in the host-side run loop: */
-static inline bool update_fp_enabled(struct kvm_vcpu *vcpu)
+/* Check whether the FP regs are owned by the guest */
+static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu)
 {
-	/*
-	 * When the system doesn't support FP/SIMD, we cannot rely on
-	 * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an
-	 * abort on the very first access to FP and thus we should never
-	 * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always
-	 * trap the accesses.
-	 */
-	if (!system_supports_fpsimd() ||
-	    vcpu->arch.flags & KVM_ARM64_FP_FOREIGN_FPSTATE)
-		vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
-				      KVM_ARM64_FP_HOST);
-
 	return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
 }
 
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 6db801db8f27..a6b9f1186577 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -43,7 +43,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
 	val = vcpu->arch.cptr_el2;
 	val |= CPTR_EL2_TTA | CPTR_EL2_TAM;
-	if (!update_fp_enabled(vcpu)) {
+	if (!guest_owns_fp_regs(vcpu)) {
 		val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
 		__activate_traps_fpsimd32(vcpu);
 	}
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 969f20daf97a..46f365254e9f 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -55,7 +55,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
 
 	val |= CPTR_EL2_TAM;
 
-	if (update_fp_enabled(vcpu)) {
+	if (guest_owns_fp_regs(vcpu)) {
 		if (vcpu_has_sve(vcpu))
 			val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
 	} else {
-- cgit v1.2.3

From f8077b0d59230cbb58e0b98839e04b564529a5ac Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sat, 28 May 2022 12:38:14 +0100
Subject: KVM: arm64: Move FP state ownership from flag to a tristate

The KVM FP code uses a pair of flags to denote three states:

- FP_ENABLED set: the guest owns the FP state
- FP_HOST set: the host owns the FP state
- FP_ENABLED and FP_HOST clear: nobody owns the FP state at all

and both flags set is an illegal state, which nothing ever checks
for...

As it turns out, this isn't really a good match for flags, and we'd be
better off if this was a simpler tristate, each state having a name
that actually reflects the state:

- FP_STATE_FREE
- FP_STATE_HOST_OWNED
- FP_STATE_GUEST_OWNED

Kill the two flags, and move over to an enum encoding these three
states. This results in less confusing code, and less risk of ending up
in the uncharted territory of a 4th state if we forget to clear one of
the two flags.
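As a stand-alone illustration of why the enum is harder to misuse than
the flag pair (toy code, not the kernel's; only the three state names
above are real):

    #include <assert.h>

    /* The three legal ownership states named above. */
    enum fp_state {
            FP_STATE_FREE,
            FP_STATE_HOST_OWNED,
            FP_STATE_GUEST_OWNED,
    };

    int main(void)
    {
            /* Assignment replaces set/clear pairs, so the illegal
             * "both flags set" fourth state cannot even be written. */
            enum fp_state fp = FP_STATE_HOST_OWNED;

            fp = FP_STATE_GUEST_OWNED;
            assert(fp == FP_STATE_GUEST_OWNED);
            return 0;
    }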
Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Reiji Watanabe <reijiw@google.com>
---
 arch/arm64/kvm/fpsimd.c                 | 14 ++++++--------
 arch/arm64/kvm/hyp/include/hyp/switch.h |  8 +++-----
 arch/arm64/kvm/hyp/nvhe/switch.c        |  4 ++--
 arch/arm64/kvm/hyp/vhe/switch.c         |  2 +-
 4 files changed, 12 insertions(+), 16 deletions(-)

(limited to 'arch/arm64/kvm')

diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index edbc0183c89b..d397efe1a378 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -77,8 +77,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 	BUG_ON(!current->mm);
 	BUG_ON(test_thread_flag(TIF_SVE));
 
-	vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED;
-	vcpu->arch.flags |= KVM_ARM64_FP_HOST;
+	vcpu->arch.fp_state = FP_STATE_HOST_OWNED;
 
 	vcpu->arch.flags &= ~KVM_ARM64_HOST_SVE_ENABLED;
 	if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
@@ -98,9 +97,8 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 		if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
 			vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED;
 
-		if (read_sysreg_s(SYS_SVCR) &
-		    (SVCR_SM_MASK | SVCR_ZA_MASK)) {
-			vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
+		if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) {
+			vcpu->arch.fp_state = FP_STATE_FREE;
 			fpsimd_save_and_flush_cpu_state();
 		}
 	}
@@ -119,7 +117,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu)
 {
 	if (!system_supports_fpsimd() || test_thread_flag(TIF_FOREIGN_FPSTATE))
-		vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | KVM_ARM64_FP_HOST);
+		vcpu->arch.fp_state = FP_STATE_FREE;
 }
 
 /*
@@ -133,7 +131,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
 {
 	WARN_ON_ONCE(!irqs_disabled());
 
-	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
+	if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) {
 		/*
 		 * Currently we do not support SME guests so SVCR is
 		 * always 0 and we just need a variable to point to.
@@ -176,7 +174,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 					 CPACR_EL1_SMEN_EL1EN);
 	}
 
-	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
+	if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) {
 		if (vcpu_has_sve(vcpu)) {
 			__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
 
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index e54320384943..6cbbb6c02f66 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -40,7 +40,7 @@ extern struct kvm_exception_table_entry __stop___kvm_ex_table;
 /* Check whether the FP regs are owned by the guest */
 static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu)
 {
-	return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
+	return vcpu->arch.fp_state == FP_STATE_GUEST_OWNED;
 }
 
 /* Save the 32-bit only FPSIMD system register state */
@@ -179,10 +179,8 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
 	isb();
 
 	/* Write out the host state if it's in the registers */
-	if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
+	if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED)
 		__fpsimd_save_state(vcpu->arch.host_fpsimd_state);
-		vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
-	}
 
 	/* Restore the guest state */
 	if (sve_guest)
@@ -194,7 +192,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
 	if (!(read_sysreg(hcr_el2) & HCR_RW))
 		write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2);
 
-	vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;
+	vcpu->arch.fp_state = FP_STATE_GUEST_OWNED;
 
 	return true;
 }
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index a6b9f1186577..764bdc423cb8 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -123,7 +123,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
 	}
 
 	cptr = CPTR_EL2_DEFAULT;
-	if (vcpu_has_sve(vcpu) && (vcpu->arch.flags & KVM_ARM64_FP_ENABLED))
+	if (vcpu_has_sve(vcpu) && (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED))
 		cptr |= CPTR_EL2_TZ;
 	if (cpus_have_final_cap(ARM64_SME))
 		cptr &= ~CPTR_EL2_TSM;
@@ -335,7 +335,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 
 	__sysreg_restore_state_nvhe(host_ctxt);
 
-	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
+	if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)
 		__fpsimd_save_fpexc32(vcpu);
 
 	__debug_switch_to_host(vcpu);
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 46f365254e9f..bce7fc51f9a1 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -175,7 +175,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
 
 	sysreg_restore_host_state_vhe(host_ctxt);
 
-	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
+	if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)
 		__fpsimd_save_fpexc32(vcpu);
 
 	__debug_switch_to_host(vcpu);
-- cgit v1.2.3

From 4c0680d394d8a77868049931101e4a59372346b5 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sat, 28 May 2022 12:38:17 +0100
Subject: KVM: arm64: Move vcpu configuration flags into their own set

The KVM_ARM64_{GUEST_HAS_SVE,VCPU_SVE_FINALIZED,GUEST_HAS_PTRAUTH}
flags are purely configuration flags. Once set, they are never cleared,
but evaluated all over the code base.

Move these three flags into the configuration set in one go, using the
new accessors, and take this opportunity to drop the KVM_ARM64_ prefix
which doesn't provide any help.
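As a toy model of what split flag sets with set/get accessors look
like (invented struct and macros for illustration only; the real
kernel macros encode the owning set in each flag definition and add
build-time checks, none of which is reproduced here):

    #include <stdio.h>

    /* Toy model: one word per flag set; for brevity, every flag
     * below is assumed to live in the configuration set. */
    struct toy_vcpu {
            unsigned long cflags;   /* configuration: set once, never cleared */
            unsigned long iflags;   /* input to the hypervisor */
            unsigned long sflags;   /* host-only state */
    };

    #define GUEST_HAS_SVE           (1UL << 0)
    #define GUEST_HAS_PTRAUTH       (1UL << 1)

    #define vcpu_set_flag(v, f)     ((v)->cflags |= (f))
    #define vcpu_get_flag(v, f)     (!!((v)->cflags & (f)))

    int main(void)
    {
            struct toy_vcpu v = { 0 };

            vcpu_set_flag(&v, GUEST_HAS_SVE);
            printf("sve=%d ptrauth=%d\n",
                   vcpu_get_flag(&v, GUEST_HAS_SVE),
                   vcpu_get_flag(&v, GUEST_HAS_PTRAUTH));
            return 0;
    }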
Reviewed-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/reset.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/arm64/kvm')

diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 6c70c6f61c70..0e08fbe68715 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -81,7 +81,7 @@ static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu)
 	 * KVM_REG_ARM64_SVE_VLS. Allocation is deferred until
 	 * kvm_arm_vcpu_finalize(), which freezes the configuration.
 	 */
-	vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_SVE;
+	vcpu_set_flag(vcpu, GUEST_HAS_SVE);
 
 	return 0;
 }
@@ -120,7 +120,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
 	}
 
 	vcpu->arch.sve_state = buf;
-	vcpu->arch.flags |= KVM_ARM64_VCPU_SVE_FINALIZED;
+	vcpu_set_flag(vcpu, VCPU_SVE_FINALIZED);
 	return 0;
 }
 
@@ -177,7 +177,7 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
 	    !system_has_full_ptr_auth())
 		return -EINVAL;
 
-	vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_PTRAUTH;
+	vcpu_set_flag(vcpu, GUEST_HAS_PTRAUTH);
 	return 0;
 }
-- cgit v1.2.3

From 699bb2e0c6f3796549dabac329501df7ffd99439 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sat, 28 May 2022 12:38:18 +0100
Subject: KVM: arm64: Move vcpu PC/Exception flags to the input flag set

The PC update flags (which also deal with exception injection) are one
of the most complicated uses of the flags we have. Make them more
foolproof by:

- moving them over to the new accessors and assigning them to the
  input flag set

- turning the combination of generic ELx flags with another flag
  indicating the target EL itself into an explicit set of flags for
  each EL and vector combination

- adding a new accessor to pend the exception

This is otherwise a pretty straightforward conversion.

Reviewed-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/arm.c               |  4 ++--
 arch/arm64/kvm/hyp/exception.c     | 23 +++++++++++------------
 arch/arm64/kvm/hyp/nvhe/sys_regs.c |  4 +---
 arch/arm64/kvm/inject_fault.c      | 17 +++++------------
 4 files changed, 19 insertions(+), 29 deletions(-)

(limited to 'arch/arm64/kvm')

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 400bb0fe2745..5beabbe69585 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1013,8 +1013,8 @@ out:
 	 * the vcpu state. Note that this relies on __kvm_adjust_pc()
 	 * being preempt-safe on VHE.
 	 */
-	if (unlikely(vcpu->arch.flags & (KVM_ARM64_PENDING_EXCEPTION |
-					 KVM_ARM64_INCREMENT_PC)))
+	if (unlikely(vcpu_get_flag(vcpu, PENDING_EXCEPTION) ||
+		     vcpu_get_flag(vcpu, INCREMENT_PC)))
 		kvm_call_hyp(__kvm_adjust_pc, vcpu);
 
 	vcpu_put(vcpu);
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index c5d009715402..b7557b25ed56 100644
--- a/arch/arm64/kvm/hyp/exception.c
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -303,14 +303,14 @@ static void enter_exception32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
 static void kvm_inject_exception(struct kvm_vcpu *vcpu)
 {
 	if (vcpu_el1_is_32bit(vcpu)) {
-		switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) {
-		case KVM_ARM64_EXCEPT_AA32_UND:
+		switch (vcpu_get_flag(vcpu, EXCEPT_MASK)) {
+		case unpack_vcpu_flag(EXCEPT_AA32_UND):
 			enter_exception32(vcpu, PSR_AA32_MODE_UND, 4);
 			break;
-		case KVM_ARM64_EXCEPT_AA32_IABT:
+		case unpack_vcpu_flag(EXCEPT_AA32_IABT):
 			enter_exception32(vcpu, PSR_AA32_MODE_ABT, 12);
 			break;
-		case KVM_ARM64_EXCEPT_AA32_DABT:
+		case unpack_vcpu_flag(EXCEPT_AA32_DABT):
 			enter_exception32(vcpu, PSR_AA32_MODE_ABT, 16);
 			break;
 		default:
@@ -318,9 +318,8 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
 			break;
 		}
 	} else {
-		switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) {
-		case (KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
-		      KVM_ARM64_EXCEPT_AA64_EL1):
+		switch (vcpu_get_flag(vcpu, EXCEPT_MASK)) {
+		case unpack_vcpu_flag(EXCEPT_AA64_EL1_SYNC):
 			enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync);
 			break;
 		default:
@@ -340,12 +339,12 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
  */
 void __kvm_adjust_pc(struct kvm_vcpu *vcpu)
 {
-	if (vcpu->arch.flags & KVM_ARM64_PENDING_EXCEPTION) {
+	if (vcpu_get_flag(vcpu, PENDING_EXCEPTION)) {
 		kvm_inject_exception(vcpu);
-		vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
-				      KVM_ARM64_EXCEPT_MASK);
-	} else if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) {
+		vcpu_clear_flag(vcpu, PENDING_EXCEPTION);
+		vcpu_clear_flag(vcpu, EXCEPT_MASK);
+	} else if (vcpu_get_flag(vcpu, INCREMENT_PC)) {
 		kvm_skip_instr(vcpu);
-		vcpu->arch.flags &= ~KVM_ARM64_INCREMENT_PC;
+		vcpu_clear_flag(vcpu, INCREMENT_PC);
 	}
 }
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index b6d86e423319..edd3eabf520f 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -38,9 +38,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
 	*vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
 	*vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR);
 
-	vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 |
-			     KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
-			     KVM_ARM64_PENDING_EXCEPTION);
+	kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
 
 	__kvm_adjust_pc(vcpu);
 
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index 55a5dbe957e0..f32f4a2a347f 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -20,9 +20,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
 	bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
 	u64 esr = 0;
 
-	vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 |
-			     KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
-			     KVM_ARM64_PENDING_EXCEPTION);
+	kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
 
 	vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
 
@@ -52,9 +50,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
 {
 	u64 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
 
-	vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 |
-			     KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
-			     KVM_ARM64_PENDING_EXCEPTION);
+	kvm_pend_exception(vcpu, EXCEPT_AA64_EL1_SYNC);
 
 	/*
 	 * Build an unknown exception, depending on the instruction
@@ -73,8 +69,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
 
 static void inject_undef32(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_UND |
-			     KVM_ARM64_PENDING_EXCEPTION);
+	kvm_pend_exception(vcpu, EXCEPT_AA32_UND);
 }
 
 /*
@@ -97,14 +92,12 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 addr)
 	far = vcpu_read_sys_reg(vcpu, FAR_EL1);
 
 	if (is_pabt) {
-		vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_IABT |
-				     KVM_ARM64_PENDING_EXCEPTION);
+		kvm_pend_exception(vcpu, EXCEPT_AA32_IABT);
 		far &= GENMASK(31, 0);
 		far |= (u64)addr << 32;
 		vcpu_write_sys_reg(vcpu, fsr, IFSR32_EL2);
 	} else { /* !iabt */
-		vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_DABT |
-				     KVM_ARM64_PENDING_EXCEPTION);
+		kvm_pend_exception(vcpu, EXCEPT_AA32_DABT);
 		far &= GENMASK(63, 32);
 		far |= addr;
 		vcpu_write_sys_reg(vcpu, fsr, ESR_EL1);
-- cgit v1.2.3

From b1da49088ac68a21c613efd734dada8272ec0b00 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sat, 28 May 2022 12:38:19 +0100
Subject: KVM: arm64: Move vcpu debug/SPE/TRBE flags to the input flag set

The three debug flags (which deal with the debug registers, SPE and
TRBE) are all input flags to the hypervisor code. Move them into the
input set and convert them to the new accessors.

Reviewed-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/debug.c                     | 25 ++++++++++++-------------
 arch/arm64/kvm/hyp/include/hyp/debug-sr.h  |  6 +++---
 arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h |  4 ++--
 arch/arm64/kvm/hyp/nvhe/debug-sr.c         |  8 ++++----
 arch/arm64/kvm/sys_regs.c                  |  8 ++++----
 5 files changed, 25 insertions(+), 26 deletions(-)

(limited to 'arch/arm64/kvm')

diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index 4fd5c216c4bb..0b28d7db7c76 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -104,11 +104,11 @@ static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu)
 	 * Trap debug register access when one of the following is true:
 	 * - Userspace is using the hardware to debug the guest
 	 *   (KVM_GUESTDBG_USE_HW is set).
-	 * - The guest is not using debug (KVM_ARM64_DEBUG_DIRTY is clear).
+	 * - The guest is not using debug (DEBUG_DIRTY clear).
 	 * - The guest has enabled the OS Lock (debug exceptions are blocked).
 	 */
 	if ((vcpu->guest_debug & KVM_GUESTDBG_USE_HW) ||
-	    !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) ||
+	    !vcpu_get_flag(vcpu, DEBUG_DIRTY) ||
 	    kvm_vcpu_os_lock_enabled(vcpu))
 		vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
 
@@ -147,8 +147,8 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
  * debug related registers.
 *
 * Additionally, KVM only traps guest accesses to the debug registers if
- * the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY
- * flag on vcpu->arch.flags).  Since the guest must not interfere
+ * the guest is not actively using them (see the DEBUG_DIRTY
+ * flag on vcpu->arch.iflags).  Since the guest must not interfere
 * with the hardware state when debugging the guest, we must ensure that
 * trapping is enabled whenever we are debugging the guest using the
 * debug registers.
@@ -205,9 +205,8 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
 		 *
 		 * We simply switch the debug_ptr to point to our new
 		 * external_debug_state which has been populated by the
-		 * debug ioctl. The existing KVM_ARM64_DEBUG_DIRTY
-		 * mechanism ensures the registers are updated on the
-		 * world switch.
+		 * debug ioctl. The existing DEBUG_DIRTY mechanism ensures
+		 * the registers are updated on the world switch.
 		 */
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
 			/* Enable breakpoints/watchpoints */
@@ -216,7 +215,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
 			vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
 
 			vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state;
-			vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
+			vcpu_set_flag(vcpu, DEBUG_DIRTY);
 
 			trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
 						 &vcpu->arch.debug_ptr->dbg_bcr[0],
@@ -246,7 +245,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
 
 	/* If KDE or MDE are set, perform a full save/restore cycle. */
 	if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE))
-		vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
+		vcpu_set_flag(vcpu, DEBUG_DIRTY);
 
 	/* Write mdcr_el2 changes since vcpu_load on VHE systems */
 	if (has_vhe() && orig_mdcr_el2 != vcpu->arch.mdcr_el2)
@@ -298,16 +297,16 @@ void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu)
 	 */
 	if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_PMSVER_SHIFT) &&
 	    !(read_sysreg_s(SYS_PMBIDR_EL1) & BIT(SYS_PMBIDR_EL1_P_SHIFT)))
-		vcpu->arch.flags |= KVM_ARM64_DEBUG_STATE_SAVE_SPE;
+		vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_SPE);
 
 	/* Check if we have TRBE implemented and available at the host */
 	if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_TRBE_SHIFT) &&
 	    !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_PROG))
-		vcpu->arch.flags |= KVM_ARM64_DEBUG_STATE_SAVE_TRBE;
+		vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_TRBE);
 }
 
 void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.flags &= ~(KVM_ARM64_DEBUG_STATE_SAVE_SPE |
-			      KVM_ARM64_DEBUG_STATE_SAVE_TRBE);
+	vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_SPE);
+	vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_TRBE);
 }
diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
index 4ebe9f558f3a..961bbef104a6 100644
--- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
@@ -132,7 +132,7 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu)
 	struct kvm_guest_debug_arch *host_dbg;
 	struct kvm_guest_debug_arch *guest_dbg;
 
-	if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
+	if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
 		return;
 
 	host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
@@ -151,7 +151,7 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu)
 	struct kvm_guest_debug_arch *host_dbg;
 	struct kvm_guest_debug_arch *guest_dbg;
 
-	if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
+	if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
 		return;
 
 	host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
@@ -162,7 +162,7 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu)
 	__debug_save_state(guest_dbg, guest_ctxt);
 	__debug_restore_state(host_dbg, host_ctxt);
 
-	vcpu->arch.flags &= ~KVM_ARM64_DEBUG_DIRTY;
+	vcpu_clear_flag(vcpu, DEBUG_DIRTY);
 }
 
 #endif /* __ARM64_KVM_HYP_DEBUG_SR_H__ */
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index 7ecca8b07851..baa5b9b3dde5 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -195,7 +195,7 @@ static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu)
 	__vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2);
 	__vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2);
 
-	if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
+	if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY))
 		__vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2);
 }
 
@@ -212,7 +212,7 @@ static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu)
 	write_sysreg(__vcpu_sys_reg(vcpu, DACR32_EL2), dacr32_el2);
 	write_sysreg(__vcpu_sys_reg(vcpu, IFSR32_EL2), ifsr32_el2);
 
-	if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)
+	if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY))
 		write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2);
 }
 
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index df361d839902..e17455773b98 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -84,10 +84,10 @@ static void __debug_restore_trace(u64 trfcr_el1)
 void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
 {
 	/* Disable and flush SPE data generation */
-	if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_SPE)
+	if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
 		__debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1);
 	/* Disable and flush Self-Hosted Trace generation */
-	if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_TRBE)
+	if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
 		__debug_save_trace(&vcpu->arch.host_debug_state.trfcr_el1);
 }
 
@@ -98,9 +98,9 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
 
 void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
 {
-	if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_SPE)
+	if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
 		__debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
-	if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_TRBE)
+	if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
 		__debug_restore_trace(vcpu->arch.host_debug_state.trfcr_el1);
 }
 
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index c06c0477fab5..f24797c57df8 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -387,7 +387,7 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
 {
 	if (p->is_write) {
 		vcpu_write_sys_reg(vcpu, p->regval, r->reg);
-		vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
+		vcpu_set_flag(vcpu, DEBUG_DIRTY);
 	} else {
 		p->regval = vcpu_read_sys_reg(vcpu, r->reg);
 	}
@@ -403,8 +403,8 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
 * A 32 bit write to a debug register leave top bits alone
 * A 32 bit read from a debug register only returns the bottom bits
 *
- * All writes will set the KVM_ARM64_DEBUG_DIRTY flag to ensure the
- * hyp.S code switches between host and guest values in future.
+ * All writes will set the DEBUG_DIRTY flag to ensure the hyp code
+ * switches between host and guest values in future.
 */
 static void reg_to_dbg(struct kvm_vcpu *vcpu,
 		       struct sys_reg_params *p,
@@ -420,7 +420,7 @@ static void reg_to_dbg(struct kvm_vcpu *vcpu,
 	val |= (p->regval & (mask >> shift)) << shift;
 	*dbg_reg = val;
 
-	vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
+	vcpu_set_flag(vcpu, DEBUG_DIRTY);
 }
 
 static void dbg_to_reg(struct kvm_vcpu *vcpu,
-- cgit v1.2.3

From 0affa37fcd1d6f701a0fe805c4ceb7f348d377d5 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sat, 28 May 2022 12:38:20 +0100
Subject: KVM: arm64: Move vcpu SVE/SME flags to the state flag set

The two HOST_{SVE,SME}_ENABLED flags are only used by the host kernel
to track its own state across a vcpu run so that it can be fully
restored. Move these flags to the so-called state set.
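The save/restore shape these two flags support can be sketched
stand-alone (toy code with invented names; the CPACR handling is
reduced to a single bit for the example):

    #include <stdbool.h>
    #include <stdio.h>

    static unsigned long fake_cpacr = 0x1;      /* pretend EL0 access is on */
    #define ZEN_EL0EN 0x1UL

    struct toy_vcpu { bool host_sve_enabled; };

    /* Load: snapshot the host's EL0 trap configuration. */
    static void load_fp(struct toy_vcpu *v)
    {
            v->host_sve_enabled = fake_cpacr & ZEN_EL0EN;
    }

    /* Put: restore exactly what the host had, avoiding spurious traps. */
    static void put_fp(const struct toy_vcpu *v)
    {
            if (v->host_sve_enabled)
                    fake_cpacr |= ZEN_EL0EN;
            else
                    fake_cpacr &= ~ZEN_EL0EN;
    }

    int main(void)
    {
            struct toy_vcpu v;

            load_fp(&v);
            fake_cpacr = 0;         /* the guest run clobbers it */
            put_fp(&v);
            printf("cpacr=%lx\n", fake_cpacr);  /* back to 1 */
            return 0;
    }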
Reviewed-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/fpsimd.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch/arm64/kvm')

diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index d397efe1a378..557a96f8e06a 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -79,9 +79,9 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.fp_state = FP_STATE_HOST_OWNED;
 
-	vcpu->arch.flags &= ~KVM_ARM64_HOST_SVE_ENABLED;
+	vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
 	if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
-		vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
+		vcpu_set_flag(vcpu, HOST_SVE_ENABLED);
 
 	/*
 	 * We don't currently support SME guests but if we leave
@@ -93,9 +93,9 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 	 * operations. Do this for ZA as well for now for simplicity.
 	 */
 	if (system_supports_sme()) {
-		vcpu->arch.flags &= ~KVM_ARM64_HOST_SME_ENABLED;
+		vcpu_clear_flag(vcpu, HOST_SME_ENABLED);
 		if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
-			vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED;
+			vcpu_set_flag(vcpu, HOST_SME_ENABLED);
 
 		if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) {
 			vcpu->arch.fp_state = FP_STATE_FREE;
@@ -164,7 +164,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 	 */
 	if (has_vhe() && system_supports_sme()) {
 		/* Also restore EL0 state seen on entry */
-		if (vcpu->arch.flags & KVM_ARM64_HOST_SME_ENABLED)
+		if (vcpu_get_flag(vcpu, HOST_SME_ENABLED))
 			sysreg_clear_set(CPACR_EL1, 0,
 					 CPACR_EL1_SMEN_EL0EN |
 					 CPACR_EL1_SMEN_EL1EN);
@@ -193,7 +193,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 	 * for EL0. To avoid spurious traps, restore the trap state
 	 * seen by kvm_arch_vcpu_load_fp():
 	 */
-	if (vcpu->arch.flags & KVM_ARM64_HOST_SVE_ENABLED)
+	if (vcpu_get_flag(vcpu, HOST_SVE_ENABLED))
 		sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
 	else
 		sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
-- cgit v1.2.3

From eebc538d8e07e0ec759823664cbe2011a8bd885d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sat, 28 May 2022 12:38:22 +0100
Subject: KVM: arm64: Move vcpu WFIT flag to the state flag set

The host kernel uses the WFIT flag to remember that a vcpu has used
this instruction and wake it up as required. Move it to the state set,
as nothing in the hypervisor uses this information.
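A stand-alone sketch of the host-side pattern (all names invented for
the example): the exit path records the WFxT form, the host timer code
consults it, and the wake-up path clears it:

    #include <stdbool.h>
    #include <stdio.h>

    struct toy_vcpu {
            bool in_wfit;   /* models the IN_WFIT state flag */
    };

    /* Exit handler: remember that the guest used the WFxT form. */
    static void handle_wfx(struct toy_vcpu *v, bool is_wfxt)
    {
            if (is_wfxt)
                    v->in_wfit = true;
    }

    /* Host timer code: only a vcpu blocked in WFIT has a deadline. */
    static bool has_wfit_deadline(const struct toy_vcpu *v)
    {
            return v->in_wfit;
    }

    /* Wake-up path: clear the record once the vcpu resumes. */
    static void vcpu_wfi_done(struct toy_vcpu *v)
    {
            v->in_wfit = false;
    }

    int main(void)
    {
            struct toy_vcpu v = { false };

            handle_wfx(&v, true);
            printf("deadline=%d\n", has_wfit_deadline(&v));
            vcpu_wfi_done(&v);
            printf("deadline=%d\n", has_wfit_deadline(&v));
            return 0;
    }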
Reviewed-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/arch_timer.c  | 2 +-
 arch/arm64/kvm/arm.c         | 2 +-
 arch/arm64/kvm/handle_exit.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/arm64/kvm')

diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 4e39ace073af..5290ca5db663 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -242,7 +242,7 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
 static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
 {
 	return (cpus_have_final_cap(ARM64_HAS_WFXT) &&
-		(vcpu->arch.flags & KVM_ARM64_WFIT));
+		vcpu_get_flag(vcpu, IN_WFIT));
 }
 
 static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 5beabbe69585..8b9da9d30485 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -657,7 +657,7 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
 	preempt_enable();
 
 	kvm_vcpu_halt(vcpu);
-	vcpu->arch.flags &= ~KVM_ARM64_WFIT;
+	vcpu_clear_flag(vcpu, IN_WFIT);
 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
 
 	preempt_disable();
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index f66c0142b335..d045f5b973b9 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -120,7 +120,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
 		kvm_vcpu_on_spin(vcpu, vcpu_mode_priv(vcpu));
 	} else {
 		if (esr & ESR_ELx_WFx_ISS_WFxT)
-			vcpu->arch.flags |= KVM_ARM64_WFIT;
+			vcpu_set_flag(vcpu, IN_WFIT);
 
 		kvm_vcpu_wfi(vcpu);
 	}
-- cgit v1.2.3

From 30b6ab45f81334e83dcb440451b6a4c4a753a118 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sat, 28 May 2022 12:38:24 +0100
Subject: KVM: arm64: Convert vcpu sysregs_loaded_on_cpu to a state flag

The aptly named boolean 'sysregs_loaded_on_cpu' tracks whether some of
the vcpu system registers are resident on the physical CPU when running
in VHE mode.

This is obviously a flag in hiding, so let's convert it to a state
flag, since this is solely a host concern (the hypervisor itself always
knows which state we're in).
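The pattern this flag enables — read the live CPU copy while the
registers are loaded, fall back to the in-memory copy otherwise — can
be sketched as follows (toy code; the helper names are invented for
the example, not the kernel's):

    #include <stdbool.h>
    #include <stdio.h>

    /* Toy model of SYSREGS_ON_CPU: a register may live either in
     * memory or on the physical CPU. */
    struct toy_vcpu {
            bool sysregs_on_cpu;    /* the flag this commit introduces */
            unsigned long mem_copy; /* stale while loaded on the CPU */
    };

    static unsigned long hw_read(void)
    {
            return 0xc0ffee;        /* stands in for a real sysreg read */
    }

    static unsigned long vcpu_read_reg(const struct toy_vcpu *v)
    {
            if (v->sysregs_on_cpu)  /* loaded: the CPU has the truth */
                    return hw_read();
            return v->mem_copy;     /* not loaded: memory has the truth */
    }

    int main(void)
    {
            struct toy_vcpu v = { .sysregs_on_cpu = false, .mem_copy = 42 };

            printf("%lu\n", vcpu_read_reg(&v));     /* 42 */
            v.sysregs_on_cpu = true;
            printf("%lx\n", vcpu_read_reg(&v));     /* c0ffee */
            return 0;
    }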
Reviewed-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 4 ++--
 arch/arm64/kvm/sys_regs.c          | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/arm64/kvm')

diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index 007a12dd4351..7b44f6b3b547 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -79,7 +79,7 @@ void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu)
 	__sysreg_restore_user_state(guest_ctxt);
 	__sysreg_restore_el1_state(guest_ctxt);
 
-	vcpu->arch.sysregs_loaded_on_cpu = true;
+	vcpu_set_flag(vcpu, SYSREGS_ON_CPU);
 
 	activate_traps_vhe_load(vcpu);
 }
@@ -110,5 +110,5 @@ void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu)
 	/* Restore host user state */
 	__sysreg_restore_user_state(host_ctxt);
 
-	vcpu->arch.sysregs_loaded_on_cpu = false;
+	vcpu_clear_flag(vcpu, SYSREGS_ON_CPU);
 }
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index f24797c57df8..1c562bcfeccf 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -72,7 +72,7 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
 {
 	u64 val = 0x8badf00d8badf00d;
 
-	if (vcpu->arch.sysregs_loaded_on_cpu &&
+	if (vcpu_get_flag(vcpu, SYSREGS_ON_CPU) &&
 	    __vcpu_read_sys_reg_from_cpu(reg, &val))
 		return val;
 
@@ -81,7 +81,7 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
 
 void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
 {
-	if (vcpu->arch.sysregs_loaded_on_cpu &&
+	if (vcpu_get_flag(vcpu, SYSREGS_ON_CPU) &&
 	    __vcpu_write_sys_reg_to_cpu(val, reg))
 		return;
 
-- cgit v1.2.3

From b4da91879e98bdd5998ee84f47f02426ac50a729 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 8 Jun 2022 14:22:31 +0100
Subject: KVM: arm64: Move the handling of !FP outside of the fast path

We currently start by assuming that the host owns the FP unit at load
time, then check again whether this is the case as we are about to run.
Only at this point do we account for the fact that there is a
(vanishingly small) chance that we're running on a system without a
FPSIMD unit (yes, this is madness).

We can actually move this FPSIMD check as early as load-time, and drop
the check at run time.

No intended change in behaviour.
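The general shape of the change — hoisting an invariant check out of
the per-run hot path into the load path — in a stand-alone sketch
(invented names, not the kernel code):

    #include <stdbool.h>
    #include <stdio.h>

    enum fp_state { FP_STATE_FREE, FP_STATE_HOST_OWNED };

    /* Stands in for system_supports_fpsimd(): constant for the
     * lifetime of the system, so worth checking only once. */
    static bool supports_fp(void) { return true; }

    struct toy_vcpu { enum fp_state fp_state; };

    /* Load time (cold path): bail out early when there is no FP unit,
     * leaving the FP_STATE_FREE default set at vcpu creation. */
    static void vcpu_load_fp(struct toy_vcpu *v)
    {
            if (!supports_fp())
                    return;
            v->fp_state = FP_STATE_HOST_OWNED;
    }

    /* Run time (hot path): no FP-support check left here. */
    static void vcpu_ctxflush_fp(struct toy_vcpu *v, bool foreign_fpstate)
    {
            if (foreign_fpstate)
                    v->fp_state = FP_STATE_FREE;
    }

    int main(void)
    {
            struct toy_vcpu v = { .fp_state = FP_STATE_FREE };

            vcpu_load_fp(&v);
            vcpu_ctxflush_fp(&v, false);
            printf("state=%d\n", v.fp_state);
            return 0;
    }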
Suggested-by: Reiji Watanabe <reijiw@google.com>
Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/arm.c    | 6 ++++++
 arch/arm64/kvm/fpsimd.c | 8 ++++----
 2 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'arch/arm64/kvm')

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 8b9da9d30485..a9dd7ec38f38 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -328,6 +328,12 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
 
+	/*
+	 * Default value for the FP state, will be overloaded at load
+	 * time if we support FP (pretty likely)
+	 */
+	vcpu->arch.fp_state = FP_STATE_FREE;
+
 	/* Set up the timer */
 	kvm_timer_vcpu_init(vcpu);
 
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 557a96f8e06a..ec8e4494873d 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -77,6 +77,9 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 	BUG_ON(!current->mm);
 	BUG_ON(test_thread_flag(TIF_SVE));
 
+	if (!system_supports_fpsimd())
+		return;
+
 	vcpu->arch.fp_state = FP_STATE_HOST_OWNED;
 
 	vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
@@ -110,13 +113,10 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
  * FP while we were preemptible (such as off the back of an interrupt),
  * then neither the host nor the guest own the FP hardware (and it was the
  * responsibility of the code that used FP to save the existing state).
- *
- * Note that not supporting FP is basically the same thing as far as the
- * hypervisor is concerned (nothing to save).
  */
 void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu)
 {
-	if (!system_supports_fpsimd() || test_thread_flag(TIF_FOREIGN_FPSTATE))
+	if (test_thread_flag(TIF_FOREIGN_FPSTATE))
 		vcpu->arch.fp_state = FP_STATE_FREE;
 }
 
-- cgit v1.2.3

From 3d5697f95e492899d0bf813cbab2af03dde77fa2 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 13 Jun 2022 18:20:25 +0900
Subject: KVM: arm64: nvhe: Rename confusing obj-y

This Makefile appends several objects to obj-y from line 15, but none
of them is linked to vmlinux in an ordinary way. obj-y is overwritten
at line 30:

  obj-y := kvm_nvhe.o

So, kvm_nvhe.o is the only object directly linked to vmlinux.

Replace the abused obj-y with hyp-obj-y.
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220613092026.1705630-1-masahiroy@kernel.org
---
 arch/arm64/kvm/hyp/nvhe/Makefile | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch/arm64/kvm')

diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index f9fe4dc21b1f..3c6d3a18171c 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -12,13 +12,13 @@ HOST_EXTRACFLAGS += -I$(objtree)/include
 lib-objs := clear_page.o copy_page.o memcpy.o memset.o
 lib-objs := $(addprefix ../../../lib/, $(lib-objs))
 
-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
+hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
 	 hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
 	 cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o
-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
+hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
 	 ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
-obj-$(CONFIG_DEBUG_LIST) += list_debug.o
-obj-y += $(lib-objs)
+hyp-obj-$(CONFIG_DEBUG_LIST) += list_debug.o
+hyp-obj-y += $(lib-objs)
 
 ##
 ## Build rules for compiling nVHE hyp code
@@ -26,7 +26,7 @@ obj-y += $(lib-objs)
 ## file containing all nVHE hyp code and data.
 ##
 
-hyp-obj := $(patsubst %.o,%.nvhe.o,$(obj-y))
+hyp-obj := $(patsubst %.o,%.nvhe.o,$(hyp-obj-y))
 obj-y := kvm_nvhe.o
 extra-y := $(hyp-obj) kvm_nvhe.tmp.o kvm_nvhe.rel.o hyp.lds hyp-reloc.S hyp-reloc.o
-- cgit v1.2.3

From 40c56bd8e1aea7929a09f1d4d68ac3221bb142c4 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 13 Jun 2022 18:20:26 +0900
Subject: KVM: arm64: nvhe: Add intermediates to 'targets' instead of extra-y

These are generated on demand. Adding them to 'targets' is enough.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220613092026.1705630-2-masahiroy@kernel.org
---
 arch/arm64/kvm/hyp/nvhe/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm64/kvm')

diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 3c6d3a18171c..a2b0d043dddf 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -28,7 +28,7 @@ hyp-obj-y += $(lib-objs)
 
 hyp-obj := $(patsubst %.o,%.nvhe.o,$(hyp-obj-y))
 obj-y := kvm_nvhe.o
-extra-y := $(hyp-obj) kvm_nvhe.tmp.o kvm_nvhe.rel.o hyp.lds hyp-reloc.S hyp-reloc.o
+targets += $(hyp-obj) kvm_nvhe.tmp.o kvm_nvhe.rel.o hyp.lds hyp-reloc.S hyp-reloc.o
 
 # 1) Compile all source files to `.nvhe.o` object files. The file extension
 #    avoids file name clashes for files shared with VHE.
-- cgit v1.2.3

From 1c3ace2b8b3995d3213c5e2d2aca01a0577a3b0f Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Tue, 5 Jul 2022 14:23:10 +0000
Subject: KVM: arm64: Don't return from void function

Although harmless, the return statement in kvm_unexpected_el2_exception
is rather confusing as the function itself has a void return type. The
C standard is also pretty clear that "A return statement with an
expression shall not appear in a function whose return type is void".
Given that this return statement does not seem to add any actual value,
let's not pointlessly violate the standard.
Build-tested with GCC 10 and CLANG 13 for good measure; the
disassembled code is identical with or without the return statement.

Fixes: e9ee186bb735 ("KVM: arm64: Add kvm_extable for vaxorcism code")
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220705142310.3847918-1-qperret@google.com
---
 arch/arm64/kvm/hyp/nvhe/switch.c | 2 +-
 arch/arm64/kvm/hyp/vhe/switch.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/arm64/kvm')

diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 6db801db8f27..925b34b7708d 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -386,5 +386,5 @@ asmlinkage void __noreturn hyp_panic_bad_stack(void)
 
 asmlinkage void kvm_unexpected_el2_exception(void)
 {
-	return __kvm_unexpected_el2_exception();
+	__kvm_unexpected_el2_exception();
 }
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 969f20daf97a..390af1a6a9b4 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -249,5 +249,5 @@ void __noreturn hyp_panic(void)
 
 asmlinkage void kvm_unexpected_el2_exception(void)
 {
-	return __kvm_unexpected_el2_exception();
+	__kvm_unexpected_el2_exception();
 }
-- cgit v1.2.3

From ed6313a93fd11d2015ad17046f3c418bf6a8dab1 Mon Sep 17 00:00:00 2001
From: Kalesh Singh <kaleshsingh@google.com>
Date: Fri, 15 Jul 2022 16:58:24 -0700
Subject: KVM: arm64: Fix hypervisor address symbolization

With CONFIG_RANDOMIZE_BASE=y vmlinux addresses will resolve incorrectly
from kallsyms. Fix this by adding the KASLR offset before printing the
symbols.

Fixes: 6ccf9cb557bd ("KVM: arm64: Symbolize the nVHE HYP addresses")
Reported-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Kalesh Singh <kaleshsingh@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220715235824.2549012-1-kaleshsingh@google.com
---
 arch/arm64/kvm/handle_exit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/arm64/kvm')

diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index f66c0142b335..e43926ef2bc2 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -347,10 +347,10 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
 			kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line);
 		else
 			kvm_err("nVHE hyp BUG at: [<%016llx>] %pB!\n", panic_addr,
-				(void *)panic_addr);
+				(void *)(panic_addr + kaslr_offset()));
 	} else {
 		kvm_err("nVHE hyp panic at: [<%016llx>] %pB!\n", panic_addr,
-			(void *)panic_addr);
+			(void *)(panic_addr + kaslr_offset()));
 	}
 
 	/*
-- cgit v1.2.3

From da8d120fbafe1d3217d25ac45493538b37cff87c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 3 Jul 2022 14:08:46 +0100
Subject: KVM: arm64: Add get_reg_by_id() as a sys_reg_desc retrieving helper

find_reg_by_id() requires a sys_reg_param as input, which most users
provide as an on-stack variable, but don't make any use of the result.

Provide a helper that doesn't have this requirement and simplify the
callers (all but one).
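The refactor pattern — wrapping a lookup that forces every caller to
declare a scratch out-parameter it never reads — in a stand-alone
sketch (invented types and names, not the kernel's):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    struct params { unsigned int op0, op1; };
    struct desc { unsigned long long id; const char *name; };

    /* Stand-in for the id decoder. */
    static bool decode(unsigned long long id, struct params *p)
    {
            p->op0 = id >> 8;
            p->op1 = id & 0xff;
            return true;
    }

    /* Stand-in for the pre-existing find_reg()-style lookup. */
    static const struct desc *find_reg(const struct params *p,
                                       const struct desc t[], size_t n)
    {
            for (size_t i = 0; i < n; i++)
                    if (t[i].id == ((unsigned long long)p->op0 << 8 | p->op1))
                            return &t[i];
            return NULL;
    }

    /* The new helper: callers no longer declare params they ignore. */
    static const struct desc *get_by_id(unsigned long long id,
                                        const struct desc t[], size_t n)
    {
            struct params p;

            if (!decode(id, &p))
                    return NULL;
            return find_reg(&p, t, n);
    }

    int main(void)
    {
            const struct desc table[] = { { 0x0101, "reg_a" } };
            const struct desc *r = get_by_id(0x0101, table, 1);

            printf("%s\n", r ? r->name : "not found");
            return 0;
    }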
Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/sys_regs.c        | 28 +++++++++++++++++-----------
 arch/arm64/kvm/sys_regs.h        |  4 ++++
 arch/arm64/kvm/vgic-sys-reg-v3.c |  8 ++------
 3 files changed, 23 insertions(+), 17 deletions(-)

(limited to 'arch/arm64/kvm')

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index c06c0477fab5..1f410283c592 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -2650,21 +2650,29 @@ const struct sys_reg_desc *find_reg_by_id(u64 id,
 	return find_reg(params, table, num);
 }
 
+const struct sys_reg_desc *get_reg_by_id(u64 id,
+					 const struct sys_reg_desc table[],
+					 unsigned int num)
+{
+	struct sys_reg_params params;
+
+	if (!index_to_params(id, &params))
+		return NULL;
+
+	return find_reg(&params, table, num);
+}
+
 /* Decode an index value, and find the sys_reg_desc entry. */
 static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
 							u64 id)
 {
 	const struct sys_reg_desc *r;
-	struct sys_reg_params params;
 
 	/* We only do sys_reg for now. */
 	if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG)
 		return NULL;
 
-	if (!index_to_params(id, &params))
-		return NULL;
-
-	r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+	r = get_reg_by_id(id, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
 
 	/* Not saved in the sys_reg array and not otherwise accessible? */
 	if (r && !(r->reg || r->get_user))
@@ -2723,11 +2731,10 @@ static int reg_to_user(void __user *uaddr, const u64 *val, u64 id)
 
 static int get_invariant_sys_reg(u64 id, void __user *uaddr)
 {
-	struct sys_reg_params params;
 	const struct sys_reg_desc *r;
 
-	r = find_reg_by_id(id, &params, invariant_sys_regs,
-			   ARRAY_SIZE(invariant_sys_regs));
+	r = get_reg_by_id(id, invariant_sys_regs,
+			  ARRAY_SIZE(invariant_sys_regs));
 	if (!r)
 		return -ENOENT;
 
@@ -2736,13 +2743,12 @@ static int get_invariant_sys_reg(u64 id, void __user *uaddr)
 
 static int set_invariant_sys_reg(u64 id, void __user *uaddr)
 {
-	struct sys_reg_params params;
 	const struct sys_reg_desc *r;
 	int err;
 	u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */
 
-	r = find_reg_by_id(id, &params, invariant_sys_regs,
-			   ARRAY_SIZE(invariant_sys_regs));
+	r = get_reg_by_id(id, invariant_sys_regs,
+			  ARRAY_SIZE(invariant_sys_regs));
 	if (!r)
 		return -ENOENT;
 
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index aee8ea054f0d..ce30ed9566ae 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -195,6 +195,10 @@ const struct sys_reg_desc *find_reg_by_id(u64 id,
 					  const struct sys_reg_desc table[],
 					  unsigned int num);
 
+const struct sys_reg_desc *get_reg_by_id(u64 id,
+					 const struct sys_reg_desc table[],
+					 unsigned int num);
+
 #define AA32(_x) .aarch32_map = AA32_##_x
 #define Op0(_x) .Op0 = _x
 #define Op1(_x) .Op1 = _x
diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c
index 07d5271e9f05..644acda33c7c 100644
--- a/arch/arm64/kvm/vgic-sys-reg-v3.c
+++ b/arch/arm64/kvm/vgic-sys-reg-v3.c
@@ -263,14 +263,10 @@ static const struct sys_reg_desc gic_v3_icc_reg_descs[] = {
 int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id,
 				 u64 *reg)
 {
-	struct sys_reg_params params;
 	u64 sysreg = (id & KVM_DEV_ARM_VGIC_SYSREG_MASK) | KVM_REG_SIZE_U64;
 
-	params.regval = *reg;
-	params.is_write = is_write;
-
-	if (find_reg_by_id(sysreg, &params, gic_v3_icc_reg_descs,
-			   ARRAY_SIZE(gic_v3_icc_reg_descs)))
+	if (get_reg_by_id(sysreg, gic_v3_icc_reg_descs,
+			  ARRAY_SIZE(gic_v3_icc_reg_descs)))
 		return 0;
 
 	return -ENXIO;
-- cgit v1.2.3

From 1deeffb559663dc44e4b8a61fe7e271fe3b4b836 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 3 Jul 2022 15:11:50 +0100
Subject: KVM: arm64: Reorder handling of invariant sysregs from userspace

In order to allow some further refactor of the sysreg helpers, move the
handling of invariant sysregs to occur before we handle all the other
ones.

Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/sys_regs.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'arch/arm64/kvm')

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 1f410283c592..9291cb94c2e4 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -2849,6 +2849,7 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
 {
 	const struct sys_reg_desc *r;
 	void __user *uaddr = (void __user *)(unsigned long)reg->addr;
+	int err;
 
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
 		return demux_c15_get(reg->id, uaddr);
@@ -2856,12 +2857,14 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
 	if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
 		return -ENOENT;
 
+	err = get_invariant_sys_reg(reg->id, uaddr);
+	if (err != -ENOENT)
+		return err;
+
 	r = index_to_sys_reg_desc(vcpu, reg->id);
-	if (!r)
-		return get_invariant_sys_reg(reg->id, uaddr);
 
 	/* Check for regs disabled by runtime config */
-	if (sysreg_hidden(vcpu, r))
+	if (!r || sysreg_hidden(vcpu, r))
 		return -ENOENT;
 
 	if (r->get_user)
@@ -2874,6 +2877,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
 {
 	const struct sys_reg_desc *r;
 	void __user *uaddr = (void __user *)(unsigned long)reg->addr;
+	int err;
 
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
 		return demux_c15_set(reg->id, uaddr);
@@ -2881,12 +2885,14 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
 	if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
 		return -ENOENT;
 
+	err = set_invariant_sys_reg(reg->id, uaddr);
+	if (err != -ENOENT)
+		return err;
+
 	r = index_to_sys_reg_desc(vcpu, reg->id);
-	if (!r)
-		return set_invariant_sys_reg(reg->id, uaddr);
 
 	/* Check for regs disabled by runtime config */
-	if (sysreg_hidden(vcpu, r))
+	if (!r || sysreg_hidden(vcpu, r))
 		return -ENOENT;
 
 	if (r->set_user)
-- cgit v1.2.3

From ba23aec9f4f27c00ac7a504aae60cae8a4087a19 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 3 Jul 2022 16:06:51 +0100
Subject: KVM: arm64: Introduce generic get_user/set_user helpers for system
 registers

The userspace access to the system registers is done using helpers that
hardcode the table that is looked up. Extract some generic helpers from
this, moving the handling of hidden sysregs into the core code.

Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/sys_regs.c | 60 ++++++++++++++++++++++++++++++-----------------
 arch/arm64/kvm/sys_regs.h |  6 +++++
 2 files changed, 44 insertions(+), 22 deletions(-)

(limited to 'arch/arm64/kvm')

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 9291cb94c2e4..0fbdb21a3600 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -2663,8 +2663,10 @@ const struct sys_reg_desc *get_reg_by_id(u64 id,
 }
 
 /* Decode an index value, and find the sys_reg_desc entry. */
-static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
-							u64 id)
+static const struct sys_reg_desc *
+id_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id,
+		   const struct sys_reg_desc table[], unsigned int num)
+
 {
 	const struct sys_reg_desc *r;
 
@@ -2672,10 +2674,10 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
 	if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG)
 		return NULL;
 
-	r = get_reg_by_id(id, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+	r = get_reg_by_id(id, table, num);
 
 	/* Not saved in the sys_reg array and not otherwise accessible? */
-	if (r && !(r->reg || r->get_user))
+	if (r && (!(r->reg || r->get_user) || sysreg_hidden(vcpu, r)))
 		r = NULL;
 
 	return r;
@@ -2845,9 +2847,24 @@ static int demux_c15_set(u64 id, void __user *uaddr)
 	}
 }
 
-int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
+			 const struct sys_reg_desc table[], unsigned int num)
 {
+	void __user *uaddr = (void __user *)(unsigned long)reg->addr;
 	const struct sys_reg_desc *r;
+
+	r = id_to_sys_reg_desc(vcpu, reg->id, table, num);
+	if (!r)
+		return -ENOENT;
+
+	if (r->get_user)
+		return (r->get_user)(vcpu, r, reg, uaddr);
+
+	return reg_to_user(uaddr, &__vcpu_sys_reg(vcpu, r->reg), reg->id);
+}
+
+int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
 	void __user *uaddr = (void __user *)(unsigned long)reg->addr;
 	int err;
 
@@ -2861,21 +2878,28 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
 	if (err != -ENOENT)
 		return err;
 
-	r = index_to_sys_reg_desc(vcpu, reg->id);
+	return kvm_sys_reg_get_user(vcpu, reg,
+				    sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+}
 
-	/* Check for regs disabled by runtime config */
-	if (!r || sysreg_hidden(vcpu, r))
+int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
+			 const struct sys_reg_desc table[], unsigned int num)
+{
+	void __user *uaddr = (void __user *)(unsigned long)reg->addr;
+	const struct sys_reg_desc *r;
+
+	r = id_to_sys_reg_desc(vcpu, reg->id, table, num);
+	if (!r)
 		return -ENOENT;
 
-	if (r->get_user)
-		return (r->get_user)(vcpu, r, reg, uaddr);
+	if (r->set_user)
+		return (r->set_user)(vcpu, r, reg, uaddr);
 
-	return reg_to_user(uaddr, &__vcpu_sys_reg(vcpu, r->reg), reg->id);
+	return reg_from_user(&__vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id);
 }
 
 int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 {
-	const struct sys_reg_desc *r;
 	void __user *uaddr = (void __user *)(unsigned long)reg->addr;
 	int err;
 
@@ -2889,16 +2913,8 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
 	if (err != -ENOENT)
 		return err;
 
-	r = index_to_sys_reg_desc(vcpu, reg->id);
-
-	/* Check for regs disabled by runtime config */
-	if (!r || sysreg_hidden(vcpu, r))
-		return -ENOENT;
-
-	if (r->set_user)
-		return (r->set_user)(vcpu, r, reg, uaddr);
-
-	return reg_from_user(&__vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id);
+	return kvm_sys_reg_set_user(vcpu, reg,
+				    sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
 }
 
 static unsigned int num_demux_regs(void)
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index ce30ed9566ae..4fb6d59e7874 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -199,6 +199,12 @@ const struct sys_reg_desc *get_reg_by_id(u64 id,
 					 const struct sys_reg_desc table[],
 					 unsigned int num);
 
+int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
+			 const struct sys_reg_desc table[], unsigned int num);
+
+int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
+			 const struct sys_reg_desc table[], unsigned int num);
+
 #define AA32(_x) .aarch32_map = AA32_##_x
 #define Op0(_x) .Op0 = _x
 #define Op1(_x) .Op1 = _x
-- cgit v1.2.3

From e48407ff9796529a1e5048b9e4d6ea8a0334468e Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 4 Jul 2022 17:01:50 +0100
Subject: KVM: arm64: Rely on index_to_param() for size checks on userspace
 access

index_to_param() already checks that we use 64bit accesses for all
registers accessed from userspace. However, we have extra checks in
other places (such as index_to_params), which is pretty confusing.

Get rid of these redundant checks.

Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/sys_regs.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'arch/arm64/kvm')

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 0fbdb21a3600..5dbe0f4b8167 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -2871,9 +2871,6 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
 		return demux_c15_get(reg->id, uaddr);
 
-	if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
-		return -ENOENT;
-
 	err = get_invariant_sys_reg(reg->id, uaddr);
 	if (err != -ENOENT)
 		return err;
@@ -2906,9 +2903,6 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
 		return demux_c15_set(reg->id, uaddr);
 
-	if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
-		return -ENOENT;
-
 	err = set_invariant_sys_reg(reg->id, uaddr);
 	if (err != -ENOENT)
 		return err;
-- cgit v1.2.3

From 978ceeb3e40a59973ff1d1c3d23484f71f141819 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 4 Jul 2022 17:27:00 +0100
Subject: KVM: arm64: Consolidate sysreg userspace accesses

Until now, the .set_user and .get_user callbacks have to implement
(directly or not) the userspace memory accesses. Although this gives us
maximum flexibility, this is also a maintenance burden, making it hard
to audit, and I'd feel much better if it was all located in a single
place.

So let's do just that, simplifying most of the function signatures in
the process (the callbacks are now only concerned with the data itself,
and not with userspace).

Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/sys_regs.c | 160 +++++++++++++++++-----------------------------
 arch/arm64/kvm/sys_regs.h |   4 +-
 2 files changed, 61 insertions(+), 103 deletions(-)

(limited to 'arch/arm64/kvm')

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 5dbe0f4b8167..526798524697 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -321,16 +321,8 @@ static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
 }
 
 static int set_oslsr_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
-			 const struct kvm_one_reg *reg, void __user *uaddr)
+			 u64 val)
 {
-	u64 id = sys_reg_to_index(rd);
-	u64 val;
-	int err;
-
-	err = reg_from_user(&val, uaddr, id);
-	if (err)
-		return err;
-
 	/*
 	 * The only modifiable bit is the OSLK bit. Refuse the write if
 	 * userspace attempts to change any other bit in the register.
@@ -451,22 +443,16 @@ static bool trap_bvr(struct kvm_vcpu *vcpu, } static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr) + u64 val) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm]; - - if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) - return -EFAULT; + vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = val; return 0; } static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr) + u64 *val) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm]; - - if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) - return -EFAULT; + *val = vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm]; return 0; } @@ -493,23 +479,16 @@ static bool trap_bcr(struct kvm_vcpu *vcpu, } static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr) + u64 val) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm]; - - if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) - return -EFAULT; - + vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm] = val; return 0; } static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr) + u64 *val) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm]; - - if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) - return -EFAULT; + *val = vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm]; return 0; } @@ -537,22 +516,16 @@ static bool trap_wvr(struct kvm_vcpu *vcpu, } static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr) + u64 val) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]; - - if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) - return -EFAULT; + vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm] = val; return 0; } static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr) + u64 *val) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]; - - if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) - return -EFAULT; + *val = vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]; return 0; } @@ -579,22 +552,16 @@ static bool trap_wcr(struct kvm_vcpu *vcpu, } static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr) + u64 val) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm]; - - if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) - return -EFAULT; + vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm] = val; return 0; } static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr) + u64 *val) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm]; - - if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) - return -EFAULT; + *val = vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm]; return 0; } @@ -1227,16 +1194,9 @@ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu, static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr) + u64 val) { - const u64 id = sys_reg_to_index(rd); u8 csv2, csv3; - int err; - u64 val; - - err = reg_from_user(&val, uaddr, id); - if (err) - return err; /* * Allow AA64PFR0_EL1.CSV2 to be set from userspace as long as @@ -1262,7 +1222,7 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu 
*vcpu, return -EINVAL; vcpu->kvm->arch.pfr0_csv2 = csv2; - vcpu->kvm->arch.pfr0_csv3 = csv3 ; + vcpu->kvm->arch.pfr0_csv3 = csv3; return 0; } @@ -1275,27 +1235,17 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, * to be changed. */ static int __get_id_reg(const struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd, void __user *uaddr, + const struct sys_reg_desc *rd, u64 *val, bool raz) { - const u64 id = sys_reg_to_index(rd); - const u64 val = read_id_reg(vcpu, rd, raz); - - return reg_to_user(uaddr, &val, id); + *val = read_id_reg(vcpu, rd, raz); + return 0; } static int __set_id_reg(const struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd, void __user *uaddr, + const struct sys_reg_desc *rd, u64 val, bool raz) { - const u64 id = sys_reg_to_index(rd); - int err; - u64 val; - - err = reg_from_user(&val, uaddr, id); - if (err) - return err; - /* This is what we mean by invariant: you can't change it. */ if (val != read_id_reg(vcpu, rd, raz)) return -EINVAL; @@ -1304,47 +1254,37 @@ static int __set_id_reg(const struct kvm_vcpu *vcpu, } static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr) + u64 *val) { bool raz = sysreg_visible_as_raz(vcpu, rd); - return __get_id_reg(vcpu, rd, uaddr, raz); + return __get_id_reg(vcpu, rd, val, raz); } static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr) + u64 val) { bool raz = sysreg_visible_as_raz(vcpu, rd); - return __set_id_reg(vcpu, rd, uaddr, raz); + return __set_id_reg(vcpu, rd, val, raz); } static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr) + u64 val) { - return __set_id_reg(vcpu, rd, uaddr, true); + return __set_id_reg(vcpu, rd, val, true); } static int get_raz_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr) + u64 *val) { - const u64 id = sys_reg_to_index(rd); - const u64 val = 0; - - return reg_to_user(uaddr, &val, id); + *val = 0; + return 0; } static int set_wi_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr) + u64 val) { - int err; - u64 val; - - /* Perform the access even if we are going to ignore the value */ - err = reg_from_user(&val, uaddr, sys_reg_to_index(rd)); - if (err) - return err; - return 0; } @@ -2850,17 +2790,26 @@ static int demux_c15_set(u64 id, void __user *uaddr) int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, const struct sys_reg_desc table[], unsigned int num) { - void __user *uaddr = (void __user *)(unsigned long)reg->addr; + u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr; const struct sys_reg_desc *r; + u64 val; + int ret; r = id_to_sys_reg_desc(vcpu, reg->id, table, num); if (!r) return -ENOENT; - if (r->get_user) - return (r->get_user)(vcpu, r, reg, uaddr); + if (r->get_user) { + ret = (r->get_user)(vcpu, r, &val); + } else { + val = __vcpu_sys_reg(vcpu, r->reg); + ret = 0; + } + + if (!ret) + ret = put_user(val, uaddr); - return reg_to_user(uaddr, &__vcpu_sys_reg(vcpu, r->reg), reg->id); + return ret; } int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) @@ -2882,17 +2831,26 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, const struct sys_reg_desc table[], unsigned int 
num) { - void __user *uaddr = (void __user *)(unsigned long)reg->addr; + u64 __user *uaddr = (u64 __user *)(unsigned long)reg->addr; const struct sys_reg_desc *r; + u64 val; + int ret; + + if (get_user(val, uaddr)) + return -EFAULT; r = id_to_sys_reg_desc(vcpu, reg->id, table, num); if (!r) return -ENOENT; - if (r->set_user) - return (r->set_user)(vcpu, r, reg, uaddr); + if (r->set_user) { + ret = (r->set_user)(vcpu, r, val); + } else { + __vcpu_sys_reg(vcpu, r->reg) = val; + ret = 0; + } - return reg_from_user(&__vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id); + return ret; } int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 4fb6d59e7874..b8b576a2af2b 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -75,9 +75,9 @@ struct sys_reg_desc { /* Custom get/set_user functions, fallback to generic if NULL */ int (*get_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr); + u64 *val); int (*set_user)(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr); + u64 val); /* Return mask of REG_* runtime visibility overrides */ unsigned int (*visibility)(const struct kvm_vcpu *vcpu, -- cgit v1.2.3 From 5a420ed9646a934e983358aeba1bf3cd993d1cc5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier <maz@kernel.org> Date: Mon, 4 Jul 2022 17:55:43 +0100 Subject: KVM: arm64: Get rid of reg_from/to_user() These helpers are only used by the invariant stuff now, and while they pretend to support non-64bit registers, this only serves as a way to scare the casual reviewer... Replace these helpers with our good friends get/put_user(), and don't look back. Reviewed-by: Reiji Watanabe <reijiw@google.com> Signed-off-by: Marc Zyngier <maz@kernel.org> --- arch/arm64/kvm/sys_regs.c | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 526798524697..379478eecfaa 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -44,8 +44,6 @@ * 64bit interface. 
 */
-static int reg_from_user(u64 *val, const void __user *uaddr, u64 id);
-static int reg_to_user(void __user *uaddr, const u64 *val, u64 id);
 static u64 sys_reg_to_index(const struct sys_reg_desc *reg);
 
 static bool read_from_write_only(struct kvm_vcpu *vcpu,
@@ -2657,21 +2655,7 @@ static struct sys_reg_desc invariant_sys_regs[] = {
 	{ SYS_DESC(SYS_CTR_EL0), NULL, get_ctr_el0 },
 };
 
-static int reg_from_user(u64 *val, const void __user *uaddr, u64 id)
-{
-	if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
-		return -EFAULT;
-	return 0;
-}
-
-static int reg_to_user(void __user *uaddr, const u64 *val, u64 id)
-{
-	if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
-		return -EFAULT;
-	return 0;
-}
-
-static int get_invariant_sys_reg(u64 id, void __user *uaddr)
+static int get_invariant_sys_reg(u64 id, u64 __user *uaddr)
 {
 	const struct sys_reg_desc *r;
 
@@ -2680,23 +2664,21 @@ static int get_invariant_sys_reg(u64 id, void __user *uaddr)
 	if (!r)
 		return -ENOENT;
 
-	return reg_to_user(uaddr, &r->val, id);
+	return put_user(r->val, uaddr);
 }
 
-static int set_invariant_sys_reg(u64 id, void __user *uaddr)
+static int set_invariant_sys_reg(u64 id, u64 __user *uaddr)
 {
 	const struct sys_reg_desc *r;
-	int err;
-	u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */
+	u64 val;
 
 	r = get_reg_by_id(id, invariant_sys_regs,
 			  ARRAY_SIZE(invariant_sys_regs));
 	if (!r)
 		return -ENOENT;
 
-	err = reg_from_user(&val, uaddr, id);
-	if (err)
-		return err;
+	if (get_user(val, uaddr))
+		return -EFAULT;
 
 	/* This is what we mean by invariant: you can't change it. */
 	if (r->val != val)
-- 
cgit v1.2.3


From b61fc0857a3ad4cdee44128ad13685033e237367 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 3 Jul 2022 14:57:29 +0100
Subject: KVM: arm64: vgic-v3: Simplify vgic_v3_has_cpu_sysregs_attr()

Finding out whether a sysreg exists has little to do with that
register being accessed, so drop the is_write parameter.

Also, the reg pointer is completely unused, and we're better off just
passing the attr pointer to the function.

This results in a small cleanup of the calling site, with a new helper
converting the vGIC view of a sysreg into the canonical one (this is
purely cosmetic, as the encoding is the same).
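For readers unfamiliar with linux/bitfield.h, the FIELD_GET()/FIELD_PREP()
pair used by the new attr_to_id() helper below simply moves a masked field
to and from bit position zero. A minimal standalone sketch, not part of
the patch (the mask is made up for illustration and is not one of the
KVM_REG_ARM_VGIC_SYSREG_* definitions):

	#include <linux/bitfield.h>

	/* A made-up 3-bit field occupying bits [6:4] of a register value. */
	#define EXAMPLE_FIELD_MASK	GENMASK(6, 4)

	static u64 example_roundtrip(u64 reg)
	{
		/* FIELD_GET(): (reg & mask) >> 4, i.e. extract the field */
		u64 field = FIELD_GET(EXAMPLE_FIELD_MASK, reg);

		/* FIELD_PREP(): (field << 4) & mask, i.e. place it back */
		return FIELD_PREP(EXAMPLE_FIELD_MASK, field);
	}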
Reviewed-by: Reiji Watanabe <reijiw@google.com> Signed-off-by: Marc Zyngier <maz@kernel.org> --- arch/arm64/kvm/vgic-sys-reg-v3.c | 14 ++++++++++---- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 8 ++------ arch/arm64/kvm/vgic/vgic.h | 3 +-- 3 files changed, 13 insertions(+), 12 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c index 644acda33c7c..85a5e1d15e9f 100644 --- a/arch/arm64/kvm/vgic-sys-reg-v3.c +++ b/arch/arm64/kvm/vgic-sys-reg-v3.c @@ -260,12 +260,18 @@ static const struct sys_reg_desc gic_v3_icc_reg_descs[] = { { SYS_DESC(SYS_ICC_IGRPEN1_EL1), access_gic_grpen1 }, }; -int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id, - u64 *reg) +static u64 attr_to_id(u64 attr) { - u64 sysreg = (id & KVM_DEV_ARM_VGIC_SYSREG_MASK) | KVM_REG_SIZE_U64; + return ARM64_SYS_REG(FIELD_GET(KVM_REG_ARM_VGIC_SYSREG_OP0_MASK, attr), + FIELD_GET(KVM_REG_ARM_VGIC_SYSREG_OP1_MASK, attr), + FIELD_GET(KVM_REG_ARM_VGIC_SYSREG_CRN_MASK, attr), + FIELD_GET(KVM_REG_ARM_VGIC_SYSREG_CRM_MASK, attr), + FIELD_GET(KVM_REG_ARM_VGIC_SYSREG_OP2_MASK, attr)); +} - if (get_reg_by_id(sysreg, gic_v3_icc_reg_descs, +int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + if (get_reg_by_id(attr_to_id(attr->attr), gic_v3_icc_reg_descs, ARRAY_SIZE(gic_v3_icc_reg_descs))) return 0; diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index f15e29cc63ce..a2ff73899976 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -986,12 +986,8 @@ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) iodev.base_addr = 0; break; } - case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { - u64 reg, id; - - id = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK); - return vgic_v3_has_cpu_sysregs_attr(vcpu, 0, id, ®); - } + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: + return vgic_v3_has_cpu_sysregs_attr(vcpu, attr); default: return -ENXIO; } diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 4c6bdd321faa..ffc2d3c81b28 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -247,8 +247,7 @@ int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write, int offset, u32 *val); int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, u64 id, u64 *val); -int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id, - u64 *reg); +int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write, u32 intid, u64 *val); int kvm_register_vgic_device(unsigned long type); -- cgit v1.2.3 From db25081e147c3cc496b8cd8c9d67f992546df6d5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier <maz@kernel.org> Date: Mon, 4 Jul 2022 08:07:44 +0100 Subject: KVM: arm64: vgic-v3: Push user access into vgic_v3_cpu_sysregs_uaccess() In order to start making the vgic sysreg access from userspace similar to all the other sysregs, push the userspace memory access one level down into vgic_v3_cpu_sysregs_uaccess(). The next step will be to rely on the sysreg infrastructure to perform this task. 
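The shape being pushed down here is the standard one for 64bit scalar
accesses from userspace. A condensed sketch of the pattern, with
hypothetical names (example_reg stands in for whatever backs the
register; this is not the actual kernel code):

	static int example_uaccess(u64 __user *uaddr, u64 *example_reg,
				   bool is_write)
	{
		u64 val;

		if (is_write) {
			/* Fetch the value userspace wants to write */
			if (get_user(val, uaddr))
				return -EFAULT;
			*example_reg = val;
		} else {
			/* Hand the current value back to userspace */
			val = *example_reg;
			if (put_user(val, uaddr))
				return -EFAULT;
		}

		return 0;
	}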
Reviewed-by: Reiji Watanabe <reijiw@google.com> Signed-off-by: Marc Zyngier <maz@kernel.org> --- arch/arm64/kvm/vgic-sys-reg-v3.c | 22 +++++++++++++++------- arch/arm64/kvm/vgic/vgic-kvm-device.c | 33 +++++++-------------------------- arch/arm64/kvm/vgic/vgic.h | 4 ++-- 3 files changed, 24 insertions(+), 35 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c index 85a5e1d15e9f..88eb5b049c2c 100644 --- a/arch/arm64/kvm/vgic-sys-reg-v3.c +++ b/arch/arm64/kvm/vgic-sys-reg-v3.c @@ -278,15 +278,21 @@ int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr * return -ENXIO; } -int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, u64 id, - u64 *reg) +int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr, + bool is_write) { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; struct sys_reg_params params; const struct sys_reg_desc *r; - u64 sysreg = (id & KVM_DEV_ARM_VGIC_SYSREG_MASK) | KVM_REG_SIZE_U64; + u64 sysreg; - if (is_write) - params.regval = *reg; + sysreg = attr_to_id(attr->attr); + + if (is_write) { + if (get_user(params.regval, uaddr)) + return -EFAULT; + } params.is_write = is_write; r = find_reg_by_id(sysreg, ¶ms, gic_v3_icc_reg_descs, @@ -297,8 +303,10 @@ int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, u64 id, if (!r->access(vcpu, ¶ms, r)) return -EINVAL; - if (!is_write) - *reg = params.regval; + if (!is_write) { + if (put_user(params.regval, uaddr)) + return -EFAULT; + } return 0; } diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index c6d52a1fd9c8..bf745c6ab2ea 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -512,7 +512,7 @@ int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, * * @dev: kvm device handle * @attr: kvm device attribute - * @reg: address the value is read or written + * @reg: address the value is read or written, NULL for sysregs * @is_write: true if userspace is writing a register */ static int vgic_v3_attr_regs_access(struct kvm_device *dev, @@ -561,14 +561,9 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, if (!is_write) *reg = tmp32; break; - case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { - u64 regid; - - regid = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK); - ret = vgic_v3_cpu_sysregs_uaccess(vcpu, is_write, - regid, reg); + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: + ret = vgic_v3_cpu_sysregs_uaccess(vcpu, attr, is_write); break; - } case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { unsigned int info, intid; @@ -617,15 +612,8 @@ static int vgic_v3_set_attr(struct kvm_device *dev, reg = tmp32; return vgic_v3_attr_regs_access(dev, attr, ®, true); } - case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { - u64 __user *uaddr = (u64 __user *)(long)attr->addr; - u64 reg; - - if (get_user(reg, uaddr)) - return -EFAULT; - - return vgic_v3_attr_regs_access(dev, attr, ®, true); - } + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: + return vgic_v3_attr_regs_access(dev, attr, NULL, true); case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { u32 __user *uaddr = (u32 __user *)(long)attr->addr; u64 reg; @@ -681,15 +669,8 @@ static int vgic_v3_get_attr(struct kvm_device *dev, tmp32 = reg; return put_user(tmp32, uaddr); } - case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { - u64 __user *uaddr = (u64 __user *)(long)attr->addr; - u64 reg; - - ret = vgic_v3_attr_regs_access(dev, attr, ®, false); - if (ret) - return ret; - return put_user(reg, 
uaddr); - } + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: + return vgic_v3_attr_regs_access(dev, attr, NULL, false); case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { u32 __user *uaddr = (u32 __user *)(long)attr->addr; u64 reg; diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index ffc2d3c81b28..c23118467a35 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -245,8 +245,8 @@ int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, int offset, u32 *val); int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write, int offset, u32 *val); -int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, - u64 id, u64 *val); +int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr, bool is_write); int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write, u32 intid, u64 *val); -- cgit v1.2.3 From cbcf14dd23bcf228eb6061991acf3721506b97ae Mon Sep 17 00:00:00 2001 From: Marc Zyngier <maz@kernel.org> Date: Mon, 4 Jul 2022 09:57:38 +0100 Subject: KVM: arm64: vgic-v3: Make the userspace accessors use sysreg API The vgic-v3 sysreg accessors have been ignored as the rest of the sysreg internal API was evolving, and are stuck with the .access method (which is normally reserved to the guest's own access) for the userspace accesses (which should use the .set/.get_user() methods). Catch up with the program and repaint all the accessors so that they fit into the normal userspace model, and plug the result into the helpers that have been introduced earlier. Reviewed-by: Reiji Watanabe <reijiw@google.com> Signed-off-by: Marc Zyngier <maz@kernel.org> --- arch/arm64/kvm/vgic-sys-reg-v3.c | 454 ++++++++++++++++++++++----------------- 1 file changed, 257 insertions(+), 197 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c index 88eb5b049c2c..b755b02bc8ba 100644 --- a/arch/arm64/kvm/vgic-sys-reg-v3.c +++ b/arch/arm64/kvm/vgic-sys-reg-v3.c @@ -10,254 +10,330 @@ #include "vgic/vgic.h" #include "sys_regs.h" -static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, - const struct sys_reg_desc *r) +static int set_gic_ctlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 val) { u32 host_pri_bits, host_id_bits, host_seis, host_a3v, seis, a3v; struct vgic_cpu *vgic_v3_cpu = &vcpu->arch.vgic_cpu; struct vgic_vmcr vmcr; + + vgic_get_vmcr(vcpu, &vmcr); + + /* + * Disallow restoring VM state if not supported by this + * hardware. + */ + host_pri_bits = ((val & ICC_CTLR_EL1_PRI_BITS_MASK) >> + ICC_CTLR_EL1_PRI_BITS_SHIFT) + 1; + if (host_pri_bits > vgic_v3_cpu->num_pri_bits) + return -EINVAL; + + vgic_v3_cpu->num_pri_bits = host_pri_bits; + + host_id_bits = (val & ICC_CTLR_EL1_ID_BITS_MASK) >> + ICC_CTLR_EL1_ID_BITS_SHIFT; + if (host_id_bits > vgic_v3_cpu->num_id_bits) + return -EINVAL; + + vgic_v3_cpu->num_id_bits = host_id_bits; + + host_seis = ((kvm_vgic_global_state.ich_vtr_el2 & + ICH_VTR_SEIS_MASK) >> ICH_VTR_SEIS_SHIFT); + seis = (val & ICC_CTLR_EL1_SEIS_MASK) >> + ICC_CTLR_EL1_SEIS_SHIFT; + if (host_seis != seis) + return -EINVAL; + + host_a3v = ((kvm_vgic_global_state.ich_vtr_el2 & + ICH_VTR_A3V_MASK) >> ICH_VTR_A3V_SHIFT); + a3v = (val & ICC_CTLR_EL1_A3V_MASK) >> ICC_CTLR_EL1_A3V_SHIFT; + if (host_a3v != a3v) + return -EINVAL; + + /* + * Here set VMCR.CTLR in ICC_CTLR_EL1 layout. + * The vgic_set_vmcr() will convert to ICH_VMCR layout. 
+ */ + vmcr.cbpr = (val & ICC_CTLR_EL1_CBPR_MASK) >> ICC_CTLR_EL1_CBPR_SHIFT; + vmcr.eoim = (val & ICC_CTLR_EL1_EOImode_MASK) >> ICC_CTLR_EL1_EOImode_SHIFT; + vgic_set_vmcr(vcpu, &vmcr); + + return 0; +} + +static int get_gic_ctlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 *valp) +{ + struct vgic_cpu *vgic_v3_cpu = &vcpu->arch.vgic_cpu; + struct vgic_vmcr vmcr; u64 val; vgic_get_vmcr(vcpu, &vmcr); - if (p->is_write) { - val = p->regval; - - /* - * Disallow restoring VM state if not supported by this - * hardware. - */ - host_pri_bits = ((val & ICC_CTLR_EL1_PRI_BITS_MASK) >> - ICC_CTLR_EL1_PRI_BITS_SHIFT) + 1; - if (host_pri_bits > vgic_v3_cpu->num_pri_bits) - return false; - - vgic_v3_cpu->num_pri_bits = host_pri_bits; - - host_id_bits = (val & ICC_CTLR_EL1_ID_BITS_MASK) >> - ICC_CTLR_EL1_ID_BITS_SHIFT; - if (host_id_bits > vgic_v3_cpu->num_id_bits) - return false; - - vgic_v3_cpu->num_id_bits = host_id_bits; - - host_seis = ((kvm_vgic_global_state.ich_vtr_el2 & - ICH_VTR_SEIS_MASK) >> ICH_VTR_SEIS_SHIFT); - seis = (val & ICC_CTLR_EL1_SEIS_MASK) >> - ICC_CTLR_EL1_SEIS_SHIFT; - if (host_seis != seis) - return false; - - host_a3v = ((kvm_vgic_global_state.ich_vtr_el2 & - ICH_VTR_A3V_MASK) >> ICH_VTR_A3V_SHIFT); - a3v = (val & ICC_CTLR_EL1_A3V_MASK) >> ICC_CTLR_EL1_A3V_SHIFT; - if (host_a3v != a3v) - return false; - - /* - * Here set VMCR.CTLR in ICC_CTLR_EL1 layout. - * The vgic_set_vmcr() will convert to ICH_VMCR layout. - */ - vmcr.cbpr = (val & ICC_CTLR_EL1_CBPR_MASK) >> ICC_CTLR_EL1_CBPR_SHIFT; - vmcr.eoim = (val & ICC_CTLR_EL1_EOImode_MASK) >> ICC_CTLR_EL1_EOImode_SHIFT; - vgic_set_vmcr(vcpu, &vmcr); - } else { - val = 0; - val |= (vgic_v3_cpu->num_pri_bits - 1) << - ICC_CTLR_EL1_PRI_BITS_SHIFT; - val |= vgic_v3_cpu->num_id_bits << ICC_CTLR_EL1_ID_BITS_SHIFT; - val |= ((kvm_vgic_global_state.ich_vtr_el2 & - ICH_VTR_SEIS_MASK) >> ICH_VTR_SEIS_SHIFT) << - ICC_CTLR_EL1_SEIS_SHIFT; - val |= ((kvm_vgic_global_state.ich_vtr_el2 & - ICH_VTR_A3V_MASK) >> ICH_VTR_A3V_SHIFT) << - ICC_CTLR_EL1_A3V_SHIFT; - /* - * The VMCR.CTLR value is in ICC_CTLR_EL1 layout. - * Extract it directly using ICC_CTLR_EL1 reg definitions. - */ - val |= (vmcr.cbpr << ICC_CTLR_EL1_CBPR_SHIFT) & ICC_CTLR_EL1_CBPR_MASK; - val |= (vmcr.eoim << ICC_CTLR_EL1_EOImode_SHIFT) & ICC_CTLR_EL1_EOImode_MASK; - - p->regval = val; - } + val = 0; + val |= (vgic_v3_cpu->num_pri_bits - 1) << ICC_CTLR_EL1_PRI_BITS_SHIFT; + val |= vgic_v3_cpu->num_id_bits << ICC_CTLR_EL1_ID_BITS_SHIFT; + val |= ((kvm_vgic_global_state.ich_vtr_el2 & + ICH_VTR_SEIS_MASK) >> ICH_VTR_SEIS_SHIFT) << + ICC_CTLR_EL1_SEIS_SHIFT; + val |= ((kvm_vgic_global_state.ich_vtr_el2 & + ICH_VTR_A3V_MASK) >> ICH_VTR_A3V_SHIFT) << + ICC_CTLR_EL1_A3V_SHIFT; + /* + * The VMCR.CTLR value is in ICC_CTLR_EL1 layout. + * Extract it directly using ICC_CTLR_EL1 reg definitions. 
+ */ + val |= (vmcr.cbpr << ICC_CTLR_EL1_CBPR_SHIFT) & ICC_CTLR_EL1_CBPR_MASK; + val |= (vmcr.eoim << ICC_CTLR_EL1_EOImode_SHIFT) & ICC_CTLR_EL1_EOImode_MASK; + + *valp = val; - return true; + return 0; } -static bool access_gic_pmr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, - const struct sys_reg_desc *r) +static int set_gic_pmr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 val) { struct vgic_vmcr vmcr; vgic_get_vmcr(vcpu, &vmcr); - if (p->is_write) { - vmcr.pmr = (p->regval & ICC_PMR_EL1_MASK) >> ICC_PMR_EL1_SHIFT; - vgic_set_vmcr(vcpu, &vmcr); - } else { - p->regval = (vmcr.pmr << ICC_PMR_EL1_SHIFT) & ICC_PMR_EL1_MASK; - } + vmcr.pmr = (val & ICC_PMR_EL1_MASK) >> ICC_PMR_EL1_SHIFT; + vgic_set_vmcr(vcpu, &vmcr); - return true; + return 0; } -static bool access_gic_bpr0(struct kvm_vcpu *vcpu, struct sys_reg_params *p, - const struct sys_reg_desc *r) +static int get_gic_pmr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 *val) { struct vgic_vmcr vmcr; vgic_get_vmcr(vcpu, &vmcr); - if (p->is_write) { - vmcr.bpr = (p->regval & ICC_BPR0_EL1_MASK) >> - ICC_BPR0_EL1_SHIFT; - vgic_set_vmcr(vcpu, &vmcr); - } else { - p->regval = (vmcr.bpr << ICC_BPR0_EL1_SHIFT) & - ICC_BPR0_EL1_MASK; - } + *val = (vmcr.pmr << ICC_PMR_EL1_SHIFT) & ICC_PMR_EL1_MASK; - return true; + return 0; } -static bool access_gic_bpr1(struct kvm_vcpu *vcpu, struct sys_reg_params *p, - const struct sys_reg_desc *r) +static int set_gic_bpr0(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 val) { struct vgic_vmcr vmcr; - if (!p->is_write) - p->regval = 0; + vgic_get_vmcr(vcpu, &vmcr); + vmcr.bpr = (val & ICC_BPR0_EL1_MASK) >> ICC_BPR0_EL1_SHIFT; + vgic_set_vmcr(vcpu, &vmcr); + + return 0; +} + +static int get_gic_bpr0(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 *val) +{ + struct vgic_vmcr vmcr; vgic_get_vmcr(vcpu, &vmcr); - if (!vmcr.cbpr) { - if (p->is_write) { - vmcr.abpr = (p->regval & ICC_BPR1_EL1_MASK) >> - ICC_BPR1_EL1_SHIFT; - vgic_set_vmcr(vcpu, &vmcr); - } else { - p->regval = (vmcr.abpr << ICC_BPR1_EL1_SHIFT) & - ICC_BPR1_EL1_MASK; - } - } else { - if (!p->is_write) - p->regval = min((vmcr.bpr + 1), 7U); - } + *val = (vmcr.bpr << ICC_BPR0_EL1_SHIFT) & ICC_BPR0_EL1_MASK; - return true; + return 0; } -static bool access_gic_grpen0(struct kvm_vcpu *vcpu, struct sys_reg_params *p, - const struct sys_reg_desc *r) +static int set_gic_bpr1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 val) { struct vgic_vmcr vmcr; vgic_get_vmcr(vcpu, &vmcr); - if (p->is_write) { - vmcr.grpen0 = (p->regval & ICC_IGRPEN0_EL1_MASK) >> - ICC_IGRPEN0_EL1_SHIFT; + if (!vmcr.cbpr) { + vmcr.abpr = (val & ICC_BPR1_EL1_MASK) >> ICC_BPR1_EL1_SHIFT; vgic_set_vmcr(vcpu, &vmcr); - } else { - p->regval = (vmcr.grpen0 << ICC_IGRPEN0_EL1_SHIFT) & - ICC_IGRPEN0_EL1_MASK; } - return true; + return 0; } -static bool access_gic_grpen1(struct kvm_vcpu *vcpu, struct sys_reg_params *p, - const struct sys_reg_desc *r) +static int get_gic_bpr1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 *val) { struct vgic_vmcr vmcr; vgic_get_vmcr(vcpu, &vmcr); - if (p->is_write) { - vmcr.grpen1 = (p->regval & ICC_IGRPEN1_EL1_MASK) >> - ICC_IGRPEN1_EL1_SHIFT; - vgic_set_vmcr(vcpu, &vmcr); - } else { - p->regval = (vmcr.grpen1 << ICC_IGRPEN1_EL1_SHIFT) & - ICC_IGRPEN1_EL1_MASK; - } + if (!vmcr.cbpr) + *val = (vmcr.abpr << ICC_BPR1_EL1_SHIFT) & ICC_BPR1_EL1_MASK; + else + *val = min((vmcr.bpr + 1), 7U); + + + return 0; +} + +static int set_gic_grpen0(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + 
u64 val) +{ + struct vgic_vmcr vmcr; + + vgic_get_vmcr(vcpu, &vmcr); + vmcr.grpen0 = (val & ICC_IGRPEN0_EL1_MASK) >> ICC_IGRPEN0_EL1_SHIFT; + vgic_set_vmcr(vcpu, &vmcr); + + return 0; +} + +static int get_gic_grpen0(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 *val) +{ + struct vgic_vmcr vmcr; + + vgic_get_vmcr(vcpu, &vmcr); + *val = (vmcr.grpen0 << ICC_IGRPEN0_EL1_SHIFT) & ICC_IGRPEN0_EL1_MASK; + + return 0; +} + +static int set_gic_grpen1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 val) +{ + struct vgic_vmcr vmcr; - return true; + vgic_get_vmcr(vcpu, &vmcr); + vmcr.grpen1 = (val & ICC_IGRPEN1_EL1_MASK) >> ICC_IGRPEN1_EL1_SHIFT; + vgic_set_vmcr(vcpu, &vmcr); + + return 0; } -static void vgic_v3_access_apr_reg(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, u8 apr, u8 idx) +static int get_gic_grpen1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 *val) +{ + struct vgic_vmcr vmcr; + + vgic_get_vmcr(vcpu, &vmcr); + *val = (vmcr.grpen1 << ICC_IGRPEN1_EL1_SHIFT) & ICC_IGRPEN1_EL1_MASK; + + return 0; +} + +static void set_apr_reg(struct kvm_vcpu *vcpu, u64 val, u8 apr, u8 idx) { struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3; - uint32_t *ap_reg; if (apr) - ap_reg = &vgicv3->vgic_ap1r[idx]; + vgicv3->vgic_ap1r[idx] = val; else - ap_reg = &vgicv3->vgic_ap0r[idx]; + vgicv3->vgic_ap0r[idx] = val; +} + +static u64 get_apr_reg(struct kvm_vcpu *vcpu, u8 apr, u8 idx) +{ + struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3; - if (p->is_write) - *ap_reg = p->regval; + if (apr) + return vgicv3->vgic_ap1r[idx]; else - p->regval = *ap_reg; + return vgicv3->vgic_ap0r[idx]; +} + +static int set_gic_ap0r(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 val) + +{ + u8 idx = r->Op2 & 3; + + if (idx > vgic_v3_max_apr_idx(vcpu)) + return -EINVAL; + + set_apr_reg(vcpu, val, 0, idx); + return 0; } -static bool access_gic_aprn(struct kvm_vcpu *vcpu, struct sys_reg_params *p, - const struct sys_reg_desc *r, u8 apr) +static int get_gic_ap0r(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 *val) { u8 idx = r->Op2 & 3; if (idx > vgic_v3_max_apr_idx(vcpu)) - goto err; + return -EINVAL; - vgic_v3_access_apr_reg(vcpu, p, apr, idx); - return true; -err: - if (!p->is_write) - p->regval = 0; + *val = get_apr_reg(vcpu, 0, idx); - return false; + return 0; } -static bool access_gic_ap0r(struct kvm_vcpu *vcpu, struct sys_reg_params *p, - const struct sys_reg_desc *r) +static int set_gic_ap1r(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 val) { - return access_gic_aprn(vcpu, p, r, 0); + u8 idx = r->Op2 & 3; + + if (idx > vgic_v3_max_apr_idx(vcpu)) + return -EINVAL; + + set_apr_reg(vcpu, val, 1, idx); + return 0; +} + +static int get_gic_ap1r(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 *val) +{ + u8 idx = r->Op2 & 3; + + if (idx > vgic_v3_max_apr_idx(vcpu)) + return -EINVAL; + + *val = get_apr_reg(vcpu, 1, idx); + + return 0; } -static bool access_gic_ap1r(struct kvm_vcpu *vcpu, struct sys_reg_params *p, - const struct sys_reg_desc *r) +static int set_gic_sre(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 val) { - return access_gic_aprn(vcpu, p, r, 1); + /* Validate SRE bit */ + if (!(val & ICC_SRE_EL1_SRE)) + return -EINVAL; + + return 0; } -static bool access_gic_sre(struct kvm_vcpu *vcpu, struct sys_reg_params *p, - const struct sys_reg_desc *r) +static int get_gic_sre(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, + u64 *val) { struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3; - /* 
Validate SRE bit */ - if (p->is_write) { - if (!(p->regval & ICC_SRE_EL1_SRE)) - return false; - } else { - p->regval = vgicv3->vgic_sre; - } + *val = vgicv3->vgic_sre; - return true; + return 0; } + static const struct sys_reg_desc gic_v3_icc_reg_descs[] = { - { SYS_DESC(SYS_ICC_PMR_EL1), access_gic_pmr }, - { SYS_DESC(SYS_ICC_BPR0_EL1), access_gic_bpr0 }, - { SYS_DESC(SYS_ICC_AP0R0_EL1), access_gic_ap0r }, - { SYS_DESC(SYS_ICC_AP0R1_EL1), access_gic_ap0r }, - { SYS_DESC(SYS_ICC_AP0R2_EL1), access_gic_ap0r }, - { SYS_DESC(SYS_ICC_AP0R3_EL1), access_gic_ap0r }, - { SYS_DESC(SYS_ICC_AP1R0_EL1), access_gic_ap1r }, - { SYS_DESC(SYS_ICC_AP1R1_EL1), access_gic_ap1r }, - { SYS_DESC(SYS_ICC_AP1R2_EL1), access_gic_ap1r }, - { SYS_DESC(SYS_ICC_AP1R3_EL1), access_gic_ap1r }, - { SYS_DESC(SYS_ICC_BPR1_EL1), access_gic_bpr1 }, - { SYS_DESC(SYS_ICC_CTLR_EL1), access_gic_ctlr }, - { SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre }, - { SYS_DESC(SYS_ICC_IGRPEN0_EL1), access_gic_grpen0 }, - { SYS_DESC(SYS_ICC_IGRPEN1_EL1), access_gic_grpen1 }, + { SYS_DESC(SYS_ICC_PMR_EL1), + .set_user = set_gic_pmr, .get_user = get_gic_pmr, }, + { SYS_DESC(SYS_ICC_BPR0_EL1), + .set_user = set_gic_bpr0, .get_user = get_gic_bpr0, }, + { SYS_DESC(SYS_ICC_AP0R0_EL1), + .set_user = set_gic_ap0r, .get_user = get_gic_ap0r, }, + { SYS_DESC(SYS_ICC_AP0R1_EL1), + .set_user = set_gic_ap0r, .get_user = get_gic_ap0r, }, + { SYS_DESC(SYS_ICC_AP0R2_EL1), + .set_user = set_gic_ap0r, .get_user = get_gic_ap0r, }, + { SYS_DESC(SYS_ICC_AP0R3_EL1), + .set_user = set_gic_ap0r, .get_user = get_gic_ap0r, }, + { SYS_DESC(SYS_ICC_AP1R0_EL1), + .set_user = set_gic_ap1r, .get_user = get_gic_ap1r, }, + { SYS_DESC(SYS_ICC_AP1R1_EL1), + .set_user = set_gic_ap1r, .get_user = get_gic_ap1r, }, + { SYS_DESC(SYS_ICC_AP1R2_EL1), + .set_user = set_gic_ap1r, .get_user = get_gic_ap1r, }, + { SYS_DESC(SYS_ICC_AP1R3_EL1), + .set_user = set_gic_ap1r, .get_user = get_gic_ap1r, }, + { SYS_DESC(SYS_ICC_BPR1_EL1), + .set_user = set_gic_bpr1, .get_user = get_gic_bpr1, }, + { SYS_DESC(SYS_ICC_CTLR_EL1), + .set_user = set_gic_ctlr, .get_user = get_gic_ctlr, }, + { SYS_DESC(SYS_ICC_SRE_EL1), + .set_user = set_gic_sre, .get_user = get_gic_sre, }, + { SYS_DESC(SYS_ICC_IGRPEN0_EL1), + .set_user = set_gic_grpen0, .get_user = get_gic_grpen0, }, + { SYS_DESC(SYS_ICC_IGRPEN1_EL1), + .set_user = set_gic_grpen1, .get_user = get_gic_grpen1, }, }; static u64 attr_to_id(u64 attr) @@ -282,31 +358,15 @@ int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr, bool is_write) { - u64 __user *uaddr = (u64 __user *)(long)attr->addr; - struct sys_reg_params params; - const struct sys_reg_desc *r; - u64 sysreg; - - sysreg = attr_to_id(attr->attr); - - if (is_write) { - if (get_user(params.regval, uaddr)) - return -EFAULT; - } - params.is_write = is_write; - - r = find_reg_by_id(sysreg, ¶ms, gic_v3_icc_reg_descs, - ARRAY_SIZE(gic_v3_icc_reg_descs)); - if (!r) - return -ENXIO; - - if (!r->access(vcpu, ¶ms, r)) - return -EINVAL; - - if (!is_write) { - if (put_user(params.regval, uaddr)) - return -EFAULT; - } - - return 0; + struct kvm_one_reg reg = { + .id = attr_to_id(attr->attr), + .addr = attr->addr, + }; + + if (is_write) + return kvm_sys_reg_set_user(vcpu, ®, gic_v3_icc_reg_descs, + ARRAY_SIZE(gic_v3_icc_reg_descs)); + else + return kvm_sys_reg_get_user(vcpu, ®, gic_v3_icc_reg_descs, + ARRAY_SIZE(gic_v3_icc_reg_descs)); } -- cgit v1.2.3 From 71c3c7753c722b8b10566dcdf1ff0a2eaf33a9c1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier <maz@kernel.org> Date: Tue, 5 Jul 2022 
08:11:54 +0100 Subject: KVM: arm64: vgic-v3: Convert userspace accessors over to FIELD_GET/FIELD_PREP The GICv3 userspace accessors are all about dealing with conversion between fields from architectural registers and internal representations. However, and owing to the age of this code, the accessors use a combination of shift/mask that is hard to read. It is nonetheless easy to make it better by using the FIELD_{GET,PREP} macros that solely rely on a mask. This results in somewhat nicer looking code, and is probably easier to maintain. Reviewed-by: Reiji Watanabe <reijiw@google.com> Signed-off-by: Marc Zyngier <maz@kernel.org> --- arch/arm64/kvm/vgic-sys-reg-v3.c | 60 ++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 33 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c index b755b02bc8ba..9e7c486b48c2 100644 --- a/arch/arm64/kvm/vgic-sys-reg-v3.c +++ b/arch/arm64/kvm/vgic-sys-reg-v3.c @@ -23,30 +23,25 @@ static int set_gic_ctlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, * Disallow restoring VM state if not supported by this * hardware. */ - host_pri_bits = ((val & ICC_CTLR_EL1_PRI_BITS_MASK) >> - ICC_CTLR_EL1_PRI_BITS_SHIFT) + 1; + host_pri_bits = FIELD_GET(ICC_CTLR_EL1_PRI_BITS_MASK, val) + 1; if (host_pri_bits > vgic_v3_cpu->num_pri_bits) return -EINVAL; vgic_v3_cpu->num_pri_bits = host_pri_bits; - host_id_bits = (val & ICC_CTLR_EL1_ID_BITS_MASK) >> - ICC_CTLR_EL1_ID_BITS_SHIFT; + host_id_bits = FIELD_GET(ICC_CTLR_EL1_ID_BITS_MASK, val); if (host_id_bits > vgic_v3_cpu->num_id_bits) return -EINVAL; vgic_v3_cpu->num_id_bits = host_id_bits; - host_seis = ((kvm_vgic_global_state.ich_vtr_el2 & - ICH_VTR_SEIS_MASK) >> ICH_VTR_SEIS_SHIFT); - seis = (val & ICC_CTLR_EL1_SEIS_MASK) >> - ICC_CTLR_EL1_SEIS_SHIFT; + host_seis = FIELD_GET(ICH_VTR_SEIS_MASK, kvm_vgic_global_state.ich_vtr_el2); + seis = FIELD_GET(ICC_CTLR_EL1_SEIS_MASK, val); if (host_seis != seis) return -EINVAL; - host_a3v = ((kvm_vgic_global_state.ich_vtr_el2 & - ICH_VTR_A3V_MASK) >> ICH_VTR_A3V_SHIFT); - a3v = (val & ICC_CTLR_EL1_A3V_MASK) >> ICC_CTLR_EL1_A3V_SHIFT; + host_a3v = FIELD_GET(ICH_VTR_A3V_MASK, kvm_vgic_global_state.ich_vtr_el2); + a3v = FIELD_GET(ICC_CTLR_EL1_A3V_MASK, val); if (host_a3v != a3v) return -EINVAL; @@ -54,8 +49,8 @@ static int set_gic_ctlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, * Here set VMCR.CTLR in ICC_CTLR_EL1 layout. * The vgic_set_vmcr() will convert to ICH_VMCR layout. 
*/ - vmcr.cbpr = (val & ICC_CTLR_EL1_CBPR_MASK) >> ICC_CTLR_EL1_CBPR_SHIFT; - vmcr.eoim = (val & ICC_CTLR_EL1_EOImode_MASK) >> ICC_CTLR_EL1_EOImode_SHIFT; + vmcr.cbpr = FIELD_GET(ICC_CTLR_EL1_CBPR_MASK, val); + vmcr.eoim = FIELD_GET(ICC_CTLR_EL1_EOImode_MASK, val); vgic_set_vmcr(vcpu, &vmcr); return 0; @@ -70,20 +65,19 @@ static int get_gic_ctlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, vgic_get_vmcr(vcpu, &vmcr); val = 0; - val |= (vgic_v3_cpu->num_pri_bits - 1) << ICC_CTLR_EL1_PRI_BITS_SHIFT; - val |= vgic_v3_cpu->num_id_bits << ICC_CTLR_EL1_ID_BITS_SHIFT; - val |= ((kvm_vgic_global_state.ich_vtr_el2 & - ICH_VTR_SEIS_MASK) >> ICH_VTR_SEIS_SHIFT) << - ICC_CTLR_EL1_SEIS_SHIFT; - val |= ((kvm_vgic_global_state.ich_vtr_el2 & - ICH_VTR_A3V_MASK) >> ICH_VTR_A3V_SHIFT) << - ICC_CTLR_EL1_A3V_SHIFT; + val |= FIELD_PREP(ICC_CTLR_EL1_PRI_BITS_MASK, vgic_v3_cpu->num_pri_bits - 1); + val |= FIELD_PREP(ICC_CTLR_EL1_ID_BITS_MASK, vgic_v3_cpu->num_id_bits); + val |= FIELD_PREP(ICC_CTLR_EL1_SEIS_MASK, + FIELD_GET(ICH_VTR_SEIS_MASK, + kvm_vgic_global_state.ich_vtr_el2)); + val |= FIELD_PREP(ICC_CTLR_EL1_A3V_MASK, + FIELD_GET(ICH_VTR_A3V_MASK, kvm_vgic_global_state.ich_vtr_el2)); /* * The VMCR.CTLR value is in ICC_CTLR_EL1 layout. * Extract it directly using ICC_CTLR_EL1 reg definitions. */ - val |= (vmcr.cbpr << ICC_CTLR_EL1_CBPR_SHIFT) & ICC_CTLR_EL1_CBPR_MASK; - val |= (vmcr.eoim << ICC_CTLR_EL1_EOImode_SHIFT) & ICC_CTLR_EL1_EOImode_MASK; + val |= FIELD_PREP(ICC_CTLR_EL1_CBPR_MASK, vmcr.cbpr); + val |= FIELD_PREP(ICC_CTLR_EL1_EOImode_MASK, vmcr.eoim); *valp = val; @@ -96,7 +90,7 @@ static int set_gic_pmr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, struct vgic_vmcr vmcr; vgic_get_vmcr(vcpu, &vmcr); - vmcr.pmr = (val & ICC_PMR_EL1_MASK) >> ICC_PMR_EL1_SHIFT; + vmcr.pmr = FIELD_GET(ICC_PMR_EL1_MASK, val); vgic_set_vmcr(vcpu, &vmcr); return 0; @@ -108,7 +102,7 @@ static int get_gic_pmr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, struct vgic_vmcr vmcr; vgic_get_vmcr(vcpu, &vmcr); - *val = (vmcr.pmr << ICC_PMR_EL1_SHIFT) & ICC_PMR_EL1_MASK; + *val = FIELD_PREP(ICC_PMR_EL1_MASK, vmcr.pmr); return 0; } @@ -119,7 +113,7 @@ static int set_gic_bpr0(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, struct vgic_vmcr vmcr; vgic_get_vmcr(vcpu, &vmcr); - vmcr.bpr = (val & ICC_BPR0_EL1_MASK) >> ICC_BPR0_EL1_SHIFT; + vmcr.bpr = FIELD_GET(ICC_BPR0_EL1_MASK, val); vgic_set_vmcr(vcpu, &vmcr); return 0; @@ -131,7 +125,7 @@ static int get_gic_bpr0(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, struct vgic_vmcr vmcr; vgic_get_vmcr(vcpu, &vmcr); - *val = (vmcr.bpr << ICC_BPR0_EL1_SHIFT) & ICC_BPR0_EL1_MASK; + *val = FIELD_PREP(ICC_BPR0_EL1_MASK, vmcr.bpr); return 0; } @@ -143,7 +137,7 @@ static int set_gic_bpr1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, vgic_get_vmcr(vcpu, &vmcr); if (!vmcr.cbpr) { - vmcr.abpr = (val & ICC_BPR1_EL1_MASK) >> ICC_BPR1_EL1_SHIFT; + vmcr.abpr = FIELD_GET(ICC_BPR1_EL1_MASK, val); vgic_set_vmcr(vcpu, &vmcr); } @@ -157,7 +151,7 @@ static int get_gic_bpr1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, vgic_get_vmcr(vcpu, &vmcr); if (!vmcr.cbpr) - *val = (vmcr.abpr << ICC_BPR1_EL1_SHIFT) & ICC_BPR1_EL1_MASK; + *val = FIELD_PREP(ICC_BPR1_EL1_MASK, vmcr.abpr); else *val = min((vmcr.bpr + 1), 7U); @@ -171,7 +165,7 @@ static int set_gic_grpen0(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, struct vgic_vmcr vmcr; vgic_get_vmcr(vcpu, &vmcr); - vmcr.grpen0 = (val & ICC_IGRPEN0_EL1_MASK) >> ICC_IGRPEN0_EL1_SHIFT; + vmcr.grpen0 = 
FIELD_GET(ICC_IGRPEN0_EL1_MASK, val); vgic_set_vmcr(vcpu, &vmcr); return 0; @@ -183,7 +177,7 @@ static int get_gic_grpen0(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, struct vgic_vmcr vmcr; vgic_get_vmcr(vcpu, &vmcr); - *val = (vmcr.grpen0 << ICC_IGRPEN0_EL1_SHIFT) & ICC_IGRPEN0_EL1_MASK; + *val = FIELD_PREP(ICC_IGRPEN0_EL1_MASK, vmcr.grpen0); return 0; } @@ -194,7 +188,7 @@ static int set_gic_grpen1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, struct vgic_vmcr vmcr; vgic_get_vmcr(vcpu, &vmcr); - vmcr.grpen1 = (val & ICC_IGRPEN1_EL1_MASK) >> ICC_IGRPEN1_EL1_SHIFT; + vmcr.grpen1 = FIELD_GET(ICC_IGRPEN1_EL1_MASK, val); vgic_set_vmcr(vcpu, &vmcr); return 0; @@ -206,7 +200,7 @@ static int get_gic_grpen1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, struct vgic_vmcr vmcr; vgic_get_vmcr(vcpu, &vmcr); - *val = (vmcr.grpen1 << ICC_IGRPEN1_EL1_SHIFT) & ICC_IGRPEN1_EL1_MASK; + *val = FIELD_GET(ICC_IGRPEN1_EL1_MASK, vmcr.grpen1); return 0; } -- cgit v1.2.3 From 38cf0bb7625a58625efeef9ec944671464ff7430 Mon Sep 17 00:00:00 2001 From: Marc Zyngier <maz@kernel.org> Date: Tue, 5 Jul 2022 10:16:44 +0100 Subject: KVM: arm64: vgic-v3: Use u32 to manage the line level from userspace Despite the userspace ABI clearly defining the bits dealt with by KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO as a __u32, the kernel uses a u64. Use a u32 to match the userspace ABI, which will subsequently lead to some simplifications. Reviewed-by: Reiji Watanabe <reijiw@google.com> Signed-off-by: Marc Zyngier <maz@kernel.org> --- arch/arm64/kvm/vgic/vgic-kvm-device.c | 6 +++++- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 2 +- arch/arm64/kvm/vgic/vgic-mmio.c | 6 +++--- arch/arm64/kvm/vgic/vgic-mmio.h | 4 ++-- arch/arm64/kvm/vgic/vgic.h | 2 +- 5 files changed, 12 insertions(+), 8 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index bf745c6ab2ea..f02294b9aef1 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -570,10 +570,14 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, info = (attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK) >> KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT; if (info == VGIC_LEVEL_INFO_LINE_LEVEL) { + if (is_write) + tmp32 = *reg; intid = attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK; ret = vgic_v3_line_level_info_uaccess(vcpu, is_write, - intid, reg); + intid, &tmp32); + if (!is_write) + *reg = tmp32; } else { ret = -EINVAL; } diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index a2ff73899976..91201f743033 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -1154,7 +1154,7 @@ int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write, } int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write, - u32 intid, u64 *val) + u32 intid, u32 *val) { if (intid % 32) return -EINVAL; diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c index 997d0fce2088..b32d434c1d4a 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.c +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -775,10 +775,10 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu, } } -u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid) +u32 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid) { int i; - u64 val = 0; + u32 val = 0; int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; for (i = 0; i < 32; i++) { @@ -798,7 +798,7 @@ u64 vgic_read_irq_line_level_info(struct 
kvm_vcpu *vcpu, u32 intid) } void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, - const u64 val) + const u32 val) { int i; int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; diff --git a/arch/arm64/kvm/vgic/vgic-mmio.h b/arch/arm64/kvm/vgic/vgic-mmio.h index 6082d4b66d39..5b490a4dfa5e 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.h +++ b/arch/arm64/kvm/vgic/vgic-mmio.h @@ -207,10 +207,10 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu, int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev, bool is_write, int offset, u32 *val); -u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid); +u32 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid); void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, - const u64 val); + const u32 val); unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev); diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index c23118467a35..0c8da72953f0 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -249,7 +249,7 @@ int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr, bool is_write); int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write, - u32 intid, u64 *val); + u32 intid, u32 *val); int kvm_register_vgic_device(unsigned long type); void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); -- cgit v1.2.3 From e1246f3f2df7aec025fd587ac3d7912007d1144d Mon Sep 17 00:00:00 2001 From: Marc Zyngier <maz@kernel.org> Date: Tue, 5 Jul 2022 10:26:07 +0100 Subject: KVM: arm64: vgic-v3: Consolidate userspace access for MMIO registers For userspace accesses to GICv3 MMIO registers (and related data), vgic_v3_{get,set}_attr are littered with {get,put}_user() calls, making it hard to audit and reason about. Consolidate all userspace accesses in vgic_v3_attr_regs_access(), making the code far simpler to audit. 
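The resulting structure performs the userspace access exactly once at
each end, with the group-specific handlers only ever seeing a
kernel-side value. A rough sketch of the consolidated shape
(hypothetical names; example_backend() stands in for the per-group
uaccess helpers and is not a real kernel function):

	static int example_backend(u32 *val, bool is_write); /* hypothetical */

	static int example_attr_regs_access(u32 __user *uaddr, bool is_write)
	{
		u32 val;
		int ret;

		/* Writes: fetch the value up front, before taking any locks */
		if (is_write && get_user(val, uaddr))
			return -EFAULT;

		ret = example_backend(&val, is_write);

		/* Reads: push the result out once the access has succeeded */
		if (!ret && !is_write)
			ret = put_user(val, uaddr);

		return ret;
	}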
Reviewed-by: Reiji Watanabe <reijiw@google.com> Signed-off-by: Marc Zyngier <maz@kernel.org> --- arch/arm64/kvm/vgic/vgic-kvm-device.c | 103 ++++++++++++---------------------- 1 file changed, 37 insertions(+), 66 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index f02294b9aef1..e9db6795fb90 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -512,18 +512,18 @@ int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, * * @dev: kvm device handle * @attr: kvm device attribute - * @reg: address the value is read or written, NULL for sysregs * @is_write: true if userspace is writing a register */ static int vgic_v3_attr_regs_access(struct kvm_device *dev, struct kvm_device_attr *attr, - u64 *reg, bool is_write) + bool is_write) { struct vgic_reg_attr reg_attr; gpa_t addr; struct kvm_vcpu *vcpu; + bool uaccess; + u32 val; int ret; - u32 tmp32; ret = vgic_v3_parse_attr(dev, attr, ®_attr); if (ret) @@ -532,6 +532,21 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, vcpu = reg_attr.vcpu; addr = reg_attr.addr; + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: + /* Sysregs uaccess is performed by the sysreg handling code */ + uaccess = false; + break; + default: + uaccess = true; + } + + if (uaccess && is_write) { + u32 __user *uaddr = (u32 __user *)(unsigned long)attr->addr; + if (get_user(val, uaddr)) + return -EFAULT; + } + mutex_lock(&dev->kvm->lock); if (unlikely(!vgic_initialized(dev->kvm))) { @@ -546,20 +561,10 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - if (is_write) - tmp32 = *reg; - - ret = vgic_v3_dist_uaccess(vcpu, is_write, addr, &tmp32); - if (!is_write) - *reg = tmp32; + ret = vgic_v3_dist_uaccess(vcpu, is_write, addr, &val); break; case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: - if (is_write) - tmp32 = *reg; - - ret = vgic_v3_redist_uaccess(vcpu, is_write, addr, &tmp32); - if (!is_write) - *reg = tmp32; + ret = vgic_v3_redist_uaccess(vcpu, is_write, addr, &val); break; case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: ret = vgic_v3_cpu_sysregs_uaccess(vcpu, attr, is_write); @@ -570,14 +575,10 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, info = (attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK) >> KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT; if (info == VGIC_LEVEL_INFO_LINE_LEVEL) { - if (is_write) - tmp32 = *reg; intid = attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK; ret = vgic_v3_line_level_info_uaccess(vcpu, is_write, - intid, &tmp32); - if (!is_write) - *reg = tmp32; + intid, &val); } else { ret = -EINVAL; } @@ -591,6 +592,12 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev, unlock_all_vcpus(dev->kvm); out: mutex_unlock(&dev->kvm->lock); + + if (!ret && uaccess && !is_write) { + u32 __user *uaddr = (u32 __user *)(unsigned long)attr->addr; + ret = put_user(val, uaddr); + } + return ret; } @@ -605,30 +612,12 @@ static int vgic_v3_set_attr(struct kvm_device *dev, switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u32 tmp32; - u64 reg; - - if (get_user(tmp32, uaddr)) - return -EFAULT; - - reg = tmp32; - return vgic_v3_attr_regs_access(dev, attr, ®, true); - } + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: + return vgic_v3_attr_regs_access(dev, attr, true); case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: - return 
vgic_v3_attr_regs_access(dev, attr, NULL, true); - case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u64 reg; - u32 tmp32; - - if (get_user(tmp32, uaddr)) - return -EFAULT; - - reg = tmp32; - return vgic_v3_attr_regs_access(dev, attr, ®, true); - } + return vgic_v3_attr_regs_access(dev, attr, true); + case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: + return vgic_v3_attr_regs_access(dev, attr, true); case KVM_DEV_ARM_VGIC_GRP_CTRL: { int ret; @@ -662,30 +651,12 @@ static int vgic_v3_get_attr(struct kvm_device *dev, switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u64 reg; - u32 tmp32; - - ret = vgic_v3_attr_regs_access(dev, attr, ®, false); - if (ret) - return ret; - tmp32 = reg; - return put_user(tmp32, uaddr); - } + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: + return vgic_v3_attr_regs_access(dev, attr, false); case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: - return vgic_v3_attr_regs_access(dev, attr, NULL, false); - case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u64 reg; - u32 tmp32; - - ret = vgic_v3_attr_regs_access(dev, attr, ®, false); - if (ret) - return ret; - tmp32 = reg; - return put_user(tmp32, uaddr); - } + return vgic_v3_attr_regs_access(dev, attr, false); + case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: + return vgic_v3_attr_regs_access(dev, attr, false); } return -ENXIO; } -- cgit v1.2.3 From 7e9f723c2a90e41407d5889700169be4797a2009 Mon Sep 17 00:00:00 2001 From: Marc Zyngier <maz@kernel.org> Date: Tue, 5 Jul 2022 10:26:07 +0100 Subject: KVM: arm64: vgic-v2: Consolidate userspace access for MMIO registers Align the GICv2 MMIO accesses from userspace with the way the GICv3 code is now structured. 
Reviewed-by: Reiji Watanabe <reijiw@google.com> Signed-off-by: Marc Zyngier <maz@kernel.org> --- arch/arm64/kvm/vgic/vgic-kvm-device.c | 39 +++++++++++++++-------------------- 1 file changed, 17 insertions(+), 22 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index e9db6795fb90..066b95d606fd 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -348,17 +348,18 @@ bool lock_all_vcpus(struct kvm *kvm) * * @dev: kvm device handle * @attr: kvm device attribute - * @reg: address the value is read or written * @is_write: true if userspace is writing a register */ static int vgic_v2_attr_regs_access(struct kvm_device *dev, struct kvm_device_attr *attr, - u32 *reg, bool is_write) + bool is_write) { + u32 __user *uaddr = (u32 __user *)(unsigned long)attr->addr; struct vgic_reg_attr reg_attr; gpa_t addr; struct kvm_vcpu *vcpu; int ret; + u32 val; ret = vgic_v2_parse_attr(dev, attr, ®_attr); if (ret) @@ -367,6 +368,10 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev, vcpu = reg_attr.vcpu; addr = reg_attr.addr; + if (is_write) + if (get_user(val, uaddr)) + return -EFAULT; + mutex_lock(&dev->kvm->lock); ret = vgic_init(dev->kvm); @@ -380,10 +385,10 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev, switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: - ret = vgic_v2_cpuif_uaccess(vcpu, is_write, addr, reg); + ret = vgic_v2_cpuif_uaccess(vcpu, is_write, addr, &val); break; case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - ret = vgic_v2_dist_uaccess(vcpu, is_write, addr, reg); + ret = vgic_v2_dist_uaccess(vcpu, is_write, addr, &val); break; default: ret = -EINVAL; @@ -393,6 +398,10 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev, unlock_all_vcpus(dev->kvm); out: mutex_unlock(&dev->kvm->lock); + + if (!ret && !is_write) + ret = put_user(val, uaddr); + return ret; } @@ -407,15 +416,8 @@ static int vgic_v2_set_attr(struct kvm_device *dev, switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u32 reg; - - if (get_user(reg, uaddr)) - return -EFAULT; - - return vgic_v2_attr_regs_access(dev, attr, ®, true); - } + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + return vgic_v2_attr_regs_access(dev, attr, true); } return -ENXIO; @@ -432,15 +434,8 @@ static int vgic_v2_get_attr(struct kvm_device *dev, switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u32 reg = 0; - - ret = vgic_v2_attr_regs_access(dev, attr, ®, false); - if (ret) - return ret; - return put_user(reg, uaddr); - } + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + return vgic_v2_attr_regs_access(dev, attr, false); } return -ENXIO; -- cgit v1.2.3 From d7df6f282db67677c06456fd29d47eda0ba060b9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier <maz@kernel.org> Date: Tue, 5 Jul 2022 11:27:37 +0100 Subject: KVM: arm64: vgic: Use {get,put}_user() instead of copy_{from.to}_user Tidy-up vgic_get_common_attr() and vgic_set_common_attr() to use {get,put}_user() instead of the more complex (and less type-safe) copy_{from,to}_user(). 
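For a single scalar, get_user()/put_user() derive the access size from
the pointer type, while copy_{from,to}_user() take an explicit byte
count that the compiler cannot check against the destination type. A
minimal illustration (hypothetical helper, not part of the patch):

	static int example_fetch_addr(u64 __user *uaddr, u64 *addr)
	{
		/* Size and type both come from uaddr being a u64 pointer */
		if (get_user(*addr, uaddr))
			return -EFAULT;

		return 0;
	}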
Reviewed-by: Reiji Watanabe <reijiw@google.com> Signed-off-by: Marc Zyngier <maz@kernel.org> --- arch/arm64/kvm/vgic/vgic-kvm-device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index 066b95d606fd..c17e5502c0b3 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -170,7 +170,7 @@ static int vgic_set_common_attr(struct kvm_device *dev, u64 addr; unsigned long type = (unsigned long)attr->attr; - if (copy_from_user(&addr, uaddr, sizeof(addr))) + if (get_user(addr, uaddr)) return -EFAULT; r = kvm_vgic_addr(dev->kvm, type, &addr, true); @@ -233,14 +233,14 @@ static int vgic_get_common_attr(struct kvm_device *dev, u64 addr; unsigned long type = (unsigned long)attr->attr; - if (copy_from_user(&addr, uaddr, sizeof(addr))) + if (get_user(addr, uaddr)) return -EFAULT; r = kvm_vgic_addr(dev->kvm, type, &addr, false); if (r) return (r == -ENODEV) ? -ENXIO : r; - if (copy_to_user(uaddr, &addr, sizeof(addr))) + if (put_user(addr, uaddr)) return -EFAULT; break; } -- cgit v1.2.3 From 9f968c9266aa30b0e81be0c6a560e45b93bed3dc Mon Sep 17 00:00:00 2001 From: Marc Zyngier <maz@kernel.org> Date: Tue, 5 Jul 2022 14:34:33 +0100 Subject: KVM: arm64: vgic-v2: Add helper for legacy dist/cpuif base address setting We carry a legacy interface to set the base addresses for GICv2. As this is currently plumbed into the same handling code as the modern interface, it limits the evolution we can make there. Add a helper dedicated to this handling, with a view of maybe removing this in the future. Signed-off-by: Marc Zyngier <maz@kernel.org> --- arch/arm64/kvm/arm.c | 11 ++--------- arch/arm64/kvm/vgic/vgic-kvm-device.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 9 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index a0188144a122..fd26beacbbe5 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1414,18 +1414,11 @@ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev_addr) { - unsigned long dev_id, type; - - dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >> - KVM_ARM_DEVICE_ID_SHIFT; - type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >> - KVM_ARM_DEVICE_TYPE_SHIFT; - - switch (dev_id) { + switch (FIELD_GET(KVM_ARM_DEVICE_ID_MASK, dev_addr->id)) { case KVM_ARM_DEVICE_VGIC_V2: if (!vgic_present) return -ENXIO; - return kvm_vgic_addr(kvm, type, &dev_addr->addr, true); + return kvm_set_legacy_vgic_v2_addr(kvm, dev_addr); default: return -ENODEV; } diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index c17e5502c0b3..04175fd55da6 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -41,6 +41,38 @@ static int vgic_check_type(struct kvm *kvm, int type_needed) return 0; } +int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev_addr) +{ + struct vgic_dist *vgic = &kvm->arch.vgic; + int r; + + mutex_lock(&kvm->lock); + switch (FIELD_GET(KVM_ARM_DEVICE_TYPE_MASK, dev_addr->id)) { + case KVM_VGIC_V2_ADDR_TYPE_DIST: + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); + if (!r) + r = vgic_check_iorange(kvm, vgic->vgic_dist_base, dev_addr->addr, + SZ_4K, KVM_VGIC_V2_DIST_SIZE); + if (!r) + vgic->vgic_dist_base = dev_addr->addr; + break; + case 
KVM_VGIC_V2_ADDR_TYPE_CPU: + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); + if (!r) + r = vgic_check_iorange(kvm, vgic->vgic_cpu_base, dev_addr->addr, + SZ_4K, KVM_VGIC_V2_CPU_SIZE); + if (!r) + vgic->vgic_cpu_base = dev_addr->addr; + break; + default: + r = -ENODEV; + } + + mutex_unlock(&kvm->lock); + + return r; +} + /** * kvm_vgic_addr - set or get vgic VM base addresses * @kvm: pointer to the vm struct -- cgit v1.2.3 From 4b85080f4e378f617f88964dec94fd282bcf2af4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier <maz@kernel.org> Date: Tue, 5 Jul 2022 14:39:24 +0100 Subject: KVM: arm64: vgic: Consolidate userspace access for base address setting Align kvm_vgic_addr() with the rest of the code by moving the userspace accesses into it. kvm_vgic_addr() is also made static. Signed-off-by: Marc Zyngier <maz@kernel.org> --- arch/arm64/kvm/vgic/vgic-kvm-device.c | 75 +++++++++++++++-------------------- 1 file changed, 31 insertions(+), 44 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index 04175fd55da6..011171dc41c5 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -76,8 +76,7 @@ int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev /** * kvm_vgic_addr - set or get vgic VM base addresses * @kvm: pointer to the vm struct - * @type: the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX - * @addr: pointer to address value + * @attr: pointer to the attribute being retrieved/updated * @write: if true set the address in the VM address space, if false read the * address * @@ -89,15 +88,22 @@ int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev * overlapping regions in case of a virtual GICv3 here, since we don't know * the number of VCPUs yet, so we defer this check to map_resources(). 
*/ -int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) +static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool write) { - int r = 0; + u64 __user *uaddr = (u64 __user *)attr->addr; struct vgic_dist *vgic = &kvm->arch.vgic; phys_addr_t *addr_ptr, alignment, size; u64 undef_value = VGIC_ADDR_UNDEF; + u64 addr; + int r; + + /* Reading a redistributor region addr implies getting the index */ + if (write || attr->attr == KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION) + if (get_user(addr, uaddr)) + return -EFAULT; mutex_lock(&kvm->lock); - switch (type) { + switch (attr->attr) { case KVM_VGIC_V2_ADDR_TYPE_DIST: r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); addr_ptr = &vgic->vgic_dist_base; @@ -123,7 +129,7 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) if (r) break; if (write) { - r = vgic_v3_set_redist_base(kvm, 0, *addr, 0); + r = vgic_v3_set_redist_base(kvm, 0, addr, 0); goto out; } rdreg = list_first_entry_or_null(&vgic->rd_regions, @@ -143,14 +149,12 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) if (r) break; - index = *addr & KVM_VGIC_V3_RDIST_INDEX_MASK; + index = addr & KVM_VGIC_V3_RDIST_INDEX_MASK; if (write) { - gpa_t base = *addr & KVM_VGIC_V3_RDIST_BASE_MASK; - u32 count = (*addr & KVM_VGIC_V3_RDIST_COUNT_MASK) - >> KVM_VGIC_V3_RDIST_COUNT_SHIFT; - u8 flags = (*addr & KVM_VGIC_V3_RDIST_FLAGS_MASK) - >> KVM_VGIC_V3_RDIST_FLAGS_SHIFT; + gpa_t base = addr & KVM_VGIC_V3_RDIST_BASE_MASK; + u32 count = FIELD_GET(KVM_VGIC_V3_RDIST_COUNT_MASK, addr); + u8 flags = FIELD_GET(KVM_VGIC_V3_RDIST_FLAGS_MASK, addr); if (!count || flags) r = -EINVAL; @@ -166,9 +170,9 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) goto out; } - *addr = index; - *addr |= rdreg->base; - *addr |= (u64)rdreg->count << KVM_VGIC_V3_RDIST_COUNT_SHIFT; + addr = index; + addr |= rdreg->base; + addr |= (u64)rdreg->count << KVM_VGIC_V3_RDIST_COUNT_SHIFT; goto out; } default: @@ -179,15 +183,19 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) goto out; if (write) { - r = vgic_check_iorange(kvm, *addr_ptr, *addr, alignment, size); + r = vgic_check_iorange(kvm, *addr_ptr, addr, alignment, size); if (!r) - *addr_ptr = *addr; + *addr_ptr = addr; } else { - *addr = *addr_ptr; + addr = *addr_ptr; } out: mutex_unlock(&kvm->lock); + + if (!r && !write) + r = put_user(addr, uaddr); + return r; } @@ -197,17 +205,9 @@ static int vgic_set_common_attr(struct kvm_device *dev, int r; switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_ADDR: { - u64 __user *uaddr = (u64 __user *)(long)attr->addr; - u64 addr; - unsigned long type = (unsigned long)attr->attr; - - if (get_user(addr, uaddr)) - return -EFAULT; - - r = kvm_vgic_addr(dev->kvm, type, &addr, true); + case KVM_DEV_ARM_VGIC_GRP_ADDR: + r = kvm_vgic_addr(dev->kvm, attr, true); return (r == -ENODEV) ? -ENXIO : r; - } case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { u32 __user *uaddr = (u32 __user *)(long)attr->addr; u32 val; @@ -260,22 +260,9 @@ static int vgic_get_common_attr(struct kvm_device *dev, int r = -ENXIO; switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_ADDR: { - u64 __user *uaddr = (u64 __user *)(long)attr->addr; - u64 addr; - unsigned long type = (unsigned long)attr->attr; - - if (get_user(addr, uaddr)) - return -EFAULT; - - r = kvm_vgic_addr(dev->kvm, type, &addr, false); - if (r) - return (r == -ENODEV) ? 
-ENXIO : r; - - if (put_user(addr, uaddr)) - return -EFAULT; - break; - } + case KVM_DEV_ARM_VGIC_GRP_ADDR: + r = kvm_vgic_addr(dev->kvm, attr, false); + return (r == -ENODEV) ? -ENXIO : r; case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { u32 __user *uaddr = (u32 __user *)(long)attr->addr; -- cgit v1.2.3 From 619064afa9b6f0088b86a1fed20c049cfe94cdf7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier <maz@kernel.org> Date: Thu, 14 Jul 2022 08:10:09 +0100 Subject: KVM: arm64: vgic: Tidy-up calls to vgic_{get,set}_common_attr() The userspace accessors have an early call to vgic_{get,set}_common_attr() that makes the code hard to follow. Move it to the default: clause of the decoding switch statement, which results in a nice cleanup. This requires us to move the handling of the pending table into the common handling, even if it is strictly a GICv3 feature (it has the benefit of keeping the whole control group handling in the same function). Also cleanup vgic_v3_{get,set}_attr() while we're at it, deduplicating the calls to vgic_v3_attr_regs_access(). Suggested-by: Reiji Watanabe <reijiw@google.com> Signed-off-by: Marc Zyngier <maz@kernel.org> --- arch/arm64/kvm/vgic/vgic-kvm-device.c | 78 ++++++++++++----------------------- 1 file changed, 26 insertions(+), 52 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index 011171dc41c5..edeac2380591 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -246,6 +246,24 @@ static int vgic_set_common_attr(struct kvm_device *dev, r = vgic_init(dev->kvm); mutex_unlock(&dev->kvm->lock); return r; + case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES: + /* + * OK, this one isn't common at all, but we + * want to handle all control group attributes + * in a single place. 
+ */ + if (vgic_check_type(dev->kvm, KVM_DEV_TYPE_ARM_VGIC_V3)) + return -ENXIO; + mutex_lock(&dev->kvm->lock); + + if (!lock_all_vcpus(dev->kvm)) { + mutex_unlock(&dev->kvm->lock); + return -EBUSY; + } + r = vgic_v3_save_pending_tables(dev->kvm); + unlock_all_vcpus(dev->kvm); + mutex_unlock(&dev->kvm->lock); + return r; } break; } @@ -427,37 +445,25 @@ out: static int vgic_v2_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - int ret; - - ret = vgic_set_common_attr(dev, attr); - if (ret != -ENXIO) - return ret; - switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: return vgic_v2_attr_regs_access(dev, attr, true); + default: + return vgic_set_common_attr(dev, attr); } - - return -ENXIO; } static int vgic_v2_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - int ret; - - ret = vgic_get_common_attr(dev, attr); - if (ret != -ENXIO) - return ret; - switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: return vgic_v2_attr_regs_access(dev, attr, false); + default: + return vgic_get_common_attr(dev, attr); } - - return -ENXIO; } static int vgic_v2_has_attr(struct kvm_device *dev, @@ -618,61 +624,29 @@ out: static int vgic_v3_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - int ret; - - ret = vgic_set_common_attr(dev, attr); - if (ret != -ENXIO) - return ret; - switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: - return vgic_v3_attr_regs_access(dev, attr, true); case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: - return vgic_v3_attr_regs_access(dev, attr, true); case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: return vgic_v3_attr_regs_access(dev, attr, true); - case KVM_DEV_ARM_VGIC_GRP_CTRL: { - int ret; - - switch (attr->attr) { - case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES: - mutex_lock(&dev->kvm->lock); - - if (!lock_all_vcpus(dev->kvm)) { - mutex_unlock(&dev->kvm->lock); - return -EBUSY; - } - ret = vgic_v3_save_pending_tables(dev->kvm); - unlock_all_vcpus(dev->kvm); - mutex_unlock(&dev->kvm->lock); - return ret; - } - break; - } + default: + return vgic_set_common_attr(dev, attr); } - return -ENXIO; } static int vgic_v3_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - int ret; - - ret = vgic_get_common_attr(dev, attr); - if (ret != -ENXIO) - return ret; - switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: - return vgic_v3_attr_regs_access(dev, attr, false); case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: - return vgic_v3_attr_regs_access(dev, attr, false); case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: return vgic_v3_attr_regs_access(dev, attr, false); + default: + return vgic_get_common_attr(dev, attr); } - return -ENXIO; } static int vgic_v3_has_attr(struct kvm_device *dev, -- cgit v1.2.3 From f6dddbb25572218d2e8ab93bfdad20cddeb99b5a Mon Sep 17 00:00:00 2001 From: Marc Zyngier <maz@kernel.org> Date: Mon, 4 Jul 2022 10:03:33 +0100 Subject: KVM: arm64: Get rid of find_reg_by_id() This helper doesn't have a user anymore, let's get rid of it. 
Signed-off-by: Marc Zyngier <maz@kernel.org> --- arch/arm64/kvm/sys_regs.c | 11 ----------- arch/arm64/kvm/sys_regs.h | 5 ----- 2 files changed, 16 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 379478eecfaa..7ab67a7fc0d8 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2577,17 +2577,6 @@ static bool index_to_params(u64 id, struct sys_reg_params *params) } } -const struct sys_reg_desc *find_reg_by_id(u64 id, - struct sys_reg_params *params, - const struct sys_reg_desc table[], - unsigned int num) -{ - if (!index_to_params(id, params)) - return NULL; - - return find_reg(params, table, num); -} - const struct sys_reg_desc *get_reg_by_id(u64 id, const struct sys_reg_desc table[], unsigned int num) diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index b8b576a2af2b..49517f58deb5 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -190,11 +190,6 @@ find_reg(const struct sys_reg_params *params, const struct sys_reg_desc table[], return __inline_bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg); } -const struct sys_reg_desc *find_reg_by_id(u64 id, - struct sys_reg_params *params, - const struct sys_reg_desc table[], - unsigned int num); - const struct sys_reg_desc *get_reg_by_id(u64 id, const struct sys_reg_desc table[], unsigned int num); -- cgit v1.2.3 From c5332898dc35bbed7d3aa02b491e3388315ee481 Mon Sep 17 00:00:00 2001 From: Marc Zyngier <maz@kernel.org> Date: Mon, 4 Jul 2022 18:25:41 +0100 Subject: KVM: arm64: Descope kvm_arm_sys_reg_{get,set}_reg() Having kvm_arm_sys_reg_get_reg and co in kvm_host.h gives the impression that these functions are free to be called from anywhere. Not quite. They really are tied to our internal sysreg handling, and they would be better off in the sys_regs.h header, which is private. kvm_host.h could also get a bit of a diet, so let's just do that. Signed-off-by: Marc Zyngier <maz@kernel.org> --- arch/arm64/kvm/sys_regs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 49517f58deb5..a8c4cc32eb9a 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -194,9 +194,10 @@ const struct sys_reg_desc *get_reg_by_id(u64 id, const struct sys_reg_desc table[], unsigned int num); +int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); +int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, const struct sys_reg_desc table[], unsigned int num); - int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, const struct sys_reg_desc table[], unsigned int num); -- cgit v1.2.3 From 4274d42716d87d5301fdf67eb799e7db08fe73de Mon Sep 17 00:00:00 2001 From: Marc Zyngier <maz@kernel.org> Date: Mon, 4 Jul 2022 18:11:04 +0100 Subject: KVM: arm64: Get rid of outdated comments Once upon a time, the 32bit KVM/arm port was the reference, while the arm64 version was the new kid on the block, without a clear future... This was a long time ago. "The times, they are a-changing."
Signed-off-by: Marc Zyngier <maz@kernel.org> --- arch/arm64/kvm/sys_regs.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 7ab67a7fc0d8..b4fda04413f2 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -34,11 +34,6 @@ #include "trace.h" /* - * All of this file is extremely similar to the ARM coproc.c, but the - * types are different. My gut feeling is that it should be pretty - * easy to merge, but that would be an ABI breakage -- again. VFP - * would also need to be abstracted. - * * For AArch32, we only take care of what is being trapped. Anything * that has to do with init and userspace access has to go via the * 64bit interface. -- cgit v1.2.3 From 548ec3336f323db56260b312c232ab37285f0284 Mon Sep 17 00:00:00 2001 From: Kalesh Singh <kaleshsingh@google.com> Date: Tue, 26 Jul 2022 00:37:40 -0700 Subject: KVM: arm64: On stack overflow switch to hyp overflow_stack On hyp stack overflow, switch to a 16-byte aligned secondary stack. This provides us with stack space to better handle overflows, and is used in a subsequent patch to dump the hypervisor stacktrace. Signed-off-by: Kalesh Singh <kaleshsingh@google.com> Reviewed-by: Fuad Tabba <tabba@google.com> Tested-by: Fuad Tabba <tabba@google.com> Signed-off-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20220726073750.3219117-8-kaleshsingh@google.com --- arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/arm64/kvm/hyp/nvhe/host.S | 9 ++------- arch/arm64/kvm/hyp/nvhe/stacktrace.c | 11 +++++++++++ 3 files changed, 14 insertions(+), 8 deletions(-) create mode 100644 arch/arm64/kvm/hyp/nvhe/stacktrace.c (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index f9fe4dc21b1f..524e7dad5739 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -14,7 +14,7 @@ lib-objs := $(addprefix ../../../lib/, $(lib-objs)) obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \ - cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o + cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o obj-$(CONFIG_DEBUG_LIST) += list_debug.o diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index ea6a397b64a6..b6c0188c4b35 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -177,13 +177,8 @@ SYM_FUNC_END(__host_hvc) b hyp_panic .L__hyp_sp_overflow\@: - /* - * Reset SP to the top of the stack, to allow handling the hyp_panic. - * This corrupts the stack but is ok, since we won't be attempting - * any unwinding here. - */ - ldr_this_cpu x0, kvm_init_params + NVHE_INIT_STACK_HYP_VA, x1 - mov sp, x0 + /* Switch to the overflow stack */ + adr_this_cpu sp, overflow_stack + OVERFLOW_STACK_SIZE, x0 b hyp_panic_bad_stack ASM_BUG() diff --git a/arch/arm64/kvm/hyp/nvhe/stacktrace.c b/arch/arm64/kvm/hyp/nvhe/stacktrace.c new file mode 100644 index 000000000000..a3d5b34e1249 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/stacktrace.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KVM nVHE hypervisor stack tracing support.
+ * + * Copyright (C) 2022 Google LLC + */ +#include <asm/memory.h> +#include <asm/percpu.h> + +DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack) + __aligned(16); -- cgit v1.2.3 From 879e5ac7b2e4db05799a905b5a07fc9e5dedf651 Mon Sep 17 00:00:00 2001 From: Kalesh Singh <kaleshsingh@google.com> Date: Tue, 26 Jul 2022 00:37:42 -0700 Subject: KVM: arm64: Prepare non-protected nVHE hypervisor stacktrace In non-protected nVHE mode (non-pKVM) the host can directly access hypervisor memory, and unwinding of the hypervisor stacktrace is done from EL1 to save on memory for shared buffers. To unwind the hypervisor stack from EL1 the host needs to know the starting point for the unwind and information that will allow it to translate hypervisor stack addresses to the corresponding kernel addresses. This patch sets up this bookkeeping. It is made use of later in the series. Signed-off-by: Kalesh Singh <kaleshsingh@google.com> Reviewed-by: Fuad Tabba <tabba@google.com> Tested-by: Fuad Tabba <tabba@google.com> Signed-off-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20220726073750.3219117-10-kaleshsingh@google.com --- arch/arm64/kvm/hyp/nvhe/stacktrace.c | 41 ++++++++++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/switch.c | 6 ++++++ 2 files changed, 47 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/nvhe/stacktrace.c b/arch/arm64/kvm/hyp/nvhe/stacktrace.c index a3d5b34e1249..b8a280aa026a 100644 --- a/arch/arm64/kvm/hyp/nvhe/stacktrace.c +++ b/arch/arm64/kvm/hyp/nvhe/stacktrace.c @@ -4,8 +4,49 @@ * * Copyright (C) 2022 Google LLC */ +#include <asm/kvm_asm.h> +#include <asm/kvm_hyp.h> #include <asm/memory.h> #include <asm/percpu.h> DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack) __aligned(16); + +DEFINE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info); + +/* + * hyp_prepare_backtrace - Prepare non-protected nVHE backtrace. + * + * @fp : frame pointer at which to start the unwinding. + * @pc : program counter at which to start the unwinding. + * + * Save the information needed by the host to unwind the non-protected + * nVHE hypervisor stack in EL1. + */ +static void hyp_prepare_backtrace(unsigned long fp, unsigned long pc) +{ + struct kvm_nvhe_stacktrace_info *stacktrace_info = this_cpu_ptr(&kvm_stacktrace_info); + struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params); + + stacktrace_info->stack_base = (unsigned long)(params->stack_hyp_va - PAGE_SIZE); + stacktrace_info->overflow_stack_base = (unsigned long)this_cpu_ptr(overflow_stack); + stacktrace_info->fp = fp; + stacktrace_info->pc = pc; +} + +/* + * kvm_nvhe_prepare_backtrace - prepare to dump the nVHE backtrace + * + * @fp : frame pointer at which to start the unwinding. + * @pc : program counter at which to start the unwinding. + * + * Saves the information needed by the host to dump the nVHE hypervisor + * backtrace.
+ */ +void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc) +{ + if (is_protected_kvm_enabled()) + return; + else + hyp_prepare_backtrace(fp, pc); +} diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 6db801db8f27..64e13445d0d9 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -34,6 +34,8 @@ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data); DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); DEFINE_PER_CPU(unsigned long, kvm_hyp_vector); +extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc); + static void __activate_traps(struct kvm_vcpu *vcpu) { u64 val; @@ -375,6 +377,10 @@ asmlinkage void __noreturn hyp_panic(void) __sysreg_restore_state_nvhe(host_ctxt); } + /* Prepare to dump kvm nvhe hyp stacktrace */ + kvm_nvhe_prepare_backtrace((unsigned long)__builtin_frame_address(0), + _THIS_IP_); + __hyp_do_panic(host_ctxt, spsr, elr, par); unreachable(); } -- cgit v1.2.3 From db129d486ebdf4e3168282236f9d9008b42cac7e Mon Sep 17 00:00:00 2001 From: Kalesh Singh <kaleshsingh@google.com> Date: Tue, 26 Jul 2022 00:37:43 -0700 Subject: KVM: arm64: Implement non-protected nVHE hyp stack unwinder Implements the common framework necessary for unwind() to work for non-protected nVHE mode: - on_accessible_stack() - on_overflow_stack() - unwind_next() Non-protected nVHE unwind() is used by the host in EL1 to unwind and dump the hypervisor stacktrace. Signed-off-by: Kalesh Singh <kaleshsingh@google.com> Reviewed-by: Fuad Tabba <tabba@google.com> Tested-by: Fuad Tabba <tabba@google.com> Signed-off-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20220726073750.3219117-11-kaleshsingh@google.com --- arch/arm64/kvm/arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index a0188144a122..6a64293108c5 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -49,7 +49,7 @@ DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); -static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); +DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); -- cgit v1.2.3 From 314a61dc31845c233e47c53db3fe6f34284034f4 Mon Sep 17 00:00:00 2001 From: Kalesh Singh <kaleshsingh@google.com> Date: Tue, 26 Jul 2022 00:37:44 -0700 Subject: KVM: arm64: Introduce hyp_dump_backtrace() In non-protected nVHE mode, unwinds and dumps the hypervisor backtrace from EL1. This is possible because the host can directly access the hypervisor stack pages in non-protected mode. The nVHE backtrace is dumped on hyp_panic(), before panicking the host. [ 101.498183] kvm [377]: nVHE call trace: [ 101.498363] kvm [377]: [<ffff8000090a6570>] __kvm_nvhe_hyp_panic+0xac/0xf8 [ 101.499045] kvm [377]: [<ffff8000090a65cc>] __kvm_nvhe_hyp_panic_bad_stack+0x10/0x10 [ 101.499498] kvm [377]: [<ffff8000090a61e4>] __kvm_nvhe_recursive_death+0x24/0x34 . . .
[ 101.524929] kvm [377]: [<ffff8000090a61e4>] __kvm_nvhe_recursive_death+0x24/0x34 [ 101.525062] kvm [377]: [<ffff8000090a61e4>] __kvm_nvhe_recursive_death+0x24/0x34 [ 101.525195] kvm [377]: [<ffff8000090a5de4>] __kvm_nvhe___kvm_vcpu_run+0x30/0x40c [ 101.525333] kvm [377]: [<ffff8000090a8b64>] __kvm_nvhe_handle___kvm_vcpu_run+0x30/0x48 [ 101.525468] kvm [377]: [<ffff8000090a88b8>] __kvm_nvhe_handle_trap+0xc4/0x128 [ 101.525602] kvm [377]: [<ffff8000090a7864>] __kvm_nvhe___host_exit+0x64/0x64 [ 101.525745] kvm [377]: ---[ end nVHE call trace ]--- Signed-off-by: Kalesh Singh <kaleshsingh@google.com> Signed-off-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20220726073750.3219117-12-kaleshsingh@google.com --- arch/arm64/kvm/handle_exit.c | 69 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index f66c0142b335..e83e6f735100 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -17,6 +17,7 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_mmu.h> #include <asm/debug-monitors.h> +#include <asm/stacktrace/nvhe.h> #include <asm/traps.h> #include <kvm/arm_hypercalls.h> @@ -318,6 +319,71 @@ void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index) kvm_handle_guest_serror(vcpu, kvm_vcpu_get_esr(vcpu)); } +/* + * kvm_nvhe_dump_backtrace_entry - Symbolize and print an nVHE backtrace entry + * + * @arg : the hypervisor offset, used for address translation + * @where : the program counter corresponding to the stack frame + */ +static bool kvm_nvhe_dump_backtrace_entry(void *arg, unsigned long where) +{ + unsigned long va_mask = GENMASK_ULL(vabits_actual - 1, 0); + unsigned long hyp_offset = (unsigned long)arg; + + /* Mask tags and convert to kern addr */ + where = (where & va_mask) + hyp_offset; + kvm_err(" [<%016lx>] %pB\n", where, (void *)(where + kaslr_offset())); + + return true; +} + +static inline void kvm_nvhe_dump_backtrace_start(void) +{ + kvm_err("nVHE call trace:\n"); +} + +static inline void kvm_nvhe_dump_backtrace_end(void) +{ + kvm_err("---[ end nVHE call trace ]---\n"); +} + +/* + * hyp_dump_backtrace - Dump the non-protected nVHE backtrace. + * + * @hyp_offset: hypervisor offset, used for address translation. + * + * The host can directly access HYP stack pages in non-protected + * mode, so the unwinding is done directly from EL1. This removes + * the need for shared buffers between host and hypervisor for + * the stacktrace. + */ +static void hyp_dump_backtrace(unsigned long hyp_offset) +{ + struct kvm_nvhe_stacktrace_info *stacktrace_info; + struct unwind_state state; + + stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info); + + kvm_nvhe_unwind_init(&state, stacktrace_info->fp, stacktrace_info->pc); + + kvm_nvhe_dump_backtrace_start(); + unwind(&state, kvm_nvhe_dump_backtrace_entry, (void *)hyp_offset); + kvm_nvhe_dump_backtrace_end(); +} + +/* + * kvm_nvhe_dump_backtrace - Dump KVM nVHE hypervisor backtrace. + * + * @hyp_offset: hypervisor offset, used for address translation. 
+ */ +static void kvm_nvhe_dump_backtrace(unsigned long hyp_offset) +{ + if (is_protected_kvm_enabled()) + return; + else + hyp_dump_backtrace(hyp_offset); +} + void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt, u64 elr_phys, u64 par, uintptr_t vcpu, @@ -353,6 +419,9 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, (void *)panic_addr); } + /* Dump the nVHE hypervisor backtrace */ + kvm_nvhe_dump_backtrace(hyp_offset); + /* * Hyp has panicked and we're going to handle that by panicking the * kernel. The kernel offset will be revealed in the panic so we're -- cgit v1.2.3 From 72adac1bd234002a65cef738e0eebfd6c2ce2e30 Mon Sep 17 00:00:00 2001 From: Kalesh Singh <kaleshsingh@google.com> Date: Tue, 26 Jul 2022 00:37:45 -0700 Subject: KVM: arm64: Add PROTECTED_NVHE_STACKTRACE Kconfig This can be used to disable stacktrace for the protected KVM nVHE hypervisor, in order to save on the associated memory usage. This option is disabled by default, since protected KVM is not widely used on platforms other than Android currently. Signed-off-by: Kalesh Singh <kaleshsingh@google.com> Reviewed-by: Fuad Tabba <tabba@google.com> Tested-by: Fuad Tabba <tabba@google.com> Signed-off-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20220726073750.3219117-13-kaleshsingh@google.com --- arch/arm64/kvm/Kconfig | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 8a5fbbf084df..09c995869916 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -46,6 +46,21 @@ menuconfig KVM If unsure, say N. +config PROTECTED_NVHE_STACKTRACE + bool "Protected KVM hypervisor stacktraces" + depends on NVHE_EL2_DEBUG + default n + help + Say Y here to enable pKVM hypervisor stacktraces on hyp_panic() + + If you are not using protected nVHE (pKVM), say N. + + If using protected nVHE mode, but cannot afford the associated + memory cost (less than 0.75 page per CPU) of pKVM stacktraces, + say N. + + If unsure, say N. + config NVHE_EL2_DEBUG bool "Debug mode for non-VHE EL2 object" depends on KVM -- cgit v1.2.3 From 6928bcc84bc4bd9a24a1cb1986418c3de76e1d99 Mon Sep 17 00:00:00 2001 From: Kalesh Singh <kaleshsingh@google.com> Date: Tue, 26 Jul 2022 00:37:46 -0700 Subject: KVM: arm64: Allocate shared pKVM hyp stacktrace buffers In protected nVHE mode the host cannot directly access hypervisor memory, so we will dump the hypervisor stacktrace to a shared buffer with the host. The minimum size for the buffer required, assuming the min frame size of [x29, x30] (2 * sizeof(long)), is half the combined size of the hypervisor and overflow stacks plus an additional entry to delimit the end of the stacktrace. The stacktrace buffers are used later in the series to dump the nVHE hypervisor stacktrace when using protected-mode. 
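To make that sizing argument concrete, here is the arithmetic as a sketch (illustrative macro names only; it assumes the PAGE_SIZE hypervisor stack and the OVERFLOW_STACK_SIZE overflow stack set up earlier in this series): a stack of S bytes holds at most S / 16 minimal [x29, x30] frames on arm64 (16 bytes per frame), each frame contributes one saved long (8 bytes) to the trace, so the trace needs at most S / 2 bytes, plus one long for the end-of-trace delimiter.

	/* Illustrative sketch of the buffer bound described above. */
	#define EXAMPLE_HYP_STACK_SIZE		PAGE_SIZE		/* assumed hyp stack size */
	#define EXAMPLE_OVERFLOW_STACK_SIZE	OVERFLOW_STACK_SIZE	/* from earlier in the series */

	/*
	 * Min frame is [x29, x30] (2 * sizeof(long)); each frame yields
	 * one saved entry (sizeof(long)), so half the combined stack size
	 * bounds the trace, and one extra entry delimits its end.
	 */
	#define EXAMPLE_NVHE_STACKTRACE_SIZE \
		(((EXAMPLE_HYP_STACK_SIZE + EXAMPLE_OVERFLOW_STACK_SIZE) / 2) + sizeof(long))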
Signed-off-by: Kalesh Singh <kaleshsingh@google.com> Reviewed-by: Fuad Tabba <tabba@google.com> Tested-by: Fuad Tabba <tabba@google.com> Signed-off-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20220726073750.3219117-14-kaleshsingh@google.com --- arch/arm64/kvm/hyp/nvhe/stacktrace.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/nvhe/stacktrace.c b/arch/arm64/kvm/hyp/nvhe/stacktrace.c index b8a280aa026a..e2edda92a108 100644 --- a/arch/arm64/kvm/hyp/nvhe/stacktrace.c +++ b/arch/arm64/kvm/hyp/nvhe/stacktrace.c @@ -34,6 +34,10 @@ static void hyp_prepare_backtrace(unsigned long fp, unsigned long pc) stacktrace_info->pc = pc; } +#ifdef CONFIG_PROTECTED_NVHE_STACKTRACE +DEFINE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)], pkvm_stacktrace); +#endif /* CONFIG_PROTECTED_NVHE_STACKTRACE */ + /* * kvm_nvhe_prepare_backtrace - prepare to dump the nVHE backtrace * -- cgit v1.2.3 From 871c5d931417d3c0e1aa32c9e04da1dc74703843 Mon Sep 17 00:00:00 2001 From: Kalesh Singh <kaleshsingh@google.com> Date: Tue, 26 Jul 2022 00:37:48 -0700 Subject: KVM: arm64: Save protected-nVHE (pKVM) hyp stacktrace In protected nVHE mode, the host cannot access privately owned hypervisor memory. Also, the hypervisor aims to remain simple to reduce the attack surface and does not provide any printk support. For the above reasons, the approach taken to provide hypervisor stacktraces in protected mode is: 1) Unwind and save the hyp stack addresses in EL2 to a shared buffer with the host (done in this patch). 2) Delegate the dumping and symbolization of the addresses to the host in EL1 (later patch in the series). On hyp_panic(), the hypervisor prepares the stacktrace before returning to the host. Signed-off-by: Kalesh Singh <kaleshsingh@google.com> Signed-off-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20220726073750.3219117-16-kaleshsingh@google.com --- arch/arm64/kvm/hyp/nvhe/stacktrace.c | 55 +++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/nvhe/stacktrace.c b/arch/arm64/kvm/hyp/nvhe/stacktrace.c index e2edda92a108..900324b7a08f 100644 --- a/arch/arm64/kvm/hyp/nvhe/stacktrace.c +++ b/arch/arm64/kvm/hyp/nvhe/stacktrace.c @@ -35,7 +35,60 @@ static void hyp_prepare_backtrace(unsigned long fp, unsigned long pc) } #ifdef CONFIG_PROTECTED_NVHE_STACKTRACE +#include <asm/stacktrace/nvhe.h> + DEFINE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)], pkvm_stacktrace); + +/* + * pkvm_save_backtrace_entry - Saves a protected nVHE HYP stacktrace entry + * + * @arg : index of the entry in the stacktrace buffer + * @where : the program counter corresponding to the stack frame + * + * Save the return address of a stack frame to the shared stacktrace buffer. + * The host can access this shared buffer from EL1 to dump the backtrace. + */ +static bool pkvm_save_backtrace_entry(void *arg, unsigned long where) +{ + unsigned long *stacktrace = this_cpu_ptr(pkvm_stacktrace); + int size = NVHE_STACKTRACE_SIZE / sizeof(long); + int *idx = (int *)arg; + + /* + * Need 2 free slots: 1 for current entry and 1 for the + * delimiter. + */ + if (*idx > size - 2) + return false; + + stacktrace[*idx] = where; + stacktrace[++*idx] = 0UL; + + return true; +} + +/* + * pkvm_save_backtrace - Saves the protected nVHE HYP stacktrace + * + * @fp : frame pointer at which to start the unwinding. + * @pc : program counter at which to start the unwinding.
+ * + * Save the unwinded stack addresses to the shared stacktrace buffer. + * The host can access this shared buffer from EL1 to dump the backtrace. + */ +static void pkvm_save_backtrace(unsigned long fp, unsigned long pc) +{ + struct unwind_state state; + int idx = 0; + + kvm_nvhe_unwind_init(&state, fp, pc); + + unwind(&state, pkvm_save_backtrace_entry, &idx); +} +#else /* !CONFIG_PROTECTED_NVHE_STACKTRACE */ +static void pkvm_save_backtrace(unsigned long fp, unsigned long pc) +{ +} #endif /* CONFIG_PROTECTED_NVHE_STACKTRACE */ /* @@ -50,7 +103,7 @@ DEFINE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)], pkvm_stacktrac void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc) { if (is_protected_kvm_enabled()) - return; + pkvm_save_backtrace(fp, pc); else hyp_prepare_backtrace(fp, pc); } -- cgit v1.2.3 From 3a7e1b55aad45c0cf86bd4e2f212bb9a61905142 Mon Sep 17 00:00:00 2001 From: Kalesh Singh <kaleshsingh@google.com> Date: Tue, 26 Jul 2022 00:37:50 -0700 Subject: KVM: arm64: Introduce pkvm_dump_backtrace() Dumps the pKVM hypervisor backtrace from EL1 by reading the unwinded addresses from the shared stacktrace buffer. The nVHE hyp backtrace is dumped on hyp_panic(), before panicking the host. [ 111.623091] kvm [367]: nVHE call trace: [ 111.623215] kvm [367]: [<ffff8000090a6570>] __kvm_nvhe_hyp_panic+0xac/0xf8 [ 111.623448] kvm [367]: [<ffff8000090a65cc>] __kvm_nvhe_hyp_panic_bad_stack+0x10/0x10 [ 111.623642] kvm [367]: [<ffff8000090a61e4>] __kvm_nvhe_recursive_death+0x24/0x34 . . . [ 111.640366] kvm [367]: [<ffff8000090a61e4>] __kvm_nvhe_recursive_death+0x24/0x34 [ 111.640467] kvm [367]: [<ffff8000090a61e4>] __kvm_nvhe_recursive_death+0x24/0x34 [ 111.640574] kvm [367]: [<ffff8000090a5de4>] __kvm_nvhe___kvm_vcpu_run+0x30/0x40c [ 111.640676] kvm [367]: [<ffff8000090a8b64>] __kvm_nvhe_handle___kvm_vcpu_run+0x30/0x48 [ 111.640778] kvm [367]: [<ffff8000090a88b8>] __kvm_nvhe_handle_trap+0xc4/0x128 [ 111.640880] kvm [367]: [<ffff8000090a7864>] __kvm_nvhe___host_exit+0x64/0x64 [ 111.640996] kvm [367]: ---[ end nVHE call trace ]--- Signed-off-by: Kalesh Singh <kaleshsingh@google.com> Signed-off-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20220726073750.3219117-18-kaleshsingh@google.com --- arch/arm64/kvm/handle_exit.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index e83e6f735100..c14fc4ba4422 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -371,6 +371,39 @@ static void hyp_dump_backtrace(unsigned long hyp_offset) kvm_nvhe_dump_backtrace_end(); } +#ifdef CONFIG_PROTECTED_NVHE_STACKTRACE +DECLARE_KVM_NVHE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)], + pkvm_stacktrace); + +/* + * pkvm_dump_backtrace - Dump the protected nVHE HYP backtrace. + * + * @hyp_offset: hypervisor offset, used for address translation. + * + * Dumping of the pKVM HYP backtrace is done by reading the + * stack addresses from the shared stacktrace buffer, since the + * host cannot directly access hypervisor memory in protected + * mode. 
+ */ +static void pkvm_dump_backtrace(unsigned long hyp_offset) +{ + unsigned long *stacktrace + = (unsigned long *) this_cpu_ptr_nvhe_sym(pkvm_stacktrace); + int i, size = NVHE_STACKTRACE_SIZE / sizeof(long); + + kvm_nvhe_dump_backtrace_start(); + /* The saved stacktrace is terminated by a null entry */ + for (i = 0; i < size && stacktrace[i]; i++) + kvm_nvhe_dump_backtrace_entry((void *)hyp_offset, stacktrace[i]); + kvm_nvhe_dump_backtrace_end(); +} +#else /* !CONFIG_PROTECTED_NVHE_STACKTRACE */ +static void pkvm_dump_backtrace(unsigned long hyp_offset) +{ + kvm_err("Cannot dump pKVM nVHE stacktrace: !CONFIG_PROTECTED_NVHE_STACKTRACE\n"); +} +#endif /* CONFIG_PROTECTED_NVHE_STACKTRACE */ + /* * kvm_nvhe_dump_backtrace - Dump KVM nVHE hypervisor backtrace. * * @hyp_offset: hypervisor offset, used for address translation. */ static void kvm_nvhe_dump_backtrace(unsigned long hyp_offset) { if (is_protected_kvm_enabled()) - return; + pkvm_dump_backtrace(hyp_offset); else hyp_dump_backtrace(hyp_offset); } -- cgit v1.2.3 From 03fe9cd05b9f38353208c23bd791dac47c912054 Mon Sep 17 00:00:00 2001 From: Marc Zyngier <maz@kernel.org> Date: Wed, 27 Jul 2022 15:29:01 +0100 Subject: KVM: arm64: Move PROTECTED_NVHE_STACKTRACE around Make the dependency on EL2_DEBUG more obvious by moving the stacktrace configuration *after* it. Signed-off-by: Marc Zyngier <maz@kernel.org> Reviewed-by: Kalesh Singh <kaleshsingh@google.com> Tested-by: Kalesh Singh <kaleshsingh@google.com> Reviewed-by: Oliver Upton <oliver.upton@linux.dev> Link: https://lore.kernel.org/r/20220727142906.1856759-2-maz@kernel.org --- arch/arm64/kvm/Kconfig | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 09c995869916..815cc118c675 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -46,6 +46,16 @@ menuconfig KVM If unsure, say N. +config NVHE_EL2_DEBUG + bool "Debug mode for non-VHE EL2 object" + depends on KVM + help + Say Y here to enable the debug mode for the non-VHE KVM EL2 object. + Failure reports will BUG() in the hypervisor. This is intended for + local EL2 hypervisor development. + + If unsure, say N. + config PROTECTED_NVHE_STACKTRACE bool "Protected KVM hypervisor stacktraces" depends on NVHE_EL2_DEBUG @@ -53,22 +63,10 @@ config PROTECTED_NVHE_STACKTRACE help Say Y here to enable pKVM hypervisor stacktraces on hyp_panic() - If you are not using protected nVHE (pKVM), say N. - If using protected nVHE mode, but cannot afford the associated memory cost (less than 0.75 page per CPU) of pKVM stacktraces, say N. - If unsure, say N. - -config NVHE_EL2_DEBUG - bool "Debug mode for non-VHE EL2 object" - depends on KVM - help - Say Y here to enable the debug mode for the non-VHE KVM EL2 object. - Failure reports will BUG() in the hypervisor. This is intended for - local EL2 hypervisor development. - - If unsure, say N. + If unsure, or not using protected nVHE (pKVM), say N. endif # VIRTUALIZATION -- cgit v1.2.3 From 9f5fee05f6897d0fe0e3a44ade71bb85cd97b2ef Mon Sep 17 00:00:00 2001 From: Marc Zyngier <maz@kernel.org> Date: Wed, 27 Jul 2022 15:29:02 +0100 Subject: KVM: arm64: Move nVHE stacktrace unwinding into its own compilation unit The unwinding code doesn't really belong to the exit handling code. Instead, move it to a file (conveniently named stacktrace.c to confuse the reviewer), and move all the stacktrace-related stuff there. It will be joined by more code very soon.
Signed-off-by: Marc Zyngier <maz@kernel.org> Reviewed-by: Kalesh Singh <kaleshsingh@google.com> Tested-by: Kalesh Singh <kaleshsingh@google.com> Reviewed-by: Oliver Upton <oliver.upton@linux.dev> Link: https://lore.kernel.org/r/20220727142906.1856759-3-maz@kernel.org --- arch/arm64/kvm/Makefile | 2 +- arch/arm64/kvm/handle_exit.c | 98 ----------------------------------- arch/arm64/kvm/stacktrace.c | 120 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 121 insertions(+), 99 deletions(-) create mode 100644 arch/arm64/kvm/stacktrace.c (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index aa127ae9f675..5e33c2d4645a 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -12,7 +12,7 @@ obj-$(CONFIG_KVM) += hyp/ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ inject_fault.o va_layout.o handle_exit.o \ - guest.o debug.o reset.o sys_regs.o \ + guest.o debug.o reset.o sys_regs.o stacktrace.o \ vgic-sys-reg-v3.o fpsimd.o pkvm.o \ arch_timer.o trng.o vmid.o \ vgic/vgic.o vgic/vgic-init.o \ diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index c14fc4ba4422..ef8b57953aa2 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -319,104 +319,6 @@ void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index) kvm_handle_guest_serror(vcpu, kvm_vcpu_get_esr(vcpu)); } -/* - * kvm_nvhe_dump_backtrace_entry - Symbolize and print an nVHE backtrace entry - * - * @arg : the hypervisor offset, used for address translation - * @where : the program counter corresponding to the stack frame - */ -static bool kvm_nvhe_dump_backtrace_entry(void *arg, unsigned long where) -{ - unsigned long va_mask = GENMASK_ULL(vabits_actual - 1, 0); - unsigned long hyp_offset = (unsigned long)arg; - - /* Mask tags and convert to kern addr */ - where = (where & va_mask) + hyp_offset; - kvm_err(" [<%016lx>] %pB\n", where, (void *)(where + kaslr_offset())); - - return true; -} - -static inline void kvm_nvhe_dump_backtrace_start(void) -{ - kvm_err("nVHE call trace:\n"); -} - -static inline void kvm_nvhe_dump_backtrace_end(void) -{ - kvm_err("---[ end nVHE call trace ]---\n"); -} - -/* - * hyp_dump_backtrace - Dump the non-protected nVHE backtrace. - * - * @hyp_offset: hypervisor offset, used for address translation. - * - * The host can directly access HYP stack pages in non-protected - * mode, so the unwinding is done directly from EL1. This removes - * the need for shared buffers between host and hypervisor for - * the stacktrace. - */ -static void hyp_dump_backtrace(unsigned long hyp_offset) -{ - struct kvm_nvhe_stacktrace_info *stacktrace_info; - struct unwind_state state; - - stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info); - - kvm_nvhe_unwind_init(&state, stacktrace_info->fp, stacktrace_info->pc); - - kvm_nvhe_dump_backtrace_start(); - unwind(&state, kvm_nvhe_dump_backtrace_entry, (void *)hyp_offset); - kvm_nvhe_dump_backtrace_end(); -} - -#ifdef CONFIG_PROTECTED_NVHE_STACKTRACE -DECLARE_KVM_NVHE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)], - pkvm_stacktrace); - -/* - * pkvm_dump_backtrace - Dump the protected nVHE HYP backtrace. - * - * @hyp_offset: hypervisor offset, used for address translation. - * - * Dumping of the pKVM HYP backtrace is done by reading the - * stack addresses from the shared stacktrace buffer, since the - * host cannot directly access hypervisor memory in protected - * mode. 
- */ -static void pkvm_dump_backtrace(unsigned long hyp_offset) -{ - unsigned long *stacktrace - = (unsigned long *) this_cpu_ptr_nvhe_sym(pkvm_stacktrace); - int i, size = NVHE_STACKTRACE_SIZE / sizeof(long); - - kvm_nvhe_dump_backtrace_start(); - /* The saved stacktrace is terminated by a null entry */ - for (i = 0; i < size && stacktrace[i]; i++) - kvm_nvhe_dump_backtrace_entry((void *)hyp_offset, stacktrace[i]); - kvm_nvhe_dump_backtrace_end(); -} -#else /* !CONFIG_PROTECTED_NVHE_STACKTRACE */ -static void pkvm_dump_backtrace(unsigned long hyp_offset) -{ - kvm_err("Cannot dump pKVM nVHE stacktrace: !CONFIG_PROTECTED_NVHE_STACKTRACE\n"); -} -#endif /* CONFIG_PROTECTED_NVHE_STACKTRACE */ - -/* - * kvm_nvhe_dump_backtrace - Dump KVM nVHE hypervisor backtrace. - * - * @hyp_offset: hypervisor offset, used for address translation. - */ -static void kvm_nvhe_dump_backtrace(unsigned long hyp_offset) -{ - if (is_protected_kvm_enabled()) - pkvm_dump_backtrace(hyp_offset); - else - hyp_dump_backtrace(hyp_offset); -} - void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr_virt, u64 elr_phys, u64 par, uintptr_t vcpu, diff --git a/arch/arm64/kvm/stacktrace.c b/arch/arm64/kvm/stacktrace.c new file mode 100644 index 000000000000..9812aefdcfb4 --- /dev/null +++ b/arch/arm64/kvm/stacktrace.c @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * KVM nVHE hypervisor stack tracing support. + * + * The unwinder implementation depends on the nVHE mode: + * + * 1) Non-protected nVHE mode - the host can directly access the + * HYP stack pages and unwind the HYP stack in EL1. This saves having + * to allocate shared buffers for the host to read the unwinded + * stacktrace. + * + * 2) pKVM (protected nVHE) mode - the host cannot directly access + * the HYP memory. The stack is unwinded in EL2 and dumped to a shared + * buffer where the host can read and print the stacktrace. + * + * Copyright (C) 2022 Google LLC + */ + +#include <linux/kvm.h> +#include <linux/kvm_host.h> + +#include <asm/stacktrace/nvhe.h> + +/* + * kvm_nvhe_dump_backtrace_entry - Symbolize and print an nVHE backtrace entry + * + * @arg : the hypervisor offset, used for address translation + * @where : the program counter corresponding to the stack frame + */ +static bool kvm_nvhe_dump_backtrace_entry(void *arg, unsigned long where) +{ + unsigned long va_mask = GENMASK_ULL(vabits_actual - 1, 0); + unsigned long hyp_offset = (unsigned long)arg; + + /* Mask tags and convert to kern addr */ + where = (where & va_mask) + hyp_offset; + kvm_err(" [<%016lx>] %pB\n", where, (void *)(where + kaslr_offset())); + + return true; +} + +static void kvm_nvhe_dump_backtrace_start(void) +{ + kvm_err("nVHE call trace:\n"); +} + +static void kvm_nvhe_dump_backtrace_end(void) +{ + kvm_err("---[ end nVHE call trace ]---\n"); +} + +/* + * hyp_dump_backtrace - Dump the non-protected nVHE backtrace. + * + * @hyp_offset: hypervisor offset, used for address translation. + * + * The host can directly access HYP stack pages in non-protected + * mode, so the unwinding is done directly from EL1. This removes + * the need for shared buffers between host and hypervisor for + * the stacktrace. 
+ */ +static void hyp_dump_backtrace(unsigned long hyp_offset) +{ + struct kvm_nvhe_stacktrace_info *stacktrace_info; + struct unwind_state state; + + stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info); + + kvm_nvhe_unwind_init(&state, stacktrace_info->fp, stacktrace_info->pc); + + kvm_nvhe_dump_backtrace_start(); + unwind(&state, kvm_nvhe_dump_backtrace_entry, (void *)hyp_offset); + kvm_nvhe_dump_backtrace_end(); +} + +#ifdef CONFIG_PROTECTED_NVHE_STACKTRACE +DECLARE_KVM_NVHE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)], + pkvm_stacktrace); + +/* + * pkvm_dump_backtrace - Dump the protected nVHE HYP backtrace. + * + * @hyp_offset: hypervisor offset, used for address translation. + * + * Dumping of the pKVM HYP backtrace is done by reading the + * stack addresses from the shared stacktrace buffer, since the + * host cannot directly access hypervisor memory in protected + * mode. + */ +static void pkvm_dump_backtrace(unsigned long hyp_offset) +{ + unsigned long *stacktrace + = (unsigned long *) this_cpu_ptr_nvhe_sym(pkvm_stacktrace); + int i, size = NVHE_STACKTRACE_SIZE / sizeof(long); + + kvm_nvhe_dump_backtrace_start(); + /* The saved stacktrace is terminated by a null entry */ + for (i = 0; i < size && stacktrace[i]; i++) + kvm_nvhe_dump_backtrace_entry((void *)hyp_offset, stacktrace[i]); + kvm_nvhe_dump_backtrace_end(); +} +#else /* !CONFIG_PROTECTED_NVHE_STACKTRACE */ +static void pkvm_dump_backtrace(unsigned long hyp_offset) +{ + kvm_err("Cannot dump pKVM nVHE stacktrace: !CONFIG_PROTECTED_NVHE_STACKTRACE\n"); +} +#endif /* CONFIG_PROTECTED_NVHE_STACKTRACE */ + +/* + * kvm_nvhe_dump_backtrace - Dump KVM nVHE hypervisor backtrace. + * + * @hyp_offset: hypervisor offset, used for address translation. + */ +void kvm_nvhe_dump_backtrace(unsigned long hyp_offset) +{ + if (is_protected_kvm_enabled()) + pkvm_dump_backtrace(hyp_offset); + else + hyp_dump_backtrace(hyp_offset); +} -- cgit v1.2.3 From 4e00532f37365967e9896966b1fe61888e659259 Mon Sep 17 00:00:00 2001 From: Marc Zyngier <maz@kernel.org> Date: Wed, 27 Jul 2022 15:29:03 +0100 Subject: KVM: arm64: Make unwind()/on_accessible_stack() per-unwinder functions Having multiple versions of on_accessible_stack() (one per unwinder) makes it very hard to reason about what is used where due to the complexity of the various includes, the forward declarations, and the reliance on everything being 'inline'. Instead, move the code back where it should be. Each unwinder implements: - on_accessible_stack() as well as the helpers it depends on, - unwind()/unwind_next(), as they pass on_accessible_stack as a parameter to unwind_next_common() (which is the only common code here) This hardly results in any duplication, and makes it much easier to reason about the code. 
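To see the shape this results in, here is a freestanding sketch of the per-unwinder pattern (illustrative types and names only; the kernel's unwind_state, on_stack() and stack_info carry more detail than this): each unwinder supplies its own accessibility check and next-frame step, and the driving loop stays trivial.

	/* Illustrative sketch, not kernel code. */
	#include <stdbool.h>
	#include <stdint.h>

	struct example_state {
		uintptr_t fp;				/* current frame pointer */
		uintptr_t pc;				/* current return address */
		uintptr_t stack_low, stack_high;	/* bounds of the one stack we know */
	};

	/* Per-unwinder: is the frame record at fp within one of our stacks? */
	static bool example_on_accessible_stack(const struct example_state *s)
	{
		return s->fp >= s->stack_low &&
		       s->fp + 2 * sizeof(uintptr_t) <= s->stack_high;
	}

	/* Per-unwinder: step to the previous frame via the [fp, lr] record. */
	static int example_unwind_next(struct example_state *s)
	{
		const uintptr_t *frame = (const uintptr_t *)s->fp;

		if (!example_on_accessible_stack(s))
			return -1;

		s->fp = frame[0];	/* saved frame pointer (x29) */
		s->pc = frame[1];	/* saved return address (x30) */
		return 0;
	}

	/* Common driver: feed entries to a consumer until it stops or we fall off. */
	static void example_unwind(struct example_state *s,
				   bool (*consume_entry)(void *cookie, uintptr_t pc),
				   void *cookie)
	{
		while (consume_entry(cookie, s->pc) && !example_unwind_next(s))
			;
	}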
Signed-off-by: Marc Zyngier <maz@kernel.org> Reviewed-by: Kalesh Singh <kaleshsingh@google.com> Tested-by: Kalesh Singh <kaleshsingh@google.com> Reviewed-by: Oliver Upton <oliver.upton@linux.dev> Link: https://lore.kernel.org/r/20220727142906.1856759-4-maz@kernel.org --- arch/arm64/kvm/hyp/nvhe/stacktrace.c | 52 ++++++++++++++++++++++++++++++++++ arch/arm64/kvm/stacktrace.c | 55 ++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/nvhe/stacktrace.c b/arch/arm64/kvm/hyp/nvhe/stacktrace.c index 900324b7a08f..acbe272ecb32 100644 --- a/arch/arm64/kvm/hyp/nvhe/stacktrace.c +++ b/arch/arm64/kvm/hyp/nvhe/stacktrace.c @@ -39,6 +39,58 @@ static void hyp_prepare_backtrace(unsigned long fp, unsigned long pc) DEFINE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)], pkvm_stacktrace); +static bool on_overflow_stack(unsigned long sp, unsigned long size, + struct stack_info *info) +{ + unsigned long low = (unsigned long)this_cpu_ptr(overflow_stack); + unsigned long high = low + OVERFLOW_STACK_SIZE; + + return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info); +} + +static bool on_hyp_stack(unsigned long sp, unsigned long size, + struct stack_info *info) +{ + struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params); + unsigned long high = params->stack_hyp_va; + unsigned long low = high - PAGE_SIZE; + + return on_stack(sp, size, low, high, STACK_TYPE_HYP, info); +} + +static bool on_accessible_stack(const struct task_struct *tsk, + unsigned long sp, unsigned long size, + struct stack_info *info) +{ + if (info) + info->type = STACK_TYPE_UNKNOWN; + + return (on_overflow_stack(sp, size, info) || + on_hyp_stack(sp, size, info)); +} + +static int unwind_next(struct unwind_state *state) +{ + struct stack_info info; + + return unwind_next_common(state, &info, on_accessible_stack, NULL); +} + +static void notrace unwind(struct unwind_state *state, + stack_trace_consume_fn consume_entry, + void *cookie) +{ + while (1) { + int ret; + + if (!consume_entry(cookie, state->pc)) + break; + ret = unwind_next(state); + if (ret < 0) + break; + } +} + /* * pkvm_save_backtrace_entry - Saves a protected nVHE HYP stacktrace entry * diff --git a/arch/arm64/kvm/stacktrace.c b/arch/arm64/kvm/stacktrace.c index 9812aefdcfb4..4d5fec3175ff 100644 --- a/arch/arm64/kvm/stacktrace.c +++ b/arch/arm64/kvm/stacktrace.c @@ -21,6 +21,61 @@ #include <asm/stacktrace/nvhe.h> +static bool on_overflow_stack(unsigned long sp, unsigned long size, + struct stack_info *info) +{ + struct kvm_nvhe_stacktrace_info *stacktrace_info + = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info); + unsigned long low = (unsigned long)stacktrace_info->overflow_stack_base; + unsigned long high = low + OVERFLOW_STACK_SIZE; + + return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info); +} + +static bool on_hyp_stack(unsigned long sp, unsigned long size, + struct stack_info *info) +{ + struct kvm_nvhe_stacktrace_info *stacktrace_info + = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info); + unsigned long low = (unsigned long)stacktrace_info->stack_base; + unsigned long high = low + PAGE_SIZE; + + return on_stack(sp, size, low, high, STACK_TYPE_HYP, info); +} + +static bool on_accessible_stack(const struct task_struct *tsk, + unsigned long sp, unsigned long size, + struct stack_info *info) +{ + if (info) + info->type = STACK_TYPE_UNKNOWN; + + return (on_overflow_stack(sp, size, info) || + on_hyp_stack(sp, size, info)); +} + +static int unwind_next(struct unwind_state 
*state) +{ + struct stack_info info; + + return unwind_next_common(state, &info, on_accessible_stack, + kvm_nvhe_stack_kern_va); +} + +static void unwind(struct unwind_state *state, + stack_trace_consume_fn consume_entry, void *cookie) +{ + while (1) { + int ret; + + if (!consume_entry(cookie, state->pc)) + break; + ret = unwind_next(state); + if (ret < 0) + break; + } +} + /* * kvm_nvhe_dump_backtrace_entry - Symbolize and print an nVHE backtrace entry * -- cgit v1.2.3 From 0e773da1e688a1425ef7deae58fa11c5c7e09533 Mon Sep 17 00:00:00 2001 From: Marc Zyngier <maz@kernel.org> Date: Wed, 27 Jul 2022 15:29:04 +0100 Subject: KVM: arm64: Move nVHE-only helpers into kvm/stacktrace.c kvm_nvhe_stack_kern_va() only makes sense as part of the nVHE unwinder, so simply move it there. Signed-off-by: Marc Zyngier <maz@kernel.org> Reviewed-by: Kalesh Singh <kaleshsingh@google.com> Tested-by: Kalesh Singh <kaleshsingh@google.com> Reviewed-by: Oliver Upton <oliver.upton@linux.dev> Link: https://lore.kernel.org/r/20220727142906.1856759-5-maz@kernel.org --- arch/arm64/kvm/stacktrace.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/stacktrace.c b/arch/arm64/kvm/stacktrace.c index 4d5fec3175ff..417665854f86 100644 --- a/arch/arm64/kvm/stacktrace.c +++ b/arch/arm64/kvm/stacktrace.c @@ -21,6 +21,47 @@ #include <asm/stacktrace/nvhe.h> +/* + * kvm_nvhe_stack_kern_va - Convert KVM nVHE HYP stack addresses to a kernel VAs + * + * The nVHE hypervisor stack is mapped in the flexible 'private' VA range, to + * allow for guard pages below the stack. Consequently, the fixed offset address + * translation macros won't work here. + * + * The kernel VA is calculated as an offset from the kernel VA of the hypervisor + * stack base. + * + * Returns true on success and updates @addr to its corresponding kernel VA; + * otherwise returns false. + */ +static bool kvm_nvhe_stack_kern_va(unsigned long *addr, + enum stack_type type) +{ + struct kvm_nvhe_stacktrace_info *stacktrace_info; + unsigned long hyp_base, kern_base, hyp_offset; + + stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info); + + switch (type) { + case STACK_TYPE_HYP: + kern_base = (unsigned long)*this_cpu_ptr(&kvm_arm_hyp_stack_page); + hyp_base = (unsigned long)stacktrace_info->stack_base; + break; + case STACK_TYPE_OVERFLOW: + kern_base = (unsigned long)this_cpu_ptr_nvhe_sym(overflow_stack); + hyp_base = (unsigned long)stacktrace_info->overflow_stack_base; + break; + default: + return false; + } + + hyp_offset = *addr - hyp_base; + + *addr = kern_base + hyp_offset; + + return true; +} + static bool on_overflow_stack(unsigned long sp, unsigned long size, struct stack_info *info) { -- cgit v1.2.3 From 62ae21627aa96f6ef361981dd181c74dc7aa314c Mon Sep 17 00:00:00 2001 From: Oliver Upton <oliver.upton@linux.dev> Date: Wed, 27 Jul 2022 15:29:05 +0100 Subject: KVM: arm64: Don't open code ARRAY_SIZE() Use ARRAY_SIZE() instead of an open-coded version. 
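For reference, the difference in a nutshell (an illustrative snippet, not the patched code itself): ARRAY_SIZE(buf) expands to sizeof(buf) / sizeof(buf[0]), so it tracks the array's declaration automatically, while the open-coded division duplicates the sizing math and can drift from it.

	unsigned long buf[NVHE_STACKTRACE_SIZE / sizeof(long)];

	/* Open-coded: repeats the expression used to declare buf. */
	size_t n_open_coded = NVHE_STACKTRACE_SIZE / sizeof(long);

	/* Macro: derived from buf itself, so it follows any redeclaration. */
	size_t n_array_size = ARRAY_SIZE(buf);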
Signed-off-by: Oliver Upton <oliver.upton@linux.dev> Signed-off-by: Marc Zyngier <maz@kernel.org> Reviewed-by: Kalesh Singh <kaleshsingh@google.com> Tested-by: Kalesh Singh <kaleshsingh@google.com> Link: https://lore.kernel.org/r/20220727142906.1856759-6-maz@kernel.org --- arch/arm64/kvm/hyp/nvhe/stacktrace.c | 3 +-- arch/arm64/kvm/stacktrace.c | 6 ++++-- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/arm64/kvm') diff --git a/arch/arm64/kvm/hyp/nvhe/stacktrace.c b/arch/arm64/kvm/hyp/nvhe/stacktrace.c index acbe272ecb32..58f645ad66bc 100644 --- a/arch/arm64/kvm/hyp/nvhe/stacktrace.c +++ b/arch/arm64/kvm/hyp/nvhe/stacktrace.c @@ -103,14 +103,13 @@ static void notrace unwind(struct unwind_state *state, static bool pkvm_save_backtrace_entry(void *arg, unsigned long where) { unsigned long *stacktrace = this_cpu_ptr(pkvm_stacktrace); - int size = NVHE_STACKTRACE_SIZE / sizeof(long); int *idx = (int *)arg; /* * Need 2 free slots: 1 for current entry and 1 for the * delimiter. */ - if (*idx > size - 2) + if (*idx > ARRAY_SIZE(pkvm_stacktrace) - 2) return false; stacktrace[*idx] = where; diff --git a/arch/arm64/kvm/stacktrace.c b/arch/arm64/kvm/stacktrace.c index 417665854f86..949d19d603fb 100644 --- a/arch/arm64/kvm/stacktrace.c +++ b/arch/arm64/kvm/stacktrace.c @@ -187,11 +187,13 @@ static void pkvm_dump_backtrace(unsigned long hyp_offset) { unsigned long *stacktrace = (unsigned long *) this_cpu_ptr_nvhe_sym(pkvm_stacktrace); - int i, size = NVHE_STACKTRACE_SIZE / sizeof(long); + int i; kvm_nvhe_dump_backtrace_start(); /* The saved stacktrace is terminated by a null entry */ - for (i = 0; i < size && stacktrace[i]; i++) + for (i = 0; + i < ARRAY_SIZE(kvm_nvhe_sym(pkvm_stacktrace)) && stacktrace[i]; + i++) kvm_nvhe_dump_backtrace_entry((void *)hyp_offset, stacktrace[i]); kvm_nvhe_dump_backtrace_end(); } -- cgit v1.2.3